Publications

90 / 3,034 publications found.


  •  Khurana, S., Moritz, N., Hori, T., Le Roux, J., "Unsupervised Domain Adaptation for Speech Recognition Via Uncertainty Driven Self-Training", arXiv, December 2020.
    BibTeX arXiv
    • @article{Khurana2020dec,
    • author = {Khurana, Sameer and Moritz, Niko and Hori, Takaaki and Le Roux, Jonathan},
    • title = {Unsupervised Domain Adaptation for Speech Recognition Via Uncertainty Driven Self-Training},
    • journal = {arXiv},
    • year = 2020,
    • month = dec,
    • url = {https://arxiv.org/abs/2011.13439}
    • }
  •  Hung, Y.-N., Wichern, G., Le Roux, J., "Transcription is All You Need: Learning to Separate Musical Mixtures with Score as Supervision", arXiv, November 2020.
    BibTeX arXiv
    • @article{Hung2020nov,
    • author = {Hung, Yun-Ning and Wichern, Gordon and Le Roux, Jonathan},
    • title = {Transcription is All You Need: Learning to Separate Musical Mixtures with Score as Supervision},
    • journal = {arXiv},
    • year = 2020,
    • month = nov,
    • url = {https://arxiv.org/abs/2010.11904}
    • }
  •  Moritz, N., Hori, T., Le Roux, J., "Semi-Supervised Speech Recognition Via Graph-Based Temporal Classification", arXiv, October 2020.
    BibTeX arXiv
    • @article{Moritz2020oct2,
    • author = {Moritz, Niko and Hori, Takaaki and Le Roux, Jonathan},
    • title = {Semi-Supervised Speech Recognition Via Graph-Based Temporal Classification},
    • journal = {arXiv},
    • year = 2020,
    • month = oct,
    • url = {https://arxiv.org/abs/2010.15653}
    • }
  •  Hori, T., Moritz, N., Hori, C., Le Roux, J., "Transformer-based Long-context End-to-end Speech Recognition", Annual Conference of the International Speech Communication Association (Interspeech), DOI: 10.21437/Interspeech.2020-2928, October 2020, pp. 5011-5015.
    BibTeX TR2020-139 PDF
    • @inproceedings{Hori2020oct,
    • author = {Hori, Takaaki and Moritz, Niko and Hori, Chiori and Le Roux, Jonathan},
    • title = {Transformer-based Long-context End-to-end Speech Recognition},
    • booktitle = {Annual Conference of the International Speech Communication Association (Interspeech)},
    • year = 2020,
    • pages = {5011--5015},
    • month = oct,
    • doi = {10.21437/Interspeech.2020-2928},
    • issn = {1990-9772},
    • url = {https://www.merl.com/publications/TR2020-139}
    • }
  •  Jayashankar, T., Le Roux, J., Moulin, P., "Detecting Audio Attacks on ASR Systems with Dropout Uncertainty", Annual Conference of the International Speech Communication Association (Interspeech), DOI: 10.21437/Interspeech.2020-1846, October 2020, pp. 4671-4675.
    BibTeX TR2020-137 PDF
    • @inproceedings{Jayashankar2020oct,
    • author = {Jayashankar, Tejas and Le Roux, Jonathan and Moulin, Pierre},
    • title = {Detecting Audio Attacks on ASR Systems with Dropout Uncertainty},
    • booktitle = {Annual Conference of the International Speech Communication Association (Interspeech)},
    • year = 2020,
    • pages = {4671--4675},
    • month = oct,
    • doi = {10.21437/Interspeech.2020-1846},
    • issn = {1990-9772},
    • url = {https://www.merl.com/publications/TR2020-137}
    • }
  •  Moritz, N., Wichern, G., Hori, T., Le Roux, J., "All-in-One Transformer: Unifying Speech Recognition, Audio Tagging, and Event Detection", Annual Conference of the International Speech Communication Association (Interspeech), DOI: 10.21437/Interspeech.2020-2757, October 2020, pp. 3112-3116.
    BibTeX TR2020-138 PDF
    • @inproceedings{Moritz2020oct,
    • author = {Moritz, Niko and Wichern, Gordon and Hori, Takaaki and Le Roux, Jonathan},
    • title = {All-in-One Transformer: Unifying Speech Recognition, Audio Tagging, and Event Detection},
    • booktitle = {Annual Conference of the International Speech Communication Association (Interspeech)},
    • year = 2020,
    • pages = {3112--3116},
    • month = oct,
    • doi = {10.21437/Interspeech.2020-2757},
    • issn = {1990-9772},
    • url = {https://www.merl.com/publications/TR2020-138}
    • }
  •  Manilow, E., Wichern, G., Le Roux, J., "Hierarchical Musical Instrument Separation", International Society for Music Information Retrieval (ISMIR) Conference, October 2020, pp. 376-383.
    BibTeX TR2020-136 PDF
    • @inproceedings{Manilow2020oct,
    • author = {Manilow, Ethan and Wichern, Gordon and Le Roux, Jonathan},
    • title = {Hierarchical Musical Instrument Separation},
    • booktitle = {International Society for Music Information Retrieval (ISMIR) Conference},
    • year = 2020,
    • pages = {376--383},
    • month = oct,
    • isbn = {978-0-9813537-0-8},
    • url = {https://www.merl.com/publications/TR2020-136}
    • }
  •  Seetharaman, P., Wichern, G., Pardo, B., Le Roux, J., "Autoclip: Adaptive Gradient Clipping For Source Separation Networks", IEEE International Workshop on Machine Learning for Signal Processing (MLSP), DOI: 10.1109/MLSP49062.2020.9231926, September 2020.
    BibTeX TR2020-132 PDF
    • @inproceedings{Seetharaman2020sep,
    • author = {Seetharaman, Prem and Wichern, Gordon and Pardo, Bryan and Le Roux, Jonathan},
    • title = {Autoclip: Adaptive Gradient Clipping For Source Separation Networks},
    • booktitle = {IEEE International Workshop on Machine Learning for Signal Processing (MLSP)},
    • year = 2020,
    • month = sep,
    • publisher = {IEEE},
    • doi = {10.1109/MLSP49062.2020.9231926},
    • url = {https://www.merl.com/publications/TR2020-132}
    • }
  •  Gao, P., Hori, C., Geng, S., Hori, T., Le Roux, J., "Multi-Pass Transformer for Machine Translation", arXiv, September 2020.
    BibTeX arXiv
    • @article{Gao2020sep,
    • author = {Gao, Peng and Hori, Chiori and Geng, Shijie and Hori, Takaaki and Le Roux, Jonathan},
    • title = {Multi-Pass Transformer for Machine Translation},
    • journal = {arXiv},
    • year = 2020,
    • month = sep,
    • url = {https://arxiv.org/abs/2009.11382}
    • }
  •  Pishdadian, F., Wichern, G., Le Roux, J., "Finding Strength in Weakness: Learning to Separate Sounds with Weak Supervision", IEEE/ACM Transactions on Audio, Speech, and Language Processing, DOI: 10.1109/TASLP.2020.3013105, Vol. 28, pp. 2386-2399, September 2020.
    BibTeX TR2020-126 PDF
    • @article{Pishdadian2020sep,
    • author = {Pishdadian, Fatemeh and Wichern, Gordon and Le Roux, Jonathan},
    • title = {Finding Strength in Weakness: Learning to Separate Sounds with Weak Supervision},
    • journal = {IEEE/ACM Transactions on Audio, Speech, and Language Processing},
    • year = 2020,
    • volume = 28,
    • pages = {2386--2399},
    • month = sep,
    • doi = {10.1109/TASLP.2020.3013105},
    • url = {https://www.merl.com/publications/TR2020-126}
    • }
  •  Seetharaman, P., Wichern, G., Le Roux, J., Pardo, B., "Bootstrapping Unsupervised Deep Music Separation from Primitive Auditory Grouping Principles", ICML 2020 Workshop on Self-supervision in Audio and Speech, July 2020.
    BibTeX TR2020-111 PDF
    • @inproceedings{Seetharaman2020jul,
    • author = {Seetharaman, Prem and Wichern, Gordon and Le Roux, Jonathan and Pardo, Bryan},
    • title = {Bootstrapping Unsupervised Deep Music Separation from Primitive Auditory Grouping Principles},
    • booktitle = {ICML 2020 Workshop on Self-supervision in Audio and Speech},
    • year = 2020,
    • month = jul,
    • url = {https://www.merl.com/publications/TR2020-111}
    • }
  •  Geng, S., Gao, P., Hori, C., Le Roux, J., Cherian, A., "Spatio-Temporal Scene Graphs for Video Dialog", arXiv, July 2020.
    BibTeX arXiv
    • @article{Geng2020jul,
    • author = {Geng, Shijie and Gao, Peng and Hori, Chiori and Le Roux, Jonathan and Cherian, Anoop},
    • title = {Spatio-Temporal Scene Graphs for Video Dialog},
    • journal = {arXiv},
    • year = 2020,
    • month = jul,
    • url = {https://arxiv.org/abs/2007.03848}
    • }
  •  Chang, X., Zhang, W., Qian, Y., Le Roux, J., Watanabe, S., "End-To-End Multi-Speaker Speech Recognition with Transformer", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP40776.2020.9054029, April 2020, pp. 6134-6138.
    BibTeX TR2020-043 PDF Video
    • @inproceedings{Chang2020apr,
    • author = {Chang, Xuankai and Zhang, Wangyou and Qian, Yanmin and Le Roux, Jonathan and Watanabe, Shinji},
    • title = {End-To-End Multi-Speaker Speech Recognition with Transformer},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2020,
    • pages = {6134--6138},
    • month = apr,
    • publisher = {IEEE},
    • doi = {10.1109/ICASSP40776.2020.9054029},
    • issn = {2379-190X},
    • isbn = {978-1-5090-6631-5},
    • url = {https://www.merl.com/publications/TR2020-043}
    • }
  •  Pishdadian, F., Wichern, G., Le Roux, J., "Learning to Separate Sounds From Weakly Labeled Scenes", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP40776.2020.9053055, April 2020, pp. 91-95.
    BibTeX TR2020-038 PDF Video
    • @inproceedings{Pishdadian2020apr,
    • author = {Pishdadian, Fatemeh and Wichern, Gordon and Le Roux, Jonathan},
    • title = {Learning to Separate Sounds From Weakly Labeled Scenes},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2020,
    • pages = {91--95},
    • month = apr,
    • publisher = {IEEE},
    • doi = {10.1109/ICASSP40776.2020.9053055},
    • issn = {2379-190X},
    • isbn = {978-1-5090-6631-5},
    • url = {https://www.merl.com/publications/TR2020-038}
    • }
  •  Maciejewski, M., Wichern, G., McQuinn, E., Le Roux, J., "WHAMR!: Noisy and Reverberant Single-Channel Speech Separation", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP40776.2020.9053327, April 2020, pp. 696-700.
    BibTeX TR2020-042 PDF Video
    • @inproceedings{Maciejewski2020apr,
    • author = {Maciejewski, Matthew and Wichern, Gordon and McQuinn, Emmett and Le Roux, Jonathan},
    • title = {WHAMR!: Noisy and Reverberant Single-Channel Speech Separation},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2020,
    • pages = {696--700},
    • month = apr,
    • publisher = {IEEE},
    • doi = {10.1109/ICASSP40776.2020.9053327},
    • issn = {2379-190X},
    • isbn = {978-1-5090-6631-5},
    • url = {https://www.merl.com/publications/TR2020-042}
    • }
  •  Moritz, N., Hori, T., Le Roux, J., "Streaming Automatic Speech Recognition With The Transformer Model", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP40776.2020.9054476, April 2020, pp. 6074-6078.
    BibTeX TR2020-040 PDF Video
    • @inproceedings{Moritz2020apr,
    • author = {Moritz, Niko and Hori, Takaaki and Le Roux, Jonathan},
    • title = {Streaming Automatic Speech Recognition With The Transformer Model},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2020,
    • pages = {6074--6078},
    • month = apr,
    • publisher = {IEEE},
    • doi = {10.1109/ICASSP40776.2020.9054476},
    • issn = {2379-190X},
    • isbn = {978-1-5090-6631-5},
    • url = {https://www.merl.com/publications/TR2020-040}
    • }
  •  Sari, L., Moritz, N., Hori, T., Le Roux, J., "Unsupervised Speaker Adaptation Using Attention-Based Speaker Memory For End-To-End ASR", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP40776.2020.9054249, April 2020, pp. 7384-7388.
    BibTeX TR2020-037 PDF Video
    • @inproceedings{Sari2020apr,
    • author = {Sari, Leda and Moritz, Niko and Hori, Takaaki and Le Roux, Jonathan},
    • title = {Unsupervised Speaker Adaptation Using Attention-Based Speaker Memory For End-To-End ASR},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2020,
    • pages = {7384--7388},
    • month = apr,
    • publisher = {IEEE},
    • doi = {10.1109/ICASSP40776.2020.9054249},
    • issn = {2379-190X},
    • isbn = {978-1-5090-6631-5},
    • url = {https://www.merl.com/publications/TR2020-037}
    • }
  •  Aihara, R., Wichern, G., Le Roux, J., "Deep clustering-based single-channel speech separation and recent advances", Acoustical Science and Technology, DOI: 10.1250/ast.41.465, Vol. 41, No. 2, pp. 465-471, March 2020.
    BibTeX J-STAGE
    • @article{Aihara2020jun,
    • author = {Aihara, Ryo and Wichern, Gordon and Le Roux, Jonathan},
    • title = {Deep clustering-based single-channel speech separation and recent advances},
    • journal = {Acoustical Science and Technology},
    • year = 2020,
    • volume = 41,
    • number = 2,
    • pages = {465--471},
    • month = mar,
    • doi = {10.1250/ast.41.465},
    • url = {https://www.jstage.jst.go.jp/article/ast/41/2/41_E20202/_article}
    • }
  •  Chang, X., Zhang, W., Qian, Y., Le Roux, J., Watanabe, S., "MIMO-Speech: End-to-End Multi-Channel Multi-Speaker Speech Recognition", IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU), December 2019, pp. 237-244.
    BibTeX TR2019-157 PDF
    • @inproceedings{Chang2019dec,
    • author = {Chang, Xuankai and Zhang, Wangyou and Qian, Yanmin and Le Roux, Jonathan and Watanabe, Shinji},
    • title = {MIMO-Speech: End-to-End Multi-Channel Multi-Speaker Speech Recognition},
    • booktitle = {IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU)},
    • year = 2019,
    • pages = {237--244},
    • month = dec,
    • isbn = {978-1-7281-0305-1},
    • url = {https://www.merl.com/publications/TR2019-157}
    • }
  •  Moritz, N., Hori, T., Le Roux, J., "Streaming End-to-End Speech Recognition with Joint CTC-Attention Based Models", IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU), December 2019, pp. 936-943.
    BibTeX TR2019-159 PDF
    • @inproceedings{Moritz2019dec,
    • author = {Moritz, Niko and Hori, Takaaki and Le Roux, Jonathan},
    • title = {Streaming End-to-End Speech Recognition with Joint CTC-Attention Based Models},
    • booktitle = {IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU)},
    • year = 2019,
    • pages = {936--943},
    • month = dec,
    • isbn = {978-1-7281-0305-1},
    • url = {https://www.merl.com/publications/TR2019-159}
    • }
  •  Kavalerov, I., Wisdom, S., Erdogan, H., Patton, B., Wilson, K., Le Roux, J., Hershey, J., "Universal Sound Separation", IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (WASPAA), DOI: 10.1109/WASPAA.2019.8937253, October 2019, pp. 170-174.
    BibTeX TR2019-123 PDF
    • @inproceedings{Kavalerov2019oct,
    • author = {Kavalerov, Ilya and Wisdom, Scott and Erdogan, Hakan and Patton, Brian and Wilson, Kevin and Le Roux, Jonathan and Hershey, John},
    • title = {Universal Sound Separation},
    • booktitle = {IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (WASPAA)},
    • year = 2019,
    • pages = {170--174},
    • month = oct,
    • doi = {10.1109/WASPAA.2019.8937253},
    • issn = {1947-1629},
    • isbn = {978-1-7281-1123-0},
    • url = {https://www.merl.com/publications/TR2019-123}
    • }
  •  Manilow, E., Wichern, G., Seetharaman, P., Le Roux, J., "Cutting Music Source Separation Some Slakh: A Dataset to Study the Impact of Training Data Quality and Quantity", IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (WASPAA), DOI: 10.1109/WASPAA.2019.8937170, October 2019, pp. 45-49.
    BibTeX TR2019-124 PDF
    • @inproceedings{Manilow2019oct,
    • author = {Manilow, Ethan and Wichern, Gordon and Seetharaman, Prem and Le Roux, Jonathan},
    • title = {Cutting Music Source Separation Some Slakh: A Dataset to Study the Impact of Training Data Quality and Quantity},
    • booktitle = {IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (WASPAA)},
    • year = 2019,
    • pages = {45--49},
    • month = oct,
    • doi = {10.1109/WASPAA.2019.8937170},
    • issn = {1947-1629},
    • isbn = {978-1-7281-1123-0},
    • url = {https://www.merl.com/publications/TR2019-124}
    • }
  •  Moritz, N., Hori, T., Le Roux, J., "Unidirectional Neural Network Architectures for End-to-End Automatic Speech Recognition", Interspeech, DOI: 10.21437/Interspeech.2019-2837, September 2019, pp. 76-80.
    BibTeX TR2019-098 PDF
    • @inproceedings{Moritz2019sep,
    • author = {Moritz, Niko and Hori, Takaaki and Le Roux, Jonathan},
    • title = {Unidirectional Neural Network Architectures for End-to-End Automatic Speech Recognition},
    • booktitle = {Interspeech},
    • year = 2019,
    • pages = {76--80},
    • month = sep,
    • doi = {10.21437/Interspeech.2019-2837},
    • url = {https://www.merl.com/publications/TR2019-098}
    • }
  •  Seki, H., Hori, T., Watanabe, S., Le Roux, J., Hershey, J., "End-to-End Multilingual Multi-Speaker Speech Recognition", Interspeech, DOI: 10.21437/Interspeech.2019-3038, September 2019, pp. 3755-3759.
    BibTeX TR2019-101 PDF
    • @inproceedings{Seki2019sep,
    • author = {Seki, Hiroshi and Hori, Takaaki and Watanabe, Shinji and Le Roux, Jonathan and Hershey, John},
    • title = {End-to-End Multilingual Multi-Speaker Speech Recognition},
    • booktitle = {Interspeech},
    • year = 2019,
    • pages = {3755--3759},
    • month = sep,
    • doi = {10.21437/Interspeech.2019-3038},
    • url = {https://www.merl.com/publications/TR2019-101}
    • }
  •  Seki, H., Hori, T., Watanabe, S., Moritz, N., Le Roux, J., "Vectorized Beam Search for CTC-Attention-based Speech Recognition", Interspeech, DOI: 10.21437/Interspeech.2019-2860, September 2019, pp. 3825-3829.
    BibTeX TR2019-102 PDF
    • @inproceedings{Seki2019sep2,
    • author = {Seki, Hiroshi and Hori, Takaaki and Watanabe, Shinji and Moritz, Niko and Le Roux, Jonathan},
    • title = {Vectorized Beam Search for CTC-Attention-based Speech Recognition},
    • booktitle = {Interspeech},
    • year = 2019,
    • pages = {3825--3829},
    • month = sep,
    • doi = {10.21437/Interspeech.2019-2860},
    • url = {https://www.merl.com/publications/TR2019-102}
    • }