Publications

63 / 3,006 publications found.


  •  Hori, T., Moritz, N., Hori, C., Le Roux, J., "Transformer-based Long-context End-to-end Speech Recognition", Annual Conference of the International Speech Communication Association (Interspeech), October 2020.
    BibTeX TR2020-139 PDF
    • @inproceedings{Hori2020oct,
    • author = {Hori, Takaaki and Moritz, Niko and Hori, Chiori and Le Roux, Jonathan},
    • title = {Transformer-based Long-context End-to-end Speech Recognition},
    • booktitle = {Annual Conference of the International Speech Communication Association (Interspeech)},
    • year = 2020,
    • month = oct,
    • url = {https://www.merl.com/publications/TR2020-139}
    • }
  •  Jayashankar, T., Le Roux, J., Moulin, P., "Detecting Audio Attacks on ASR Systems with Dropout Uncertainty", Annual Conference of the International Speech Communication Association (Interspeech), October 2020.
    BibTeX TR2020-137 PDF
    • @inproceedings{Jayashankar2020oct,
    • author = {Jayashankar, Tejas and Le Roux, Jonathan and Moulin, Pierre},
    • title = {Detecting Audio Attacks on ASR Systems with Dropout Uncertainty},
    • booktitle = {Annual Conference of the International Speech Communication Association (Interspeech)},
    • year = 2020,
    • month = oct,
    • url = {https://www.merl.com/publications/TR2020-137}
    • }
  •  Moritz, N., Wichern, G., Hori, T., Le Roux, J., "All-in-One Transformer: Unifying Speech Recognition, Audio Tagging, and Event Detection", Annual Conference of the International Speech Communication Association (Interspeech), October 2020.
    BibTeX TR2020-138 PDF
    • @inproceedings{Moritz2020oct,
    • author = {Moritz, Niko and Wichern, Gordon and Hori, Takaaki and Le Roux, Jonathan},
    • title = {All-in-One Transformer: Unifying Speech Recognition, Audio Tagging, and Event Detection},
    • booktitle = {Annual Conference of the International Speech Communication Association (Interspeech)},
    • year = 2020,
    • month = oct,
    • url = {https://www.merl.com/publications/TR2020-138}
    • }
  •  Moritz, N., Hori, T., Le Roux, J., "Streaming Automatic Speech Recognition With The Transformer Model", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP40776.2020.9054476, April 2020, pp. 6074-6078.
    BibTeX TR2020-040 PDF Video
    • @inproceedings{Moritz2020apr,
    • author = {Moritz, Niko and Hori, Takaaki and Le Roux, Jonathan},
    • title = {Streaming Automatic Speech Recognition With The Transformer Model},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2020,
    • pages = {6074--6078},
    • month = apr,
    • publisher = {IEEE},
    • doi = {10.1109/ICASSP40776.2020.9054476},
    • issn = {2379-190X},
    • isbn = {978-1-5090-6631-5},
    • url = {https://www.merl.com/publications/TR2020-040}
    • }
  •  Sari, L., Moritz, N., Hori, T., Le Roux, J., "Unsupervised Speaker Adaptation Using Attention-Based Speaker Memory For End-To-End ASR", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP40776.2020.9054249, April 2020, pp. 7384-7388.
    BibTeX TR2020-037 PDF Video
    • @inproceedings{Sari2020apr,
    • author = {Sari, Leda and Moritz, Niko and Hori, Takaaki and Le Roux, Jonathan},
    • title = {Unsupervised Speaker Adaptation Using Attention-Based Speaker Memory For End-To-End ASR},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2020,
    • pages = {7384--7388},
    • month = apr,
    • publisher = {IEEE},
    • doi = {10.1109/ICASSP40776.2020.9054249},
    • issn = {2379-190X},
    • isbn = {978-1-5090-6631-5},
    • url = {https://www.merl.com/publications/TR2020-037}
    • }
  •  Li, R., Wang, X., Mallidi, H., Watanabe, S., Hori, T., Hermansky, H., "Multi-Stream End-to-End Speech Recognition", IEEE/ACM Transactions on Audio, Speech and Language Processing, DOI: 10.1109/TASLP.2019.2959721, Vol. 28, pp. 646-655, March 2020.
    BibTeX TR2020-030 PDF
    • @article{Li2020mar,
    • author = {Li, Ruizhi and Wang, Xiaofei and Mallidi, Harish and Watanabe, Shinji and Hori, Takaaki and Hermansky, Hynek},
    • title = {Multi-Stream End-to-End Speech Recognition},
    • journal = {IEEE/ACM Transactions on Audio, Speech and Language Processing},
    • year = 2020,
    • volume = 28,
    • pages = {646--655},
    • month = mar,
    • doi = {10.1109/TASLP.2019.2959721},
    • url = {https://www.merl.com/publications/TR2020-030}
    • }
  •  Chang, X., Zhang, W., Qian, Y., Le Roux, J., Watanabe, S., "MIMO-Speech: End-to-End Multi-Channel Multi-Speaker Speech Recognition", IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU), December 2019, pp. 237-244.
    BibTeX TR2019-157 PDF
    • @inproceedings{Chang2019dec,
    • author = {Chang, Xuankai and Zhang, Wangyou and Qian, Yanmin and Le Roux, Jonathan and Watanabe, Shinji},
    • title = {MIMO-Speech: End-to-End Multi-Channel Multi-Speaker Speech Recognition},
    • booktitle = {IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU)},
    • year = 2019,
    • pages = {237--244},
    • month = dec,
    • isbn = {978-1-7281-0305-1},
    • url = {https://www.merl.com/publications/TR2019-157}
    • }
  •  Karita, S., Chen, N., Hayashi, T., Hori, T., Inaguma, H., Jiang, Z., Someki, M., Yalta Soplin, N.E., Yamamoto, R., Wang, X., Watanabe, S., Yoshimura, T., Zhang, W., "A Comparative Study on Transformer Vs RNN in Speech Applications", IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU), DOI: 10.1109/ASRU46091.2019.9003750, December 2019, pp. 449-456.
    BibTeX TR2019-158 PDF
    • @inproceedings{Karita2019dec,
    • author = {Karita, Shigeki and Chen, Nanxin and Hayashi, Tomoki and Hori, Takaaki and Inaguma, Hirofumi and Jiang, Ziyan and Someki, Masao and Yalta Soplin, Nelson Enrique and Yamamoto, Ryuichi and Wang, Xiaofei and Watanabe, Shinji and Yoshimura, Takenori and Zhang, Wangyou},
    • title = {A Comparative Study on Transformer Vs RNN in Speech Applications},
    • booktitle = {IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU)},
    • year = 2019,
    • pages = {449--456},
    • month = dec,
    • doi = {10.1109/ASRU46091.2019.9003750},
    • url = {https://www.merl.com/publications/TR2019-158}
    • }
  •  Moritz, N., Hori, T., Le Roux, J., "Streaming End-to-End Speech Recognition with Joint CTC-Attention Based Models", IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU), December 2019, pp. 936-943.
    BibTeX TR2019-159 PDF
    • @inproceedings{Moritz2019dec,
    • author = {Moritz, Niko and Hori, Takaaki and Le Roux, Jonathan},
    • title = {Streaming End-to-End Speech Recognition with Joint CTC-Attention Based Models},
    • booktitle = {IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU)},
    • year = 2019,
    • pages = {936--943},
    • month = dec,
    • isbn = {978-1-7281-0305-1},
    • url = {https://www.merl.com/publications/TR2019-159}
    • }
  •  Baskar, M.K., Watanabe, S., Astudillo, R., Hori, T., Burget, L., Cernocky, J.H., "Semi-supervised Sequence-to-sequence ASR using Unpaired Speech and Text", Interspeech, DOI: 10.21437/Interspeech.2019-3167, September 2019, pp. 3790-3794.
    BibTeX TR2019-100 PDF
    • @inproceedings{Baskar2019sep,
    • author = {Baskar, Murali Karthick and Watanabe, Shinji and Astudillo, Ramon and Hori, Takaaki and Burget, Lukas and Cernocky, Jan Honza},
    • title = {Semi-supervised Sequence-to-sequence ASR using Unpaired Speech and Text},
    • booktitle = {Interspeech},
    • year = 2019,
    • pages = {3790--3794},
    • month = sep,
    • doi = {10.21437/Interspeech.2019-3167},
    • issn = {1990-9772},
    • url = {https://www.merl.com/publications/TR2019-100}
    • }
  •  Karafiat, M., Baskar, M.K., Watanabe, S., Hori, T., Wiesner, M., Cernocky, J.H., "Analysis of Multilingual Sequence-to-Sequence Speech Recognition Systems", Interspeech, DOI: 10.21437/Interspeech.2019-2355, September 2019, pp. 2220-2224.
    BibTeX TR2019-103 PDF
    • @inproceedings{Karafiat2019sep,
    • author = {Karafiat, Martin and Baskar, Murali Karthick and Watanabe, Shinji and Hori, Takaaki and Wiesner, Matthew and Cernocky, Jan Honza},
    • title = {Analysis of Multilingual Sequence-to-Sequence Speech Recognition Systems},
    • booktitle = {Interspeech},
    • year = 2019,
    • pages = {2220--2224},
    • month = sep,
    • doi = {10.21437/Interspeech.2019-2355},
    • url = {https://www.merl.com/publications/TR2019-103}
    • }
  •  Moritz, N., Hori, T., Le Roux, J., "Unidirectional Neural Network Architectures for End-to-End Automatic Speech Recognition", Interspeech, DOI: 10.21437/Interspeech.2019-2837, September 2019, pp. 76-80.
    BibTeX TR2019-098 PDF
    • @inproceedings{Moritz2019sep,
    • author = {Moritz, Niko and Hori, Takaaki and Le Roux, Jonathan},
    • title = {Unidirectional Neural Network Architectures for End-to-End Automatic Speech Recognition},
    • booktitle = {Interspeech},
    • year = 2019,
    • pages = {76--80},
    • month = sep,
    • doi = {10.21437/Interspeech.2019-2837},
    • url = {https://www.merl.com/publications/TR2019-098}
    • }
  •  Seki, H., Hori, T., Watanabe, S., Le Roux, J., Hershey, J., "End-to-End Multilingual Multi-Speaker Speech Recognition", Interspeech, DOI: 10.21437/Interspeech.2019-3038, September 2019, pp. 3755-3759.
    BibTeX TR2019-101 PDF
    • @inproceedings{Seki2019sep,
    • author = {Seki, Hiroshi and Hori, Takaaki and Watanabe, Shinji and Le Roux, Jonathan and Hershey, John},
    • title = {End-to-End Multilingual Multi-Speaker Speech Recognition},
    • booktitle = {Interspeech},
    • year = 2019,
    • pages = {3755--3759},
    • month = sep,
    • doi = {10.21437/Interspeech.2019-3038},
    • url = {https://www.merl.com/publications/TR2019-101}
    • }
  •  Seki, H., Hori, T., Watanabe, S., Moritz, N., Le Roux, J., "Vectorized Beam Search for CTC-Attention-based Speech Recognition", Interspeech, DOI: 10.21437/Interspeech.2019-2860, September 2019, pp. 3825-3829.
    BibTeX TR2019-102 PDF
    • @inproceedings{Seki2019sep2,
    • author = {Seki, Hiroshi and Hori, Takaaki and Watanabe, Shinji and Moritz, Niko and Le Roux, Jonathan},
    • title = {Vectorized Beam Search for CTC-Attention-based Speech Recognition},
    • booktitle = {Interspeech},
    • year = 2019,
    • pages = {3825--3829},
    • month = sep,
    • doi = {10.21437/Interspeech.2019-2860},
    • url = {https://www.merl.com/publications/TR2019-102}
    • }
  •  Baskar, M.K., Burget, L., Watanabe, S., Karafiat, M., Hori, T., Cernocky, J.H., "Promising Accurate Prefix Boosting for Sequence-to-Sequence ASR", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP.2019.8682782, May 2019, pp. 5646-5650.
    BibTeX TR2019-006 PDF
    • @inproceedings{Baskar2019may,
    • author = {Baskar, Murali Karthick and Burget, Lukas and Watanabe, Shinji and Karafiat, Martin and Hori, Takaaki and Cernocky, Jan Honza},
    • title = {Promising Accurate Prefix Boosting for Sequence-to-Sequence ASR},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2019,
    • pages = {5646--5650},
    • month = may,
    • doi = {10.1109/ICASSP.2019.8682782},
    • issn = {2379-190X},
    • isbn = {978-1-4799-8131-1},
    • url = {https://www.merl.com/publications/TR2019-006}
    • }
  •  Cho, J., Watanabe, S., Hori, T., Baskar, M.K., Inaguma, H., Villalba, J., Dehak, N., "Language Model Integration Based on Memory Control for Sequence to Sequence Speech Recognition", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP.2019.8683380, May 2019.
    BibTeX TR2019-007 PDF
    • @inproceedings{Cho2019may,
    • author = {Cho, Jaejin and Watanabe, Shinji and Hori, Takaaki and Baskar, Murali Karthick and Inaguma, Hirofumi and Villalba, Jesus and Dehak, Najim},
    • title = {Language Model Integration Based on Memory Control for Sequence to Sequence Speech Recognition},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2019,
    • month = may,
    • doi = {10.1109/ICASSP.2019.8683380},
    • url = {https://www.merl.com/publications/TR2019-007}
    • }
  •  Hori, T., Astudillo, R., Hayashi, T., Zhang, Y., Watanabe, S., Le Roux, J., "Cycle-Consistency Training for End-to-End Speech Recognition", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP.2019.8683307, May 2019.
    BibTeX TR2019-002 PDF
    • @inproceedings{Hori2019may,
    • author = {Hori, Takaaki and Astudillo, Ramon and Hayashi, Tomoki and Zhang, Yu and Watanabe, Shinji and Le Roux, Jonathan},
    • title = {Cycle-Consistency Training for End-to-End Speech Recognition},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2019,
    • month = may,
    • doi = {10.1109/ICASSP.2019.8683307},
    • url = {https://www.merl.com/publications/TR2019-002}
    • }
  •  Moritz, N., Hori, T., Le Roux, J., "Triggered Attention for End-to-End Speech Recognition", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP.2019.8683510, May 2019.
    BibTeX TR2019-015 PDF
    • @inproceedings{Moritz2019may,
    • author = {Moritz, Niko and Hori, Takaaki and Le Roux, Jonathan},
    • title = {Triggered Attention for End-to-End Speech Recognition},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2019,
    • month = may,
    • doi = {10.1109/ICASSP.2019.8683510},
    • url = {https://www.merl.com/publications/TR2019-015}
    • }
  •  Wang, X., Li, R., Mallidi, S.H., Hori, T., Watanabe, S., Hermansky, H., "Stream Attention-Based Multi-Array End-to-End Speech Recognition", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP.2019.8682650, May 2019.
    BibTeX TR2019-005 PDF
    • @inproceedings{Wang2019may,
    • author = {Wang, Xiaofei and Li, Ruizhi and Mallidi, Sri Harish and Hori, Takaaki and Watanabe, Shinji and Hermansky, Hynek},
    • title = {Stream Attention-Based Multi-Array End-to-End Speech Recognition},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2019,
    • month = may,
    • doi = {10.1109/ICASSP.2019.8682650},
    • url = {https://www.merl.com/publications/TR2019-005}
    • }
  •  Cho, J., Baskar, M.K., Li, R., Wiesner, M., Mallidi, S.H., Yalta, N., Karafiat, M., Watanabe, S., Hori, T., "Multilingual Sequence-to-Sequence Speech Recognition: Architecture, Transfer Learning, and Language Modeling", IEEE Spoken Language Technology Workshop (SLT), DOI: 10.1109/SLT.2018.8639655, December 2018.
    BibTeX TR2018-175 PDF
    • @inproceedings{Cho2018dec,
    • author = {Cho, Jaejin and Baskar, Murali Karthick and Li, Ruizhi and Wiesner, Matthew and Mallidi, Sri Harish and Yalta, Nelson and Karafiat, Martin and Watanabe, Shinji and Hori, Takaaki},
    • title = {Multilingual Sequence-to-Sequence Speech Recognition: Architecture, Transfer Learning, and Language Modeling},
    • booktitle = {IEEE Spoken Language Technology Workshop (SLT)},
    • year = 2018,
    • month = dec,
    • doi = {10.1109/SLT.2018.8639655},
    • url = {https://www.merl.com/publications/TR2018-175}
    • }
  •  Hayashi, T., Watanabe, S., Zhang, Y., Toda, T., Hori, T., Astudillo, R., Takeda, K., "Back-Translation-Style Data Augmentation for End-to-End ASR", IEEE Spoken Language Technology Workshop (SLT), DOI: 10.1109/SLT.2018.8639619, December 2018.
    BibTeX TR2018-174 PDF
    • @inproceedings{Hayashi2018dec,
    • author = {Hayashi, Tomoki and Watanabe, Shinji and Zhang, Yu and Toda, Tomoki and Hori, Takaaki and Astudillo, Ramon and Takeda, Kazuya},
    • title = {Back-Translation-Style Data Augmentation for End-to-End ASR},
    • booktitle = {IEEE Spoken Language Technology Workshop (SLT)},
    • year = 2018,
    • month = dec,
    • doi = {10.1109/SLT.2018.8639619},
    • url = {https://www.merl.com/publications/TR2018-174}
    • }
  •  Hori, T., Cho, J., Watanabe, S., "End-to-End Speech Recognition with Word-Based RNN Language Models", IEEE Spoken Language Technology Workshop (SLT), DOI: 10.1109/SLT.2018.8639693, December 2018.
    BibTeX TR2018-176 PDF
    • @inproceedings{Hori2018dec,
    • author = {Hori, Takaaki and Cho, Jaejin and Watanabe, Shinji},
    • title = {End-to-End Speech Recognition with Word-Based RNN Language Models},
    • booktitle = {IEEE Spoken Language Technology Workshop (SLT)},
    • year = 2018,
    • month = dec,
    • doi = {10.1109/SLT.2018.8639693},
    • url = {https://www.merl.com/publications/TR2018-176}
    • }
  •  Watanabe, S., Hori, T., Karita, S., Hayashi, T., Nishitoba, J., Unno, Y., Yalta Soplin, N.E., Heymann, J., Wiesner, M., Chen, N., Renduchintala, A., Ochiai, T., "ESPnet: End-to-End Speech Processing Toolkit", Interspeech, September 2018.
    BibTeX TR2018-136 PDF
    • @inproceedings{Watanabe2018sep,
    • author = {Watanabe, Shinji and Hori, Takaaki and Karita, Shigeki and Hayashi, Tomoki and Nishitoba, Jiro and Unno, Yuya and Yalta Soplin, Nelson Enrique and Heymann, Jahn and Wiesner, Matthew and Chen, Nanxin and Renduchintala, Adithya and Ochiai, Tsubasa},
    • title = {ESPnet: End-to-End Speech Processing Toolkit},
    • booktitle = {Interspeech},
    • year = 2018,
    • month = sep,
    • url = {https://www.merl.com/publications/TR2018-136}
    • }
  •  Ochiai, T., Watanabe, S., Katagiri, S., Hori, T., Hershey, J.R., "Speaker Adaptation for Multichannel End-to-End Speech Recognition", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP.2018.8462161, April 2018, pp. 6707-6711.
    BibTeX TR2018-006 PDF
    • @inproceedings{Ochiai2018apr,
    • author = {Ochiai, Tsubasa and Watanabe, Shinji and Katagiri, Shigeru and Hori, Takaaki and Hershey, John R.},
    • title = {Speaker Adaptation for Multichannel End-to-End Speech Recognition},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2018,
    • pages = {6707--6711},
    • month = apr,
    • doi = {10.1109/ICASSP.2018.8462161},
    • url = {https://www.merl.com/publications/TR2018-006}
    • }
  •  Seki, H., Watanabe, S., Hori, T., Le Roux, J., Hershey, J.R., "An End-to-End Language-Tracking Speech Recognizer for Mixed-Language Speech", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP.2018.8462180, April 2018, pp. 4919-4923.
    BibTeX TR2018-002 PDF Video
    • @inproceedings{Seki2018apr,
    • author = {Seki, Hiroshi and Watanabe, Shinji and Hori, Takaaki and Le Roux, Jonathan and Hershey, John R.},
    • title = {An End-to-End Language-Tracking Speech Recognizer for Mixed-Language Speech},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2018,
    • pages = {4919--4923},
    • month = apr,
    • doi = {10.1109/ICASSP.2018.8462180},
    • url = {https://www.merl.com/publications/TR2018-002}
    • }