Publications

58 / 2,868 publications found.


  •  Chang, X., Zhang, W., Qian, Y., Le Roux, J., Watanabe, S., "MIMO-Speech: End-to-End Multi-Channel Multi-Speaker Speech Recognition", IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU), ISBN: 978-1-7281-0305-1, December 2019, pp. 237-244.
    BibTeX Download PDFAbout TR2019-157
    • @inproceedings{Chang2019dec,
    • author = {Chang, Xuankai and Zhang, Wangyou and Qian, Yanmin and Le Roux, Jonathan and Watanabe, Shinji},
    • title = {{MIMO-Speech}: End-to-End Multi-Channel Multi-Speaker Speech Recognition},
    • booktitle = {IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU)},
    • year = 2019,
    • pages = {237--244},
    • month = dec,
    • isbn = {978-1-7281-0305-1},
    • url = {https://www.merl.com/publications/TR2019-157}
    • }
  •  Karita, S., Chen, N., Hayashi, T., Hori, T., Inaguma, H., Jiang, Z., Someki, M., Yalta Soplin, N.E., Yamamoto, R., Wang, X., Watanabe, S., Yoshimura, T., Zhang, W., "A Comparative Study on Transformer Vs RNN in Speech Applications", IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU), December 2019.
    BibTeX Download PDFAbout TR2019-158
    • @inproceedings{Karita2019dec,
    • author = {Karita, Shigeki and Chen, Nanxin and Hayashi, Tomoki and Hori, Takaaki and Inaguma, Hirofumi and Jiang, Ziyan and Someki, Masao and Yalta Soplin, Nelson Enrique and Yamamoto, Ryuichi and Wang, Xiaofei and Watanabe, Shinji and Yoshimura, Takenori and Zhang, Wangyou},
    • title = {A Comparative Study on Transformer Vs {RNN} in Speech Applications},
    • booktitle = {IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU)},
    • year = 2019,
    • month = dec,
    • url = {https://www.merl.com/publications/TR2019-158}
    • }
  •  Moritz, N., Hori, T., Le Roux, J., "Streaming End-to-End Speech Recognition with Joint CTC-Attention Based Models", IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU), ISBN: 978-1-7281-0305-1, December 2019, pp. 936-943.
    BibTeX Download PDFAbout TR2019-159
    • @inproceedings{Moritz2019dec,
    • author = {Moritz, Niko and Hori, Takaaki and Le Roux, Jonathan},
    • title = {Streaming End-to-End Speech Recognition with Joint {CTC}-Attention Based Models},
    • booktitle = {IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU)},
    • year = 2019,
    • pages = {936--943},
    • month = dec,
    • isbn = {978-1-7281-0305-1},
    • url = {https://www.merl.com/publications/TR2019-159}
    • }
  •  Baskar, M.K., Watanabe, S., Astudillo, R., Hori, T., Burget, L., Cernocky, J.H., "Semi-supervised Sequence-to-sequence ASR using Unpaired Speech and Text", Interspeech, September 2019.
    BibTeX Download PDFAbout TR2019-100
    • @inproceedings{Baskar2019sep,
    • author = {Baskar, Murali Karthick and Watanabe, Shinji and Astudillo, Ramon and Hori, Takaaki and Burget, Lukas and Cernocky, Jan Honza},
    • title = {Semi-supervised Sequence-to-sequence {ASR} using Unpaired Speech and Text},
    • booktitle = {Interspeech},
    • year = 2019,
    • month = sep,
    • url = {https://www.merl.com/publications/TR2019-100}
    • }
  •  Karafiat, M., Baskar, M.K., Watanabe, S., Hori, T., Wiesner, M., Cernocky, J.H., "Analysis of Multilingual Sequence-to-Sequence Speech Recognition Systems", Interspeech, September 2019.
    BibTeX Download PDFAbout TR2019-103
    • @inproceedings{Karafiat2019sep,
    • author = {Karafiat, Martin and Baskar, Murali Karthick and Watanabe, Shinji and Hori, Takaaki and Wiesner, Matthew and Cernocky, Jan Honza},
    • title = {Analysis of Multilingual Sequence-to-Sequence Speech Recognition Systems},
    • booktitle = {Interspeech},
    • year = 2019,
    • month = sep,
    • url = {https://www.merl.com/publications/TR2019-103}
    • }
  •  Moritz, N., Hori, T., Le Roux, J., "Unidirectional Neural Network Architectures for End-to-End Automatic Speech Recognition", Interspeech, DOI: 10.21437/Interspeech.2019-2837, September 2019, pp. 76-80.
    BibTeX Download PDFAbout TR2019-098
    • @inproceedings{Moritz2019sep,
    • author = {Moritz, Niko and Hori, Takaaki and Le Roux, Jonathan},
    • title = {Unidirectional Neural Network Architectures for End-to-End Automatic Speech Recognition},
    • booktitle = {Interspeech},
    • year = 2019,
    • pages = {76--80},
    • month = sep,
    • doi = {10.21437/Interspeech.2019-2837},
    • url = {https://www.merl.com/publications/TR2019-098}
    • }
  •  Seki, H., Hori, T., Watanabe, S., Le Roux, J., Hershey, J., "End-to-End Multilingual Multi-Speaker Speech Recognition", Interspeech, DOI: 10.21437/Interspeech.2019-3038, September 2019, pp. 3755-3759.
    BibTeX Download PDFAbout TR2019-101
    • @inproceedings{Seki2019sep,
    • author = {Seki, Hiroshi and Hori, Takaaki and Watanabe, Shinji and Le Roux, Jonathan and Hershey, John},
    • title = {End-to-End Multilingual Multi-Speaker Speech Recognition},
    • booktitle = {Interspeech},
    • year = 2019,
    • pages = {3755--3759},
    • month = sep,
    • doi = {10.21437/Interspeech.2019-3038},
    • url = {https://www.merl.com/publications/TR2019-101}
    • }
  •  Seki, H., Hori, T., Watanabe, S., Moritz, N., Le Roux, J., "Vectorized Beam Search for CTC-Attention-based Speech Recognition", Interspeech, DOI: 10.21437/Interspeech.2019-2860, September 2019, pp. 3825-3829.
    BibTeX Download PDFAbout TR2019-102
    • @inproceedings{Seki2019sep2,
    • author = {Seki, Hiroshi and Hori, Takaaki and Watanabe, Shinji and Moritz, Niko and Le Roux, Jonathan},
    • title = {Vectorized Beam Search for {CTC}-Attention-based Speech Recognition},
    • booktitle = {Interspeech},
    • year = 2019,
    • pages = {3825--3829},
    • month = sep,
    • doi = {10.21437/Interspeech.2019-2860},
    • url = {https://www.merl.com/publications/TR2019-102}
    • }
  •  Baskar, M.K., Burget, L., Watanabe, S., Karafiat, M., Hori, T., Cernocky, J.H., "Promising Accurate Prefix Boosting for Sequence-to-Sequence ASR", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), May 2019.
    BibTeX Download PDFAbout TR2019-006
    • @inproceedings{Baskar2019may,
    • author = {Baskar, Murali Karthick and Burget, Lukas and Watanabe, Shinji and Karafiat, Martin and Hori, Takaaki and Cernocky, Jan Honza},
    • title = {Promising Accurate Prefix Boosting for Sequence-to-Sequence {ASR}},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2019,
    • month = may,
    • url = {https://www.merl.com/publications/TR2019-006}
    • }
  •  Cho, J., Watanabe, S., Hori, T., Baskar, M.K., Inaguma, H., Villalba, J., Dehak, N., "Language Model Integration Based on Memory Control for Sequence to Sequence Speech Recognition", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP.2019.8683380, May 2019.
    BibTeX Download PDFAbout TR2019-007
    • @inproceedings{Cho2019may,
    • author = {Cho, Jaejin and Watanabe, Shinji and Hori, Takaaki and Baskar, Murali Karthick and Inaguma, Hirofumi and Villalba, Jesus and Dehak, Najim},
    • title = {Language Model Integration Based on Memory Control for Sequence to Sequence Speech Recognition},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2019,
    • month = may,
    • doi = {10.1109/ICASSP.2019.8683380},
    • url = {https://www.merl.com/publications/TR2019-007}
    • }
  •  Hori, T., Astudillo, R., Hayashi, T., Zhang, Y., Watanabe, S., Le Roux, J., "Cycle-Consistency Training for End-to-End Speech Recognition", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP.2019.8683307, May 2019.
    BibTeX Download PDFAbout TR2019-002
    • @inproceedings{Hori2019may,
    • author = {Hori, Takaaki and Astudillo, Ramon and Hayashi, Tomoki and Zhang, Yu and Watanabe, Shinji and Le Roux, Jonathan},
    • title = {Cycle-Consistency Training for End-to-End Speech Recognition},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2019,
    • month = may,
    • doi = {10.1109/ICASSP.2019.8683307},
    • url = {https://www.merl.com/publications/TR2019-002}
    • }
  •  Moritz, N., Hori, T., Le Roux, J., "Triggered Attention for End-to-End Speech Recognition", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP.2019.8683510, May 2019.
    BibTeX Download PDFAbout TR2019-015
    • @inproceedings{Moritz2019may,
    • author = {Moritz, Niko and Hori, Takaaki and Le Roux, Jonathan},
    • title = {Triggered Attention for End-to-End Speech Recognition},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2019,
    • month = may,
    • doi = {10.1109/ICASSP.2019.8683510},
    • url = {https://www.merl.com/publications/TR2019-015}
    • }
  •  Wang, X., Li, R., Mallidi, S.H., Hori, T., Watanabe, S., Hermansky, H., "Stream Attention-Based Multi-Array End-to-End Speech Recognition", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP.2019.8682650, May 2019.
    BibTeX Download PDFAbout TR2019-005
    • @inproceedings{Wang2019may,
    • author = {Wang, Xiaofei and Li, Ruizhi and Mallidi, Sri Harish and Hori, Takaaki and Watanabe, Shinji and Hermansky, Hynek},
    • title = {Stream Attention-Based Multi-Array End-to-End Speech Recognition},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2019,
    • month = may,
    • doi = {10.1109/ICASSP.2019.8682650},
    • url = {https://www.merl.com/publications/TR2019-005}
    • }
  •  Cho, J., Baskar, M.K., Li, R., Wiesner, M., Mallidi, S.H., Yalta, N., Karafiat, M., Watanabe, S., Hori, T., "Multilingual Sequence-to-Sequence Speech Recognition: Architecture, Transfer Learning, and Language Modeling", IEEE Spoken Language Technology Workshop, DOI: 10.1109/SLT.2018.8639655, December 2018.
    BibTeX Download PDFAbout TR2018-175
    • @inproceedings{Cho2018dec,
    • author = {Cho, Jaejin and Baskar, Murali Karthick and Li, Ruizhi and Wiesner, Matthew and Mallidi, Sri Harish and Yalta, Nelson and Karafiat, Martin and Watanabe, Shinji and Hori, Takaaki},
    • title = {Multilingual Sequence-to-Sequence Speech Recognition: Architecture, Transfer Learning, and Language Modeling},
    • booktitle = {IEEE Spoken Language Technology Workshop},
    • year = 2018,
    • month = dec,
    • doi = {10.1109/SLT.2018.8639655},
    • url = {https://www.merl.com/publications/TR2018-175}
    • }
  •  Hayashi, T., Watanabe, S., Zhang, Y., Toda, T., Hori, T., Astudillo, R., Takeda, K., "Back-Translation-Style Data Augmentation for End-to-End ASR", IEEE Spoken Language Technology Workshop, DOI: 10.1109/SLT.2018.8639619, December 2018.
    BibTeX Download PDFAbout TR2018-174
    • @inproceedings{Hayashi2018dec,
    • author = {Hayashi, Tomoki and Watanabe, Shinji and Zhang, Yu and Toda, Tomoki and Hori, Takaaki and Astudillo, Ramon and Takeda, Kazuya},
    • title = {Back-Translation-Style Data Augmentation for End-to-End {ASR}},
    • booktitle = {IEEE Spoken Language Technology Workshop},
    • year = 2018,
    • month = dec,
    • doi = {10.1109/SLT.2018.8639619},
    • url = {https://www.merl.com/publications/TR2018-174}
    • }
  •  Hori, T., Cho, J., Watanabe, S., "End-to-End Speech Recognition with Word-Based RNN Language Models", IEEE Spoken Language Technology Workshop, DOI: 10.1109/SLT.2018.8639693, December 2018.
    BibTeX Download PDFAbout TR2018-176
    • @inproceedings{Hori2018dec,
    • author = {Hori, Takaaki and Cho, Jaejin and Watanabe, Shinji},
    • title = {End-to-End Speech Recognition with Word-Based {RNN} Language Models},
    • booktitle = {IEEE Spoken Language Technology Workshop},
    • year = 2018,
    • month = dec,
    • doi = {10.1109/SLT.2018.8639693},
    • url = {https://www.merl.com/publications/TR2018-176}
    • }
  •  Watanabe, S., Hori, T., Karita, S., Hayashi, T., Nishitoba, J., Unno, Y., Yalta Soplin, N.E., Heymann, J., Wiesner, M., Chen, N., Renduchintala, A., Ochiai, T., "ESPnet: End-to-End Speech Processing Toolkit", Interspeech, September 2018.
    BibTeX Download PDFAbout TR2018-136
    • @inproceedings{Watanabe2018sep,
    • author = {Watanabe, Shinji and Hori, Takaaki and Karita, Shigeki and Hayashi, Tomoki and Nishitoba, Jiro and Unno, Yuya and Yalta Soplin, Nelson Enrique and Heymann, Jahn and Wiesner, Matthew and Chen, Nanxin and Renduchintala, Adithya and Ochiai, Tsubasa},
    • title = {{ESPnet}: End-to-End Speech Processing Toolkit},
    • booktitle = {Interspeech},
    • year = 2018,
    • month = sep,
    • url = {https://www.merl.com/publications/TR2018-136}
    • }
  •  Ochiai, T., Watanabe, S., Katagiri, S., Hori, T., Hershey, J.R., "Speaker Adaptation for Multichannel End-to-End Speech Recognition", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP.2018.8462161, April 2018, pp. 6707-6711.
    BibTeX Download PDFAbout TR2018-006
    • @inproceedings{Ochiai2018apr,
    • author = {Ochiai, Tsubasa and Watanabe, Shinji and Katagiri, Shigeru and Hori, Takaaki and Hershey, John R.},
    • title = {Speaker Adaptation for Multichannel End-to-End Speech Recognition},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2018,
    • pages = {6707--6711},
    • month = apr,
    • doi = {10.1109/ICASSP.2018.8462161},
    • url = {https://www.merl.com/publications/TR2018-006}
    • }
  •  Seki, H., Watanabe, S., Hori, T., Le Roux, J., Hershey, J.R., "An End-to-End Language-Tracking Speech Recognizer for Mixed-Language Speech", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP.2018.8462180, April 2018, pp. 4919-4923.
    BibTeX Download PDFAbout TR2018-002
    • @inproceedings{Seki2018apr,
    • author = {Seki, Hiroshi and Watanabe, Shinji and Hori, Takaaki and Le Roux, Jonathan and Hershey, John R.},
    • title = {An End-to-End Language-Tracking Speech Recognizer for Mixed-Language Speech},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2018,
    • pages = {4919--4923},
    • month = apr,
    • doi = {10.1109/ICASSP.2018.8462180},
    • url = {https://www.merl.com/publications/TR2018-002}
    • }
  •  Settle, S., Le Roux, J., Hori, T., Watanabe, S., Hershey, J.R., "End-to-End Multi-Speaker Speech Recognition", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP.2018.8461893, April 2018, pp. 4819-4823.
    BibTeX Download PDFAbout TR2018-001
    • @inproceedings{Settle2018apr,
    • author = {Settle, Shane and Le Roux, Jonathan and Hori, Takaaki and Watanabe, Shinji and Hershey, John R.},
    • title = {End-to-End Multi-Speaker Speech Recognition},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2018,
    • pages = {4819--4823},
    • month = apr,
    • doi = {10.1109/ICASSP.2018.8461893},
    • url = {https://www.merl.com/publications/TR2018-001}
    • }
  •  Watanabe, S., Hori, T., Karita, S., Hayashi, T., Nishitoba, J., Unno, Y., Yalta Soplin, N.E., Heymann, J., Wiesner, M., Chen, N., Renduchintala, A., Ochiai, T., "ESPnet: End-to-End Speech Processing Toolkit," Tech. Rep. TR2018-036, arXiv, March 2018.
    BibTeX Download PDFAbout TR2018-036
    • @techreport{Watanabe2018mar,
    • author = {Watanabe, Shinji and Hori, Takaaki and Karita, Shigeki and Hayashi, Tomoki and Nishitoba, Jiro and Unno, Yuya and Yalta Soplin, Nelson Enrique and Heymann, Jahn and Wiesner, Matthew and Chen, Nanxin and Renduchintala, Adithya and Ochiai, Tsubasa},
    • title = {{ESPnet}: End-to-End Speech Processing Toolkit},
    • journal = {arXiv},
    • institution = {Mitsubishi Electric Research Laboratories},
    • number = {TR2018-036},
    • year = 2018,
    • month = mar,
    • url = {https://www.merl.com/publications/TR2018-036}
    • }
  •  Hori, T., Watanabe, S., Hershey, J.R., "Multi-level Language Modeling and Decoding for Open Vocabulary End-to-End Speech Recognition", IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU), DOI: 10.1109/ASRU.2017.8268948, December 2017.
    BibTeX Download PDFAbout TR2017-181
    • @inproceedings{Hori2017dec,
    • author = {Hori, Takaaki and Watanabe, Shinji and Hershey, John R.},
    • title = {Multi-level Language Modeling and Decoding for Open Vocabulary End-to-End Speech Recognition},
    • booktitle = {IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU)},
    • year = 2017,
    • month = dec,
    • doi = {10.1109/ASRU.2017.8268948},
    • url = {https://www.merl.com/publications/TR2017-181}
    • }
  •  Watanabe, S., Hori, T., Hershey, J.R., "Language Independent End-to-End Architecture For Joint Language and Speech Recognition", IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU), DOI: 10.1109/ASRU.2017.8268945, December 2017.
    BibTeX Download PDFAbout TR2017-182
    • @inproceedings{Watanabe2017dec,
    • author = {Watanabe, Shinji and Hori, Takaaki and Hershey, John R.},
    • title = {Language Independent End-to-End Architecture For Joint Language and Speech Recognition},
    • booktitle = {IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU)},
    • year = 2017,
    • month = dec,
    • doi = {10.1109/ASRU.2017.8268945},
    • url = {https://www.merl.com/publications/TR2017-182}
    • }
  •  Ochiai, T., Watanabe, S., Katagiri, S., "Does speech enhancement work with end-to-end ASR objectives?: Experimental analysis of multichannel end-to-end ASR", IEEE International Workshop on Machine Learning for Signal Processing (MLSP), October 2017.
    BibTeX Download PDFAbout TR2017-139
    • @inproceedings{Ochiai2017oct,
    • author = {Ochiai, Tsubasa and Watanabe, Shinji and Katagiri, Shigeru},
    • title = {Does speech enhancement work with end-to-end {ASR} objectives?: Experimental analysis of multichannel end-to-end {ASR}},
    • booktitle = {IEEE International Workshop on Machine Learning for Signal Processing (MLSP)},
    • year = 2017,
    • month = oct,
    • internal-note = {Removed volume = 11, number = 8, pages = 1274--1288 and doi = 10.1109/JSTSP.2017.2764276: they are identical to entry Ochiai2017oct2 and identify the IEEE JSTSP journal article, not this MLSP workshop paper. TODO: add the MLSP proceedings pages and DOI once verified.},
    • url = {https://www.merl.com/publications/TR2017-139}
    • }
  •  Ochiai, T., Watanabe, S., Hori, T., Hershey, J.R., Xiao, X., "Unified Architecture for Multichannel End-to-End Speech Recognition with Neural Beamforming", IEEE Journal of Selected Topics in Signal Processing, DOI: 10.1109/JSTSP.2017.2764276, ISSN: 1941-0484, Vol. 11, No. 8, pp. 1274-1288, October 2017.
    BibTeX Download PDFAbout TR2017-192
    • @article{Ochiai2017oct2,
    • author = {Ochiai, Tsubasa and Watanabe, Shinji and Hori, Takaaki and Hershey, John R. and Xiao, Xiong},
    • title = {Unified Architecture for Multichannel End-to-End Speech Recognition with Neural Beamforming},
    • journal = {IEEE Journal of Selected Topics in Signal Processing},
    • year = 2017,
    • volume = 11,
    • number = 8,
    • pages = {1274--1288},
    • month = oct,
    • doi = {10.1109/JSTSP.2017.2764276},
    • issn = {1941-0484},
    • url = {https://www.merl.com/publications/TR2017-192}
    • }