Publications

51 / 2,829 publications found.


  •  Baskar, M.K., Watanabe, S., Astudillo, R., Hori, T., Burget, L., Cernocky, J.H., "Semi-supervised Sequence-to-sequence ASR using Unpaired Speech and Text", Interspeech, September 2019.
    BibTeX Download PDFAbout TR2019-100
    % Interspeech 2019 conference paper (TR2019-100).
    % Names use the two-part "Last, First" form; the three-part "Last, X, First"
    % form is reserved for suffixes such as Jr, so both given names of the last
    % author follow a single comma. {ASR} is braced to survive sentence-casing.
    @inproceedings{Baskar2019sep,
      author    = {Baskar, Murali Karthick and Watanabe, Shinji and Astudillo, Ramon and Hori, Takaaki and Burget, Lukas and Cernocky, Jan Honza},
      title     = {Semi-supervised Sequence-to-sequence {ASR} using Unpaired Speech and Text},
      booktitle = {Interspeech},
      year      = 2019,
      month     = sep,
      url       = {https://www.merl.com/publications/TR2019-100}
    }
  •  Hori, C., Cherian, A., Marks, T., Hori, T., "Joint Student-Teacher Learning for Audio-Visual Scene-Aware Dialog", Interspeech, September 2019.
    BibTeX Download PDFAbout TR2019-097
    % Interspeech 2019 conference paper (TR2019-097).
    % month uses the predefined unquoted macro so styles can localise it.
    @inproceedings{Hori2019sep,
      author    = {Hori, Chiori and Cherian, Anoop and Marks, Tim and Hori, Takaaki},
      title     = {Joint Student-Teacher Learning for Audio-Visual Scene-Aware Dialog},
      booktitle = {Interspeech},
      year      = 2019,
      month     = sep,
      url       = {https://www.merl.com/publications/TR2019-097}
    }
  •  Karafiat, M., Baskar, M.K., Watanabe, S., Hori, T., Wiesner, M., Cernocky, J.H., "Analysis of Multilingual Sequence-to-Sequence Speech Recognition Systems", Interspeech, September 2019.
    BibTeX Download PDFAbout TR2019-103
    % Interspeech 2019 conference paper (TR2019-103).
    % The last author is written "Last, First Middle": a second comma would make
    % BibTeX treat the middle token as a Jr/suffix part of the name.
    @inproceedings{Karafiat2019sep,
      author    = {Karafiat, Martin and Baskar, Murali Karthick and Watanabe, Shinji and Hori, Takaaki and Wiesner, Matthew and Cernocky, Jan Honza},
      title     = {Analysis of Multilingual Sequence-to-Sequence Speech Recognition Systems},
      booktitle = {Interspeech},
      year      = 2019,
      month     = sep,
      url       = {https://www.merl.com/publications/TR2019-103}
    }
  •  Moritz, N., Hori, T., Le Roux, J., "Unidirectional Neural Network Architectures for End-to-End Automatic Speech Recognition", Interspeech, September 2019.
    BibTeX Download PDFAbout TR2019-098
    % Interspeech 2019 conference paper (TR2019-098).
    % "Le Roux, Jonathan" relies on the comma form to keep the two-word surname intact.
    @inproceedings{Moritz2019sep,
      author    = {Moritz, Niko and Hori, Takaaki and Le Roux, Jonathan},
      title     = {Unidirectional Neural Network Architectures for End-to-End Automatic Speech Recognition},
      booktitle = {Interspeech},
      year      = 2019,
      month     = sep,
      url       = {https://www.merl.com/publications/TR2019-098}
    }
  •  Seki, H., Hori, T., Watanabe, S., Le Roux, J., Hershey, J., "End-to-End Multilingual Multi-Speaker Speech Recognition", Interspeech, September 2019.
    BibTeX Download PDFAbout TR2019-101
    % Interspeech 2019 conference paper (TR2019-101).
    % First of two Seki entries for September 2019; the sibling key carries a "2" suffix.
    @inproceedings{Seki2019sep,
      author    = {Seki, Hiroshi and Hori, Takaaki and Watanabe, Shinji and Le Roux, Jonathan and Hershey, John},
      title     = {End-to-End Multilingual Multi-Speaker Speech Recognition},
      booktitle = {Interspeech},
      year      = 2019,
      month     = sep,
      url       = {https://www.merl.com/publications/TR2019-101}
    }
  •  Seki, H., Hori, T., Watanabe, S., Moritz, N., Le Roux, J., "Vectorized Beam Search for CTC-Attention-based Speech Recognition", Interspeech, September 2019.
    BibTeX Download PDFAbout TR2019-102
    % Interspeech 2019 conference paper (TR2019-102).
    % {CTC} is braced so sentence-casing styles do not lowercase the acronym.
    @inproceedings{Seki2019sep2,
      author    = {Seki, Hiroshi and Hori, Takaaki and Watanabe, Shinji and Moritz, Niko and Le Roux, Jonathan},
      title     = {Vectorized Beam Search for {CTC}-Attention-based Speech Recognition},
      booktitle = {Interspeech},
      year      = 2019,
      month     = sep,
      url       = {https://www.merl.com/publications/TR2019-102}
    }
  •  Yalta, N., Watanabe, S., Hori, T., Nakadai, K., Ogata, T., "CNN-based Multichannel End-to-End Speech Recognition for Everyday Home Environments", European Signal Processing Conference (EUSIPCO), September 2019.
    BibTeX Download PDFAbout TR2019-094
    % EUSIPCO 2019 conference paper (TR2019-094).
    % {CNN} is braced so sentence-casing styles do not render it as "Cnn".
    @inproceedings{Yalta2019sep,
      author    = {Yalta, Nelson and Watanabe, Shinji and Hori, Takaaki and Nakadai, Kazuhiro and Ogata, Tetsuya},
      title     = {{CNN}-based Multichannel End-to-End Speech Recognition for Everyday Home Environments},
      booktitle = {European Signal Processing Conference (EUSIPCO)},
      year      = 2019,
      month     = sep,
      url       = {https://www.merl.com/publications/TR2019-094}
    }
  •  Baskar, M.K., Burget, L., Watanabe, S., Karafiat, M., Hori, T., Cernocky, J.H., "Promising Accurate Prefix Boosting for Sequence-to-Sequence ASR", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), May 2019.
    BibTeX Download PDFAbout TR2019-006
    % ICASSP 2019 conference paper (TR2019-006).
    % The last author uses the two-part "Last, First Middle" form; a three-part
    % comma form would be read by BibTeX as "Last, Jr, First". {ASR} is braced
    % to survive sentence-casing.
    @inproceedings{Baskar2019may,
      author    = {Baskar, Murali Karthick and Burget, Lukas and Watanabe, Shinji and Karafiat, Martin and Hori, Takaaki and Cernocky, Jan Honza},
      title     = {Promising Accurate Prefix Boosting for Sequence-to-Sequence {ASR}},
      booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
      year      = 2019,
      month     = may,
      url       = {https://www.merl.com/publications/TR2019-006}
    }
  •  Cho, J., Watanabe, S., Hori, T., Baskar, M.K., Inaguma, H., Villalba, J., Dehak, N., "Language Model Integration Based on Memory Control for Sequence to Sequence Speech Recognition", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP.2019.8683380, May 2019.
    BibTeX Download PDFAbout TR2019-007
    % ICASSP 2019 conference paper (TR2019-007).
    % doi holds the bare identifier (no resolver prefix); url points at the report page.
    @inproceedings{Cho2019may,
      author    = {Cho, Jaejin and Watanabe, Shinji and Hori, Takaaki and Baskar, Murali Karthick and Inaguma, Hirofumi and Villalba, Jesus and Dehak, Najim},
      title     = {Language Model Integration Based on Memory Control for Sequence to Sequence Speech Recognition},
      booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
      year      = 2019,
      month     = may,
      doi       = {10.1109/ICASSP.2019.8683380},
      url       = {https://www.merl.com/publications/TR2019-007}
    }
  •  Hori, C., Alamri, H., Wang, J., Wichern, G., Hori, T., Cherian, A., Marks, T.K., Cartillier, V., Lopes, R., Das, A., Essa, I., Batra, D., Parikh, D., "End-to-End Audio Visual Scene-Aware Dialog Using Multimodal Attention-Based Video Features", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP.2019.8682583, May 2019.
    BibTeX Download PDFAbout TR2019-016
    % ICASSP 2019 conference paper (TR2019-016).
    % The key suffix "2" distinguishes it from the other Hori entry dated May 2019.
    @inproceedings{Hori2019may2,
      author    = {Hori, Chiori and Alamri, Huda and Wang, Jue and Wichern, Gordon and Hori, Takaaki and Cherian, Anoop and Marks, Tim K. and Cartillier, Vincent and Lopes, Raphael and Das, Abhishek and Essa, Irfan and Batra, Dhruv and Parikh, Devi},
      title     = {End-to-End Audio Visual Scene-Aware Dialog Using Multimodal Attention-Based Video Features},
      booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
      year      = 2019,
      month     = may,
      doi       = {10.1109/ICASSP.2019.8682583},
      url       = {https://www.merl.com/publications/TR2019-016}
    }
  •  Hori, T., Astudillo, R., Hayashi, T., Zhang, Y., Watanabe, S., Le Roux, J., "Cycle-Consistency Training for End-to-End Speech Recognition", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP.2019.8683307, May 2019.
    BibTeX Download PDFAbout TR2019-002
    % ICASSP 2019 conference paper (TR2019-002).
    % doi holds the bare identifier; styles add the resolver prefix themselves.
    @inproceedings{Hori2019may,
      author    = {Hori, Takaaki and Astudillo, Ramon and Hayashi, Tomoki and Zhang, Yu and Watanabe, Shinji and Le Roux, Jonathan},
      title     = {Cycle-Consistency Training for End-to-End Speech Recognition},
      booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
      year      = 2019,
      month     = may,
      doi       = {10.1109/ICASSP.2019.8683307},
      url       = {https://www.merl.com/publications/TR2019-002}
    }
  •  Moritz, N., Hori, T., Le Roux, J., "Triggered Attention for End-to-End Speech Recognition", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP.2019.8683510, May 2019.
    BibTeX Download PDFAbout TR2019-015
    % ICASSP 2019 conference paper (TR2019-015).
    % "Le Roux, Jonathan" relies on the comma form to keep the two-word surname intact.
    @inproceedings{Moritz2019may,
      author    = {Moritz, Niko and Hori, Takaaki and Le Roux, Jonathan},
      title     = {Triggered Attention for End-to-End Speech Recognition},
      booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
      year      = 2019,
      month     = may,
      doi       = {10.1109/ICASSP.2019.8683510},
      url       = {https://www.merl.com/publications/TR2019-015}
    }
  •  Wang, X., Li, R., Mallidi, S.H., Hori, T., Watanabe, S., Hermansky, H., "Stream Attention-Based Multi-Array End-to-End Speech Recognition", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP.2019.8682650, May 2019.
    BibTeX Download PDFAbout TR2019-005
    % ICASSP 2019 conference paper (TR2019-005).
    % Multi-word given names ("Sri Harish") sit after the comma, so no bracing is needed.
    @inproceedings{Wang2019may,
      author    = {Wang, Xiaofei and Li, Ruizhi and Mallidi, Sri Harish and Hori, Takaaki and Watanabe, Shinji and Hermansky, Hynek},
      title     = {Stream Attention-Based Multi-Array End-to-End Speech Recognition},
      booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
      year      = 2019,
      month     = may,
      doi       = {10.1109/ICASSP.2019.8682650},
      url       = {https://www.merl.com/publications/TR2019-005}
    }
  •  Cho, J., Baskar, M.K., Li, R., Wiesner, M., Mallidi, S.H., Yalta, N., Karafiat, M., Watanabe, S., Hori, T., "Multilingual Sequence-to-Sequence Speech Recognition: Architecture, Transfer Learning, and Language Modeling", IEEE Spoken Language Technology Workshop, DOI: 10.1109/SLT.2018.8639655, December 2018.
    BibTeX Download PDFAbout TR2018-175
    % IEEE Spoken Language Technology Workshop 2018 paper (TR2018-175).
    % The colon in the title is literal; standard styles keep the capital that follows it.
    @inproceedings{Cho2018dec,
      author    = {Cho, Jaejin and Baskar, Murali Karthick and Li, Ruizhi and Wiesner, Matthew and Mallidi, Sri Harish and Yalta, Nelson and Karafiat, Martin and Watanabe, Shinji and Hori, Takaaki},
      title     = {Multilingual Sequence-to-Sequence Speech Recognition: Architecture, Transfer Learning, and Language Modeling},
      booktitle = {IEEE Spoken Language Technology Workshop},
      year      = 2018,
      month     = dec,
      doi       = {10.1109/SLT.2018.8639655},
      url       = {https://www.merl.com/publications/TR2018-175}
    }
  •  Hayashi, T., Watanabe, S., Zhang, Y., Toda, T., Hori, T., Astudillo, R., Takeda, K., "Back-Translation-Style Data Augmentation for End-to-End ASR", IEEE Spoken Language Technology Workshop, DOI: 10.1109/SLT.2018.8639619, December 2018.
    BibTeX Download PDFAbout TR2018-174
    % IEEE Spoken Language Technology Workshop 2018 paper (TR2018-174).
    % {ASR} is braced so sentence-casing styles do not lowercase the acronym.
    @inproceedings{Hayashi2018dec,
      author    = {Hayashi, Tomoki and Watanabe, Shinji and Zhang, Yu and Toda, Tomoki and Hori, Takaaki and Astudillo, Ramon and Takeda, Kazuya},
      title     = {Back-Translation-Style Data Augmentation for End-to-End {ASR}},
      booktitle = {IEEE Spoken Language Technology Workshop},
      year      = 2018,
      month     = dec,
      doi       = {10.1109/SLT.2018.8639619},
      url       = {https://www.merl.com/publications/TR2018-174}
    }
  •  Hori, T., Cho, J., Watanabe, S., "End-to-End Speech Recognition with Word-Based RNN Language Models", IEEE Spoken Language Technology Workshop, DOI: 10.1109/SLT.2018.8639693, December 2018.
    BibTeX Download PDFAbout TR2018-176
    % IEEE Spoken Language Technology Workshop 2018 paper (TR2018-176).
    % {RNN} is braced so sentence-casing styles do not lowercase the acronym.
    @inproceedings{Hori2018dec,
      author    = {Hori, Takaaki and Cho, Jaejin and Watanabe, Shinji},
      title     = {End-to-End Speech Recognition with Word-Based {RNN} Language Models},
      booktitle = {IEEE Spoken Language Technology Workshop},
      year      = 2018,
      month     = dec,
      doi       = {10.1109/SLT.2018.8639693},
      url       = {https://www.merl.com/publications/TR2018-176}
    }
  •  Hori, T., Wang, W., Koji, Y., Hori, C., Harsham, B.A., Hershey, J., "Adversarial Training and Decoding Strategies for End-to-end Neural Conversation Models", Computer Speech and Language, DOI: 10.1016/j.csl.2018.08.006, Vol. 54, pp. 122-139, December 2018.
    BibTeX Download PDFAbout TR2018-161
    % Journal article in Computer Speech and Language, vol. 54 (TR2018-161).
    % pages uses the double hyphen BibTeX expects for a range; volume is a bare integer.
    @article{Hori2018dec2,
      author  = {Hori, Takaaki and Wang, Wen and Koji, Yusuke and Hori, Chiori and Harsham, Bret A. and Hershey, John},
      title   = {Adversarial Training and Decoding Strategies for End-to-end Neural Conversation Models},
      journal = {Computer Speech and Language},
      year    = 2018,
      volume  = 54,
      pages   = {122--139},
      month   = dec,
      doi     = {10.1016/j.csl.2018.08.006},
      url     = {https://www.merl.com/publications/TR2018-161}
    }
  •  Watanabe, S., Hori, T., Karita, S., Hayashi, T., Nishitoba, J., Unno, Y., Yalta Soplin, N.E., Heymann, J., Wiesner, M., Chen, N., Renduchintala, A., Ochiai, T., "ESPnet: End-to-End Speech Processing Toolkit", Interspeech, September 2018.
    BibTeX Download PDFAbout TR2018-136
    % Interspeech 2018 conference paper (TR2018-136).
    % The seventh author is split as surname "Yalta Soplin" plus given names
    % "Nelson Enrique", in line with the "Yalta, Nelson" spelling used by other
    % entries for the same person. {ESPnet} is braced to keep its mixed case.
    @inproceedings{Watanabe2018sep,
      author    = {Watanabe, Shinji and Hori, Takaaki and Karita, Shigeki and Hayashi, Tomoki and Nishitoba, Jiro and Unno, Yuya and Yalta Soplin, Nelson Enrique and Heymann, Jahn and Wiesner, Matthew and Chen, Nanxin and Renduchintala, Adithya and Ochiai, Tsubasa},
      title     = {{ESPnet}: End-to-End Speech Processing Toolkit},
      booktitle = {Interspeech},
      year      = 2018,
      month     = sep,
      url       = {https://www.merl.com/publications/TR2018-136}
    }
  •  Seki, H., Hori, T., Watanabe, S., Le Roux, J., Hershey, J., "A Purely End-to-end System for Multi-speaker Speech Recognition", Annual Meeting of the Association for Computational Linguistics (ACL), July 2018, pp. 2620-2630.
    BibTeX Download PDFAbout TR2018-104
    % ACL 2018 conference paper, pages 2620-2630 (TR2018-104).
    % pages uses the double hyphen BibTeX expects for a range.
    @inproceedings{Seki2018jul,
      author    = {Seki, Hiroshi and Hori, Takaaki and Watanabe, Shinji and Le Roux, Jonathan and Hershey, John},
      title     = {A Purely End-to-end System for Multi-speaker Speech Recognition},
      booktitle = {Annual Meeting of the Association for Computational Linguistics (ACL)},
      year      = 2018,
      pages     = {2620--2630},
      month     = jul,
      url       = {https://www.merl.com/publications/TR2018-104}
    }
  •  Hori, C., Alamri, H., Wang, J., Wichern, G., Hori, T., Cherian, A., Marks, T.K., Cartillier, V., Lopes, R., Das, A., Essa, I., Batra, D., Parikh, D., "End-to-End Audio Visual Scene-Aware Dialog using Multimodal Attention-Based Video Features", arXiv, July 13, 2018.
    BibTeX Download PDFAbout TR2018-085
    % arXiv preprint dated July 2018 (TR2018-085).
    % NOTE(review): the arXiv identifier is not recorded here; once confirmed,
    % prefer eprint/archiveprefix fields over naming arXiv as the journal.
    @article{Hori2018jul,
      author  = {Hori, Chiori and Alamri, Huda and Wang, Jue and Wichern, Gordon and Hori, Takaaki and Cherian, Anoop and Marks, Tim K. and Cartillier, Vincent and Lopes, Raphael and Das, Abhishek and Essa, Irfan and Batra, Dhruv and Parikh, Devi},
      title   = {End-to-End Audio Visual Scene-Aware Dialog using Multimodal Attention-Based Video Features},
      journal = {arXiv},
      year    = 2018,
      month   = jul,
      url     = {https://www.merl.com/publications/TR2018-085}
    }