Publications

53 / 2,975 publications found.


  •  Hori, T., Cho, J., Watanabe, S., "End-to-End Speech Recognition with Word-Based RNN Language Models", IEEE Spoken Language Technology Workshop (SLT), DOI: 10.1109/SLT.2018.8639693, December 2018.
    BibTeX TR2018-176 PDF
    • @inproceedings{Hori2018dec,
    • author = {Hori, Takaaki and Cho, Jaejin and Watanabe, Shinji},
    • title = {End-to-End Speech Recognition with Word-Based {RNN} Language Models},
    • booktitle = {IEEE Spoken Language Technology Workshop (SLT)},
    • year = 2018,
    • month = dec,
    • doi = {10.1109/SLT.2018.8639693},
    • url = {https://www.merl.com/publications/TR2018-176}
    • }
  •  Hori, T., Wang, W., Koji, Y., Hori, C., Harsham, B.A., Hershey, J., "Adversarial Training and Decoding Strategies for End-to-end Neural Conversation Models", Computer Speech and Language, DOI: 10.1016/j.csl.2018.08.006, Vol. 54, pp. 122-139, December 2018.
    BibTeX TR2018-161 PDF
    • @article{Hori2018dec2,
    • author = {Hori, Takaaki and Wang, Wen and Koji, Yusuke and Hori, Chiori and Harsham, Bret A. and Hershey, John},
    • title = {Adversarial Training and Decoding Strategies for End-to-end Neural Conversation Models},
    • journal = {Computer Speech and Language},
    • year = 2018,
    • volume = 54,
    • pages = {122--139},
    • month = dec,
    • publisher = {Elsevier},
    • doi = {10.1016/j.csl.2018.08.006},
    • url = {https://www.merl.com/publications/TR2018-161}
    • }
  •  Watanabe, S., Hori, T., Karita, S., Hayashi, T., Nishitoba, J., Unno, Y., Yalta Soplin, N.E., Heymann, J., Wiesner, M., Chen, N., Renduchintala, A., Ochiai, T., "ESPnet: End-to-End Speech Processing Toolkit", Interspeech, September 2018.
    BibTeX TR2018-136 PDF
    • @inproceedings{Watanabe2018sep,
    • author = {Watanabe, Shinji and Hori, Takaaki and Karita, Shigeki and Hayashi, Tomoki and Nishitoba, Jiro and Unno, Yuya and Yalta Soplin, Nelson Enrique and Heymann, Jahn and Wiesner, Matthew and Chen, Nanxin and Renduchintala, Adithya and Ochiai, Tsubasa},
    • title = {{ESPnet}: End-to-End Speech Processing Toolkit},
    • booktitle = {Interspeech},
    • year = 2018,
    • month = sep,
    • url = {https://www.merl.com/publications/TR2018-136}
    • }
  •  Seki, H., Hori, T., Watanabe, S., Le Roux, J., Hershey, J., "A Purely End-to-end System for Multi-speaker Speech Recognition", Annual Meeting of the Association for Computational Linguistics (ACL), July 2018, pp. 2620-2630.
    BibTeX TR2018-104 PDF Video
    • @inproceedings{Seki2018jul,
    • author = {Seki, Hiroshi and Hori, Takaaki and Watanabe, Shinji and Le Roux, Jonathan and Hershey, John},
    • title = {A Purely End-to-end System for Multi-speaker Speech Recognition},
    • booktitle = {Annual Meeting of the Association for Computational Linguistics (ACL)},
    • year = 2018,
    • pages = {2620--2630},
    • month = jul,
    • publisher = {Association for Computational Linguistics},
    • url = {https://www.merl.com/publications/TR2018-104}
    • }
  •  Watanabe, S., Hori, T., Miao, Y., Delcroix, M., Metze, F., Hershey, J., "Toolkits for robust speech processing" in New Era for Robust Speech Recognition: Exploiting Deep Learning, Watanabe, S., Delcroix, M., Metze, F., Hershey, J.R., Eds., chapter 14, Springer, July 9, 2018.
    BibTeX
    • @incollection{Watanabe2018jul,
    • author = {Watanabe, Shinji and Hori, Takaaki and Miao, Yajie and Delcroix, Marc and Metze, Florian and Hershey, John},
    • title = {Toolkits for robust speech processing},
    • booktitle = {New Era for Robust Speech Recognition: Exploiting Deep Learning},
    • year = 2018,
    • editor = {Watanabe, Shinji and Delcroix, Marc and Metze, Florian and Hershey, John R.},
    • chapter = 14,
    • month = jul,
    • publisher = {Springer}
    • }
  •  Ochiai, T., Watanabe, S., Katagiri, S., Hori, T., Hershey, J.R., "Speaker Adaptation for Multichannel End-to-End Speech Recognition", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP.2018.8462161, April 2018, pp. 6707-6711.
    BibTeX TR2018-006 PDF
    • @inproceedings{Ochiai2018apr,
    • author = {Ochiai, Tsubasa and Watanabe, Shinji and Katagiri, Shigeru and Hori, Takaaki and Hershey, John R.},
    • title = {Speaker Adaptation for Multichannel End-to-End Speech Recognition},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2018,
    • pages = {6707--6711},
    • month = apr,
    • doi = {10.1109/ICASSP.2018.8462161},
    • url = {https://www.merl.com/publications/TR2018-006}
    • }
  •  Seki, H., Watanabe, S., Hori, T., Le Roux, J., Hershey, J.R., "An End-to-End Language-Tracking Speech Recognizer for Mixed-Language Speech", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP.2018.8462180, April 2018, pp. 4919-4923.
    BibTeX TR2018-002 PDF Video
    • @inproceedings{Seki2018apr,
    • author = {Seki, Hiroshi and Watanabe, Shinji and Hori, Takaaki and Le Roux, Jonathan and Hershey, John R.},
    • title = {An End-to-End Language-Tracking Speech Recognizer for Mixed-Language Speech},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2018,
    • pages = {4919--4923},
    • month = apr,
    • doi = {10.1109/ICASSP.2018.8462180},
    • url = {https://www.merl.com/publications/TR2018-002}
    • }
  •  Settle, S., Le Roux, J., Hori, T., Watanabe, S., Hershey, J.R., "End-to-End Multi-Speaker Speech Recognition", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP.2018.8461893, April 2018, pp. 4819-4823.
    BibTeX TR2018-001 PDF Video
    • @inproceedings{Settle2018apr,
    • author = {Settle, Shane and Le Roux, Jonathan and Hori, Takaaki and Watanabe, Shinji and Hershey, John R.},
    • title = {End-to-End Multi-Speaker Speech Recognition},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2018,
    • pages = {4819--4823},
    • month = apr,
    • doi = {10.1109/ICASSP.2018.8461893},
    • url = {https://www.merl.com/publications/TR2018-001}
    • }
  •  Hori, C., Hori, T., "End-to-end Conversation Modeling Track in DSTC6", Dialog System Technology Challenges, December 2017.
    BibTeX TR2017-188 PDF
    • @inproceedings{Hori2017dec3,
    • author = {Hori, Chiori and Hori, Takaaki},
    • title = {End-to-end Conversation Modeling Track in {DSTC6}},
    • booktitle = {Dialog System Technology Challenges},
    • year = 2017,
    • month = dec,
    • url = {https://www.merl.com/publications/TR2017-188}
    • }
  •  Hori, C., Hori, T., Marks, T.K., Hershey, J.R., "Early and Late Integration of Audio Features for Automatic Video Description", IEEE Automatic Speech Recognition and Understanding Workshop (ASRU), DOI: 10.1109/ASRU.2017.8268968, December 2017.
    BibTeX TR2017-183 PDF
    • @inproceedings{Hori2017dec2,
    • author = {Hori, Chiori and Hori, Takaaki and Marks, Tim K. and Hershey, John R.},
    • title = {Early and Late Integration of Audio Features for Automatic Video Description},
    • booktitle = {IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)},
    • year = 2017,
    • month = dec,
    • doi = {10.1109/ASRU.2017.8268968},
    • url = {https://www.merl.com/publications/TR2017-183}
    • }
  •  Hori, T., Watanabe, S., Hershey, J.R., "Multi-level Language Modeling and Decoding for Open Vocabulary End-to-End Speech Recognition", IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU), DOI: 10.1109/ASRU.2017.8268948, December 2017.
    BibTeX TR2017-181 PDF
    • @inproceedings{Hori2017dec,
    • author = {Hori, Takaaki and Watanabe, Shinji and Hershey, John R.},
    • title = {Multi-level Language Modeling and Decoding for Open Vocabulary End-to-End Speech Recognition},
    • booktitle = {IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU)},
    • year = 2017,
    • month = dec,
    • doi = {10.1109/ASRU.2017.8268948},
    • url = {https://www.merl.com/publications/TR2017-181}
    • }
  •  Watanabe, S., Hori, T., Hershey, J.R., "Language Independent End-to-End Architecture For Joint Language and Speech Recognition", IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU), DOI: 10.1109/ASRU.2017.8268945, December 2017.
    BibTeX TR2017-182 PDF Video
    • @inproceedings{Watanabe2017dec,
    • author = {Watanabe, Shinji and Hori, Takaaki and Hershey, John R.},
    • title = {Language Independent End-to-End Architecture For Joint Language and Speech Recognition},
    • booktitle = {IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU)},
    • year = 2017,
    • month = dec,
    • doi = {10.1109/ASRU.2017.8268945},
    • url = {https://www.merl.com/publications/TR2017-182}
    • }
  •  Wang, W., Koji, Y., Harsham, B.A., Hori, T., Hershey, J.R., "Sequence Adversarial Training and Minimum Bayes Risk Decoding for End-to-end Neural Conversation Models", Dialog System Technology Challenges, December 2017.
    BibTeX TR2017-180 PDF
    • @inproceedings{Wang2017dec,
    • author = {Wang, Wen and Koji, Yusuke and Harsham, Bret A. and Hori, Takaaki and Hershey, John R.},
    • title = {Sequence Adversarial Training and Minimum {Bayes} Risk Decoding for End-to-end Neural Conversation Models},
    • booktitle = {Dialog System Technology Challenges},
    • year = 2017,
    • month = dec,
    • url = {https://www.merl.com/publications/TR2017-180}
    • }
  •  Hori, C., Hori, T., Lee, T.-Y., Zhang, Z., Harsham, B.A., Sumi, K., Marks, T.K., Hershey, J.R., "Attention-Based Multimodal Fusion for Video Description", IEEE International Conference on Computer Vision (ICCV), DOI: 10.1109/ICCV.2017.450, October 2017.
    BibTeX TR2017-156 PDF
    • @inproceedings{Hori2017oct,
    • author = {Hori, Chiori and Hori, Takaaki and Lee, Teng-Yok and Zhang, Ziming and Harsham, Bret A. and Sumi, Kazuhiko and Marks, Tim K. and Hershey, John R.},
    • title = {Attention-Based Multimodal Fusion for Video Description},
    • booktitle = {IEEE International Conference on Computer Vision (ICCV)},
    • year = 2017,
    • month = oct,
    • doi = {10.1109/ICCV.2017.450},
    • url = {https://www.merl.com/publications/TR2017-156}
    • }
  •  Ochiai, T., Watanabe, S., Hori, T., Hershey, J.R., Xiao, X., "Unified Architecture for Multichannel End-to-End Speech Recognition with Neural Beamforming", IEEE Journal of Selected Topics in Signal Processing, DOI: 10.1109/JSTSP.2017.2764276, Vol. 11, No. 8, pp. 1274-1288, October 2017.
    BibTeX TR2017-192 PDF
    • @article{Ochiai2017oct2,
    • author = {Ochiai, Tsubasa and Watanabe, Shinji and Hori, Takaaki and Hershey, John R. and Xiao, Xiong},
    • title = {Unified Architecture for Multichannel End-to-End Speech Recognition with Neural Beamforming},
    • journal = {IEEE Journal of Selected Topics in Signal Processing},
    • year = 2017,
    • volume = 11,
    • number = 8,
    • pages = {1274--1288},
    • month = oct,
    • doi = {10.1109/JSTSP.2017.2764276},
    • issn = {1941-0484},
    • url = {https://www.merl.com/publications/TR2017-192}
    • }
  •  Watanabe, S., Hori, T., Kim, S., Hershey, J.R., Hayashi, T., "Hybrid CTC/Attention Architecture for End-to-End Speech Recognition", IEEE Journal of Selected Topics in Signal Processing, DOI: 10.1109/JSTSP.2017.2763455, Vol. 11, No. 8, pp. 1240-1253, October 2017.
    BibTeX TR2017-190 PDF Video
    • @article{Watanabe2017oct,
    • author = {Watanabe, Shinji and Hori, Takaaki and Kim, Suyoun and Hershey, John R. and Hayashi, Tomoki},
    • title = {Hybrid {CTC}/Attention Architecture for End-to-End Speech Recognition},
    • journal = {IEEE Journal of Selected Topics in Signal Processing},
    • year = 2017,
    • volume = 11,
    • number = 8,
    • pages = {1240--1253},
    • month = oct,
    • doi = {10.1109/JSTSP.2017.2763455},
    • issn = {1941-0484},
    • url = {https://www.merl.com/publications/TR2017-190}
    • }
  •  Hori, T., Watanabe, S., Zhang, Y., Chan, W., "Advances in Joint CTC-Attention based End-to-End Speech Recognition with a Deep CNN Encoder and RNN-LM", Interspeech, August 2017.
    BibTeX TR2017-132 PDF Video
    • @inproceedings{Hori2017aug,
    • author = {Hori, Takaaki and Watanabe, Shinji and Zhang, Yu and Chan, William},
    • title = {Advances in Joint {CTC}-Attention based End-to-End Speech Recognition with a Deep {CNN} Encoder and {RNN-LM}},
    • booktitle = {Interspeech},
    • year = 2017,
    • month = aug,
    • url = {https://www.merl.com/publications/TR2017-132}
    • }
  •  Hayashi, T., Watanabe, S., Toda, T., Hori, T., Le Roux, J., Takeda, K., "Duration-Controlled LSTM for Polyphonic Sound Event Detection", IEEE/ACM Transactions on Audio, Speech, and Language Processing, DOI: 10.1109/TASLP.2017.2740002, Vol. 25, No. 11, August 2017.
    BibTeX TR2017-150 PDF
    • @article{Hayashi2017aug,
    • author = {Hayashi, Tomoki and Watanabe, Shinji and Toda, Tomoki and Hori, Takaaki and Le Roux, Jonathan and Takeda, Kazuya},
    • title = {Duration-Controlled {LSTM} for Polyphonic Sound Event Detection},
    • journal = {IEEE/ACM Transactions on Audio, Speech, and Language Processing},
    • year = 2017,
    • volume = 25,
    • number = 11,
    • month = aug,
    • doi = {10.1109/TASLP.2017.2740002},
    • issn = {2329-9304},
    • url = {https://www.merl.com/publications/TR2017-150}
    • }
  •  Ochiai, T., Watanabe, S., Hori, T., Hershey, J.R., "Multichannel End-to-end Speech Recognition", International Conference on Machine Learning (ICML), August 2017.
    BibTeX TR2017-107 PDF
    • @inproceedings{Ochiai2017aug,
    • author = {Ochiai, Tsubasa and Watanabe, Shinji and Hori, Takaaki and Hershey, John R.},
    • title = {Multichannel End-to-end Speech Recognition},
    • booktitle = {International Conference on Machine Learning (ICML)},
    • year = 2017,
    • month = aug,
    • url = {https://www.merl.com/publications/TR2017-107}
    • }
  •  Hori, T., Watanabe, S., Hershey, J.R., "Joint CTC/attention decoding for end-to-end speech recognition", Annual Meeting of the Association for Computational Linguistics (ACL), DOI: 10.18653/v1/P17-1048, July 2017, pp. 518-529.
    BibTeX TR2017-103 PDF Video
    • @inproceedings{Hori2017jul,
    • author = {Hori, Takaaki and Watanabe, Shinji and Hershey, John R.},
    • title = {Joint {CTC}/attention decoding for end-to-end speech recognition},
    • booktitle = {Annual Meeting of the Association for Computational Linguistics (ACL)},
    • year = 2017,
    • pages = {518--529},
    • month = jul,
    • doi = {10.18653/v1/P17-1048},
    • url = {https://www.merl.com/publications/TR2017-103}
    • }
  •  Watanabe, S., Hori, T., Hayashi, T., Kim, S., "End-to-end ASR without using morphological analyzer, pronunciation dictionary and language model", Acoustical Society of Japan Spring Meeting (ASJ), March 2017.
    BibTeX TR2017-021 PDF
    • @inproceedings{Watanabe2017mar2,
    • author = {Watanabe, Shinji and Hori, Takaaki and Hayashi, Tomoki and Kim, Suyoun},
    • title = {End-to-end {ASR} without using morphological analyzer, pronunciation dictionary and language model},
    • booktitle = {Acoustical Society of Japan Spring Meeting (ASJ)},
    • year = 2017,
    • month = mar,
    • url = {https://www.merl.com/publications/TR2017-021}
    • }
  •  Hayashi, T., Watanabe, S., Toda, T., Hori, T., Le Roux, J., Takeda, K., "BLSTM-HMM Hybrid System Combined with Sound Activity Detection Network for Polyphonic Sound Event Detection", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), March 2017.
    BibTeX TR2017-014 PDF
    • @inproceedings{Hayashi2017mar,
    • author = {Hayashi, Tomoki and Watanabe, Shinji and Toda, Tomoki and Hori, Takaaki and Le Roux, Jonathan and Takeda, Kazuya},
    • title = {{BLSTM-HMM} Hybrid System Combined with Sound Activity Detection Network for Polyphonic Sound Event Detection},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2017,
    • month = mar,
    • url = {https://www.merl.com/publications/TR2017-014}
    • }
  •  Kim, S., Hori, T., Watanabe, S., "Joint CTC-Attention Based End-to-End Speech Recognition Using Multi-task Learning", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), March 2017.
    BibTeX TR2017-016 PDF Video
    • @inproceedings{Kim2017mar,
    • author = {Kim, Suyoun and Hori, Takaaki and Watanabe, Shinji},
    • title = {Joint {CTC}-Attention Based End-to-End Speech Recognition Using Multi-task Learning},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2017,
    • month = mar,
    • url = {https://www.merl.com/publications/TR2017-016}
    • }
  •  Watanabe, S., Hori, T., Le Roux, J., Hershey, J.R., "Student-Teacher Network Learning with Enhanced Features", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), March 2017.
    BibTeX TR2017-011 PDF
    • @inproceedings{Watanabe2017mar,
    • author = {Watanabe, Shinji and Hori, Takaaki and Le Roux, Jonathan and Hershey, John R.},
    • title = {Student-Teacher Network Learning with Enhanced Features},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2017,
    • month = mar,
    • url = {https://www.merl.com/publications/TR2017-011}
    • }
  •  Hori, T., Wang, H., Hori, C., Watanabe, S., Harsham, B.A., Le Roux, J., Hershey, J.R., Koji, Y., Jing, Y., Zhu, Z., Aikawa, T., "Dialog State Tracking with Attention-based Sequence-to-sequence Learning", IEEE Workshop on Spoken Language Technology (SLT), DOI: 10.1109/SLT.2016.7846317, December 2016, pp. 552-558.
    BibTeX TR2016-163 PDF
    • @inproceedings{Hori2016dec,
    • author = {Hori, Takaaki and Wang, Hai and Hori, Chiori and Watanabe, Shinji and Harsham, Bret A. and Le Roux, Jonathan and Hershey, John R. and Koji, Yusuke and Jing, Yi and Zhu, Zhaocheng and Aikawa, Takeyuki},
    • title = {Dialog State Tracking with Attention-based Sequence-to-sequence Learning},
    • booktitle = {IEEE Workshop on Spoken Language Technology (SLT)},
    • year = 2016,
    • pages = {552--558},
    • month = dec,
    • doi = {10.1109/SLT.2016.7846317},
    • url = {https://www.merl.com/publications/TR2016-163}
    • }