Publications

241 / 2,863 publications found.


  •  Seki, H., Watanabe, S., Hori, T., Le Roux, J., Hershey, J.R., "An End-to-End Language-Tracking Speech Recognizer for Mixed-Language Speech", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP.2018.8462180, April 2018, pp. 4919-4923.
    BibTeX | Download PDF | About TR2018-002
    • @inproceedings{Seki2018apr,
    • author = {Seki, Hiroshi and Watanabe, Shinji and Hori, Takaaki and Le Roux, Jonathan and Hershey, John R.},
    • title = {An End-to-End Language-Tracking Speech Recognizer for Mixed-Language Speech},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2018,
    • pages = {4919--4923},
    • month = apr,
    • doi = {10.1109/ICASSP.2018.8462180},
    • url = {https://www.merl.com/publications/TR2018-002}
    • }
  •  Settle, S., Le Roux, J., Hori, T., Watanabe, S., Hershey, J.R., "End-to-End Multi-Speaker Speech Recognition", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP.2018.8461893, April 2018, pp. 4819-4823.
    BibTeX | Download PDF | About TR2018-001
    • @inproceedings{Settle2018apr,
    • author = {Settle, Shane and Le Roux, Jonathan and Hori, Takaaki and Watanabe, Shinji and Hershey, John R.},
    • title = {End-to-End Multi-Speaker Speech Recognition},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2018,
    • pages = {4819--4823},
    • month = apr,
    • doi = {10.1109/ICASSP.2018.8461893},
    • url = {https://www.merl.com/publications/TR2018-001}
    • }
  •  Wang, Z.-Q., Le Roux, J., Hershey, J.R., "Alternative Objective Functions for Deep Clustering", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP.2018.8462507, April 2018, pp. 686-690.
    BibTeX | Download PDF | About TR2018-005
    • @inproceedings{Wang2018apr,
    • author = {Wang, Zhong-Qiu and Le Roux, Jonathan and Hershey, John R.},
    • title = {Alternative Objective Functions for Deep Clustering},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2018,
    • pages = {686--690},
    • month = apr,
    • doi = {10.1109/ICASSP.2018.8462507},
    • url = {https://www.merl.com/publications/TR2018-005}
    • }
  •  Wang, Z.-Q., Le Roux, J., Hershey, J.R., "Multi-Channel Deep Clustering: Discriminative Spectral and Spatial Embeddings for Speaker-Independent Speech Separation", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP.2018.8461639, April 2018, pp. 1-5.
    BibTeX | Download PDF | About TR2018-007
    • @inproceedings{Wang2018apr2,
    • author = {Wang, Zhong-Qiu and Le Roux, Jonathan and Hershey, John R.},
    • title = {Multi-Channel Deep Clustering: Discriminative Spectral and Spatial Embeddings for Speaker-Independent Speech Separation},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2018,
    • pages = {1--5},
    • month = apr,
    • doi = {10.1109/ICASSP.2018.8461639},
    • url = {https://www.merl.com/publications/TR2018-007}
    • }
  •  Watanabe, S., Hori, T., Karita, S., Hayashi, T., Nishitoba, J., Unno, Y., Yalta Soplin, N.E., Heymann, J., Wiesner, M., Chen, N., Renduchintala, A., Ochiai, T., "ESPnet: End-to-End Speech Processing Toolkit," Tech. Rep. TR2018-036, arXiv, March 2018.
    BibTeX | Download PDF | About TR2018-036
    • @techreport{Watanabe2018mar,
    • author = {Watanabe, Shinji and Hori, Takaaki and Karita, Shigeki and Hayashi, Tomoki and Nishitoba, Jiro and Unno, Yuya and Yalta Soplin, Nelson Enrique and Heymann, Jahn and Wiesner, Matthew and Chen, Nanxin and Renduchintala, Adithya and Ochiai, Tsubasa},
    • title = {ESPnet: End-to-End Speech Processing Toolkit},
    • institution = {Mitsubishi Electric Research Laboratories},
    • number = {TR2018-036},
    • note = {arXiv preprint},
    • year = 2018,
    • month = mar,
    • url = {https://www.merl.com/publications/TR2018-036}
    • }
  •  Hori, C., Hori, T., "End-to-end Conversation Modeling Track in DSTC6", Dialog System Technology Challenges, December 2017.
    BibTeX | Download PDF | About TR2017-188
    • @inproceedings{Hori2017dec3,
    • author = {Hori, Chiori and Hori, Takaaki},
    • title = {End-to-end Conversation Modeling Track in DSTC6},
    • booktitle = {Dialog System Technology Challenges},
    • year = 2017,
    • month = dec,
    • url = {https://www.merl.com/publications/TR2017-188}
    • }
  •  Hori, C., Hori, T., Marks, T.K., Hershey, J.R., "Early and Late Integration of Audio Features for Automatic Video Description", IEEE Automatic Speech Recognition and Understanding Workshop (ASRU), DOI: 10.1109/ASRU.2017.8268968, December 2017.
    BibTeX | Download PDF | About TR2017-183
    • @inproceedings{Hori2017dec2,
    • author = {Hori, Chiori and Hori, Takaaki and Marks, Tim K. and Hershey, John R.},
    • title = {Early and Late Integration of Audio Features for Automatic Video Description},
    • booktitle = {IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)},
    • year = 2017,
    • month = dec,
    • doi = {10.1109/ASRU.2017.8268968},
    • url = {https://www.merl.com/publications/TR2017-183}
    • }
  •  Hori, T., Watanabe, S., Hershey, J.R., "Multi-level Language Modeling and Decoding for Open Vocabulary End-to-End Speech Recognition", IEEE Automatic Speech Recognition and Understanding Workshop (ASRU), DOI: 10.1109/ASRU.2017.8268948, December 2017.
    BibTeX | Download PDF | About TR2017-181
    • @inproceedings{Hori2017dec,
    • author = {Hori, Takaaki and Watanabe, Shinji and Hershey, John R.},
    • title = {Multi-level Language Modeling and Decoding for Open Vocabulary End-to-End Speech Recognition},
    • booktitle = {IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)},
    • year = 2017,
    • month = dec,
    • doi = {10.1109/ASRU.2017.8268948},
    • url = {https://www.merl.com/publications/TR2017-181}
    • }
  •  Watanabe, S., Hori, T., Hershey, J.R., "Language Independent End-to-End Architecture For Joint Language and Speech Recognition", IEEE Automatic Speech Recognition and Understanding Workshop (ASRU), DOI: 10.1109/ASRU.2017.8268945, December 2017.
    BibTeX | Download PDF | About TR2017-182
    • @inproceedings{Watanabe2017dec,
    • author = {Watanabe, Shinji and Hori, Takaaki and Hershey, John R.},
    • title = {Language Independent End-to-End Architecture For Joint Language and Speech Recognition},
    • booktitle = {IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)},
    • year = 2017,
    • month = dec,
    • doi = {10.1109/ASRU.2017.8268945},
    • url = {https://www.merl.com/publications/TR2017-182}
    • }
  •  Wang, W., Koji, Y., Harsham, B.A., Hori, T., Hershey, J.R., "Sequence Adversarial Training and Minimum Bayes Risk Decoding for End-to-end Neural Conversation Models", Dialog System Technology Challenges, December 2017.
    BibTeX | Download PDF | About TR2017-180
    • @inproceedings{Wang2017dec,
    • author = {Wang, Wen and Koji, Yusuke and Harsham, Bret A. and Hori, Takaaki and Hershey, John R.},
    • title = {Sequence Adversarial Training and Minimum Bayes Risk Decoding for End-to-end Neural Conversation Models},
    • booktitle = {Dialog System Technology Challenges},
    • year = 2017,
    • month = dec,
    • url = {https://www.merl.com/publications/TR2017-180}
    • }
  •  Hori, C., Hori, T., Lee, T.-Y., Zhang, Z., Harsham, B.A., Sumi, K., Marks, T.K., Hershey, J.R., "Attention-Based Multimodal Fusion for Video Description", IEEE International Conference on Computer Vision (ICCV), DOI: 10.1109/ICCV.2017.450, October 2017.
    BibTeX | Download PDF | About TR2017-156
    • @inproceedings{Hori2017oct,
    • author = {Hori, Chiori and Hori, Takaaki and Lee, Teng-Yok and Zhang, Ziming and Harsham, Bret A. and Sumi, Kazuhiko and Marks, Tim K. and Hershey, John R.},
    • title = {Attention-Based Multimodal Fusion for Video Description},
    • booktitle = {IEEE International Conference on Computer Vision (ICCV)},
    • year = 2017,
    • month = oct,
    • doi = {10.1109/ICCV.2017.450},
    • url = {https://www.merl.com/publications/TR2017-156}
    • }
  •  Ochiai, T., Watanabe, S., Katagiri, S., "Does speech enhancement work with end-to-end ASR objectives?: Experimental analysis of multichannel end-to-end ASR", IEEE International Workshop on Machine Learning for Signal Processing (MLSP), October 2017.
    BibTeX | Download PDF | About TR2017-139
    • @inproceedings{Ochiai2017oct,
    • author = {Ochiai, Tsubasa and Watanabe, Shinji and Katagiri, Shigeru},
    • title = {Does speech enhancement work with end-to-end ASR objectives?: Experimental analysis of multichannel end-to-end ASR},
    • booktitle = {IEEE International Workshop on Machine Learning for Signal Processing (MLSP)},
    • year = 2017,
    • month = oct,
    • url = {https://www.merl.com/publications/TR2017-139}
    • }
  •  Ochiai, T., Watanabe, S., Hori, T., Hershey, J.R., Xiao, X., "Unified Architecture for Multichannel End-to-End Speech Recognition with Neural Beamforming", IEEE Journal of Selected Topics in Signal Processing, DOI: 10.1109/JSTSP.2017.2764276, ISSN: 1941-0484, Vol. 11, No. 8, pp. 1274-1288, October 2017.
    BibTeX | Download PDF | About TR2017-192
    • @article{Ochiai2017oct2,
    • author = {Ochiai, Tsubasa and Watanabe, Shinji and Hori, Takaaki and Hershey, John R. and Xiao, Xiong},
    • title = {Unified Architecture for Multichannel End-to-End Speech Recognition with Neural Beamforming},
    • journal = {IEEE Journal of Selected Topics in Signal Processing},
    • year = 2017,
    • volume = 11,
    • number = 8,
    • pages = {1274--1288},
    • month = oct,
    • doi = {10.1109/JSTSP.2017.2764276},
    • issn = {1941-0484},
    • url = {https://www.merl.com/publications/TR2017-192}
    • }
  •  Watanabe, S., Hori, T., Kim, S., Hershey, J.R., Hayashi, T., "Hybrid CTC/Attention Architecture for End-to-End Speech Recognition", IEEE Journal of Selected Topics in Signal Processing, DOI: 10.1109/JSTSP.2017.2763455, ISSN: 1941-0484, Vol. 11, No. 8, pp. 1240-1253, October 2017.
    BibTeX | Download PDF | About TR2017-190
    • @article{Watanabe2017oct,
    • author = {Watanabe, Shinji and Hori, Takaaki and Kim, Suyoun and Hershey, John R. and Hayashi, Tomoki},
    • title = {Hybrid CTC/Attention Architecture for End-to-End Speech Recognition},
    • journal = {IEEE Journal of Selected Topics in Signal Processing},
    • year = 2017,
    • volume = 11,
    • number = 8,
    • pages = {1240--1253},
    • month = oct,
    • doi = {10.1109/JSTSP.2017.2763455},
    • issn = {1941-0484},
    • url = {https://www.merl.com/publications/TR2017-190}
    • }
  •  Magron, P., Le Roux, J., Virtanen, T., "Consistent Anisotropic Wiener Filtering for Audio Source Separation", IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (WASPAA), DOI: 10.1109/WASPAA.2017.8170037, October 2017.
    BibTeX | Download PDF | About TR2017-151
    • @inproceedings{Magron2017oct,
    • author = {Magron, Paul and Le Roux, Jonathan and Virtanen, Tuomas},
    • title = {Consistent Anisotropic Wiener Filtering for Audio Source Separation},
    • booktitle = {IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (WASPAA)},
    • year = 2017,
    • month = oct,
    • doi = {10.1109/WASPAA.2017.8170037},
    • url = {https://www.merl.com/publications/TR2017-151}
    • }
  •  Hori, T., Watanabe, S., Zhang, Y., Chan, W., "Advances in Joint CTC-Attention based End-to-End Speech Recognition with a Deep CNN Encoder and RNN-LM", Interspeech, August 2017.
    BibTeX | Download PDF | About TR2017-132
    • @inproceedings{Hori2017aug,
    • author = {Hori, Takaaki and Watanabe, Shinji and Zhang, Yu and Chan, William},
    • title = {Advances in Joint CTC-Attention based End-to-End Speech Recognition with a Deep CNN Encoder and RNN-LM},
    • booktitle = {Interspeech},
    • year = 2017,
    • month = aug,
    • url = {https://www.merl.com/publications/TR2017-132}
    • }
  •  Shinozaki, T., Watanabe, S., Mochihashi, D., Neubig, G., "Semi-Supervised Learning of a Pronunciation Dictionary from Disjoint Phonemic Transcripts and Text", Interspeech, August 2017.
    BibTeX | Download PDF | About TR2017-133
    • @inproceedings{Shinozaki2017aug,
    • author = {Shinozaki, Takahiro and Watanabe, Shinji and Mochihashi, Daichi and Neubig, Graham},
    • title = {Semi-Supervised Learning of a Pronunciation Dictionary from Disjoint Phonemic Transcripts and Text},
    • booktitle = {Interspeech},
    • year = 2017,
    • month = aug,
    • url = {https://www.merl.com/publications/TR2017-133}
    • }
  •  Tachioka, Y., Narita, T., Miura, I., Uramoto, T., Monta, N., Uenohara, S., Furuya, K., Watanabe, S., Le Roux, J., "Coupled initialization of multi-channel non-negative matrix factorization based on spatial and spectral information", Interspeech, August 2017.
    BibTeX | Download PDF | About TR2017-134
    • @inproceedings{Tachioka2017aug,
    • author = {Tachioka, Yuuki and Narita, Tomohiro and Miura, Iori and Uramoto, Takanobu and Monta, Natsuki and Uenohara, Shingo and Furuya, Kenichi and Watanabe, Shinji and Le Roux, Jonathan},
    • title = {Coupled initialization of multi-channel non-negative matrix factorization based on spatial and spectral information},
    • booktitle = {Interspeech},
    • year = 2017,
    • month = aug,
    • url = {https://www.merl.com/publications/TR2017-134}
    • }
  •  Hayashi, T., Watanabe, S., Toda, T., Hori, T., Le Roux, J., Takeda, K., "Duration-Controlled LSTM for Polyphonic Sound Event Detection", IEEE/ACM Transactions on Audio, Speech, and Language Processing, DOI: 10.1109/TASLP.2017.2740002, ISSN: 2329-9304, Vol. 25, No. 11, August 2017.
    BibTeX | Download PDF | About TR2017-150
    • @article{Hayashi2017aug,
    • author = {Hayashi, Tomoki and Watanabe, Shinji and Toda, Tomoki and Hori, Takaaki and Le Roux, Jonathan and Takeda, Kazuya},
    • title = {Duration-Controlled LSTM for Polyphonic Sound Event Detection},
    • journal = {IEEE/ACM Transactions on Audio, Speech, and Language Processing},
    • year = 2017,
    • volume = 25,
    • number = 11,
    • month = aug,
    • doi = {10.1109/TASLP.2017.2740002},
    • issn = {2329-9304},
    • url = {https://www.merl.com/publications/TR2017-150}
    • }
  •  Ochiai, T., Watanabe, S., Hori, T., Hershey, J.R., "Multichannel End-to-end Speech Recognition", International Conference on Machine Learning (ICML), August 2017.
    BibTeX | Download PDF | About TR2017-107
    • @inproceedings{Ochiai2017aug,
    • author = {Ochiai, Tsubasa and Watanabe, Shinji and Hori, Takaaki and Hershey, John R.},
    • title = {Multichannel End-to-end Speech Recognition},
    • booktitle = {International Conference on Machine Learning (ICML)},
    • year = 2017,
    • month = aug,
    • url = {https://www.merl.com/publications/TR2017-107}
    • }