Publications

241 / 2,868 publications found.


  •  Hori, T., Watanabe, S., Hershey, J.R., "Joint CTC/attention decoding for end-to-end speech recognition", Association for Computational Linguistics (ACL), DOI: 10.18653/v1/P17-1048, July 2017, pp. 518-529.
    BibTeX Download PDFAbout TR2017-103
    • @inproceedings{Hori2017jul,
    • author = {Hori, Takaaki and Watanabe, Shinji and Hershey, John R.},
    • title = {Joint CTC/attention decoding for end-to-end speech recognition},
    • booktitle = {Association for Computational Linguistics (ACL)},
    • year = 2017,
    • pages = {518--529},
    • month = jul,
    • doi = {10.18653/v1/P17-1048},
    • url = {https://www.merl.com/publications/TR2017-103}
    • }
  •  Watanabe, S., Hori, T., Hayashi, T., Kim, S., "End-to-end ASR without using morphological analyzer, pronunciation dictionary and language model", Acoustical Society of Japan Spring Meeting (ASJ), March 2017.
    BibTeX Download PDFAbout TR2017-021
    • @inproceedings{Watanabe2017mar2,
    • author = {Watanabe, Shinji and Hori, Takaaki and Hayashi, Tomoki and Kim, Suyoun},
    • title = {End-to-end ASR without using morphological analyzer, pronunciation dictionary and language model},
    • booktitle = {Acoustical Society of Japan Spring Meeting (ASJ)},
    • year = 2017,
    • month = mar,
    • url = {https://www.merl.com/publications/TR2017-021}
    • }
  •  Ochiai, T., Watanabe, S., Hori, T., Hershey, J.R., "Multichannel End-to-end Speech Recognition", Tech. Rep. TR2017-035, Mitsubishi Electric Research Laboratories, Cambridge, MA, March 2017.
    BibTeX Download PDFAbout TR2017-035
    • @techreport{MERL_TR2017-035,
    • author = {Ochiai, T. and Watanabe, S. and Hori, T. and Hershey, J. R.},
    • title = {Multichannel End-to-end Speech Recognition},
    • institution = {MERL - Mitsubishi Electric Research Laboratories},
    • address = {Cambridge, MA 02139},
    • number = {TR2017-035},
    • month = mar,
    • year = 2017,
    • url = {https://www.merl.com/publications/TR2017-035/}
    • }
  •  Hayashi, T., Watanabe, S., Toda, T., Hori, T., Le Roux, J., Takeda, K., "BLSTM-HMM Hybrid System Combined with Sound Activity Detection Network for Polyphonic Sound Event Detection", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), March 2017.
    BibTeX Download PDFAbout TR2017-014
    • @inproceedings{Hayashi2017mar,
    • author = {Hayashi, Tomoki and Watanabe, Shinji and Toda, Tomoki and Hori, Takaaki and Le Roux, Jonathan and Takeda, Kazuya},
    • title = {BLSTM-HMM Hybrid System Combined with Sound Activity Detection Network for Polyphonic Sound Event Detection},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2017,
    • month = mar,
    • url = {https://www.merl.com/publications/TR2017-014}
    • }
  •  Kim, S., Hori, T., Watanabe, S., "Joint CTC-Attention Based End-to-End Speech Recognition Using Multi-task Learning", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), March 2017.
    BibTeX Download PDFAbout TR2017-016
    • @inproceedings{Kim2017mar,
    • author = {Kim, Suyoun and Hori, Takaaki and Watanabe, Shinji},
    • title = {Joint CTC-Attention Based End-to-End Speech Recognition Using Multi-task Learning},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2017,
    • month = mar,
    • url = {https://www.merl.com/publications/TR2017-016}
    • }
  •  Luo, Y., Chen, Z., Hershey, J.R., Le Roux, J., Mesgarani, N., "Deep Clustering and Conventional Networks for Music Separation: Strong Together", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), March 2017.
    BibTeX Download PDFAbout TR2017-010
    • @inproceedings{Luo2017mar,
    • author = {Luo, Yi and Chen, Zhuo and Hershey, John R. and Le Roux, Jonathan and Mesgarani, Nima},
    • title = {Deep Clustering and Conventional Networks for Music Separation: Strong Together},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2017,
    • month = mar,
    • url = {https://www.merl.com/publications/TR2017-010}
    • }
  •  Meng, Z., Watanabe, S., Hershey, J.R., Erdogan, H., "Deep Long Short-Term Memory Adaptive Beamforming Networks for Multichannel Robust Speech Recognition", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), March 2017.
    BibTeX Download PDFAbout TR2017-012
    • @inproceedings{Meng2017mar,
    • author = {Meng, Zhong and Watanabe, Shinji and Hershey, John R. and Erdogan, Hakan},
    • title = {Deep Long Short-Term Memory Adaptive Beamforming Networks for Multichannel Robust Speech Recognition},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2017,
    • month = mar,
    • url = {https://www.merl.com/publications/TR2017-012}
    • }
  •  Watanabe, S., Hori, T., Le Roux, J., Hershey, J.R., "Student-Teacher Network Learning with Enhanced Features", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), March 2017.
    BibTeX Download PDFAbout TR2017-011
    • @inproceedings{Watanabe2017mar,
    • author = {Watanabe, Shinji and Hori, Takaaki and Le Roux, Jonathan and Hershey, John R.},
    • title = {Student-Teacher Network Learning with Enhanced Features},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2017,
    • month = mar,
    • url = {https://www.merl.com/publications/TR2017-011}
    • }
  •  Hori, C., Hori, T., Lee, T.-Y., Sumi, K., Hershey, J.R., Marks, T.K., "Attention-Based Multimodal Fusion for Video Description," Tech. Rep. TR2017-008, arXiv, January 2017.
    BibTeX Download PDFAbout TR2017-008
    • @techreport{Hori2017jan,
    • author = {Hori, Chiori and Hori, Takaaki and Lee, Teng-Yok and Sumi, Kazuhiko and Hershey, John R. and Marks, Tim K.},
    • title = {Attention-Based Multimodal Fusion for Video Description},
    • journal = {arXiv},
    • institution = {arXiv},
    • year = 2017,
    • month = jan,
    • url = {https://www.merl.com/publications/TR2017-008}
    • }
  •  Xiao, X., Watanabe, S., Chng, E.S., Li, H., "Beamforming Networks Using Spatial Covariance Features for Far-field Speech Recognition", Asian-Pacific Signal and Information Processing Association Annual Summit and Conference, DOI: 10.1109/APSIPA.2016.7820724, December 2016.
    BibTeX Download PDFAbout TR2016-162
    • @inproceedings{Xiao2016dec,
    • author = {Xiao, Xiong and Watanabe, Shinji and Chng, Eng Siong and Li, Haizhou},
    • title = {Beamforming Networks Using Spatial Covariance Features for Far-field Speech Recognition},
    • booktitle = {Asian-Pacific Signal and Information Processing Association Annual Summit and Conference},
    • year = 2016,
    • month = dec,
    • doi = {10.1109/APSIPA.2016.7820724},
    • url = {https://www.merl.com/publications/TR2016-162}
    • }
  •  Hori, T., Wang, H., Hori, C., Watanabe, S., Harsham, B.A., Le Roux, J., Hershey, J.R., Koji, Y., Jing, Y., Zhu, Z., Aikawa, T., "Dialog State Tracking with Attention-based Sequence-to-sequence Learning", IEEE Workshop on Spoken Language Technology (SLT), DOI: 10.1109/SLT.2016.7846317, December 2016, pp. 552-558.
    BibTeX Download PDFAbout TR2016-163
    • @inproceedings{Hori2016dec,
    • author = {Hori, Takaaki and Wang, Hai and Hori, Chiori and Watanabe, Shinji and Harsham, Bret A. and Le Roux, Jonathan and Hershey, John R. and Koji, Yusuke and Jing, Yi and Zhu, Zhaocheng and Aikawa, Takeyuki},
    • title = {Dialog State Tracking with Attention-based Sequence-to-sequence Learning},
    • booktitle = {IEEE Workshop on Spoken Language Technology (SLT)},
    • year = 2016,
    • pages = {552--558},
    • month = dec,
    • doi = {10.1109/SLT.2016.7846317},
    • url = {https://www.merl.com/publications/TR2016-163}
    • }
  •  Takano, T., Moriya, T., Shinozaki, T., Watanabe, S., Hori, T., Duh, K., "Automated structure discovery and parameter tuning of neural network language model based on evolution strategy", IEEE Spoken Language Technology (SLT) Workshop, DOI: 10.1109/SLT.2016.7846334, December 2016.
    BibTeX Download PDFAbout TR2016-173
    • @inproceedings{Takano2016dec,
    • author = {Takano, Tomihiro and Moriya, Takafumi and Shinozaki, Takahiro and Watanabe, Shinji and Hori, Takaaki and Duh, Kevin},
    • title = {Automated structure discovery and parameter tuning of neural network language model based on evolution strategy},
    • booktitle = {IEEE Spoken Language Technology (SLT) Workshop},
    • year = 2016,
    • month = dec,
    • doi = {10.1109/SLT.2016.7846334},
    • url = {https://www.merl.com/publications/TR2016-173}
    • }
  •  Barker, J., Marxer, R., Vincent, E., Watanabe, S., "The Third 'CHiME' Speech Separation and Recognition Challenge: Analysis and Outcomes", Computer Speech & Language, DOI: 10.1016/j.csl.2016.10.005, December 2016.
    BibTeX Download PDFAbout TR2016-171
    • @article{Barker2016dec,
    • author = {Barker, Jon and Marxer, Ricard and Vincent, Emmanuel and Watanabe, Shinji},
    • title = {The Third 'CHiME' Speech Separation and Recognition Challenge: Analysis and Outcomes},
    • journal = {Computer Speech \& Language},
    • year = 2016,
    • month = dec,
    • doi = {10.1016/j.csl.2016.10.005},
    • url = {https://www.merl.com/publications/TR2016-171}
    • }
  •  Wisdom, S., Powers, T., Hershey, J.R., Le Roux, J., Atlas, L., "Full-Capacity Unitary Recurrent Neural Networks", Advances in Neural Information Processing Systems (NIPS), December 2016.
    BibTeX Download PDFAbout TR2016-155
    • @inproceedings{Wisdom2016dec,
    • author = {Wisdom, Scott and Powers, Thomas and Hershey, John R. and Le Roux, Jonathan and Atlas, Les},
    • title = {Full-Capacity Unitary Recurrent Neural Networks},
    • booktitle = {Advances in Neural Information Processing Systems (NIPS)},
    • year = 2016,
    • month = dec,
    • url = {https://www.merl.com/publications/TR2016-155}
    • }
  •  Vincent, E., Watanabe, S., Nugraha, A.A., Barker, J., Marxer, R., "An analysis of environment, microphone and data simulation mismatches in robust speech recognition", Computer Speech & Language, DOI: 10.1016/j.csl.2016.11.005, December 2016.
    BibTeX Download PDFAbout TR2016-172
    • @article{Vincent2016dec,
    • author = {Vincent, Emmanuel and Watanabe, Shinji and Nugraha, Aditya Arie and Barker, Jon and Marxer, Ricard},
    • title = {An analysis of environment, microphone and data simulation mismatches in robust speech recognition},
    • journal = {Computer Speech \& Language},
    • year = 2016,
    • month = dec,
    • doi = {10.1016/j.csl.2016.11.005},
    • url = {https://www.merl.com/publications/TR2016-172}
    • }
  •  Tawara, N., Ogawa, T., Watanabe, S., Kobayashi, T., "Nested Gibbs sampling for mixture-of-mixture model and its application to speaker clustering", APSIPA Transactions on Signal and Information Processing, DOI: 10.1017/ATSIP.2016.15, Vol. 5, October 2016.
    BibTeX Download PDFAbout TR2016-138
    • @article{Tawara2016oct,
    • author = {Tawara, Naohiro and Ogawa, Tetsuji and Watanabe, Shinji and Kobayashi, Tetsunori},
    • title = {Nested Gibbs sampling for mixture-of-mixture model and its application to speaker clustering},
    • journal = {APSIPA Transactions on Signal and Information Processing},
    • year = 2016,
    • volume = 5,
    • month = oct,
    • doi = {10.1017/ATSIP.2016.15},
    • url = {https://www.merl.com/publications/TR2016-138}
    • }
  •  Delcroix, M., Watanabe, S., "Recent Advances in Distant Speech Recognition," Tech. Rep. TR2016-115, Interspeech Tutorials, September 2016.
    BibTeX Download PDFAbout TR2016-115
    • @techreport{Delcroix2016sep,
    • author = {Delcroix, Marc and Watanabe, Shinji},
    • title = {Recent Advances in Distant Speech Recognition},
    • booktitle = {Interspeech Tutorials},
    • institution = {Interspeech},
    • year = 2016,
    • month = sep,
    • url = {https://www.merl.com/publications/TR2016-115}
    • }
  •  Le Roux, J., Vincent, E., Erdogan, H., "Learning-Based Approaches to Speech Enhancement and Separation," Tech. Rep. TR2016-113, Interspeech Tutorials, September 2016.
    BibTeX Download PDFAbout TR2016-113
    • @techreport{LeRoux2016sep,
    • author = {Le Roux, Jonathan and Vincent, Emmanuel and Erdogan, Hakan},
    • title = {Learning-Based Approaches to Speech Enhancement and Separation},
    • booktitle = {Interspeech Tutorials},
    • institution = {Interspeech},
    • year = 2016,
    • month = sep,
    • url = {https://www.merl.com/publications/TR2016-113}
    • }
  •  Erdogan, H., Hershey, J.R., Watanabe, S., Mandel, M., Le Roux, J., "Improved MVDR beamforming using single-channel mask prediction networks", Interspeech, DOI: 10.21437/Interspeech.2016-552, September 2016, pp. 1981-1985.
    BibTeX Download PDFAbout TR2016-072
    • @inproceedings{Erdogan2016sep,
    • author = {Erdogan, Hakan and Hershey, John R. and Watanabe, Shinji and Mandel, Michael and Le Roux, Jonathan},
    • title = {Improved MVDR beamforming using single-channel mask prediction networks},
    • booktitle = {Interspeech},
    • year = 2016,
    • pages = {1981--1985},
    • month = sep,
    • doi = {10.21437/Interspeech.2016-552},
    • url = {https://www.merl.com/publications/TR2016-072}
    • }
  •  Hori, C., Hori, T., Watanabe, S., Hershey, J.R., "Context-Sensitive and Role-Dependent Spoken Language Understanding using Bidirectional and Attention LSTMs", Interspeech, DOI: 10.21437/Interspeech.2016-1171, September 2016, pp. 3236-3240.
    BibTeX Download PDFAbout TR2016-074
    • @inproceedings{Hori2016sep,
    • author = {Hori, Chiori and Hori, Takaaki and Watanabe, Shinji and Hershey, John R.},
    • title = {Context-Sensitive and Role-Dependent Spoken Language Understanding using Bidirectional and Attention LSTMs},
    • booktitle = {Interspeech},
    • year = 2016,
    • pages = {3236--3240},
    • month = sep,
    • doi = {10.21437/Interspeech.2016-1171},
    • url = {https://www.merl.com/publications/TR2016-074}
    • }
  •  Isik, Y., Le Roux, J., Chen, Z., Watanabe, S., Hershey, J.R., "Single-Channel Multi-Speaker Separation using Deep Clustering", Interspeech, DOI: 10.21437/Interspeech.2016-1176, September 2016, pp. 545-549.
    BibTeX Download PDFAbout TR2016-073
    • @inproceedings{Isik2016sep,
    • author = {Isik, Yusuf and Le Roux, Jonathan and Chen, Zhuo and Watanabe, Shinji and Hershey, John R.},
    • title = {Single-Channel Multi-Speaker Separation using Deep Clustering},
    • booktitle = {Interspeech},
    • year = 2016,
    • pages = {545--549},
    • month = sep,
    • doi = {10.21437/Interspeech.2016-1176},
    • url = {https://www.merl.com/publications/TR2016-073}
    • }
  •  Zmolikova, K., Karafiat, M., Vesely, K., Delcroix, M., Watanabe, S., Burget, L., Cernocky, J.H., "Data selection by sequence summarizing neural network in mismatch condition training", Interspeech, DOI: 10.21437/Interspeech.2016-741, September 2016, pp. 2354-2358.
    BibTeX Download PDFAbout TR2016-075
    • @inproceedings{Zmolikova2016sep,
    • author = {Zmolikova, Katerina and Karafiat, Martin and Vesely, Karel and Delcroix, Marc and Watanabe, Shinji and Burget, Lukas and Cernocky, Jan Honza},
    • title = {Data selection by sequence summarizing neural network in mismatch condition training},
    • booktitle = {Interspeech},
    • year = 2016,
    • pages = {2354--2358},
    • month = sep,
    • doi = {10.21437/Interspeech.2016-741},
    • url = {https://www.merl.com/publications/TR2016-075}
    • }
  •  Hayashi, T., Watanabe, S., Toda, T., Hori, T., Le Roux, J., Takeda, K., "Bidirectional LSTM-HMM Hybrid System for Polyphonic Sound Event Detection", Detection and Classification of Acoustic Scenes and Events Workshop, September 2016, pp. 35-39.
    BibTeX Download PDFAbout TR2016-114
    • @inproceedings{Hayashi2016sep,
    • author = {Hayashi, Tomoki and Watanabe, Shinji and Toda, Tomoki and Hori, Takaaki and Le Roux, Jonathan and Takeda, Kazuya},
    • title = {Bidirectional LSTM-HMM Hybrid System for Polyphonic Sound Event Detection},
    • booktitle = {Detection and Classification of Acoustic Scenes and Events Workshop},
    • year = 2016,
    • pages = {35--39},
    • month = sep,
    • url = {https://www.merl.com/publications/TR2016-114}
    • }
  •  Hori, C., Watanabe, S., Hori, T., Harsham, B.A., Hershey, J.R., Koji, Y., Fujii, Y., Furumoto, Y., "Driver Confusion Status Detection Using Recurrent Neural Networks", IEEE International Conference on Multimedia and Expo (ICME), DOI: 10.1109/ICME.2016.7552966, July 2016.
    BibTeX Download PDFAbout TR2016-088
    • @inproceedings{Hori2016jul,
    • author = {Hori, Chiori and Watanabe, Shinji and Hori, Takaaki and Harsham, Bret A. and Hershey, John R. and Koji, Yusuke and Fujii, Youichi and Furumoto, Yuki},
    • title = {Driver Confusion Status Detection Using Recurrent Neural Networks},
    • booktitle = {IEEE International Conference on Multimedia and Expo (ICME)},
    • year = 2016,
    • month = jul,
    • doi = {10.1109/ICME.2016.7552966},
    • url = {https://www.merl.com/publications/TR2016-088}
    • }
  •  Hershey, J.R., Chen, Z., Le Roux, J., Watanabe, S., "Deep Clustering: Discriminative Embeddings for Segmentation and Separation", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP.2016.7471631, March 2016, pp. 31-35.
    BibTeX Download PDFAbout TR2016-003
    • @inproceedings{Hershey2016mar,
    • author = {Hershey, John R. and Chen, Zhuo and Le Roux, Jonathan and Watanabe, Shinji},
    • title = {Deep Clustering: Discriminative Embeddings for Segmentation and Separation},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2016,
    • pages = {31--35},
    • month = mar,
    • doi = {10.1109/ICASSP.2016.7471631},
    • url = {https://www.merl.com/publications/TR2016-003}
    • }