Publications

73 / 3,591 publications found.


  •  Watanabe, S., Hori, T., Kim, S., Hershey, J.R., Hayashi, T., "Hybrid CTC/Attention Architecture for End-to-End Speech Recognition", IEEE Journal of Selected Topics in Signal Processing, DOI: 10.1109/​JSTSP.2017.2763455, Vol. 11, No. 8, pp. 1240-1253, October 2017.
    BibTeX TR2017-190 PDF Video
    • @article{Watanabe2017oct,
    • author = {Watanabe, Shinji and Hori, Takaaki and Kim, Suyoun and Hershey, John R. and Hayashi, Tomoki},
    • title = {Hybrid CTC/Attention Architecture for End-to-End Speech Recognition},
    • journal = {IEEE Journal of Selected Topics in Signal Processing},
    • year = 2017,
    • volume = 11,
    • number = 8,
    • pages = {1240--1253},
    • month = oct,
    • doi = {10.1109/JSTSP.2017.2763455},
    • issn = {1941-0484},
    • url = {https://www.merl.com/publications/TR2017-190}
    • }
  •  Hori, T., Watanabe, S., Zhang, Y., Chan, W., "Advances in Joint CTC-Attention based End-to-End Speech Recognition with a Deep CNN Encoder and RNN-LM", Interspeech, August 2017.
    BibTeX TR2017-132 PDF Video
    • @inproceedings{Hori2017aug,
    • author = {Hori, Takaaki and Watanabe, Shinji and Zhang, Yu and Chan, William},
    • title = {Advances in Joint CTC-Attention based End-to-End Speech Recognition with a Deep CNN Encoder and RNN-LM},
    • booktitle = {Interspeech},
    • year = 2017,
    • month = aug,
    • url = {https://www.merl.com/publications/TR2017-132}
    • }
  •  Tachioka, Y., Narita, T., Miura, I., Uramoto, T., Monta, N., Uenohara, S., Furuya, K., Watanabe, S., Le Roux, J., "Coupled initialization of multi-channel non-negative matrix factorization based on spatial and spectral information", Interspeech, August 2017.
    BibTeX TR2017-134 PDF
    • @inproceedings{Tachioka2017aug,
    • author = {Tachioka, Yuuki and Narita, Tomohiro and Miura, Iori and Uramoto, Takanobu and Monta, Natsuki and Uenohara, Shingo and Furuya, Kenichi and Watanabe, Shinji and Le Roux, Jonathan},
    • title = {Coupled initialization of multi-channel non-negative matrix factorization based on spatial and spectral information},
    • booktitle = {Interspeech},
    • year = 2017,
    • month = aug,
    • url = {https://www.merl.com/publications/TR2017-134}
    • }
  •  Hori, T., Watanabe, S., Hershey, J.R., "Joint CTC/attention decoding for end-to-end speech recognition", Association for Computational Linguistics (ACL), DOI: 10.18653/​v1/​P17-1048, July 2017, pp. 518-529.
    BibTeX TR2017-103 PDF Video
    • @inproceedings{Hori2017jul,
    • author = {Hori, Takaaki and Watanabe, Shinji and Hershey, John R.},
    • title = {Joint CTC/attention decoding for end-to-end speech recognition},
    • booktitle = {Association for Computational Linguistics (ACL)},
    • year = 2017,
    • pages = {518--529},
    • month = jul,
    • doi = {10.18653/v1/P17-1048},
    • url = {https://www.merl.com/publications/TR2017-103}
    • }
  •  Watanabe, S., Hori, T., Hayashi, T., Kim, S., "End-to-end ASR without using morphological analyzer, pronunciation dictionary and language model", Acoustical Society of Japan Spring Meeting (ASJ), March 2017.
    BibTeX TR2017-021 PDF
    • @inproceedings{Watanabe2017mar2,
    • author = {Watanabe, Shinji and Hori, Takaaki and Hayashi, Tomoki and Kim, Suyoun},
    • title = {End-to-end ASR without using morphological analyzer, pronunciation dictionary and language model},
    • booktitle = {Acoustical Society of Japan Spring Meeting (ASJ)},
    • year = 2017,
    • month = mar,
    • url = {https://www.merl.com/publications/TR2017-021}
    • }
  •  Watanabe, S., Hori, T., Le Roux, J., Hershey, J.R., "Student-Teacher Network Learning with Enhanced Features", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), March 2017.
    BibTeX TR2017-011 PDF
    • @inproceedings{Watanabe2017mar,
    • author = {Watanabe, Shinji and Hori, Takaaki and Le Roux, Jonathan and Hershey, John R.},
    • title = {Student-Teacher Network Learning with Enhanced Features},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2017,
    • month = mar,
    • url = {https://www.merl.com/publications/TR2017-011}
    • }
  •  Xiao, X., Watanabe, S., Chng, E.S., Li, H., "Beamforming Networks Using Spatial Covariance Features for Far-field Speech Recognition", Asia-Pacific Signal and Information Processing Association Annual Summit and Conference (APSIPA ASC), DOI: 10.1109/​APSIPA.2016.7820724, December 2016.
    BibTeX TR2016-162 PDF
    • @inproceedings{Xiao2016dec,
    • author = {Xiao, Xiong and Watanabe, Shinji and Chng, Eng Siong and Li, Haizhou},
    • title = {Beamforming Networks Using Spatial Covariance Features for Far-field Speech Recognition},
    • booktitle = {Asia-Pacific Signal and Information Processing Association Annual Summit and Conference (APSIPA ASC)},
    • year = 2016,
    • month = dec,
    • doi = {10.1109/APSIPA.2016.7820724},
    • url = {https://www.merl.com/publications/TR2016-162}
    • }
  •  Vincent, E., Watanabe, S., Nugraha, A.A., Barker, J., Marxer, R., "An analysis of environment, microphone and data simulation mismatches in robust speech recognition", Computer Speech & Language, DOI: 10.1016/​j.csl.2016.11.005, December 2016.
    BibTeX TR2016-172 PDF
    • @article{Vincent2016dec,
    • author = {Vincent, Emmanuel and Watanabe, Shinji and Nugraha, Aditya Arie and Barker, Jon and Marxer, Ricard},
    • title = {An analysis of environment, microphone and data simulation mismatches in robust speech recognition},
    • journal = {Computer Speech \& Language},
    • year = 2016,
    • month = dec,
    • publisher = {Elsevier},
    • doi = {10.1016/j.csl.2016.11.005},
    • url = {https://www.merl.com/publications/TR2016-172}
    • }
  •  Delcroix, M., Watanabe, S., "Recent Advances in Distant Speech Recognition," Tech. Rep. TR2016-115, Interspeech Tutorials, September 2016.
    BibTeX TR2016-115 PDF
    • @techreport{Delcroix2016sep,
    • author = {Delcroix, Marc and Watanabe, Shinji},
    • title = {Recent Advances in Distant Speech Recognition},
    • booktitle = {Interspeech Tutorials},
    • institution = {Interspeech},
    • year = 2016,
    • month = sep,
    • url = {https://www.merl.com/publications/TR2016-115}
    • }
  •  Le Roux, J., Vincent, E., Erdogan, H., "Learning-Based Approaches to Speech Enhancement and Separation," Tech. Rep. TR2016-113, Interspeech Tutorials, September 2016.
    BibTeX TR2016-113 PDF
    • @techreport{LeRoux2016sep,
    • author = {Le Roux, Jonathan and Vincent, Emmanuel and Erdogan, Hakan},
    • title = {Learning-Based Approaches to Speech Enhancement and Separation},
    • booktitle = {Interspeech Tutorials},
    • institution = {Interspeech},
    • year = 2016,
    • month = sep,
    • url = {https://www.merl.com/publications/TR2016-113}
    • }
  •  Hsiao, R., Ma, J., Hartmann, W., Karafiat, M., Grezl, F., Burget, L., Szoke, I., Cernocky, J., Watanabe, S., Chen, Z., Mallidi, S.H., Hermansky, H., Tsakalidis, S., Schwartz, R., "Robust Speech Recognition in Unknown Reverberant and Noisy Conditions", IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU), DOI: 10.1109/​ASRU.2015.7404841, December 2015, pp. 533-538.
    BibTeX TR2015-138 PDF
    • @inproceedings{Hsiao2015dec,
    • author = {Hsiao, R. and Ma, J. and Hartmann, W. and Karafiat, M. and Grezl, F. and Burget, L. and Szoke, I. and Cernocky, J. and Watanabe, S. and Chen, Z. and Mallidi, S.H. and Hermansky, H. and Tsakalidis, S. and Schwartz, R.},
    • title = {Robust Speech Recognition in Unknown Reverberant and Noisy Conditions},
    • booktitle = {IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU)},
    • year = 2015,
    • pages = {533--538},
    • month = dec,
    • publisher = {IEEE},
    • doi = {10.1109/ASRU.2015.7404841},
    • url = {https://www.merl.com/publications/TR2015-138}
    • }
  •  Harsham, B.A., Watanabe, S., Esenther, A., Hershey, J.R., Le Roux, J., Luan, Y., Nikovski, D.N., Potluru, V.K., "Driver Prediction to Improve Interaction with In-Vehicle HMI", Workshop on DSP for In-Vehicle Systems and Safety (DSP), October 2015.
    BibTeX TR2015-120 PDF
    • @inproceedings{Harsham2015oct,
    • author = {Harsham, B.A. and Watanabe, S. and Esenther, A. and Hershey, J.R. and {Le Roux}, J. and Luan, Y. and Nikovski, D.N. and Potluru, V.K.},
    • title = {Driver Prediction to Improve Interaction with In-Vehicle HMI},
    • booktitle = {Workshop on DSP for In-Vehicle Systems and Safety (DSP)},
    • year = 2015,
    • month = oct,
    • url = {https://www.merl.com/publications/TR2015-120}
    • }
  •  Abdelaziz, A.H., Watanabe, S., Hershey, J.R., Vincent, E., Kolossa, D., "Uncertainty Propagation Through Deep Neural Networks", Interspeech, September 2015, vol. 1 or 5, pp. 3561.
    BibTeX TR2015-098 PDF
    • @inproceedings{Abdelaziz2015sep,
    • author = {Abdelaziz, A.H. and Watanabe, S. and Hershey, J.R. and Vincent, E. and Kolossa, D.},
    • title = {Uncertainty Propagation Through Deep Neural Networks},
    • booktitle = {Interspeech},
    • year = 2015,
    • volume = {1 or 5},
    • pages = 3561,
    • month = sep,
    • isbn = {978-1-5108-1790-6},
    • url = {https://www.merl.com/publications/TR2015-098}
    • }
  •  Chen, Z., Watanabe, S., Erdogan, H., Hershey, J.R., "Speech Enhancement and Recognition Using Multi-Task Learning of Long Short-Term Memory Recurrent Neural Networks", Interspeech, September 2015, vol. 1 of 5, pp. 1278.
    BibTeX TR2015-100 PDF
    • @inproceedings{Chen2015sep,
    • author = {Chen, Z. and Watanabe, S. and Erdogan, H. and Hershey, J.R.},
    • title = {Speech Enhancement and Recognition Using Multi-Task Learning of Long Short-Term Memory Recurrent Neural Networks},
    • booktitle = {Interspeech},
    • year = 2015,
    • volume = {1 of 5},
    • pages = 1278,
    • month = sep,
    • isbn = {978-1-5108-1790-6},
    • url = {https://www.merl.com/publications/TR2015-100}
    • }
  •  Tachioka, Y., Watanabe, S., "Uncertainty Training and Decoding Methods of Deep Neural Networks Based on Stochastic Representation of Enhanced Features", Interspeech, September 2015, vol. 1 or 5, pp. 3541.
    BibTeX TR2015-099 PDF
    • @inproceedings{Tachioka2015sep,
    • author = {Tachioka, Y. and Watanabe, S.},
    • title = {Uncertainty Training and Decoding Methods of Deep Neural Networks Based on Stochastic Representation of Enhanced Features},
    • booktitle = {Interspeech},
    • year = 2015,
    • volume = {1 or 5},
    • pages = 3541,
    • month = sep,
    • isbn = {978-1-5108-1790-6},
    • url = {https://www.merl.com/publications/TR2015-099}
    • }
  •  Weninger, F.J., Erdogan, H., Watanabe, S., Vincent, E., Le Roux, J., Hershey, J.R., Schuller, B.W., "Speech Enhancement with LSTM Recurrent Neural Networks and Its Application to Noise-Robust ASR", Latent Variable Analysis and Signal Separation Conference (LVA), DOI: 10.1007/​978-3-319-22482-4_11, August 2015, vol. 9237, pp. 91-99.
    BibTeX TR2015-094 PDF
    • @inproceedings{Weninger2015aug,
    • author = {Weninger, F.J. and Erdogan, H. and Watanabe, S. and Vincent, E. and {Le Roux}, J. and Hershey, J.R. and Schuller, B.W.},
    • title = {Speech Enhancement with LSTM Recurrent Neural Networks and Its Application to Noise-Robust ASR},
    • booktitle = {Latent Variable Analysis and Signal Separation Conference (LVA)},
    • year = 2015,
    • volume = 9237,
    • pages = {91--99},
    • month = aug,
    • doi = {10.1007/978-3-319-22482-4_11},
    • isbn = {978-3-319-22482-4},
    • url = {https://www.merl.com/publications/TR2015-094}
    • }
  •  Tachioka, Y., Narita, T., Watanabe, S., "Effectiveness of Dereverberation, Feature Transformation, Discriminative Training Methods, and System Combination Approach for Various Reverberant Environments", EURASIP Journal on Advances in Signal Processing, DOI: 10.1186/​s13634-015-0241-y, June 2015.
    BibTeX TR2015-152 PDF
    • @article{Tachioka2015jun,
    • author = {Tachioka, Y. and Narita, T. and Watanabe, S.},
    • title = {Effectiveness of Dereverberation, Feature Transformation, Discriminative Training Methods, and System Combination Approach for Various Reverberant Environments},
    • journal = {EURASIP Journal on Advances in Signal Processing},
    • year = 2015,
    • month = jun,
    • doi = {10.1186/s13634-015-0241-y},
    • url = {https://www.merl.com/publications/TR2015-152}
    • }
  •  Le Roux, J., Vincent, E., Hershey, J.R., Ellis, D.P.W., "Micbots: Collecting Large Realistic Datasets for Speech and Audio Research Using Mobile Robots", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/​ICASSP.2015.7179050, April 2015, pp. 5635-5639.
    BibTeX TR2015-030 PDF
    • @inproceedings{LeRoux2015apr2,
    • author = {{Le Roux}, J. and Vincent, E. and Hershey, J.R. and Ellis, D.P.W.},
    • title = {Micbots: Collecting Large Realistic Datasets for Speech and Audio Research Using Mobile Robots},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2015,
    • pages = {5635--5639},
    • month = apr,
    • publisher = {IEEE},
    • doi = {10.1109/ICASSP.2015.7179050},
    • url = {https://www.merl.com/publications/TR2015-030}
    • }
  •  Tachioka, Y., Watanabe, S., "Discriminative Method for Recurrent Neural Network Language Models", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/​ICASSP.2015.7179000, April 2015, pp. 5386-5390.
    BibTeX TR2015-033 PDF
    • @inproceedings{Tachioka2015apr,
    • author = {Tachioka, Y. and Watanabe, S.},
    • title = {Discriminative Method for Recurrent Neural Network Language Models},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2015,
    • pages = {5386--5390},
    • month = apr,
    • publisher = {IEEE},
    • doi = {10.1109/ICASSP.2015.7179000},
    • url = {https://www.merl.com/publications/TR2015-033}
    • }
  •  Le Roux, J., Vincent, E., "A Categorization of Robust Speech Processing Datasets," Tech. Rep. TR2014-116, Mitsubishi Electric Research Laboratories, September 2014.
    BibTeX TR2014-116 PDF
    • @techreport{LeRouxVincent2014TRdatasets,
    • author = {{Le Roux}, J. and Vincent, E.},
    • title = {A Categorization of Robust Speech Processing Datasets},
    • institution = {Mitsubishi Electric Research Laboratories},
    • year = 2014,
    • number = {TR2014-116},
    • address = {Cambridge, MA, USA},
    • month = sep,
    • note = {v2014-09},
    • url = {https://www.merl.com/publications/TR2014-116}
    • }
  •  Tachioka, Y., Narita, T., Weninger, F., Watanabe, S., "Dual system combination approach for various reverberant environments with dereverberation techniques", IEEE REVERB Workshop, May 2014.
    BibTeX TR2014-032 PDF
    • @inproceedings{Tachioka2014may,
    • author = {Tachioka, Y. and Narita, T. and Weninger, F. and Watanabe, S.},
    • title = {Dual system combination approach for various reverberant environments with dereverberation techniques},
    • booktitle = {IEEE REVERB Workshop},
    • year = 2014,
    • month = may,
    • url = {https://www.merl.com/publications/TR2014-032}
    • }
  •  Weninger, F., Watanabe, S., Le Roux, J., Hershey, J.R., Tachioka, Y., Geiger, J., Schuller, B., Rigoll, G., "The MERL/MELCO/TUM System for the REVERB Challenge Using Deep Recurrent Neural Network Feature Enhancement", IEEE REVERB Workshop, May 2014.
    BibTeX TR2014-033 PDF
    • @inproceedings{Weninger2014may2,
    • author = {Weninger, F. and Watanabe, S. and {Le Roux}, J. and Hershey, J.R. and Tachioka, Y. and Geiger, J. and Schuller, B. and Rigoll, G.},
    • title = {The MERL/MELCO/TUM System for the REVERB Challenge Using Deep Recurrent Neural Network Feature Enhancement},
    • booktitle = {IEEE REVERB Workshop},
    • year = 2014,
    • month = may,
    • url = {https://www.merl.com/publications/TR2014-033}
    • }
  •  Watanabe, S., Le Roux, J., "Black Box Optimization for Automatic Speech Recognition", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/​ICASSP.2014.6854202, May 2014, pp. 3256-3260.
    BibTeX TR2014-021 PDF
    • @inproceedings{Watanabe2014may,
    • author = {Watanabe, S. and {Le Roux}, J.},
    • title = {Black Box Optimization for Automatic Speech Recognition},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2014,
    • pages = {3256--3260},
    • month = may,
    • publisher = {IEEE},
    • doi = {10.1109/ICASSP.2014.6854202},
    • url = {https://www.merl.com/publications/TR2014-021}
    • }
  •  Weng, C., Yu, D., Watanabe, S., Juang, B.-H.F., "Recurrent Deep Neural Networks for Robust Speech Recognition", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/​ICASSP.2014.6854661, May 2014, pp. 5532-5536.
    BibTeX TR2014-023 PDF
    • @inproceedings{Weng2014may,
    • author = {Weng, C. and Yu, D. and Watanabe, S. and Juang, B.-H.F.},
    • title = {Recurrent Deep Neural Networks for Robust Speech Recognition},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2014,
    • pages = {5532--5536},
    • month = may,
    • publisher = {IEEE},
    • doi = {10.1109/ICASSP.2014.6854661},
    • url = {https://www.merl.com/publications/TR2014-023}
    • }
  •  Weninger, F., Watanabe, S., Tachioka, Y., Schuller, B., "Deep Recurrent De-noising Auto-encoder and Blind De-reverberation for Reverberated Speech Recognition", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/​ICASSP.2014.6854478, May 2014, pp. 4623-4627.
    BibTeX TR2014-022 PDF
    • @inproceedings{Weninger2014may1,
    • author = {Weninger, F. and Watanabe, S. and Tachioka, Y. and Schuller, B.},
    • title = {Deep Recurrent De-noising Auto-encoder and Blind De-reverberation for Reverberated Speech Recognition},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2014,
    • pages = {4623--4627},
    • month = may,
    • publisher = {IEEE},
    • doi = {10.1109/ICASSP.2014.6854478},
    • url = {https://www.merl.com/publications/TR2014-022}
    • }