Publications

202 / 2,511 publications found.


  •  Isik, Y., Le Roux, J., Chen, Z., Watanabe, S., Hershey, J.R., "Single-Channel Multi-Speaker Separation using Deep Clustering", Interspeech, DOI: 10.21437/Interspeech.2016-1176, September 2016, pp. 545-549.
  •  Le Roux, J., Vincent, E., Erdogan, H., "Learning-Based Approaches to Speech Enhancement and Separation," Tech. Rep. TR2016-113, Interspeech Tutorials, September 2016.
  •  Zmolikova, K., Karafiat, M., Vesely, K., Delcroix, M., Watanabe, S., Burget, L., Cernocky, J.H., "Data selection by sequence summarizing neural network in mismatch condition training", Interspeech, DOI: 10.21437/Interspeech.2016-741, September 2016, pp. 2354-2358.
  •  Hayashi, T., Watanabe, S., Toda, T., Hori, T., Le Roux, J., Takeda, K., "Bidirectional LSTM-HMM Hybrid System for Polyphonic Sound Event Detection", Detection and Classification of Acoustic Scenes and Events Workshop, September 2016, pp. 35-39.
  •  Hori, C., Watanabe, S., Hori, T., Harsham, B.A., Hershey, J.R., Koji, Y., Fujii, Y., Furumoto, Y., "Driver Confusion Status Detection Using Recurrent Neural Networks", IEEE International Conference on Multimedia and Expo (ICME), DOI: 10.1109/ICME.2016.7552966, July 2016.
  •  Hershey, J.R.; Chen, Z.; Le Roux, J.; Watanabe, S., "Deep Clustering: Discriminative Embeddings for Segmentation and Separation", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP.2016.7471631, March 2016, pp. 31-35.
    BibTeX Download PDFRead TR2016-003
    • @inproceedings{Hershey2016mar,
    • author = {Hershey, J.R. and Chen, Z. and {Le Roux}, J. and Watanabe, S.},
    • title = {Deep Clustering: Discriminative Embeddings for Segmentation and Separation},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2016,
    • pages = {31--35},
    • month = mar,
    • doi = {10.1109/ICASSP.2016.7471631},
    • url = {http://www.merl.com/publications/TR2016-003}
    • }
  •  Hori, T.; Hori, C.; Watanabe, S.; Hershey, J.R., "Minimum Word Error Training of Long Short-Term Memory Recurrent Neural Network Language Models for Speech Recognition", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP.2016.7472827, March 2016, pp. 5990-5994.
    BibTeX Download PDFRead TR2016-011
    • @inproceedings{Hori2016mar,
    • author = {Hori, T. and Hori, C. and Watanabe, S. and Hershey, J.R.},
    • title = {Minimum Word Error Training of Long Short-Term Memory Recurrent Neural Network Language Models for Speech Recognition},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2016,
    • pages = {5990--5994},
    • month = mar,
    • doi = {10.1109/ICASSP.2016.7472827},
    • url = {http://www.merl.com/publications/TR2016-011}
    • }
  •  Vesely, K.; Watanabe, S.; Zmolikova, K.; Karafiat, M.; Burget, L.; Cernocky, J.H., "Sequence Summarizing Neural Network for Speaker Adaptation", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP.2016.7472692, March 2016, pp. 5315-5319.
    BibTeX Download PDFRead TR2016-001
    • @inproceedings{Vesely2016mar,
    • author = {Vesely, K. and Watanabe, S. and Zmolikova, K. and Karafiat, M. and Burget, L. and Cernocky, J.H.},
    • title = {Sequence Summarizing Neural Network for Speaker Adaptation},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2016,
    • pages = {5315--5319},
    • month = mar,
    • doi = {10.1109/ICASSP.2016.7472692},
    • url = {http://www.merl.com/publications/TR2016-001}
    • }
  •  Wisdom, S.; Hershey, J.R.; Le Roux, J.; Watanabe, S., "Deep Unfolding for Multichannel Source Separation", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP.2016.7471649, March 2016, pp. 121-125.
    BibTeX Download PDFRead TR2016-008
    • @inproceedings{Wisdom2016mar,
    • author = {Wisdom, S. and Hershey, J.R. and {Le Roux}, J. and Watanabe, S.},
    • title = {Deep Unfolding for Multichannel Source Separation},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2016,
    • pages = {121--125},
    • month = mar,
    • doi = {10.1109/ICASSP.2016.7471649},
    • url = {http://www.merl.com/publications/TR2016-008}
    • }
  •  Xiao, X.; Watanabe, S.; Erdogan, H.; Lu, L.; Hershey, J.; Seltzer, M.; Chen, G.; Zhang, Y.; Mandel, M.; Yu, D., "Deep Beamforming Networks for Multi-Channel Speech Recognition", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP.2016.7472778, March 2016, pp. 5745-5749.
    BibTeX Download PDFRead TR2016-002
    • @inproceedings{Xiao2016mar,
    • author = {Xiao, X. and Watanabe, S. and Erdogan, H. and Lu, L. and Hershey, J. and Seltzer, M. and Chen, G. and Zhang, Y. and Mandel, M. and Yu, D.},
    • title = {Deep Beamforming Networks for Multi-Channel Speech Recognition},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2016,
    • pages = {5745--5749},
    • month = mar,
    • doi = {10.1109/ICASSP.2016.7472778},
    • url = {http://www.merl.com/publications/TR2016-002}
    • }
  •  Kanagawa, H.; Tachioka, Y.; Watanabe, S.; Ishii, J., "Feature-Space Structural MAPLR with Regression Tree-Based Multiple Transformation Matrices for DNN", Asia-Pacific Signal and Information Processing Association Annual Summit and Conference (APSIPA), DOI: 10.1109/APSIPA.2015.7415425, December 2015, pp. 86-92.
    BibTeX Download PDFRead TR2015-150
    • @inproceedings{Kanagawa2015dec,
    • author = {Kanagawa, H. and Tachioka, Y. and Watanabe, S. and Ishii, J.},
    • title = {Feature-Space Structural MAPLR with Regression Tree-Based Multiple Transformation Matrices for DNN},
    • booktitle = {Asia-Pacific Signal and Information Processing Association Annual Summit and Conference (APSIPA)},
    • year = 2015,
    • pages = {86--92},
    • month = dec,
    • doi = {10.1109/APSIPA.2015.7415425},
    • url = {http://www.merl.com/publications/TR2015-150}
    • }
  •  Barker, J.; Marxer, R.; Vincent, E.; Watanabe, S., "The Third 'CHiME' Speech Separation and Recognition Challenge: Dataset, Task and Baselines", IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU), DOI: 10.1109/ASRU.2015.7404837, December 2015, pp. 504-511.
    BibTeX Download PDFRead TR2015-136
    • @inproceedings{Barker2015dec,
    • author = {Barker, J. and Marxer, R. and Vincent, E. and Watanabe, S.},
    • title = {The Third 'CHiME' Speech Separation and Recognition Challenge: Dataset, Task and Baselines},
    • booktitle = {IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU)},
    • year = 2015,
    • pages = {504--511},
    • month = dec,
    • publisher = {IEEE},
    • doi = {10.1109/ASRU.2015.7404837},
    • url = {http://www.merl.com/publications/TR2015-136}
    • }
  •  Hori, T.; Chen, Z.; Erdogan, H.; Hershey, J.R.; Le Roux, J.; Mitra, V.; Watanabe, S., "The MERL/SRI System for the 3rd CHiME Challenge Using Beamforming, Robust Feature Extraction, and Advanced Speech Recognition", IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU), DOI: 10.1109/ASRU.2015.7404833, December 2015, pp. 475-481.
    BibTeX Download PDFRead TR2015-135
    • @inproceedings{Hori2015dec2,
    • author = {Hori, T. and Chen, Z. and Erdogan, H. and Hershey, J.R. and {Le Roux}, J. and Mitra, V. and Watanabe, S.},
    • title = {The MERL/SRI System for the 3rd CHiME Challenge Using Beamforming, Robust Feature Extraction, and Advanced Speech Recognition},
    • booktitle = {IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU)},
    • year = 2015,
    • pages = {475--481},
    • month = dec,
    • publisher = {IEEE},
    • doi = {10.1109/ASRU.2015.7404833},
    • url = {http://www.merl.com/publications/TR2015-135}
    • }
  •  Hsiao, R.; Ma, J.; Hartmann, W.; Karafiat, M.; Grezl, F.; Burget, L.; Szoke, I.; Cernocky, J.; Watanabe, S.; Chen, Z.; Mallidi, S.H.; Hermansky, H.; Tsakalidis, S.; Schwartz, R., "Robust Speech Recognition in Unknown Reverberant and Noisy Conditions", IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU), DOI: 10.1109/ASRU.2015.7404841, December 2015, pp. 533-538.
    BibTeX Download PDFRead TR2015-138
    • @inproceedings{Hsiao2015dec,
    • author = {Hsiao, R. and Ma, J. and Hartmann, W. and Karafiat, M. and Grezl, F. and Burget, L. and Szoke, I. and Cernocky, J. and Watanabe, S. and Chen, Z. and Mallidi, S.H. and Hermansky, H. and Tsakalidis, S. and Schwartz, R.},
    • title = {Robust Speech Recognition in Unknown Reverberant and Noisy Conditions},
    • booktitle = {IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU)},
    • year = 2015,
    • pages = {533--538},
    • month = dec,
    • publisher = {IEEE},
    • doi = {10.1109/ASRU.2015.7404841},
    • url = {http://www.merl.com/publications/TR2015-138}
    • }
  •  Moriya, T.; Shinozaki, T.; Watanabe, S.; Duh, K., "Automation of System Building for State-of-the-Art Large Vocabulary Speech Recognition Using Evolution Strategy", IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU), DOI: 10.1109/ASRU.2015.7404852, December 2015, pp. 610-616.
    BibTeX Download PDFRead TR2015-137
    • @inproceedings{Moriya2015dec,
    • author = {Moriya, T. and Shinozaki, T. and Watanabe, S. and Duh, K.},
    • title = {Automation of System Building for State-of-the-Art Large Vocabulary Speech Recognition Using Evolution Strategy},
    • booktitle = {IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU)},
    • year = 2015,
    • pages = {610--616},
    • month = dec,
    • doi = {10.1109/ASRU.2015.7404852},
    • url = {http://www.merl.com/publications/TR2015-137}
    • }
  •  Hori, C.; Hori, T.; Watanabe, S.; Hershey, J.R., "Context Sensitive Spoken Language Understanding Using Role Dependent LSTM Layers", NIPS Workshop on Machine Learning for Spoken Language Understanding and Interaction, December 2015.
    BibTeX Download PDFRead TR2015-134
    • @inproceedings{Hori2015dec1,
    • author = {Hori, C. and Hori, T. and Watanabe, S. and Hershey, J.R.},
    • title = {Context Sensitive Spoken Language Understanding Using Role Dependent LSTM Layers},
    • booktitle = {NIPS Workshop on Machine Learning for Spoken Language Understanding and Interaction},
    • year = 2015,
    • month = dec,
    • url = {http://www.merl.com/publications/TR2015-134}
    • }
  •  Tawara, N.; Ogawa, T.; Watanabe, S.; Nakamura, A.; Kobayashi, T., "A Sampling-Based Speaker Clustering Using Utterance-Oriented Dirichlet Process Mixture Model and Its Evaluation on Large Scale Data", APSIPA Transactions on Signal and Information Processing, DOI: 10.1017/ATSIP.2015.19, ISSN: 2048-7703, Vol. 4, October 2015.
    BibTeX Download PDFRead TR2015-153
    • @article{Tawara2015oct,
    • author = {Tawara, N. and Ogawa, T. and Watanabe, S. and Nakamura, A. and Kobayashi, T.},
    • title = {A Sampling-Based Speaker Clustering Using Utterance-Oriented Dirichlet Process Mixture Model and Its Evaluation on Large Scale Data},
    • journal = {APSIPA Transactions on Signal and Information Processing},
    • year = 2015,
    • volume = 4,
    • month = oct,
    • doi = {10.1017/ATSIP.2015.19},
    • issn = {2048-7703},
    • url = {http://www.merl.com/publications/TR2015-153}
    • }
  •  Harsham, B.A.; Watanabe, S.; Esenther, A.; Hershey, J.R.; Le Roux, J.; Luan, Y.; Nikovski, D.N.; Potluru, V.K., "Driver Prediction to Improve Interaction with In-Vehicle HMI", Workshop on DSP for In-Vehicle Systems and Safety, October 2015.
    BibTeX Download PDFRead TR2015-120
    • @inproceedings{Harsham2015oct,
    • author = {Harsham, B.A. and Watanabe, S. and Esenther, A. and Hershey, J.R. and {Le Roux}, J. and Luan, Y. and Nikovski, D.N. and Potluru, V.K.},
    • title = {Driver Prediction to Improve Interaction with In-Vehicle HMI},
    • booktitle = {Workshop on DSP for In-Vehicle Systems and Safety},
    • year = 2015,
    • month = oct,
    • url = {http://www.merl.com/publications/TR2015-120}
    • }
  •  Abdelaziz, A.H.; Watanabe, S.; Hershey, J.R.; Vincent, E.; Kolossa, D., "Uncertainty Propagation Through Deep Neural Networks", Interspeech, ISBN: 978-1-5108-1790-6, September 2015, vol. 1 of 5, pp. 3561.
    BibTeX Download PDFRead TR2015-098
    • @inproceedings{Abdelaziz2015sep,
    • author = {Abdelaziz, A.H. and Watanabe, S. and Hershey, J.R. and Vincent, E. and Kolossa, D.},
    • title = {Uncertainty Propagation Through Deep Neural Networks},
    • booktitle = {Interspeech},
    • year = 2015,
    • volume = {1 of 5},
    • pages = 3561,
    • month = sep,
    • isbn = {978-1-5108-1790-6},
    • url = {http://www.merl.com/publications/TR2015-098}
    • }
  •  Chen, Z.; Watanabe, S.; Erdogan, H.; Hershey, J.R., "Speech Enhancement and Recognition Using Multi-Task Learning of Long Short-Term Memory Recurrent Neural Networks", Interspeech, ISBN: 978-1-5108-1790-6, September 2015, vol. 1 of 5, pp. 1278.
    BibTeX Download PDFRead TR2015-100
    • @inproceedings{Chen2015sep,
    • author = {Chen, Z. and Watanabe, S. and Erdogan, H. and Hershey, J.R.},
    • title = {Speech Enhancement and Recognition Using Multi-Task Learning of Long Short-Term Memory Recurrent Neural Networks},
    • booktitle = {Interspeech},
    • year = 2015,
    • volume = {1 of 5},
    • pages = 1278,
    • month = sep,
    • isbn = {978-1-5108-1790-6},
    • url = {http://www.merl.com/publications/TR2015-100}
    • }