Publications

Hori, T., Chen, Z., Erdogan, H., Hershey, J.R., Le Roux, J., Mitra, V., Watanabe, S., "The MERL/SRI System for the 3rd CHiME Challenge Using Beamforming, Robust Feature Extraction, and Advanced Speech Recognition", IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU), DOI: 10.1109/ASRU.2015.7404833, December 2015, pp. 475-481.
BibTeX TR2015-135 PDF
- @inproceedings{Hori2015dec2,
- author = {Hori, T. and Chen, Z. and Erdogan, H. and Hershey, J.R. and {Le Roux}, J. and Mitra, V. and Watanabe, S.},
- title = {The MERL/SRI System for the 3rd CHiME Challenge Using Beamforming, Robust Feature Extraction, and Advanced Speech Recognition},
- booktitle = {IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU)},
- year = 2015,
- pages = {475--481},
- month = dec,
- publisher = {IEEE},
- doi = {10.1109/ASRU.2015.7404833},
- url = {https://www.merl.com/publications/TR2015-135}
- }
Hori, C., Hori, T., Watanabe, S., Hershey, J.R., "Context Sensitive Spoken Language Understanding Using Role Dependent LSTM Layers", NIPS Workshop on Machine Learning for Spoken Language Understanding and Interaction, December 2015.
BibTeX TR2015-134 PDF
- @inproceedings{Hori2015dec1,
- author = {Hori, C. and Hori, T. and Watanabe, S. and Hershey, J.R.},
- title = {Context Sensitive Spoken Language Understanding Using Role Dependent LSTM Layers},
- booktitle = {NIPS Workshop on Machine Learning for Spoken Language Understanding and Interaction},
- year = 2015,
- month = dec,
- url = {https://www.merl.com/publications/TR2015-134}
- }
Harsham, B.A., Watanabe, S., Esenther, A., Hershey, J.R., Le Roux, J., Luan, Y., Nikovski, D.N., Potluru, V.K., "Driver Prediction to Improve Interaction with In-Vehicle HMI", Workshop on DSP for In-Vehicle Systems and Safety (DSP), October 2015.
BibTeX TR2015-120 PDF
- @inproceedings{Harsham2015oct,
- author = {Harsham, B.A. and Watanabe, S. and Esenther, A. and Hershey, J.R. and {Le Roux}, J. and Luan, Y. and Nikovski, D.N. and Potluru, V.K.},
- title = {Driver Prediction to Improve Interaction with In-Vehicle HMI},
- booktitle = {Workshop on DSP for In-Vehicle Systems and Safety (DSP)},
- year = 2015,
- month = oct,
- url = {https://www.merl.com/publications/TR2015-120}
- }
Abdelaziz, A.H., Watanabe, S., Hershey, J.R., Vincent, E., Kolossa, D., "Uncertainty Propagation Through Deep Neural Networks", Interspeech, September 2015, vol. 1 of 5, pp. 3561.
BibTeX TR2015-098 PDF
- @inproceedings{Abdelaziz2015sep,
- author = {Abdelaziz, A.H. and Watanabe, S. and Hershey, J.R. and Vincent, E. and Kolossa, D.},
- title = {Uncertainty Propagation Through Deep Neural Networks},
- booktitle = {Interspeech},
- year = 2015,
- volume = {1 of 5},
- pages = 3561,
- month = sep,
- isbn = {978-1-5108-1790-6},
- url = {https://www.merl.com/publications/TR2015-098}
- }
Chen, Z., Watanabe, S., Erdogan, H., Hershey, J.R., "Speech Enhancement and Recognition Using Multi-Task Learning of Long Short-Term Memory Recurrent Neural Networks", Interspeech, September 2015, vol. 1 of 5, pp. 1278.
BibTeX TR2015-100 PDF
- @inproceedings{Chen2015sep,
- author = {Chen, Z. and Watanabe, S. and Erdogan, H. and Hershey, J.R.},
- title = {Speech Enhancement and Recognition Using Multi-Task Learning of Long Short-Term Memory Recurrent Neural Networks},
- booktitle = {Interspeech},
- year = 2015,
- volume = {1 of 5},
- pages = 1278,
- month = sep,
- isbn = {978-1-5108-1790-6},
- url = {https://www.merl.com/publications/TR2015-100}
- }
Weninger, F.J., Erdogan, H., Watanabe, S., Vincent, E., Le Roux, J., Hershey, J.R., Schuller, B.W., "Speech Enhancement with LSTM Recurrent Neural Networks and Its Application to Noise-Robust ASR", Latent Variable Analysis and Signal Separation Conference (LVA), DOI: 10.1007/978-3-319-22482-4_11, August 2015, vol. 9237, pp. 91-99.
BibTeX TR2015-094 PDF
- @inproceedings{Weninger2015aug,
- author = {Weninger, F.J. and Erdogan, H. and Watanabe, S. and Vincent, E. and {Le Roux}, J. and Hershey, J.R. and Schuller, B.W.},
- title = {Speech Enhancement with LSTM Recurrent Neural Networks and Its Application to Noise-Robust ASR},
- booktitle = {Latent Variable Analysis and Signal Separation Conference (LVA)},
- year = 2015,
- volume = 9237,
- pages = {91--99},
- month = aug,
- doi = {10.1007/978-3-319-22482-4_11},
- isbn = {978-3-319-22482-4},
- url = {https://www.merl.com/publications/TR2015-094}
- }
Erdogan, H., Hershey, J.R., Watanabe, S., Le Roux, J., "Phase-Sensitive and Recognition-Boosted Speech Separation Using Deep Recurrent Neural Networks", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP.2015.7178061, April 2015, pp. 708-712.
BibTeX TR2015-031 PDF
- @inproceedings{Erdogan2015apr,
- author = {Erdogan, H. and Hershey, J.R. and Watanabe, S. and {Le Roux}, J.},
- title = {Phase-Sensitive and Recognition-Boosted Speech Separation Using Deep Recurrent Neural Networks},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- year = 2015,
- pages = {708--712},
- month = apr,
- publisher = {IEEE},
- doi = {10.1109/ICASSP.2015.7178061},
- url = {https://www.merl.com/publications/TR2015-031}
- }
Le Roux, J., Hershey, J.R., Weninger, F.J., "Deep NMF for Speech Separation", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP.2015.7177933, April 2015, pp. 66-70.
BibTeX TR2015-029 PDF
- @inproceedings{LeRoux2015apr1,
- author = {{Le Roux}, J. and Hershey, J.R. and Weninger, F.J.},
- title = {Deep NMF for Speech Separation},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- year = 2015,
- pages = {66--70},
- month = apr,
- publisher = {IEEE},
- doi = {10.1109/ICASSP.2015.7177933},
- url = {https://www.merl.com/publications/TR2015-029}
- }
Le Roux, J., Vincent, E., Hershey, J.R., Ellis, D.P.W., "Micbots: Collecting Large Realistic Datasets for Speech and Audio Research Using Mobile Robots", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP.2015.7179050, April 2015, pp. 5635-5639.
BibTeX TR2015-030 PDF
- @inproceedings{LeRoux2015apr2,
- author = {{Le Roux}, J. and Vincent, E. and Hershey, J.R. and Ellis, D.P.W.},
- title = {Micbots: Collecting Large Realistic Datasets for Speech and Audio Research Using Mobile Robots},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- year = 2015,
- pages = {5635--5639},
- month = apr,
- publisher = {IEEE},
- doi = {10.1109/ICASSP.2015.7179050},
- url = {https://www.merl.com/publications/TR2015-030}
- }
Le Roux, J., Weninger, F.J., Hershey, J.R., "Sparse NMF -- half-baked or well done?", Tech. Rep. TR2015-023, Mitsubishi Electric Research Laboratories, March 2015.
BibTeX TR2015-023 PDF
- @techreport{LeRoux2015mar,
- author = {{Le Roux}, J. and Weninger, F.J. and Hershey, J.R.},
- title = {Sparse NMF -- half-baked or well done?},
- institution = {Mitsubishi Electric Research Laboratories},
- year = 2015,
- number = {TR2015-023},
- address = {Cambridge MA, USA},
- month = mar,
- url = {https://www.merl.com/publications/TR2015-023}
- }
Weninger, F., Le Roux, J., Hershey, J.R., Schuller, B., "Discriminatively Trained Recurrent Neural Networks for Single-Channel Speech Separation", IEEE Global Conference on Signal and Information Processing (GlobalSIP), DOI: 10.1109/GlobalSIP.2014.7032183, December 2014, pp. 577-581.
BibTeX TR2014-104 PDF
- @inproceedings{Weninger2014dec,
- author = {Weninger, F. and {Le Roux}, J. and Hershey, J.R. and Schuller, B.},
- title = {Discriminatively Trained Recurrent Neural Networks for Single-Channel Speech Separation},
- booktitle = {IEEE Global Conference on Signal and Information Processing (GlobalSIP)},
- year = 2014,
- pages = {577--581},
- month = dec,
- publisher = {IEEE},
- doi = {10.1109/GlobalSIP.2014.7032183},
- url = {https://www.merl.com/publications/TR2014-104}
- }
Tachioka, Y., Watanabe, S., Le Roux, J., Hershey, J.R., "Sequential Maximum Mutual Information Linear Discriminant Analysis for Speech Recognition", Interspeech, September 2014, vol. 15, pp. 2415-2419.
BibTeX TR2014-079 PDF
- @inproceedings{Tachioka2014sep,
- author = {Tachioka, Y. and Watanabe, S. and {Le Roux}, J. and Hershey, J.R.},
- title = {Sequential Maximum Mutual Information Linear Discriminant Analysis for Speech Recognition},
- booktitle = {Interspeech},
- year = 2014,
- volume = 15,
- pages = {2415--2419},
- month = sep,
- publisher = {International Speech Communication Association},
- issn = {2308-457X},
- url = {https://www.merl.com/publications/TR2014-079}
- }
Watanabe, S., Hershey, J.R., Marks, T.K., Fujii, Y., Koji, Y., "Cost-level integration of statistical and rule-based dialog managers", Interspeech, September 2014, vol. 15, pp. 323-327.
BibTeX TR2014-082 PDF
- @inproceedings{Watanabe2014sep,
- author = {Watanabe, S. and Hershey, J.R. and Marks, T.K. and Fujii, Y. and Koji, Y.},
- title = {Cost-level integration of statistical and rule-based dialog managers},
- booktitle = {Interspeech},
- year = 2014,
- volume = 15,
- pages = {323--327},
- month = sep,
- publisher = {International Speech Communication Association},
- issn = {2308-457X},
- url = {https://www.merl.com/publications/TR2014-082}
- }
Weninger, F., Le Roux, J., Hershey, J.R., Watanabe, S., "Discriminative NMF and its application to single-channel source separation", Interspeech, September 2014, vol. 15, pp. 865-869.
BibTeX TR2014-081 PDF
- @inproceedings{Weninger2014sep,
- author = {Weninger, F. and {Le Roux}, J. and Hershey, J.R. and Watanabe, S.},
- title = {Discriminative NMF and its application to single-channel source separation},
- booktitle = {Interspeech},
- year = 2014,
- volume = 15,
- pages = {865--869},
- month = sep,
- publisher = {International Speech Communication Association},
- issn = {2308-457X},
- url = {https://www.merl.com/publications/TR2014-081}
- }
Hershey, J.R., Le Roux, J., Weninger, F., "Deep Unfolding: Model-Based Inspiration of Novel Deep Architectures", arXiv, August 2014.
BibTeX arXiv
- @article{Hershey2014aug,
- author = {Hershey, J.R. and {Le Roux}, J. and Weninger, F.},
- title = {Deep Unfolding: Model-Based Inspiration of Novel Deep Architectures},
- journal = {arXiv},
- year = 2014,
- month = aug,
- url = {https://arxiv.org/abs/1409.2574v1}
- }
Weninger, F., Watanabe, S., Le Roux, J., Hershey, J.R., Tachioka, Y., Geiger, J., Schuller, B., Rigoll, G., "The MERL/MELCO/TUM System for the REVERB Challenge Using Deep Recurrent Neural Network Feature Enhancement", IEEE REVERB Workshop, May 2014.
BibTeX TR2014-033 PDF
- @inproceedings{Weninger2014may2,
- author = {Weninger, F. and Watanabe, S. and {Le Roux}, J. and Hershey, J.R. and Tachioka, Y. and Geiger, J. and Schuller, B. and Rigoll, G.},
- title = {The MERL/MELCO/TUM System for the REVERB Challenge Using Deep Recurrent Neural Network Feature Enhancement},
- booktitle = {IEEE REVERB Workshop},
- year = 2014,
- month = may,
- url = {https://www.merl.com/publications/TR2014-033}
- }
Simsekli, U., Le Roux, J., Hershey, J.R., "Non-negative Source-filter Dynamical System for Speech Enhancement", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP.2014.6854797, May 2014, pp. 6206-6210.
BibTeX TR2014-027 PDF Software
- @inproceedings{Simsekli2014may,
- author = {Simsekli, U. and {Le Roux}, J. and Hershey, J.R.},
- title = {Non-negative Source-filter Dynamical System for Speech Enhancement},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- year = 2014,
- pages = {6206--6210},
- month = may,
- publisher = {IEEE},
- doi = {10.1109/ICASSP.2014.6854797},
- url = {https://www.merl.com/publications/TR2014-027}
- }
Tang, H., Watanabe, S., Marks, T.K., Hershey, J.R., "Log-linear Dialog Manager", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP.2014.6854371, May 2014, pp. 4092-4096.
BibTeX TR2014-024 PDF
- @inproceedings{Tang2014may,
- author = {Tang, H. and Watanabe, S. and Marks, T.K. and Hershey, J.R.},
- title = {Log-linear Dialog Manager},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- year = 2014,
- pages = {4092--4096},
- month = may,
- publisher = {IEEE},
- doi = {10.1109/ICASSP.2014.6854371},
- url = {https://www.merl.com/publications/TR2014-024}
- }
Tachioka, Y., Watanabe, S., Le Roux, J., Hershey, J.R., "A Generalized Discriminative Training Framework for System Combination", IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU), DOI: 10.1109/ASRU.2013.6707703, December 2013, pp. 43-48.
BibTeX TR2013-118 PDF
- @inproceedings{Tachioka2013dec,
- author = {Tachioka, Y. and Watanabe, S. and {Le Roux}, J. and Hershey, J.R.},
- title = {A Generalized Discriminative Training Framework for System Combination},
- booktitle = {IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU)},
- year = 2013,
- pages = {43--48},
- month = dec,
- doi = {10.1109/ASRU.2013.6707703},
- url = {https://www.merl.com/publications/TR2013-118}
- }
Potluru, V.K., Le Roux, J., Pearlmutter, B.A., Hershey, J.R., Brand, M., "Coordinate Descent for Mixed-norm NMF", NIPS Workshop on Greedy Algorithms, Frank-Wolfe and Friends - A Modern Perspective, December 2013.
BibTeX TR2013-130 PDF
- @inproceedings{Potluru2013dec,
- author = {Potluru, V.K. and {Le Roux}, J. and Pearlmutter, B.A. and Hershey, J.R. and Brand, M.},
- title = {Coordinate Descent for Mixed-norm NMF},
- booktitle = {NIPS Workshop on Greedy Algorithms, Frank-Wolfe and Friends - A Modern Perspective},
- year = 2013,
- month = dec,
- url = {https://www.merl.com/publications/TR2013-130}
- }
Le Roux, J., Watanabe, S., Hershey, J.R., "Ensemble Learning for Speech Enhancement", IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (WASPAA), DOI: 10.1109/WASPAA.2013.6701888, October 2013, pp. 1-4.
BibTeX TR2013-098 PDF
- @inproceedings{LeRoux2013oct,
- author = {{Le Roux}, J. and Watanabe, S. and Hershey, J.R.},
- title = {Ensemble Learning for Speech Enhancement},
- booktitle = {IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (WASPAA)},
- year = 2013,
- pages = {1--4},
- month = oct,
- doi = {10.1109/WASPAA.2013.6701888},
- issn = {1931-1168},
- url = {https://www.merl.com/publications/TR2013-098}
- }
Simsekli, U., Le Roux, J., Hershey, J.R., "Hierarchical and Coupled Non-negative Dynamical Systems with Application to Audio Modeling", IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (WASPAA), DOI: 10.1109/WASPAA.2013.6701891, October 2013, pp. 1-4.
BibTeX TR2013-097 PDF
- @inproceedings{Simsekli2013oct,
- author = {Simsekli, U. and {Le Roux}, J. and Hershey, J.R.},
- title = {Hierarchical and Coupled Non-negative Dynamical Systems with Application to Audio Modeling},
- booktitle = {IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (WASPAA)},
- year = 2013,
- pages = {1--4},
- month = oct,
- doi = {10.1109/WASPAA.2013.6701891},
- issn = {1931-1168},
- url = {https://www.merl.com/publications/TR2013-097}
- }
Yoshino, K., Watanabe, S., Le Roux, J., Hershey, J.R., "Statistical Dialogue Management using Intention Dependency Graph", International Joint Conference on Natural Language Processing (IJCNLP), October 2013.
BibTeX TR2013-096 PDF
- @inproceedings{Yoshino2013oct,
- author = {Yoshino, K. and Watanabe, S. and {Le Roux}, J. and Hershey, J.R.},
- title = {Statistical Dialogue Management using Intention Dependency Graph},
- booktitle = {International Joint Conference on Natural Language Processing (IJCNLP)},
- year = 2013,
- month = oct,
- url = {https://www.merl.com/publications/TR2013-096}
- }
Tachioka, Y., Watanabe, S., Le Roux, J., Hershey, J.R., "Discriminative Methods for Noise Robust Speech Recognition: A CHiME Challenge Benchmark", International Workshop on Machine Listening in Multisource Environments (CHiME), June 2013.
BibTeX TR2013-044 PDF
- @inproceedings{Tachioka2013jun,
- author = {Tachioka, Y. and Watanabe, S. and {Le Roux}, J. and Hershey, J.R.},
- title = {Discriminative Methods for Noise Robust Speech Recognition: A CHiME Challenge Benchmark},
- booktitle = {International Workshop on Machine Listening in Multisource Environments (CHiME)},
- year = 2013,
- month = jun,
- url = {https://www.merl.com/publications/TR2013-044}
- }
Fevotte, C., Le Roux, J., Hershey, J.R., "Non-negative Dynamical System with Application to Speech and Audio", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), May 2013.
BibTeX TR2013-021 PDF Software
- @inproceedings{Fevotte2013may,
- author = {Fevotte, C. and {Le Roux}, J. and Hershey, J.R.},
- title = {Non-negative Dynamical System with Application to Speech and Audio},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- year = 2013,
- month = may,
- url = {https://www.merl.com/publications/TR2013-021}
- }