Publications

Weninger, F.J., Erdogan, H., Watanabe, S., Vincent, E., Le Roux, J., Hershey, J.R., Schuller, B.W., "Speech Enhancement with LSTM Recurrent Neural Networks and Its Application to Noise-Robust ASR", Latent Variable Analysis and Signal Separation Conference (LVA), DOI: 10.1007/978-3-319-22482-4_11, August 2015, vol. 9237, pp. 91-99.
BibTeX TR2015-094 PDF
- @inproceedings{Weninger2015aug,
- author = {Weninger, F.J. and Erdogan, H. and Watanabe, S. and Vincent, E. and {Le Roux}, J. and Hershey, J.R. and Schuller, B.W.},
- title = {Speech Enhancement with {LSTM} Recurrent Neural Networks and Its Application to Noise-Robust {ASR}},
- booktitle = {Latent Variable Analysis and Signal Separation Conference (LVA)},
- year = 2015,
- volume = 9237,
- pages = {91--99},
- month = aug,
- doi = {10.1007/978-3-319-22482-4_11},
- isbn = {978-3-319-22482-4},
- url = {https://www.merl.com/publications/TR2015-094}
- }
Erdogan, H., Hershey, J.R., Watanabe, S., Le Roux, J., "Phase-Sensitive and Recognition-Boosted Speech Separation Using Deep Recurrent Neural Networks", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP.2015.7178061, April 2015, pp. 708-712.
BibTeX TR2015-031 PDF
- @inproceedings{Erdogan2015apr,
- author    = {Erdogan, H. and Hershey, J.R. and Watanabe, S. and {Le Roux}, J.},
- title     = {Phase-Sensitive and Recognition-Boosted Speech Separation Using Deep Recurrent Neural Networks},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- publisher = {IEEE},
- pages     = {708--712},
- month     = apr,
- year      = {2015},
- doi       = {10.1109/ICASSP.2015.7178061},
- url       = {https://www.merl.com/publications/TR2015-031}
- }
Le Roux, J., Hershey, J.R., Weninger, F.J., "Deep NMF for Speech Separation", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP.2015.7177933, April 2015, pp. 66-70.
BibTeX TR2015-029 PDF
- @inproceedings{LeRoux2015apr1,
- author = {{Le Roux}, J. and Hershey, J.R. and Weninger, F.J.},
- title = {Deep {NMF} for Speech Separation},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- year = 2015,
- pages = {66--70},
- month = apr,
- publisher = {IEEE},
- doi = {10.1109/ICASSP.2015.7177933},
- url = {https://www.merl.com/publications/TR2015-029}
- }
Le Roux, J., Vincent, E., Hershey, J.R., Ellis, D.P.W., "Micbots: Collecting Large Realistic Datasets for Speech and Audio Research Using Mobile Robots", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP.2015.7179050, April 2015, pp. 5635-5639.
BibTeX TR2015-030 PDF
- @inproceedings{LeRoux2015apr2,
- author    = {{Le Roux}, J. and Vincent, E. and Hershey, J.R. and Ellis, D.P.W.},
- title     = {Micbots: Collecting Large Realistic Datasets for Speech and Audio Research Using Mobile Robots},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- publisher = {IEEE},
- pages     = {5635--5639},
- month     = apr,
- year      = {2015},
- doi       = {10.1109/ICASSP.2015.7179050},
- url       = {https://www.merl.com/publications/TR2015-030}
- }
Le Roux, J., Weninger, F.J., Hershey, J.R., "Sparse NMF -- half-baked or well done?", Tech. Rep. TR2015-023, Mitsubishi Electric Research Laboratories, March 2015.
BibTeX TR2015-023 PDF
- @techreport{LeRoux2015mar,
- author = {{Le Roux}, J. and Weninger, F.J. and Hershey, J.R.},
- title = {Sparse {NMF} -- half-baked or well done?},
- institution = {Mitsubishi Electric Research Laboratories},
- year = 2015,
- number = {TR2015-023},
- address = {Cambridge, MA, USA},
- month = mar,
- url = {https://www.merl.com/publications/TR2015-023}
- }
Weninger, F., Le Roux, J., Hershey, J.R., Schuller, B., "Discriminatively Trained Recurrent Neural Networks for Single-Channel Speech Separation", IEEE Global Conference on Signal and Information Processing (GlobalSIP), DOI: 10.1109/GlobalSIP.2014.7032183, December 2014, pp. 577-581.
BibTeX TR2014-104 PDF
- @inproceedings{Weninger2014dec,
- author    = {Weninger, F. and {Le Roux}, J. and Hershey, J.R. and Schuller, B.},
- title     = {Discriminatively Trained Recurrent Neural Networks for Single-Channel Speech Separation},
- booktitle = {IEEE Global Conference on Signal and Information Processing (GlobalSIP)},
- publisher = {IEEE},
- pages     = {577--581},
- month     = dec,
- year      = {2014},
- doi       = {10.1109/GlobalSIP.2014.7032183},
- url       = {https://www.merl.com/publications/TR2014-104}
- }
Tachioka, Y., Watanabe, S., Le Roux, J., Hershey, J.R., "Sequential Maximum Mutual Information Linear Discriminant Analysis for Speech Recognition", Interspeech, September 2014, vol. 15, pp. 2415-2419.
BibTeX TR2014-079 PDF
- @inproceedings{Tachioka2014sep,
- author    = {Tachioka, Y. and Watanabe, S. and {Le Roux}, J. and Hershey, J.R.},
- title     = {Sequential Maximum Mutual Information Linear Discriminant Analysis for Speech Recognition},
- booktitle = {Interspeech},
- publisher = {International Speech Communication Association},
- volume    = {15},
- pages     = {2415--2419},
- month     = sep,
- year      = {2014},
- issn      = {2308-457X},
- url       = {https://www.merl.com/publications/TR2014-079}
- }
Watanabe, S., Hershey, J.R., Marks, T.K., Fujii, Y., Koji, Y., "Cost-level integration of statistical and rule-based dialog managers", Interspeech, September 2014, vol. 15, pp. 323-327.
BibTeX TR2014-082 PDF
- @inproceedings{Watanabe2014sep,
- author = {Watanabe, S. and Hershey, J.R. and Marks, T.K. and Fujii, Y. and Koji, Y.},
- title = {Cost-level integration of statistical and rule-based dialog managers},
- booktitle = {Interspeech},
- year = 2014,
- volume = 15,
- pages = {323--327},
- month = sep,
- publisher = {International Speech Communication Association},
- issn = {2308-457X},
- url = {https://www.merl.com/publications/TR2014-082}
- }
Weninger, F., Le Roux, J., Hershey, J.R., Watanabe, S., "Discriminative NMF and its application to single-channel source separation", Interspeech, September 2014, vol. 15, pp. 865-869.
BibTeX TR2014-081 PDF
- @inproceedings{Weninger2014sep,
- author = {Weninger, F. and {Le Roux}, J. and Hershey, J.R. and Watanabe, S.},
- title = {Discriminative {NMF} and its application to single-channel source separation},
- booktitle = {Interspeech},
- year = 2014,
- volume = 15,
- pages = {865--869},
- month = sep,
- publisher = {International Speech Communication Association},
- issn = {2308-457X},
- url = {https://www.merl.com/publications/TR2014-081}
- }
Hershey, J.R., Le Roux, J., Weninger, F., "Deep Unfolding: Model-Based Inspiration of Novel Deep Architectures", arXiv, August 2014.
BibTeX arXiv
- @article{Hershey2014aug,
- author = {Hershey, J.R. and {Le Roux}, J. and Weninger, F.},
- title = {Deep Unfolding: Model-Based Inspiration of Novel Deep Architectures},
- journal = {arXiv},
- year = 2014,
- month = aug,
- eprint = {1409.2574},
- archiveprefix = {arXiv},
- url = {https://arxiv.org/abs/1409.2574v1}
- }
Weninger, F., Watanabe, S., Le Roux, J., Hershey, J.R., Tachioka, Y., Geiger, J., Schuller, B., Rigoll, G., "The MERL/MELCO/TUM System for the REVERB Challenge Using Deep Recurrent Neural Network Feature Enhancement", IEEE REVERB Workshop, May 2014.
BibTeX TR2014-033 PDF
- @inproceedings{Weninger2014may2,
- author = {Weninger, F. and Watanabe, S. and {Le Roux}, J. and Hershey, J.R. and Tachioka, Y. and Geiger, J. and Schuller, B. and Rigoll, G.},
- title = {The {MERL/MELCO/TUM} System for the {REVERB} Challenge Using Deep Recurrent Neural Network Feature Enhancement},
- booktitle = {IEEE REVERB Workshop},
- year = 2014,
- month = may,
- url = {https://www.merl.com/publications/TR2014-033}
- }
Simsekli, U., Le Roux, J., Hershey, J.R., "Non-negative Source-filter Dynamical System for Speech Enhancement", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP.2014.6854797, May 2014, pp. 6206-6210.
BibTeX TR2014-027 PDF Software
- @inproceedings{Simsekli2014may,
- author    = {Simsekli, U. and {Le Roux}, J. and Hershey, J.R.},
- title     = {Non-negative Source-filter Dynamical System for Speech Enhancement},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- publisher = {IEEE},
- pages     = {6206--6210},
- month     = may,
- year      = {2014},
- doi       = {10.1109/ICASSP.2014.6854797},
- url       = {https://www.merl.com/publications/TR2014-027}
- }
Tang, H., Watanabe, S., Marks, T.K., Hershey, J.R., "Log-linear Dialog Manager", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP.2014.6854371, May 2014, pp. 4092-4096.
BibTeX TR2014-024 PDF
- @inproceedings{Tang2014may,
- author    = {Tang, H. and Watanabe, S. and Marks, T.K. and Hershey, J.R.},
- title     = {Log-linear Dialog Manager},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- publisher = {IEEE},
- pages     = {4092--4096},
- month     = may,
- year      = {2014},
- doi       = {10.1109/ICASSP.2014.6854371},
- url       = {https://www.merl.com/publications/TR2014-024}
- }
Tachioka, Y., Watanabe, S., Le Roux, J., Hershey, J.R., "A Generalized Discriminative Training Framework for System Combination", IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU), DOI: 10.1109/ASRU.2013.6707703, December 2013, pp. 43-48.
BibTeX TR2013-118 PDF
- @inproceedings{Tachioka2013dec,
- author    = {Tachioka, Y. and Watanabe, S. and {Le Roux}, J. and Hershey, J.R.},
- title     = {A Generalized Discriminative Training Framework for System Combination},
- booktitle = {IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU)},
- pages     = {43--48},
- month     = dec,
- year      = {2013},
- doi       = {10.1109/ASRU.2013.6707703},
- url       = {https://www.merl.com/publications/TR2013-118}
- }
Potluru, V.K., Le Roux, J., Pearlmutter, B.A., Hershey, J.R., Brand, M., "Coordinate Descent for Mixed-norm NMF", NIPS Workshop on Greedy Algorithms, Frank-Wolfe and Friends - A Modern Perspective, December 2013.
BibTeX TR2013-130 PDF
- @inproceedings{Potluru2013dec,
- author = {Potluru, V.K. and {Le Roux}, J. and Pearlmutter, B.A. and Hershey, J.R. and Brand, M.},
- title = {Coordinate Descent for Mixed-norm {NMF}},
- booktitle = {NIPS Workshop on Greedy Algorithms, Frank-Wolfe and Friends - A Modern Perspective},
- year = 2013,
- month = dec,
- url = {https://www.merl.com/publications/TR2013-130}
- }
Le Roux, J., Watanabe, S., Hershey, J.R., "Ensemble Learning for Speech Enhancement", IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (WASPAA), DOI: 10.1109/WASPAA.2013.6701888, October 2013, pp. 1-4.
BibTeX TR2013-098 PDF
- @inproceedings{LeRoux2013oct,
- author    = {{Le Roux}, J. and Watanabe, S. and Hershey, J.R.},
- title     = {Ensemble Learning for Speech Enhancement},
- booktitle = {IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (WASPAA)},
- pages     = {1--4},
- month     = oct,
- year      = {2013},
- doi       = {10.1109/WASPAA.2013.6701888},
- issn      = {1931-1168},
- url       = {https://www.merl.com/publications/TR2013-098}
- }
Simsekli, U., Le Roux, J., Hershey, J.R., "Hierarchical and Coupled Non-negative Dynamical Systems with Application to Audio Modeling", IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (WASPAA), DOI: 10.1109/WASPAA.2013.6701891, October 2013, pp. 1-4.
BibTeX TR2013-097 PDF
- @inproceedings{Simsekli2013oct,
- author    = {Simsekli, U. and {Le Roux}, J. and Hershey, J.R.},
- title     = {Hierarchical and Coupled Non-negative Dynamical Systems with Application to Audio Modeling},
- booktitle = {IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (WASPAA)},
- pages     = {1--4},
- month     = oct,
- year      = {2013},
- doi       = {10.1109/WASPAA.2013.6701891},
- issn      = {1931-1168},
- url       = {https://www.merl.com/publications/TR2013-097}
- }
Yoshino, K., Watanabe, S., Le Roux, J., Hershey, J.R., "Statistical Dialogue Management using Intention Dependency Graph", International Joint Conference on Natural Language Processing (IJCNLP), October 2013.
BibTeX TR2013-096 PDF
- @inproceedings{Yoshino2013oct,
- author    = {Yoshino, K. and Watanabe, S. and {Le Roux}, J. and Hershey, J.R.},
- title     = {Statistical Dialogue Management using Intention Dependency Graph},
- booktitle = {International Joint Conference on Natural Language Processing (IJCNLP)},
- month     = oct,
- year      = {2013},
- url       = {https://www.merl.com/publications/TR2013-096}
- }
Tachioka, Y., Watanabe, S., Le Roux, J., Hershey, J.R., "Discriminative Methods for Noise Robust Speech Recognition: A CHiME Challenge Benchmark", International Workshop on Machine Listening in Multisource Environments (CHiME), June 2013.
BibTeX TR2013-044 PDF
- @inproceedings{Tachioka2013jun,
- author = {Tachioka, Y. and Watanabe, S. and {Le Roux}, J. and Hershey, J.R.},
- title = {Discriminative Methods for Noise Robust Speech Recognition: A {CHiME} Challenge Benchmark},
- booktitle = {International Workshop on Machine Listening in Multisource Environments (CHiME)},
- year = 2013,
- month = jun,
- url = {https://www.merl.com/publications/TR2013-044}
- }
Fevotte, C., Le Roux, J., Hershey, J.R., "Non-negative Dynamical System with Application to Speech and Audio", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), May 2013.
BibTeX TR2013-021 PDF Software
- @inproceedings{Fevotte2013may,
- author    = {Fevotte, C. and {Le Roux}, J. and Hershey, J.R.},
- title     = {Non-negative Dynamical System with Application to Speech and Audio},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- month     = may,
- year      = {2013},
- url       = {https://www.merl.com/publications/TR2013-021}
- }
Le Roux, J., Boufounos, P.T., Kang, K., Hershey, J.R., "Source Localization in Reverberant Environments using Sparse Optimization", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP.2013.6638473, May 2013, pp. 4310-4314.
BibTeX TR2013-022 PDF
- @inproceedings{LeRoux2013may,
- author    = {{Le Roux}, J. and Boufounos, P.T. and Kang, K. and Hershey, J.R.},
- title     = {Source Localization in Reverberant Environments using Sparse Optimization},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- pages     = {4310--4314},
- month     = may,
- year      = {2013},
- doi       = {10.1109/ICASSP.2013.6638473},
- issn      = {1520-6149},
- url       = {https://www.merl.com/publications/TR2013-022}
- }
Tachioka, Y., Watanabe, S., Hershey, J.R., "Effectiveness of Discriminative Training and Feature Transformation for Reverberated and Noisy Speech", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), May 2013.
BibTeX TR2013-020 PDF
- @inproceedings{Tachioka2013may,
- author    = {Tachioka, Y. and Watanabe, S. and Hershey, J.R.},
- title     = {Effectiveness of Discriminative Training and Feature Transformation for Reverberated and Noisy Speech},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- month     = may,
- year      = {2013},
- url       = {https://www.merl.com/publications/TR2013-020}
- }
Watanabe, S., Hershey, J.R., "Stereo-based Feature Enhancement Using Dictionary Learning", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP.2013.6639034, May 2013, pp. 7073-7077.
BibTeX TR2013-019 PDF
- @inproceedings{Watanabe2013may,
- author    = {Watanabe, S. and Hershey, J.R.},
- title     = {Stereo-based Feature Enhancement Using Dictionary Learning},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- pages     = {7073--7077},
- month     = may,
- year      = {2013},
- doi       = {10.1109/ICASSP.2013.6639034},
- issn      = {1520-6149},
- url       = {https://www.merl.com/publications/TR2013-019}
- }
Hershey, J.R., Rennie, S.J., Le Roux, J., "Factorial Models for Noise Robust Speech Recognition", in Techniques for Noise Robustness in Automatic Speech Recognition, Virtanen, T., Singh, R., Raj, B., Eds., chapter 12, Wiley, November 2012.
BibTeX TR2012-002 PDF
- @incollection{Hershey2012nov,
- author    = {Hershey, J.R. and Rennie, S.J. and {Le Roux}, J.},
- title     = {Factorial Models for Noise Robust Speech Recognition},
- editor    = {Virtanen, T. and Singh, R. and Raj, B.},
- booktitle = {Techniques for Noise Robustness in Automatic Speech Recognition},
- chapter   = {12},
- publisher = {Wiley},
- month     = nov,
- year      = {2012},
- url       = {https://www.merl.com/publications/TR2012-002}
- }
Heaukulani, C., Le Roux, J., Hershey, J.R., "Latent Dirichlet Reallocation for Term Swapping", International Workshop on Statistical Machine Learning for Speech Processing (IWSML), March 2012.
BibTeX TR2012-022 PDF
- @inproceedings{Heaukulani2012mar,
- author = {Heaukulani, C. and {Le Roux}, J. and Hershey, J.R.},
- title = {Latent {Dirichlet} Reallocation for Term Swapping},
- booktitle = {International Workshop on Statistical Machine Learning for Speech Processing (IWSML)},
- year = 2012,
- month = mar,
- url = {https://www.merl.com/publications/TR2012-022}
- }