Publications

Wang, Z.-Q., Le Roux, J., Hershey, J.R., "Alternative Objective Functions for Deep Clustering", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP.2018.8462507, April 2018, pp. 686-690.
BibTeX TR2018-005 PDF
- @inproceedings{Wang2018apr,
- author    = {Wang, Zhong-Qiu and Le Roux, Jonathan and Hershey, John R.},
- title     = {Alternative Objective Functions for Deep Clustering},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- pages     = {686--690},
- month     = apr,
- year      = {2018},
- doi       = {10.1109/ICASSP.2018.8462507},
- url       = {https://www.merl.com/publications/TR2018-005},
- }
Magron, P., Le Roux, J., Virtanen, T., "Consistent Anisotropic Wiener Filtering for Audio Source Separation", IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (WASPAA), DOI: 10.1109/WASPAA.2017.8170037, October 2017.
BibTeX TR2017-151 PDF
- @inproceedings{Magron2017oct,
- author = {Magron, Paul and Le Roux, Jonathan and Virtanen, Tuomas},
- title = {Consistent Anisotropic {Wiener} Filtering for Audio Source Separation},
- booktitle = {IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (WASPAA)},
- year = 2017,
- month = oct,
- doi = {10.1109/WASPAA.2017.8170037},
- url = {https://www.merl.com/publications/TR2017-151}
- }
Tachioka, Y., Narita, T., Miura, I., Uramoto, T., Monta, N., Uenohara, S., Furuya, K., Watanabe, S., Le Roux, J., "Coupled initialization of multi-channel non-negative matrix factorization based on spatial and spectral information", Interspeech, August 2017.
BibTeX TR2017-134 PDF
- @inproceedings{Tachioka2017aug,
- author    = {Tachioka, Yuuki and Narita, Tomohiro and Miura, Iori and Uramoto, Takanobu and Monta, Natsuki and Uenohara, Shingo and Furuya, Kenichi and Watanabe, Shinji and Le Roux, Jonathan},
- title     = {Coupled initialization of multi-channel non-negative matrix factorization based on spatial and spectral information},
- booktitle = {Interspeech},
- month     = aug,
- year      = {2017},
- url       = {https://www.merl.com/publications/TR2017-134},
- }
Hayashi, T., Watanabe, S., Toda, T., Hori, T., Le Roux, J., Takeda, K., "Duration-Controlled LSTM for Polyphonic Sound Event Detection", IEEE/ACM Transactions on Audio, Speech, and Language Processing, DOI: 10.1109/TASLP.2017.2740002, Vol. 25, No. 11, August 2017.
BibTeX TR2017-150 PDF
- @article{Hayashi2017aug,
- author = {Hayashi, Tomoki and Watanabe, Shinji and Toda, Tomoki and Hori, Takaaki and Le Roux, Jonathan and Takeda, Kazuya},
- title = {Duration-Controlled {LSTM} for Polyphonic Sound Event Detection},
- journal = {IEEE/ACM Transactions on Audio, Speech, and Language Processing},
- year = 2017,
- volume = 25,
- number = 11,
- month = aug,
- doi = {10.1109/TASLP.2017.2740002},
- issn = {2329-9304},
- url = {https://www.merl.com/publications/TR2017-150}
- }
Hayashi, T., Watanabe, S., Toda, T., Hori, T., Le Roux, J., Takeda, K., "BLSTM-HMM Hybrid System Combined with Sound Activity Detection Network for Polyphonic Sound Event Detection", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), March 2017.
BibTeX TR2017-014 PDF
- @inproceedings{Hayashi2017mar,
- author = {Hayashi, Tomoki and Watanabe, Shinji and Toda, Tomoki and Hori, Takaaki and Le Roux, Jonathan and Takeda, Kazuya},
- title = {{BLSTM-HMM} Hybrid System Combined with Sound Activity Detection Network for Polyphonic Sound Event Detection},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- year = 2017,
- month = mar,
- url = {https://www.merl.com/publications/TR2017-014}
- }
Luo, Y., Chen, Z., Hershey, J.R., Le Roux, J., Mesgarani, N., "Deep Clustering and Conventional Networks for Music Separation: Strong Together", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), March 2017.
BibTeX TR2017-010 PDF
- @inproceedings{Luo2017mar,
- author    = {Luo, Yi and Chen, Zhuo and Hershey, John R. and Le Roux, Jonathan and Mesgarani, Nima},
- title     = {Deep Clustering and Conventional Networks for Music Separation: Strong Together},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- month     = mar,
- year      = {2017},
- url       = {https://www.merl.com/publications/TR2017-010},
- }
Watanabe, S., Hori, T., Le Roux, J., Hershey, J.R., "Student-Teacher Network Learning with Enhanced Features", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), March 2017.
BibTeX TR2017-011 PDF
- @inproceedings{Watanabe2017mar,
- author    = {Watanabe, Shinji and Hori, Takaaki and Le Roux, Jonathan and Hershey, John R.},
- title     = {Student-Teacher Network Learning with Enhanced Features},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- month     = mar,
- year      = {2017},
- url       = {https://www.merl.com/publications/TR2017-011},
- }
Hori, T., Wang, H., Hori, C., Watanabe, S., Harsham, B.A., Le Roux, J., Hershey, J.R., Koji, Y., Jing, Y., Zhu, Z., Aikawa, T., "Dialog State Tracking with Attention-based Sequence-to-sequence Learning", IEEE Workshop on Spoken Language Technology (SLT), DOI: 10.1109/SLT.2016.7846317, December 2016, pp. 552-558.
BibTeX TR2016-163 PDF
- @inproceedings{Hori2016dec,
- author    = {Hori, Takaaki and Wang, Hai and Hori, Chiori and Watanabe, Shinji and Harsham, Bret A. and Le Roux, Jonathan and Hershey, John R. and Koji, Yusuke and Jing, Yi and Zhu, Zhaocheng and Aikawa, Takeyuki},
- title     = {Dialog State Tracking with Attention-based Sequence-to-sequence Learning},
- booktitle = {IEEE Workshop on Spoken Language Technology (SLT)},
- pages     = {552--558},
- month     = dec,
- year      = {2016},
- doi       = {10.1109/SLT.2016.7846317},
- url       = {https://www.merl.com/publications/TR2016-163},
- }
Wisdom, S., Powers, T., Hershey, J.R., Le Roux, J., Atlas, L., "Full-Capacity Unitary Recurrent Neural Networks", Advances in Neural Information Processing Systems (NIPS), December 2016.
BibTeX TR2016-155 PDF
- @inproceedings{Wisdom2016dec,
- author    = {Wisdom, Scott and Powers, Thomas and Hershey, John R. and Le Roux, Jonathan and Atlas, Les},
- title     = {Full-Capacity Unitary Recurrent Neural Networks},
- booktitle = {Advances in Neural Information Processing Systems (NIPS)},
- month     = dec,
- year      = {2016},
- url       = {https://www.merl.com/publications/TR2016-155},
- }
Le Roux, J., Vincent, E., Erdogan, H., "Learning-Based Approaches to Speech Enhancement and Separation", Tech. Rep. TR2016-113, Interspeech Tutorials, September 2016.
BibTeX TR2016-113 PDF
- @techreport{LeRoux2016sep,
- author = {Le Roux, Jonathan and Vincent, Emmanuel and Erdogan, Hakan},
- title = {Learning-Based Approaches to Speech Enhancement and Separation},
- institution = {Mitsubishi Electric Research Laboratories},
- year = 2016,
- number = {TR2016-113},
- month = sep,
- note = {Interspeech Tutorials},
- url = {https://www.merl.com/publications/TR2016-113}
- }
Erdogan, H., Hershey, J.R., Watanabe, S., Mandel, M., Le Roux, J., "Improved MVDR beamforming using single-channel mask prediction networks", Interspeech, DOI: 10.21437/Interspeech.2016-552, September 2016, pp. 1981-1985.
BibTeX TR2016-072 PDF
- @inproceedings{Erdogan2016sep,
- author = {Erdogan, Hakan and Hershey, John R. and Watanabe, Shinji and Mandel, Michael and Le Roux, Jonathan},
- title = {Improved {MVDR} beamforming using single-channel mask prediction networks},
- booktitle = {Interspeech},
- year = 2016,
- pages = {1981--1985},
- month = sep,
- doi = {10.21437/Interspeech.2016-552},
- url = {https://www.merl.com/publications/TR2016-072}
- }
Isik, Y., Le Roux, J., Chen, Z., Watanabe, S., Hershey, J.R., "Single-Channel Multi-Speaker Separation using Deep Clustering", Interspeech, DOI: 10.21437/Interspeech.2016-1176, September 2016, pp. 545-549.
BibTeX TR2016-073 PDF
- @inproceedings{Isik2016sep,
- author    = {Isik, Yusuf and Le Roux, Jonathan and Chen, Zhuo and Watanabe, Shinji and Hershey, John R.},
- title     = {Single-Channel Multi-Speaker Separation using Deep Clustering},
- booktitle = {Interspeech},
- pages     = {545--549},
- month     = sep,
- year      = {2016},
- doi       = {10.21437/Interspeech.2016-1176},
- url       = {https://www.merl.com/publications/TR2016-073},
- }
Hayashi, T., Watanabe, S., Toda, T., Hori, T., Le Roux, J., Takeda, K., "Bidirectional LSTM-HMM Hybrid System for Polyphonic Sound Event Detection", Workshop on Detection and Classification of Acoustic Scenes and Events (DCASE), September 2016, pp. 35-39.
BibTeX TR2016-114 PDF
- @inproceedings{Hayashi2016sep,
- author = {Hayashi, Tomoki and Watanabe, Shinji and Toda, Tomoki and Hori, Takaaki and Le Roux, Jonathan and Takeda, Kazuya},
- title = {Bidirectional {LSTM-HMM} Hybrid System for Polyphonic Sound Event Detection},
- booktitle = {Workshop on Detection and Classification of Acoustic Scenes and Events (DCASE)},
- year = 2016,
- pages = {35--39},
- month = sep,
- url = {https://www.merl.com/publications/TR2016-114}
- }
Hershey, J.R., Chen, Z., Le Roux, J., Watanabe, S., "Deep Clustering: Discriminative Embeddings for Segmentation and Separation", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP.2016.7471631, March 2016, pp. 31-35.
BibTeX TR2016-003 PDF
- @inproceedings{Hershey2016mar,
- author    = {Hershey, John R. and Chen, Zhuo and Le Roux, Jonathan and Watanabe, Shinji},
- title     = {Deep Clustering: Discriminative Embeddings for Segmentation and Separation},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- pages     = {31--35},
- month     = mar,
- year      = {2016},
- doi       = {10.1109/ICASSP.2016.7471631},
- url       = {https://www.merl.com/publications/TR2016-003},
- }
Wisdom, S., Hershey, J.R., Le Roux, J., Watanabe, S., "Deep Unfolding for Multichannel Source Separation", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP.2016.7471649, March 2016, pp. 121-125.
BibTeX TR2016-008 PDF
- @inproceedings{Wisdom2016mar,
- author    = {Wisdom, Scott and Hershey, John R. and Le Roux, Jonathan and Watanabe, Shinji},
- title     = {Deep Unfolding for Multichannel Source Separation},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- pages     = {121--125},
- month     = mar,
- year      = {2016},
- doi       = {10.1109/ICASSP.2016.7471649},
- url       = {https://www.merl.com/publications/TR2016-008},
- }
Hori, T., Chen, Z., Erdogan, H., Hershey, J.R., Le Roux, J., Mitra, V., Watanabe, S., "The MERL/SRI System for the 3rd CHiME Challenge Using Beamforming, Robust Feature Extraction, and Advanced Speech Recognition", IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU), DOI: 10.1109/ASRU.2015.7404833, December 2015, pp. 475-481.
BibTeX TR2015-135 PDF
- @inproceedings{Hori2015dec2,
- author = {Hori, Takaaki and Chen, Zhuo and Erdogan, Hakan and Hershey, John R. and Le Roux, Jonathan and Mitra, V. and Watanabe, Shinji},
- title = {The {MERL/SRI} System for the 3rd {CHiME} Challenge Using Beamforming, Robust Feature Extraction, and Advanced Speech Recognition},
- booktitle = {IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU)},
- year = 2015,
- pages = {475--481},
- month = dec,
- publisher = {IEEE},
- doi = {10.1109/ASRU.2015.7404833},
- url = {https://www.merl.com/publications/TR2015-135}
- }
Harsham, B.A., Watanabe, S., Esenther, A., Hershey, J.R., Le Roux, J., Luan, Y., Nikovski, D.N., Potluru, V.K., "Driver Prediction to Improve Interaction with In-Vehicle HMI", Workshop on DSP for In-Vehicle Systems and Safety (DSP), October 2015.
BibTeX TR2015-120 PDF
- @inproceedings{Harsham2015oct,
- author = {Harsham, Bret A. and Watanabe, Shinji and Esenther, A. and Hershey, John R. and Le Roux, Jonathan and Luan, Y. and Nikovski, D. N. and Potluru, V. K.},
- title = {Driver Prediction to Improve Interaction with In-Vehicle {HMI}},
- booktitle = {Workshop on DSP for In-Vehicle Systems and Safety (DSP)},
- year = 2015,
- month = oct,
- url = {https://www.merl.com/publications/TR2015-120}
- }
Weninger, F.J., Erdogan, H., Watanabe, S., Vincent, E., Le Roux, J., Hershey, J.R., Schuller, B.W., "Speech Enhancement with LSTM Recurrent Neural Networks and Its Application to Noise-Robust ASR", Latent Variable Analysis and Signal Separation Conference (LVA), DOI: 10.1007/978-3-319-22482-4_11, August 2015, vol. 9237, pp. 91-99.
BibTeX TR2015-094 PDF
- @inproceedings{Weninger2015aug,
- author = {Weninger, F. J. and Erdogan, Hakan and Watanabe, Shinji and Vincent, Emmanuel and Le Roux, Jonathan and Hershey, John R. and Schuller, B. W.},
- title = {Speech Enhancement with {LSTM} Recurrent Neural Networks and Its Application to Noise-Robust {ASR}},
- booktitle = {Latent Variable Analysis and Signal Separation Conference (LVA)},
- year = 2015,
- volume = 9237,
- pages = {91--99},
- month = aug,
- doi = {10.1007/978-3-319-22482-4_11},
- isbn = {978-3-319-22482-4},
- url = {https://www.merl.com/publications/TR2015-094}
- }
Erdogan, H., Hershey, J.R., Watanabe, S., Le Roux, J., "Phase-Sensitive and Recognition-Boosted Speech Separation Using Deep Recurrent Neural Networks", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP.2015.7178061, April 2015, pp. 708-712.
BibTeX TR2015-031 PDF
- @inproceedings{Erdogan2015apr,
- author = {Erdogan, Hakan and Hershey, John R. and Watanabe, Shinji and Le Roux, Jonathan},
- title = {Phase-Sensitive and Recognition-Boosted Speech Separation Using Deep Recurrent Neural Networks},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- year = 2015,
- pages = {708--712},
- month = apr,
- publisher = {IEEE},
- doi = {10.1109/ICASSP.2015.7178061},
- url = {https://www.merl.com/publications/TR2015-031}
- }
Le Roux, J., Hershey, J.R., Weninger, F.J., "Deep NMF for Speech Separation", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP.2015.7177933, April 2015, pp. 66-70.
BibTeX TR2015-029 PDF
- @inproceedings{LeRoux2015apr1,
- author = {Le Roux, Jonathan and Hershey, John R. and Weninger, F. J.},
- title = {Deep {NMF} for Speech Separation},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- year = 2015,
- pages = {66--70},
- month = apr,
- publisher = {IEEE},
- doi = {10.1109/ICASSP.2015.7177933},
- url = {https://www.merl.com/publications/TR2015-029}
- }
Le Roux, J., Vincent, E., Hershey, J.R., Ellis, D.P.W., "Micbots: Collecting Large Realistic Datasets for Speech and Audio Research Using Mobile Robots", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP.2015.7179050, April 2015, pp. 5635-5639.
BibTeX TR2015-030 PDF
- @inproceedings{LeRoux2015apr2,
- author = {Le Roux, Jonathan and Vincent, Emmanuel and Hershey, John R. and Ellis, D. P. W.},
- title = {Micbots: Collecting Large Realistic Datasets for Speech and Audio Research Using Mobile Robots},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- year = 2015,
- pages = {5635--5639},
- month = apr,
- publisher = {IEEE},
- doi = {10.1109/ICASSP.2015.7179050},
- url = {https://www.merl.com/publications/TR2015-030}
- }
Le Roux, J., Weninger, F.J., Hershey, J.R., "Sparse NMF – half-baked or well done?", Tech. Rep. TR2015-023, Mitsubishi Electric Research Laboratories, March 2015.
BibTeX TR2015-023 PDF
- @techreport{LeRoux2015mar,
- author = {Le Roux, Jonathan and Weninger, F. J. and Hershey, John R.},
- title = {Sparse {NMF} -- half-baked or well done?},
- institution = {Mitsubishi Electric Research Laboratories},
- year = 2015,
- number = {TR2015-023},
- address = {Cambridge MA, USA},
- month = mar,
- url = {https://www.merl.com/publications/TR2015-023}
- }
Gerkmann, T., Krawczyk, M., Le Roux, J., "Phase Processing for Single Channel Speech Enhancement: History and Recent Advances", IEEE Signal Processing Magazine, DOI: 10.1109/MSP.2014.2369251, Vol. 32, No. 2, pp. 55-66, March 2015.
BibTeX TR2014-122 PDF
- @article{Gerkmann2015mar,
- author = {Gerkmann, T. and Krawczyk, M. and Le Roux, Jonathan},
- title = {Phase Processing for Single Channel Speech Enhancement: History and Recent Advances},
- journal = {IEEE Signal Processing Magazine},
- year = 2015,
- volume = 32,
- number = 2,
- pages = {55--66},
- month = mar,
- publisher = {IEEE},
- doi = {10.1109/MSP.2014.2369251},
- issn = {1053-5888},
- url = {https://www.merl.com/publications/TR2014-122}
- }
Weninger, F., Le Roux, J., Hershey, J.R., Schuller, B., "Discriminatively Trained Recurrent Neural Networks for Single-Channel Speech Separation", IEEE Global Conference on Signal and Information Processing (GlobalSIP), DOI: 10.1109/GlobalSIP.2014.7032183, December 2014, pp. 577-581.
BibTeX TR2014-104 PDF
- @inproceedings{Weninger2014dec,
- author = {Weninger, F. and Le Roux, Jonathan and Hershey, John R. and Schuller, B.},
- title = {Discriminatively Trained Recurrent Neural Networks for Single-Channel Speech Separation},
- booktitle = {IEEE Global Conference on Signal and Information Processing (GlobalSIP)},
- year = 2014,
- pages = {577--581},
- month = dec,
- publisher = {IEEE},
- doi = {10.1109/GlobalSIP.2014.7032183},
- url = {https://www.merl.com/publications/TR2014-104}
- }
Tachioka, Y., Watanabe, S., Le Roux, J., Hershey, J.R., "Sequential Maximum Mutual Information Linear Discriminant Analysis for Speech Recognition", Interspeech, September 2014, vol. 15, pp. 2415-2419.
BibTeX TR2014-079 PDF
- @inproceedings{Tachioka2014sep,
- author = {Tachioka, Yuuki and Watanabe, Shinji and Le Roux, Jonathan and Hershey, John R.},
- title = {Sequential Maximum Mutual Information Linear Discriminant Analysis for Speech Recognition},
- booktitle = {Interspeech},
- year = 2014,
- volume = 15,
- pages = {2415--2419},
- month = sep,
- publisher = {International Speech Communication Association},
- issn = {2308-457X},
- url = {https://www.merl.com/publications/TR2014-079}
- }