Publications

Tachioka, Y., Narita, T., Miura, I., Uramoto, T., Monta, N., Uenohara, S., Furuya, K., Watanabe, S., Le Roux, J., "Coupled initialization of multi-channel non-negative matrix factorization based on spatial and spectral information", Interspeech, August 2017.
BibTeX TR2017-134 PDF
- @inproceedings{Tachioka2017aug,
- author    = {Tachioka, Yuuki and Narita, Tomohiro and Miura, Iori and Uramoto, Takanobu and Monta, Natsuki and Uenohara, Shingo and Furuya, Kenichi and Watanabe, Shinji and {Le Roux}, Jonathan},
- title     = {{Coupled initialization of multi-channel non-negative matrix factorization based on spatial and spectral information}},
- booktitle = {Interspeech},
- month     = aug,
- year      = {2017},
- url       = {https://www.merl.com/publications/TR2017-134},
- }
Hayashi, T., Watanabe, S., Toda, T., Hori, T., Le Roux, J., Takeda, K., "Duration-Controlled LSTM for Polyphonic Sound Event Detection", IEEE/ACM Transactions on Audio, Speech, and Language Processing, DOI: 10.1109/TASLP.2017.2740002, Vol. 25, No. 11, August 2017.
BibTeX TR2017-150 PDF
- @article{Hayashi2017aug,
- author  = {Hayashi, Tomoki and Watanabe, Shinji and Toda, Tomoki and Hori, Takaaki and {Le Roux}, Jonathan and Takeda, Kazuya},
- title   = {{Duration-Controlled LSTM for Polyphonic Sound Event Detection}},
- journal = {IEEE/ACM Transactions on Audio, Speech, and Language Processing},
- volume  = {25},
- number  = {11},
- month   = aug,
- year    = {2017},
- doi     = {10.1109/TASLP.2017.2740002},
- issn    = {2329-9304},
- url     = {https://www.merl.com/publications/TR2017-150},
- }
Hayashi, T., Watanabe, S., Toda, T., Hori, T., Le Roux, J., Takeda, K., "BLSTM-HMM Hybrid System Combined with Sound Activity Detection Network for Polyphonic Sound Event Detection", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), March 2017.
BibTeX TR2017-014 PDF
- @inproceedings{Hayashi2017mar,
- author    = {Hayashi, Tomoki and Watanabe, Shinji and Toda, Tomoki and Hori, Takaaki and {Le Roux}, Jonathan and Takeda, Kazuya},
- title     = {{BLSTM-HMM Hybrid System Combined with Sound Activity Detection Network for Polyphonic Sound Event Detection}},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- month     = mar,
- year      = {2017},
- url       = {https://www.merl.com/publications/TR2017-014},
- }
Hayashi, T., Watanabe, S., Toda, T., Hori, T., Le Roux, J., Takeda, K., "Bidirectional LSTM-HMM Hybrid System for Polyphonic Sound Event Detection", Workshop on Detection and Classification of Acoustic Scenes and Events (DCASE), September 2016, pp. 35-39.
BibTeX TR2016-114 PDF
- @inproceedings{Hayashi2016sep,
- author    = {Hayashi, Tomoki and Watanabe, Shinji and Toda, Tomoki and Hori, Takaaki and {Le Roux}, Jonathan and Takeda, Kazuya},
- title     = {{Bidirectional LSTM-HMM Hybrid System for Polyphonic Sound Event Detection}},
- booktitle = {Workshop on Detection and Classification of Acoustic Scenes and Events (DCASE)},
- pages     = {35--39},
- month     = sep,
- year      = {2016},
- url       = {https://www.merl.com/publications/TR2016-114},
- }
Le Roux, J., Hershey, J.R., Weninger, F.J., "Deep NMF for Speech Separation", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP.2015.7177933, April 2015, pp. 66-70.
BibTeX TR2015-029 PDF
- @inproceedings{LeRoux2015apr1,
- author    = {{Le Roux}, J. and Hershey, J.R. and Weninger, F.J.},
- title     = {{Deep NMF for Speech Separation}},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- pages     = {66--70},
- month     = apr,
- year      = {2015},
- publisher = {IEEE},
- doi       = {10.1109/ICASSP.2015.7177933},
- url       = {https://www.merl.com/publications/TR2015-029},
- }
Le Roux, J., Weninger, F.J., Hershey, J.R., "Sparse NMF – half-baked or well done?", Tech. Rep. TR2015-023, Mitsubishi Electric Research Laboratories, March 2015.
BibTeX TR2015-023 PDF
- @techreport{LeRoux2015mar,
- author      = {{Le Roux}, J. and Weninger, F.J. and Hershey, J.R.},
- title       = {{Sparse NMF -- half-baked or well done?}},
- institution = {Mitsubishi Electric Research Laboratories},
- number      = {TR2015-023},
- address     = {Cambridge MA, USA},
- month       = mar,
- year        = {2015},
- url         = {https://www.merl.com/publications/TR2015-023},
- }
Weninger, F., Le Roux, J., Hershey, J.R., Schuller, B., "Discriminatively Trained Recurrent Neural Networks for Single-Channel Speech Separation", IEEE Global Conference on Signal and Information Processing (GlobalSIP), DOI: 10.1109/GlobalSIP.2014.7032183, December 2014, pp. 577-581.
BibTeX TR2014-104 PDF
- @inproceedings{Weninger2014dec,
- author    = {Weninger, F. and {Le Roux}, J. and Hershey, J.R. and Schuller, B.},
- title     = {{Discriminatively Trained Recurrent Neural Networks for Single-Channel Speech Separation}},
- booktitle = {IEEE Global Conference on Signal and Information Processing (GlobalSIP)},
- pages     = {577--581},
- month     = dec,
- year      = {2014},
- publisher = {IEEE},
- doi       = {10.1109/GlobalSIP.2014.7032183},
- url       = {https://www.merl.com/publications/TR2014-104},
- }
Mansour, H., Rane, S., Boufounos, P.T., Vetro, A., "Video Querying Via Compact Descriptors of Visually Salient Objects", IEEE International Conference on Image Processing (ICIP), DOI: 10.1109/ICIP.2014.7025564, October 2014, pp. 2789-2793.
BibTeX TR2014-101 PDF
- @inproceedings{Mansour2014oct,
- author    = {Mansour, H. and Rane, S. and Boufounos, P.T. and Vetro, A.},
- title     = {{Video Querying Via Compact Descriptors of Visually Salient Objects}},
- booktitle = {IEEE International Conference on Image Processing (ICIP)},
- pages     = {2789--2793},
- month     = oct,
- year      = {2014},
- doi       = {10.1109/ICIP.2014.7025564},
- url       = {https://www.merl.com/publications/TR2014-101},
- }
Weninger, F., Le Roux, J., Hershey, J.R., Watanabe, S., "Discriminative NMF and its application to single-channel source separation", Interspeech, September 2014, vol. 15, pp. 865-869.
BibTeX TR2014-081 PDF
- @inproceedings{Weninger2014sep,
- author    = {Weninger, F. and {Le Roux}, J. and Hershey, J.R. and Watanabe, S.},
- title     = {{Discriminative NMF and its application to single-channel source separation}},
- booktitle = {Interspeech},
- volume    = {15},
- pages     = {865--869},
- month     = sep,
- year      = {2014},
- publisher = {International Speech Communication Association},
- issn      = {2308-457X},
- url       = {https://www.merl.com/publications/TR2014-081},
- }
Hershey, J.R., Le Roux, J., Weninger, F., "Deep Unfolding: Model-Based Inspiration of Novel Deep Architectures", arXiv, August 2014.
BibTeX arXiv
- @misc{Hershey2014aug,
- author = {Hershey, J.R. and {Le Roux}, J. and Weninger, F.},
- title = {{Deep Unfolding: Model-Based Inspiration of Novel Deep Architectures}},
- howpublished = {arXiv preprint},
- eprint = {1409.2574},
- archiveprefix = {arXiv},
- year = 2014,
- month = aug,
- url = {https://arxiv.org/abs/1409.2574}
- }
Ni, J., Marks, T.K., Tuzel, O., Porikli, F., "Detecting 3D Geometric Boundaries of Indoor Scenes Under Varying Lighting", IEEE Winter Conference on Applications of Computer Vision (WACV), March 2014.
BibTeX TR2014-013 PDF
- @inproceedings{Ni2014mar,
- author = {Ni, J. and Marks, T.K. and Tuzel, O. and Porikli, F.},
- title = {{Detecting 3D Geometric Boundaries of Indoor Scenes Under Varying Lighting}},
- booktitle = {IEEE Winter Conference on Applications of Computer Vision (WACV)},
- year = 2014,
- month = mar,
- url = {https://www.merl.com/publications/TR2014-013}
- }
Simsekli, U., Le Roux, J., Hershey, J.R., "Hierarchical and Coupled Non-negative Dynamical Systems with Application to Audio Modeling", IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (WASPAA), DOI: 10.1109/WASPAA.2013.6701891, October 2013, pp. 1-4.
BibTeX TR2013-097 PDF
- @inproceedings{Simsekli2013oct,
- author    = {Simsekli, U. and {Le Roux}, J. and Hershey, J.R.},
- title     = {{Hierarchical and Coupled Non-negative Dynamical Systems with Application to Audio Modeling}},
- booktitle = {IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (WASPAA)},
- pages     = {1--4},
- month     = oct,
- year      = {2013},
- doi       = {10.1109/WASPAA.2013.6701891},
- issn      = {1931-1168},
- url       = {https://www.merl.com/publications/TR2013-097},
- }
Fevotte, C., Le Roux, J., Hershey, J.R., "Non-negative Dynamical System with Application to Speech and Audio", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), May 2013.
BibTeX TR2013-021 PDF Software
- @inproceedings{Fevotte2013may,
- author    = {Fevotte, C. and {Le Roux}, J. and Hershey, J.R.},
- title     = {{Non-negative Dynamical System with Application to Speech and Audio}},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- month     = may,
- year      = {2013},
- url       = {https://www.merl.com/publications/TR2013-021},
- }
Brand, M., Chen, D., "Parallel Quadratic Programming for Image Processing", IEEE International Conference on Image Processing (ICIP), DOI: 10.1109/ICIP.2011.6116089, September 2011, pp. 2261-2264.
BibTeX TR2011-064 PDF Software
- @inproceedings{Brand2011sep,
- author    = {Brand, M. and Chen, D.},
- title     = {{Parallel Quadratic Programming for Image Processing}},
- booktitle = {IEEE International Conference on Image Processing (ICIP)},
- pages     = {2261--2264},
- month     = sep,
- year      = {2011},
- doi       = {10.1109/ICIP.2011.6116089},
- url       = {https://www.merl.com/publications/TR2011-064},
- }
Raj, B., Wilson, K.W., Krueger, A., Haeb-Umbach, R., "Ungrounded Independent Non-Negative Factor Analysis", Interspeech, September 2010, pp. 330-333.
BibTeX TR2010-122 PDF
- @inproceedings{Raj2010sep,
- author    = {Raj, B. and Wilson, K.W. and Krueger, A. and Haeb-Umbach, R.},
- title     = {{Ungrounded Independent Non-Negative Factor Analysis}},
- booktitle = {Interspeech},
- pages     = {330--333},
- month     = sep,
- year      = {2010},
- url       = {https://www.merl.com/publications/TR2010-122},
- }
Wilson, K.W., Raj, B., "Spectrogram Dimensionality Reduction with Independence Constraints", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), March 2010.
BibTeX TR2010-023 PDF
- @inproceedings{Wilson2010mar,
- author    = {Wilson, K.W. and Raj, B.},
- title     = {{Spectrogram Dimensionality Reduction with Independence Constraints}},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- month     = mar,
- year      = {2010},
- url       = {https://www.merl.com/publications/TR2010-023},
- }
Wilson, K.W., Raj, B., Smaragdis, P., "Regularized Non-negative Matrix Factorization with Temporal Dependencies for Speech Denoising", Interspeech, September 2008.
BibTeX TR2008-075 PDF
- @inproceedings{Wilson2008sep,
- author    = {Wilson, K.W. and Raj, B. and Smaragdis, P.},
- title     = {{Regularized Non-negative Matrix Factorization with Temporal Dependencies for Speech Denoising}},
- booktitle = {Interspeech},
- month     = sep,
- year      = {2008},
- url       = {https://www.merl.com/publications/TR2008-075},
- }
Madhusudana Shashanka, Bhiksha Raj, Paris Smaragdis, "Probabilistic Latent Variable Models as Non-Negative Factorizations", Tech. Rep. TR2007-083, Mitsubishi Electric Research Laboratories, Cambridge, MA, December 2007.
BibTeX TR2007-083 PDF
- @techreport{MERL_TR2007-083,
- author = {Shashanka, Madhusudana and Raj, Bhiksha and Smaragdis, Paris},
- title = {Probabilistic Latent Variable Models as Non-Negative Factorizations},
- institution = {MERL - Mitsubishi Electric Research Laboratories},
- address = {Cambridge, MA 02139},
- number = {TR2007-083},
- month = dec,
- year = 2007,
- url = {https://www.merl.com/publications/TR2007-083/}
- }
Paris Smaragdis, Bhiksha Raj, "Shift-Invariant Probabilistic Latent Component Analysis", Tech. Rep. TR2007-009, Mitsubishi Electric Research Laboratories, Cambridge, MA, December 2007.
BibTeX TR2007-009 PDF
- @techreport{MERL_TR2007-009,
- author = {Smaragdis, Paris and Raj, Bhiksha},
- title = {Shift-Invariant Probabilistic Latent Component Analysis},
- institution = {MERL - Mitsubishi Electric Research Laboratories},
- address = {Cambridge, MA 02139},
- number = {TR2007-009},
- month = dec,
- year = 2007,
- url = {https://www.merl.com/publications/TR2007-009/}
- }
Smaragdis, P., "Convolutive Speech Bases and their Application to Supervised Speech Separation", IEEE Transactions on Audio, Speech and Language Processing, Vol. 15, No. 1, pp. 1-12, January 2007.
BibTeX TR2007-002 PDF
- @article{Smaragdis2007jan2,
- author  = {Smaragdis, P.},
- title   = {{Convolutive Speech Bases and their Application to Supervised Speech Separation}},
- journal = {IEEE Transactions on Audio, Speech and Language Processing},
- volume  = {15},
- number  = {1},
- pages   = {1--12},
- month   = jan,
- year    = {2007},
- issn    = {1558-7916},
- url     = {https://www.merl.com/publications/TR2007-002},
- }
Lawrence, J., Ben-Artzi, A., DeCoro, C., Matusik, W., Pfister, H., Ramamoorthi, R., Rusinkiewicz, S., "Inverse Shade Trees for Non-Parametric Material Representation and Editing", ACM Transactions on Graphics (TOG), Vol. 25, No. 3, pp. 735-745, July 2006.
BibTeX TR2006-104 PDF
- @article{Lawrence2006jul,
- author  = {Lawrence, J. and Ben-Artzi, A. and DeCoro, C. and Matusik, W. and Pfister, H. and Ramamoorthi, R. and Rusinkiewicz, S.},
- title   = {{Inverse Shade Trees for Non-Parametric Material Representation and Editing}},
- journal = {ACM Transactions on Graphics (TOG)},
- volume  = {25},
- number  = {3},
- pages   = {735--745},
- month   = jul,
- year    = {2006},
- issn    = {0730-0301},
- url     = {https://www.merl.com/publications/TR2006-104},
- }
Bansal, D., Raj, B., Smaragdis, P., "Bandwidth Expansion of Narrowband Speech Using Non-Negative Matrix Factorization", Eurospeech, September 2005.
BibTeX TR2005-135 PDF
- @inproceedings{Bansal2005sep,
- author = {Bansal, D. and Raj, B. and Smaragdis, P.},
- title = {{Bandwidth Expansion of Narrowband Speech Using Non-Negative Matrix Factorization}},
- booktitle = {Eurospeech},
- year = 2005,
- month = sep,
- url = {https://www.merl.com/publications/TR2005-135}
- }
Tim Weyrich, Wojciech Matusik, Hanspeter Pfister, Jinho Lee, Addy Ngan, Henrik Wann Jensen, Markus Gross, "A Measurement-Based Skin Reflectance Model for Face Rendering and Editing", Tech. Rep. TR2005-047, Mitsubishi Electric Research Laboratories, Cambridge, MA, July 2005.
BibTeX TR2005-047 PDF
- @techreport{MERL_TR2005-047,
- author = {Weyrich, Tim and Matusik, Wojciech and Pfister, Hanspeter and Lee, Jinho and Ngan, Addy and Jensen, Henrik Wann and Gross, Markus},
- title = {A Measurement-Based Skin Reflectance Model for Face Rendering and Editing},
- institution = {MERL - Mitsubishi Electric Research Laboratories},
- address = {Cambridge, MA 02139},
- number = {TR2005-047},
- month = jul,
- year = 2005,
- url = {https://www.merl.com/publications/TR2005-047/}
- }
T. Weyrich, W. Matusik, H. Pfister, J. Lee, A. Ngan, H.W. Jensen, M. Gross, "A Measurement-Based Skin Reflectance Model for Face Rendering and Editing", Tech. Rep. TR2005-071, Mitsubishi Electric Research Laboratories, Cambridge, MA, July 2005.
BibTeX TR2005-071 PDF
- @techreport{MERL_TR2005-071,
- author = {Weyrich, T. and Matusik, W. and Pfister, H. and Lee, J. and Ngan, A. and Jensen, H. W. and Gross, M.},
- title = {A Measurement-Based Skin Reflectance Model for Face Rendering and Editing},
- institution = {MERL - Mitsubishi Electric Research Laboratories},
- address = {Cambridge, MA 02139},
- number = {TR2005-071},
- month = jul,
- year = 2005,
- url = {https://www.merl.com/publications/TR2005-071/}
- }
Smaragdis, P., "Non-negative Matrix Factor Deconvolution; Extraction of Multiple Sound Sources from Monophonic Inputs", International Congress on Independent Component Analysis and Blind Signal Separation, September 2004, vol. 3195, p. 494.
BibTeX TR2004-104 PDF
- @inproceedings{Smaragdis2004sep,
- author    = {Smaragdis, P.},
- title     = {{Non-negative Matrix Factor Deconvolution; Extraction of Multiple Sound Sources from Monophonic Inputs}},
- booktitle = {International Congress on Independent Component Analysis and Blind Signal Separation},
- volume    = {3195},
- pages     = {494},
- month     = sep,
- year      = {2004},
- isbn      = {3-540-23056-4},
- url       = {https://www.merl.com/publications/TR2004-104},
- }