Publications

Weinberg, G., Raj, B., Kalgaonkar, K., "Two New Techniques for Natural Spoken User Interfaces", ACM Symposium on User Interface Software and Technology (UIST), October 2006.
BibTeX TR2006-098 PDF
- @inproceedings{Weinberg2006oct,
- author    = {Weinberg, G. and Raj, B. and Kalgaonkar, K.},
- title     = {{Two New Techniques for Natural Spoken User Interfaces}},
- booktitle = {ACM Symposium on User Interface Software and Technology (UIST)},
- month     = oct,
- year      = {2006},
- url       = {https://www.merl.com/publications/TR2006-098}
- }
Radhakrishnan, R., Divakaran, A., "Merging Segmentations of Low-level and Mid-level Time Series for Audio Class Discovery", Asilomar Conference on Signals, Systems and Computers (ACSSC), October - November 2006, pp. 64-68.
BibTeX IEEE Xplore
- @inproceedings{Radhakrishnan2006oct,
- author    = {Radhakrishnan, R. and Divakaran, A.},
- title     = {{Merging Segmentations of Low-level and Mid-level Time Series for Audio Class Discovery}},
- booktitle = {Asilomar Conference on Signals, Systems and Computers (ACSSC)},
- pages     = {64--68},
- month     = oct,
- year      = {2006},
- issn      = {1058-6393},
- url       = {https://ieeexplore.ieee.org/document/4176513}
- }
Paris Smaragdis, Bhiksha Raj, Madhusudana Shashanka, "Supervised and Semi-Supervised Separation of Sounds from Single-Channel Mixtures", Tech. Rep. TR2007-062, Mitsubishi Electric Research Laboratories, Cambridge, MA, July 2006.
BibTeX TR2007-062 PDF
- @techreport{MERL_TR2007-062,
- author      = {Smaragdis, Paris and Raj, Bhiksha and Shashanka, Madhusudana},
- title       = {Supervised and Semi-Supervised Separation of Sounds from Single-Channel Mixtures},
- institution = {MERL - Mitsubishi Electric Research Laboratories},
- address     = {Cambridge, MA 02139},
- number      = {TR2007-062},
- month       = jul,
- year        = {2006},
- url         = {https://www.merl.com/publications/TR2007-062/}
- }
Wittenburg, K., Lanning, T., Schwenke, D., Shubin, H., Vetro, A., "The Prospects for Unrestricted Speech Input for TV Content Search", International Working Conference on Advanced Visual Interfaces (AVI), May 2006.
BibTeX TR2006-045 PDF
- @inproceedings{Wittenburg2006may,
- author    = {Wittenburg, K. and Lanning, T. and Schwenke, D. and Shubin, H. and Vetro, A.},
- title     = {{The Prospects for Unrestricted Speech Input for TV Content Search}},
- booktitle = {International Working Conference on Advanced Visual Interfaces (AVI)},
- month     = may,
- year      = {2006},
- url       = {https://www.merl.com/publications/TR2006-045}
- }
Radhakrishnan, R., Divakaran, A., "Generative Process Tracking for Audio Analysis", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), May 2006, vol. 5, pp. V.
BibTeX TR2006-053 PDF
- @inproceedings{Radhakrishnan2006may,
- author    = {Radhakrishnan, R. and Divakaran, A.},
- title     = {{Generative Process Tracking for Audio Analysis}},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- volume    = {5},
- pages     = {V},
- month     = may,
- year      = {2006},
- issn      = {1520-6149},
- url       = {https://www.merl.com/publications/TR2006-053}
- }
Raj, B., Shashanka, M.V.S., Smaragdis, P., "Latent Dirichlet Decomposition for Single Channel Speaker Separation", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), May 2006.
BibTeX TR2006-064 PDF
- @inproceedings{Raj2006may,
- author    = {Raj, B. and Shashanka, M. V. S. and Smaragdis, P.},
- title     = {{Latent Dirichlet Decomposition for Single Channel Speaker Separation}},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- month     = may,
- year      = {2006},
- url       = {https://www.merl.com/publications/TR2006-064}
- }
Shashanka, M.V.S., Smaragdis, P., "Secure Sound Classification: Gaussian Mixture Models", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), May 2006.
BibTeX TR2006-065 PDF
- @inproceedings{Shashanka2006may,
- author    = {Shashanka, M. V. S. and Smaragdis, P.},
- title     = {{Secure Sound Classification: Gaussian Mixture Models}},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- month     = may,
- year      = {2006},
- url       = {https://www.merl.com/publications/TR2006-065}
- }
Raj, B., Singh, R., "Reconstructing Spectral Vectors with Uncertain Spectrographic Masks for Robust Speech Recognition", IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU), November 2005, pp. 27-32.
BibTeX TR2005-160 PDF
- @inproceedings{Raj2005nov,
- author    = {Raj, B. and Singh, R.},
- title     = {{Reconstructing Spectral Vectors with Uncertain Spectrographic Masks for Robust Speech Recognition}},
- booktitle = {IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU)},
- pages     = {27--32},
- month     = nov,
- year      = {2005},
- url       = {https://www.merl.com/publications/TR2005-160}
- }
Hu, R., Raj, B., "A Robust Voice Activity Detector Using an Acoustic Doppler Radar", IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU), November 2005, pp. 171-176.
BibTeX TR2005-159 PDF
- @inproceedings{Hu2005nov,
- author    = {Hu, R. and Raj, B.},
- title     = {{A Robust Voice Activity Detector Using an Acoustic Doppler Radar}},
- booktitle = {IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU)},
- pages     = {171--176},
- month     = nov,
- year      = {2005},
- url       = {https://www.merl.com/publications/TR2005-159}
- }
Radhakrishnan, R., Divakaran, A., Smaragdis, P., "Audio Analysis for Surveillance Applications", IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (WASPAA), October 2005, pp. 158-161.
BibTeX TR2005-139 PDF
- @inproceedings{Radhakrishnan2005oct,
- author    = {Radhakrishnan, R. and Divakaran, A. and Smaragdis, P.},
- title     = {{Audio Analysis for Surveillance Applications}},
- booktitle = {IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (WASPAA)},
- pages     = {158--161},
- month     = oct,
- year      = {2005},
- url       = {https://www.merl.com/publications/TR2005-139}
- }
Raj, B., Smaragdis, P., "Latent Variable Decomposition of Spectrograms for Single Channel Speaker Separation", IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (WASPAA), October 2005, pp. 17-20.
BibTeX TR2005-137 PDF
- @inproceedings{Raj2005oct,
- author    = {Raj, B. and Smaragdis, P.},
- title     = {{Latent Variable Decomposition of Spectrograms for Single Channel Speaker Separation}},
- booktitle = {IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (WASPAA)},
- pages     = {17--20},
- month     = oct,
- year      = {2005},
- url       = {https://www.merl.com/publications/TR2005-137}
- }
Smaragdis, P., Boufounos, P., "Learning Source Trajectories Using Wrapped-Phase Hidden Markov Models", IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (WASPAA), October 2005, pp. 114-117.
BibTeX TR2005-138 PDF
- @inproceedings{Smaragdis2005oct,
- author    = {Smaragdis, P. and Boufounos, P.},
- title     = {{Learning Source Trajectories Using Wrapped-Phase Hidden Markov Models}},
- booktitle = {IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (WASPAA)},
- pages     = {114--117},
- month     = oct,
- year      = {2005},
- url       = {https://www.merl.com/publications/TR2005-138}
- }
Forlines, C., Schmidt-Nielsen, B., Raj, B., Wittenburg, K., Wolf, P., "A Comparison between Spoken Queries and Menu-based Interfaces for In-Car Digital Music Selection", IFIP TC13 International Conference on Human-Computer Interaction (INTERACT), September 2005.
BibTeX TR2005-020 PDF
- @inproceedings{Forlines2005sep1,
- author    = {Forlines, C. and Schmidt-Nielsen, B. and Raj, B. and Wittenburg, K. and Wolf, P.},
- title     = {{A Comparison between Spoken Queries and Menu-based Interfaces for In-Car Digital Music Selection}},
- booktitle = {IFIP TC13 International Conference on Human-Computer Interaction (INTERACT)},
- month     = sep,
- year      = {2005},
- url       = {https://www.merl.com/publications/TR2005-020}
- }
Bansal, D., Raj, B., Smaragdis, P., "Bandwidth Expansion of Narrowband Speech Using non-Negative Matrix Factorization", Eurospeech, September 2005.
BibTeX TR2005-135 PDF
- @inproceedings{Bansal2005sep,
- author    = {Bansal, D. and Raj, B. and Smaragdis, P.},
- title     = {{Bandwidth Expansion of Narrowband Speech Using non-Negative Matrix Factorization}},
- booktitle = {Eurospeech},
- month     = sep,
- year      = {2005},
- url       = {https://www.merl.com/publications/TR2005-135}
- }
Raj, B., Singh, R., Smaragdis, P., "Recognizing Speech from Simultaneous Speakers", Eurospeech, September 2005.
BibTeX TR2005-136 PDF
- @inproceedings{Raj2005sep,
- author    = {Raj, B. and Singh, R. and Smaragdis, P.},
- title     = {{Recognizing Speech from Simultaneous Speakers}},
- booktitle = {Eurospeech},
- month     = sep,
- year      = {2005},
- url       = {https://www.merl.com/publications/TR2005-136}
- }
Smaragdis, P., "From Learning Music to Learning to Separate", Forum Acusticum, August 2005.
BibTeX TR2005-134 PDF
- @inproceedings{Smaragdis2005aug,
- author    = {Smaragdis, P.},
- title     = {{From Learning Music to Learning to Separate}},
- booktitle = {Forum Acusticum},
- month     = aug,
- year      = {2005},
- url       = {https://www.merl.com/publications/TR2005-134}
- }
Guinness, J., Raj, B., Schmidt-Nielsen, B., Turicchia, L., Sarpeshkar, R., "A Companding Front End for Noise-Robust Automatic Speech Recognition", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), March 2005, vol. 1, pp. 249-252.
BibTeX TR2005-023 PDF
- @inproceedings{Guinness2005mar,
- author    = {Guinness, J. and Raj, B. and Schmidt-Nielsen, B. and Turicchia, L. and Sarpeshkar, R.},
- title     = {{A Companding Front End for Noise-Robust Automatic Speech Recognition}},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- volume    = {1},
- pages     = {249--252},
- month     = mar,
- year      = {2005},
- issn      = {1520-6149},
- url       = {https://www.merl.com/publications/TR2005-023}
- }
Radhakrishnan, R., Divakaran, A., "Systematic Acquisition of Audio Classes for Elevator Surveillance", SPIE Conference on Image and Video Communications and Processing, March 2005, vol. 5685, pp. 64-71.
BibTeX TR2005-076 PDF
- @inproceedings{Radhakrishnan2005mar,
- author    = {Radhakrishnan, R. and Divakaran, A.},
- title     = {{Systematic Acquisition of Audio Classes for Elevator Surveillance}},
- booktitle = {SPIE Conference on Image and Video Communications and Processing},
- volume    = {5685},
- pages     = {64--71},
- month     = mar,
- year      = {2005},
- url       = {https://www.merl.com/publications/TR2005-076}
- }
Radhakrishnan, R., Divakaran, A., Xiong, Z., "A Time Series Clustering based Framework for Multimedia Mining and Summarization", ACM SIGMM International Workshop on Multimedia Information Retrieval (MIR), October 2004, pp. 157-164.
BibTeX TR2004-046 PDF
- @inproceedings{Radhakrishnan2004oct,
- author    = {Radhakrishnan, R. and Divakaran, A. and Xiong, Z.},
- title     = {{A Time Series Clustering based Framework for Multimedia Mining and Summarization}},
- booktitle = {ACM SIGMM International Workshop on Multimedia Information Retrieval (MIR)},
- pages     = {157--164},
- month     = oct,
- year      = {2004},
- isbn      = {1-58113-940-3},
- url       = {https://www.merl.com/publications/TR2004-046}
- }
Wolf, P., Woelfel, J., van Gemert, J., Raj, B., Wong, D., "SpokenQuery: An Alternate Approach to Choosing Items with Speech", International Conference on Spoken Language Processing (ICSLP), October 2004.
BibTeX TR2004-121 PDF
- @inproceedings{Wolf2004oct,
- author    = {Wolf, P. and Woelfel, J. and {van Gemert}, J. and Raj, B. and Wong, D.},
- title     = {{SpokenQuery: An Alternate Approach to Choosing Items with Speech}},
- booktitle = {International Conference on Spoken Language Processing (ICSLP)},
- month     = oct,
- year      = {2004},
- url       = {https://www.merl.com/publications/TR2004-121}
- }
Smaragdis, P., "Discovering Auditory Objects Through Non-Negativity Constraints", Statistical and Perceptual Audio Processing (SAPA), October 2004.
BibTeX TR2004-094 PDF
- @inproceedings{Smaragdis2004oct,
- author    = {Smaragdis, P.},
- title     = {{Discovering Auditory Objects Through Non-Negativity Constraints}},
- booktitle = {Statistical and Perceptual Audio Processing (SAPA)},
- month     = oct,
- year      = {2004},
- url       = {https://www.merl.com/publications/TR2004-094}
- }
Seltzer, M.L., Raj, B., Stern, R.M., "Likelihood-Maximizing Beamforming for Robust Hands-Free Speech Recognition", IEEE Transactions on Speech and Audio Processing, Vol. 12, No. 5, pp. 489-498, September 2004.
BibTeX TR2004-088 PDF
- @article{Seltzer2004sep1,
- author  = {Seltzer, M. L. and Raj, B. and Stern, R. M.},
- title   = {{Likelihood-Maximizing Beamforming for Robust Hands-Free Speech Recognition}},
- journal = {IEEE Transactions on Speech and Audio Processing},
- volume  = {12},
- number  = {5},
- pages   = {489--498},
- month   = sep,
- year    = {2004},
- note    = {Awarded Best Young Author, March 2007},
- issn    = {1063-6676},
- url     = {https://www.merl.com/publications/TR2004-088}
- }
Smaragdis, P., "Non-negative Matrix Factor Deconvolution; Extraction of Multiple Sound Sources from Monophonic Inputs", International Congress on Independent Component Analysis and Blind Signal Separation, September 2004, vol. 3195, pp. 494.
BibTeX TR2004-104 PDF
- @inproceedings{Smaragdis2004sep,
- author    = {Smaragdis, P.},
- title     = {{Non-negative Matrix Factor Deconvolution; Extraction of Multiple Sound Sources from Monophonic Inputs}},
- booktitle = {International Congress on Independent Component Analysis and Blind Signal Separation},
- volume    = {3195},
- pages     = {494},
- month     = sep,
- year      = {2004},
- isbn      = {3-540-23056-4},
- url       = {https://www.merl.com/publications/TR2004-104}
- }
Seltzer, M.L., Raj, B., Stern, R.M., "A Bayesian Classifier for Spectrographic Mask Estimation for Missing Feature Speech Recognition", Speech Communication, Vol. 43, No. 4, pp. 379-393, September 2004.
BibTeX TR2004-086 PDF
- @article{Seltzer2004sep2,
- author  = {Seltzer, M. L. and Raj, B. and Stern, R. M.},
- title   = {{A Bayesian Classifier for Spectrographic Mask Estimation for Missing Feature Speech Recognition}},
- journal = {Speech Communication},
- volume  = {43},
- number  = {4},
- pages   = {379--393},
- month   = sep,
- year    = {2004},
- url     = {https://www.merl.com/publications/TR2004-086}
- }
Raj, B., Seltzer, M.L., Stern, R.M., "Reconstruction of Missing Features for Robust Speech Recognition", Speech Communication, Vol. 43, No. 4, pp. 275-296, September 2004.
BibTeX TR2004-087 PDF
- @article{Raj2004sep,
- author  = {Raj, B. and Seltzer, M. L. and Stern, R. M.},
- title   = {{Reconstruction of Missing Features for Robust Speech Recognition}},
- journal = {Speech Communication},
- volume  = {43},
- number  = {4},
- pages   = {275--296},
- month   = sep,
- year    = {2004},
- url     = {https://www.merl.com/publications/TR2004-087}
- }