Publications

Wang, Z.-Q., Wichern, G., Watanabe, S., Le Roux, J., "STFT-Domain Neural Speech Enhancement with Very Low Algorithmic Latency", IEEE/ACM Transactions on Audio, Speech, and Language Processing, DOI: 10.1109/TASLP.2022.3224285, Vol. 31, pp. 397-410, December 2022.
BibTeX TR2022-166 PDF
- @article{Wang2022dec2,
- author = {Wang, Zhong-Qiu and Wichern, Gordon and Watanabe, Shinji and Le Roux, Jonathan},
- title = {{STFT}-Domain Neural Speech Enhancement with Very Low Algorithmic Latency},
- journal = {IEEE/ACM Transactions on Audio, Speech, and Language Processing},
- year = 2022,
- volume = 31,
- pages = {397--410},
- month = dec,
- doi = {10.1109/TASLP.2022.3224285},
- issn = {2329-9304},
- url = {https://www.merl.com/publications/TR2022-166}
- }
Venkatesh, S., Wichern, G., Subramanian, A.S., Le Roux, J., "Improved Domain Generalization via Disentangled Multi-Task Learning in Unsupervised Anomalous Sound Detection", DCASE Workshop, Lagrange, M. and Mesaros, A. and Pellegrini, T. and Richard, G. and Serizel, R. and Stowell, D., Eds., November 2022.
BibTeX TR2022-146 PDF Presentation
- @inproceedings{Venkatesh2022nov,
-   author    = {Venkatesh, Satvik and Wichern, Gordon and Subramanian, Aswin Shanmugam and Le Roux, Jonathan},
-   title     = {Improved Domain Generalization via Disentangled Multi-Task Learning in Unsupervised Anomalous Sound Detection},
-   booktitle = {DCASE Workshop},
-   editor    = {Lagrange, M. and Mesaros, A. and Pellegrini, T. and Richard, G. and Serizel, R. and Stowell, D.},
-   year      = {2022},
-   month     = nov,
-   isbn      = {978-952-03-2677-7},
-   url       = {https://www.merl.com/publications/TR2022-146},
- }
Pan, Z., Wichern, G., Germain, F., Subramanian, A.S., Le Roux, J., "Towards End-to-end Speaker Diarization in the Wild", arXiv, November 2022.
BibTeX arXiv
- @article{Pan2022nov,
- author = {Pan, Zexu and Wichern, Gordon and Germain, Francois and Subramanian, Aswin Shanmugam and Le Roux, Jonathan},
- title = {Towards End-to-end Speaker Diarization in the Wild},
- journal = {arXiv},
- year = 2022,
- month = nov,
- eprint = {2211.01299},
- archivePrefix = {arXiv},
- url = {https://arxiv.org/abs/2211.01299}
- }
Tzinis, E., Wichern, G., Subramanian, A.S., Smaragdis, P., Le Roux, J., "Heterogeneous Target Speech Separation", Interspeech, DOI: 10.21437/Interspeech.2022-10717, September 2022, pp. 1796-1800.
BibTeX TR2022-115 PDF Video Presentation
- @inproceedings{Tzinis2022sep,
-   author    = {Tzinis, Efthymios and Wichern, Gordon and Subramanian, Aswin Shanmugam and Smaragdis, Paris and Le Roux, Jonathan},
-   title     = {Heterogeneous Target Speech Separation},
-   booktitle = {Interspeech},
-   pages     = {1796--1800},
-   year      = {2022},
-   month     = sep,
-   doi       = {10.21437/Interspeech.2022-10717},
-   url       = {https://www.merl.com/publications/TR2022-115},
- }
Zhan, S., Wichern, G., Laughman, C.R., Chong, A., Chakrabarty, A., "Calibrating building simulation models using multi-source datasets and meta-learned Bayesian optimization", Energy and Buildings, DOI: 10.1016/j.enbuild.2022.112278, Vol. 270, pp. 112278, September 2022.
BibTeX TR2022-072 PDF
- @article{Zhan2023jan,
- author = {Zhan, Sicheng and Wichern, Gordon and Laughman, Christopher R. and Chong, Adrian and Chakrabarty, Ankush},
- title = {Calibrating building simulation models using multi-source datasets and meta-learned {Bayesian} optimization},
- journal = {Energy and Buildings},
- year = 2022,
- volume = 270,
- pages = {112278},
- month = sep,
- doi = {10.1016/j.enbuild.2022.112278},
- url = {https://www.merl.com/publications/TR2022-072}
- }
Venkatesh, S., Wichern, G., Subramanian, A.S., Le Roux, J., "Disentangled Surrogate Task Learning for Improved Domain Generalization in Unsupervised Anomalous Sound Detection", Tech. Rep. TR2022-092, Detection and Classification of Acoustic Scenes and Events (DCASE) Challenge 2022, July 2022.
BibTeX TR2022-092 PDF Presentation
- @techreport{Venkatesh2022jul,
- author = {Venkatesh, Satvik and Wichern, Gordon and Subramanian, Aswin Shanmugam and Le Roux, Jonathan},
- title = {Disentangled Surrogate Task Learning for Improved Domain Generalization in Unsupervised Anomalous Sound Detection},
- institution = {DCASE2022 Challenge},
- number = {TR2022-092},
- year = 2022,
- month = jul,
- url = {https://www.merl.com/publications/TR2022-092}
- }
Petermann, D., Wichern, G., Wang, Z.-Q., Le Roux, J., "The Cocktail Fork Problem: Three-Stem Audio Separation for Real-World Soundtracks", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP43922.2022.9746005, April 2022, pp. 526-530.
BibTeX TR2022-022 PDF Software
- @inproceedings{Petermann2022apr,
-   author    = {Petermann, Darius and Wichern, Gordon and Wang, Zhong-Qiu and Le Roux, Jonathan},
-   title     = {The Cocktail Fork Problem: Three-Stem Audio Separation for Real-World Soundtracks},
-   booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
-   pages     = {526--530},
-   year      = {2022},
-   month     = apr,
-   doi       = {10.1109/ICASSP43922.2022.9746005},
-   url       = {https://www.merl.com/publications/TR2022-022},
- }
Slizovskaia, O., Wichern, G., Wang, Z.-Q., Le Roux, J., "Locate This, Not That: Class-Conditioned Sound Event DOA Estimation", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP43922.2022.9747604, April 2022, pp. 711-715.
BibTeX TR2022-023 PDF
- @inproceedings{Slizovskaia2022mar,
- author = {Slizovskaia, Olga and Wichern, Gordon and Wang, Zhong-Qiu and Le Roux, Jonathan},
- title = {Locate This, Not That: Class-Conditioned Sound Event {DOA} Estimation},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- year = 2022,
- pages = {711--715},
- month = apr,
- doi = {10.1109/ICASSP43922.2022.9747604},
- url = {https://www.merl.com/publications/TR2022-023}
- }
Zhan, S., Wichern, G., Laughman, C.R., Chakrabarty, A., "Meta-Learned Bayesian Optimization for Building Model Calibration using Attentive Neural Processes", Advances in Neural Information Processing Systems (NeurIPS), December 2021.
BibTeX TR2021-149 PDF
- @inproceedings{Zhan2021dec,
- author = {Zhan, Sicheng and Wichern, Gordon and Laughman, Christopher R. and Chakrabarty, Ankush},
- title = {Meta-Learned {Bayesian} Optimization for Building Model Calibration using Attentive Neural Processes},
- booktitle = {Advances in Neural Information Processing Systems (NeurIPS)},
- year = 2021,
- month = dec,
- url = {https://www.merl.com/publications/TR2021-149}
- }
Wang, Z.-Q., Wichern, G., Le Roux, J., "Convolutive Prediction for Monaural Speech Dereverberation and Noisy-Reverberant Speaker Separation", IEEE/ACM Transactions on Audio, Speech, and Language Processing, DOI: 10.1109/TASLP.2021.3129363, Vol. 29, pp. 3476-3490, December 2021.
BibTeX TR2021-144 PDF
- @article{Wang2021dec,
-   author  = {Wang, Zhong-Qiu and Wichern, Gordon and Le Roux, Jonathan},
-   title   = {Convolutive Prediction for Monaural Speech Dereverberation and Noisy-Reverberant Speaker Separation},
-   journal = {IEEE/ACM Transactions on Audio, Speech, and Language Processing},
-   volume  = {29},
-   pages   = {3476--3490},
-   year    = {2021},
-   month   = dec,
-   doi     = {10.1109/TASLP.2021.3129363},
-   url     = {https://www.merl.com/publications/TR2021-144},
- }
Wang, Z.-Q., Wichern, G., Le Roux, J., "On The Compensation Between Magnitude and Phase in Speech Separation", IEEE Signal Processing Letters, DOI: 10.1109/LSP.2021.3116502, Vol. 28, pp. 2018-2022, November 2021.
BibTeX TR2021-137 PDF
- @article{Wang2021nov2,
-   author  = {Wang, Zhong-Qiu and Wichern, Gordon and Le Roux, Jonathan},
-   title   = {On The Compensation Between Magnitude and Phase in Speech Separation},
-   journal = {IEEE Signal Processing Letters},
-   volume  = {28},
-   pages   = {2018--2022},
-   year    = {2021},
-   month   = nov,
-   doi     = {10.1109/LSP.2021.3116502},
-   url     = {https://www.merl.com/publications/TR2021-137},
- }
Wang, Z.-Q., Wichern, G., Le Roux, J., "Convolutive Prediction for Reverberant Speech Separation", IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (WASPAA), DOI: 10.1109/WASPAA52581.2021.9632667, October 2021, pp. 56-60.
BibTeX TR2021-127 PDF
- @inproceedings{Wang2021oct4,
-   author    = {Wang, Zhong-Qiu and Wichern, Gordon and Le Roux, Jonathan},
-   title     = {Convolutive Prediction for Reverberant Speech Separation},
-   booktitle = {IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (WASPAA)},
-   pages     = {56--60},
-   year      = {2021},
-   month     = oct,
-   publisher = {IEEE},
-   doi       = {10.1109/WASPAA52581.2021.9632667},
-   url       = {https://www.merl.com/publications/TR2021-127},
- }
Wichern, G., Chakrabarty, A., Wang, Z.-Q., Le Roux, J., "Anomalous sound detection using attentive neural processes", IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (WASPAA), DOI: 10.1109/WASPAA52581.2021.9632762, October 2021, pp. 186-190.
BibTeX TR2021-129 PDF
- @inproceedings{Wichern2021oct,
-   author    = {Wichern, Gordon and Chakrabarty, Ankush and Wang, Zhong-Qiu and Le Roux, Jonathan},
-   title     = {Anomalous sound detection using attentive neural processes},
-   booktitle = {IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (WASPAA)},
-   pages     = {186--190},
-   year      = {2021},
-   month     = oct,
-   publisher = {IEEE},
-   doi       = {10.1109/WASPAA52581.2021.9632762},
-   url       = {https://www.merl.com/publications/TR2021-129},
- }
Wang, Z.-Q., Wichern, G., Le Roux, J., "Leveraging Low-Distortion Target Estimates for Improved Speech Enhancement", arXiv, October 2021.
BibTeX arXiv
- @article{Wang2021oct,
- author = {Wang, Zhong-Qiu and Wichern, Gordon and Le Roux, Jonathan},
- title = {Leveraging Low-Distortion Target Estimates for Improved Speech Enhancement},
- journal = {arXiv},
- year = 2021,
- month = oct,
- eprint = {2110.00570},
- archivePrefix = {arXiv},
- url = {https://arxiv.org/abs/2110.00570}
- }
Chakrabarty, A., Wichern, G., Laughman, C.R., "ANP-BBO: Attentive Neural Processes and Batch Bayesian Optimization for Scalable Calibration of Physics-Informed Digital Twins", International Conference on Machine Learning (ICML), July 2021.
BibTeX TR2021-086 PDF
- @inproceedings{Chakrabarty2021jul,
- author = {Chakrabarty, Ankush and Wichern, Gordon and Laughman, Christopher R.},
- title = {{ANP-BBO}: Attentive Neural Processes and Batch {Bayesian} Optimization for Scalable Calibration of Physics-Informed Digital Twins},
- booktitle = {International Conference on Machine Learning (ICML)},
- year = 2021,
- month = jul,
- url = {https://www.merl.com/publications/TR2021-086}
- }
Hung, Y.-N., Wichern, G., Le Roux, J., "Transcription Is All You Need: Learning to Separate Musical Mixtures with Score as Supervision", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP39728.2021.9413358, June 2021, pp. 46-50.
BibTeX TR2021-069 PDF
- @inproceedings{Hung2021jun,
-   author    = {Hung, Yun-Ning and Wichern, Gordon and Le Roux, Jonathan},
-   title     = {Transcription Is All You Need: Learning to Separate Musical Mixtures with Score as Supervision},
-   booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
-   pages     = {46--50},
-   year      = {2021},
-   month     = jun,
-   doi       = {10.1109/ICASSP39728.2021.9413358},
-   issn      = {2379-190X},
-   isbn      = {978-1-7281-7605-5},
-   url       = {https://www.merl.com/publications/TR2021-069},
- }
Moritz, N., Wichern, G., Hori, T., Le Roux, J., "All-in-One Transformer: Unifying Speech Recognition, Audio Tagging, and Event Detection", Interspeech, DOI: 10.21437/Interspeech.2020-2757, October 2020, pp. 3112-3116.
BibTeX TR2020-138 PDF Presentation
- @inproceedings{Moritz2020oct,
-   author    = {Moritz, Niko and Wichern, Gordon and Hori, Takaaki and Le Roux, Jonathan},
-   title     = {All-in-One Transformer: Unifying Speech Recognition, Audio Tagging, and Event Detection},
-   booktitle = {Interspeech},
-   pages     = {3112--3116},
-   year      = {2020},
-   month     = oct,
-   doi       = {10.21437/Interspeech.2020-2757},
-   issn      = {1990-9772},
-   url       = {https://www.merl.com/publications/TR2020-138},
- }
Manilow, E., Wichern, G., Le Roux, J., "Hierarchical Musical Instrument Separation", International Society for Music Information Retrieval (ISMIR) Conference, October 2020, pp. 376-383.
BibTeX TR2020-136 PDF Software
- @inproceedings{Manilow2020oct,
-   author    = {Manilow, Ethan and Wichern, Gordon and Le Roux, Jonathan},
-   title     = {Hierarchical Musical Instrument Separation},
-   booktitle = {International Society for Music Information Retrieval (ISMIR) Conference},
-   pages     = {376--383},
-   year      = {2020},
-   month     = oct,
-   isbn      = {978-0-9813537-0-8},
-   url       = {https://www.merl.com/publications/TR2020-136},
- }
Seetharaman, P., Wichern, G., Pardo, B., Le Roux, J., "Autoclip: Adaptive Gradient Clipping For Source Separation Networks", IEEE International Workshop on Machine Learning for Signal Processing (MLSP), DOI: 10.1109/MLSP49062.2020.9231926, September 2020.
BibTeX TR2020-132 PDF
- @inproceedings{Seetharaman2020sep,
-   author    = {Seetharaman, Prem and Wichern, Gordon and Pardo, Bryan and Le Roux, Jonathan},
-   title     = {Autoclip: Adaptive Gradient Clipping For Source Separation Networks},
-   booktitle = {IEEE International Workshop on Machine Learning for Signal Processing (MLSP)},
-   year      = {2020},
-   month     = sep,
-   publisher = {IEEE},
-   doi       = {10.1109/MLSP49062.2020.9231926},
-   url       = {https://www.merl.com/publications/TR2020-132},
- }
Pishdadian, F., Wichern, G., Le Roux, J., "Finding Strength in Weakness: Learning to Separate Sounds with Weak Supervision", IEEE/ACM Transactions on Audio, Speech, and Language Processing, DOI: 10.1109/TASLP.2020.3013105, Vol. 28, pp. 2386-2399, September 2020.
BibTeX TR2020-126 PDF
- @article{Pishdadian2020sep,
-   author  = {Pishdadian, Fatemeh and Wichern, Gordon and Le Roux, Jonathan},
-   title   = {Finding Strength in Weakness: Learning to Separate Sounds with Weak Supervision},
-   journal = {IEEE/ACM Transactions on Audio, Speech, and Language Processing},
-   volume  = {28},
-   pages   = {2386--2399},
-   year    = {2020},
-   month   = sep,
-   doi     = {10.1109/TASLP.2020.3013105},
-   url     = {https://www.merl.com/publications/TR2020-126},
- }
Seetharaman, P., Wichern, G., Le Roux, J., Pardo, B., "Bootstrapping Unsupervised Deep Music Separation from Primitive Auditory Grouping Principles", ICML 2020 Workshop on Self-supervision in Audio and Speech, July 2020.
BibTeX TR2020-111 PDF
- @inproceedings{Seetharaman2020jul,
-   author    = {Seetharaman, Prem and Wichern, Gordon and Le Roux, Jonathan and Pardo, Bryan},
-   title     = {Bootstrapping Unsupervised Deep Music Separation from Primitive Auditory Grouping Principles},
-   booktitle = {ICML 2020 Workshop on Self-supervision in Audio and Speech},
-   year      = {2020},
-   month     = jul,
-   url       = {https://www.merl.com/publications/TR2020-111},
- }
Pishdadian, F., Wichern, G., Le Roux, J., "Learning to Separate Sounds From Weakly Labeled Scenes", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP40776.2020.9053055, April 2020, pp. 91-95.
BibTeX TR2020-038 PDF Video Presentation
- @inproceedings{Pishdadian2020apr,
-   author    = {Pishdadian, Fatemeh and Wichern, Gordon and Le Roux, Jonathan},
-   title     = {Learning to Separate Sounds From Weakly Labeled Scenes},
-   booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
-   pages     = {91--95},
-   year      = {2020},
-   month     = apr,
-   publisher = {IEEE},
-   doi       = {10.1109/ICASSP40776.2020.9053055},
-   issn      = {2379-190X},
-   isbn      = {978-1-5090-6631-5},
-   url       = {https://www.merl.com/publications/TR2020-038},
- }
Maciejewski, M., Wichern, G., McQuinn, E., Le Roux, J., "WHAMR!: Noisy and Reverberant Single-Channel Speech Separation", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP40776.2020.9053327, April 2020, pp. 696-700.
BibTeX TR2020-042 PDF Video Presentation
- @inproceedings{Maciejewski2020apr,
- author = {Maciejewski, Matthew and Wichern, Gordon and McQuinn, Emmett and Le Roux, Jonathan},
- title = {{WHAMR!}: Noisy and Reverberant Single-Channel Speech Separation},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- year = 2020,
- pages = {696--700},
- month = apr,
- publisher = {IEEE},
- doi = {10.1109/ICASSP40776.2020.9053327},
- issn = {2379-190X},
- isbn = {978-1-5090-6631-5},
- url = {https://www.merl.com/publications/TR2020-042}
- }
Aihara, R., Wichern, G., Le Roux, J., "Deep clusteringによるシングルチャネル音声分離とその発展", The Journal of the Acoustical Society of Japan, DOI: 10.20697/jasj.76.2_101, Vol. 76, No. 2, pp. 101-108, April 2020.
BibTeX J-STAGE
- @article{Aihara2020apr,
-   author  = {Aihara, Ryo and Wichern, Gordon and Le Roux, Jonathan},
-   title   = {Deep clusteringによるシングルチャネル音声分離とその発展},
-   journal = {The Journal of the Acoustical Society of Japan},
-   volume  = {76},
-   number  = {2},
-   pages   = {101--108},
-   year    = {2020},
-   month   = apr,
-   doi     = {10.20697/jasj.76.2_101},
-   url     = {https://www.jstage.jst.go.jp/article/jasj/76/2/76_101/_article/-char/en},
- }
Aihara, R., Wichern, G., Le Roux, J., "Deep Clustering-based Single Channel Speech Separation and Recent Advances", Acoustical Science and Technology, DOI: 10.1250/ast.41.465, Vol. 41, No. 2, pp. 465-471, March 2020.
BibTeX TR2021-020 PDF
- @article{Aihara2020jun,
-   author  = {Aihara, Ryo and Wichern, Gordon and Le Roux, Jonathan},
-   title   = {Deep Clustering-based Single Channel Speech Separation and Recent Advances},
-   journal = {Acoustical Science and Technology},
-   volume  = {41},
-   number  = {2},
-   pages   = {465--471},
-   year    = {2020},
-   month   = mar,
-   doi     = {10.1250/ast.41.465},
-   url     = {https://www.merl.com/publications/TR2021-020},
- }