Publications

Hussein, A., Khurana, S., Wichern, G., Germain, F.G., Le Roux, J., "HASRD: Hierarchical Acoustic and Semantic Representation Disentanglement", arXiv, June 2025.
BibTeX arXiv
- @article{Hussein2025jun,
- author = {Hussein, Amir and Khurana, Sameer and Wichern, Gordon and Germain, François G. and {Le Roux}, Jonathan},
- title = {{HASRD: Hierarchical Acoustic and Semantic Representation Disentanglement}},
- journal = {arXiv},
- year = 2025,
- month = jun,
- eprint = {2506.00843},
- archiveprefix = {arXiv},
- url = {https://www.arxiv.org/abs/2506.00843}
- }
Park, Y.-J., Germain, F.G., Liu, J., Wang, Y., Koike-Akino, T., Wichern, G., Azizan, N., Laughman, C.R., Chakrabarty, A., "Probabilistic Forecasting for Building Energy Systems using Time-Series Foundation Models", arXiv, May 2025.
BibTeX arXiv
- @article{Park2025may,
- author = {Park, Young-Jin and Germain, François G. and Liu, Jing and Wang, Ye and Koike-Akino, Toshiaki and Wichern, Gordon and Azizan, Navid and Laughman, Christopher R. and Chakrabarty, Ankush},
- title = {{Probabilistic Forecasting for Building Energy Systems using Time-Series Foundation Models}},
- journal = {arXiv},
- year = 2025,
- month = may,
- eprint = {2506.00630},
- archiveprefix = {arXiv},
- url = {https://arxiv.org/abs/2506.00630}
- }
Ick, C., Wichern, G., Masuyama, Y., Germain, F.G., Le Roux, J., "Direction-Aware Neural Acoustic Fields for Few-Shot Interpolation of Ambisonic Impulse Responses", arXiv, May 2025.
BibTeX arXiv
- @article{Ick2025may,
- author = {Ick, Christopher and Wichern, Gordon and Masuyama, Yoshiki and Germain, François G. and {Le Roux}, Jonathan},
- title = {{Direction-Aware Neural Acoustic Fields for Few-Shot Interpolation of Ambisonic Impulse Responses}},
- journal = {arXiv},
- year = 2025,
- month = may,
- eprint = {2505.13617},
- archiveprefix = {arXiv},
- url = {https://arxiv.org/abs/2505.13617}
- }
Araki, S., Ito, N., Haeb-Umbach, R., Wichern, G., Wang, Z.-Q., Mitsufuji, Y., "30+ Years of Source Separation Research: Achievements and Future Challenges", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), April 2025.
BibTeX TR2025-036 PDF
- @inproceedings{Araki2025mar,
- author = {Araki, Shoko and Ito, Nobutaka and Haeb-Umbach, Reinhold and Wichern, Gordon and Wang, Zhong-Qiu and Mitsufuji, Yuki},
- title = {{30+ Years of Source Separation Research: Achievements and Future Challenges}},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- year = 2025,
- month = apr,
- url = {https://www.merl.com/publications/TR2025-036}
- }
Ebbers, J., Germain, F.G., Wilkinghoff, K., Wichern, G., Le Roux, J., "No Class Left Behind: A Closer Look at Class Balancing for Audio Tagging", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP49660.2025.10890706, April 2025.
BibTeX TR2025-037 PDF
- @inproceedings{Ebbers2025mar,
- author = {Ebbers, Janek and Germain, François G. and Wilkinghoff, Kevin and Wichern, Gordon and {Le Roux}, Jonathan},
- title = {{No Class Left Behind: A Closer Look at Class Balancing for Audio Tagging}},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- year = 2025,
- month = apr,
- doi = {10.1109/ICASSP49660.2025.10890706},
- url = {https://www.merl.com/publications/TR2025-037}
- }
Masuyama, Y., Wichern, G., Germain, F.G., Ick, C., Le Roux, J., "Retrieval-Augmented Neural Field for HRTF Upsampling and Personalization", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP49660.2025.10889481, April 2025.
BibTeX TR2025-029 PDF Software
- @inproceedings{Masuyama2025mar,
- author = {Masuyama, Yoshiki and Wichern, Gordon and Germain, François G. and Ick, Christopher and {Le Roux}, Jonathan},
- title = {{Retrieval-Augmented Neural Field for HRTF Upsampling and Personalization}},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- year = 2025,
- month = apr,
- doi = {10.1109/ICASSP49660.2025.10889481},
- url = {https://www.merl.com/publications/TR2025-029}
- }
Saijo, K., Ebbers, J., Germain, F.G., Khurana, S., Wichern, G., Le Roux, J., "Leveraging Audio-Only Data for Text-Queried Target Sound Extraction", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP49660.2025.10888769, April 2025.
BibTeX TR2025-033 PDF
- @inproceedings{Saijo2025mar2,
- author = {Saijo, Kohei and Ebbers, Janek and Germain, François G. and Khurana, Sameer and Wichern, Gordon and {Le Roux}, Jonathan},
- title = {{Leveraging Audio-Only Data for Text-Queried Target Sound Extraction}},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- year = 2025,
- month = apr,
- doi = {10.1109/ICASSP49660.2025.10888769},
- url = {https://www.merl.com/publications/TR2025-033}
- }
Saijo, K., Ebbers, J., Germain, F.G., Wichern, G., Le Roux, J., "Task-Aware Unified Source Separation", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP49660.2025.10887819, April 2025.
BibTeX TR2025-032 PDF
- @inproceedings{Saijo2025mar,
- author = {Saijo, Kohei and Ebbers, Janek and Germain, François G. and Wichern, Gordon and {Le Roux}, Jonathan},
- title = {{Task-Aware Unified Source Separation}},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- year = 2025,
- month = apr,
- doi = {10.1109/ICASSP49660.2025.10887819},
- url = {https://www.merl.com/publications/TR2025-032}
- }
Wilkinghoff, K., Yang, H., Ebbers, J., Germain, F.G., Wichern, G., Le Roux, J., "Keeping the Balance: Anomaly Score Calculation for Domain Generalization", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP49660.2025.10888402, April 2025.
BibTeX TR2025-030 PDF
- @inproceedings{Wilkinghoff2025mar,
- author = {Wilkinghoff, Kevin and Yang, Haici and Ebbers, Janek and Germain, François G. and Wichern, Gordon and {Le Roux}, Jonathan},
- title = {{Keeping the Balance: Anomaly Score Calculation for Domain Generalization}},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- year = 2025,
- month = apr,
- doi = {10.1109/ICASSP49660.2025.10888402},
- url = {https://www.merl.com/publications/TR2025-030}
- }
Ick, C., Wichern, G., Masuyama, Y., Germain, F.G., Le Roux, J., "Data Augmentation Using Neural Acoustic Fields With Retrieval-Augmented Pre-training", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP) Satellite Workshop on Generative Data Augmentation for Real-World Signal Processing Applications (GenDA), April 2025.
BibTeX TR2025-045 PDF
- @inproceedings{Ick2025apr,
- author = {Ick, Christopher and Wichern, Gordon and Masuyama, Yoshiki and Germain, François G. and {Le Roux}, Jonathan},
- title = {{Data Augmentation Using Neural Acoustic Fields With Retrieval-Augmented Pre-training}},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP) Satellite Workshop on Generative Data Augmentation for Real-World Signal Processing Applications (GenDA)},
- year = 2025,
- month = apr,
- url = {https://www.merl.com/publications/TR2025-045}
- }
Koo, J., Wichern, G., Germain, F.G., Khurana, S., Le Roux, J., "SMITIN: Self-Monitored Inference-Time INtervention for Generative Music Transformers", IEEE Open Journal of Signal Processing, DOI: 10.1109/OJSP.2025.3534686, Vol. 6, pp. 266-275, January 2025.
BibTeX TR2025-012 PDF Software
- @article{Koo2025jan,
- author = {Koo, Junghyun and Wichern, Gordon and Germain, François G. and Khurana, Sameer and {Le Roux}, Jonathan},
- title = {{SMITIN: Self-Monitored Inference-Time INtervention for Generative Music Transformers}},
- journal = {IEEE Open Journal of Signal Processing},
- year = 2025,
- volume = 6,
- pages = {266--275},
- month = jan,
- doi = {10.1109/OJSP.2025.3534686},
- issn = {2644-1322},
- url = {https://www.merl.com/publications/TR2025-012}
- }
Chakrabarty, A., Wichern, G., Deshpande, V.M., Vinod, A.P., Berntorp, K., Laughman, C.R., "Meta-Learning for Physically-Constrained Neural System Identification", arXiv, January 2025.
BibTeX arXiv
- @article{Chakrabarty2025jan,
- author = {Chakrabarty, Ankush and Wichern, Gordon and Deshpande, Vedang M. and Vinod, Abraham P. and Berntorp, Karl and Laughman, Christopher R.},
- title = {{Meta-Learning for Physically-Constrained Neural System Identification}},
- journal = {arXiv},
- year = 2025,
- month = jan,
- eprint = {2501.06167},
- archiveprefix = {arXiv},
- url = {https://arxiv.org/abs/2501.06167v1}
- }
Chakrabarty, A., Deshpande, V.M., Wichern, G., Berntorp, K., "Physics-Constrained Meta-Learning for Online Adaptation and Estimation in Positioning Applications", IEEE Conference on Decision and Control (CDC), DOI: 10.1109/CDC56724.2024.10886249, December 2024.
BibTeX TR2024-180 PDF
- @inproceedings{Chakrabarty2024dec,
- author    = {Chakrabarty, Ankush and Deshpande, Vedang M. and Wichern, Gordon and Berntorp, Karl},
- title     = {{Physics-Constrained Meta-Learning for Online Adaptation and Estimation in Positioning Applications}},
- booktitle = {IEEE Conference on Decision and Control (CDC)},
- year      = {2024},
- month     = dec,
- doi       = {10.1109/CDC56724.2024.10886249},
- url       = {https://www.merl.com/publications/TR2024-180},
- }
Park, Y.-J., Germain, F.G., Liu, J., Wang, Y., Koike-Akino, T., Wichern, G., Laughman, C.R., Azizan, N., Chakrabarty, A., "Probabilistic Forecasting for Building Energy Systems: Are Time-Series Foundation Models The Answer?", Advances in Neural Information Processing Systems (NeurIPS), December 2024.
BibTeX TR2025-001 PDF
- @inproceedings{Park2024dec,
- author = {Park, Young-Jin and Germain, François G. and Liu, Jing and Wang, Ye and Koike-Akino, Toshiaki and Wichern, Gordon and Laughman, Christopher R. and Azizan, Navid and Chakrabarty, Ankush},
- title = {{Probabilistic Forecasting for Building Energy Systems: Are Time-Series Foundation Models The Answer?}},
- booktitle = {Advances in Neural Information Processing Systems (NeurIPS)},
- year = 2024,
- month = dec,
- url = {https://www.merl.com/publications/TR2025-001}
- }
Ick, C., Wichern, G., Masuyama, Y., Germain, F.G., Le Roux, J., "Spatially-Aware Losses for Enhanced Neural Acoustic Fields", NeurIPS 2024 Audio Imagination Workshop, December 2024.
BibTeX TR2024-169 PDF
- @inproceedings{Ick2024dec,
- author = {Ick, Christopher and Wichern, Gordon and Masuyama, Yoshiki and Germain, François G. and {Le Roux}, Jonathan},
- title = {{Spatially-Aware Losses for Enhanced Neural Acoustic Fields}},
- booktitle = {NeurIPS 2024 Audio Imagination Workshop},
- year = 2024,
- month = dec,
- url = {https://www.merl.com/publications/TR2024-169}
- }
Saijo, K., Wichern, G., Germain, F.G., Pan, Z., Le Roux, J., "TF-Locoformer: Transformer with Local Modeling by Convolution for Speech Separation and Enhancement", International Workshop on Acoustic Signal Enhancement (IWAENC), DOI: 10.1109/IWAENC61483.2024.10694313, September 2024, pp. 205-209.
BibTeX TR2024-126 PDF Software
- @inproceedings{Saijo2024sep2,
- author = {Saijo, Kohei and Wichern, Gordon and Germain, François G. and Pan, Zexu and {Le Roux}, Jonathan},
- title = {{TF-Locoformer: Transformer with Local Modeling by Convolution for Speech Separation and Enhancement}},
- booktitle = {International Workshop on Acoustic Signal Enhancement (IWAENC)},
- year = 2024,
- pages = {205--209},
- month = sep,
- doi = {10.1109/IWAENC61483.2024.10694313},
- issn = {2835-3439},
- isbn = {979-8-3503-6185-8},
- url = {https://www.merl.com/publications/TR2024-126}
- }
Ebbers, J., Germain, F.G., Wichern, G., Le Roux, J., "Sound Event Bounding Boxes", Interspeech, DOI: 10.21437/Interspeech.2024-2075, September 2024, pp. 562-566.
BibTeX TR2024-118 PDF Software
- @inproceedings{Ebbers2024sep,
- author = {Ebbers, Janek and Germain, François G. and Wichern, Gordon and {Le Roux}, Jonathan},
- title = {{Sound Event Bounding Boxes}},
- booktitle = {Interspeech},
- year = 2024,
- pages = {562--566},
- month = sep,
- doi = {10.21437/Interspeech.2024-2075},
- issn = {2958-1796},
- url = {https://www.merl.com/publications/TR2024-118}
- }
Khurana, S., Hori, C., Laurent, A., Wichern, G., Le Roux, J., "ZeroST: Zero-Shot Speech Translation", Interspeech, DOI: 10.21437/Interspeech.2024-1088, September 2024, pp. 392-396.
BibTeX TR2024-122 PDF
- @inproceedings{Khurana2024sep,
- author    = {Khurana, Sameer and Hori, Chiori and Laurent, Antoine and Wichern, Gordon and {Le Roux}, Jonathan},
- title     = {{ZeroST: Zero-Shot Speech Translation}},
- booktitle = {Interspeech},
- year      = {2024},
- pages     = {392--396},
- month     = sep,
- doi       = {10.21437/Interspeech.2024-1088},
- issn      = {2958-1796},
- url       = {https://www.merl.com/publications/TR2024-122},
- }
Pan, Z., Wichern, G., Germain, F.G., Saijo, K., Le Roux, J., "PARIS: Pseudo-AutoRegressIve Siamese Training for Online Speech Separation", Interspeech, DOI: 10.21437/Interspeech.2024-1066, September 2024, pp. 582-586.
BibTeX TR2024-124 PDF
- @inproceedings{Pan2024sep,
- author = {Pan, Zexu and Wichern, Gordon and Germain, François G. and Saijo, Kohei and {Le Roux}, Jonathan},
- title = {{PARIS: Pseudo-AutoRegressIve Siamese Training for Online Speech Separation}},
- booktitle = {Interspeech},
- year = 2024,
- pages = {582--586},
- month = sep,
- doi = {10.21437/Interspeech.2024-1066},
- issn = {2958-1796},
- url = {https://www.merl.com/publications/TR2024-124}
- }
Saijo, K., Wichern, G., Germain, F.G., Pan, Z., Le Roux, J., "Enhanced Reverberation as Supervision for Unsupervised Speech Separation", Interspeech, DOI: 10.21437/Interspeech.2024-1241, September 2024, pp. 607-611.
BibTeX TR2024-116 PDF Software
- @inproceedings{Saijo2024sep,
- author = {Saijo, Kohei and Wichern, Gordon and Germain, François G. and Pan, Zexu and {Le Roux}, Jonathan},
- title = {{Enhanced Reverberation as Supervision for Unsupervised Speech Separation}},
- booktitle = {Interspeech},
- year = 2024,
- pages = {607--611},
- month = sep,
- doi = {10.21437/Interspeech.2024-1241},
- issn = {2958-1796},
- url = {https://www.merl.com/publications/TR2024-116}
- }
He, Y., Cherian, A., Wichern, G., Markham, A., "Deep Neural Room Acoustics Primitive", International Conference on Machine Learning (ICML), June 2024, pp. 17842-17857.
BibTeX TR2024-072 PDF
- @inproceedings{He2024jun,
- author    = {He, Yuhang and Cherian, Anoop and Wichern, Gordon and Markham, Andrew},
- title     = {{Deep Neural Room Acoustics Primitive}},
- booktitle = {International Conference on Machine Learning (ICML)},
- year      = {2024},
- pages     = {17842--17857},
- month     = jun,
- url       = {https://www.merl.com/publications/TR2024-072},
- }
Uhlich, S., Fabbro, G., Hirano, M., Takahashi, S., Wichern, G., Le Roux, J., Chakraborty, D., Mohanty, S., Li, K., Luo, Y., Yu, J., Gu, R., Solovyev, R., Stempkovskiy, A., Habruseva, T., Sukhovei, M., Mitsufuji, Y., "The Sound Demixing Challenge 2023 – Cinematic Demixing Track", Transactions of the International Society for Music Information Retrieval, DOI: 10.5334/tismir.172, Vol. 7, No. 1, pp. 44-62, May 2024.
BibTeX TR2024-047 PDF
- @article{Uhlich2024may,
- author = {Uhlich, Stefan and Fabbro, Giorgio and Hirano, Masato and Takahashi, Shusuke and Wichern, Gordon and {Le Roux}, Jonathan and Chakraborty, Dipam and Mohanty, Sharada and Li, Kai and Luo, Yi and Yu, Jianwei and Gu, Rongzhi and Solovyev, Roman and Stempkovskiy, Alexander and Habruseva, Tatiana and Sukhovei, Mikhail and Mitsufuji, Yuki},
- title = {{The Sound Demixing Challenge 2023 – Cinematic Demixing Track}},
- journal = {Transactions of the International Society for Music Information Retrieval},
- year = 2024,
- volume = 7,
- number = 1,
- pages = {44--62},
- month = may,
- doi = {10.5334/tismir.172},
- url = {https://www.merl.com/publications/TR2024-047}
- }
Pan, Z., Wichern, G., Germain, F.G., Subramanian, A., Le Roux, J., "Late Audio-Visual Fusion for In-The-Wild Speaker Diarization", Hands-free Speech Communication and Microphone Arrays (HSCMA), DOI: 10.1109/ICASSPW62465.2024.10626914, April 2024, pp. 174-178.
BibTeX TR2024-029 PDF
- @inproceedings{Pan2024apr,
- author = {Pan, Zexu and Wichern, Gordon and Germain, François G. and Subramanian, Aswin and {Le Roux}, Jonathan},
- title = {{Late Audio-Visual Fusion for In-The-Wild Speaker Diarization}},
- booktitle = {Hands-free Speech Communication and Microphone Arrays (HSCMA)},
- year = 2024,
- pages = {174--178},
- month = apr,
- publisher = {IEEE},
- doi = {10.1109/ICASSPW62465.2024.10626914},
- isbn = {979-8-3503-7451-3},
- url = {https://www.merl.com/publications/TR2024-029}
- }
Koo, J., Wichern, G., Germain, F.G., Khurana, S., Le Roux, J., "SMITIN: Self-Monitored Inference-Time INtervention for Generative Music Transformers", arXiv, April 2024.
BibTeX arXiv
- @article{Koo2024apr2,
- author = {Koo, Junghyun and Wichern, Gordon and Germain, François G. and Khurana, Sameer and {Le Roux}, Jonathan},
- title = {{SMITIN: Self-Monitored Inference-Time INtervention for Generative Music Transformers}},
- journal = {arXiv},
- year = 2024,
- month = apr,
- eprint = {2404.02252},
- archiveprefix = {arXiv},
- url = {https://arxiv.org/abs/2404.02252}
- }
Koo, J., Wichern, G., Germain, F.G., Khurana, S., Le Roux, J., "Understanding and Controlling Generative Music Transformers by Probing Individual Attention Heads", IEEE ICASSP Satellite Workshop on Explainable Machine Learning for Speech and Audio (XAI-SA), April 2024.
BibTeX TR2024-032 PDF
- @inproceedings{Koo2024apr,
- author = {Koo, Junghyun and Wichern, Gordon and Germain, François G. and Khurana, Sameer and {Le Roux}, Jonathan},
- title = {{Understanding and Controlling Generative Music Transformers by Probing Individual Attention Heads}},
- booktitle = {IEEE ICASSP Satellite Workshop on Explainable Machine Learning for Speech and Audio (XAI-SA)},
- year = 2024,
- month = apr,
- url = {https://www.merl.com/publications/TR2024-032}
- }