Publications

Giammarino, V., Queeney, J., Paschalidis, I.C., "Adversarial Imitation Learning from Visual Observations using Latent Information", Transactions on Machine Learning Research (TMLR), June 2024.
BibTeX TR2024-068 PDF
- @article{Giammarino2024jun,
- author  = {Giammarino, Vittorio and Queeney, James and Paschalidis, Ioannis Ch.},
- title   = {{Adversarial Imitation Learning from Visual Observations using Latent Information}},
- journal = {Transactions on Machine Learning Research (TMLR)},
- month   = jun,
- year    = {2024},
- issn    = {2835-8856},
- url     = {https://www.merl.com/publications/TR2024-068}
- }
He, Y., Cherian, A., Wichern, G., Markham, A., "Deep Neural Room Acoustics Primitive", International Conference on Machine Learning (ICML), June 2024, pp. 17842-17857.
BibTeX TR2024-072 PDF
- @inproceedings{He2024jun,
- author    = {He, Yuhang and Cherian, Anoop and Wichern, Gordon and Markham, Andrew},
- title     = {{Deep Neural Room Acoustics Primitive}},
- booktitle = {International Conference on Machine Learning (ICML)},
- pages     = {17842--17857},
- month     = jun,
- year      = {2024},
- url       = {https://www.merl.com/publications/TR2024-072}
- }
Reiter, R., Quirynen, R., Diehl, M., Di Cairano, S., "Equivariant Deep Learning of Mixed-Integer Optimal Control Solutions for Vehicle Decision Making and Motion Planning", IEEE Transactions on Control Systems Technology, DOI: 10.1109/TCST.2024.3400571, May 2024.
BibTeX TR2024-056 PDF
- @article{Reiter2024may2,
- author  = {Reiter, Rudolf and Quirynen, Rien and Diehl, Moritz and {Di Cairano}, Stefano},
- title   = {{Equivariant Deep Learning of Mixed-Integer Optimal Control Solutions for Vehicle Decision Making and Motion Planning}},
- journal = {IEEE Transactions on Control Systems Technology},
- month   = may,
- year    = {2024},
- doi     = {10.1109/TCST.2024.3400571},
- url     = {https://www.merl.com/publications/TR2024-056}
- }
Uhlich, S., Fabbro, G., Hirano, M., Takahashi, S., Wichern, G., Le Roux, J., Chakraborty, D., Mohanty, S., Li, K., Luo, Y., Yu, J., Gu, R., Solovyev, R., Stempkovskiy, A., Habruseva, T., Sukhovei, M., Mitsufuji, Y., "The Sound Demixing Challenge 2023 – Cinematic Demixing Track", Transactions of the International Society for Music Information Retrieval, DOI: 10.5334/tismir.172, Vol. 7, No. 1, pp. 44-62, May 2024.
BibTeX TR2024-047 PDF
- @article{Uhlich2024may,
- author = {Uhlich, Stefan and Fabbro, Giorgio and Hirano, Masato and Takahashi, Shusuke and Wichern, Gordon and {Le Roux}, Jonathan and Chakraborty, Dipam and Mohanty, Sharada and Li, Kai and Luo, Yi and Yu, Jianwei and Gu, Rongzhi and Solovyev, Roman and Stempkovskiy, Alexander and Habruseva, Tatiana and Sukhovei, Mikhail and Mitsufuji, Yuki},
- title = {{The Sound Demixing Challenge 2023 -- Cinematic Demixing Track}},
- journal = {Transactions of the International Society for Music Information Retrieval},
- year = 2024,
- volume = 7,
- number = 1,
- pages = {44--62},
- month = may,
- doi = {10.5334/tismir.172},
- url = {https://www.merl.com/publications/TR2024-047}
- }
Jeon, C.-B., Wichern, G., Germain, F.G., Le Roux, J., "Why does music source separation benefit from cacophony?", IEEE ICASSP Satellite Workshop on Explainable Machine Learning for Speech and Audio (XAI-SA), DOI: 10.1109/ICASSPW62465.2024.10669899, March 2024, pp. 873-877.
BibTeX TR2024-030 PDF Video
- @inproceedings{Jeon2024mar,
- author = {Jeon, Chang-Bin and Wichern, Gordon and Germain, Fran{\c{c}}ois G. and {Le Roux}, Jonathan},
- title = {{Why does music source separation benefit from cacophony?}},
- booktitle = {IEEE ICASSP Satellite Workshop on Explainable Machine Learning for Speech and Audio (XAI-SA)},
- year = 2024,
- pages = {873--877},
- month = mar,
- publisher = {IEEE},
- doi = {10.1109/ICASSPW62465.2024.10669899},
- isbn = {979-8-3503-7451-3},
- url = {https://www.merl.com/publications/TR2024-030}
- }
Fernandez-Menduina, S., Rapp, J., Mansour, H., Greiff, M., Parsons, K., "Tracking Beyond the Unambiguous Range with Modulo Single-Photon Lidar", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP48485.2024.10446835, March 2024, pp. 6-10.
BibTeX TR2024-021 PDF
- @inproceedings{Fernandez-Menduina2024mar,
- author    = {Fernandez-Menduina, Samuel and Rapp, Joshua and Mansour, Hassan and Greiff, Marcus and Parsons, Kieran},
- title     = {{Tracking Beyond the Unambiguous Range with Modulo Single-Photon Lidar}},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- pages     = {6--10},
- month     = mar,
- year      = {2024},
- doi       = {10.1109/ICASSP48485.2024.10446835},
- url       = {https://www.merl.com/publications/TR2024-021}
- }
Masuyama, Y., Wichern, G., Germain, F.G., Pan, Z., Khurana, S., Hori, C., Le Roux, J., "NIIRF: Neural IIR Filter Field for HRTF Upsampling and Personalization", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP48485.2024.10448477, March 2024, pp. 1016-1020.
BibTeX TR2024-026 PDF Software
- @inproceedings{Masuyama2024mar,
- author = {Masuyama, Yoshiki and Wichern, Gordon and Germain, Fran{\c{c}}ois G. and Pan, Zexu and Khurana, Sameer and Hori, Chiori and {Le Roux}, Jonathan},
- title = {{NIIRF: Neural IIR Filter Field for HRTF Upsampling and Personalization}},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- year = 2024,
- pages = {1016--1020},
- month = mar,
- doi = {10.1109/ICASSP48485.2024.10448477},
- url = {https://www.merl.com/publications/TR2024-026}
- }
Boeddeker, C., Subramanian, A.S., Wichern, G., Haeb-Umbach, R., Le Roux, J., "TS-SEP: Joint Diarization and Separation Conditioned on Estimated Speaker Embeddings", IEEE/ACM Transactions on Audio, Speech, and Language Processing, DOI: 10.1109/TASLP.2024.3350887, Vol. 32, pp. 1185-1197, February 2024.
BibTeX TR2024-006 PDF Software
- @article{Boeddeker2024feb,
- author  = {Boeddeker, Christoph and Subramanian, Aswin Shanmugam and Wichern, Gordon and Haeb-Umbach, Reinhold and {Le Roux}, Jonathan},
- title   = {{TS-SEP: Joint Diarization and Separation Conditioned on Estimated Speaker Embeddings}},
- journal = {IEEE/ACM Transactions on Audio, Speech, and Language Processing},
- volume  = {32},
- pages   = {1185--1197},
- month   = feb,
- year    = {2024},
- doi     = {10.1109/TASLP.2024.3350887},
- issn    = {2329-9304},
- url     = {https://www.merl.com/publications/TR2024-006}
- }
Ma, Y., Rapp, J., Boufounos, P.T., Mansour, H., "A model of spatial resolution uncertainty for Compton camera imaging", International Conference on Advancements in Nuclear Instrumentation Measurement Methods and their Applications (ANIMMA), DOI: 10.1051/epjconf/202328810002, January 2024, pp. 10002.
BibTeX TR2024-005 PDF
- @inproceedings{Ma2024jan,
- author = {Ma, Yanting and Rapp, Joshua and Boufounos, Petros T. and Mansour, Hassan},
- title = {{A model of spatial resolution uncertainty for Compton camera imaging}},
- booktitle = {International Conference on Advancements in Nuclear Instrumentation Measurement Methods and their Applications (ANIMMA)},
- year = 2024,
- series = {EPJ Web of Conferences},
- volume = 288,
- pages = 10002,
- month = jan,
- doi = {10.1051/epjconf/202328810002},
- url = {https://www.merl.com/publications/TR2024-005}
- }
Liu, X., Paul, S., Chatterjee, M., Cherian, A., "CAVEN: An Embodied Conversational Agent for Efficient Audio-Visual Navigation in Noisy Environments", AAAI Conference on Artificial Intelligence, DOI: 10.1609/aaai.v38i4.28167, December 2023, pp. 3765-3773.
BibTeX TR2023-154 PDF
- @inproceedings{Liu2023dec2,
- author    = {Liu, Xiulong and Paul, Sudipta and Chatterjee, Moitreya and Cherian, Anoop},
- title     = {{CAVEN: An Embodied Conversational Agent for Efficient Audio-Visual Navigation in Noisy Environments}},
- booktitle = {Proceedings of the 38th AAAI Conference on Artificial Intelligence},
- pages     = {3765--3773},
- month     = dec,
- year      = {2023},
- doi       = {10.1609/aaai.v38i4.28167},
- url       = {https://www.merl.com/publications/TR2023-154}
- }
Pan, Z., Wichern, G., Masuyama, Y., Germain, F.G., Khurana, S., Hori, C., Le Roux, J., "Scenario-Aware Audio-Visual TF-GridNet for Target Speech Extraction", IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU), DOI: 10.1109/ASRU57964.2023.10389618, December 2023.
BibTeX TR2023-152 PDF Video
- @inproceedings{Pan2023dec2,
- author = {Pan, Zexu and Wichern, Gordon and Masuyama, Yoshiki and Germain, Fran{\c{c}}ois G. and Khurana, Sameer and Hori, Chiori and {Le Roux}, Jonathan},
- title = {{Scenario-Aware Audio-Visual TF-GridNet for Target Speech Extraction}},
- booktitle = {IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU)},
- year = 2023,
- month = dec,
- doi = {10.1109/ASRU57964.2023.10389618},
- isbn = {979-8-3503-0689-7},
- url = {https://www.merl.com/publications/TR2023-152}
- }
He, Y., Shin, S., Cherian, A., Markham, A., Trigoni, N., "Sound3DVDet: 3D Sound Source Detection using Multiview Microphone Array and RGB Images", IEEE Winter Conference on Applications of Computer Vision (WACV), December 2023, pp. 5496-5507.
BibTeX TR2023-144 PDF
- @inproceedings{He2023dec,
- author = {He, Yuhang and Shin, Sangyun and Cherian, Anoop and Markham, Andrew and Trigoni, Niki},
- title = {{Sound3DVDet: 3D Sound Source Detection using Multiview Microphone Array and RGB Images}},
- booktitle = {IEEE Winter Conference on Applications of Computer Vision (WACV)},
- year = 2023,
- pages = {5496--5507},
- month = dec,
- url = {https://www.merl.com/publications/TR2023-144}
- }
Fujihashi, T., Koike-Akino, T., Watanabe, T., "Rateless Deep Graph Joint Source Channel Coding for Holographic-Type Communication", IEEE Global Communications Conference (GLOBECOM), DOI: 10.1109/GLOBECOM54140.2023.10437920, December 2023.
BibTeX TR2023-139 PDF
- @inproceedings{Fujihashi2023dec,
- author    = {Fujihashi, Takuya and Koike-Akino, Toshiaki and Watanabe, Takashi},
- title     = {{Rateless Deep Graph Joint Source Channel Coding for Holographic-Type Communication}},
- booktitle = {IEEE Global Communications Conference (GLOBECOM)},
- month     = dec,
- year      = {2023},
- publisher = {IEEE},
- doi       = {10.1109/GLOBECOM54140.2023.10437920},
- issn      = {2576-6813},
- isbn      = {979-8-3503-1090-0},
- url       = {https://www.merl.com/publications/TR2023-139}
- }
Shimane, Y., Miraldo, P., Berntorp, K., Greiff, M., Elango, P., Weiss, A., "High-Fidelity Simulation of Horizon-Based Optical Navigation with Open-Source Software", International Astronautical Congress (IAC), October 2023, Paper IAC-23,C1,5,9,x78805.
BibTeX TR2023-128 PDF
- @inproceedings{Shimane2023oct,
- author = {Shimane, Yuri and Miraldo, Pedro and Berntorp, Karl and Greiff, Marcus and Elango, Purnanand and Weiss, Avishai},
- title = {{High-Fidelity Simulation of Horizon-Based Optical Navigation with Open-Source Software}},
- booktitle = {International Astronautical Congress (IAC)},
- year = 2023,
- month = oct,
- note = {Paper IAC-23,C1,5,9,x78805},
- url = {https://www.merl.com/publications/TR2023-128}
- }
Qiao, H., Laughman, C.R., "Theoretical analysis of cycling losses in air source heat pump systems", International Congress of Refrigeration (ICR), DOI: 10.18462/iir.icr.2023.0773, September 2023.
BibTeX TR2023-127 PDF
- @inproceedings{Qiao2023sep,
- author = {Qiao, Hongtao and Laughman, Christopher R.},
- title = {{Theoretical analysis of cycling losses in air source heat pump systems}},
- booktitle = {International Congress of Refrigeration (ICR)},
- year = 2023,
- month = sep,
- publisher = {International Institute of Refrigeration},
- doi = {10.18462/iir.icr.2023.0773},
- issn = {0151-1637},
- isbn = {978-2-36215-055-5},
- url = {https://www.merl.com/publications/TR2023-127}
- }
Falcon Perez, R., Wichern, G., Germain, F., Le Roux, J., "Location as supervision for weakly supervised multi-channel source separation of machine sounds", IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (WASPAA), DOI: 10.1109/WASPAA58266.2023.10248128, September 2023.
BibTeX TR2023-119 PDF Presentation
- @inproceedings{FalconPerez2023aug,
- author = {Falcon Perez, Ricardo and Wichern, Gordon and Germain, Fran{\c{c}}ois G. and {Le Roux}, Jonathan},
- title = {{Location as supervision for weakly supervised multi-channel source separation of machine sounds}},
- booktitle = {IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (WASPAA)},
- year = 2023,
- month = sep,
- publisher = {IEEE},
- doi = {10.1109/WASPAA58266.2023.10248128},
- issn = {1947-1629},
- isbn = {979-8-3503-2372-6},
- url = {https://www.merl.com/publications/TR2023-119}
- }
Petermann, D., Wichern, G., Subramanian, A.S., Wang, Z.-Q., Le Roux, J., "Tackling the Cocktail Fork Problem for Separation and Transcription of Real-World Soundtracks", IEEE/ACM Transactions on Audio, Speech, and Language Processing, DOI: 10.1109/TASLP.2023.3290428, Vol. 31, pp. 2592-2605, September 2023.
BibTeX TR2023-113 PDF
- @article{Petermann2023sep,
- author  = {Petermann, Darius and Wichern, Gordon and Subramanian, Aswin Shanmugam and Wang, Zhong-Qiu and {Le Roux}, Jonathan},
- title   = {{Tackling the Cocktail Fork Problem for Separation and Transcription of Real-World Soundtracks}},
- journal = {IEEE/ACM Transactions on Audio, Speech, and Language Processing},
- volume  = {31},
- pages   = {2592--2605},
- month   = sep,
- year    = {2023},
- doi     = {10.1109/TASLP.2023.3290428},
- issn    = {2329-9304},
- url     = {https://www.merl.com/publications/TR2023-113}
- }
Salatiello, A., Wang, Y., Wichern, G., Koike-Akino, T., Ohta, Y., Kaneko, Y., Laughman, C.R., Chakrabarty, A., "Synthesizing Building Operation Data with Generative Models: VAEs, GANs, or Something In Between?", ACM e-Energy Conference, DOI: 10.1145/3599733.3600260, June 2023.
BibTeX TR2023-072 PDF
- @inproceedings{Salatiello2023jun,
- author = {Salatiello, Alessandro and Wang, Ye and Wichern, Gordon and Koike-Akino, Toshiaki and Ohta, Yoshihiro and Kaneko, Yosuke and Laughman, Christopher R. and Chakrabarty, Ankush},
- title = {{Synthesizing Building Operation Data with Generative Models: VAEs, GANs, or Something In Between?}},
- booktitle = {ACM e-Energy Conference},
- year = 2023,
- month = jun,
- doi = {10.1145/3599733.3600260},
- url = {https://www.merl.com/publications/TR2023-072}
- }
Chen, K., Wichern, G., Germain, F., Le Roux, J., "Pac-HuBERT: Self-Supervised Music Source Separation via Primitive Auditory Clustering and Hidden-Unit BERT", IEEE ICASSP Satellite Workshop on Self-supervision in Audio, Speech and Beyond (SASB), DOI: 10.1109/ICASSPW59220.2023.10193575, May 2023.
BibTeX TR2023-030 PDF
- @inproceedings{Chen2023may,
- author = {Chen, Ke and Wichern, Gordon and Germain, Fran{\c{c}}ois G. and {Le Roux}, Jonathan},
- title = {{Pac-HuBERT: Self-Supervised Music Source Separation via Primitive Auditory Clustering and Hidden-Unit BERT}},
- booktitle = {IEEE ICASSP Satellite Workshop on Self-supervision in Audio, Speech and Beyond (SASB)},
- year = 2023,
- month = may,
- doi = {10.1109/ICASSPW59220.2023.10193575},
- isbn = {979-8-3503-0261-5},
- url = {https://www.merl.com/publications/TR2023-030}
- }
Aralikatti, R., Boeddeker, C., Wichern, G., Subramanian, A.S., Le Roux, J., "Reverberation as Supervision for Speech Separation", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP49357.2023.10095022, May 2023, pp. 1-5.
BibTeX TR2023-016 PDF
- @inproceedings{Aralikatti2023may,
- author    = {Aralikatti, Rohith and Boeddeker, Christoph and Wichern, Gordon and Subramanian, Aswin Shanmugam and {Le Roux}, Jonathan},
- title     = {{Reverberation as Supervision for Speech Separation}},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- pages     = {1--5},
- month     = may,
- year      = {2023},
- publisher = {IEEE},
- doi       = {10.1109/ICASSP49357.2023.10095022},
- url       = {https://www.merl.com/publications/TR2023-016}
- }
Bralios, D., Tzinis, E., Wichern, G., Smaragdis, P., Le Roux, J., "Latent Iterative Refinement for Modular Source Separation", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP49357.2023.10096897, May 2023, pp. 1-5.
BibTeX TR2023-019 PDF
- @inproceedings{Bralios2023may,
- author    = {Bralios, Dimitrios and Tzinis, Efthymios and Wichern, Gordon and Smaragdis, Paris and {Le Roux}, Jonathan},
- title     = {{Latent Iterative Refinement for Modular Source Separation}},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- pages     = {1--5},
- month     = may,
- year      = {2023},
- publisher = {IEEE},
- doi       = {10.1109/ICASSP49357.2023.10096897},
- url       = {https://www.merl.com/publications/TR2023-019}
- }
Jeon, E.S., Lohit, S., Anirudh, R., Turaga, P., "Robust Time Series Recovery and Classification Using Test-time Noise Simulator Networks", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP49357.2023.10096888, May 2023.
BibTeX TR2023-021 PDF Presentation
- @inproceedings{Jeon2023may,
- author    = {Jeon, Eun Som and Lohit, Suhas and Anirudh, Rushil and Turaga, Pavan},
- title     = {{Robust Time Series Recovery and Classification Using Test-time Noise Simulator Networks}},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- month     = may,
- year      = {2023},
- publisher = {IEEE},
- doi       = {10.1109/ICASSP49357.2023.10096888},
- url       = {https://www.merl.com/publications/TR2023-021}
- }
Petermann, D., Wichern, G., Subramanian, A.S., Le Roux, J., "Hyperbolic Audio Source Separation", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP49357.2023.10094943, May 2023, pp. 1-5.
BibTeX TR2023-017 PDF Video Software
- @inproceedings{Petermann2023may,
- author    = {Petermann, Darius and Wichern, Gordon and Subramanian, Aswin Shanmugam and {Le Roux}, Jonathan},
- title     = {{Hyperbolic Audio Source Separation}},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- pages     = {1--5},
- month     = may,
- year      = {2023},
- publisher = {IEEE},
- doi       = {10.1109/ICASSP49357.2023.10094943},
- url       = {https://www.merl.com/publications/TR2023-017}
- }
Tzinis, E., Wichern, G., Smaragdis, P., Le Roux, J., "Optimal Condition Training for Target Source Separation", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP49357.2023.10095128, May 2023, pp. 1-5.
BibTeX TR2023-018 PDF
- @inproceedings{Tzinis2023may,
- author    = {Tzinis, Efthymios and Wichern, Gordon and Smaragdis, Paris and {Le Roux}, Jonathan},
- title     = {{Optimal Condition Training for Target Source Separation}},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- pages     = {1--5},
- month     = may,
- year      = {2023},
- publisher = {IEEE},
- doi       = {10.1109/ICASSP49357.2023.10095128},
- url       = {https://www.merl.com/publications/TR2023-018}
- }
Aich, A., Peng, K.-C., Roy-Chowdhury, A.K., "Cross-Domain Video Anomaly Detection without Target Domain Adaptation", IEEE Winter Conference on Applications of Computer Vision (WACV), Crandall, D., Gong, B., Lee, Y.J., Souvenir, R., Yu, S., Eds., DOI: 10.1109/WACV56688.2023.00261, January 2023, pp. 2578-2590.
BibTeX TR2023-001 PDF Video Presentation
- @inproceedings{Aich2023jan,
- author    = {Aich, Abhishek and Peng, Kuan-Chuan and Roy-Chowdhury, Amit K.},
- title     = {{Cross-Domain Video Anomaly Detection without Target Domain Adaptation}},
- booktitle = {IEEE Winter Conference on Applications of Computer Vision (WACV)},
- editor    = {Crandall, D. and Gong, B. and Lee, Y. J. and Souvenir, R. and Yu, S.},
- pages     = {2578--2590},
- month     = jan,
- year      = {2023},
- publisher = {IEEE},
- doi       = {10.1109/WACV56688.2023.00261},
- issn      = {2642-9381},
- isbn      = {978-1-6654-9346-8},
- url       = {https://www.merl.com/publications/TR2023-001}
- }