Publications

Jung, Y.G., Park, J., Yoon, J., Peng, K.-C., Kim, W., Teoh, A.B.J., Camps, O., "TailedCore: Few-Shot Sampling for Unsupervised Long-Tail Noisy Anomaly Detection", IEEE Conference on Computer Vision and Pattern Recognition (CVPR), Isola, P. and Kjellström, H. and Lepetit, V. and Li, F. and Su, H. and Tang, S., Eds., DOI: 10.1109/CVPR52734.2025.02378, June 2025, pp. 25539-25548.
BibTeX TR2025-077 PDF Video Presentation
- @inproceedings{Jung2025jun,
- author = {Jung, Yoon G. and Park, Jaewoo and Yoon, Jaeho and Peng, Kuan-Chuan and Kim, Wonchul and Teoh, Andrew B. J. and Camps, Octavia},
- title = {{TailedCore: Few-Shot Sampling for Unsupervised Long-Tail Noisy Anomaly Detection}},
- booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
- year = 2025,
- editor = {Isola, P. and Kjellström, H. and Lepetit, V. and Li, F. and Su, H. and Tang, S.},
- pages = {25539--25548},
- month = jun,
- publisher = {IEEE},
- doi = {10.1109/CVPR52734.2025.02378},
- issn = {2575-7075},
- isbn = {979-8-3315-4364-8},
- url = {https://www.merl.com/publications/TR2025-077}
- }
Lai, Y.-H., Ebbers, J., Wang, Y.-C.F., Germain, F.G., Jones, M.J., Chatterjee, M., "UWAV: Uncertainty-weighted Weakly-supervised Audio-Visual Video Parsing", IEEE Conference on Computer Vision and Pattern Recognition (CVPR), DOI: 10.1109/CVPR52734.2025.01266, June 2025, pp. 13561-13570.
BibTeX TR2025-072 PDF
- @inproceedings{Lai2025jun,
- author = {Lai, Yung-Hsuan and Ebbers, Janek and Wang, Yu-Chiang Frank and Germain, François G. and Jones, Michael J. and Chatterjee, Moitreya},
- title = {{UWAV: Uncertainty-weighted Weakly-supervised Audio-Visual Video Parsing}},
- booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
- year = 2025,
- pages = {13561--13570},
- month = jun,
- publisher = {IEEE},
- doi = {10.1109/CVPR52734.2025.01266},
- url = {https://www.merl.com/publications/TR2025-072}
- }
Hegde, D., Lohit, S., Peng, K.-C., Jones, M.J., Patel, V.M., "Multimodal 3D Object Detection on Unseen Domains", IEEE Conference on Computer Vision and Pattern Recognition (CVPR) Workshop, June 2025, pp. 2499-2509.
BibTeX TR2025-078 PDF
- @inproceedings{Hegde2025jun,
- author = {Hegde, Deepti and Lohit, Suhas and Peng, Kuan-Chuan and Jones, Michael J. and Patel, Vishal M.},
- title = {{Multimodal 3D Object Detection on Unseen Domains}},
- booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR) Workshop},
- pages = {2499--2509},
- month = jun,
- year = {2025},
- url = {https://www.merl.com/publications/TR2025-078}
- }
Koike-Akino, T., Chen, X., Liu, J., Wang, Y., Wang, P., Brand, M., "LatentLLM: Attention-Aware Joint Tensor Compression", IEEE Conference on Computer Vision and Pattern Recognition (CVPR) Workshop, June 2025.
BibTeX TR2025-075 PDF Presentation
- @inproceedings{Koike-Akino2025jun,
- author = {Koike-Akino, Toshiaki and Chen, Xiangyu and Liu, Jing and Wang, Ye and Wang, Pu and Brand, Matthew},
- title = {{LatentLLM: Attention-Aware Joint Tensor Compression}},
- booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR) Workshop},
- year = 2025,
- month = jun,
- url = {https://www.merl.com/publications/TR2025-075}
- }
Li, K., Zhang, T., Peng, K.-C., Wang, G., "PF3Det: A Prompted Foundation Feature Assisted Visual LiDAR 3D Detector", IEEE Conference on Computer Vision and Pattern Recognition (CVPR) Workshop, June 2025, pp. 3787-3796.
BibTeX TR2025-076 PDF Presentation
- @inproceedings{Li2025jun,
- author = {Li, Kaidong and Zhang, Tianxiao and Peng, Kuan-Chuan and Wang, Guanghui},
- title = {{PF3Det: A Prompted Foundation Feature Assisted Visual LiDAR 3D Detector}},
- booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR) Workshop},
- year = 2025,
- pages = {3787--3796},
- month = jun,
- url = {https://www.merl.com/publications/TR2025-076}
- }
Chen, X., Liu, J., Wang, Y., Brand, M., Wang, P., Koike-Akino, T., "TuneComp: Joint Fine-Tuning and Compression for Large Foundation Models", IEEE Conference on Computer Vision and Pattern Recognition (CVPR) Workshop on Efficient and On-Device Generation, June 2025.
BibTeX TR2025-079 PDF Presentation
- @inproceedings{Chen2025jun,
- author = {Chen, Xiangyu and Liu, Jing and Wang, Ye and Brand, Matthew and Wang, Pu and Koike-Akino, Toshiaki},
- title = {{TuneComp: Joint Fine-Tuning and Compression for Large Foundation Models}},
- booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR) Workshop on Efficient and On-Device Generation},
- year = 2025,
- month = jun,
- url = {https://www.merl.com/publications/TR2025-079}
- }
Sawada, N., Miraldo, P., Lohit, S., Marks, T.K., Chatterjee, M., "FreBIS: Frequency-Based Stratification for Neural Implicit Surface Representations", IEEE Conference on Computer Vision and Pattern Recognition (CVPR) Workshop, DOI: 10.1109/CVPRW67362.2025.00041, June 2025, pp. 369-379.
BibTeX TR2025-074 PDF
- @inproceedings{Sawada2025jun,
- author = {Sawada, Naoko and Miraldo, Pedro and Lohit, Suhas and Marks, Tim K. and Chatterjee, Moitreya},
- title = {{FreBIS: Frequency-Based Stratification for Neural Implicit Surface Representations}},
- booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR) Workshop},
- year = 2025,
- pages = {369--379},
- month = jun,
- doi = {10.1109/CVPRW67362.2025.00041},
- url = {https://www.merl.com/publications/TR2025-074}
- }
Ni, Y., Wen, S., Koniusz, P., Cherian, A., "Noise Consistency Regularization for Improved Subject-Driven Image Synthesis", IEEE Conference on Computer Vision and Pattern Recognition (CVPR) Workshops, June 2025, pp. 3116-3126.
BibTeX TR2025-073 PDF
- @inproceedings{Ni2025jun,
- author = {Ni, Yao and Wen, Song and Koniusz, Piotr and Cherian, Anoop},
- title = {{Noise Consistency Regularization for Improved Subject-Driven Image Synthesis}},
- booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
- year = 2025,
- pages = {3116--3126},
- month = jun,
- publisher = {CVF},
- url = {https://www.merl.com/publications/TR2025-073}
- }
Hazarika, A., Guo, J., Parsons, K., Nagai, Y., Sumi, T., Orlik, P.V., Rahmati, M., "AgriNex: Next-Gen Smart Agriculture with LLM-Integrated UAV-IoT Solutions", IEEE International Conference on Communications Workshops (ICC), June 2025.
BibTeX TR2025-081 PDF
- @inproceedings{Hazarika2025jun,
- author = {Hazarika, Ananya and Guo, Jianlin and Parsons, Kieran and Nagai, Yukimasa and Sumi, Takenori and Orlik, Philip V. and Rahmati, Mehdi},
- title = {{AgriNex: Next-Gen Smart Agriculture with LLM-Integrated UAV-IoT Solutions}},
- booktitle = {IEEE International Conference on Communications Workshops (ICC)},
- month = jun,
- year = {2025},
- url = {https://www.merl.com/publications/TR2025-081}
- }
Lu, K., Ma, C., Hori, C., Romeres, D., "KitchenVLA: Iterative Vision-Language Corrections for Robotic Execution of Human Tasks", IEEE International Conference on Robotics and Automation Workshop on Safely Leveraging Vision-Language Foundation Models in Robotics (SafeLVMs@ICRA), May 2025.
BibTeX TR2025-068 PDF
- @inproceedings{Lu2025may,
- author = {Lu, Kai and Ma, Chenyang and Hori, Chiori and Romeres, Diego},
- title = {{KitchenVLA: Iterative Vision-Language Corrections for Robotic Execution of Human Tasks}},
- booktitle = {IEEE International Conference on Robotics and Automation Workshop on Safely Leveraging Vision-Language Foundation Models in Robotics (SafeLVMs@ICRA)},
- month = may,
- year = {2025},
- url = {https://www.merl.com/publications/TR2025-068}
- }
Singh, A., Jones, M.J., Peng, K.-C., Chatterjee, M., Cherian, A., Learned-Miller, E., "Improving Open-World Object Localization by Discovering Background", CVPR Workshop on Domain Generalization: Evolution, Breakthroughs and Future Horizon, May 2025, pp. 6449-6458.
BibTeX TR2025-058 PDF
- @inproceedings{Singh2025may,
- author = {Singh, Ashish and Jones, Michael J. and Peng, Kuan-Chuan and Chatterjee, Moitreya and Cherian, Anoop and Learned-Miller, Erik},
- title = {{Improving Open-World Object Localization by Discovering Background}},
- booktitle = {CVPR Workshop on Domain Generalization: Evolution, Breakthroughs and Future Horizon},
- pages = {6449--6458},
- month = may,
- year = {2025},
- url = {https://www.merl.com/publications/TR2025-058}
- }
Basu, S., Lohit, S., Brand, M., "G-RepsNet: A Lightweight Construction of Equivariant Networks for Arbitrary Matrix Groups", Transactions on Machine Learning Research (TMLR), May 2025.
BibTeX TR2025-056 PDF Software
- @article{Basu2025may,
- author = {Basu, Sourya and Lohit, Suhas and Brand, Matthew},
- title = {{G-RepsNet: A Lightweight Construction of Equivariant Networks for Arbitrary Matrix Groups}},
- journal = {Transactions on Machine Learning Research (TMLR)},
- year = 2025,
- month = may,
- issn = {2835-8856},
- url = {https://www.merl.com/publications/TR2025-056}
- }
Masuyama, Y., Chang, X., Zhang, W., Cornell, S., Wang, Z.-Q., Ono, N., Qian, Y., Watanabe, S., "An End-to-End Integration of Speech Separation and Recognition with Self-Supervised Learning Representation", Computer Speech & Language, DOI: 10.1016/j.csl.2025.101813, Vol. 95, pp. 101813, May 2025.
BibTeX TR2025-054 PDF
- @article{Masuyama2025may,
- author = {Masuyama, Yoshiki and Chang, Xuankai and Zhang, Wangyou and Cornell, Samuele and Wang, Zhong-Qiu and Ono, Nobutaka and Qian, Yanmin and Watanabe, Shinji},
- title = {{An End-to-End Integration of Speech Separation and Recognition with Self-Supervised Learning Representation}},
- journal = {Computer Speech \& Language},
- volume = {95},
- pages = {101813},
- month = may,
- year = {2025},
- doi = {10.1016/j.csl.2025.101813},
- issn = {0885-2308},
- url = {https://www.merl.com/publications/TR2025-054}
- }
Koike-Akino, T., Tonin, F., Wu, Y., Wu, F.Z., Candogan, L.N., Cevher, V., "Quantum-PEFT: Ultra Parameter-Efficient Fine-Tuning", International Conference on Learning Representations (ICLR), April 2025.
BibTeX TR2025-051 PDF
- @inproceedings{Koike-Akino2025apr,
- author = {Koike-Akino, Toshiaki and Tonin, Francesco and Wu, Yongtao and Wu, Frank Zhengqing and Candogan, Leyla Naz and Cevher, Volkan},
- title = {{Quantum-PEFT: Ultra Parameter-Efficient Fine-Tuning}},
- booktitle = {International Conference on Learning Representations (ICLR)},
- year = 2025,
- month = apr,
- url = {https://www.merl.com/publications/TR2025-051}
- }
Tang, H., Ellis, K., Lohit, S., Jones, M.J., Chatterjee, M., "Programmatic Video Prediction Using Large Language Models", International Conference on Learning Representations Workshops (ICLRW), April 2025.
BibTeX TR2025-049 PDF
- @inproceedings{Tang2025apr,
- author = {Tang, Hao and Ellis, Kevin and Lohit, Suhas and Jones, Michael J. and Chatterjee, Moitreya},
- title = {{Programmatic Video Prediction Using Large Language Models}},
- booktitle = {International Conference on Learning Representations Workshops (ICLRW)},
- month = apr,
- year = {2025},
- url = {https://www.merl.com/publications/TR2025-049}
- }
Araki, S., Ito, N., Haeb-Umbach, R., Wichern, G., Wang, Z.-Q., Mitsufuji, Y., "30+ Years of Source Separation Research: Achievements and Future Challenges", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP49660.2025.10889006, April 2025, pp. 1-5.
BibTeX TR2025-036 PDF
- @inproceedings{Araki2025mar,
- author = {Araki, Shoko and Ito, Nobutaka and Haeb-Umbach, Reinhold and Wichern, Gordon and Wang, Zhong-Qiu and Mitsufuji, Yuki},
- title = {{30+ Years of Source Separation Research: Achievements and Future Challenges}},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- year = 2025,
- pages = {1--5},
- month = apr,
- doi = {10.1109/ICASSP49660.2025.10889006},
- url = {https://www.merl.com/publications/TR2025-036}
- }
Ebbers, J., Germain, F.G., Wilkinghoff, K., Wichern, G., Le Roux, J., "No Class Left Behind: A Closer Look at Class Balancing for Audio Tagging", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP49660.2025.10890706, April 2025.
BibTeX TR2025-037 PDF
- @inproceedings{Ebbers2025mar,
- author = {Ebbers, Janek and Germain, François G. and Wilkinghoff, Kevin and Wichern, Gordon and {Le Roux}, Jonathan},
- title = {{No Class Left Behind: A Closer Look at Class Balancing for Audio Tagging}},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- year = 2025,
- month = apr,
- doi = {10.1109/ICASSP49660.2025.10890706},
- url = {https://www.merl.com/publications/TR2025-037}
- }
Gruttadauria, E., Fontaine, M., Le Roux, J., Essid, S., "O-EENC-SD: Efficient Online End-to-End Neural Clustering for Speaker Diarization", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP49660.2025.10890837, April 2025.
BibTeX TR2025-031 PDF
- @inproceedings{Gruttadauria2025mar,
- author = {Gruttadauria, Elio and Fontaine, Mathieu and {Le Roux}, Jonathan and Essid, Slim},
- title = {{{O-EENC-SD}: Efficient Online End-to-End Neural Clustering for Speaker Diarization}},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- year = 2025,
- month = apr,
- doi = {10.1109/ICASSP49660.2025.10890837},
- url = {https://www.merl.com/publications/TR2025-031}
- }
Hori, C., Kambara, M., Sugiura, K., Ota, K., Khurana, S., Jain, S., Corcodel, R., Jha, D.K., Romeres, D., Le Roux, J., "Interactive Robot Action Replanning using Multimodal LLM Trained from Human Demonstration Videos", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP49660.2025.10887717, April 2025.
BibTeX TR2025-034 PDF
- @inproceedings{Hori2025mar,
- author = {Hori, Chiori and Kambara, Motonari and Sugiura, Komei and Ota, Kei and Khurana, Sameer and Jain, Siddarth and Corcodel, Radu and Jha, Devesh K. and Romeres, Diego and {Le Roux}, Jonathan},
- title = {{Interactive Robot Action Replanning using Multimodal {LLM} Trained from Human Demonstration Videos}},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- year = 2025,
- month = apr,
- doi = {10.1109/ICASSP49660.2025.10887717},
- url = {https://www.merl.com/publications/TR2025-034}
- }
Masuyama, Y., Wichern, G., Germain, F.G., Ick, C., Le Roux, J., "Retrieval-Augmented Neural Field for HRTF Upsampling and Personalization", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP49660.2025.10889481, April 2025.
BibTeX TR2025-029 PDF Software
- @inproceedings{Masuyama2025mar,
- author = {Masuyama, Yoshiki and Wichern, Gordon and Germain, François G. and Ick, Christopher and {Le Roux}, Jonathan},
- title = {{Retrieval-Augmented Neural Field for HRTF Upsampling and Personalization}},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- year = 2025,
- month = apr,
- doi = {10.1109/ICASSP49660.2025.10889481},
- url = {https://www.merl.com/publications/TR2025-029}
- }
Saijo, K., Ebbers, J., Germain, F.G., Khurana, S., Wichern, G., Le Roux, J., "Leveraging Audio-Only Data for Text-Queried Target Sound Extraction", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP49660.2025.10888769, April 2025.
BibTeX TR2025-033 PDF
- @inproceedings{Saijo2025mar2,
- author = {Saijo, Kohei and Ebbers, Janek and Germain, François G. and Khurana, Sameer and Wichern, Gordon and {Le Roux}, Jonathan},
- title = {{Leveraging Audio-Only Data for Text-Queried Target Sound Extraction}},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- year = 2025,
- month = apr,
- doi = {10.1109/ICASSP49660.2025.10888769},
- url = {https://www.merl.com/publications/TR2025-033}
- }
Saijo, K., Ebbers, J., Germain, F.G., Wichern, G., Le Roux, J., "Task-Aware Unified Source Separation", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP49660.2025.10887819, April 2025.
BibTeX TR2025-032 PDF Software
- @inproceedings{Saijo2025mar,
- author = {Saijo, Kohei and Ebbers, Janek and Germain, François G. and Wichern, Gordon and {Le Roux}, Jonathan},
- title = {{Task-Aware Unified Source Separation}},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- year = 2025,
- month = apr,
- doi = {10.1109/ICASSP49660.2025.10887819},
- url = {https://www.merl.com/publications/TR2025-032}
- }
Wilkinghoff, K., Yang, H., Ebbers, J., Germain, F.G., Wichern, G., Le Roux, J., "Keeping the Balance: Anomaly Score Calculation for Domain Generalization", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP49660.2025.10888402, April 2025.
BibTeX TR2025-030 PDF Software
- @inproceedings{Wilkinghoff2025mar,
- author = {Wilkinghoff, Kevin and Yang, Haici and Ebbers, Janek and Germain, François G. and Wichern, Gordon and {Le Roux}, Jonathan},
- title = {{Keeping the Balance: Anomaly Score Calculation for Domain Generalization}},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- year = 2025,
- month = apr,
- doi = {10.1109/ICASSP49660.2025.10888402},
- url = {https://www.merl.com/publications/TR2025-030}
- }
Ick, C., Wichern, G., Masuyama, Y., Germain, F.G., Le Roux, J., "Data Augmentation Using Neural Acoustic Fields With Retrieval-Augmented Pre-training", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP) Satellite Workshop on Generative Data Augmentation for Real-World Signal Processing Applications (GenDA), April 2025.
BibTeX TR2025-045 PDF
- @inproceedings{Ick2025apr,
- author = {Ick, Christopher and Wichern, Gordon and Masuyama, Yoshiki and Germain, François G. and {Le Roux}, Jonathan},
- title = {{Data Augmentation Using Neural Acoustic Fields With Retrieval-Augmented Pre-training}},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP) Satellite Workshop on Generative Data Augmentation for Real-World Signal Processing Applications (GenDA)},
- year = 2025,
- month = apr,
- url = {https://www.merl.com/publications/TR2025-045}
- }
Smedemark-Margulies, N., Wang, Y., Koike-Akino, T., Liu, J., Parsons, K., Bicer, Y., Erdogmus, D., "Improving Subject Transfer in EEG Classification with Divergence Estimation", Journal of Neural Engineering, DOI: 10.1088/1741-2552/ad9777, Vol. 21, No. 6, April 2025.
BibTeX TR2025-044 PDF Software
- @article{Smedemark-Margulies2025apr,
- author = {Smedemark-Margulies, Niklas and Wang, Ye and Koike-Akino, Toshiaki and Liu, Jing and Parsons, Kieran and Bicer, Yunus and Erdogmus, Deniz},
- title = {{Improving Subject Transfer in EEG Classification with Divergence Estimation}},
- journal = {Journal of Neural Engineering},
- volume = {21},
- number = {6},
- month = apr,
- year = {2025},
- doi = {10.1088/1741-2552/ad9777},
- url = {https://www.merl.com/publications/TR2025-044}
- }