Publications

590 / 3,592 publications found.


  •  Pan, Z., Wichern, G., Germain, F.G., Subramanian, A., Le Roux, J., "Late Audio-Visual Fusion for In-The-Wild Speaker Diarization", Hands-free Speech Communication and Microphone Arrays (HSCMA), April 2024.
    BibTeX TR2024-029 PDF
    • @inproceedings{Pan2024apr,
    • author = {Pan, Zexu and Wichern, Gordon and Germain, François G and Subramanian, Aswin and Le Roux, Jonathan},
    • title = {Late Audio-Visual Fusion for In-The-Wild Speaker Diarization},
    • booktitle = {Hands-free Speech Communication and Microphone Arrays (HSCMA)},
    • year = 2024,
    • month = apr,
    • url = {https://www.merl.com/publications/TR2024-029}
    • }
  •  Gaur, A., Pais, G., Miraldo, P., "Oriented-grid Encoder for 3D Implicit Representations", International Conference on 3D Vision (3DV), March 2024.
    BibTeX TR2024-031 PDF
    • @inproceedings{Gaur2024mar,
    • author = {Gaur, Arihant and Pais, Goncalo and Miraldo, Pedro},
    • title = {Oriented-grid Encoder for 3D Implicit Representations},
    • booktitle = {International Conference on 3D Vision (3DV)},
    • year = 2024,
    • month = mar,
    • url = {https://www.merl.com/publications/TR2024-031}
    • }
  •  Sholokhov, A., Rapp, J., Nabi, S., Brunton, S., Kutz, N., Mansour, H., "Single-pixel imaging of dynamic flows using Neural ODE regularization", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), March 2024.
    BibTeX TR2024-024 PDF
    • @inproceedings{Sholokhov2024mar,
    • author = {Sholokhov, Aleksei and Rapp, Joshua and Nabi, Saleh and Brunton, Steven and Kutz, Nathan and Mansour, Hassan},
    • title = {Single-pixel imaging of dynamic flows using Neural ODE regularization},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2024,
    • month = mar,
    • url = {https://www.merl.com/publications/TR2024-024}
    • }
  •  Hori, C., Wang, P., Rahman, M., Vaca-Rubio, C., Khurana, S., Cherian, A., Le Roux, J., "Wi-Fi based Indoor Monitoring Enhanced by Multimodal Fusion", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), March 2024.
    BibTeX TR2024-012 PDF
    • @inproceedings{Hori2024mar,
    • author = {Hori, Chiori and Wang, Pu and Rahman, Mahbub and Vaca-Rubio, Cristian and Khurana, Sameer and Cherian, Anoop and Le Roux, Jonathan},
    • title = {Wi-Fi based Indoor Monitoring Enhanced by Multimodal Fusion},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2024,
    • month = mar,
    • url = {https://www.merl.com/publications/TR2024-012}
    • }
  •  Srinivas, N., Vinod, A.P., Di Cairano, S., Weiss, A., "Lunar Landing with Feasible Divert using Controllable Sets", AIAA SciTech, DOI: 10.2514/​6.2024-0324, January 2024, pp. AIAA 2024-0324.
    BibTeX TR2024-004 PDF
    • @inproceedings{Srinivas2024jan,
    • author = {Srinivas, Neeraj and Vinod, Abraham P. and Di Cairano, Stefano and Weiss, Avishai},
    • title = {Lunar Landing with Feasible Divert using Controllable Sets},
    • booktitle = {AIAA SCITECH 2024 Forum},
    • year = 2024,
    • pages = {AIAA 2024-0324},
    • month = jan,
    • doi = {10.2514/6.2024-0324},
    • url = {https://www.merl.com/publications/TR2024-004}
    • }
  •  Carmichael, Z., Lohit, S., Cherian, A., Jones, M.J., Scheirer, W., "Pixel-Grounded Prototypical Part Networks", IEEE Winter Conference on Applications of Computer Vision (WACV), January 2024.
    BibTeX TR2024-002 PDF Presentation
    • @inproceedings{Carmichael2024jan,
    • author = {Carmichael, Zachariah and Lohit, Suhas and Cherian, Anoop and Jones, Michael J. and Scheirer, Walter},
    • title = {Pixel-Grounded Prototypical Part Networks},
    • booktitle = {IEEE Winter Conference on Applications of Computer Vision (WACV)},
    • year = 2024,
    • month = jan,
    • url = {https://www.merl.com/publications/TR2024-002}
    • }
  •  Liu, X., Paul, S., Chatterjee, M., Cherian, A., "CAVEN: An Embodied Conversational Agent for Efficient Audio-Visual Navigation in Noisy Environments", AAAI Conference on Artificial Intelligence, December 2023.
    BibTeX TR2023-154 PDF
    • @inproceedings{Liu2023dec2,
    • author = {Liu, Xiulong and Paul, Sudipta and Chatterjee, Moitreya and Cherian, Anoop},
    • title = {CAVEN: An Embodied Conversational Agent for Efficient Audio-Visual Navigation in Noisy Environments},
    • booktitle = {AAAI Conference on Artificial Intelligence},
    • year = 2023,
    • month = dec,
    • url = {https://www.merl.com/publications/TR2023-154}
    • }
  •  Pan, Z., Wichern, G., Masuyama, Y., Germain, F.G., Khurana, S., Hori, C., Le Roux, J., "Scenario-Aware Audio-Visual TF-GridNet for Target Speech Extraction", IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU), DOI: 10.1109/​ASRU57964.2023.10389618, December 2023.
    BibTeX TR2023-152 PDF
    • @inproceedings{Pan2023dec2,
    • author = {Pan, Zexu and Wichern, Gordon and Masuyama, Yoshiki and Germain, François G and Khurana, Sameer and Hori, Chiori and Le Roux, Jonathan},
    • title = {Scenario-Aware Audio-Visual TF-GridNet for Target Speech Extraction},
    • booktitle = {IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU)},
    • year = 2023,
    • month = dec,
    • doi = {10.1109/ASRU57964.2023.10389618},
    • isbn = {979-8-3503-0689-7},
    • url = {https://www.merl.com/publications/TR2023-152}
    • }
  •  He, Y., Shin, S., Cherian, A., Markham, A., Trigoni, N., "Sound3DVDet: 3D Sound Source Detection using Multiview Microphone Array and RGB Images", IEEE Winter Conference on Applications of Computer Vision (WACV), December 2023.
    BibTeX TR2023-144 PDF
    • @inproceedings{He2023dec,
    • author = {He, Yuhang and Shin, Sangyun and Cherian, Anoop and Markham, Andrew and Trigoni, Niki},
    • title = {Sound3DVDet: 3D Sound Source Detection using Multiview Microphone Array and RGB Images},
    • booktitle = {IEEE Winter Conference on Applications of Computer Vision (WACV)},
    • year = 2023,
    • month = dec,
    • url = {https://www.merl.com/publications/TR2023-144}
    • }
  •  Delattre, F., Dirnfeld, D., Nguyen, P., Scarano, S., Jones, M.J., Miraldo, P., Learned-Miller, E., "Robust Frame-to-Frame Camera Rotation Estimation in Crowded Scenes", IEEE International Conference on Computer Vision (ICCV), DOI: 10.1109/​ICCV51070.2023.00894, October 2023, pp. 3715-3724.
    BibTeX TR2023-123 PDF Video
    • @inproceedings{Delattre2023oct,
    • author = {Delattre, Fabien and Dirnfeld, David and Nguyen, Phat and Scarano, Stephen and Jones, Michael J. and Miraldo, Pedro and Learned-Miller, Erik},
    • title = {Robust Frame-to-Frame Camera Rotation Estimation in Crowded Scenes},
    • booktitle = {IEEE International Conference on Computer Vision (ICCV)},
    • year = 2023,
    • pages = {3715--3724},
    • month = oct,
    • publisher = {IEEE/CVF},
    • doi = {10.1109/ICCV51070.2023.00894},
    • issn = {2380-7504},
    • isbn = {979-8-3503-0718-4},
    • url = {https://www.merl.com/publications/TR2023-123}
    • }
  •  Miraldo, P., Piedade, V., "BANSAC: A dynamic BAyesian Network for adaptive SAmple Consensus", IEEE International Conference on Computer Vision (ICCV), DOI: 10.1109/​ICCV51070.2023.00346, October 2023, pp. 3715-3724.
    BibTeX TR2023-124 PDF Video Software
    • @inproceedings{Miraldo2023oct,
    • author = {Miraldo, Pedro and Piedade, Valter},
    • title = {BANSAC: A dynamic BAyesian Network for adaptive SAmple Consensus},
    • booktitle = {IEEE International Conference on Computer Vision (ICCV)},
    • year = 2023,
    • pages = {3715--3724},
    • month = oct,
    • publisher = {IEEE/CVF},
    • doi = {10.1109/ICCV51070.2023.00346},
    • issn = {2380-7504},
    • isbn = {979-8-3503-0718-4},
    • url = {https://www.merl.com/publications/TR2023-124}
    • }
  •  Nair, N.G., Cherian, A., Lohit, S., Wang, Y., Koike-Akino, T., Patel, V.M., Marks, T.K., "Steered Diffusion: A Generalized Framework for Plug-and-Play Conditional Image Synthesis", IEEE International Conference on Computer Vision (ICCV), October 2023, pp. 20850-20860.
    BibTeX TR2023-126 PDF Presentation
    • @inproceedings{Nair2023sep,
    • author = {Nair, Nithin Gopalakrishnan and Cherian, Anoop and Lohit, Suhas and Wang, Ye and Koike-Akino, Toshiaki and Patel, Vishal M. and Marks, Tim K.},
    • title = {Steered Diffusion: A Generalized Framework for Plug-and-Play Conditional Image Synthesis},
    • booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision},
    • year = 2023,
    • pages = {20850--20860},
    • month = oct,
    • publisher = {IEEE/CVF},
    • url = {https://www.merl.com/publications/TR2023-126}
    • }
  •  Sharma, M., Chatterjee, M., Peng, K.-C., Lohit, S., Jones, M.J., "Tensor Factorization for Leveraging Cross-Modal Knowledge in Data-Constrained Infrared Object Detection", IEEE International Conference on Computer Vision Workshops (ICCV), October 2023, pp. 924-932.
    BibTeX TR2023-125 PDF Presentation
    • @inproceedings{Sharma2023oct,
    • author = {Sharma, Manish and Chatterjee, Moitreya and Peng, Kuan-Chuan and Lohit, Suhas and Jones, Michael J.},
    • title = {Tensor Factorization for Leveraging Cross-Modal Knowledge in Data-Constrained Infrared Object Detection},
    • booktitle = {IEEE International Conference on Computer Vision Workshops (ICCV)},
    • year = 2023,
    • pages = {924--932},
    • month = oct,
    • url = {https://www.merl.com/publications/TR2023-125}
    • }
  •  Shenoy, V., Marks, T.K., Mansour, H., Lohit, S., "Unrolled IPPG: Video Heart Rate Estimation via Unrolling Proximal Gradient Descent", IEEE International Conference on Image Processing (ICIP), DOI: 10.1109/ICIP49359.2023.10222169, September 2023, pp. 2715-2719.
    BibTeX TR2023-116 PDF Video
    • @inproceedings{Shenoy2023sep,
    • author = {Shenoy, Vineet and Marks, Tim K. and Mansour, Hassan and Lohit, Suhas},
    • title = {Unrolled IPPG: Video Heart Rate Estimation via Unrolling Proximal Gradient Descent},
    • booktitle = {IEEE International Conference on Image Processing (ICIP)},
    • year = 2023,
    • pages = {2715--2719},
    • month = sep,
    • publisher = {IEEE},
    • doi = {10.1109/ICIP49359.2023.10222169},
    • isbn = {978-1-7281-9835-4},
    • url = {https://www.merl.com/publications/TR2023-116}
    • }
  •  Yoshino, K., Chen, Y.-N., Crook, P., Kottur, S., Li, J., Hedayatnia, B., Moon, S., Fei, Z., Li, Z., Zhang, J., Feng, Y., Zhou, J., Kim, S., Liu, Y., Jin, D., Papangelis, A., Gopalakrishnan, K., Hakkani-Tur, D., Damavandi, B., Geramifard, A., Hori, C., Shah, A., Zhang, C., Li, H., Sedoc, J., D’Haro, L.F., Banchs, R., Rudnicky, A., "Overview of the Tenth Dialog System Technology Challenge: DSTC10", IEEE/ACM Transactions on Audio, Speech, and Language Processing, DOI: 10.1109/TASLP.2023.3293030, pp. 1-14, August 2023.
    BibTeX TR2023-109 PDF
    • @article{Yoshino2023aug,
    • author = {Yoshino, Koichiro and Chen, Yun-Nung and Crook, Paul and Kottur, Satwik and Li, Jinchao and Hedayatnia, Behnam and Moon, Seungwhan and Fei, Zhengcong and Li, Zekang and Zhang, Jinchao and Feng, Yang and Zhou, Jie and Kim, Seokhwan and Liu, Yang and Jin, Di and Papangelis, Alexandros and Gopalakrishnan, Karthik and Hakkani-Tur, Dilek and Damavandi, Babak and Geramifard, Alborz and Hori, Chiori and Shah, Ankit and Zhang, Chen and Li, Haizhou and Sedoc, João and D’Haro, Luis F. and Banchs, Rafael and Rudnicky, Alexander},
    • title = {Overview of the Tenth Dialog System Technology Challenge: DSTC10},
    • journal = {IEEE/ACM Transactions on Audio, Speech, and Language Processing},
    • year = 2023,
    • pages = {1--14},
    • month = aug,
    • doi = {10.1109/TASLP.2023.3293030},
    • issn = {2329-9290},
    • url = {https://www.merl.com/publications/TR2023-109}
    • }
  •  Hori, C., Peng, P., Harwath, D., Liu, X., Ota, K., Jain, S., Corcodel, R., Jha, D.K., Romeres, D., Le Roux, J., "Style-transfer based Speech and Audio-visual Scene understanding for Robot Action Sequence Acquisition from Videos", Interspeech, DOI: 10.21437/​Interspeech.2023-1983, August 2023, pp. 4663-4667.
    BibTeX TR2023-104 PDF
    • @inproceedings{Hori2023aug,
    • author = {Hori, Chiori and Peng, Puyuan and Harwath, David and Liu, Xinyu and Ota, Kei and Jain, Siddarth and Corcodel, Radu and Jha, Devesh K. and Romeres, Diego and Le Roux, Jonathan},
    • title = {Style-transfer based Speech and Audio-visual Scene understanding for Robot Action Sequence Acquisition from Videos},
    • booktitle = {Interspeech},
    • year = 2023,
    • pages = {4663--4667},
    • month = aug,
    • doi = {10.21437/Interspeech.2023-1983},
    • url = {https://www.merl.com/publications/TR2023-104}
    • }
  •  Singh, A., Jones, M.J., Learned-Miller, E., "EVAL: Explainable Video Anomaly Localization", IEEE Conference on Computer Vision and Pattern Recognition (CVPR), DOI: 10.1109/​CVPR52729.2023.01795, June 2023.
    BibTeX TR2023-071 PDF Video Presentation
    • @inproceedings{Singh2023jun,
    • author = {Singh, Ashish and Jones, Michael J. and Learned-Miller, Erik},
    • title = {EVAL: Explainable Video Anomaly Localization},
    • booktitle = {IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    • year = 2023,
    • month = jun,
    • doi = {10.1109/CVPR52729.2023.01795},
    • url = {https://www.merl.com/publications/TR2023-071}
    • }
  •  Jha, D.K., Jain, S., Romeres, D., Yerazunis, W.S., Nikovski, D., "Generalizable Human-Robot Collaborative Assembly Using Imitation Learning and Force Control", European Control Conference (ECC), DOI: 10.23919/​ECC57647.2023.10178330, May 2023, pp. 1-8.
    BibTeX TR2023-065 PDF
    • @inproceedings{Jha2023may,
    • author = {Jha, Devesh K. and Jain, Siddarth and Romeres, Diego and Yerazunis, William S. and Nikovski, Daniel},
    • title = {Generalizable Human-Robot Collaborative Assembly Using Imitation Learning and Force Control},
    • booktitle = {European Control Conference (ECC)},
    • year = 2023,
    • pages = {1--8},
    • month = may,
    • doi = {10.23919/ECC57647.2023.10178330},
    • url = {https://www.merl.com/publications/TR2023-065}
    • }
  •  Skibik, T., Vinod, A.P., Weiss, A., Di Cairano, S., "MPC with Integrated Evasive Maneuvers for Failure-safe Automated Driving", American Control Conference (ACC), May 2023, pp. 1122-1128.
    BibTeX TR2023-055 PDF
    • @inproceedings{Skibik2023may,
    • author = {Skibik, Terrence and Vinod, Abraham P. and Weiss, Avishai and Di Cairano, Stefano},
    • title = {MPC with Integrated Evasive Maneuvers for Failure-safe Automated Driving},
    • booktitle = {American Control Conference (ACC)},
    • year = 2023,
    • pages = {1122--1128},
    • month = may,
    • url = {https://www.merl.com/publications/TR2023-055}
    • }
  •  Jain, S., Corcodel, R., Jha, D.K., Romeres, D., "Vision Guided Food Assembly by Robot Teaching from Target Composition", ICRA 2023 Workshop on Task-Informed Grasping IV (TIG-IV): From Farm to Fork, May 2023.
    BibTeX TR2023-067 PDF
    • @inproceedings{Jain2023may,
    • author = {Jain, Siddarth and Corcodel, Radu and Jha, Devesh K. and Romeres, Diego},
    • title = {Vision Guided Food Assembly by Robot Teaching from Target Composition},
    • booktitle = {ICRA 2023 Workshop on Task-Informed Grasping IV (TIG-IV): From Farm to Fork},
    • year = 2023,
    • month = may,
    • url = {https://www.merl.com/publications/TR2023-067}
    • }
  •  Cherian, A., Jain, S., Marks, T.K., Sullivan, A., "Discriminative 3D Shape Modeling for Few-Shot Instance Segmentation", IEEE International Conference on Robotics and Automation (ICRA), DOI: 10.1109/​ICRA48891.2023.10160644, May 2023, pp. 9296-9302.
    BibTeX TR2023-010 PDF Presentation
    • @inproceedings{Cherian2023may,
    • author = {Cherian, Anoop and Jain, Siddarth and Marks, Tim K. and Sullivan, Alan},
    • title = {Discriminative 3D Shape Modeling for Few-Shot Instance Segmentation},
    • booktitle = {IEEE International Conference on Robotics and Automation (ICRA)},
    • year = 2023,
    • pages = {9296--9302},
    • month = may,
    • publisher = {IEEE},
    • doi = {10.1109/ICRA48891.2023.10160644},
    • url = {https://www.merl.com/publications/TR2023-010}
    • }
  •  Ota, K., Tung, H.-Y., Smith, K., Cherian, A., Marks, T.K., Sullivan, A., Kanezaki, A., Tenenbaum, J.B., "H-SAUR: Hypothesize, Simulate, Act, Update, and Repeat for Understanding Object Articulations from Interactions", IEEE International Conference on Robotics and Automation (ICRA), DOI: 10.1109/​ICRA48891.2023.10160575, May 2023, pp. 7272-7278.
    BibTeX TR2023-009 PDF
    • @inproceedings{Ota2023may,
    • author = {Ota, Kei and Tung, Hsiao-Yu and Smith, Kevin and Cherian, Anoop and Marks, Tim K. and Sullivan, Alan and Kanezaki, Asako and Tenenbaum, Joshua B.},
    • title = {H-SAUR: Hypothesize, Simulate, Act, Update, and Repeat for Understanding Object Articulations from Interactions},
    • booktitle = {IEEE International Conference on Robotics and Automation (ICRA)},
    • year = 2023,
    • pages = {7272--7278},
    • month = may,
    • publisher = {IEEE},
    • doi = {10.1109/ICRA48891.2023.10160575},
    • url = {https://www.merl.com/publications/TR2023-009}
    • }
  •  Shah, A., Roy, A., Shah, K., Mishra, S.K., Jacobs, D., Cherian, A., Chellappa, R., "HaLP: Hallucinating Latent Positives for Skeleton-based Self-Supervised Learning of Actions", IEEE Conference on Computer Vision and Pattern Recognition (CVPR), May 2023, pp. 18846-18856.
    BibTeX TR2023-035 PDF
    • @inproceedings{Shah2023may,
    • author = {Shah, Anshul and Roy, Aniket and Shah, Ketul and Mishra, Shlok Kumar and Jacobs, David and Cherian, Anoop and Chellappa, Rama},
    • title = {HaLP: Hallucinating Latent Positives for Skeleton-based Self-Supervised Learning of Actions},
    • booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
    • year = 2023,
    • pages = {18846--18856},
    • month = may,
    • publisher = {CVF},
    • url = {https://www.merl.com/publications/TR2023-035}
    • }
  •  Zhang, J., Cherian, A., Liu, Y., Shabat, I.B., Rodriguez, C., Gould, S., "Aligning Step-by-Step Instructional Diagrams to Video Demonstrations", IEEE Conference on Computer Vision and Pattern Recognition (CVPR), May 2023, pp. 2483-2492.
    BibTeX TR2023-034 PDF
    • @inproceedings{Zhang2023may,
    • author = {Zhang, Jiahao and Cherian, Anoop and Liu, Yanbin and Shabat, Itzik Ben and Rodriguez, Cristian and Gould, Stephen},
    • title = {Aligning Step-by-Step Instructional Diagrams to Video Demonstrations},
    • booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
    • year = 2023,
    • pages = {2483--2492},
    • month = may,
    • publisher = {CVF},
    • url = {https://www.merl.com/publications/TR2023-034}
    • }
  •  Jeon, E.S., Lohit, S., Anirudh, R., Turaga, P., "Robust Time Series Recovery and Classification Using Test-time Noise Simulator Networks", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/​ICASSP49357.2023.10096888, May 2023.
    BibTeX TR2023-021 PDF Presentation
    • @inproceedings{Jeon2023may,
    • author = {Jeon, Eun Som and Lohit, Suhas and Anirudh, Rushil and Turaga, Pavan},
    • title = {Robust Time Series Recovery and Classification Using Test-time Noise Simulator Networks},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2023,
    • month = may,
    • publisher = {IEEE},
    • doi = {10.1109/ICASSP49357.2023.10096888},
    • url = {https://www.merl.com/publications/TR2023-021}
    • }