Publications

Chatterjee, M., Ahuja, N., Cherian, A., "Quantifying Predictive Uncertainty for Stochastic Video Synthesis from Audio", IEEE Conference on Computer Vision and Pattern Recognition Workshops (CVPRW), June 2022.
BibTeX TR2022-082 PDF
- @inproceedings{Chatterjee2022jun,
- author = {Chatterjee, Moitreya and Ahuja, Narendra and Cherian, Anoop},
- title = {Quantifying Predictive Uncertainty for Stochastic Video Synthesis from Audio},
- booktitle = {IEEE Conference on Computer Vision and Pattern Recognition Workshops (CVPRW)},
- year = 2022,
- month = jun,
- url = {https://www.merl.com/publications/TR2022-082}
- }
Zhu, X., Jain, S., Tomizuka, M., van Baar, J., "Learning to Synthesize Volumetric Meshes from Vision-based Tactile Imprints", IEEE International Conference on Robotics and Automation (ICRA), DOI: 10.1109/ICRA46639.2022.9812092, May 2022, pp. 4833-4839.
BibTeX TR2022-055 PDF
- @inproceedings{Zhu2022may2,
- author = {Zhu, Xinghao and Jain, Siddarth and Tomizuka, Masayoshi and van Baar, Jeroen},
- title = {Learning to Synthesize Volumetric Meshes from Vision-based Tactile Imprints},
- booktitle = {2022 IEEE International Conference on Robotics and Automation (ICRA)},
- year = 2022,
- pages = {4833--4839},
- month = may,
- publisher = {IEEE},
- doi = {10.1109/ICRA46639.2022.9812092},
- isbn = {978-1-7281-9681-7},
- url = {https://www.merl.com/publications/TR2022-055}
- }
Shah, A.P., Geng, S., Gao, P., Cherian, A., Hori, T., Marks, T.K., Le Roux, J., Hori, C., "Audio-Visual Scene-Aware Dialog and Reasoning Using Audio-Visual Transformers with Joint Student-Teacher Learning", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), April 2022, pp. 7732-7736.
BibTeX TR2022-019 PDF
- @inproceedings{Shah2022apr,
- author = {Shah, Ankit Parag and Geng, Shijie and Gao, Peng and Cherian, Anoop and Hori, Takaaki and Marks, Tim K. and Le Roux, Jonathan and Hori, Chiori},
- title = {Audio-Visual Scene-Aware Dialog and Reasoning Using Audio-Visual Transformers with Joint Student-Teacher Learning},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- year = 2022,
- pages = {7732--7736},
- month = apr,
- publisher = {IEEE},
- issn = {1520-6149},
- isbn = {978-1-6654-0540-9},
- url = {https://www.merl.com/publications/TR2022-019}
- }
Hori, C., Shah, A.P., Geng, S., Gao, P., Cherian, A., Hori, T., Le Roux, J., Marks, T.K., "Overview of Audio Visual Scene-Aware Dialog with Reasoning Track for Natural Language Generation in DSTC10", The 10th Dialog System Technology Challenge Workshop at AAAI, February 2022.
BibTeX TR2022-016 PDF
- @inproceedings{Hori2022feb,
- author = {Hori, Chiori and Shah, Ankit Parag and Geng, Shijie and Gao, Peng and Cherian, Anoop and Hori, Takaaki and Le Roux, Jonathan and Marks, Tim K.},
- title = {Overview of Audio Visual Scene-Aware Dialog with Reasoning Track for Natural Language Generation in DSTC10},
- booktitle = {The 10th Dialog System Technology Challenge Workshop at AAAI},
- year = 2022,
- month = feb,
- url = {https://www.merl.com/publications/TR2022-016}
- }
Shah, A.P., Hori, T., Le Roux, J., Hori, C., "DSTC10-AVSD Submission System with Reasoning using Audio-Visual Transformers with Joint Student-Teacher Learning", February 2022.
BibTeX TR2022-025 PDF
- @misc{Shah2022feb,
- author = {Shah, Ankit Parag and Hori, Takaaki and Le Roux, Jonathan and Hori, Chiori},
- title = {DSTC10-AVSD Submission System with Reasoning using Audio-Visual Transformers with Joint Student-Teacher Learning},
- year = 2022,
- month = feb,
- url = {https://www.merl.com/publications/TR2022-025}
- }
Cherian, A., Hori, C., Marks, T.K., Le Roux, J., "(2.5+1)D Spatio-Temporal Scene Graphs for Video Question Answering", AAAI Conference on Artificial Intelligence, DOI: 10.1609/aaai.v36i1.19922, February 2022, pp. 444-453.
BibTeX TR2022-014 PDF Video Presentation
- @inproceedings{Cherian2022feb,
- author = {Cherian, Anoop and Hori, Chiori and Marks, Tim K. and Le Roux, Jonathan},
- title = {(2.5+1)D Spatio-Temporal Scene Graphs for Video Question Answering},
- booktitle = {Proceedings of the AAAI Conference on Artificial Intelligence},
- year = 2022,
- pages = {444--453},
- month = feb,
- doi = {10.1609/aaai.v36i1.19922},
- url = {https://www.merl.com/publications/TR2022-014}
- }
Ke, L., Peng, K.-C., Lyu, S., "Towards To-a-T Spatio-Temporal Focus for Skeleton-Based Action Recognition", AAAI Conference on Artificial Intelligence, February 2022.
BibTeX TR2022-015 PDF Presentation
- @inproceedings{Ke2022feb,
- author = {Ke, Lipeng and Peng, Kuan-Chuan and Lyu, Siwei},
- title = {Towards To-a-T Spatio-Temporal Focus for Skeleton-Based Action Recognition},
- booktitle = {AAAI Conference on Artificial Intelligence},
- year = 2022,
- month = feb,
- url = {https://www.merl.com/publications/TR2022-015}
- }
Shah, A., Sra, S., Chellappa, R., Cherian, A., "Max-Margin Contrastive Learning", AAAI Conference on Artificial Intelligence, DOI: 10.1609/aaai.v36i8.20796, February 2022, pp. 8220-8230.
BibTeX TR2022-013 PDF
- @inproceedings{Shah2022feb2,
- author = {Shah, Anshul and Sra, Suvrit and Chellappa, Rama and Cherian, Anoop},
- title = {Max-Margin Contrastive Learning},
- booktitle = {Proceedings of the AAAI Conference on Artificial Intelligence},
- year = 2022,
- pages = {8220--8230},
- month = feb,
- doi = {10.1609/aaai.v36i8.20796},
- url = {https://www.merl.com/publications/TR2022-013}
- }
Medin, S.C., Egger, B., Cherian, A., Wang, Y., Tenenbaum, J.B., Liu, X., Marks, T.K., "MOST-GAN: 3D Morphable StyleGAN for Disentangled Face Image Manipulation", AAAI Conference on Artificial Intelligence, DOI: 10.1609/aaai.v36i2.20091, February 2022, pp. 1962-1971.
BibTeX TR2022-011 PDF Video Data Presentation
- @inproceedings{Medin2022feb,
- author = {Medin, Safa C. and Egger, Bernhard and Cherian, Anoop and Wang, Ye and Tenenbaum, Joshua B. and Liu, Xiaoming and Marks, Tim K.},
- title = {MOST-GAN: 3D Morphable StyleGAN for Disentangled Face Image Manipulation},
- booktitle = {AAAI Conference on Artificial Intelligence},
- year = 2022,
- pages = {1962--1971},
- month = feb,
- doi = {10.1609/aaai.v36i2.20091},
- url = {https://www.merl.com/publications/TR2022-011}
- }
Lohit, S., Jones, M.J., "Model Compression Using Optimal Transport", IEEE Winter Conference on Applications of Computer Vision (WACV), January 2022.
BibTeX TR2022-006 PDF Presentation
- @inproceedings{Lohit2022jan,
- author = {Lohit, Suhas and Jones, Michael J.},
- title = {Model Compression Using Optimal Transport},
- booktitle = {IEEE Winter Conference on Applications of Computer Vision (WACV)},
- year = 2022,
- month = jan,
- publisher = {CVF OpenAccess},
- url = {https://www.merl.com/publications/TR2022-006}
- }
Yu, X., van Baar, J., Chen, S., Sullivan, A., "Keypoint-aligned 3D Human Shape Recovery from A Single Image with Bilayer-Graph", International Conference on 3D Vision (3DV), DOI: 10.1109/3DV53792.2021.00060, December 2021, pp. 505-514.
BibTeX TR2021-143 PDF
- @inproceedings{Yu2021dec,
- author = {Yu, Xin and van Baar, Jeroen and Chen, Siheng and Sullivan, Alan},
- title = {Keypoint-aligned 3D Human Shape Recovery from A Single Image with Bilayer-Graph},
- booktitle = {International Conference on 3D Vision (3DV)},
- year = 2021,
- pages = {505--514},
- month = dec,
- doi = {10.1109/3DV53792.2021.00060},
- url = {https://www.merl.com/publications/TR2021-143}
- }
Chatterjee, M., Ahuja, N., Cherian, A., "A Hierarchical Variational Neural Uncertainty Model for Stochastic Video Prediction", IEEE International Conference on Computer Vision (ICCV), October 2021, pp. 9751-9761.
BibTeX TR2021-096 PDF Video
- @inproceedings{Chatterjee2021oct2,
- author = {Chatterjee, Moitreya and Ahuja, Narendra and Cherian, Anoop},
- title = {A Hierarchical Variational Neural Uncertainty Model for Stochastic Video Prediction},
- booktitle = {IEEE International Conference on Computer Vision (ICCV)},
- year = 2021,
- pages = {9751--9761},
- month = oct,
- url = {https://www.merl.com/publications/TR2021-096}
- }
Chatterjee, M., Le Roux, J., Ahuja, N., Cherian, A., "Visual Scene Graphs for Audio Source Separation", IEEE International Conference on Computer Vision (ICCV), October 2021, pp. 1204-1213.
BibTeX TR2021-095 PDF Video Software
- @inproceedings{Chatterjee2021oct,
- author = {Chatterjee, Moitreya and Le Roux, Jonathan and Ahuja, Narendra and Cherian, Anoop},
- title = {Visual Scene Graphs for Audio Source Separation},
- booktitle = {IEEE International Conference on Computer Vision (ICCV)},
- year = 2021,
- pages = {1204--1213},
- month = oct,
- publisher = {CVF},
- url = {https://www.merl.com/publications/TR2021-095}
- }
Cherian, A., Pais, G., Jain, S., Marks, T.K., Sullivan, A., "InSeGAN: A Generative Approach to Segmenting Identical Instances in Depth Images", IEEE International Conference on Computer Vision (ICCV), October 2021, pp. 10023-10032.
BibTeX TR2021-097 PDF Video Data Software Presentation
- @inproceedings{Cherian2021oct,
- author = {Cherian, Anoop and Pais, Goncalo and Jain, Siddarth and Marks, Tim K. and Sullivan, Alan},
- title = {InSeGAN: A Generative Approach to Segmenting Identical Instances in Depth Images},
- booktitle = {IEEE International Conference on Computer Vision (ICCV)},
- year = 2021,
- pages = {10023--10032},
- month = oct,
- publisher = {CVF},
- url = {https://www.merl.com/publications/TR2021-097}
- }
Kannapiran, S., van Baar, J., Berman, S., "A Visual Inertial Odometry Framework for 3D Points, Lines and Planes", IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS), DOI: 10.1109/IROS51168.2021.9636526, September 2021.
BibTeX TR2021-131 PDF
- @inproceedings{Kannapiran2021sep,
- author = {Kannapiran, Shenbagaraj and van Baar, Jeroen and Berman, Spring},
- title = {A Visual Inertial Odometry Framework for 3D Points, Lines and Planes},
- booktitle = {IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)},
- year = 2021,
- month = sep,
- doi = {10.1109/IROS51168.2021.9636526},
- url = {https://www.merl.com/publications/TR2021-131}
- }
Comas, A., Marks, T.K., Mansour, H., Lohit, S., Ma, Y., Liu, X., "TURNIP: Time-series U-NET with Recurrence for NIR Imaging PPG", IEEE International Conference on Image Processing (ICIP), DOI: 10.1109/ICIP42928.2021.9506663, September 2021, pp. 309-313.
BibTeX TR2021-099 PDF
- @inproceedings{Comas2021sep,
- author = {Comas, Armand and Marks, Tim K. and Mansour, Hassan and Lohit, Suhas and Ma, Yechi and Liu, Xiaoming},
- title = {TURNIP: Time-series U-NET with Recurrence for NIR Imaging PPG},
- booktitle = {IEEE International Conference on Image Processing (ICIP)},
- year = 2021,
- pages = {309--313},
- month = sep,
- publisher = {IEEE},
- doi = {10.1109/ICIP42928.2021.9506663},
- url = {https://www.merl.com/publications/TR2021-099}
- }
Das, P., Ortega, A., Chen, S., Mansour, H., Vetro, A., "Application-agnostic spatio-temporal hand graph representations for stable activity understanding", IEEE International Conference on Image Processing (ICIP), DOI: 10.1109/ICIP42928.2021.9506054, September 2021, pp. 1074-1078.
BibTeX TR2021-112 PDF
- @inproceedings{Das2021sep,
- author = {Das, Pratyusha and Ortega, Antonio and Chen, Siheng and Mansour, Hassan and Vetro, Anthony},
- title = {Application-agnostic spatio-temporal hand graph representations for stable activity understanding},
- booktitle = {IEEE International Conference on Image Processing (ICIP)},
- year = 2021,
- pages = {1074--1078},
- month = sep,
- publisher = {IEEE},
- doi = {10.1109/ICIP42928.2021.9506054},
- url = {https://www.merl.com/publications/TR2021-112}
- }
Shi, L., Liu, D., Thornton, J.E., "Robust Camera Pose Estimation for Image Stitching", IEEE International Conference on Image Processing (ICIP), DOI: 10.1109/ICIP42928.2021.9506680, September 2021.
BibTeX TR2021-113 PDF
- @inproceedings{Shi2021sep,
- author = {Shi, Laixi and Liu, Dehong and Thornton, Jay E.},
- title = {Robust Camera Pose Estimation for Image Stitching},
- booktitle = {IEEE International Conference on Image Processing (ICIP)},
- year = 2021,
- month = sep,
- publisher = {IEEE},
- doi = {10.1109/ICIP42928.2021.9506680},
- isbn = {978-1-6654-4115-5},
- url = {https://www.merl.com/publications/TR2021-113}
- }
Hu, W., Pang, J., Liu, X., Tian, D., Lin, C.-W., Vetro, A., "Graph Signal Processing for Geometric Data and Beyond: Theory and Applications", IEEE Transactions on Multimedia, DOI: 10.1109/TMM.2021.3111440, Vol. 24, pp. 3961-3977, September 2021.
BibTeX TR2021-121 PDF
- @article{Hu2021oct,
- author = {Hu, Wei and Pang, Jiahao and Liu, Xianming and Tian, Dong and Lin, Chia-Wen and Vetro, Anthony},
- title = {Graph Signal Processing for Geometric Data and Beyond: Theory and Applications},
- journal = {IEEE Transactions on Multimedia},
- year = 2021,
- volume = 24,
- pages = {3961--3977},
- month = sep,
- doi = {10.1109/TMM.2021.3111440},
- issn = {1941-0077},
- url = {https://www.merl.com/publications/TR2021-121}
- }
Hori, C., Hori, T., Le Roux, J., "Optimizing Latency for Online Video Captioning Using Audio-Visual Transformers", Interspeech, DOI: 10.21437/Interspeech.2021-1975, August 2021, pp. 586-590.
BibTeX TR2021-093 PDF
- @inproceedings{Hori2021aug2,
- author = {Hori, Chiori and Hori, Takaaki and Le Roux, Jonathan},
- title = {Optimizing Latency for Online Video Captioning Using Audio-Visual Transformers},
- booktitle = {Interspeech},
- year = 2021,
- pages = {586--590},
- month = aug,
- publisher = {ISCA},
- doi = {10.21437/Interspeech.2021-1975},
- url = {https://www.merl.com/publications/TR2021-093}
- }
Cherian, A., Wang, J., "Generalized One-Class Learning Using Pairs of Complementary Classifiers", IEEE Transactions on Pattern Analysis and Machine Intelligence, DOI: 10.1109/TPAMI.2021.3092999, June 2021.
BibTeX TR2021-076 PDF Software
- @article{Cherian2021jun,
- author = {Cherian, Anoop and Wang, Jue},
- title = {Generalized One-Class Learning Using Pairs of Complementary Classifiers},
- journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
- year = 2021,
- month = jun,
- doi = {10.1109/TPAMI.2021.3092999},
- url = {https://www.merl.com/publications/TR2021-076}
- }
Hori, C., "Human Perspective Scene Understanding via Multimodal Sensing," Tech. Rep. TR2022-151, Audio-Visual Scene Understanding Tutorial at CVPR 2021, June 2021.
BibTeX TR2022-151 PDF Video
- @techreport{Hori2021jun,
- author = {Hori, Chiori},
- title = {Human Perspective Scene Understanding via Multimodal Sensing},
- institution = {Mitsubishi Electric Research Laboratories},
- year = 2021,
- month = jun,
- url = {https://www.merl.com/publications/TR2022-151}
- }
Shi, L., Liu, D., Umeda, M., Hana, N., "Fusion-Based Image Correlations Framework For Strain Measurement", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP39728.2021.9414987, June 2021.
BibTeX TR2021-012 PDF Video
- @inproceedings{Shi2021feb,
- author = {Shi, Laixi and Liu, Dehong and Umeda, Masaki and Hana, Norihiko},
- title = {Fusion-Based Image Correlations Framework For Strain Measurement},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- year = 2021,
- month = jun,
- doi = {10.1109/ICASSP39728.2021.9414987},
- issn = {2379-190X},
- isbn = {978-1-7281-7606-2},
- url = {https://www.merl.com/publications/TR2021-012}
- }
Kim, S., Galley, M., Gunasekara, C., Lee, S., Atkinson, A., Peng, B., Schulz, H., Gao, J., Li, J., Adada, M., Huang, M., Lastras, L., Kummerfeld, J.K., Lasecki, W.S., Hori, C., Cherian, A., Marks, T.K., Rastogi, A., Zang, X., Sunkara, S., Gupta, R., "Overview of the Eighth Dialog System Technology Challenge: DSTC8", IEEE/ACM Transactions on Audio, Speech, and Language Processing, DOI: 10.1109/TASLP.2021.3078368, May 2021.
BibTeX TR2021-064 PDF
- @article{Kim2021may,
- author = {Kim, Seokhwan and Galley, Michel and Gunasekara, Chulaka and Lee, Sungjin and Atkinson, Adam and Peng, Baolin and Schulz, Hannes and Gao, Jianfeng and Li, Jinchao and Adada, Mahmoud and Huang, Minlie and Lastras, Luis and Kummerfeld, Jonathan K. and Lasecki, Walter S. and Hori, Chiori and Cherian, Anoop and Marks, Tim K. and Rastogi, Abhinav and Zang, Xiaoxue and Sunkara, Srinivas and Gupta, Raghav},
- title = {Overview of the Eighth Dialog System Technology Challenge: DSTC8},
- journal = {IEEE/ACM Transactions on Audio, Speech, and Language Processing},
- year = 2021,
- month = may,
- doi = {10.1109/TASLP.2021.3078368},
- issn = {2329-9290},
- url = {https://www.merl.com/publications/TR2021-064}
- }
Hori, C., Tsuchiya, M., Chen, S., Cherian, A., Hori, T., Harsham, B.A., Marks, T.K., Le Roux, J., Sullivan, A., Vetro, A., "マルチモーダルセンシング情報に基づくScene-aware Interaction 技術", Society of Automotive Engineers of Japan, Vol. 75, No. 5, pp. 66-71, May 2021.
BibTeX TR2021-042 PDF Video
- @article{Hori2021may,
- author = {Hori, Chiori and Tsuchiya, Masato and Chen, Siheng and Cherian, Anoop and Hori, Takaaki and Harsham, Bret A. and Marks, Tim K. and Le Roux, Jonathan and Sullivan, Alan and Vetro, Anthony},
- title = {マルチモーダルセンシング情報に基づくScene-aware Interaction 技術},
- journal = {Society of Automotive Engineers of Japan},
- year = 2021,
- volume = 75,
- number = 5,
- pages = {66--71},
- month = may,
- url = {https://www.merl.com/publications/TR2021-042}
- }