Publications

Cornell, S., Boeddeker, C., Park, T., Huang, H., Raj, D., Wiesner, M., Masuyama, Y., Chang, X., Wang, Z.-Q., Squartini, S., Garcia, P., Watanabe, S., "Recent Trends in Distant Conversational Speech Recognition: A Review of CHiME-7 and 8 DASR Challenges", Computer Speech & Language, DOI: 10.1016/j.csl.2025.101901, Vol. 97, pp. 101901, December 2025.
BibTeX TR2026-008 PDF
- @article{Cornell2025dec,
-   author  = {Cornell, Samuele and Boeddeker, Christoph and Park, Taejin and Huang, He and Raj, Desh and Wiesner, Matthew and Masuyama, Yoshiki and Chang, Xuankai and Wang, Zhong-Qiu and Squartini, Stefano and Garcia, Paola and Watanabe, Shinji},
-   title   = {{Recent Trends in Distant Conversational Speech Recognition: A Review of CHiME-7 and 8 DASR Challenges}},
-   journal = {Computer Speech \& Language},
-   volume  = {97},
-   pages   = {101901},
-   month   = dec,
-   year    = {2025},
-   issn    = {0885-2308},
-   doi     = {10.1016/j.csl.2025.101901},
-   url     = {https://www.merl.com/publications/TR2026-008},
- }
Fujihashi, T., Koike-Akino, T., Watanabe, T., "Rateless Deep Joint Source Channel Coding for 3D Point Cloud", IEEE Access, DOI: 10.1109/ACCESS.2025.3546514, Vol. 13, pp. 39585-39599, June 2025.
BibTeX TR2025-069 PDF
- @article{Fujihashi2025jun,
-   author  = {Fujihashi, Takuya and Koike-Akino, Toshiaki and Watanabe, Takashi},
-   title   = {{Rateless Deep Joint Source Channel Coding for 3D Point Cloud}},
-   journal = {IEEE Access},
-   volume  = {13},
-   pages   = {39585--39599},
-   month   = jun,
-   year    = {2025},
-   issn    = {2169-3536},
-   doi     = {10.1109/ACCESS.2025.3546514},
-   url     = {https://www.merl.com/publications/TR2025-069},
- }
Masuyama, Y., Chang, X., Zhang, W., Cornell, S., Wang, Z.-Q., Ono, N., Qian, Y., Watanabe, S., "An End-to-End Integration of Speech Separation and Recognition with Self-Supervised Learning Representation", Computer Speech & Language, DOI: 10.1016/j.csl.2025.101813, Vol. 95, pp. 101813, May 2025.
BibTeX TR2025-054 PDF
- @article{Masuyama2025may,
-   author  = {Masuyama, Yoshiki and Chang, Xuankai and Zhang, Wangyou and Cornell, Samuele and Wang, Zhong-Qiu and Ono, Nobutaka and Qian, Yanmin and Watanabe, Shinji},
-   title   = {{An End-to-End Integration of Speech Separation and Recognition with Self-Supervised Learning Representation}},
-   journal = {Computer Speech \& Language},
-   volume  = {95},
-   pages   = {101813},
-   month   = may,
-   year    = {2025},
-   issn    = {0885-2308},
-   doi     = {10.1016/j.csl.2025.101813},
-   url     = {https://www.merl.com/publications/TR2025-054},
- }
Tian, J., Shi, J., Chen, W., Arora, S., Masuyama, Y., Maekaku, T., Wu, Y., Peng, J., Bharadwaj, S., Zhao, Y., Cornell, S., Peng, Y., Yue, X., Yang, C.-H.H., Neubig, G., Watanabe, S., "ESPnet-SpeechLM: An Open Speech Language Model Toolkit", NAACL-HLT (the system demonstration track), Dziri, N., Ren, S. X., and Diao, S., Eds., March 2025, pp. 116-124.
BibTeX TR2025-038 PDF
- @inproceedings{Tian2025mar,
-   author    = {Tian, Jinchuan and Shi, Jiatong and Chen, William and Arora, Siddhant and Masuyama, Yoshiki and Maekaku, Takashi and Wu, Yihan and Peng, Junyi and Bharadwaj, Shikhar and Zhao, Yiwen and Cornell, Samuele and Peng, Yifan and Yue, Xiang and Yang, Chao-Han H. and Neubig, Graham and Watanabe, Shinji},
-   title     = {{ESPnet-SpeechLM: An Open Speech Language Model Toolkit}},
-   booktitle = {NAACL-HLT (the system demonstration track)},
-   editor    = {Dziri, N. and Ren, S. X. and Diao, S.},
-   pages     = {116--124},
-   publisher = {Association for Computational Linguistics},
-   month     = mar,
-   year      = {2025},
-   url       = {https://www.merl.com/publications/TR2025-038},
- }
Kirihara, H., Ibuki, S., Fujihashi, T., Koike-Akino, T., Watanabe, T., "Point Cloud Geometry Compression using Parameterized Graph Fourier Transform", ACM SIGCOMM, DOI: 10.1145/3672196.367340, August 2024, pp. 52-57.
BibTeX TR2024-106 PDF
- @inproceedings{Kirihara2024aug,
-   author        = {Kirihara, Hinata and Ibuki, Shoichi and Fujihashi, Takuya and Koike-Akino, Toshiaki and Watanabe, Takashi},
-   title         = {{Point Cloud Geometry Compression using Parameterized Graph Fourier Transform}},
-   booktitle     = {ACM SIGCOMM},
-   pages         = {52--57},
-   publisher     = {ACM},
-   month         = aug,
-   year          = {2024},
-   doi           = {10.1145/3672196.367340},
-   internal-note = {NOTE(review): the DOI suffix has six digits; it may be truncated -- verify against the ACM Digital Library before relying on it},
-   url           = {https://www.merl.com/publications/TR2024-106},
- }
Wu, S.-L., Chang, X., Wichern, G., Jung, J.-W., Germain, F.G., Le Roux, J., Watanabe, S., "Improving Audio Captioning Models with Fine-grained Audio Features, Text Embedding Supervision, and LLM Mix-up Augmentation", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP48485.2024.10447215, March 2024, pp. 316-320.
BibTeX TR2024-028 PDF
- @inproceedings{Wu2024mar,
-   author    = {Wu, Shih-Lun and Chang, Xuankai and Wichern, Gordon and Jung, Jee-weon and Germain, Fran{\c{c}}ois G. and {Le Roux}, Jonathan and Watanabe, Shinji},
-   title     = {{Improving Audio Captioning Models with Fine-grained Audio Features, Text Embedding Supervision, and LLM Mix-up Augmentation}},
-   booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
-   pages     = {316--320},
-   publisher = {IEEE},
-   month     = mar,
-   year      = {2024},
-   doi       = {10.1109/ICASSP48485.2024.10447215},
-   url       = {https://www.merl.com/publications/TR2024-028},
- }
Kuwabara, A., Osako, Y., Kato, S., Fujihashi, T., Koike-Akino, T., Watanabe, T., "Implicit Neural Representation-based Hybrid Digital-Analog Image Delivery", IEEE International Conference on Computing, Networking and Communications (ICNC), DOI: 10.1109/ICNC59896.2024.10556282, February 2024.
BibTeX TR2024-007 PDF
- @inproceedings{Kuwabara2024feb,
-   author    = {Kuwabara, Akihiro and Osako, Yutaro and Kato, Sorachi and Fujihashi, Takuya and Koike-Akino, Toshiaki and Watanabe, Takashi},
-   title     = {{Implicit Neural Representation-based Hybrid Digital-Analog Image Delivery}},
-   booktitle = {IEEE International Conference on Computing, Networking and Communications (ICNC)},
-   publisher = {IEEE},
-   month     = feb,
-   year      = {2024},
-   isbn      = {979-8-3503-7099-7},
-   issn      = {2473-7585},
-   doi       = {10.1109/ICNC59896.2024.10556282},
-   url       = {https://www.merl.com/publications/TR2024-007},
- }
Fujihashi, T., Koike-Akino, T., Watanabe, T., "Rateless Deep Graph Joint Source Channel Coding for Holographic-Type Communication", IEEE Global Communications Conference (GLOBECOM), DOI: 10.1109/GLOBECOM54140.2023.10437920, December 2023.
BibTeX TR2023-139 PDF
- @inproceedings{Fujihashi2023dec,
-   author    = {Fujihashi, Takuya and Koike-Akino, Toshiaki and Watanabe, Takashi},
-   title     = {{Rateless Deep Graph Joint Source Channel Coding for Holographic-Type Communication}},
-   booktitle = {IEEE Global Communications Conference (GLOBECOM)},
-   publisher = {IEEE},
-   month     = dec,
-   year      = {2023},
-   isbn      = {979-8-3503-1090-0},
-   issn      = {2576-6813},
-   doi       = {10.1109/GLOBECOM54140.2023.10437920},
-   url       = {https://www.merl.com/publications/TR2023-139},
- }
Wu, S.-L., Chang, X., Wichern, G., Jung, J.-W., Germain, F.G., Le Roux, J., Watanabe, S., "On the Use of Pretrained Deep Audio Encoders for Automated Audio Captioning Tasks", International Symposium on Future Active Safety Technology toward zero traffic accidents (FAST-zero), November 2023.
BibTeX TR2023-141 PDF
- @inproceedings{Wu2023nov,
-   author    = {Wu, Shih-Lun and Chang, Xuankai and Wichern, Gordon and Jung, Jee-weon and Germain, Fran{\c{c}}ois G. and {Le Roux}, Jonathan and Watanabe, Shinji},
-   title     = {{On the Use of Pretrained Deep Audio Encoders for Automated Audio Captioning Tasks}},
-   booktitle = {International Symposium on Future Active Safety Technology toward zero traffic accidents (FAST-zero)},
-   month     = nov,
-   year      = {2023},
-   url       = {https://www.merl.com/publications/TR2023-141},
- }
Fujihashi, T., Koike-Akino, T., Watanabe, T., "Soft Delivery: Survey on A New Paradigm for Wireless and Mobile Multimedia Streaming", ACM Computing Surveys, DOI: 10.1145/3607139, Vol. 56, No. 2, pp. 1-37, August 2023.
BibTeX TR2023-102 PDF
- @article{Fujihashi2023aug,
-   author  = {Fujihashi, Takuya and Koike-Akino, Toshiaki and Watanabe, Takashi},
-   title   = {{Soft Delivery: Survey on A New Paradigm for Wireless and Mobile Multimedia Streaming}},
-   journal = {ACM Computing Surveys},
-   volume  = {56},
-   number  = {2},
-   pages   = {1--37},
-   month   = aug,
-   year    = {2023},
-   doi     = {10.1145/3607139},
-   url     = {https://www.merl.com/publications/TR2023-102},
- }
Wu, S.-L., Chang, X., Wichern, G., Jung, J.-W., Germain, F., Le Roux, J., Watanabe, S., "BEATs-based Audio Captioning Model with Instructor Embedding Supervision and ChatGPT Mix-up," Tech. Rep. TR2023-068, DCASE2023 Challenge, May 2023.
BibTeX TR2023-068 PDF
- @techreport{Wu2023may,
-   author      = {Wu, Shih-Lun and Chang, Xuankai and Wichern, Gordon and Jung, Jee-weon and Germain, Fran{\c{c}}ois and {Le Roux}, Jonathan and Watanabe, Shinji},
-   title       = {{BEATs-based Audio Captioning Model with Instructor Embedding Supervision and ChatGPT Mix-up}},
-   institution = {DCASE2023 Challenge},
-   number      = {TR2023-068},
-   month       = may,
-   year        = {2023},
-   url         = {https://www.merl.com/publications/TR2023-068},
- }
Fujihashi, T., Koike-Akino, T., Watanabe, T., "Soft 2D-to-3D Delivery Using Deep Graph Neural Networks for Holographic-Type Communication", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP49357.2023.10095882, May 2023.
BibTeX TR2023-031 PDF
- @inproceedings{Fujihashi2023may,
-   author    = {Fujihashi, Takuya and Koike-Akino, Toshiaki and Watanabe, Takashi},
-   title     = {{Soft 2D-to-3D Delivery Using Deep Graph Neural Networks for Holographic-Type Communication}},
-   booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
-   publisher = {IEEE},
-   month     = may,
-   year      = {2023},
-   isbn      = {978-1-7281-6327-7},
-   doi       = {10.1109/ICASSP49357.2023.10095882},
-   url       = {https://www.merl.com/publications/TR2023-031},
- }
Wang, Z.-Q., Wichern, G., Watanabe, S., Le Roux, J., "STFT-Domain Neural Speech Enhancement with Very Low Algorithmic Latency", IEEE/ACM Transactions on Audio, Speech, and Language Processing, DOI: 10.1109/TASLP.2022.3224285, Vol. 31, pp. 397-410, December 2022.
BibTeX TR2022-166 PDF
- @article{Wang2022dec2,
-   author  = {Wang, Zhong-Qiu and Wichern, Gordon and Watanabe, Shinji and {Le Roux}, Jonathan},
-   title   = {{STFT-Domain Neural Speech Enhancement with Very Low Algorithmic Latency}},
-   journal = {IEEE/ACM Transactions on Audio, Speech, and Language Processing},
-   volume  = {31},
-   pages   = {397--410},
-   month   = dec,
-   year    = {2022},
-   issn    = {2329-9304},
-   doi     = {10.1109/TASLP.2022.3224285},
-   url     = {https://www.merl.com/publications/TR2022-166},
- }
Ueno, S., Fujihashi, T., Koike-Akino, T., Watanabe, T., "Point Cloud Soft Multicast for Untethered XR Users", IEEE Transactions on Multimedia, DOI: 10.1109/TMM.2022.3218172, Vol. 25, pp. 7185-7195, December 2022.
BibTeX TR2022-164 PDF
- @article{SoushiUeno;Fujihashi2022dec,
-   author  = {Ueno, Soushi and Fujihashi, Takuya and Koike-Akino, Toshiaki and Watanabe, Takashi},
-   title   = {{Point Cloud Soft Multicast for Untethered XR Users}},
-   journal = {IEEE Transactions on Multimedia},
-   volume  = {25},
-   pages   = {7185--7195},
-   month   = dec,
-   year    = {2022},
-   issn    = {1941-0077},
-   doi     = {10.1109/TMM.2022.3218172},
-   url     = {https://www.merl.com/publications/TR2022-164},
- }
Chang, X., Moritz, N., Hori, T., Watanabe, S., Le Roux, J., "Extended Graph Temporal Classification for Multi-Speaker End-to-End ASR", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP43922.2022.9747375, April 2022, pp. 7322-7326.
BibTeX TR2022-021 PDF
- @inproceedings{Chang2022apr,
-   author    = {Chang, Xuankai and Moritz, Niko and Hori, Takaaki and Watanabe, Shinji and {Le Roux}, Jonathan},
-   title     = {{Extended Graph Temporal Classification for Multi-Speaker End-to-End ASR}},
-   booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
-   pages     = {7322--7326},
-   publisher = {IEEE},
-   month     = apr,
-   year      = {2022},
-   doi       = {10.1109/ICASSP43922.2022.9747375},
-   url       = {https://www.merl.com/publications/TR2022-021},
- }
Moritz, N., Hori, T., Watanabe, S., Le Roux, J., "Sequence Transduction with Graph-based Supervision", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP43922.2022.9747788, April 2022, pp. 7212-7216.
BibTeX TR2022-024 PDF
- @inproceedings{Moritz2022apr,
-   author    = {Moritz, Niko and Hori, Takaaki and Watanabe, Shinji and {Le Roux}, Jonathan},
-   title     = {{Sequence Transduction with Graph-based Supervision}},
-   booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
-   pages     = {7212--7216},
-   publisher = {IEEE},
-   month     = apr,
-   year      = {2022},
-   doi       = {10.1109/ICASSP43922.2022.9747788},
-   url       = {https://www.merl.com/publications/TR2022-024},
- }
Fujihashi, T., Koike-Akino, T., Watanabe, T., "Federated AirNet: Hybrid Digital-Analog Neural Network Transmission for Federated Learning", arXiv, January 2022.
BibTeX arXiv
- @article{Fujihashi2022jan2,
-   author        = {Fujihashi, Takuya and Koike-Akino, Toshiaki and Watanabe, Takashi},
-   title         = {{Federated AirNet: Hybrid Digital-Analog Neural Network Transmission for Federated Learning}},
-   journal       = {arXiv},
-   month         = jan,
-   year          = {2022},
-   eprint        = {2201.04557},
-   archiveprefix = {arXiv},
-   url           = {https://arxiv.org/abs/2201.04557},
- }
Fujihashi, T., Koike-Akino, T., Watanabe, T., "Overhead Reduction for Graph-Based Point Cloud Delivery Using Non-Uniform Quantization", IEEE International Conference on Consumer Electronics (ICCE), DOI: 10.1109/ICCE53296.2022.9730509, January 2022.
BibTeX TR2022-005 PDF
- @inproceedings{Fujihashi2022jan,
-   author    = {Fujihashi, Takuya and Koike-Akino, Toshiaki and Watanabe, Takashi},
-   title     = {{Overhead Reduction for Graph-Based Point Cloud Delivery Using Non-Uniform Quantization}},
-   booktitle = {IEEE International Conference on Consumer Electronics (ICCE)},
-   publisher = {IEEE},
-   month     = jan,
-   year      = {2022},
-   isbn      = {978-1-6654-4154-4},
-   issn      = {2158-4001},
-   doi       = {10.1109/ICCE53296.2022.9730509},
-   url       = {https://www.merl.com/publications/TR2022-005},
- }
Fujihashi, T., Koike-Akino, T., Chen, S., Watanabe, T., "Wireless 3D Point Cloud Delivery Using Deep Graph Neural Networks", IEEE International Conference on Communications (ICC), DOI: 10.1109/ICC42927.2021.9500925, June 2021.
BibTeX TR2021-066 PDF Presentation
- @inproceedings{Fujihashi2021jun,
-   author    = {Fujihashi, Takuya and Koike-Akino, Toshiaki and Chen, Siheng and Watanabe, Takashi},
-   title     = {{Wireless 3D Point Cloud Delivery Using Deep Graph Neural Networks}},
-   booktitle = {IEEE International Conference on Communications (ICC)},
-   publisher = {IEEE},
-   month     = jun,
-   year      = {2021},
-   isbn      = {978-1-7281-7122-7},
-   issn      = {1938-1883},
-   doi       = {10.1109/ICC42927.2021.9500925},
-   url       = {https://www.merl.com/publications/TR2021-066},
- }
Watanabe, S., Boyer, F., Chang, X., Guo, P., Hayashi, T., Higuchi, Y., Hori, T., Huang, W.-C., Inaguma, H., Kamo, N., Karita, S., Li, C., Shi, J., Subramanian, A.S., Zhang, W., "The 2020 ESPNET Update: New Features, Broadened Applications, Performance Improvements, and Future Plans", IEEE Data Science and Learning Workshop (DSLW), DOI: 10.1109/DSLW51110, June 2021, pp. 1-6.
BibTeX TR2021-073 PDF
- @inproceedings{Watanabe2021jun,
-   author        = {Watanabe, Shinji and Boyer, Florian and Chang, Xuankai and Guo, Pengcheng and Hayashi, Tomoki and Higuchi, Yosuke and Hori, Takaaki and Huang, Wen-Chin and Inaguma, Hirofumi and Kamo, Naoyuki and Karita, Shigeki and Li, Chenda and Shi, Jing and Subramanian, Aswin S. and Zhang, Wangyou},
-   title         = {{The 2020 ESPNET Update: New Features, Broadened Applications, Performance Improvements, and Future Plans}},
-   booktitle     = {IEEE Data Science and Learning Workshop (DSLW)},
-   pages         = {1--6},
-   publisher     = {IEEE},
-   month         = jun,
-   year          = {2021},
-   isbn          = {978-1-6654-2826-2},
-   doi           = {10.1109/DSLW51110},
-   internal-note = {NOTE(review): the DOI looks like the proceedings-level prefix without a per-paper suffix; possibly 10.1109/DSLW51110.2021.9523402 -- verify on IEEE Xplore},
-   url           = {https://www.merl.com/publications/TR2021-073},
- }
Fujihashi, T., Koike-Akino, T., Watanabe, T., Orlik, P.V., "HoloCast+: Hybrid Digital-Analog Transmission for Graceful Point Cloud Delivery with Graph Fourier Transform", IEEE Transactions on Multimedia, DOI: 10.1109/TMM.2021.3077772, May 2021.
BibTeX TR2021-043 PDF Presentation
- @article{Fujihashi2021may,
-   author  = {Fujihashi, Takuya and Koike-Akino, Toshiaki and Watanabe, Takashi and Orlik, Philip V.},
-   title   = {{HoloCast+: Hybrid Digital-Analog Transmission for Graceful Point Cloud Delivery with Graph Fourier Transform}},
-   journal = {IEEE Transactions on Multimedia},
-   month   = may,
-   year    = {2021},
-   issn    = {1941-0077},
-   doi     = {10.1109/TMM.2021.3077772},
-   url     = {https://www.merl.com/publications/TR2021-043},
- }
Fujihashi, T., Koike-Akino, T., Chen, S., Watanabe, T., "Wireless 3D Point Cloud Delivery Using Deep Graph Neural Networks", arXiv, June 2020.
BibTeX arXiv
- @article{Fujihashi2020jun,
-   author        = {Fujihashi, Takuya and Koike-Akino, Toshiaki and Chen, Siheng and Watanabe, Takashi},
-   title         = {{Wireless 3D Point Cloud Delivery Using Deep Graph Neural Networks}},
-   journal       = {arXiv},
-   month         = jun,
-   year          = {2020},
-   eprint        = {2006.09835},
-   archiveprefix = {arXiv},
-   url           = {https://arxiv.org/abs/2006.09835},
- }
Fujihashi, T., Koike-Akino, T., Watanabe, T., Orlik, P.V., "Overhead Reduction in Graph-Based Point Cloud Delivery", IEEE International Conference on Communications (ICC), DOI: 10.1109/ICC40277.2020.9148650, May 2020.
BibTeX TR2020-061 PDF Video
- @inproceedings{Fujihashi2020may2,
-   author    = {Fujihashi, Takuya and Koike-Akino, Toshiaki and Watanabe, Takashi and Orlik, Philip V.},
-   title     = {{Overhead Reduction in Graph-Based Point Cloud Delivery}},
-   booktitle = {IEEE International Conference on Communications (ICC)},
-   publisher = {IEEE},
-   month     = may,
-   year      = {2020},
-   isbn      = {978-1-7281-5089-5},
-   issn      = {1938-1883},
-   doi       = {10.1109/ICC40277.2020.9148650},
-   url       = {https://www.merl.com/publications/TR2020-061},
- }
Fujihashi, T., Koike-Akino, T., Watanabe, T., Orlik, P.V., "High-Quality Soft Image Delivery with Deep Image Denoising", IEEE International Conference on Communications (ICC), DOI: 10.1109/ICC40277.2020.9148611, May 2020.
BibTeX TR2020-060 PDF Video
- @inproceedings{Fujihashi2020may,
-   author    = {Fujihashi, Takuya and Koike-Akino, Toshiaki and Watanabe, Takashi and Orlik, Philip V.},
-   title     = {{High-Quality Soft Image Delivery with Deep Image Denoising}},
-   booktitle = {IEEE International Conference on Communications (ICC)},
-   publisher = {IEEE},
-   month     = may,
-   year      = {2020},
-   isbn      = {978-1-7281-5089-5},
-   issn      = {1938-1883},
-   doi       = {10.1109/ICC40277.2020.9148611},
-   url       = {https://www.merl.com/publications/TR2020-060},
- }
Fujihashi, T., Koike-Akino, T., Orlik, P.V., Watanabe, T., "High-Throughput Visual MIMO Systems for Screen-Camera Communications", IEEE Transactions on Mobile Computing, April 2020.
BibTeX TR2020-048 PDF
- @article{Fujihashi2020apr,
-   author  = {Fujihashi, Takuya and Koike-Akino, Toshiaki and Orlik, Philip V. and Watanabe, Takashi},
-   title   = {{High-Throughput Visual MIMO Systems for Screen-Camera Communications}},
-   journal = {IEEE Transactions on Mobile Computing},
-   month   = apr,
-   year    = {2020},
-   url     = {https://www.merl.com/publications/TR2020-048},
- }