Publications

140 / 3,604 publications found.


  •  Wu, S.-L., Chang, X., Wichern, G., Jung, J.-W., Germain, F.G., Le Roux, J., Watanabe, S., "Improving Audio Captioning Models with Fine-grained Audio Features, Text Embedding Supervision, and LLM Mix-up Augmentation", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), March 2024.
    BibTeX TR2024-028 PDF
    • @inproceedings{Wu2024mar,
    • author = {Wu, Shih-Lun and Chang, Xuankai and Wichern, Gordon and Jung, Jee-weon and Germain, François G and Le Roux, Jonathan and Watanabe, Shinji},
    • title = {Improving Audio Captioning Models with Fine-grained Audio Features, Text Embedding Supervision, and LLM Mix-up Augmentation},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2024,
    • month = mar,
    • url = {https://www.merl.com/publications/TR2024-028}
    • }
  •  Kuwabara, A., Osako, Y., Kato, S., Fujihashi, T., Koike-Akino, T., Watanabe, T., "Implicit Neural Representation-based Hybrid Digital-Analog Image Delivery", IEEE International Conference on Computing, Networking and Communications (ICNC), February 2024.
    BibTeX TR2024-007 PDF
    • @inproceedings{Kuwabara2024feb,
    • author = {Kuwabara, Akihiro and Osako, Yutaro and Kato, Sorachi and Fujihashi, Takuya and Koike-Akino, Toshiaki and Watanabe, Takashi},
    • title = {Implicit Neural Representation-based Hybrid Digital-Analog Image Delivery},
    • booktitle = {IEEE International Conference on Computing, Networking and Communications (ICNC)},
    • year = 2024,
    • month = feb,
    • url = {https://www.merl.com/publications/TR2024-007}
    • }
  •  Fujihashi, T., Koike-Akino, T., Watanabe, T., "Rateless Deep Graph Joint Source Channel Coding for Holographic-Type Communication", IEEE Global Communications Conference (GLOBECOM), DOI: 10.1109/GLOBECOM54140.2023.10437920, December 2023.
    BibTeX TR2023-139 PDF
    • @inproceedings{Fujihashi2023dec,
    • author = {Fujihashi, Takuya and Koike-Akino, Toshiaki and Watanabe, Takashi},
    • title = {Rateless Deep Graph Joint Source Channel Coding for Holographic-Type Communication},
    • booktitle = {IEEE Global Communications Conference (GLOBECOM)},
    • year = 2023,
    • month = dec,
    • publisher = {IEEE},
    • doi = {10.1109/GLOBECOM54140.2023.10437920},
    • issn = {2576-6813},
    • isbn = {979-8-3503-1090-0},
    • url = {https://www.merl.com/publications/TR2023-139}
    • }
  •  Wu, S.-L., Chang, X., Wichern, G., Jung, J.-W., Germain, F.G., Le Roux, J., Watanabe, S., "On the Use of Pretrained Deep Audio Encoders for Automated Audio Captioning Tasks", International Symposium on Future Active Safety Technology toward zero traffic accidents (FAST-zero), November 2023.
    BibTeX TR2023-141 PDF
    • @inproceedings{Wu2023nov,
    • author = {Wu, Shih-Lun and Chang, Xuankai and Wichern, Gordon and Jung, Jee-weon and Germain, François G and Le Roux, Jonathan and Watanabe, Shinji},
    • title = {On the Use of Pretrained Deep Audio Encoders for Automated Audio Captioning Tasks},
    • booktitle = {International Symposium on Future Active Safety Technology toward zero traffic accidents (FAST-zero)},
    • year = 2023,
    • month = nov,
    • url = {https://www.merl.com/publications/TR2023-141}
    • }
  •  Fujihashi, T., Koike-Akino, T., Watanabe, T., "Soft Delivery: Survey on A New Paradigm for Wireless and Mobile Multimedia Streaming", ACM Computing Surveys, DOI: 10.1145/3607139, Vol. 56, No. 2, pp. 1-37, August 2023.
    BibTeX TR2023-102 PDF
    • @article{Fujihashi2023aug,
    • author = {Fujihashi, Takuya and Koike-Akino, Toshiaki and Watanabe, Takashi},
    • title = {Soft Delivery: Survey on A New Paradigm for Wireless and Mobile Multimedia Streaming},
    • journal = {ACM Computing Surveys},
    • year = 2023,
    • volume = 56,
    • number = 2,
    • pages = {1--37},
    • month = aug,
    • doi = {10.1145/3607139},
    • url = {https://www.merl.com/publications/TR2023-102}
    • }
  •  Wu, S.-L., Chang, X., Wichern, G., Jung, J.-W., Germain, F.G., Le Roux, J., Watanabe, S., "BEATs-based Audio Captioning Model with Instructor Embedding Supervision and ChatGPT Mix-up," Tech. Rep. TR2023-068, DCASE2023 Challenge, May 2023.
    BibTeX TR2023-068 PDF
    • @techreport{Wu2023may,
    • author = {Wu, Shih-Lun and Chang, Xuankai and Wichern, Gordon and Jung, Jee-weon and Germain, François G and Le Roux, Jonathan and Watanabe, Shinji},
    • title = {BEATs-based Audio Captioning Model with Instructor Embedding Supervision and ChatGPT Mix-up},
    • institution = {DCASE2023 Challenge},
    • year = 2023,
    • month = may,
    • url = {https://www.merl.com/publications/TR2023-068}
    • }
  •  Fujihashi, T., Koike-Akino, T., Watanabe, T., "Soft 2D-to-3D Delivery Using Deep Graph Neural Networks for Holographic-Type Communication", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP49357.2023.10095882, May 2023.
    BibTeX TR2023-031 PDF
    • @inproceedings{Fujihashi2023may,
    • author = {Fujihashi, Takuya and Koike-Akino, Toshiaki and Watanabe, Takashi},
    • title = {Soft 2D-to-3D Delivery Using Deep Graph Neural Networks for Holographic-Type Communication},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2023,
    • month = may,
    • publisher = {IEEE},
    • doi = {10.1109/ICASSP49357.2023.10095882},
    • isbn = {978-1-7281-6327-7},
    • url = {https://www.merl.com/publications/TR2023-031}
    • }
  •  Wang, Z.-Q., Wichern, G., Watanabe, S., Le Roux, J., "STFT-Domain Neural Speech Enhancement with Very Low Algorithmic Latency", IEEE/ACM Transactions on Audio, Speech, and Language Processing, DOI: 10.1109/TASLP.2022.3224285, Vol. 31, pp. 397-410, December 2022.
    BibTeX TR2022-166 PDF
    • @article{Wang2022dec2,
    • author = {Wang, Zhong-Qiu and Wichern, Gordon and Watanabe, Shinji and Le Roux, Jonathan},
    • title = {STFT-Domain Neural Speech Enhancement with Very Low Algorithmic Latency},
    • journal = {IEEE/ACM Transactions on Audio, Speech, and Language Processing},
    • year = 2022,
    • volume = 31,
    • pages = {397--410},
    • month = dec,
    • doi = {10.1109/TASLP.2022.3224285},
    • issn = {2329-9304},
    • url = {https://www.merl.com/publications/TR2022-166}
    • }
  •  Ueno, S., Fujihashi, T., Koike-Akino, T., Watanabe, T., "Point Cloud Soft Multicast for Untethered XR Users", IEEE Transactions on Multimedia, DOI: 10.1109/TMM.2022.3218172, Vol. 25, pp. 7185-7195, December 2022.
    BibTeX TR2022-164 PDF
    • @article{SoushiUeno;Fujihashi2022dec,
    • author = {Ueno, Soushi and Fujihashi, Takuya and Koike-Akino, Toshiaki and Watanabe, Takashi},
    • title = {Point Cloud Soft Multicast for Untethered XR Users},
    • journal = {IEEE Transactions on Multimedia},
    • year = 2022,
    • volume = 25,
    • pages = {7185--7195},
    • month = dec,
    • doi = {10.1109/TMM.2022.3218172},
    • issn = {1941-0077},
    • url = {https://www.merl.com/publications/TR2022-164}
    • }
  •  Chang, X., Moritz, N., Hori, T., Watanabe, S., Le Roux, J., "Extended Graph Temporal Classification for Multi-Speaker End-to-End ASR", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP43922.2022.9747375, April 2022, pp. 7322-7326.
    BibTeX TR2022-021 PDF
    • @inproceedings{Chang2022apr,
    • author = {Chang, Xuankai and Moritz, Niko and Hori, Takaaki and Watanabe, Shinji and Le Roux, Jonathan},
    • title = {Extended Graph Temporal Classification for Multi-Speaker End-to-End ASR},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2022,
    • pages = {7322--7326},
    • month = apr,
    • publisher = {IEEE},
    • doi = {10.1109/ICASSP43922.2022.9747375},
    • url = {https://www.merl.com/publications/TR2022-021}
    • }
  •  Moritz, N., Hori, T., Watanabe, S., Le Roux, J., "Sequence Transduction with Graph-based Supervision", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP43922.2022.9747788, April 2022, pp. 7212-7216.
    BibTeX TR2022-024 PDF
    • @inproceedings{Moritz2022apr,
    • author = {Moritz, Niko and Hori, Takaaki and Watanabe, Shinji and Le Roux, Jonathan},
    • title = {Sequence Transduction with Graph-based Supervision},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2022,
    • pages = {7212--7216},
    • month = apr,
    • publisher = {IEEE},
    • doi = {10.1109/ICASSP43922.2022.9747788},
    • url = {https://www.merl.com/publications/TR2022-024}
    • }
  •  Fujihashi, T., Koike-Akino, T., Watanabe, T., "Federated AirNet: Hybrid Digital-Analog Neural Network Transmission for Federated Learning", arXiv, January 2022.
    BibTeX arXiv
    • @article{Fujihashi2022jan2,
    • author = {Fujihashi, Takuya and Koike-Akino, Toshiaki and Watanabe, Takashi},
    • title = {Federated AirNet: Hybrid Digital-Analog Neural Network Transmission for Federated Learning},
    • journal = {arXiv},
    • year = 2022,
    • month = jan,
    • url = {https://arxiv.org/abs/2201.04557}
    • }
  •  Fujihashi, T., Koike-Akino, T., Watanabe, T., "Overhead Reduction for Graph-Based Point Cloud Delivery Using Non-Uniform Quantization", IEEE International Conference on Consumer Electronics (ICCE), DOI: 10.1109/ICCE53296.2022.9730509, January 2022.
    BibTeX TR2022-005 PDF
    • @inproceedings{Fujihashi2022jan,
    • author = {Fujihashi, Takuya and Koike-Akino, Toshiaki and Watanabe, Takashi},
    • title = {Overhead Reduction for Graph-Based Point Cloud Delivery Using Non-Uniform Quantization},
    • booktitle = {IEEE International Conference on Consumer Electronics (ICCE)},
    • year = 2022,
    • month = jan,
    • publisher = {IEEE},
    • doi = {10.1109/ICCE53296.2022.9730509},
    • issn = {2158-4001},
    • isbn = {978-1-6654-4154-4},
    • url = {https://www.merl.com/publications/TR2022-005}
    • }
  •  Fujihashi, T., Koike-Akino, T., Chen, S., Watanabe, T., "Wireless 3D Point Cloud Delivery Using Deep Graph Neural Networks", IEEE International Conference on Communications (ICC), DOI: 10.1109/ICC42927.2021.9500925, June 2021.
    BibTeX TR2021-066 PDF Presentation
    • @inproceedings{Fujihashi2021jun,
    • author = {Fujihashi, Takuya and Koike-Akino, Toshiaki and Chen, Siheng and Watanabe, Takashi},
    • title = {Wireless 3D Point Cloud Delivery Using Deep Graph Neural Networks},
    • booktitle = {IEEE International Conference on Communications (ICC)},
    • year = 2021,
    • month = jun,
    • publisher = {IEEE},
    • doi = {10.1109/ICC42927.2021.9500925},
    • issn = {1938-1883},
    • isbn = {978-1-7281-7122-7},
    • url = {https://www.merl.com/publications/TR2021-066}
    • }
  •  Watanabe, S., Boyer, F., Chang, X., Guo, P., Hayashi, T., Higuchi, Y., Hori, T., Huang, W.-C., Inaguma, H., Kamo, N., Karita, S., Li, C., Shi, J., Subramanian, A.S., Zhang, W., "The 2020 ESPNET Update: New Features, Broadened Applications, Performance Improvements, and Future Plans", IEEE Data Science and Learning Workshop (DSLW), DOI: 10.1109/DSLW51110, June 2021, pp. 1-6.
    BibTeX TR2021-073 PDF
    • @inproceedings{Watanabe2021jun,
    • author = {Watanabe, Shinji and Boyer, Florian and Chang, Xuankai and Guo, Pengcheng and Hayashi, Tomoki and Higuchi, Yosuke and Hori, Takaaki and Huang, Wen-Chin and Inaguma, Hirofumi and Kamo, Naoyuki and Karita, Shigeki and Li, Chenda and Shi, Jing and Subramanian, Aswin S and Zhang, Wangyou},
    • title = {The 2020 ESPNET Update: New Features, Broadened Applications, Performance Improvements, and Future Plans},
    • booktitle = {IEEE Data Science and Learning Workshop (DSLW)},
    • year = 2021,
    • pages = {1--6},
    • month = jun,
    • publisher = {IEEE},
    • doi = {10.1109/DSLW51110},
    • isbn = {978-1-6654-2826-2},
    • url = {https://www.merl.com/publications/TR2021-073}
    • }
  •  Fujihashi, T., Koike-Akino, T., Watanabe, T., Orlik, P.V., "HoloCast+: Hybrid Digital-Analog Transmission for Graceful Point Cloud Delivery with Graph Fourier Transform", IEEE Transactions on Multimedia, DOI: 10.1109/TMM.2021.3077772, May 2021.
    BibTeX TR2021-043 PDF Presentation
    • @article{Fujihashi2021may,
    • author = {Fujihashi, Takuya and Koike-Akino, Toshiaki and Watanabe, Takashi and Orlik, Philip V.},
    • title = {HoloCast+: Hybrid Digital-Analog Transmission for Graceful Point Cloud Delivery with Graph Fourier Transform},
    • journal = {IEEE Transactions on Multimedia},
    • year = 2021,
    • month = may,
    • doi = {10.1109/TMM.2021.3077772},
    • issn = {1941-0077},
    • url = {https://www.merl.com/publications/TR2021-043}
    • }
  •  Fujihashi, T., Koike-Akino, T., Chen, S., Watanabe, T., "Wireless 3D Point Cloud Delivery Using Deep Graph Neural Networks", arXiv, June 2020.
    BibTeX arXiv
    • @article{Fujihashi2020jun,
    • author = {Fujihashi, Takuya and Koike-Akino, Toshiaki and Chen, Siheng and Watanabe, Takashi},
    • title = {Wireless 3D Point Cloud Delivery Using Deep Graph Neural Networks},
    • journal = {arXiv},
    • year = 2020,
    • month = jun,
    • url = {https://arxiv.org/abs/2006.09835}
    • }
  •  Fujihashi, T., Koike-Akino, T., Watanabe, T., Orlik, P.V., "Overhead Reduction in Graph-Based Point Cloud Delivery", IEEE International Conference on Communications (ICC), DOI: 10.1109/ICC40277.2020.9148650, May 2020.
    BibTeX TR2020-061 PDF Video
    • @inproceedings{Fujihashi2020may2,
    • author = {Fujihashi, Takuya and Koike-Akino, Toshiaki and Watanabe, Takashi and Orlik, Philip V.},
    • title = {Overhead Reduction in Graph-Based Point Cloud Delivery},
    • booktitle = {IEEE International Conference on Communications (ICC)},
    • year = 2020,
    • month = may,
    • publisher = {IEEE},
    • doi = {10.1109/ICC40277.2020.9148650},
    • issn = {1938-1883},
    • isbn = {978-1-7281-5089-5},
    • url = {https://www.merl.com/publications/TR2020-061}
    • }
  •  Fujihashi, T., Koike-Akino, T., Watanabe, T., Orlik, P.V., "High-Quality Soft Image Delivery with Deep Image Denoising", IEEE International Conference on Communications (ICC), DOI: 10.1109/ICC40277.2020.9148611, May 2020.
    BibTeX TR2020-060 PDF Video
    • @inproceedings{Fujihashi2020may,
    • author = {Fujihashi, Takuya and Koike-Akino, Toshiaki and Watanabe, Takashi and Orlik, Philip V.},
    • title = {High-Quality Soft Image Delivery with Deep Image Denoising},
    • booktitle = {IEEE International Conference on Communications (ICC)},
    • year = 2020,
    • month = may,
    • publisher = {IEEE},
    • doi = {10.1109/ICC40277.2020.9148611},
    • issn = {1938-1883},
    • isbn = {978-1-7281-5089-5},
    • url = {https://www.merl.com/publications/TR2020-060}
    • }
  •  Fujihashi, T., Koike-Akino, T., Orlik, P.V., Watanabe, T., "High-Throughput Visual MIMO Systems for Screen-Camera Communications", IEEE Transactions on Mobile Computing, April 2020.
    BibTeX TR2020-048 PDF
    • @article{Fujihashi2020apr,
    • author = {Fujihashi, Takuya and Koike-Akino, Toshiaki and Orlik, Philip V. and Watanabe, Takashi},
    • title = {High-Throughput Visual MIMO Systems for Screen-Camera Communications},
    • journal = {IEEE Transactions on Mobile Computing},
    • year = 2020,
    • month = apr,
    • url = {https://www.merl.com/publications/TR2020-048}
    • }
  •  Chang, X., Zhang, W., Qian, Y., Le Roux, J., Watanabe, S., "End-To-End Multi-Speaker Speech Recognition with Transformer", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP40776.2020.9054029, April 2020, pp. 6134-6138.
    BibTeX TR2020-043 PDF Video Presentation
    • @inproceedings{Chang2020apr,
    • author = {Chang, Xuankai and Zhang, Wangyou and Qian, Yanmin and Le Roux, Jonathan and Watanabe, Shinji},
    • title = {End-To-End Multi-Speaker Speech Recognition with Transformer},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2020,
    • pages = {6134--6138},
    • month = apr,
    • publisher = {IEEE},
    • doi = {10.1109/ICASSP40776.2020.9054029},
    • issn = {2379-190X},
    • isbn = {978-1-5090-6631-5},
    • url = {https://www.merl.com/publications/TR2020-043}
    • }
  •  Li, R., Wang, X., Mallidi, H., Watanabe, S., Hori, T., Hermansky, H., "Multi-Stream End-to-End Speech Recognition", IEEE/ACM Transactions on Audio, Speech, and Language Processing, DOI: 10.1109/TASLP.2019.2959721, Vol. 28, pp. 646-655, March 2020.
    BibTeX TR2020-030 PDF
    • @article{Li2020mar,
    • author = {Li, Ruizhi and Wang, Xiaofei and Mallidi, Harish and Watanabe, Shinji and Hori, Takaaki and Hermansky, Hynek},
    • title = {Multi-Stream End-to-End Speech Recognition},
    • journal = {IEEE/ACM Transactions on Audio, Speech, and Language Processing},
    • year = 2020,
    • volume = 28,
    • pages = {646--655},
    • month = mar,
    • doi = {10.1109/TASLP.2019.2959721},
    • url = {https://www.merl.com/publications/TR2020-030}
    • }
  •  Chang, X., Zhang, W., Qian, Y., Le Roux, J., Watanabe, S., "MIMO-Speech: End-to-End Multi-Channel Multi-Speaker Speech Recognition", IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU), December 2019, pp. 237-244.
    BibTeX TR2019-157 PDF
    • @inproceedings{Chang2019dec,
    • author = {Chang, Xuankai and Zhang, Wangyou and Qian, Yanmin and Le Roux, Jonathan and Watanabe, Shinji},
    • title = {MIMO-Speech: End-to-End Multi-Channel Multi-Speaker Speech Recognition},
    • booktitle = {IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU)},
    • year = 2019,
    • pages = {237--244},
    • month = dec,
    • isbn = {978-1-7281-0305-1},
    • url = {https://www.merl.com/publications/TR2019-157}
    • }
  •  Karita, S., Chen, N., Hayashi, T., Hori, T., Inaguma, H., Jiang, Z., Someki, M., Yalta Soplin, N.E., Yamamoto, R., Wang, X., Watanabe, S., Yoshimura, T., Zhang, W., "A Comparative Study on Transformer Vs RNN in Speech Applications", IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU), DOI: 10.1109/ASRU46091.2019.9003750, December 2019, pp. 449-456.
    BibTeX TR2019-158 PDF
    • @inproceedings{Karita2019dec,
    • author = {Karita, Shigeki and Chen, Nanxin and Hayashi, Tomoki and Hori, Takaaki and Inaguma, Hirofumi and Jiang, Ziyan and Someki, Masao and Yalta Soplin, Nelson Enrique and Yamamoto, Ryuichi and Wang, Xiaofei and Watanabe, Shinji and Yoshimura, Takenori and Zhang, Wangyou},
    • title = {A Comparative Study on Transformer Vs RNN in Speech Applications},
    • booktitle = {IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU)},
    • year = 2019,
    • pages = {449--456},
    • month = dec,
    • doi = {10.1109/ASRU46091.2019.9003750},
    • url = {https://www.merl.com/publications/TR2019-158}
    • }
  •  Fujihashi, T., Koike-Akino, T., Watanabe, T., Orlik, P.V., "DNN-based Simultaneous Screen-to-Camera and Screen-to-Eye Communications", IEEE Global Communications Conference (GLOBECOM), DOI: 10.1109/GLOBECOM38437.2019.9013967, December 2019.
    BibTeX TR2019-132 PDF
    • @inproceedings{Fujihashi2019dec,
    • author = {Fujihashi, Takuya and Koike-Akino, Toshiaki and Watanabe, Takashi and Orlik, Philip V.},
    • title = {DNN-based Simultaneous Screen-to-Camera and Screen-to-Eye Communications},
    • booktitle = {IEEE Global Communications Conference (GLOBECOM)},
    • year = 2019,
    • month = dec,
    • publisher = {IEEE},
    • doi = {10.1109/GLOBECOM38437.2019.9013967},
    • issn = {2576-6813},
    • isbn = {978-1-7281-0962-6},
    • url = {https://www.merl.com/publications/TR2019-132}
    • }