- Wang, Z.-Q., Wichern, G., Watanabe, S., Le Roux, J., "STFT-Domain Neural Speech Enhancement with Very Low Algorithmic Latency", IEEE/ACM Transactions on Audio, Speech, and Language Processing, December 2022.
BibTeX TR2022-166 PDF- @article{Wang2022dec2,
- author = {Wang, Zhong-Qiu and Wichern, Gordon and Watanabe, Shinji and Le Roux, Jonathan},
- title = {STFT-Domain Neural Speech Enhancement with Very Low Algorithmic Latency},
- journal = {IEEE/ACM Transactions on Audio, Speech, and Language Processing},
- year = 2022,
- month = dec,
- url = {https://www.merl.com/publications/TR2022-166}
- }
- Ueno, S., Fujihashi, T., Koike-Akino, T., Watanabe, T., "Point Cloud Soft Multicast for Untethered XR Users", IEEE Transactions on Multimedia, December 2022.
BibTeX TR2022-164 PDF- @article{SoushiUeno;Fujihashi2022dec,
- author = {Ueno, Soushi and Fujihashi, Takuya and Koike-Akino, Toshiaki and Watanabe, Takashi},
- title = {Point Cloud Soft Multicast for Untethered XR Users},
- journal = {IEEE Transactions on Multimedia},
- year = 2022,
- month = dec,
- url = {https://www.merl.com/publications/TR2022-164}
- }
- Chang, X., Moritz, N., Hori, T., Watanabe, S., Le Roux, J., "Extended Graph Temporal Classification for Multi-Speaker End-to-End ASR", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP43922.2022.9747375, April 2022, pp. 7322-7326.
BibTeX TR2022-021 PDF- @inproceedings{Chang2022apr,
- author = {Chang, Xuankai and Moritz, Niko and Hori, Takaaki and Watanabe, Shinji and Le Roux, Jonathan},
- title = {Extended Graph Temporal Classification for Multi-Speaker End-to-End ASR},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- year = 2022,
- pages = {7322--7326},
- month = apr,
- publisher = {IEEE},
- doi = {10.1109/ICASSP43922.2022.9747375},
- url = {https://www.merl.com/publications/TR2022-021}
- }
- Moritz, N., Hori, T., Watanabe, S., Le Roux, J., "Sequence Transduction with Graph-based Supervision", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP43922.2022.9747788, April 2022, pp. 7212-7216.
BibTeX TR2022-024 PDF- @inproceedings{Moritz2022apr,
- author = {Moritz, Niko and Hori, Takaaki and Watanabe, Shinji and Le Roux, Jonathan},
- title = {Sequence Transduction with Graph-based Supervision},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- year = 2022,
- pages = {7212--7216},
- month = apr,
- publisher = {IEEE},
- doi = {10.1109/ICASSP43922.2022.9747788},
- url = {https://www.merl.com/publications/TR2022-024}
- }
- Fujihashi, T., Koike-Akino, T., Watanabe, T., "Federated AirNet: Hybrid Digital-Analog Neural Network Transmission for Federated Learning", arXiv, January 2022.
BibTeX arXiv- @article{Fujihashi2022jan2,
- author = {Fujihashi, Takuya and Koike-Akino, Toshiaki and Watanabe, Takashi},
- title = {Federated AirNet: Hybrid Digital-Analog Neural Network Transmission for Federated Learning},
- journal = {arXiv},
- year = 2022,
- month = jan,
- url = {https://arxiv.org/abs/2201.04557}
- }
- Fujihashi, T., Koike-Akino, T., Watanabe, T., "Overhead Reduction for Graph-Based Point Cloud Delivery Using Non-Uniform Quantization", IEEE International Conference on Consumer Electronics (ICCE), DOI: 10.1109/ICCE53296.2022.9730509, January 2022.
BibTeX TR2022-005 PDF- @inproceedings{Fujihashi2022jan,
- author = {Fujihashi, Takuya and Koike-Akino, Toshiaki and Watanabe, Takashi},
- title = {Overhead Reduction for Graph-Based Point Cloud Delivery Using Non-Uniform Quantization},
- booktitle = {IEEE International Conference on Consumer Electronics (ICCE)},
- year = 2022,
- month = jan,
- publisher = {IEEE},
- doi = {10.1109/ICCE53296.2022.9730509},
- issn = {2158-4001},
- isbn = {978-1-6654-4154-4},
- url = {https://www.merl.com/publications/TR2022-005}
- }
- Fujihashi, T., Koike-Akino, T., Watanabe, T., "Soft Delivery: Survey on a New Paradigm for Wireless and Mobile Multimedia Streaming", arXiv, November 2021.
BibTeX arXiv- @article{Fujihashi2021nov,
- author = {Fujihashi, Takuya and Koike-Akino, Toshiaki and Watanabe, Takashi},
- title = {Soft Delivery: Survey on a New Paradigm for Wireless and Mobile Multimedia Streaming},
- journal = {arXiv},
- year = 2021,
- month = nov,
- url = {https://arxiv.org/abs/2111.08189}
- }
- Fujihashi, T., Koike-Akino, T., Chen, S., Watanabe, T., "Wireless 3D Point Cloud Delivery Using Deep Graph Neural Networks", IEEE International Conference on Communications (ICC), DOI: 10.1109/ICC42927.2021.9500925, June 2021.
BibTeX TR2021-066 PDF Presentation- @inproceedings{Fujihashi2021jun,
- author = {Fujihashi, Takuya and Koike-Akino, Toshiaki and Chen, Siheng and Watanabe, Takashi},
- title = {Wireless 3D Point Cloud Delivery Using Deep Graph Neural Networks},
- booktitle = {IEEE International Conference on Communications (ICC)},
- year = 2021,
- month = jun,
- publisher = {IEEE},
- doi = {10.1109/ICC42927.2021.9500925},
- issn = {1938-1883},
- isbn = {978-1-7281-7122-7},
- url = {https://www.merl.com/publications/TR2021-066}
- }
- Watanabe, S., Boyer, F., Chang, X., Guo, P., Hayashi, T., Higuchi, Y., Hori, T., Huang, W.-C., Inaguma, H., Kamo, N., Karita, S., Li, C., Shi, J., Subramanian, A.S., Zhang, W., "The 2020 ESPNET Update: New Features, Broadened Applications, Performance Improvements, and Future Plans", IEEE Data Science and Learning Workshop (DSLW), DOI: 10.1109/DSLW51110, June 2021, pp. 1-6.
BibTeX TR2021-073 PDF- @inproceedings{Watanabe2021jun,
- author = {Watanabe, Shinji and Boyer, Florian and Chang, Xuankai and Guo, Pengcheng and Hayashi, Tomoki and Higuchi, Yosuke and Hori, Takaaki and Huang, Wen-Chin and Inaguma, Hirofumi and Kamo, Naoyuki and Karita, Shigeki and Li, Chenda and Shi, Jing and Subramanian, Aswin S and Zhang, Wangyou},
- title = {The 2020 ESPNET Update: New Features, Broadened Applications, Performance Improvements, and Future Plans},
- booktitle = {IEEE Data Science and Learning Workshop (DSLW)},
- year = 2021,
- pages = {1--6},
- month = jun,
- publisher = {IEEE},
- doi = {10.1109/DSLW51110},
- isbn = {978-1-6654-2826-2},
- url = {https://www.merl.com/publications/TR2021-073}
- }
- Fujihashi, T., Koike-Akino, T., Watanabe, T., Orlik, P.V., "HoloCast+: Hybrid Digital-Analog Transmission for Graceful Point Cloud Delivery with Graph Fourier Transform", IEEE Transactions on Multimedia, DOI: 10.1109/TMM.2021.3077772, May 2021.
BibTeX TR2021-043 PDF Presentation- @article{Fujihashi2021may,
- author = {Fujihashi, Takuya and Koike-Akino, Toshiaki and Watanabe, Takashi and Orlik, Philip V.},
- title = {HoloCast+: Hybrid Digital-Analog Transmission for Graceful Point Cloud Delivery with Graph Fourier Transform},
- journal = {IEEE Transactions on Multimedia},
- year = 2021,
- month = may,
- doi = {10.1109/TMM.2021.3077772},
- issn = {1941-0077},
- url = {https://www.merl.com/publications/TR2021-043}
- }
- Fujihashi, T., Koike-Akino, T., Chen, S., Watanabe, T., "Wireless 3D Point Cloud Delivery Using Deep Graph Neural Networks", arXiv, June 2020.
BibTeX arXiv- @article{Fujihashi2020jun,
- author = {Fujihashi, Takuya and Koike-Akino, Toshiaki and Chen, Siheng and Watanabe, Takashi},
- title = {Wireless 3D Point Cloud Delivery Using Deep Graph Neural Networks},
- journal = {arXiv},
- year = 2020,
- month = jun,
- url = {https://arxiv.org/abs/2006.09835}
- }
- Fujihashi, T., Koike-Akino, T., Watanabe, T., Orlik, P.V., "Overhead Reduction in Graph-Based Point Cloud Delivery", IEEE International Conference on Communications (ICC), DOI: 10.1109/ICC40277.2020.9148650, May 2020.
BibTeX TR2020-061 PDF Video- @inproceedings{Fujihashi2020may2,
- author = {Fujihashi, Takuya and Koike-Akino, Toshiaki and Watanabe, Takashi and Orlik, Philip V.},
- title = {Overhead Reduction in Graph-Based Point Cloud Delivery},
- booktitle = {IEEE International Conference on Communications (ICC)},
- year = 2020,
- month = may,
- publisher = {IEEE},
- doi = {10.1109/ICC40277.2020.9148650},
- issn = {1938-1883},
- isbn = {978-1-7281-5089-5},
- url = {https://www.merl.com/publications/TR2020-061}
- }
- Fujihashi, T., Koike-Akino, T., Watanabe, T., Orlik, P.V., "High-Quality Soft Image Delivery with Deep Image Denoising", IEEE International Conference on Communications (ICC), DOI: 10.1109/ICC40277.2020.9148611, May 2020.
BibTeX TR2020-060 PDF Video- @inproceedings{Fujihashi2020may,
- author = {Fujihashi, Takuya and Koike-Akino, Toshiaki and Watanabe, Takashi and Orlik, Philip V.},
- title = {High-Quality Soft Image Delivery with Deep Image Denoising},
- booktitle = {IEEE International Conference on Communications (ICC)},
- year = 2020,
- month = may,
- publisher = {IEEE},
- doi = {10.1109/ICC40277.2020.9148611},
- issn = {1938-1883},
- isbn = {978-1-7281-5089-5},
- url = {https://www.merl.com/publications/TR2020-060}
- }
- Fujihashi, T., Koike-Akino, T., Orlik, P.V., Watanabe, T., "High-Throughput Visual MIMO Systems for Screen-Camera Communications", IEEE Transactions on Mobile Computing, April 2020.
BibTeX TR2020-048 PDF- @article{Fujihashi2020apr,
- author = {Fujihashi, Takuya and Koike-Akino, Toshiaki and Orlik, Philip V. and Watanabe, Takashi},
- title = {High-Throughput Visual MIMO Systems for Screen-Camera Communications},
- journal = {IEEE Transactions on Mobile Computing},
- year = 2020,
- month = apr,
- url = {https://www.merl.com/publications/TR2020-048}
- }
- Chang, X., Zhang, W., Qian, Y., Le Roux, J., Watanabe, S., "End-To-End Multi-Speaker Speech Recognition with Transformer", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP40776.2020.9054029, April 2020, pp. 6134-6138.
BibTeX TR2020-043 PDF Video Presentation- @inproceedings{Chang2020apr,
- author = {Chang, Xuankai and Zhang, Wangyou and Qian, Yanmin and Le Roux, Jonathan and Watanabe, Shinji},
- title = {End-To-End Multi-Speaker Speech Recognition with Transformer},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- year = 2020,
- pages = {6134--6138},
- month = apr,
- publisher = {IEEE},
- doi = {10.1109/ICASSP40776.2020.9054029},
- issn = {2379-190X},
- isbn = {978-1-5090-6631-5},
- url = {https://www.merl.com/publications/TR2020-043}
- }
- Li, R., Wang, X., Mallidi, H., Watanabe, S., Hori, T., Hermansky, H., "Multi-Stream End-to-End Speech Recognition", IEEE/ACM Transactions on Audio, Speech, and Language Processing, DOI: 10.1109/TASLP.2019.2959721, Vol. 28, pp. 646-655, March 2020.
BibTeX TR2020-030 PDF- @article{Li2020mar,
- author = {Li, Ruizhi and Wang, Xiaofei and Mallidi, Harish and Watanabe, Shinji and Hori, Takaaki and Hermansky, Hynek},
- title = {Multi-Stream End-to-End Speech Recognition},
- journal = {IEEE/ACM Transactions on Audio, Speech, and Language Processing},
- year = 2020,
- volume = 28,
- pages = {646--655},
- month = mar,
- doi = {10.1109/TASLP.2019.2959721},
- url = {https://www.merl.com/publications/TR2020-030}
- }
- Chang, X., Zhang, W., Qian, Y., Le Roux, J., Watanabe, S., "MIMO-Speech: End-to-End Multi-Channel Multi-Speaker Speech Recognition", IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU), December 2019, pp. 237-244.
BibTeX TR2019-157 PDF- @inproceedings{Chang2019dec,
- author = {Chang, Xuankai and Zhang, Wangyou and Qian, Yanmin and Le Roux, Jonathan and Watanabe, Shinji},
- title = {MIMO-Speech: End-to-End Multi-Channel Multi-Speaker Speech Recognition},
- booktitle = {IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU)},
- year = 2019,
- pages = {237--244},
- month = dec,
- isbn = {978-1-7281-0305-1},
- url = {https://www.merl.com/publications/TR2019-157}
- }
- Karita, S., Chen, N., Hayashi, T., Hori, T., Inaguma, H., Jiang, Z., Someki, M., Yalta Soplin, N.E., Yamamoto, R., Wang, X., Watanabe, S., Yoshimura, T., Zhang, W., "A Comparative Study on Transformer Vs RNN in Speech Applications", IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU), DOI: 10.1109/ASRU46091.2019.9003750, December 2019, pp. 449-456.
BibTeX TR2019-158 PDF- @inproceedings{Karita2019dec,
- author = {Karita, Shigeki and Chen, Nanxin and Hayashi, Tomoki and Hori, Takaaki and Inaguma, Hirofumi and Jiang, Ziyan and Someki, Masao and Yalta Soplin, Nelson Enrique and Yamamoto, Ryuichi and Wang, Xiaofei and Watanabe, Shinji and Yoshimura, Takenori and Zhang, Wangyou},
- title = {A Comparative Study on Transformer Vs RNN in Speech Applications},
- booktitle = {IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU)},
- year = 2019,
- pages = {449--456},
- month = dec,
- doi = {10.1109/ASRU46091.2019.9003750},
- url = {https://www.merl.com/publications/TR2019-158}
- }
- Fujihashi, T., Koike-Akino, T., Watanabe, T., Orlik, P.V., "DNN-based Simultaneous Screen-to-Camera and Screen-to-Eye Communications", IEEE Global Communications Conference (GLOBECOM), DOI: 10.1109/GLOBECOM38437.2019.9013967, December 2019.
BibTeX TR2019-132 PDF- @inproceedings{Fujihashi2019dec,
- author = {Fujihashi, Takuya and Koike-Akino, Toshiaki and Watanabe, Takashi and Orlik, Philip V.},
- title = {DNN-based Simultaneous Screen-to-Camera and Screen-to-Eye Communications},
- booktitle = {IEEE Global Communications Conference (GLOBECOM)},
- year = 2019,
- month = dec,
- publisher = {IEEE},
- doi = {10.1109/GLOBECOM38437.2019.9013967},
- issn = {2576-6813},
- isbn = {978-1-7281-0962-6},
- url = {https://www.merl.com/publications/TR2019-132}
- }
- Fujihashi, T., Koike-Akino, T., Watanabe, T., Orlik, P.V., "DNN-based Overhead Reduction for High-Quality Soft Delivery", IEEE Global Communications Conference (GLOBECOM), DOI: 10.1109/GLOBECOM38437.2019.9014124, December 2019.
BibTeX TR2019-133 PDF- @inproceedings{Fujihashi2019dec2,
- author = {Fujihashi, Takuya and Koike-Akino, Toshiaki and Watanabe, Takashi and Orlik, Philip V.},
- title = {DNN-based Overhead Reduction for High-Quality Soft Delivery},
- booktitle = {IEEE Global Communications Conference (GLOBECOM)},
- year = 2019,
- month = dec,
- publisher = {IEEE},
- doi = {10.1109/GLOBECOM38437.2019.9014124},
- issn = {2576-6813},
- isbn = {978-1-7281-0962-6},
- url = {https://www.merl.com/publications/TR2019-133}
- }
- Baskar, M.K., Watanabe, S., Astudillo, R., Hori, T., Burget, L., Cernocky, J.H., "Semi-supervised Sequence-to-sequence ASR using Unpaired Speech and Text", Interspeech, DOI: 10.21437/Interspeech.2019-3167, September 2019, pp. 3790-3794.
BibTeX TR2019-100 PDF- @inproceedings{Baskar2019sep,
- author = {Baskar, Murali Karthick and Watanabe, Shinji and Astudillo, Ramon and Hori, Takaaki and Burget, Lukas and Cernocky, Jan Honza},
- title = {Semi-supervised Sequence-to-sequence ASR using Unpaired Speech and Text},
- booktitle = {Interspeech},
- year = 2019,
- pages = {3790--3794},
- month = sep,
- doi = {10.21437/Interspeech.2019-3167},
- issn = {1990-9772},
- url = {https://www.merl.com/publications/TR2019-100}
- }
- Karafiat, M., Baskar, M.K., Watanabe, S., Hori, T., Wiesner, M., Cernocky, J.H., "Analysis of Multilingual Sequence-to-Sequence Speech Recognition Systems", Interspeech, DOI: 10.21437/Interspeech.2019-2355, September 2019, pp. 2220-2224.
BibTeX TR2019-103 PDF- @inproceedings{Karafiat2019sep,
- author = {Karafiat, Martin and Baskar, Murali Karthick and Watanabe, Shinji and Hori, Takaaki and Wiesner, Matthew and Cernocky, Jan Honza},
- title = {Analysis of Multilingual Sequence-to-Sequence Speech Recognition Systems},
- booktitle = {Interspeech},
- year = 2019,
- pages = {2220--2224},
- month = sep,
- doi = {10.21437/Interspeech.2019-2355},
- url = {https://www.merl.com/publications/TR2019-103}
- }
- Seki, H., Hori, T., Watanabe, S., Le Roux, J., Hershey, J., "End-to-End Multilingual Multi-Speaker Speech Recognition", Interspeech, DOI: 10.21437/Interspeech.2019-3038, September 2019, pp. 3755-3759.
BibTeX TR2019-101 PDF- @inproceedings{Seki2019sep,
- author = {Seki, Hiroshi and Hori, Takaaki and Watanabe, Shinji and Le Roux, Jonathan and Hershey, John},
- title = {End-to-End Multilingual Multi-Speaker Speech Recognition},
- booktitle = {Interspeech},
- year = 2019,
- pages = {3755--3759},
- month = sep,
- doi = {10.21437/Interspeech.2019-3038},
- url = {https://www.merl.com/publications/TR2019-101}
- }
- Seki, H., Hori, T., Watanabe, S., Moritz, N., Le Roux, J., "Vectorized Beam Search for CTC-Attention-based Speech Recognition", Interspeech, DOI: 10.21437/Interspeech.2019-2860, September 2019, pp. 3825-3829.
BibTeX TR2019-102 PDF- @inproceedings{Seki2019sep2,
- author = {Seki, Hiroshi and Hori, Takaaki and Watanabe, Shinji and Moritz, Niko and Le Roux, Jonathan},
- title = {Vectorized Beam Search for CTC-Attention-based Speech Recognition},
- booktitle = {Interspeech},
- year = 2019,
- pages = {3825--3829},
- month = sep,
- doi = {10.21437/Interspeech.2019-2860},
- url = {https://www.merl.com/publications/TR2019-102}
- }
- Yalta, N., Watanabe, S., Hori, T., Nakadai, K., Ogata, T., "CNN-based Multichannel End-to-End Speech Recognition for Everyday Home Environments", European Signal Processing Conference (EUSIPCO), DOI: 10.23919/EUSIPCO.2019.8902524, September 2019, pp. 1-5.
BibTeX TR2019-094 PDF- @inproceedings{Yalta2019sep,
- author = {Yalta, Nelson and Watanabe, Shinji and Hori, Takaaki and Nakadai, Kazuhiro and Ogata, Tetsuya},
- title = {CNN-based Multichannel End-to-End Speech Recognition for Everyday Home Environments},
- booktitle = {European Signal Processing Conference (EUSIPCO)},
- year = 2019,
- pages = {1--5},
- month = sep,
- doi = {10.23919/EUSIPCO.2019.8902524},
- url = {https://www.merl.com/publications/TR2019-094}
- }