Publications

Masuyama, Y., "Single- and Multi-Channel Speech Enhancement and Separation for Far-Field Conversation Recognition," Tech. Rep. TR2025-097, Jelinek Summer Workshop on Speech and Language Technology (JSALT), June 2025.
BibTeX TR2025-097 PDF
- @techreport{Masuyama2025jun,
- author = {Masuyama, Yoshiki},
- title = {{Single- and Multi-Channel Speech Enhancement and Separation for Far-Field Conversation Recognition}},
- institution = {Jelinek Summer Workshop on Speech and Language Technology (JSALT)},
- year = 2025,
- month = jun,
- url = {https://www.merl.com/publications/TR2025-097}
- }
Attiah, K., Wang, P., Mansour, H., Koike-Akino, T., Boufounos, P.T., "Enabling DMG Wi-Fi Sensing in Data Transmission Intervals by Exploiting Beam Training Codebook", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP49660.2025.10889300, April 2025.
BibTeX TR2025-026 PDF
- @inproceedings{Attiah2025mar,
- author = {Attiah, Kareem and Wang, Pu and Mansour, Hassan and Koike-Akino, Toshiaki and Boufounos, Petros T.},
- title = {{Enabling DMG Wi-Fi Sensing in Data Transmission Intervals by Exploiting Beam Training Codebook}},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- year = 2025,
- month = apr,
- publisher = {IEEE},
- doi = {10.1109/ICASSP49660.2025.10889300},
- issn = {2379-190X},
- isbn = {979-8-3503-6874-1},
- url = {https://www.merl.com/publications/TR2025-026}
- }
Jin, S., Wang, P., Boufounos, P.T., Orlik, P.V., Takahashi, R., Roy, S., "Spatial-Domain Mutual Interference Mitigation for MIMO-FMCW Automotive Radar", IEEE Transactions on Vehicular Technology, DOI: 10.1109/TVT.2024.3467917, September 2024.
BibTeX TR2024-148 PDF
- @article{Jin2024sep,
- author = {Jin, Sian and Wang, Pu and Boufounos, Petros T. and Orlik, Philip V. and Takahashi, Ryuhei and Roy, Sumit},
- title = {{Spatial-Domain Mutual Interference Mitigation for MIMO-FMCW Automotive Radar}},
- journal = {IEEE Transactions on Vehicular Technology},
- year = 2024,
- month = sep,
- doi = {10.1109/TVT.2024.3467917},
- issn = {1939-9359},
- url = {https://www.merl.com/publications/TR2024-148}
- }
Boeddeker, C., Subramanian, A.S., Wichern, G., Haeb-Umbach, R., Le Roux, J., "TS-SEP: Joint Diarization and Separation Conditioned on Estimated Speaker Embeddings", IEEE/ACM Transactions on Audio, Speech, and Language Processing, DOI: 10.1109/TASLP.2024.3350887, Vol. 32, pp. 1185-1197, February 2024.
BibTeX TR2024-006 PDF Software
- @article{Boeddeker2024feb,
- author = {Boeddeker, Christoph and Subramanian, Aswin Shanmugam and Wichern, Gordon and Haeb-Umbach, Reinhold and {Le Roux}, Jonathan},
- title = {{TS-SEP: Joint Diarization and Separation Conditioned on Estimated Speaker Embeddings}},
- journal = {IEEE/ACM Transactions on Audio, Speech, and Language Processing},
- year = 2024,
- volume = 32,
- pages = {1185--1197},
- month = feb,
- doi = {10.1109/TASLP.2024.3350887},
- issn = {2329-9304},
- url = {https://www.merl.com/publications/TR2024-006}
- }
Wang, Z.-Q., Wichern, G., Watanabe, S., Le Roux, J., "STFT-Domain Neural Speech Enhancement with Very Low Algorithmic Latency", IEEE/ACM Transactions on Audio, Speech, and Language Processing, DOI: 10.1109/TASLP.2022.3224285, Vol. 31, pp. 397-410, December 2022.
BibTeX TR2022-166 PDF
- @article{Wang2022dec2,
- author = {Wang, Zhong-Qiu and Wichern, Gordon and Watanabe, Shinji and {Le Roux}, Jonathan},
- title = {{STFT-Domain Neural Speech Enhancement with Very Low Algorithmic Latency}},
- journal = {IEEE/ACM Transactions on Audio, Speech, and Language Processing},
- year = 2022,
- volume = 31,
- pages = {397--410},
- month = dec,
- doi = {10.1109/TASLP.2022.3224285},
- issn = {2329-9304},
- url = {https://www.merl.com/publications/TR2022-166}
- }
Pervej, M.F., Guo, J., Kim, K.J., Parsons, K., Orlik, P.V., Di Cairano, S., Menner, M., Berntorp, K., Nagai, Y., Dai, H., "Mobility, Communication and Computation Aware Federated Learning for Internet of Vehicles", IEEE Intelligent Vehicles Symposium (IV), June 2022.
BibTeX TR2022-079 PDF
- @inproceedings{Pervej2022jun,
- author = {Pervej, Md Ferdous and Guo, Jianlin and Kim, Kyeong Jin and Parsons, Kieran and Orlik, Philip V. and {Di Cairano}, Stefano and Menner, Marcel and Berntorp, Karl and Nagai, Yukimasa and Dai, Huaiyu},
- title = {{Mobility, Communication and Computation Aware Federated Learning for Internet of Vehicles}},
- booktitle = {IEEE Intelligent Vehicles Symposium (IV)},
- year = 2022,
- month = jun,
- url = {https://www.merl.com/publications/TR2022-079}
- }
Wang, Z.-Q., Wichern, G., Le Roux, J., "Convolutive Prediction for Reverberant Speech Separation", IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (WASPAA), DOI: 10.1109/WASPAA52581.2021.9632667, October 2021, pp. 56-60.
BibTeX TR2021-127 PDF
- @inproceedings{Wang2021oct4,
- author = {Wang, Zhong-Qiu and Wichern, Gordon and {Le Roux}, Jonathan},
- title = {{Convolutive Prediction for Reverberant Speech Separation}},
- booktitle = {IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (WASPAA)},
- year = 2021,
- pages = {56--60},
- month = oct,
- publisher = {IEEE},
- doi = {10.1109/WASPAA52581.2021.9632667},
- url = {https://www.merl.com/publications/TR2021-127}
- }
Watanabe, S., Boyer, F., Chang, X., Guo, P., Hayashi, T., Higuchi, Y., Hori, T., Huang, W.-C., Inaguma, H., Kamo, N., Karita, S., Li, C., Shi, J., Subramanian, A.S., Zhang, W., "The 2020 ESPnet Update: New Features, Broadened Applications, Performance Improvements, and Future Plans", IEEE Data Science and Learning Workshop (DSLW), DOI: 10.1109/DSLW51110.2021.9523402, June 2021, pp. 1-6.
BibTeX TR2021-073 PDF
- @inproceedings{Watanabe2021jun,
- author = {Watanabe, Shinji and Boyer, Florian and Chang, Xuankai and Guo, Pengcheng and Hayashi, Tomoki and Higuchi, Yosuke and Hori, Takaaki and Huang, Wen-Chin and Inaguma, Hirofumi and Kamo, Naoyuki and Karita, Shigeki and Li, Chenda and Shi, Jing and Subramanian, Aswin Shanmugam and Zhang, Wangyou},
- title = {{The 2020 ESPnet Update: New Features, Broadened Applications, Performance Improvements, and Future Plans}},
- booktitle = {IEEE Data Science and Learning Workshop (DSLW)},
- year = 2021,
- pages = {1--6},
- month = jun,
- publisher = {IEEE},
- doi = {10.1109/DSLW51110.2021.9523402},
- isbn = {978-1-6654-2826-2},
- url = {https://www.merl.com/publications/TR2021-073}
- }
Pulipati, S.K., Ma, R., "Design Considerations and FPGA Implementation of a Wideband All-Digital Transmit Beamformer with 50% Fractional Bandwidth", IEEE International Microwave Symposium (IMS), DOI: 10.1109/IMS30576.2020.9223888, June 2020, pp. 1073-1076.
BibTeX TR2020-078 PDF
- @inproceedings{SravanKumar2020jun,
- author = {Pulipati, Sravan Kumar and Ma, Rui},
- title = {{Design Considerations and FPGA Implementation of a Wideband All-Digital Transmit Beamformer with 50\% Fractional Bandwidth}},
- booktitle = {IEEE International Microwave Symposium (IMS)},
- year = 2020,
- pages = {1073--1076},
- month = jun,
- publisher = {IEEE},
- doi = {10.1109/IMS30576.2020.9223888},
- issn = {2576-7216},
- isbn = {978-1-7281-6815-9},
- url = {https://www.merl.com/publications/TR2020-078}
- }
Chang, X., Zhang, W., Qian, Y., Le Roux, J., Watanabe, S., "End-To-End Multi-Speaker Speech Recognition with Transformer", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP40776.2020.9054029, April 2020, pp. 6134-6138.
BibTeX TR2020-043 PDF Video Presentation
- @inproceedings{Chang2020apr,
- author = {Chang, Xuankai and Zhang, Wangyou and Qian, Yanmin and {Le Roux}, Jonathan and Watanabe, Shinji},
- title = {{End-To-End Multi-Speaker Speech Recognition with Transformer}},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- year = 2020,
- pages = {6134--6138},
- month = apr,
- publisher = {IEEE},
- doi = {10.1109/ICASSP40776.2020.9054029},
- issn = {2379-190X},
- isbn = {978-1-5090-6631-5},
- url = {https://www.merl.com/publications/TR2020-043}
- }
Chang, X., Zhang, W., Qian, Y., Le Roux, J., Watanabe, S., "MIMO-Speech: End-to-End Multi-Channel Multi-Speaker Speech Recognition", IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU), December 2019, pp. 237-244.
BibTeX TR2019-157 PDF
- @inproceedings{Chang2019dec,
- author = {Chang, Xuankai and Zhang, Wangyou and Qian, Yanmin and {Le Roux}, Jonathan and Watanabe, Shinji},
- title = {{MIMO-Speech: End-to-End Multi-Channel Multi-Speaker Speech Recognition}},
- booktitle = {IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU)},
- year = 2019,
- pages = {237--244},
- month = dec,
- isbn = {978-1-7281-0305-1},
- url = {https://www.merl.com/publications/TR2019-157}
- }
Sangwan, A., Ma, R., Wang, B., Kim, K.J., Parsons, K., Koike-Akino, T., Wang, P., Orlik, P.V., Teo, K.H., Kuriyama, T., Kihira, K., Fukasawa, T., "CDM-based 4-Channel Digital Beamforming Transmitter Using a Single DAC", IEEE International New Circuits and Systems Conference, DOI: 10.1109/NEWCAS44328.2019.8961250, June 2019, pp. 1-4.
BibTeX TR2019-058 PDF
- @inproceedings{Sangwan2019jun,
- author = {Sangwan, Amit and Ma, Rui and Wang, Bingnan and Kim, Kyeong Jin and Parsons, Kieran and Koike-Akino, Toshiaki and Wang, Pu and Orlik, Philip V. and Teo, Koon Hoo and Kuriyama, Tasuku and Kihira, Kazunari and Fukasawa, Toru},
- title = {{CDM-based 4-Channel Digital Beamforming Transmitter Using a Single DAC}},
- booktitle = {IEEE International New Circuits and Systems Conference},
- year = 2019,
- pages = {1--4},
- month = jun,
- publisher = {IEEE},
- doi = {10.1109/NEWCAS44328.2019.8961250},
- isbn = {978-1-7281-1031-8},
- url = {https://www.merl.com/publications/TR2019-058}
- }
Koike-Akino, T., Wang, P., Orlik, P.V., "Joint Lattice and Subspace Vector Perturbation with PAPR Reduction for Massive MU-MIMO Systems", IEEE Global Communications Conference (GLOBECOM), DOI: 10.1109/GLOCOM.2018.8647187, December 2018.
BibTeX TR2018-163 PDF
- @inproceedings{Koike-Akino2018dec,
- author = {Koike-Akino, Toshiaki and Wang, Pu and Orlik, Philip V.},
- title = {{Joint Lattice and Subspace Vector Perturbation with PAPR Reduction for Massive MU-MIMO Systems}},
- booktitle = {IEEE Global Communications Conference (GLOBECOM)},
- year = 2018,
- month = dec,
- doi = {10.1109/GLOCOM.2018.8647187},
- url = {https://www.merl.com/publications/TR2018-163}
- }
Xiao, X., Watanabe, S., Erdogan, H., Mandel, M., Lu, L., Hershey, J., Seltzer, M., Chen, G., Zhang, Y., Yu, D., "Discriminative beamforming with phase aware neural networks for speech enhancement and recognition" in New Era for Robust Speech Recognition: Exploiting Deep Learning, Watanabe, S., Delcroix, M., Metze, F., Hershey, J.R., Eds., chapter 4, Springer, July 9, 2018.
BibTeX
- @incollection{Xiao2018jul2,
- author = {Xiao, Xiong and Watanabe, Shinji and Erdogan, Hakan and Mandel, Michael and Lu, Liang and Hershey, John and Seltzer, Mike and Chen, Guoguo and Zhang, Yu and Yu, Dong},
- title = {{Discriminative beamforming with phase aware neural networks for speech enhancement and recognition}},
- booktitle = {New Era for Robust Speech Recognition: Exploiting Deep Learning},
- year = 2018,
- editor = {Watanabe, S. and Delcroix, M. and Metze, F. and Hershey, J.R.},
- chapter = 4,
- month = jul,
- publisher = {Springer}
- }
Ochiai, T., Watanabe, S., Katagiri, S., Hori, T., Hershey, J.R., "Speaker Adaptation for Multichannel End-to-End Speech Recognition", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP.2018.8462161, April 2018, pp. 6707-6711.
BibTeX TR2018-006 PDF
- @inproceedings{Ochiai2018apr,
- author = {Ochiai, Tsubasa and Watanabe, Shinji and Katagiri, Shigeru and Hori, Takaaki and Hershey, John R.},
- title = {{Speaker Adaptation for Multichannel End-to-End Speech Recognition}},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- year = 2018,
- pages = {6707--6711},
- month = apr,
- doi = {10.1109/ICASSP.2018.8462161},
- url = {https://www.merl.com/publications/TR2018-006}
- }
Peng, Z., Kim, K.J., Wang, P., Ma, R., Kihira, K., Fukasawa, T., Li, C., Wang, B., "Code-Division Multiplexing based Hardware Reduction for a Digital Beamforming Transmitter Array", European Conference on Antennas and Propagation (EuCAP), April 2018.
BibTeX TR2018-049 PDF
- @inproceedings{Peng2018apr,
- author = {Peng, Zhengyu and Kim, Kyeong Jin and Wang, Pu and Ma, Rui and Kihira, Kazunari and Fukasawa, Toru and Li, Changzhi and Wang, Bingnan},
- title = {{Code-Division Multiplexing based Hardware Reduction for a Digital Beamforming Transmitter Array}},
- booktitle = {European Conference on Antennas and Propagation (EuCAP)},
- year = 2018,
- month = apr,
- url = {https://www.merl.com/publications/TR2018-049}
- }
Ochiai, T., Watanabe, S., Katagiri, S., "Does speech enhancement work with end-to-end ASR objectives?: Experimental analysis of multichannel end-to-end ASR", IEEE International Workshop on Machine Learning for Signal Processing (MLSP), October 2017.
BibTeX TR2017-139 PDF
- @inproceedings{Ochiai2017oct,
- author = {Ochiai, Tsubasa and Watanabe, Shinji and Katagiri, Shigeru},
- title = {{Does speech enhancement work with end-to-end ASR objectives?: Experimental analysis of multichannel end-to-end ASR}},
- booktitle = {IEEE International Workshop on Machine Learning for Signal Processing (MLSP)},
- year = 2017,
- month = oct,
- url = {https://www.merl.com/publications/TR2017-139}
- }
Ochiai, T., Watanabe, S., Hori, T., Hershey, J.R., Xiao, X., "Unified Architecture for Multichannel End-to-End Speech Recognition with Neural Beamforming", IEEE Journal of Selected Topics in Signal Processing, DOI: 10.1109/JSTSP.2017.2764276, Vol. 11, No. 8, pp. 1274-1288, October 2017.
BibTeX TR2017-192 PDF
- @article{Ochiai2017oct2,
- author = {Ochiai, Tsubasa and Watanabe, Shinji and Hori, Takaaki and Hershey, John R. and Xiao, Xiong},
- title = {{Unified Architecture for Multichannel End-to-End Speech Recognition with Neural Beamforming}},
- journal = {IEEE Journal of Selected Topics in Signal Processing},
- year = 2017,
- volume = 11,
- number = 8,
- pages = {1274--1288},
- month = oct,
- doi = {10.1109/JSTSP.2017.2764276},
- issn = {1941-0484},
- url = {https://www.merl.com/publications/TR2017-192}
- }
Ochiai, T., Watanabe, S., Hori, T., Hershey, J.R., "Multichannel End-to-end Speech Recognition", International Conference on Machine Learning (ICML), August 2017.
BibTeX TR2017-107 PDF
- @inproceedings{Ochiai2017aug,
- author = {Ochiai, Tsubasa and Watanabe, Shinji and Hori, Takaaki and Hershey, John R.},
- title = {{Multichannel End-to-end Speech Recognition}},
- booktitle = {International Conference on Machine Learning (ICML)},
- year = 2017,
- month = aug,
- url = {https://www.merl.com/publications/TR2017-107}
- }
Ding, Y., Kim, K.J., Koike-Akino, T., Pajovic, M., Wang, P., Orlik, P.V., "Spatial Scattering Modulation for Uplink Millimeter-Wave Systems", IEEE Communications Letters, DOI: 10.1109/LCOMM.2017.2684126, Vol. 21, No. 7, pp. 1493-1496, May 2017.
BibTeX TR2017-071 PDF
- @article{Ding2017may4,
- author = {Ding, Yacong and Kim, Kyeong Jin and Koike-Akino, Toshiaki and Pajovic, Milutin and Wang, Pu and Orlik, Philip V.},
- title = {{Spatial Scattering Modulation for Uplink Millimeter-Wave Systems}},
- journal = {IEEE Communications Letters},
- year = 2017,
- volume = 21,
- number = 7,
- pages = {1493--1496},
- month = may,
- doi = {10.1109/LCOMM.2017.2684126},
- url = {https://www.merl.com/publications/TR2017-071}
- }
Ding, Y., Kim, K.J., Koike-Akino, T., Pajovic, M., Wang, P., Orlik, P.V., "Millimeter Wave Adaptive Transmission Using Spatial Scattering Modulation", IEEE International Conference on Communications (ICC), DOI: 10.1109/ICC.2017.7996404, May 2017.
BibTeX TR2017-045 PDF Presentation
- @inproceedings{Ding2017may,
- author = {Ding, Yacong and Kim, Kyeong Jin and Koike-Akino, Toshiaki and Pajovic, Milutin and Wang, Pu and Orlik, Philip V.},
- title = {{Millimeter Wave Adaptive Transmission Using Spatial Scattering Modulation}},
- booktitle = {IEEE International Conference on Communications (ICC)},
- year = 2017,
- month = may,
- doi = {10.1109/ICC.2017.7996404},
- url = {https://www.merl.com/publications/TR2017-045}
- }
Meng, Z., Watanabe, S., Hershey, J.R., Erdogan, H., "Deep Long Short-Term Memory Adaptive Beamforming Networks for Multichannel Robust Speech Recognition", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), March 2017.
BibTeX TR2017-012 PDF
- @inproceedings{Meng2017mar,
- author = {Meng, Zhong and Watanabe, Shinji and Hershey, John R. and Erdogan, Hakan},
- title = {{Deep Long Short-Term Memory Adaptive Beamforming Networks for Multichannel Robust Speech Recognition}},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- year = 2017,
- month = mar,
- url = {https://www.merl.com/publications/TR2017-012}
- }
Watanabe, S., Hori, T., Le Roux, J., Hershey, J.R., "Student-Teacher Network Learning with Enhanced Features", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), March 2017.
BibTeX TR2017-011 PDF
- @inproceedings{Watanabe2017mar,
- author = {Watanabe, Shinji and Hori, Takaaki and {Le Roux}, Jonathan and Hershey, John R.},
- title = {{Student-Teacher Network Learning with Enhanced Features}},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- year = 2017,
- month = mar,
- url = {https://www.merl.com/publications/TR2017-011}
- }
Zhang, C., Wang, B., Teo, K.H., "Design of Continuous Beam Steerable and Scalable Unit Module for Wireless Power Transmission Using Injection-locked Oscillator Array", Journal of Progress In Electromagnetics Research (PIER), DOI: 10.2528/PIERC16081805, Vol. 69, pp. 169-179, December 2016.
BibTeX TR2016-168 PDF
- @article{Zhang2016dec,
- author = {Zhang, Ce and Wang, Bingnan and Teo, Koon Hoo},
- title = {{Design of Continuous Beam Steerable and Scalable Unit Module for Wireless Power Transmission Using Injection-locked Oscillator Array}},
- journal = {Journal of Progress In Electromagnetics Research (PIER)},
- year = 2016,
- volume = 69,
- pages = {169--179},
- month = dec,
- doi = {10.2528/PIERC16081805},
- url = {https://www.merl.com/publications/TR2016-168}
- }
Xiao, X., Watanabe, S., Chng, E.S., Li, H., "Beamforming Networks Using Spatial Covariance Features for Far-field Speech Recognition", Asia-Pacific Signal and Information Processing Association Annual Summit and Conference (APSIPA ASC), DOI: 10.1109/APSIPA.2016.7820724, December 2016.
BibTeX TR2016-162 PDF
- @inproceedings{Xiao2016dec,
- author = {Xiao, Xiong and Watanabe, Shinji and Chng, Eng Siong and Li, Haizhou},
- title = {{Beamforming Networks Using Spatial Covariance Features for Far-field Speech Recognition}},
- booktitle = {Asia-Pacific Signal and Information Processing Association Annual Summit and Conference (APSIPA ASC)},
- year = 2016,
- month = dec,
- doi = {10.1109/APSIPA.2016.7820724},
- url = {https://www.merl.com/publications/TR2016-162}
- }