Publications

43 / 3,604 publications found.


  •  Boeddeker, C., Subramanian, A.S., Wichern, G., Haeb-Umbach, R., Le Roux, J., "TS-SEP: Joint Diarization and Separation Conditioned on Estimated Speaker Embeddings", IEEE/ACM Transactions on Audio, Speech, and Language Processing, DOI: 10.1109/​TASLP.2024.3350887, Vol. 32, pp. 1185-1197, February 2024.
    BibTeX TR2024-006 PDF
    • @article{Boeddeker2024feb,
    • author = {Boeddeker, Christoph and Subramanian, Aswin Shanmugam and Wichern, Gordon and Haeb-Umbach, Reinhold and Le Roux, Jonathan},
    • title = {TS-SEP: Joint Diarization and Separation Conditioned on Estimated Speaker Embeddings},
    • journal = {IEEE/ACM Transactions on Audio, Speech, and Language Processing},
    • year = 2024,
    • volume = 32,
    • pages = {1185--1197},
    • month = feb,
    • doi = {10.1109/TASLP.2024.3350887},
    • issn = {2329-9304},
    • url = {https://www.merl.com/publications/TR2024-006}
    • }
  •  Wang, Z.-Q., Wichern, G., Watanabe, S., Le Roux, J., "STFT-Domain Neural Speech Enhancement with Very Low Algorithmic Latency", IEEE/ACM Transactions on Audio, Speech, and Language Processing, DOI: 10.1109/​TASLP.2022.3224285, Vol. 31, pp. 397-410, December 2022.
    BibTeX TR2022-166 PDF
    • @article{Wang2022dec2,
    • author = {Wang, Zhong-Qiu and Wichern, Gordon and Watanabe, Shinji and Le Roux, Jonathan},
    • title = {STFT-Domain Neural Speech Enhancement with Very Low Algorithmic Latency},
    • journal = {IEEE/ACM Transactions on Audio, Speech, and Language Processing},
    • year = 2022,
    • volume = 31,
    • pages = {397--410},
    • month = dec,
    • doi = {10.1109/TASLP.2022.3224285},
    • issn = {2329-9304},
    • url = {https://www.merl.com/publications/TR2022-166}
    • }
  •  Pervej, M.F., Guo, J., Kim, K.J., Parsons, K., Orlik, P.V., Di Cairano, S., Menner, M., Berntorp, K., Nagai, Y., Dai, H., "Mobility, Communication and Computation Aware Federated Learning for Internet of Vehicles", IEEE Intelligent Vehicles Symposium (IV), June 2022.
    BibTeX TR2022-079 PDF
    • @inproceedings{Pervej2022jun,
    • author = {Pervej, Md Ferdous and Guo, Jianlin and Kim, Kyeong Jin and Parsons, Kieran and Orlik, Philip V. and Di Cairano, Stefano and Menner, Marcel and Berntorp, Karl and Nagai, Yukimasa and Dai, Huaiyu},
    • title = {Mobility, Communication and Computation Aware Federated Learning for Internet of Vehicles},
    • booktitle = {IEEE Intelligent Vehicles Symposium (IV)},
    • year = 2022,
    • month = jun,
    • url = {https://www.merl.com/publications/TR2022-079}
    • }
  •  Wang, Z.-Q., Wichern, G., Le Roux, J., "Convolutive Prediction for Reverberant Speech Separation", IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (WASPAA), DOI: 10.1109/​WASPAA52581.2021.9632667, October 2021, pp. 56-60.
    BibTeX TR2021-127 PDF
    • @inproceedings{Wang2021oct4,
    • author = {Wang, Zhong-Qiu and Wichern, Gordon and Le Roux, Jonathan},
    • title = {Convolutive Prediction for Reverberant Speech Separation},
    • booktitle = {IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (WASPAA)},
    • year = 2021,
    • pages = {56--60},
    • month = oct,
    • publisher = {IEEE},
    • doi = {10.1109/WASPAA52581.2021.9632667},
    • url = {https://www.merl.com/publications/TR2021-127}
    • }
  •  Watanabe, S., Boyer, F., Chang, X., Guo, P., Hayashi, T., Higuchi, Y., Hori, T., Huang, W.-C., Inaguma, H., Kamo, N., Karita, S., Li, C., Shi, J., Subramanian, A.S., Zhang, W., "The 2020 ESPNET Update: New Features, Broadened Applications, Performance Improvements, and Future Plans", IEEE Data Science and Learning Workshop (DSLW), DOI: 10.1109/​DSLW51110.2021.9523402, June 2021, pp. 1-6.
    BibTeX TR2021-073 PDF
    • @inproceedings{Watanabe2021jun,
    • author = {Watanabe, Shinji and Boyer, Florian and Chang, Xuankai and Guo, Pengcheng and Hayashi, Tomoki and Higuchi, Yosuke and Hori, Takaaki and Huang, Wen-Chin and Inaguma, Hirofumi and Kamo, Naoyuki and Karita, Shigeki and Li, Chenda and Shi, Jing and Subramanian, Aswin S and Zhang, Wangyou},
    • title = {The 2020 ESPNET Update: New Features, Broadened Applications, Performance Improvements, and Future Plans},
    • booktitle = {IEEE Data Science and Learning Workshop (DSLW)},
    • year = 2021,
    • pages = {1--6},
    • month = jun,
    • publisher = {IEEE},
    • doi = {10.1109/DSLW51110.2021.9523402},
    • isbn = {978-1-6654-2826-2},
    • url = {https://www.merl.com/publications/TR2021-073}
    • }
  •  Sravan Kumar, P., Ma, R., "Design Considerations and FPGA Implementation of a Wideband All-Digital Transmit Beamformer with 50% Fractional Bandwidth", IEEE International Microwave Symposium (IMS), DOI: 10.1109/​IMS30576.2020.9223888, June 2020, pp. 1073-1076.
    BibTeX TR2020-078 PDF
    • @inproceedings{SravanKumar2020jun,
    • author = {Sravan Kumar, Pulipati and Ma, Rui},
    • title = {Design Considerations and FPGA Implementation of a Wideband All-Digital Transmit Beamformer with 50\% Fractional Bandwidth},
    • booktitle = {IEEE International Microwave Symposium (IMS)},
    • year = 2020,
    • pages = {1073--1076},
    • month = jun,
    • publisher = {IEEE},
    • doi = {10.1109/IMS30576.2020.9223888},
    • issn = {2576-7216},
    • isbn = {978-1-7281-6815-9},
    • url = {https://www.merl.com/publications/TR2020-078}
    • }
  •  Chang, X., Zhang, W., Qian, Y., Le Roux, J., Watanabe, S., "End-To-End Multi-Speaker Speech Recognition with Transformer", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/​ICASSP40776.2020.9054029, April 2020, pp. 6134-6138.
    BibTeX TR2020-043 PDF Video Presentation
    • @inproceedings{Chang2020apr,
    • author = {Chang, Xuankai and Zhang, Wangyou and Qian, Yanmin and Le Roux, Jonathan and Watanabe, Shinji},
    • title = {End-To-End Multi-Speaker Speech Recognition with Transformer},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2020,
    • pages = {6134--6138},
    • month = apr,
    • publisher = {IEEE},
    • doi = {10.1109/ICASSP40776.2020.9054029},
    • issn = {2379-190X},
    • isbn = {978-1-5090-6631-5},
    • url = {https://www.merl.com/publications/TR2020-043}
    • }
  •  Chang, X., Zhang, W., Qian, Y., Le Roux, J., Watanabe, S., "MIMO-Speech: End-to-End Multi-Channel Multi-Speaker Speech Recognition", IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU), December 2019, pp. 237-244.
    BibTeX TR2019-157 PDF
    • @inproceedings{Chang2019dec,
    • author = {Chang, Xuankai and Zhang, Wangyou and Qian, Yanmin and Le Roux, Jonathan and Watanabe, Shinji},
    • title = {MIMO-Speech: End-to-End Multi-Channel Multi-Speaker Speech Recognition},
    • booktitle = {IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU)},
    • year = 2019,
    • pages = {237--244},
    • month = dec,
    • isbn = {978-1-7281-0305-1},
    • url = {https://www.merl.com/publications/TR2019-157}
    • }
  •  Sangwan, A., Ma, R., Wang, B., Kim, K.J., Parsons, K., Koike-Akino, T., Wang, P., Orlik, P.V., Teo, K.H., Kuriyama, T., Kihira, K., Fukasawa, T., "CDM-based 4-Channel Digital Beamforming Transmitter Using a Single DAC", IEEE International New Circuits and Systems Conference, DOI: 10.1109/​NEWCAS44328.2019.8961250, June 2019, pp. 1-4.
    BibTeX TR2019-058 PDF
    • @inproceedings{Sangwan2019jun,
    • author = {Sangwan, Amit and Ma, Rui and Wang, Bingnan and Kim, Kyeong Jin and Parsons, Kieran and Koike-Akino, Toshiaki and Wang, Pu and Orlik, Philip V. and Teo, Koon Hoo and Kuriyama, Tasuku and Kihira, Kazunari and Fukasawa, Toru},
    • title = {CDM-based 4-Channel Digital Beamforming Transmitter Using a Single DAC},
    • booktitle = {IEEE International New Circuits and Systems Conference},
    • year = 2019,
    • pages = {1--4},
    • month = jun,
    • publisher = {IEEE},
    • doi = {10.1109/NEWCAS44328.2019.8961250},
    • isbn = {978-1-7281-1031-8},
    • url = {https://www.merl.com/publications/TR2019-058}
    • }
  •  Koike-Akino, T., Wang, P., Orlik, P.V., "Joint Lattice and Subspace Vector Perturbation with PAPR Reduction for Massive MU-MIMO Systems", IEEE Global Communications Conference (GLOBECOM), DOI: 10.1109/​GLOCOM.2018.8647187, December 2018.
    BibTeX TR2018-163 PDF
    • @inproceedings{Koike-Akino2018dec,
    • author = {Koike-Akino, Toshiaki and Wang, Pu and Orlik, Philip V.},
    • title = {Joint Lattice and Subspace Vector Perturbation with PAPR Reduction for Massive MU-MIMO Systems},
    • booktitle = {IEEE Global Communications Conference (GLOBECOM)},
    • year = 2018,
    • month = dec,
    • doi = {10.1109/GLOCOM.2018.8647187},
    • url = {https://www.merl.com/publications/TR2018-163}
    • }
  •  Xiao, X., Watanabe, S., Erdogan, H., Mandel, M., Lu, L., Hershey, J., Seltzer, M., Chen, G., Zhang, Y., Yu, D., "Discriminative beamforming with phase aware neural networks for speech enhancement and recognition" in New Era for Robust Speech Recognition: Exploiting Deep Learning, Watanabe, S. and Delcroix, M. and Metze, F. and Hershey, J.R., Eds., chapter 4, Springer, July 9, 2018.
    BibTeX
    • @incollection{Xiao2018jul2,
    • author = {Xiao, Xiong and Watanabe, Shinji and Erdogan, Hakan and Mandel, Michael and Lu, Liang and Hershey, John and Seltzer, Mike and Chen, Guoguo and Zhang, Yu and Yu, Dong},
    • title = {Discriminative beamforming with phase aware neural networks for speech enhancement and recognition},
    • booktitle = {New Era for Robust Speech Recognition: Exploiting Deep Learning},
    • year = 2018,
    • editor = {Watanabe, S. and Delcroix, M. and Metze, F. and Hershey, J.R.},
    • chapter = 4,
    • month = jul,
    • publisher = {Springer}
    • }
  •  Ochiai, T., Watanabe, S., Katagiri, S., Hori, T., Hershey, J.R., "Speaker Adaptation for Multichannel End-to-End Speech Recognition", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/​ICASSP.2018.8462161, April 2018, pp. 6707-6711.
    BibTeX TR2018-006 PDF
    • @inproceedings{Ochiai2018apr,
    • author = {Ochiai, Tsubasa and Watanabe, Shinji and Katagiri, Shigeru and Hori, Takaaki and Hershey, John R.},
    • title = {Speaker Adaptation for Multichannel End-to-End Speech Recognition},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2018,
    • pages = {6707--6711},
    • month = apr,
    • doi = {10.1109/ICASSP.2018.8462161},
    • url = {https://www.merl.com/publications/TR2018-006}
    • }
  •  Peng, Z., Kim, K.J., Wang, P., Ma, R., Kihira, K., Fukasawa, T., Li, C., Wang, B., "Code-Division Multiplexing based Hardware Reduction for a Digital Beamforming Transmitter Array", European Conference on Antennas and Propagation (EuCAP), April 2018.
    BibTeX TR2018-049 PDF
    • @inproceedings{Peng2018apr,
    • author = {Peng, Zhengyu and Kim, Kyeong Jin and Wang, Pu and Ma, Rui and Kihira, Kazunari and Fukasawa, Toru and Li, Changzhi and Wang, Bingnan},
    • title = {Code-Division Multiplexing based Hardware Reduction for a Digital Beamforming Transmitter Array},
    • booktitle = {European Conference on Antennas and Propagation (EuCAP)},
    • year = 2018,
    • month = apr,
    • url = {https://www.merl.com/publications/TR2018-049}
    • }
  •  Ochiai, T., Watanabe, S., Katagiri, S., "Does speech enhancement work with end-to-end ASR objectives?: Experimental analysis of multichannel end-to-end ASR", IEEE International Workshop on Machine Learning for Signal Processing (MLSP), DOI: 10.1109/​MLSP.2017.8168188, October 2017, pp. 1-6.
    BibTeX TR2017-139 PDF
    • @inproceedings{Ochiai2017oct,
    • author = {Ochiai, Tsubasa and Watanabe, Shinji and Katagiri, Shigeru},
    • title = {Does speech enhancement work with end-to-end ASR objectives?: Experimental analysis of multichannel end-to-end ASR},
    • booktitle = {IEEE International Workshop on Machine Learning for Signal Processing (MLSP)},
    • year = 2017,
    • pages = {1--6},
    • month = oct,
    • doi = {10.1109/MLSP.2017.8168188},
    • url = {https://www.merl.com/publications/TR2017-139}
    • }
  •  Ochiai, T., Watanabe, S., Hori, T., Hershey, J.R., Xiao, X., "Unified Architecture for Multichannel End-to-End Speech Recognition with Neural Beamforming", IEEE Journal of Selected Topics in Signal Processing, DOI: 10.1109/​JSTSP.2017.2764276, Vol. 11, No. 8, pp. 1274-1288, October 2017.
    BibTeX TR2017-192 PDF
    • @article{Ochiai2017oct2,
    • author = {Ochiai, Tsubasa and Watanabe, Shinji and Hori, Takaaki and Hershey, John R. and Xiao, Xiong},
    • title = {Unified Architecture for Multichannel End-to-End Speech Recognition with Neural Beamforming},
    • journal = {IEEE Journal of Selected Topics in Signal Processing},
    • year = 2017,
    • volume = 11,
    • number = 8,
    • pages = {1274--1288},
    • month = oct,
    • doi = {10.1109/JSTSP.2017.2764276},
    • issn = {1941-0484},
    • url = {https://www.merl.com/publications/TR2017-192}
    • }
  •  Ochiai, T., Watanabe, S., Hori, T., Hershey, J.R., "Multichannel End-to-end Speech Recognition", International Conference on Machine Learning (ICML), August 2017.
    BibTeX TR2017-107 PDF
    • @inproceedings{Ochiai2017aug,
    • author = {Ochiai, Tsubasa and Watanabe, Shinji and Hori, Takaaki and Hershey, John R.},
    • title = {Multichannel End-to-end Speech Recognition},
    • booktitle = {International Conference on Machine Learning (ICML)},
    • year = 2017,
    • month = aug,
    • url = {https://www.merl.com/publications/TR2017-107}
    • }
  •  Ding, Y., Kim, K.J., Koike-Akino, T., Pajovic, M., Wang, P., Orlik, P.V., "Spatial Scattering Modulation for Uplink Millimeter-Wave Systems", IEEE Communications Letters, DOI: 10.1109/​LCOMM.2017.2684126, Vol. 21, No. 7, pp. 1493-1496, May 2017.
    BibTeX TR2017-071 PDF
    • @article{Ding2017may4,
    • author = {Ding, Yacong and Kim, Kyeong Jin and Koike-Akino, Toshiaki and Pajovic, Milutin and Wang, Pu and Orlik, Philip V.},
    • title = {Spatial Scattering Modulation for Uplink Millimeter-Wave Systems},
    • journal = {IEEE Communications Letters},
    • year = 2017,
    • volume = 21,
    • number = 7,
    • pages = {1493--1496},
    • month = may,
    • doi = {10.1109/LCOMM.2017.2684126},
    • url = {https://www.merl.com/publications/TR2017-071}
    • }
  •  Ding, Y., Kim, K.J., Koike-Akino, T., Pajovic, M., Wang, P., Orlik, P.V., "Millimeter Wave Adaptive Transmission Using Spatial Scattering Modulation", IEEE International Conference on Communications (ICC), DOI: 10.1109/​ICC.2017.7996404, May 2017.
    BibTeX TR2017-045 PDF Presentation
    • @inproceedings{Ding2017may,
    • author = {Ding, Yacong and Kim, Kyeong Jin and Koike-Akino, Toshiaki and Pajovic, Milutin and Wang, Pu and Orlik, Philip V.},
    • title = {Millimeter Wave Adaptive Transmission Using Spatial Scattering Modulation},
    • booktitle = {IEEE International Conference on Communications (ICC)},
    • year = 2017,
    • month = may,
    • doi = {10.1109/ICC.2017.7996404},
    • url = {https://www.merl.com/publications/TR2017-045}
    • }
  •  Meng, Z., Watanabe, S., Hershey, J.R., Erdogan, H., "Deep Long Short-Term Memory Adaptive Beamforming Networks for Multichannel Robust Speech Recognition", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), March 2017.
    BibTeX TR2017-012 PDF
    • @inproceedings{Meng2017mar,
    • author = {Meng, Zhong and Watanabe, Shinji and Hershey, John R. and Erdogan, Hakan},
    • title = {Deep Long Short-Term Memory Adaptive Beamforming Networks for Multichannel Robust Speech Recognition},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2017,
    • month = mar,
    • url = {https://www.merl.com/publications/TR2017-012}
    • }
  •  Watanabe, S., Hori, T., Le Roux, J., Hershey, J.R., "Student-Teacher Network Learning with Enhanced Features", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), March 2017.
    BibTeX TR2017-011 PDF
    • @inproceedings{Watanabe2017mar,
    • author = {Watanabe, Shinji and Hori, Takaaki and Le Roux, Jonathan and Hershey, John R.},
    • title = {Student-Teacher Network Learning with Enhanced Features},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2017,
    • month = mar,
    • url = {https://www.merl.com/publications/TR2017-011}
    • }
  •  Zhang, C., Wang, B., Teo, K.H., "Design of Continuous Beam Steerable and Scalable Unit Module for Wireless Power Transmission Using Injection-locked Oscillator Array", Journal of Progress In Electromagnetics Research (PIER), DOI: 10.2528/​PIERC16081805, Vol. 69, pp. 169-179, December 2016.
    BibTeX TR2016-168 PDF
    • @article{Zhang2016dec,
    • author = {Zhang, Ce and Wang, Bingnan and Teo, Koon Hoo},
    • title = {Design of Continuous Beam Steerable and Scalable Unit Module for Wireless Power Transmission Using Injection-locked Oscillator Array},
    • journal = {Journal of Progress In Electromagnetics Research (PIER)},
    • year = 2016,
    • volume = 69,
    • pages = {169--179},
    • month = dec,
    • doi = {10.2528/PIERC16081805},
    • url = {https://www.merl.com/publications/TR2016-168}
    • }
  •  Xiao, X., Watanabe, S., Chng, E.S., Li, H., "Beamforming Networks Using Spatial Covariance Features for Far-field Speech Recognition", Asia-Pacific Signal and Information Processing Association Annual Summit and Conference (APSIPA ASC), DOI: 10.1109/​APSIPA.2016.7820724, December 2016.
    BibTeX TR2016-162 PDF
    • @inproceedings{Xiao2016dec,
    • author = {Xiao, Xiong and Watanabe, Shinji and Chng, Eng Siong and Li, Haizhou},
    • title = {Beamforming Networks Using Spatial Covariance Features for Far-field Speech Recognition},
    • booktitle = {Asia-Pacific Signal and Information Processing Association Annual Summit and Conference (APSIPA ASC)},
    • year = 2016,
    • month = dec,
    • doi = {10.1109/APSIPA.2016.7820724},
    • url = {https://www.merl.com/publications/TR2016-162}
    • }
  •  Vincent, E., Watanabe, S., Nugraha, A.A., Barker, J., Marxer, R., "An analysis of environment, microphone and data simulation mismatches in robust speech recognition", Computer Speech & Language, DOI: 10.1016/​j.csl.2016.11.005, December 2016.
    BibTeX TR2016-172 PDF
    • @article{Vincent2016dec,
    • author = {Vincent, Emmanuel and Watanabe, Shinji and Nugraha, Aditya Arie and Barker, Jon and Marxer, Ricard},
    • title = {An analysis of environment, microphone and data simulation mismatches in robust speech recognition},
    • journal = {Computer Speech \& Language},
    • year = 2016,
    • month = dec,
    • publisher = {Elsevier},
    • doi = {10.1016/j.csl.2016.11.005},
    • url = {https://www.merl.com/publications/TR2016-172}
    • }
  •  Zhang, C., Wang, B., Ishimaru, A., Kuga, Y., Optimal Array Beamforming for Microwave Power Transmission in Complex Environment, DOI: 10.1007/​978-3-319-46810-5_3, November 2016.
    BibTeX TR2017-032 PDF
    • @book{Zhang2016nov,
    • author = {Zhang, Ce and Wang, Bingnan and Ishimaru, Akira and Kuga, Yasuo},
    • title = {Optimal Array Beamforming for Microwave Power Transmission in Complex Environment},
    • year = 2016,
    • month = nov,
    • doi = {10.1007/978-3-319-46810-5_3},
    • isbn = {978-3-319-46810-5},
    • url = {https://www.merl.com/publications/TR2017-032}
    • }
  •  Erdogan, H., Hershey, J.R., Watanabe, S., Mandel, M., Le Roux, J., "Improved MVDR beamforming using single-channel mask prediction networks", Interspeech, DOI: 10.21437/​Interspeech.2016-552, September 2016, pp. 1981-1985.
    BibTeX TR2016-072 PDF
    • @inproceedings{Erdogan2016sep,
    • author = {Erdogan, Hakan and Hershey, John R. and Watanabe, Shinji and Mandel, Michael and Le Roux, Jonathan},
    • title = {Improved MVDR beamforming using single-channel mask prediction networks},
    • booktitle = {Interspeech},
    • year = 2016,
    • pages = {1981--1985},
    • month = sep,
    • doi = {10.21437/Interspeech.2016-552},
    • url = {https://www.merl.com/publications/TR2016-072}
    • }