Publications

59 / 3,604 publications found.


  •  Yataka, R., Wang, P., Boufounos, P.T., Takahashi, R., "Radar Perception with Scalable Connective Temporal Relations for Autonomous Driving", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/​ICASSP48485.2024.10446449, March 2024, pp. 13266-13270.
    BibTeX TR2024-023 PDF
    • @inproceedings{Yataka2024mar,
    •   author    = {Yataka, Ryoma and Wang, Pu and Boufounos, Petros T. and Takahashi, Ryuhei},
    •   title     = {Radar Perception with Scalable Connective Temporal Relations for Autonomous Driving},
    •   booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    •   pages     = {13266--13270},
    •   publisher = {IEEE},
    •   year      = {2024},
    •   month     = mar,
    •   doi       = {10.1109/ICASSP48485.2024.10446449},
    •   isbn      = {979-8-3503-4485-1},
    •   issn      = {2379-190X},
    •   url       = {https://www.merl.com/publications/TR2024-023},
    • }
  •  Baoueb, T., Liu, H., Fontaine, M., Le Roux, J., Richard, G., "SpecDiff-GAN: A Spectrally-Shaped Noise Diffusion GAN for Speech and Music Synthesis", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), March 2024.
    BibTeX TR2024-013 PDF
    • @inproceedings{Baoueb2024mar,
    • author = {Baoueb, Teysir and Liu, Haocheng and Fontaine, Mathieu and Le Roux, Jonathan and Richard, Ga{\"e}l},
    • title = {{SpecDiff-GAN}: A Spectrally-Shaped Noise Diffusion {GAN} for Speech and Music Synthesis},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2024,
    • month = mar,
    • url = {https://www.merl.com/publications/TR2024-013}
    • }
  •  Hori, C., Wang, P., Rahman, M., Vaca-Rubio, C., Khurana, S., Cherian, A., Le Roux, J., "Wi-Fi based Indoor Monitoring Enhanced by Multimodal Fusion", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/​ICASSP48485.2024.10447600, March 2024, pp. 13296-13300.
    BibTeX TR2024-012 PDF
    • @inproceedings{Hori2024mar,
    • author = {Hori, Chiori and Wang, Pu and Rahman, Mahbub and Vaca-Rubio, Cristian and Khurana, Sameer and Cherian, Anoop and Le Roux, Jonathan},
    • title = {{Wi-Fi} based Indoor Monitoring Enhanced by Multimodal Fusion},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2024,
    • pages = {13296--13300},
    • month = mar,
    • publisher = {IEEE},
    • doi = {10.1109/ICASSP48485.2024.10447600},
    • issn = {2379-190X},
    • isbn = {979-8-3503-4485-1},
    • url = {https://www.merl.com/publications/TR2024-012}
    • }
  •  Pan, Z., Wichern, G., Masuyama, Y., Germain, F.G., Khurana, S., Hori, C., Le Roux, J., "Scenario-Aware Audio-Visual TF-GridNet for Target Speech Extraction", IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU), DOI: 10.1109/​ASRU57964.2023.10389618, December 2023.
    BibTeX TR2023-152 PDF
    • @inproceedings{Pan2023dec2,
    • author = {Pan, Zexu and Wichern, Gordon and Masuyama, Yoshiki and Germain, Fran{\c{c}}ois G. and Khurana, Sameer and Hori, Chiori and Le Roux, Jonathan},
    • title = {Scenario-Aware Audio-Visual {TF-GridNet} for Target Speech Extraction},
    • booktitle = {IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU)},
    • year = 2023,
    • month = dec,
    • doi = {10.1109/ASRU57964.2023.10389618},
    • isbn = {979-8-3503-0689-7},
    • url = {https://www.merl.com/publications/TR2023-152}
    • }
  •  Yen, H., Germain, F., Wichern, G., Le Roux, J., "Cold Diffusion for Speech Enhancement", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/​ICASSP49357.2023.10096064, May 2023, pp. 1-5.
    BibTeX TR2023-020 PDF
    • @inproceedings{Yen2023may,
    • author = {Yen, Hao and Germain, Fran{\c{c}}ois G. and Wichern, Gordon and Le Roux, Jonathan},
    • title = {Cold Diffusion for Speech Enhancement},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2023,
    • pages = {1--5},
    • month = may,
    • publisher = {IEEE},
    • doi = {10.1109/ICASSP49357.2023.10096064},
    • url = {https://www.merl.com/publications/TR2023-020}
    • }
  •  Wang, Z.-Q., Wichern, G., Watanabe, S., Le Roux, J., "STFT-Domain Neural Speech Enhancement with Very Low Algorithmic Latency", IEEE/ACM Transactions on Audio, Speech, and Language Processing, DOI: 10.1109/​TASLP.2022.3224285, Vol. 31, pp. 397-410, December 2022.
    BibTeX TR2022-166 PDF
    • @article{Wang2022dec2,
    • author = {Wang, Zhong-Qiu and Wichern, Gordon and Watanabe, Shinji and Le Roux, Jonathan},
    • title = {{STFT-Domain} Neural Speech Enhancement with Very Low Algorithmic Latency},
    • journal = {IEEE/ACM Transactions on Audio, Speech, and Language Processing},
    • year = 2022,
    • volume = 31,
    • pages = {397--410},
    • month = dec,
    • doi = {10.1109/TASLP.2022.3224285},
    • issn = {2329-9304},
    • url = {https://www.merl.com/publications/TR2022-166}
    • }
  •  Higuchi, Y., Moritz, N., Le Roux, J., Hori, T., "Advancing Momentum Pseudo-Labeling with Conformer and Initialization Strategy", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/​ICASSP43922.2022.9746275, April 2022, pp. 7672-7676.
    BibTeX TR2022-026 PDF
    • @inproceedings{Higuchi2022apr,
    •   author    = {Higuchi, Yosuke and Moritz, Niko and Le Roux, Jonathan and Hori, Takaaki},
    •   title     = {Advancing Momentum Pseudo-Labeling with Conformer and Initialization Strategy},
    •   booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    •   pages     = {7672--7676},
    •   publisher = {IEEE},
    •   year      = {2022},
    •   month     = apr,
    •   doi       = {10.1109/ICASSP43922.2022.9746275},
    •   url       = {https://www.merl.com/publications/TR2022-026},
    • }
  •  Wang, Z.-Q., Wichern, G., Le Roux, J., "Leveraging Low-Distortion Target Estimates for Improved Speech Enhancement", arXiv, October 2021.
    BibTeX arXiv
    • @article{Wang2021oct,
    • author = {Wang, Zhong-Qiu and Wichern, Gordon and Le Roux, Jonathan},
    • title = {Leveraging Low-Distortion Target Estimates for Improved Speech Enhancement},
    • journal = {arXiv},
    • year = 2021,
    • month = oct,
    • eprint = {2110.00570},
    • archiveprefix = {arXiv},
    • url = {https://arxiv.org/abs/2110.00570}
    • }
  •  Watanabe, S., Boyer, F., Chang, X., Guo, P., Hayashi, T., Higuchi, Y., Hori, T., Huang, W.-C., Inaguma, H., Kamo, N., Karita, S., Li, C., Shi, J., Subramanian, A.S., Zhang, W., "The 2020 ESPNET Update: New Features, Broadened Applications, Performance Improvements, and Future Plans", IEEE Data Science and Learning Workshop (DSLW), DOI: 10.1109/​DSLW51110.2021.9523402, June 2021, pp. 1-6.
    BibTeX TR2021-073 PDF
    • @inproceedings{Watanabe2021jun,
    • author = {Watanabe, Shinji and Boyer, Florian and Chang, Xuankai and Guo, Pengcheng and Hayashi, Tomoki and Higuchi, Yosuke and Hori, Takaaki and Huang, Wen-Chin and Inaguma, Hirofumi and Kamo, Naoyuki and Karita, Shigeki and Li, Chenda and Shi, Jing and Subramanian, Aswin S. and Zhang, Wangyou},
    • title = {The 2020 {ESPNET} Update: New Features, Broadened Applications, Performance Improvements, and Future Plans},
    • booktitle = {IEEE Data Science and Learning Workshop (DSLW)},
    • year = 2021,
    • pages = {1--6},
    • month = jun,
    • publisher = {IEEE},
    • doi = {10.1109/DSLW51110.2021.9523402},
    • isbn = {978-1-6654-2826-2},
    • url = {https://www.merl.com/publications/TR2021-073}
    • }
  •  Chang, X., Zhang, W., Qian, Y., Le Roux, J., Watanabe, S., "MIMO-Speech: End-to-End Multi-Channel Multi-Speaker Speech Recognition", IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU), December 2019, pp. 237-244.
    BibTeX TR2019-157 PDF
    • @inproceedings{Chang2019dec,
    • author = {Chang, Xuankai and Zhang, Wangyou and Qian, Yanmin and Le Roux, Jonathan and Watanabe, Shinji},
    • title = {{MIMO-Speech}: End-to-End Multi-Channel Multi-Speaker Speech Recognition},
    • booktitle = {IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU)},
    • year = 2019,
    • pages = {237--244},
    • month = dec,
    • isbn = {978-1-7281-0305-1},
    • url = {https://www.merl.com/publications/TR2019-157}
    • }
  •  Kavalerov, I., Wisdom, S., Erdogan, H., Patton, B., Wilson, K., Le Roux, J., Hershey, J., "Universal Sound Separation", IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (WASPAA), DOI: 10.1109/​WASPAA.2019.8937253, October 2019, pp. 170-174.
    BibTeX TR2019-123 PDF
    • @inproceedings{Kavalerov2019oct,
    •   author    = {Kavalerov, Ilya and Wisdom, Scott and Erdogan, Hakan and Patton, Brian and Wilson, Kevin and Le Roux, Jonathan and Hershey, John},
    •   title     = {Universal Sound Separation},
    •   booktitle = {IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (WASPAA)},
    •   pages     = {170--174},
    •   year      = {2019},
    •   month     = oct,
    •   doi       = {10.1109/WASPAA.2019.8937253},
    •   isbn      = {978-1-7281-1123-0},
    •   issn      = {1947-1629},
    •   url       = {https://www.merl.com/publications/TR2019-123},
    • }
  •  Kadu, A., Mansour, H., Boufounos, P.T., Liu, D., "Reflection Tomographic Imaging of Highly Scattering Objects Using Incremental Frequency Inversion", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/​ICASSP.2019.8682393, May 2019.
    BibTeX TR2019-012 PDF Video
    • @inproceedings{Kadu2019may,
    •   author    = {Kadu, Ajinkya and Mansour, Hassan and Boufounos, Petros T. and Liu, Dehong},
    •   title     = {Reflection Tomographic Imaging of Highly Scattering Objects Using Incremental Frequency Inversion},
    •   booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    •   year      = {2019},
    •   month     = may,
    •   doi       = {10.1109/ICASSP.2019.8682393},
    •   url       = {https://www.merl.com/publications/TR2019-012},
    • }
  •  Le Roux, J., Wichern, G., Watanabe, S., Sarroff, A., Hershey, J., "The Phasebook: Building Complex Masks via Discrete Representations for Source Separation", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/​ICASSP.2019.8682587, May 2019.
    BibTeX TR2019-008 PDF
    • @inproceedings{LeRoux2019may2,
    •   author    = {Le Roux, Jonathan and Wichern, Gordon and Watanabe, Shinji and Sarroff, Andy and Hershey, John},
    •   title     = {The Phasebook: Building Complex Masks via Discrete Representations for Source Separation},
    •   booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    •   year      = {2019},
    •   month     = may,
    •   doi       = {10.1109/ICASSP.2019.8682587},
    •   url       = {https://www.merl.com/publications/TR2019-008},
    • }
  •  Le Roux, J., Wisdom, S., Erdogan, H., Hershey, J., "SDR -- Half-Baked or Well Done?", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/​ICASSP.2019.8683855, May 2019.
    BibTeX TR2019-013 PDF
    • @inproceedings{LeRoux2019may,
    • author = {Le Roux, Jonathan and Wisdom, Scott and Erdogan, Hakan and Hershey, John},
    • title = {{SDR} -- Half-Baked or Well Done?},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2019,
    • month = may,
    • doi = {10.1109/ICASSP.2019.8683855},
    • url = {https://www.merl.com/publications/TR2019-013}
    • }
  •  Le Roux, J., Wichern, G., Watanabe, S., Sarroff, A., Hershey, J., "Phasebook and Friends: Leveraging discrete representations for source separation", IEEE Journal of Selected Topics in Signal Processing, DOI: 10.1109/​JSTSP.2019.2904183, Vol. 13, No. 2, pp. 370-382, March 2019.
    BibTeX TR2018-199 PDF
    • @article{LeRoux2019mar,
    •   author  = {Le Roux, Jonathan and Wichern, Gordon and Watanabe, Shinji and Sarroff, Andy and Hershey, John},
    •   title   = {Phasebook and Friends: Leveraging discrete representations for source separation},
    •   journal = {IEEE Journal of Selected Topics in Signal Processing},
    •   volume  = {13},
    •   number  = {2},
    •   pages   = {370--382},
    •   year    = {2019},
    •   month   = mar,
    •   doi     = {10.1109/JSTSP.2019.2904183},
    •   url     = {https://www.merl.com/publications/TR2018-199},
    • }
  •  Wichern, G., Le Roux, J., "Phase Reconstruction with Learned Time-Frequency Representations for Single-Channel Speech Separation", International Workshop on Acoustic Signal Enhancement (IWAENC), DOI: 10.1109/​IWAENC.2018.8521243, September 2018.
    BibTeX TR2018-146 PDF
    • @inproceedings{Wichern2018sep,
    •   author    = {Wichern, Gordon and Le Roux, Jonathan},
    •   title     = {Phase Reconstruction with Learned Time-Frequency Representations for Single-Channel Speech Separation},
    •   booktitle = {International Workshop on Acoustic Signal Enhancement (IWAENC)},
    •   year      = {2018},
    •   month     = sep,
    •   doi       = {10.1109/IWAENC.2018.8521243},
    •   url       = {https://www.merl.com/publications/TR2018-146},
    • }
  •  Xiao, X., Watanabe, S., Erdogan, H., Mandel, M., Lu, L., Hershey, J., Seltzer, M., Chen, G., Zhang, Y., Yu, D., "Discriminative beamforming with phase aware neural networks for speech enhancement and recognition" in New Era for Robust Speech Recognition: Exploiting Deep Learning, Watanabe, S. and Delcroix, M. and Metze, F. and Hershey, J.R., Eds., chapter 4, Springer, July 9, 2018.
    BibTeX
    • @incollection{Xiao2018jul2,
    • author = {Xiao, Xiong and Watanabe, Shinji and Erdogan, Hakan and Mandel, Michael and Lu, Liang and Hershey, John and Seltzer, Mike and Chen, Guoguo and Zhang, Yu and Yu, Dong},
    • title = {Discriminative beamforming with phase aware neural networks for speech enhancement and recognition},
    • booktitle = {New Era for Robust Speech Recognition: Exploiting Deep Learning},
    • year = 2018,
    • editor = {Watanabe, Shinji and Delcroix, Marc and Metze, Florian and Hershey, John R.},
    • chapter = 4,
    • month = jul,
    • publisher = {Springer}
    • }
  •  Ochiai, T., Watanabe, S., Katagiri, S., Hori, T., Hershey, J.R., "Speaker Adaptation for Multichannel End-to-End Speech Recognition", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/​ICASSP.2018.8462161, April 2018, pp. 6707-6711.
    BibTeX TR2018-006 PDF
    • @inproceedings{Ochiai2018apr,
    •   author    = {Ochiai, Tsubasa and Watanabe, Shinji and Katagiri, Shigeru and Hori, Takaaki and Hershey, John R.},
    •   title     = {Speaker Adaptation for Multichannel End-to-End Speech Recognition},
    •   booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    •   pages     = {6707--6711},
    •   year      = {2018},
    •   month     = apr,
    •   doi       = {10.1109/ICASSP.2018.8462161},
    •   url       = {https://www.merl.com/publications/TR2018-006},
    • }
  •  Ochiai, T., Watanabe, S., Katagiri, S., "Does speech enhancement work with end-to-end ASR objectives?: Experimental analysis of multichannel end-to-end ASR", IEEE International Workshop on Machine Learning for Signal Processing (MLSP), DOI: 10.1109/​MLSP.2017.8168188, October 2017.
    BibTeX TR2017-139 PDF
    • @inproceedings{Ochiai2017oct,
    • author = {Ochiai, Tsubasa and Watanabe, Shinji and Katagiri, Shigeru},
    • title = {Does speech enhancement work with end-to-end {ASR} objectives?: Experimental analysis of multichannel end-to-end {ASR}},
    • booktitle = {IEEE International Workshop on Machine Learning for Signal Processing (MLSP)},
    • year = 2017,
    • month = oct,
    • doi = {10.1109/MLSP.2017.8168188},
    • url = {https://www.merl.com/publications/TR2017-139}
    • }
  •  Ochiai, T., Watanabe, S., Hori, T., Hershey, J.R., Xiao, X., "Unified Architecture for Multichannel End-to-End Speech Recognition with Neural Beamforming", IEEE Journal of Selected Topics in Signal Processing, DOI: 10.1109/​JSTSP.2017.2764276, Vol. 11, No. 8, pp. 1274-1288, October 2017.
    BibTeX TR2017-192 PDF
    • @article{Ochiai2017oct2,
    •   author  = {Ochiai, Tsubasa and Watanabe, Shinji and Hori, Takaaki and Hershey, John R. and Xiao, Xiong},
    •   title   = {Unified Architecture for Multichannel End-to-End Speech Recognition with Neural Beamforming},
    •   journal = {IEEE Journal of Selected Topics in Signal Processing},
    •   volume  = {11},
    •   number  = {8},
    •   pages   = {1274--1288},
    •   year    = {2017},
    •   month   = oct,
    •   doi     = {10.1109/JSTSP.2017.2764276},
    •   issn    = {1941-0484},
    •   url     = {https://www.merl.com/publications/TR2017-192},
    • }
  •  Tachioka, Y., Narita, T., Miura, I., Uramoto, T., Monta, N., Uenohara, S., Furuya, K., Watanabe, S., Le Roux, J., "Coupled initialization of multi-channel non-negative matrix factorization based on spatial and spectral information", Interspeech, August 2017.
    BibTeX TR2017-134 PDF
    • @inproceedings{Tachioka2017aug,
    •   author    = {Tachioka, Yuuki and Narita, Tomohiro and Miura, Iori and Uramoto, Takanobu and Monta, Natsuki and Uenohara, Shingo and Furuya, Kenichi and Watanabe, Shinji and Le Roux, Jonathan},
    •   title     = {Coupled initialization of multi-channel non-negative matrix factorization based on spatial and spectral information},
    •   booktitle = {Interspeech},
    •   year      = {2017},
    •   month     = aug,
    •   url       = {https://www.merl.com/publications/TR2017-134},
    • }
  •  Ochiai, T., Watanabe, S., Hori, T., Hershey, J.R., "Multichannel End-to-end Speech Recognition", International Conference on Machine Learning (ICML), August 2017.
    BibTeX TR2017-107 PDF
    • @inproceedings{Ochiai2017aug,
    •   author    = {Ochiai, Tsubasa and Watanabe, Shinji and Hori, Takaaki and Hershey, John R.},
    •   title     = {Multichannel End-to-end Speech Recognition},
    •   booktitle = {International Conference on Machine Learning (ICML)},
    •   year      = {2017},
    •   month     = aug,
    •   url       = {https://www.merl.com/publications/TR2017-107},
    • }
  •  Chen, S., Tian, D., Feng, C., Vetro, A., Kovacevic, J., "Contour-Enhanced Resampling of 3D Point Clouds Via Graphs", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), March 2017.
    BibTeX TR2017-017 PDF
    • @inproceedings{Chen2017mar,
    • author = {Chen, Siheng and Tian, Dong and Feng, Chen and Vetro, Anthony and Kovacevic, Jelena},
    • title = {Contour-Enhanced Resampling of {3D} Point Clouds Via Graphs},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2017,
    • month = mar,
    • url = {https://www.merl.com/publications/TR2017-017}
    • }
  •  Watanabe, S., Hori, T., Le Roux, J., Hershey, J.R., "Student-Teacher Network Learning with Enhanced Features", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), March 2017.
    BibTeX TR2017-011 PDF
    • @inproceedings{Watanabe2017mar,
    •   author    = {Watanabe, Shinji and Hori, Takaaki and Le Roux, Jonathan and Hershey, John R.},
    •   title     = {Student-Teacher Network Learning with Enhanced Features},
    •   booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    •   year      = {2017},
    •   month     = mar,
    •   url       = {https://www.merl.com/publications/TR2017-011},
    • }
  •  Vincent, E., Watanabe, S., Nugraha, A.A., Barker, J., Marxer, R., "An analysis of environment, microphone and data simulation mismatches in robust speech recognition", Computer Speech & Language, DOI: 10.1016/​j.csl.2016.11.005, December 2016.
    BibTeX TR2016-172 PDF
    • @article{Vincent2016dec,
    •   author    = {Vincent, Emmanuel and Watanabe, Shinji and Nugraha, Aditya Arie and Barker, Jon and Marxer, Ricard},
    •   title     = {An analysis of environment, microphone and data simulation mismatches in robust speech recognition},
    •   journal   = {Computer Speech \& Language},
    •   publisher = {Elsevier},
    •   year      = {2016},
    •   month     = dec,
    •   doi       = {10.1016/j.csl.2016.11.005},
    •   url       = {https://www.merl.com/publications/TR2016-172},
    • }