NEWS Jonathan Le Roux gives invited talk at CMU's Language Technologies Institute Colloquium

Date released: February 27, 2023

NEWS Jonathan Le Roux gives invited talk at CMU's Language Technologies Institute Colloquium
Date:

December 9, 2022
Where:

Pittsburgh, PA
Description:

MERL Senior Principal Research Scientist and Speech and Audio Senior Team Leader Jonathan Le Roux was invited by Carnegie Mellon University's Language Technologies Institute (LTI) to give a talk as part of the LTI Colloquium Series. The LTI Colloquium is a prestigious series of talks given by experts from across the country on different areas of language technologies. Jonathan's talk, entitled "Towards general and flexible audio source separation", presented an overview of techniques developed at MERL towards the goal of robustly and flexibly decomposing and analyzing an acoustic scene, describing in particular the Speech and Audio Team's efforts to extend MERL's early speech separation and enhancement methods to more challenging environments, and to more general and less supervised scenarios.

Jonathan Le Roux gives invited talk at CMU's Language Technologies Institute Colloquium

MERL Contact:
- Jonathan
  Le Roux
Research Areas:

Artificial Intelligence, Machine Learning, Speech & Audio
- Related Publications
  Tzinis, E., Wichern, G., Subramanian, A.S., Smaragdis, P., Le Roux, J., "Heterogeneous Target Speech Separation", Interspeech, DOI: 10.21437/Interspeech.2022-10717, September 2022, pp. 1796-1800.
  BibTeX TR2022-115 PDF Video Presentation
  @inproceedings{Tzinis2022sep,
    author    = {Tzinis, Efthymios and Wichern, Gordon and Subramanian, Aswin Shanmugam and Smaragdis, Paris and {Le Roux}, Jonathan},
    title     = {{Heterogeneous Target Speech Separation}},
    booktitle = {Interspeech},
    pages     = {1796--1800},
    month     = sep,
    year      = {2022},
    doi       = {10.21437/Interspeech.2022-10717},
    url       = {https://www.merl.com/publications/TR2022-115},
  }
  Petermann, D., Wichern, G., Wang, Z.-Q., Le Roux, J., "The Cocktail Fork Problem: Three-Stem Audio Separation for Real-World Soundtracks", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP43922.2022.9746005, April 2022, pp. 526-530.
  BibTeX TR2022-022 PDF Video Data Software
  @inproceedings{Petermann2022apr,
    author    = {Petermann, Darius and Wichern, Gordon and Wang, Zhong-Qiu and {Le Roux}, Jonathan},
    title     = {{The Cocktail Fork Problem: Three-Stem Audio Separation for Real-World Soundtracks}},
    booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    pages     = {526--530},
    month     = apr,
    year      = {2022},
    doi       = {10.1109/ICASSP43922.2022.9746005},
    url       = {https://www.merl.com/publications/TR2022-022},
  }
  Wang, Z.-Q., Wichern, G., Le Roux, J., "Convolutive Prediction for Monaural Speech Dereverberation and Noisy-Reverberant Speaker Separation", IEEE/ACM Transactions on Audio, Speech, and Language Processing, DOI: 10.1109/TASLP.2021.3129363, Vol. 29, pp. 3476-3490, December 2021.
  BibTeX TR2021-144 PDF
  @article{Wang2021dec,
    author  = {Wang, Zhong-Qiu and Wichern, Gordon and {Le Roux}, Jonathan},
    title   = {{Convolutive Prediction for Monaural Speech Dereverberation and Noisy-Reverberant Speaker Separation}},
    journal = {IEEE/ACM Transactions on Audio, Speech, and Language Processing},
    volume  = {29},
    pages   = {3476--3490},
    month   = dec,
    year    = {2021},
    doi     = {10.1109/TASLP.2021.3129363},
    url     = {https://www.merl.com/publications/TR2021-144},
  }
  Wang, Z.-Q., Wichern, G., Le Roux, J., "On The Compensation Between Magnitude and Phase in Speech Separation", IEEE Signal Processing Letters, DOI: 10.1109/LSP.2021.3116502, Vol. 28, pp. 2018-2022, November 2021.
  BibTeX TR2021-137 PDF
  @article{Wang2021nov2,
    author  = {Wang, Zhong-Qiu and Wichern, Gordon and {Le Roux}, Jonathan},
    title   = {{On The Compensation Between Magnitude and Phase in Speech Separation}},
    journal = {IEEE Signal Processing Letters},
    volume  = {28},
    pages   = {2018--2022},
    month   = nov,
    year    = {2021},
    doi     = {10.1109/LSP.2021.3116502},
    url     = {https://www.merl.com/publications/TR2021-137},
  }
  Hung, Y.-N., Wichern, G., Le Roux, J., "Transcription Is All You Need: Learning to Separate Musical Mixtures with Score as Supervision", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP39728.2021.9413358, June 2021, pp. 46-50.
  BibTeX TR2021-069 PDF
  @inproceedings{Hung2021jun,
    author    = {Hung, Yun-Ning and Wichern, Gordon and {Le Roux}, Jonathan},
    title     = {{Transcription Is All You Need: Learning to Separate Musical Mixtures with Score as Supervision}},
    booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    pages     = {46--50},
    month     = jun,
    year      = {2021},
    isbn      = {978-1-7281-7605-5},
    issn      = {2379-190X},
    doi       = {10.1109/ICASSP39728.2021.9413358},
    url       = {https://www.merl.com/publications/TR2021-069},
  }
  Moritz, N., Wichern, G., Hori, T., Le Roux, J., "All-in-One Transformer: Unifying Speech Recognition, Audio Tagging, and Event Detection", Interspeech, DOI: 10.21437/Interspeech.2020-2757, October 2020, pp. 3112-3116.
  BibTeX TR2020-138 PDF Presentation
  @inproceedings{Moritz2020oct,
    author    = {Moritz, Niko and Wichern, Gordon and Hori, Takaaki and {Le Roux}, Jonathan},
    title     = {{All-in-One Transformer: Unifying Speech Recognition, Audio Tagging, and Event Detection}},
    booktitle = {Interspeech},
    pages     = {3112--3116},
    month     = oct,
    year      = {2020},
    issn      = {1990-9772},
    doi       = {10.21437/Interspeech.2020-2757},
    url       = {https://www.merl.com/publications/TR2020-138},
  }
  Manilow, E., Wichern, G., Le Roux, J., "Hierarchical Musical Instrument Separation", International Society for Music Information Retrieval (ISMIR) Conference, October 2020, pp. 376-383.
  BibTeX TR2020-136 PDF Software
  @inproceedings{Manilow2020oct,
    author    = {Manilow, Ethan and Wichern, Gordon and {Le Roux}, Jonathan},
    title     = {{Hierarchical Musical Instrument Separation}},
    booktitle = {International Society for Music Information Retrieval (ISMIR) Conference},
    pages     = {376--383},
    month     = oct,
    year      = {2020},
    isbn      = {978-0-9813537-0-8},
    url       = {https://www.merl.com/publications/TR2020-136},
  }
  Pishdadian, F., Wichern, G., Le Roux, J., "Finding Strength in Weakness: Learning to Separate Sounds with Weak Supervision", IEEE/ACM Transactions on Audio, Speech, and Language Processing, DOI: 10.1109/TASLP.2020.3013105, Vol. 28, pp. 2386-2399, September 2020.
  BibTeX TR2020-126 PDF
  @article{Pishdadian2020sep,
    author  = {Pishdadian, Fatemeh and Wichern, Gordon and {Le Roux}, Jonathan},
    title   = {{Finding Strength in Weakness: Learning to Separate Sounds with Weak Supervision}},
    journal = {IEEE/ACM Transactions on Audio, Speech, and Language Processing},
    volume  = {28},
    pages   = {2386--2399},
    month   = sep,
    year    = {2020},
    doi     = {10.1109/TASLP.2020.3013105},
    url     = {https://www.merl.com/publications/TR2020-126},
  }
  Seetharaman, P., Wichern, G., Le Roux, J., Pardo, B., "Bootstrapping Unsupervised Deep Music Separation from Primitive Auditory Grouping Principles", ICML 2020 Workshop on Self-supervision in Audio and Speech, July 2020.
  BibTeX TR2020-111 PDF
  @inproceedings{Seetharaman2020jul,
    author    = {Seetharaman, Prem and Wichern, Gordon and {Le Roux}, Jonathan and Pardo, Bryan},
    title     = {{Bootstrapping Unsupervised Deep Music Separation from Primitive Auditory Grouping Principles}},
    booktitle = {ICML 2020 Workshop on Self-supervision in Audio and Speech},
    month     = jul,
    year      = {2020},
    url       = {https://www.merl.com/publications/TR2020-111},
  }
  Chang, X., Zhang, W., Qian, Y., Le Roux, J., Watanabe, S., "End-To-End Multi-Speaker Speech Recognition with Transformer", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP40776.2020.9054029, April 2020, pp. 6134-6138.
  BibTeX TR2020-043 PDF Video Presentation
  @inproceedings{Chang2020apr,
    author    = {Chang, Xuankai and Zhang, Wangyou and Qian, Yanmin and {Le Roux}, Jonathan and Watanabe, Shinji},
    title     = {{End-To-End Multi-Speaker Speech Recognition with Transformer}},
    booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    pages     = {6134--6138},
    month     = apr,
    year      = {2020},
    publisher = {IEEE},
    isbn      = {978-1-5090-6631-5},
    issn      = {2379-190X},
    doi       = {10.1109/ICASSP40776.2020.9054029},
    url       = {https://www.merl.com/publications/TR2020-043},
  }
  Maciejewski, M., Wichern, G., McQuinn, E., Le Roux, J., "WHAMR!: Noisy and Reverberant Single-Channel Speech Separation", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP40776.2020.9053327, April 2020, pp. 696-700.
  BibTeX TR2020-042 PDF Video Presentation
  @inproceedings{Maciejewski2020apr,
    author    = {Maciejewski, Matthew and Wichern, Gordon and McQuinn, Emmett and {Le Roux}, Jonathan},
    title     = {{WHAMR!: Noisy and Reverberant Single-Channel Speech Separation}},
    booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    pages     = {696--700},
    month     = apr,
    year      = {2020},
    publisher = {IEEE},
    isbn      = {978-1-5090-6631-5},
    issn      = {2379-190X},
    doi       = {10.1109/ICASSP40776.2020.9053327},
    url       = {https://www.merl.com/publications/TR2020-042},
  }
  Wichern, G., McQuinn, E., Antognini, J., Flynn, M., Zhu, R., Crow, D., Manilow, E., Le Roux, J., "WHAM!: Extending Speech Separation to Noisy Environments", Interspeech, DOI: 10.21437/Interspeech.2019-2821, September 2019, pp. 1368-1372.
  BibTeX TR2019-099 PDF
  @inproceedings{Wichern2019sep,
    author    = {Wichern, Gordon and McQuinn, Emmett and Antognini, Joe and Flynn, Michael and Zhu, Richard and Crow, Dwight and Manilow, Ethan and {Le Roux}, Jonathan},
    title     = {{WHAM!: Extending Speech Separation to Noisy Environments}},
    booktitle = {Interspeech},
    pages     = {1368--1372},
    month     = sep,
    year      = {2019},
    doi       = {10.21437/Interspeech.2019-2821},
    url       = {https://www.merl.com/publications/TR2019-099},
  }

Date:

Where:

Description:

MERL Contact:

Jonathan Le Roux

Research Areas:

Jonathan
Le Roux