- Kavalerov, I., Wisdom, S., Erdogan, H., Patton, B., Wilson, K., Le Roux, J., Hershey, J., "Universal Sound Separation", IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (WASPAA), DOI: 10.1109/WASPAA.2019.8937253, October 2019, pp. 170-174.
BibTeX TR2019-123 PDF- @inproceedings{Kavalerov2019oct,
- author    = {Kavalerov, Ilya and Wisdom, Scott and Erdogan, Hakan and Patton, Brian and Wilson, Kevin and Le Roux, Jonathan and Hershey, John},
- title     = {Universal Sound Separation},
- booktitle = {IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (WASPAA)},
- pages     = {170--174},
- month     = oct,
- year      = {2019},
- doi       = {10.1109/WASPAA.2019.8937253},
- issn      = {1947-1629},
- isbn      = {978-1-7281-1123-0},
- url       = {https://www.merl.com/publications/TR2019-123},
- }
- Seki, H., Hori, T., Watanabe, S., Le Roux, J., Hershey, J., "End-to-End Multilingual Multi-Speaker Speech Recognition", Interspeech, DOI: 10.21437/Interspeech.2019-3038, September 2019, pp. 3755-3759.
BibTeX TR2019-101 PDF- @inproceedings{Seki2019sep,
- author    = {Seki, Hiroshi and Hori, Takaaki and Watanabe, Shinji and Le Roux, Jonathan and Hershey, John},
- title     = {End-to-End Multilingual Multi-Speaker Speech Recognition},
- booktitle = {Interspeech},
- pages     = {3755--3759},
- month     = sep,
- year      = {2019},
- doi       = {10.21437/Interspeech.2019-3038},
- url       = {https://www.merl.com/publications/TR2019-101},
- }
- Le Roux, J., Wichern, G., Watanabe, S., Sarroff, A., Hershey, J., "The Phasebook: Building Complex Masks via Discrete Representations for Source Separation", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP.2019.8682587, May 2019.
BibTeX TR2019-008 PDF- @inproceedings{LeRoux2019may2,
- author    = {Le Roux, Jonathan and Wichern, Gordon and Watanabe, Shinji and Sarroff, Andy and Hershey, John},
- title     = {The Phasebook: Building Complex Masks via Discrete Representations for Source Separation},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- month     = may,
- year      = {2019},
- doi       = {10.1109/ICASSP.2019.8682587},
- url       = {https://www.merl.com/publications/TR2019-008},
- }
- Le Roux, J., Wisdom, S., Erdogan, H., Hershey, J., "SDR -- Half-Baked or Well Done?", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP.2019.8683855, May 2019.
BibTeX TR2019-013 PDF- @inproceedings{LeRoux2019may,
- author = {Le Roux, Jonathan and Wisdom, Scott and Erdogan, Hakan and Hershey, John},
- title = {{SDR} -- Half-Baked or Well Done?},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- year = 2019,
- month = may,
- doi = {10.1109/ICASSP.2019.8683855},
- url = {https://www.merl.com/publications/TR2019-013}
- }
- Le Roux, J., Wichern, G., Watanabe, S., Sarroff, A., Hershey, J., "Phasebook and Friends: Leveraging discrete representations for source separation", IEEE Journal of Selected Topics in Signal Processing, DOI: 10.1109/JSTSP.2019.2904183, Vol. 13, No. 2, pp. 370-382, March 2019.
BibTeX TR2018-199 PDF- @article{LeRoux2019mar,
- author = {Le Roux, Jonathan and Wichern, Gordon and Watanabe, Shinji and Sarroff, Andy and Hershey, John},
- title = {Phasebook and Friends: Leveraging discrete representations for source separation},
- journal = {IEEE Journal of Selected Topics in Signal Processing},
- year = 2019,
- volume = 13,
- number = 2,
- pages = {370--382},
- month = mar,
- doi = {10.1109/JSTSP.2019.2904183},
- issn = {1941-0484},
- url = {https://www.merl.com/publications/TR2018-199}
- }
- Hori, T., Wang, W., Koji, Y., Hori, C., Harsham, B.A., Hershey, J., "Adversarial Training and Decoding Strategies for End-to-end Neural Conversation Models", Computer Speech and Language, DOI: 10.1016/j.csl.2018.08.006, Vol. 54, pp. 122-139, December 2018.
BibTeX TR2018-161 PDF- @article{Hori2018dec2,
- author    = {Hori, Takaaki and Wang, Wen and Koji, Yusuke and Hori, Chiori and Harsham, Bret A. and Hershey, John},
- title     = {Adversarial Training and Decoding Strategies for End-to-end Neural Conversation Models},
- journal   = {Computer Speech and Language},
- volume    = {54},
- pages     = {122--139},
- month     = dec,
- year      = {2018},
- publisher = {Elsevier},
- doi       = {10.1016/j.csl.2018.08.006},
- url       = {https://www.merl.com/publications/TR2018-161},
- }
- Wang, Z.-Q., Le Roux, J., Wang, D., Hershey, J., "End-to-End Speech Separation with Unfolded Iterative Phase Reconstruction", Interspeech, September 2018.
BibTeX TR2018-135 PDF- @inproceedings{Wang2018sep,
- author    = {Wang, Zhong-Qiu and Le Roux, Jonathan and Wang, DeLiang and Hershey, John},
- title     = {End-to-End Speech Separation with Unfolded Iterative Phase Reconstruction},
- booktitle = {Interspeech},
- month     = sep,
- year      = {2018},
- url       = {https://www.merl.com/publications/TR2018-135},
- }
- Seki, H., Hori, T., Watanabe, S., Le Roux, J., Hershey, J., "A Purely End-to-end System for Multi-speaker Speech Recognition", Annual Meeting of the Association for Computational Linguistics (ACL), July 2018, pp. 2620-2630.
BibTeX TR2018-104 PDF Video- @inproceedings{Seki2018jul,
- author = {Seki, Hiroshi and Hori, Takaaki and Watanabe, Shinji and Le Roux, Jonathan and Hershey, John},
- title = {A Purely End-to-end System for Multi-speaker Speech Recognition},
- booktitle = {Annual Meeting of the Association for Computational Linguistics (ACL)},
- year = 2018,
- pages = {2620--2630},
- month = jul,
- publisher = {Association for Computational Linguistics},
- url = {https://www.merl.com/publications/TR2018-104}
- }
- Erdogan, H., Hershey, J., Watanabe, S., Le Roux, J., "Deep recurrent networks for separation and recognition of single-channel speech in non-stationary background audio" in New Era for Robust Speech Recognition: Exploiting Deep Learning, Watanabe, S., Delcroix, M., Metze, F., Hershey, J.R., Eds., chapter 7, Springer, July 2018.
BibTeX - @incollection{Erdogan2018jul,
- author = {Erdogan, Hakan and Hershey, John and Watanabe, Shinji and Le Roux, Jonathan},
- title = {Deep recurrent networks for separation and recognition of single-channel speech in non-stationary background audio},
- booktitle = {New Era for Robust Speech Recognition: Exploiting Deep Learning},
- year = 2018,
- editor = {Watanabe, Shinji and Delcroix, Marc and Metze, Florian and Hershey, John R.},
- chapter = 7,
- month = jul,
- publisher = {Springer},
- isbn = {978-3-319-64680-0}
- }
- Hershey, J., Le Roux, J., Watanabe, S., Wisdom, S., Chen, Z., Isik, Y., "Novel deep architectures in speech processing" in New Era for Robust Speech Recognition: Exploiting Deep Learning, Watanabe, S., Delcroix, M., Metze, F., Hershey, J.R., Eds., chapter 6, Springer, July 9, 2018.
BibTeX - @incollection{Hershey2018jul,
- author = {Hershey, John and Le Roux, Jonathan and Watanabe, Shinji and Wisdom, Scott and Chen, Zhuo and Isik, Yusuf},
- title = {Novel deep architectures in speech processing},
- booktitle = {New Era for Robust Speech Recognition: Exploiting Deep Learning},
- year = 2018,
- editor = {Watanabe, Shinji and Delcroix, Marc and Metze, Florian and Hershey, John R.},
- chapter = 6,
- month = jul,
- publisher = {Springer},
- isbn = {978-3-319-64680-0}
- }
- Watanabe, S., Hori, T., Miao, Y., Delcroix, M., Metze, F., Hershey, J., "Toolkits for robust speech processing" in New Era for Robust Speech Recognition: Exploiting Deep Learning, Watanabe, S., Delcroix, M., Metze, F., Hershey, J.R., Eds., chapter 14, Springer, July 9, 2018.
BibTeX - @incollection{Watanabe2018jul,
- author = {Watanabe, Shinji and Hori, Takaaki and Miao, Yajie and Delcroix, Marc and Metze, Florian and Hershey, John},
- title = {Toolkits for robust speech processing},
- booktitle = {New Era for Robust Speech Recognition: Exploiting Deep Learning},
- year = 2018,
- editor = {Watanabe, Shinji and Delcroix, Marc and Metze, Florian and Hershey, John R.},
- chapter = 14,
- month = jul,
- publisher = {Springer},
- isbn = {978-3-319-64680-0}
- }
- Xiao, X., Watanabe, S., Erdogan, H., Mandel, M., Lu, L., Hershey, J., Seltzer, M., Chen, G., Zhang, Y., Yu, D., "Discriminative beamforming with phase aware neural networks for speech enhancement and recognition" in New Era for Robust Speech Recognition: Exploiting Deep Learning, Watanabe, S., Delcroix, M., Metze, F., Hershey, J.R., Eds., chapter 4, Springer, July 9, 2018.
BibTeX - @incollection{Xiao2018jul2,
- author = {Xiao, Xiong and Watanabe, Shinji and Erdogan, Hakan and Mandel, Michael and Lu, Liang and Hershey, John and Seltzer, Mike and Chen, Guoguo and Zhang, Yu and Yu, Dong},
- title = {Discriminative beamforming with phase aware neural networks for speech enhancement and recognition},
- booktitle = {New Era for Robust Speech Recognition: Exploiting Deep Learning},
- year = 2018,
- editor = {Watanabe, Shinji and Delcroix, Marc and Metze, Florian and Hershey, John R.},
- chapter = 4,
- month = jul,
- publisher = {Springer},
- isbn = {978-3-319-64680-0}
- }
- Ochiai, T., Watanabe, S., Katagiri, S., Hori, T., Hershey, J.R., "Speaker Adaptation for Multichannel End-to-End Speech Recognition", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP.2018.8462161, April 2018, pp. 6707-6711.
BibTeX TR2018-006 PDF- @inproceedings{Ochiai2018apr,
- author    = {Ochiai, Tsubasa and Watanabe, Shinji and Katagiri, Shigeru and Hori, Takaaki and Hershey, John R.},
- title     = {Speaker Adaptation for Multichannel End-to-End Speech Recognition},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- pages     = {6707--6711},
- month     = apr,
- year      = {2018},
- doi       = {10.1109/ICASSP.2018.8462161},
- url       = {https://www.merl.com/publications/TR2018-006},
- }
- Seki, H., Watanabe, S., Hori, T., Le Roux, J., Hershey, J.R., "An End-to-End Language-Tracking Speech Recognizer for Mixed-Language Speech", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP.2018.8462180, April 2018, pp. 4919-4923.
BibTeX TR2018-002 PDF Video- @inproceedings{Seki2018apr,
- author    = {Seki, Hiroshi and Watanabe, Shinji and Hori, Takaaki and Le Roux, Jonathan and Hershey, John R.},
- title     = {An End-to-End Language-Tracking Speech Recognizer for Mixed-Language Speech},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- pages     = {4919--4923},
- month     = apr,
- year      = {2018},
- doi       = {10.1109/ICASSP.2018.8462180},
- url       = {https://www.merl.com/publications/TR2018-002},
- }
- Settle, S., Le Roux, J., Hori, T., Watanabe, S., Hershey, J.R., "End-to-End Multi-Speaker Speech Recognition", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP.2018.8461893, April 2018, pp. 4819-4823.
BibTeX TR2018-001 PDF Video- @inproceedings{Settle2018apr,
- author    = {Settle, Shane and Le Roux, Jonathan and Hori, Takaaki and Watanabe, Shinji and Hershey, John R.},
- title     = {End-to-End Multi-Speaker Speech Recognition},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- pages     = {4819--4823},
- month     = apr,
- year      = {2018},
- doi       = {10.1109/ICASSP.2018.8461893},
- url       = {https://www.merl.com/publications/TR2018-001},
- }
- Wang, Z.-Q., Le Roux, J., Hershey, J.R., "Multi-Channel Deep Clustering: Discriminative Spectral and Spatial Embeddings for Speaker-Independent Speech Separation", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP.2018.8461639, April 2018, pp. 1-5.
BibTeX TR2018-007 PDF- @inproceedings{Wang2018apr2,
- author    = {Wang, Zhong-Qiu and Le Roux, Jonathan and Hershey, John R.},
- title     = {Multi-Channel Deep Clustering: Discriminative Spectral and Spatial Embeddings for Speaker-Independent Speech Separation},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- pages     = {1--5},
- month     = apr,
- year      = {2018},
- doi       = {10.1109/ICASSP.2018.8461639},
- url       = {https://www.merl.com/publications/TR2018-007},
- }
- Wang, Z.-Q., Le Roux, J., Hershey, J.R., "Alternative Objective Functions for Deep Clustering", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP.2018.8462507, April 2018, pp. 686-690.
BibTeX TR2018-005 PDF- @inproceedings{Wang2018apr,
- author    = {Wang, Zhong-Qiu and Le Roux, Jonathan and Hershey, John R.},
- title     = {Alternative Objective Functions for Deep Clustering},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- pages     = {686--690},
- month     = apr,
- year      = {2018},
- doi       = {10.1109/ICASSP.2018.8462507},
- url       = {https://www.merl.com/publications/TR2018-005},
- }
- Hori, C., Hori, T., Marks, T.K., Hershey, J.R., "Early and Late Integration of Audio Features for Automatic Video Description", IEEE Automatic Speech Recognition and Understanding Workshop (ASRU), DOI: 10.1109/ASRU.2017.8268968, December 2017.
BibTeX TR2017-183 PDF- @inproceedings{Hori2017dec2,
- author    = {Hori, Chiori and Hori, Takaaki and Marks, Tim K. and Hershey, John R.},
- title     = {Early and Late Integration of Audio Features for Automatic Video Description},
- booktitle = {IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)},
- month     = dec,
- year      = {2017},
- doi       = {10.1109/ASRU.2017.8268968},
- url       = {https://www.merl.com/publications/TR2017-183},
- }
- Hori, T., Watanabe, S., Hershey, J.R., "Multi-level Language Modeling and Decoding for Open Vocabulary End-to-End Speech Recognition", IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU), DOI: 10.1109/ASRU.2017.8268948, December 2017.
BibTeX TR2017-181 PDF- @inproceedings{Hori2017dec,
- author    = {Hori, Takaaki and Watanabe, Shinji and Hershey, John R.},
- title     = {Multi-level Language Modeling and Decoding for Open Vocabulary End-to-End Speech Recognition},
- booktitle = {IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU)},
- month     = dec,
- year      = {2017},
- doi       = {10.1109/ASRU.2017.8268948},
- url       = {https://www.merl.com/publications/TR2017-181},
- }
- Watanabe, S., Hori, T., Hershey, J.R., "Language Independent End-to-End Architecture For Joint Language and Speech Recognition", IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU), DOI: 10.1109/ASRU.2017.8268945, December 2017.
BibTeX TR2017-182 PDF Video- @inproceedings{Watanabe2017dec,
- author    = {Watanabe, Shinji and Hori, Takaaki and Hershey, John R.},
- title     = {Language Independent End-to-End Architecture For Joint Language and Speech Recognition},
- booktitle = {IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU)},
- month     = dec,
- year      = {2017},
- doi       = {10.1109/ASRU.2017.8268945},
- url       = {https://www.merl.com/publications/TR2017-182},
- }
- Wang, W., Koji, Y., Harsham, B.A., Hori, T., Hershey, J.R., "Sequence Adversarial Training and Minimum Bayes Risk Decoding for End-to-end Neural Conversation Models", Dialog System Technology Challenges, December 2017.
BibTeX TR2017-180 PDF- @inproceedings{Wang2017dec,
- author = {Wang, Wen and Koji, Yusuke and Harsham, Bret A. and Hori, Takaaki and Hershey, John R.},
- title = {Sequence Adversarial Training and Minimum {Bayes} Risk Decoding for End-to-end Neural Conversation Models},
- booktitle = {Dialog System Technology Challenges},
- year = 2017,
- month = dec,
- url = {https://www.merl.com/publications/TR2017-180}
- }
- Hori, C., Hori, T., Lee, T.-Y., Zhang, Z., Harsham, B.A., Sumi, K., Marks, T.K., Hershey, J.R., "Attention-Based Multimodal Fusion for Video Description", IEEE International Conference on Computer Vision (ICCV), DOI: 10.1109/ICCV.2017.450, October 2017.
BibTeX TR2017-156 PDF- @inproceedings{Hori2017oct,
- author    = {Hori, Chiori and Hori, Takaaki and Lee, Teng-Yok and Zhang, Ziming and Harsham, Bret A. and Sumi, Kazuhiko and Marks, Tim K. and Hershey, John R.},
- title     = {Attention-Based Multimodal Fusion for Video Description},
- booktitle = {IEEE International Conference on Computer Vision (ICCV)},
- month     = oct,
- year      = {2017},
- doi       = {10.1109/ICCV.2017.450},
- url       = {https://www.merl.com/publications/TR2017-156},
- }
- Ochiai, T., Watanabe, S., Hori, T., Hershey, J.R., Xiao, X., "Unified Architecture for Multichannel End-to-End Speech Recognition with Neural Beamforming", IEEE Journal of Selected Topics in Signal Processing, DOI: 10.1109/JSTSP.2017.2764276, Vol. 11, No. 8, pp. 1274-1288, October 2017.
BibTeX TR2017-192 PDF- @article{Ochiai2017oct2,
- author  = {Ochiai, Tsubasa and Watanabe, Shinji and Hori, Takaaki and Hershey, John R. and Xiao, Xiong},
- title   = {Unified Architecture for Multichannel End-to-End Speech Recognition with Neural Beamforming},
- journal = {IEEE Journal of Selected Topics in Signal Processing},
- volume  = {11},
- number  = {8},
- pages   = {1274--1288},
- month   = oct,
- year    = {2017},
- doi     = {10.1109/JSTSP.2017.2764276},
- issn    = {1941-0484},
- url     = {https://www.merl.com/publications/TR2017-192},
- }
- Watanabe, S., Hori, T., Kim, S., Hershey, J.R., Hayashi, T., "Hybrid CTC/Attention Architecture for End-to-End Speech Recognition", IEEE Journal of Selected Topics in Signal Processing, DOI: 10.1109/JSTSP.2017.2763455, Vol. 11, No. 8, pp. 1240-1253, October 2017.
BibTeX TR2017-190 PDF Video- @article{Watanabe2017oct,
- author = {Watanabe, Shinji and Hori, Takaaki and Kim, Suyoun and Hershey, John R. and Hayashi, Tomoki},
- title = {Hybrid {CTC}/Attention Architecture for End-to-End Speech Recognition},
- journal = {IEEE Journal of Selected Topics in Signal Processing},
- year = 2017,
- volume = 11,
- number = 8,
- pages = {1240--1253},
- month = oct,
- doi = {10.1109/JSTSP.2017.2763455},
- issn = {1941-0484},
- url = {https://www.merl.com/publications/TR2017-190}
- }
- Ochiai, T., Watanabe, S., Hori, T., Hershey, J.R., "Multichannel End-to-end Speech Recognition", International Conference on Machine Learning (ICML), August 2017.
BibTeX TR2017-107 PDF- @inproceedings{Ochiai2017aug,
- author    = {Ochiai, Tsubasa and Watanabe, Shinji and Hori, Takaaki and Hershey, John R.},
- title     = {Multichannel End-to-end Speech Recognition},
- booktitle = {International Conference on Machine Learning (ICML)},
- month     = aug,
- year      = {2017},
- url       = {https://www.merl.com/publications/TR2017-107},
- }