Publications

596 / 3,599 publications found.


  •  Wang, Z.-Q., Le Roux, J., Hershey, J.R., "Alternative Objective Functions for Deep Clustering", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP.2018.8462507, April 2018, pp. 686-690.
    BibTeX TR2018-005 PDF
    • @inproceedings{Wang2018apr,
    • author = {Wang, Zhong-Qiu and Le Roux, Jonathan and Hershey, John R.},
    • title = {Alternative Objective Functions for Deep Clustering},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2018,
    • pages = {686--690},
    • month = apr,
    • doi = {10.1109/ICASSP.2018.8462507},
    • url = {https://www.merl.com/publications/TR2018-005}
    • }
  •  Wang, Z.-Q., Le Roux, J., Hershey, J.R., "Multi-Channel Deep Clustering: Discriminative Spectral and Spatial Embeddings for Speaker-Independent Speech Separation", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP.2018.8461639, April 2018, pp. 1-5.
    BibTeX TR2018-007 PDF
    • @inproceedings{Wang2018apr2,
    • author = {Wang, Zhong-Qiu and Le Roux, Jonathan and Hershey, John R.},
    • title = {Multi-Channel Deep Clustering: Discriminative Spectral and Spatial Embeddings for Speaker-Independent Speech Separation},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2018,
    • pages = {1--5},
    • month = apr,
    • doi = {10.1109/ICASSP.2018.8461639},
    • url = {https://www.merl.com/publications/TR2018-007}
    • }
  •  Zhang, Z., Kag, A., Sullivan, A., Saligrama, V., "Equilibrated Recurrent Neural Network: Neuronal Time-Delayed Self-Feedback Improves Accuracy and Stability", arXiv, March 2019.
    BibTeX arXiv
    • @article{Zhang2018mar2,
    • author = {Zhang, Ziming and Kag, Anil and Sullivan, Alan and Saligrama, Venkatesh},
    • title = {Equilibrated Recurrent Neural Network: Neuronal Time-Delayed Self-Feedback Improves Accuracy and Stability},
    • journal = {arXiv},
    • year = 2019,
    • month = mar,
    • url = {https://arxiv.org/abs/1903.00755}
    • }
  •  Zhang, Z., Xu, W., Sullivan, A., "Time-Delay Momentum: A Regularization Perspective on the Convergence and Generalization of Stochastic Momentum for Deep Learning", arXiv, March 2019.
    BibTeX arXiv
    • @article{Zhang2018mar,
    • author = {Zhang, Ziming and Xu, Wenju and Sullivan, Alan},
    • title = {Time-Delay Momentum: A Regularization Perspective on the Convergence and Generalization of Stochastic Momentum for Deep Learning},
    • journal = {arXiv},
    • year = 2019,
    • month = mar,
    • url = {https://arxiv.org/abs/1903.00760}
    • }
  •  Santa Cruz, R., Fernando, B., Cherian, A., Gould, S., "Neural Algebra of Classifiers", IEEE Winter Conference on Applications of Computer Vision (WACV), DOI: 10.1109/WACV.2018.00085, March 2018, pp. 729-737.
    BibTeX TR2018-033 PDF
    • @inproceedings{Santacruz2018mar,
    • author = {Santa Cruz, Rodrigo and Fernando, Basura and Cherian, Anoop and Gould, Stephen},
    • title = {Neural Algebra of Classifiers},
    • booktitle = {IEEE Winter Conference on Applications of Computer Vision (WACV)},
    • year = 2018,
    • pages = {729--737},
    • month = mar,
    • doi = {10.1109/WACV.2018.00085},
    • url = {https://www.merl.com/publications/TR2018-033}
    • }
  •  Quivira, F., Koike-Akino, T., Wang, Y., Erdogmus, D., "Translating sEMG Signals to Continuous Hand Poses using Recurrent Neural Networks", IEEE Conference on Biomedical and Health Informatics (BHI), DOI: 10.1109/BHI.2018.8333395, January 2018.
    BibTeX TR2018-014 PDF Presentation
    • @inproceedings{Quivira2018jan,
    • author = {Quivira, Fernando and Koike-Akino, Toshiaki and Wang, Ye and Erdogmus, Deniz},
    • title = {Translating sEMG Signals to Continuous Hand Poses using Recurrent Neural Networks},
    • booktitle = {IEEE Conference on Biomedical and Health Informatics (BHI)},
    • year = 2018,
    • month = jan,
    • doi = {10.1109/BHI.2018.8333395},
    • url = {https://www.merl.com/publications/TR2018-014}
    • }
  •  Hori, C., Hori, T., "End-to-end Conversation Modeling Track in DSTC6", Dialog System Technology Challenges, December 2017.
    BibTeX TR2017-188 PDF
    • @inproceedings{Hori2017dec3,
    • author = {Hori, Chiori and Hori, Takaaki},
    • title = {End-to-end Conversation Modeling Track in DSTC6},
    • booktitle = {Dialog System Technology Challenges},
    • year = 2017,
    • month = dec,
    • url = {https://www.merl.com/publications/TR2017-188}
    • }
  •  Hori, C., Hori, T., Marks, T.K., Hershey, J.R., "Early and Late Integration of Audio Features for Automatic Video Description", IEEE Automatic Speech Recognition and Understanding Workshop (ASRU), DOI: 10.1109/ASRU.2017.8268968, December 2017.
    BibTeX TR2017-183 PDF
    • @inproceedings{Hori2017dec2,
    • author = {Hori, Chiori and Hori, Takaaki and Marks, Tim K. and Hershey, John R.},
    • title = {Early and Late Integration of Audio Features for Automatic Video Description},
    • booktitle = {IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)},
    • year = 2017,
    • month = dec,
    • doi = {10.1109/ASRU.2017.8268968},
    • url = {https://www.merl.com/publications/TR2017-183}
    • }
  •  Hori, T., Watanabe, S., Hershey, J.R., "Multi-level Language Modeling and Decoding for Open Vocabulary End-to-End Speech Recognition", IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU), DOI: 10.1109/ASRU.2017.8268948, December 2017.
    BibTeX TR2017-181 PDF
    • @inproceedings{Hori2017dec,
    • author = {Hori, Takaaki and Watanabe, Shinji and Hershey, John R.},
    • title = {Multi-level Language Modeling and Decoding for Open Vocabulary End-to-End Speech Recognition},
    • booktitle = {IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU)},
    • year = 2017,
    • month = dec,
    • doi = {10.1109/ASRU.2017.8268948},
    • url = {https://www.merl.com/publications/TR2017-181}
    • }
  •  Watanabe, S., Hori, T., Hershey, J.R., "Language Independent End-to-End Architecture For Joint Language and Speech Recognition", IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU), DOI: 10.1109/ASRU.2017.8268945, December 2017.
    BibTeX TR2017-182 PDF Video
    • @inproceedings{Watanabe2017dec,
    • author = {Watanabe, Shinji and Hori, Takaaki and Hershey, John R.},
    • title = {Language Independent End-to-End Architecture For Joint Language and Speech Recognition},
    • booktitle = {IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU)},
    • year = 2017,
    • month = dec,
    • doi = {10.1109/ASRU.2017.8268945},
    • url = {https://www.merl.com/publications/TR2017-182}
    • }
  •  Wang, W., Koji, Y., Harsham, B.A., Hori, T., Hershey, J.R., "Sequence Adversarial Training and Minimum Bayes Risk Decoding for End-to-end Neural Conversation Models", Dialog System Technology Challenges, December 2017.
    BibTeX TR2017-180 PDF
    • @inproceedings{Wang2017dec,
    • author = {Wang, Wen and Koji, Yusuke and Harsham, Bret A. and Hori, Takaaki and Hershey, John R.},
    • title = {Sequence Adversarial Training and Minimum Bayes Risk Decoding for End-to-end Neural Conversation Models},
    • booktitle = {Dialog System Technology Challenges},
    • year = 2017,
    • month = dec,
    • url = {https://www.merl.com/publications/TR2017-180}
    • }
  •  Zhang, Z., Brand, M., "Convergent Block Coordinate Descent for Training Tikhonov Regularized Deep Neural Networks", Advances in Neural Information Processing Systems (NIPS), December 2017.
    BibTeX TR2017-140 PDF
    • @inproceedings{Ziming2017dec,
    • author = {Zhang, Ziming and Brand, Matthew},
    • title = {Convergent Block Coordinate Descent for Training Tikhonov Regularized Deep Neural Networks},
    • booktitle = {Advances in Neural Information Processing Systems (NIPS)},
    • year = 2017,
    • month = dec,
    • url = {https://www.merl.com/publications/TR2017-140}
    • }
  •  Lin, C., Marks, T.K., Pajovic, M., Watanabe, S., Tung, C., "Model parameter learning using Kullback-Leibler divergence", Physica A, DOI: 10.1016/j.physa.2017.09.018, Vol. 491, No. 1, pp. 549-559, November 2017.
    BibTeX TR2017-184 PDF
    • @article{Lin2017nov,
    • author = {Lin, Chungwei and Marks, Tim K. and Pajovic, Milutin and Watanabe, Shinji and Tung, Chihkuan},
    • title = {Model parameter learning using Kullback-Leibler divergence},
    • journal = {Physica A},
    • year = 2017,
    • volume = 491,
    • number = 1,
    • pages = {549--559},
    • month = nov,
    • publisher = {Elsevier},
    • doi = {10.1016/j.physa.2017.09.018},
    • url = {https://www.merl.com/publications/TR2017-184}
    • }
  •  Jha, D.K., Yerazunis, W.S., Nikovski, D.N., Farahmand, A.-M., "Learning to Regulate Rolling Ball Motion", IEEE Symposium on Computational Intelligence in Engineering Solutions, DOI: 10.1109/SSCI.2017.8285376, November 2017.
    BibTeX TR2017-176 PDF
    • @inproceedings{Jha2017nov,
    • author = {Jha, Devesh K. and Yerazunis, William S. and Nikovski, Daniel N. and Farahmand, Amir-massoud},
    • title = {Learning to Regulate Rolling Ball Motion},
    • booktitle = {IEEE Symposium on Computational Intelligence in Engineering Solutions},
    • year = 2017,
    • month = nov,
    • doi = {10.1109/SSCI.2017.8285376},
    • url = {https://www.merl.com/publications/TR2017-176}
    • }
  •  Hori, C., Hori, T., Lee, T.-Y., Zhang, Z., Harsham, B.A., Sumi, K., Marks, T.K., Hershey, J.R., "Attention-Based Multimodal Fusion for Video Description", IEEE International Conference on Computer Vision (ICCV), DOI: 10.1109/ICCV.2017.450, October 2017.
    BibTeX TR2017-156 PDF
    • @inproceedings{Hori2017oct,
    • author = {Hori, Chiori and Hori, Takaaki and Lee, Teng-Yok and Zhang, Ziming and Harsham, Bret A. and Sumi, Kazuhiko and Marks, Tim K. and Hershey, John R.},
    • title = {Attention-Based Multimodal Fusion for Video Description},
    • booktitle = {IEEE International Conference on Computer Vision (ICCV)},
    • year = 2017,
    • month = oct,
    • doi = {10.1109/ICCV.2017.450},
    • url = {https://www.merl.com/publications/TR2017-156}
    • }
  •  Jones, M.J., Kobori, H., "Improving face verification and person re-identification accuracy using hyperplane similarity", IEEE Workshop on Analysis and Modeling of Faces and Gestures (at ICCV), DOI: 10.1109/ICCVW.2017.183, October 2017.
    BibTeX TR2017-155 PDF
    • @inproceedings{Jones2017oct,
    • author = {Jones, Michael J. and Kobori, Hiroko},
    • title = {Improving face verification and person re-identification accuracy using hyperplane similarity},
    • booktitle = {IEEE Workshop on Analysis and Modeling of Faces and Gestures (at ICCV)},
    • year = 2017,
    • month = oct,
    • doi = {10.1109/ICCVW.2017.183},
    • url = {https://www.merl.com/publications/TR2017-155}
    • }
  •  Sahinoglu, G.O., Pajovic, M., Sahinoglu, Z., Wang, Y., Orlik, P.V., Wada, T., "Battery State of Charge Estimation Based on Regular/Recurrent Gaussian Process Regression", IEEE Transactions on Industrial Electronics, DOI: 10.1109/TIE.2017.2764869, Vol. 65, No. 5, pp. 4311-4321, October 2017.
    BibTeX TR2017-124 PDF
    • @article{Ozcan2017oct,
    • author = {Sahinoglu, Gozde Ozcan and Pajovic, Milutin and Sahinoglu, Zafer and Wang, Yebin and Orlik, Philip V. and Wada, Toshihiro},
    • title = {Battery State of Charge Estimation Based on Regular/Recurrent Gaussian Process Regression},
    • journal = {IEEE Transactions on Industrial Electronics},
    • year = 2017,
    • volume = 65,
    • number = 5,
    • pages = {4311--4321},
    • month = oct,
    • doi = {10.1109/TIE.2017.2764869},
    • url = {https://www.merl.com/publications/TR2017-124}
    • }
  •  Ochiai, T., Watanabe, S., Katagiri, S., "Does speech enhancement work with end-to-end ASR objectives?: Experimental analysis of multichannel end-to-end ASR", IEEE International Workshop on Machine Learning for Signal Processing (MLSP), DOI: 10.1109/MLSP.2017.8168188, October 2017, pp. 1-6.
    BibTeX TR2017-139 PDF
    • @inproceedings{Ochiai2017oct,
    • author = {Ochiai, Tsubasa and Watanabe, Shinji and Katagiri, Shigeru},
    • title = {Does speech enhancement work with end-to-end ASR objectives?: Experimental analysis of multichannel end-to-end ASR},
    • booktitle = {IEEE International Workshop on Machine Learning for Signal Processing (MLSP)},
    • year = 2017,
    • pages = {1--6},
    • month = oct,
    • doi = {10.1109/MLSP.2017.8168188},
    • url = {https://www.merl.com/publications/TR2017-139}
    • }
  •  Ochiai, T., Watanabe, S., Hori, T., Hershey, J.R., Xiao, X., "Unified Architecture for Multichannel End-to-End Speech Recognition with Neural Beamforming", IEEE Journal of Selected Topics in Signal Processing, DOI: 10.1109/JSTSP.2017.2764276, Vol. 11, No. 8, pp. 1274-1288, October 2017.
    BibTeX TR2017-192 PDF
    • @article{Ochiai2017oct2,
    • author = {Ochiai, Tsubasa and Watanabe, Shinji and Hori, Takaaki and Hershey, John R. and Xiao, Xiong},
    • title = {Unified Architecture for Multichannel End-to-End Speech Recognition with Neural Beamforming},
    • journal = {IEEE Journal of Selected Topics in Signal Processing},
    • year = 2017,
    • volume = 11,
    • number = 8,
    • pages = {1274--1288},
    • month = oct,
    • doi = {10.1109/JSTSP.2017.2764276},
    • issn = {1941-0484},
    • url = {https://www.merl.com/publications/TR2017-192}
    • }
  •  Watanabe, S., Hori, T., Kim, S., Hershey, J.R., Hayashi, T., "Hybrid CTC/Attention Architecture for End-to-End Speech Recognition", IEEE Journal of Selected Topics in Signal Processing, DOI: 10.1109/JSTSP.2017.2763455, Vol. 11, No. 8, pp. 1240-1253, October 2017.
    BibTeX TR2017-190 PDF Video
    • @article{Watanabe2017oct,
    • author = {Watanabe, Shinji and Hori, Takaaki and Kim, Suyoun and Hershey, John R. and Hayashi, Tomoki},
    • title = {Hybrid CTC/Attention Architecture for End-to-End Speech Recognition},
    • journal = {IEEE Journal of Selected Topics in Signal Processing},
    • year = 2017,
    • volume = 11,
    • number = 8,
    • pages = {1240--1253},
    • month = oct,
    • doi = {10.1109/JSTSP.2017.2763455},
    • issn = {1941-0484},
    • url = {https://www.merl.com/publications/TR2017-190}
    • }
  •  Hori, T., Watanabe, S., Zhang, Y., Chan, W., "Advances in Joint CTC-Attention based End-to-End Speech Recognition with a Deep CNN Encoder and RNN-LM", Interspeech, August 2017.
    BibTeX TR2017-132 PDF Video
    • @inproceedings{Hori2017aug,
    • author = {Hori, Takaaki and Watanabe, Shinji and Zhang, Yu and Chan, William},
    • title = {Advances in Joint CTC-Attention based End-to-End Speech Recognition with a Deep CNN Encoder and RNN-LM},
    • booktitle = {Interspeech},
    • year = 2017,
    • month = aug,
    • url = {https://www.merl.com/publications/TR2017-132}
    • }
  •  Shinozaki, T., Watanabe, S., Mochihashi, D., Neubig, G., "Semi-Supervised Learning of a Pronunciation Dictionary from Disjoint Phonemic Transcripts and Text", Interspeech, August 2017.
    BibTeX TR2017-133 PDF
    • @inproceedings{Shinozaki2017aug,
    • author = {Shinozaki, Takahiro and Watanabe, Shinji and Mochihashi, Daichi and Neubig, Graham},
    • title = {Semi-Supervised Learning of a Pronunciation Dictionary from Disjoint Phonemic Transcripts and Text},
    • booktitle = {Interspeech},
    • year = 2017,
    • month = aug,
    • url = {https://www.merl.com/publications/TR2017-133}
    • }
  •  Tachioka, Y., Narita, T., Miura, I., Uramoto, T., Monta, N., Uenohara, S., Furuya, K., Watanabe, S., Le Roux, J., "Coupled initialization of multi-channel non-negative matrix factorization based on spatial and spectral information", Interspeech, August 2017.
    BibTeX TR2017-134 PDF
    • @inproceedings{Tachioka2017aug,
    • author = {Tachioka, Yuuki and Narita, Tomohiro and Miura, Iori and Uramoto, Takanobu and Monta, Natsuki and Uenohara, Shingo and Furuya, Kenichi and Watanabe, Shinji and Le Roux, Jonathan},
    • title = {Coupled initialization of multi-channel non-negative matrix factorization based on spatial and spectral information},
    • booktitle = {Interspeech},
    • year = 2017,
    • month = aug,
    • url = {https://www.merl.com/publications/TR2017-134}
    • }
  •  Hayashi, T., Watanabe, S., Toda, T., Hori, T., Le Roux, J., Takeda, K., "Duration-Controlled LSTM for Polyphonic Sound Event Detection", IEEE/ACM Transactions on Audio, Speech, and Language Processing, DOI: 10.1109/TASLP.2017.2740002, Vol. 25, No. 11, August 2017.
    BibTeX TR2017-150 PDF
    • @article{Hayashi2017aug,
    • author = {Hayashi, Tomoki and Watanabe, Shinji and Toda, Tomoki and Hori, Takaaki and Le Roux, Jonathan and Takeda, Kazuya},
    • title = {Duration-Controlled LSTM for Polyphonic Sound Event Detection},
    • journal = {IEEE/ACM Transactions on Audio, Speech, and Language Processing},
    • year = 2017,
    • volume = 25,
    • number = 11,
    • month = aug,
    • doi = {10.1109/TASLP.2017.2740002},
    • issn = {2329-9304},
    • url = {https://www.merl.com/publications/TR2017-150}
    • }
  •  Ochiai, T., Watanabe, S., Hori, T., Hershey, J.R., "Multichannel End-to-end Speech Recognition", International Conference on Machine Learning (ICML), August 2017.
    BibTeX TR2017-107 PDF
    • @inproceedings{Ochiai2017aug,
    • author = {Ochiai, Tsubasa and Watanabe, Shinji and Hori, Takaaki and Hershey, John R.},
    • title = {Multichannel End-to-end Speech Recognition},
    • booktitle = {International Conference on Machine Learning (ICML)},
    • year = 2017,
    • month = aug,
    • url = {https://www.merl.com/publications/TR2017-107}
    • }