% Publication list, part 1: books, journal articles, and conference papers
% dated 2025 back to 2023.
%
% Conventions used in these entries:
%   - One field per line; citation keys are the original CTT record ids.
%   - Titles carry no trailing punctuation; the bibliography style adds it.
%   - Acronyms and proper nouns in titles are brace-protected so that
%     sentence-casing styles do not lowercase them.
%   - Empty publisher / journal / booktitle fields are placeholders kept from
%     the source records. NOTE(review): the venues are not recorded here and
%     BibTeX will warn about them until they are filled in.
%   - NOTE(review): one book lists its author in non-ASCII script; classic
%     8-bit BibTeX may not sort or label it correctly.

@book{CTT100930615,
  author    = {Sheng Li},
  title     = {Bridging {Eurasia}: Multilingual Speech Recognition for {Silkroad}},
  publisher = {},
  year      = 2023,
}

@book{CTT100930616,
  author    = {Sheng Li},
  title     = {Voices of the {Himalayas}: Investigation of Speech Recognition Technology for the {Tibetan} Language},
  publisher = {},
  year      = 2023,
}

@book{CTT100930617,
  author    = {李勝},
  title     = {Phantom in the Opera: The Vulnerabilities of Speech-based Artificial Intelligence Systems},
  publisher = {},
  year      = 2022,
}

@book{CTT100930446,
  author    = {Xugang Lu and Sheng Li and Masakiyo Fujimoto},
  title     = {Automatic speech recognition: Speech-to-Speech Translation},
  publisher = {},
  year      = 2020,
}

@article{CTT100933510,
  author  = {Kai Wang and Lili Yin and Sheng Li and Madina Mansurova and Hao Huang},
  title   = {Neural {TTS}-based Dynamic Data Augmentation for Improved Speech Separation},
  journal = {IEEE Trans. Audio, Speech \& Language Process. (TASLP)},
  year    = 2025,
}

@article{CTT100933511,
  author  = {Chin Yuen Kwok and He Xin Liu and Jia Qi Yip and Sheng Li and Eng Siong Chng},
  title   = {A Two-Stage {LoRA} Strategy for Expanding Language Capabilities in Multilingual {ASR} Models},
  journal = {IEEE Trans. Audio, Speech \& Language Process. (TASLP)},
  year    = 2025,
}

@article{CTT100930621,
  author  = {Sheng Li},
  title   = {Robust Voice Activity Detection Using an Auditory-Inspired Masked Modulation Encoder Based Convolutional Attention Network},
  journal = {Speech Communication (SPEECH COMMUN)},
  year    = 2024,
}

@article{CTT100930619,
  author  = {Sheng Li and Jiyi Li and Yang Cao},
  title   = {Phantom in the Opera: Adversarial Music Attack for Robot Dialogue System},
  journal = {Frontiers in Computer Science, section Human-Media Interaction},
  year    = 2024,
}

@article{CTT100930618,
  author  = {Sheng Li and Jiyi Li and Chenhui Chu},
  title   = {Voices of the {Himalayas}: Benchmarking Speech Recognition Systems for the {Tibetan} Language},
  journal = {},
  year    = 2024,
}

@article{CTT100930620,
  author  = {Zhengdong Yang and Shuichiro Shimizu and Chenhui Chu and Sheng Li and Sadao Kurohashi},
  title   = {End-to-end {Japanese-English} Speech-to-text Translation with Spoken-to-Written Style Conversion},
  journal = {Journal of Natural Language Processing},
  year    = 2024,
}

@article{CTT100931479,
  author  = {Shen Qiao and Cuicui Zhang and Xuefeng Zhang and Kai Zhang and Hao Shi and Sheng Li and Hao Wei},
  title   = {Tendency-and-attention-informed deep learning for {ENSO} forecasts},
  journal = {Climate Dynamics},
  year    = 2023,
}

@article{CTT100930622,
  author  = {Yuqin Lin and Longbiao Wang and Jianwu Dang and Sheng Li and Chenchen Ding},
  title   = {Disordered Speech Recognition Considering Low Resources and Abnormal Articulation},
  journal = {Speech Communication (SPEECH COMMUN)},
  year    = 2023,
}

@article{CTT100930624,
  author  = {Kak Soky and Sheng Li and Chenhui Chu and Tatsuya Kawahara},
  title   = {Finetuning Pretrained Model with Embedding of Domain and Language Information for {ASR} of Very Low-Resource Settings},
  journal = {International Journal of Asian Language Processing},
  year    = 2023,
}

@article{CTT100930626,
  author  = {Kak Soky and Masato Mimura and Tatsuya Kawahara and Chenhui Chu and Sheng Li and Chenchen Ding and Sethserey Sam},
  title   = {{TriECCC}: Trilingual Corpus of the {Extraordinary Chambers in the Courts of Cambodia} for Speech Recognition and Translation Studies},
  journal = {International Journal of Asian Language Processing},
  year    = 2022,
}

@article{CTT100930627,
  author  = {Cunhang Fan and Hongmei Zhang and Jiangyan Yi and Zhao Lv and Jianhua Tao and Taihao Li and Guanxiong Pei and Xiaopei Wu and Sheng Li},
  title   = {{SpecMNet}: Spectrum Mend Network for Monaural Speech Enhancement},
  journal = {},
  year    = 2022,
}

@article{CTT100930628,
  author  = {Shuichiro Shimizu and Chenhui Chu and Sheng Li and Sadao Kurohashi},
  title   = {Cross-Lingual Transfer Learning for End-to-End Speech Translation},
  journal = {Journal of Natural Language Processing (JNLP)},
  year    = 2022,
}

@article{CTT100930629,
  author  = {Siqing Qin and Longbiao Wang and Sheng Li and Jianwu Dang and Lixin Pan},
  title   = {Improving Low-resource {Tibetan} End-to-end {ASR} by Multilingual and Multi-level Unit Modeling},
  journal = {EURASIP Journal on Audio, Speech and Music Processing (EURASIP JASMP)},
  year    = 2022,
}

@article{CTT100930631,
  author  = {Xiaojiao Chen and Hao Huang and Sheng Li},
  title   = {Adversarial Attack and Defense on Deep Neural Network-based Voice Processing Systems: An Overview},
  journal = {Applied Sciences, Special Issues of Machine Speech Communication},
  year    = 2021,
}

@article{CTT100930632,
  author  = {Peng Shen and Xugang Lu and Sheng Li and Hisashi Kawai},
  title   = {Knowledge Distillation-based Representation Learning for Short-Utterance Spoken Language Identification},
  journal = {IEEE/ACM Trans. Audio, Speech \& Language Process. (TASLP)},
  year    = 2020,
}

@article{CTT100930633,
  author  = {Sheng Li and Yuya Akita and Tatsuya Kawahara},
  title   = {Semi-supervised acoustic model training by discriminative data selection from multiple {ASR} systems' hypotheses},
  journal = {IEEE/ACM Trans. Audio, Speech \& Language Process. (TASLP)},
  year    = 2016,
}

@article{CTT100930634,
  author  = {Sheng Li and Yuya Akita and Tatsuya Kawahara},
  title   = {Automatic lecture transcription based on discriminative data selection for lightly supervised acoustic model training},
  journal = {IEICE Trans.},
  year    = 2015,
}

@article{CTT100930635,
  author  = {Lan Wang and Hui Chen and Sheng Li and Helen Meng},
  title   = {Phoneme-level articulatory animation in pronunciation training},
  journal = {Speech Communication (SPEECH COMMUN)},
  year    = 2012,
}

@inproceedings{CTT100931502,
  author    = {Chin Yuen Kwok and Sheng Li and Jia Qi Yip and Chenhui Chu and Tatsuya Kawahara and Eng Siong Chng},
  title     = {Extending {Whisper} for Emotion Prediction Using Word-level Pseudo Labels},
  booktitle = {},
  year      = 2025,
}

@inproceedings{CTT100933790,
  author    = {Haowei Lou and Paik, Hye young and Pari Delir Haghighi and Sheng Li and Wen Hu and Lina Yao},
  title     = {{LatentSpeech}: Latent Diffusion for Text-To-Speech Generation},
  booktitle = {},
  year      = 2025,
}

@inproceedings{CTT100933791,
  author    = {Jing Li and Felix Schijve and Sheng Li and Yuye Yang and Jun Hu and Emilia Barakova},
  title     = {Towards Emotion Co-regulation with {LLM}-powered Socially Assistive Robots: Integrating {LLM} Prompts and Robotic Behaviors to Support Parent-Neurodivergent Child Dyads},
  booktitle = {},
  year      = 2025,
}

@inproceedings{CTT100931505,
  author    = {Zhao Ren and Rathi Adarshi Rammohan and Kevin Scheck and Sheng Li and Tanja Schultz},
  title     = {End-to-end Acoustic-linguistic Emotion and Intent Recognition Enhanced by Semi-supervised Learning},
  booktitle = {},
  year      = 2025,
}

@inproceedings{CTT100931504,
  author    = {Jiliang Hu and Zuchao Li and Mengjia Shen and Haojun Ai and Sheng Li and Jun Zhang},
  title     = {Joint Automatic Speech Recognition And Structure Learning For Better Speech Understanding},
  booktitle = {},
  year      = 2025,
}

@inproceedings{CTT100931503,
  author    = {Jun-You Wang and Sheng Li and Li-An Lu and Sydney Chia-Chun Kao and Jyh-Shing Roger Jang},
  title     = {Similarity-based accent recognition with continuous and discrete self-supervised speech representations},
  booktitle = {},
  year      = 2025,
}

@inproceedings{CTT100933508,
  author    = {Hongli Yang and Yizhou Peng and Hao Huang and Sheng Li},
  title     = {Adapting {Whisper} for Parameter-efficient Code-Switching Speech Recognition via Soft Prompt Tuning},
  booktitle = {},
  year      = 2025,
}

@inproceedings{CTT100933509,
  author    = {Hongli Yang and Sheng Li and Hao Huang and Ayiduosi Tuohan and Yizhou Peng},
  title     = {Language-Aware Prompt Tuning for Parameter-Efficient Seamless Language Expansion in Multilingual {ASR}},
  booktitle = {},
  year      = 2025,
}

@inproceedings{CTT100933507,
  author    = {Wangjin Zhou and Tianjiao Du and Chenglin Xu and Sheng Li and Yi Zhao and Tatsuya Kawahara},
  title     = {Simple and Effective Content Encoder for Singing Voice Conversion via Dimension Reduction},
  booktitle = {},
  year      = 2025,
}

@inproceedings{CTT100933506,
  author    = {Zhengdong Yang and Sheng Li and Chenhui Chu},
  title     = {Generative Error Correction for Emotion-aware Speech-to-text Translation},
  booktitle = {},
  year      = 2025,
}

@inproceedings{CTT100933504,
  author    = {Yu Xu and Xiaokai Qin and Tianyu Fan and Eng Siong Chng and Sheng Li and Nobuaki Minematsu and Daisuke Saito},
  title     = {Bandwidth Extension System for Throat Microphone Speech Reconstruction},
  booktitle = {},
  year      = 2025,
}

@inproceedings{CTT100933505,
  author    = {Zhen Wan and Chao-Han Huck Yang and Yahan Yu and Jinchuan Tian and Sheng Li and Ke Hu and Zhehuai Chen and Shinji Watanabe and Fei Cheng and Chenhui Chu and Sadao Kurohashi},
  title     = {{SIQ}: Exterminating Speech Intelligence Quotient Cross Cognitive Levels in Voice Understanding Large Language Models},
  booktitle = {},
  year      = 2025,
}

@inproceedings{CTT100931499,
  author    = {Chin Yuen Kwok and Sheng Li and Jia Qi Yip and Eng Siong Chng},
  title     = {Low-resource Language Adaptation with Ensemble of {PEFT} Approaches},
  booktitle = {},
  year      = 2024,
}

@inproceedings{CTT100931490,
  author    = {Wangjin Zhou and Zhengdong Yang and Chenhui Chu and Sheng Li and Raj Dabre and Yi Zhao and Tatsuya Kawahara},
  title     = {{MOS-FAD}: Improving Fake Audio Detection Via Automatic Mean Opinion Score Prediction},
  booktitle = {},
  year      = 2024,
}

@inproceedings{CTT100931491,
  author    = {Yi Zhao and Chunyu Qiang and Hao Li and Yulan Hu and Wangjin Zhou and Sheng Li},
  title     = {Enhancing Realism in {3D} Facial Animation Using {Conformer}-Based Generation and Automated Post-Processing},
  booktitle = {},
  year      = 2024,
}

@inproceedings{CTT100931492,
  author    = {Yankun Wu and Yuta Nakashima and Noa Garcia and Sheng Li and Zhaoyang Zeng},
  title     = {Reproducibility Companion Paper: {Stable Diffusion} for Content-Style Disentanglement in Art Analysis},
  booktitle = {},
  year      = 2024,
}

@inproceedings{CTT100931493,
  author    = {Sheng Li and Jiyi Li and Yang Cao},
  title     = {Automatic Post-Editing of Speech Recognition System Output Using Large Language Models},
  booktitle = {},
  year      = 2024,
}

@inproceedings{CTT100931494,
  author    = {Sheng Li and Chen Chen and Chin Yuen Kwok and Chenhui Chu and Eng Siong Chng and Hisashi Kawai},
  title     = {Investigating {ASR} Error Correction with Large Language Model and Multilingual 1-best Hypotheses},
  booktitle = {},
  year      = 2024,
}

@inproceedings{CTT100931495,
  author    = {Lele Zheng and Yang Cao and Renhe Jiang and Kenjiro Taura and Yulong Shen and Sheng Li and Masatoshi Yoshikawa},
  title     = {Enhancing Privacy of Spatiotemporal Federated Learning Against Gradient Inversion Attacks},
  booktitle = {},
  year      = 2024,
}

@inproceedings{CTT100931496,
  author    = {Chao Tan and Sheng Li and Yang Cao and Zhao Ren and Tanja Schultz},
  title     = {Investigating Effective Speaker Property Privacy Protection in Federated Learning for Speech Emotion Recognition},
  booktitle = {},
  year      = 2024,
}

@inproceedings{CTT100931497,
  author    = {Jianan Chen and Chenhui Chu and Sheng Li and Tatsuya Kawahara},
  title     = {Data Selection using Spoken Language Identification for Low-Resource and Zero-Resource Speech Recognition},
  booktitle = {},
  year      = 2024,
}

@inproceedings{CTT100931501,
  author    = {Hay Mar Soe Naing and Win Pa Pa and Sheng Li},
  title     = {Parallel and Limited Data Voice Conversions on {Myanmar} Language Speech for Spoofed Detection},
  booktitle = {},
  year      = 2024,
}

@inproceedings{CTT100931500,
  author    = {Qingqing Zhang and Lei Luo and Simin Xu and Yongjing Chen and Chuang Li and Sheng Li and Ruili Wang},
  title     = {{LaMuCo}: Large-Scale Multilingual Conversation Speech Recognition Challenge},
  booktitle = {},
  year      = 2024,
}

@inproceedings{CTT100931498,
  author    = {Sheng Li and Yuka Ko and Akinori Ito},
  title     = {{LLM} as decoder: Investigating Lattice-based Speech Recognition Hypotheses Rescoring Using {LLM}},
  booktitle = {},
  year      = 2024,
}

@inproceedings{CTT100931470,
  author    = {Guangxing Li and Wangjin Zhou and Sheng Li and Yi Zhao and Jichen Yang and Hao Huang},
  title     = {Investigating Effective Domain Adaptation Method for Speaker Verification Task},
  booktitle = {},
  year      = 2023,
}

@inproceedings{CTT100931471,
  author    = {Xiaojiao Chen and Sheng Li and Hao Huang},
  title     = {{GhostVec}: Directly Extracting Speaker Embedding from End-to-End Speech Recognition Model Using Adversarial Examples},
  booktitle = {},
  year      = 2023,
}

@inproceedings{CTT100931475,
  author    = {Kai Wang and Yuhang Yang and Hao Huang and Ying Hu and Sheng Li},
  title     = {Speakeraugment: Data Augmentation for Generalizable Source Separation via Speaker Parameter Manipulation},
  booktitle = {},
  year      = 2023,
}

@inproceedings{CTT100931474,
  author    = {Kak Soky and Sheng Li and Chenhui Chu and Tatsuya Kawahara},
  title     = {Domain and Language Adaptation Using Heterogeneous Datasets for {Wav2vec2.0}-Based Speech Recognition of Low-Resource Language},
  booktitle = {},
  year      = 2023,
}

@inproceedings{CTT100931473,
  author    = {Qianying Liu and Zhuo Gong and Zhengdong Yang and Yuhang Yang and Sheng Li and Chenchen Ding and Nobuaki Minematsu and Hao Huang and Fei Cheng and Chenhui Chu and Sadao Kurohashi},
  title     = {Hierarchical Softmax for End-To-End Low-Resource Multilingual Speech Recognition},
  booktitle = {},
  year      = 2023,
}

@inproceedings{CTT100931472,
  author    = {Sheng Li and Jiyi Li and Qianying Liu and Zhuo Gong},
  title     = {An End-to-End {Chinese} and {Japanese} Bilingual Speech Recognition Systems with Shared Character Decomposition},
  booktitle = {},
  year      = 2023,
}

@inproceedings{CTT100931489,
  author    = {Zili Qi and Xinhui Hu and Wangjin Zhou and Sheng Li and Hao Wu and Jian Lu and Xinkang Xu},
  title     = {{LE-SSL-MOS}: Self-Supervised Learning {MOS} Prediction with Listener Enhancement},
  booktitle = {},
  year      = 2023,
}

@inproceedings{CTT100931488,
  author    = {Wenqing Wei and Zhengdong Yang and Yuan Gao and Jiyi Li and Chenhui Chu and Shogo Okada and Sheng Li},
  title     = {{FedCPC}: An Effective Federated Contrastive Learning Method for Privacy Preserving Early-Stage {Alzheimer's} Speech Detection},
  booktitle = {},
  year      = 2023,
}

@inproceedings{CTT100931487,
  author    = {Wangjin Zhou and Zhengdong Yang and Sheng Li and Chenhui Chu},
  title     = {{KyotoMOS}: An Automatic {MOS} Scoring System for Speech Synthesis},
  booktitle = {},
  year      = 2023,
}

@inproceedings{CTT100931486,
  author    = {Xiaojiao Chen and Sheng Li and Jiyi Li and Yang Cao and Hao Huang and Liang He},
  title     = {{GhostVec}: A New Threat to Speaker Privacy of End-to-End Speech Recognition System},
  booktitle = {},
  year      = 2023,
}

@inproceedings{CTT100931485,
  author    = {Xiaojiao Chen and Sheng Li and Jiyi Li and Hao Huang and Yang Cao and Liang He},
  title     = {Reprogramming Self-supervised Learning-based Speech Representations for Speaker Anonymization},
  booktitle = {},
  year      = 2023,
}

@inproceedings{CTT100931484,
  author    = {Sheng Li and Jiyi Li},
  title     = {Correction while Recognition: Combining Pretrained Language Model for {Taiwan}-Accented Speech Recognition},
  booktitle = {},
  year      = 2023,
}

@inproceedings{CTT100931483,
  author    = {Zhengdong Yang and Shuichiro Shimizu and Wangjin Zhou and Sheng Li and Chenhui Chu},
  title     = {The {Kyoto} Speech-to-Speech Translation System for {IWSLT} 2023},
  booktitle = {},
  year      = 2023,
}
% Publication list, part 2: conference papers dated 2023 back to 2010,
% followed by the doctoral thesis.
%
% Conventions used in these entries:
%   - One field per line; citation keys are the original CTT record ids.
%   - Author names carry no trailing punctuation and are written given-name
%     first, family-name last.
%   - Titles carry no trailing punctuation; the bibliography style adds it.
%   - Acronyms and proper nouns in titles are brace-protected so that
%     sentence-casing styles do not lowercase them.
%   - Empty booktitle fields are placeholders kept from the source records.
%     NOTE(review): the venues are not recorded here and BibTeX will warn
%     about them until they are filled in.
%   - NOTE(review): CTT100930702 and CTT100930703 have identical authors,
%     title, and year; confirm whether they are distinct publications.

@inproceedings{CTT100931482,
  author    = {Longfei Yang and Jiyi Li and Sheng Li and Takahiro Shinozaki},
  title     = {Dialogue State Tracking with Sparse Local Slot Attention},
  booktitle = {},
  year      = 2023,
}

@inproceedings{CTT100931481,
  author    = {Longfei Yang and Jiyi Li and Sheng Li and Takahiro Shinozaki},
  title     = {Multi-Domain Dialogue State Tracking with Disentangled Domain-Slot Attention},
  booktitle = {},
  year      = 2023,
}

@inproceedings{CTT100931480,
  author    = {Shuichiro Shimizu and Chenhui Chu and Sheng Li and Sadao Kurohashi},
  title     = {Towards Speech Dialogue Translation Mediating Speakers of Different Languages},
  booktitle = {},
  year      = 2023,
}

@inproceedings{CTT100931478,
  author    = {Helen Korving and Sheng Li and Di Zhou and Paula Sterkenburg and Panos Markopoulos and Emilia Barakova},
  title     = {Development of a Pain Signaling System Using Machine Learning},
  booktitle = {},
  year      = 2023,
}

@inproceedings{CTT100931477,
  author    = {Chao Tan and Yang Cao and Sheng Li and Masatoshi Yoshikawa},
  title     = {General or Specific? Investigating Effective Privacy Protection in Federated Learning for Speech Emotion Recognition},
  booktitle = {},
  year      = 2023,
}

@inproceedings{CTT100931476,
  author    = {Yuhang Yang and Haihua Xu and Hao Huang and Eng Siong Chng and Sheng Li},
  title     = {Speech-Text Based Multi-Modal Training with Bidirectional Attention for Improved Speech Recognition},
  booktitle = {},
  year      = 2023,
}

@inproceedings{CTT100931419,
  author    = {Hao Shi and Longbiao Wang and Sheng Li and Jianwu Dang and Tatsuya Kawahara},
  title     = {Monaural Speech Enhancement Based on Spectrogram Decomposition for Convolutional Neural Network-sensitive Feature Extraction},
  booktitle = {},
  year      = 2022,
}

@inproceedings{CTT100931406,
  author    = {Kai Wang and Yizhou Peng and Hao Huang and Ying Hu and Sheng Li},
  title     = {Mining Hard Samples Locally And Globally For Improved Speech Separation},
  booktitle = {},
  year      = 2022,
}

@inproceedings{CTT100931407,
  author    = {Yongjie Lv and Longbiao Wang and Meng Ge and Sheng Li and Chenchen Ding and Lixin Pan},
  title     = {Compressing {Transformer}-Based {ASR} Model by Task-Driven Loss and Attention-Based Multi-Level Feature Distillation},
  booktitle = {},
  year      = 2022,
}

@inproceedings{CTT100931408,
  author    = {Sheng Li and Jiyi Li and Qianying Liu and Zhuo Gong},
  title     = {Adversarial Speech Generation and Natural Speech Recovery for Speech Content Protection},
  booktitle = {},
  year      = 2022,
}

@inproceedings{CTT100931409,
  author    = {Zhuo Gong and Daisuke Saito and Longfei Yang and Takahiro Shinozaki and Sheng Li and Hisashi Kawai and Nobuaki Minematsu},
  title     = {Self-Adaptive Multilingual {ASR} Rescoring with Language Identification and Unified Language Model},
  booktitle = {},
  year      = 2022,
}

@inproceedings{CTT100931410,
  author    = {Kai Li and Xugang Lu and Masato Akagi and Jianwu Dang and Sheng Li and Masashi Unoki},
  title     = {Relationship Between Speakers' Physiological Structure and Acoustic Speech Signals: Data-Driven Study Based on Frequency-Wise Attentional Neural Network},
  booktitle = {},
  year      = 2022,
}

@inproceedings{CTT100931411,
  author    = {Kak Soky and Sheng Li and Masato Mimura and Chenhui Chu and Tatsuya Kawahara},
  title     = {Leveraging Simultaneous Translation for Enhancing Transcription of Low-resource Language via Cross Attention Mechanism},
  booktitle = {},
  year      = 2022,
}

@inproceedings{CTT100931412,
  author    = {Longfei Yang and Wenqing Wei and Sheng Li and Jiyi Li and Takahiro Shinozaki},
  title     = {Augmented Adversarial Self-Supervised Learning for Early-Stage {Alzheimer's} Speech Detection},
  booktitle = {},
  year      = 2022,
}

@inproceedings{CTT100931413,
  author    = {Kai Li and Sheng Li and Xugang Lu and Masato Akagi and Meng Liu and Lin Zhang and Chang Zeng and Longbiao Wang and Jianwu Dang and Masashi Unoki},
  title     = {Data Augmentation Using {McAdams}-Coefficient-Based Speaker Anonymization for Fake Audio Detection},
  booktitle = {},
  year      = 2022,
}

@inproceedings{CTT100931414,
  author    = {Zhengdong Yang and Wangjin Zhou and Chenhui Chu and Sheng Li and Raj Dabre and Raphael Rubino and Yi Zhao},
  title     = {Fusion of Self-supervised Learned Models for {MOS} Prediction},
  booktitle = {},
  year      = 2022,
}

@inproceedings{CTT100931415,
  author    = {Siqing Qin and Longbiao Wang and Sheng Li and Yuqin Lin and Jianwu Dang},
  title     = {Finer-grained Modeling units-based Meta-Learning for Low-resource {Tibetan} Speech Recognition},
  booktitle = {},
  year      = 2022,
}

@inproceedings{CTT100931416,
  author    = {Nan Li and Meng Ge and Longbiao Wang and Masashi Unoki and Sheng Li and Jianwu Dang},
  title     = {Global Signal-to-noise Ratio Estimation Based on Multi-subband Processing Using Convolutional Neural Network},
  booktitle = {},
  year      = 2022,
}

@inproceedings{CTT100931417,
  author    = {Longfei Yang and Jiyi Li and Sheng Li and Takahiro Shinozaki},
  title     = {Multi-Domain Dialogue State Tracking with Top-k Slot Self Attention},
  booktitle = {},
  year      = 2022,
}

@inproceedings{CTT100931421,
  author    = {Kak Soky and Zhuo Gong and Sheng Li},
  title     = {{Nict-Tib1}: A Public Speech Corpus Of {Lhasa} Dialect For Benchmarking {Tibetan} Language Speech Recognition Systems},
  booktitle = {},
  year      = 2022,
}

@inproceedings{CTT100931422,
  author    = {Hao Shi and Longbiao Wang and Sheng Li and Jianwu Dang and Tatsuya Kawahara},
  title     = {Subband-based Spectrogram Fusion for Speech Enhancement by Combining Mapping and Masking Approaches},
  booktitle = {},
  year      = 2022,
}

@inproceedings{CTT100931424,
  author    = {Zhuo Gong and Daisuke Saito and Sheng Li and Hisashi Kawai and Nobuaki Minematsu},
  title     = {Can We Train a Language Model Inside an End-to-End {ASR} Model? - Investigating Effective Implicit Language Modeling},
  booktitle = {},
  year      = 2022,
}

@inproceedings{CTT100931763,
  author    = {Kak Soky and Sheng Li and Masato Mimura and Chenhui Chu and Tatsuya Kawahara},
  title     = {On the Use of Speaker Information for Automatic Speech Recognition in Speaker-imbalanced Corpora},
  booktitle = {},
  year      = 2021,
}

@inproceedings{CTT100930927,
  author    = {Hao Huang and Kai Wang and Ying Hu and Sheng Li},
  title     = {Encoder-Decoder based pitch tracking and joint model training for {Mandarin} tone classification},
  booktitle = {},
  year      = 2021,
}

@inproceedings{CTT100931757,
  author    = {Heran Zhang and Sheng Li and Xingjun Ma and Yi Zhao and Yang Cao and Tatsuya Kawahara},
  title     = {Phantom in the Opera: Effective Adversarial Music Attack on Keyword Spotting Systems},
  booktitle = {},
  year      = 2021,
}

@inproceedings{CTT100931758,
  author    = {Dawei Liu and Longbiao Wang and Sheng Li and Haoyu Li and Chenchen Ding and Ju Zhang and Jianwu Dang},
  title     = {Exploring Effective Speech Representation via {ASR} for High-Quality End-to-End Multispeaker {TTS}},
  booktitle = {},
  year      = 2021,
}

@inproceedings{CTT100930929,
  author    = {Nan Li and Longbiao Wang and Masashi Unoki and Sheng Li and Rui Wang and Meng Ge and Jianwu Dang},
  title     = {Robust voice activity detection using a masked auditory encoder based convolutional neural network},
  booktitle = {},
  year      = 2021,
}

@inproceedings{CTT100930928,
  author    = {Shunfei Chen and Xinhui Hu and Sheng Li and Xinkang Xu},
  title     = {An investigation of using hybrid modeling units for improving End-to-End speech recognition systems},
  booktitle = {},
  year      = 2021,
}

@inproceedings{CTT100931759,
  author    = {Luya Qiang and Hao Shi and Meng Ge and Haoran Yin and Nan Li and Longbiao Wang and Sheng Li and Jianwu Dang},
  title     = {Speech Dereverberation Based on Scale-Aware Mean Square Error Loss},
  booktitle = {},
  year      = 2021,
}

@inproceedings{CTT100931760,
  author    = {Haoran Yin and Hao Shi and Longbiao Wang and Luya Qiang and Sheng Li and Meng Ge and Gaoyan Zhang and Jianwu Dang},
  title     = {Simultaneous Progressive Filtering-Based Monaural Speech Enhancement},
  booktitle = {},
  year      = 2021,
}

@inproceedings{CTT100931761,
  author    = {Hao Shi and Longbiao Wang and Sheng Li and Cunhang Fan and Jianwu Dang and Tatsuya Kawahara},
  title     = {Spectrograms Fusion-based End-to-End Robust Automatic Speech Recognition},
  booktitle = {},
  year      = 2021,
}

@inproceedings{CTT100931762,
  author    = {Yizhou Peng and Jicheng Zhang and Haobo Zhang and Haihua Xu and Hao Huang and Sheng Li and Eng Siong Chng},
  title     = {Multilingual Approach to Joint Speech and Accent Recognition with {DNN-HMM} Framework},
  booktitle = {},
  year      = 2021,
}

@inproceedings{CTT100930932,
  author    = {Kak Soky and Masato Mimura and Tatsuya Kawahara and Sheng Li and Chenchen Ding and Chenhui Chu and Sethserey Sam},
  title     = {{Khmer} Speech Translation Corpus of the {Extraordinary Chambers in the Courts of Cambodia} ({ECCC})},
  booktitle = {},
  year      = 2021,
}

@inproceedings{CTT100930931,
  author    = {Ding Wang and Shuaishuai Ye and Xinhui Hu and Sheng Li and Xinkang Xu},
  title     = {An End-to-End Dialect Identification System with Transfer Learning from a Multilingual Automatic Speech Recognition Model},
  booktitle = {},
  year      = 2021,
}

@inproceedings{CTT100930930,
  author    = {Kai Wang and Hao Huang and Ying Hu and Zhihua Huang and Sheng Li},
  title     = {End-to-End Speech Separation Using Orthogonal Representation in Complex and Real Time-Frequency Domain},
  booktitle = {},
  year      = 2021,
}

@inproceedings{CTT100931756,
  author    = {Aye Thida and Nway Nway Han and Sheinn Thawtar Oo and Sheng Li and Chenchen Ding},
  title     = {{VOIS}: The First Speech Therapy App Specifically Designed for {Myanmar} Hearing-Impaired Children},
  booktitle = {},
  year      = 2020,
}

@inproceedings{CTT100931754,
  author    = {Shaotong Guo and Longbiao Wang and Sheng Li and Ju Zhang and Cheng Gong and Yuguang Wang and Jianwu Dang and Kiyoshi Honda},
  title     = {Investigation of Effectively Synthesizing Code-Switched Speech Using Highly Imbalanced Mix-Lingual Data},
  booktitle = {},
  year      = 2020,
}

@inproceedings{CTT100930926,
  author    = {Yuqin Lin and Longbiao Wang and Sheng Li and Jianwu Dang and Chenchen Ding},
  title     = {Staged Knowledge Distillation for End-to-End Dysarthric Speech Recognition and Speech Attribute Transcription},
  booktitle = {},
  year      = 2020,
}

% NOTE(review): the source record listed the author sequence twice in a row;
% the copy that includes Sheng Li is kept. Confirm against the paper.
@inproceedings{CTT100930925,
  author    = {Hao Shi and Longbiao Wang and Sheng Li and Chenchen Ding and Meng Ge and Nan Li and Jianwu Dang and Hiroshi Seki},
  title     = {Singing Voice Extraction with Attention based Spectrograms Fusion},
  booktitle = {},
  year      = 2020,
}

@inproceedings{CTT100930924,
  author    = {Sheng Li and Xugang Lu and Raj Dabre and Peng Shen and Hisashi Kawai},
  title     = {Joint Training End-to-End Speech Recognition Systems with Speaker Attributes},
  booktitle = {},
  year      = 2020,
}

@inproceedings{CTT100930923,
  author    = {Peng Shen and Xugang Lu and Komei Sugiura and Sheng Li and Hisashi Kawai},
  title     = {Compensation on x-vector for short utterance spoken language identification},
  booktitle = {},
  year      = 2020,
}

@inproceedings{CTT100931755,
  author    = {Yaowei Han and Yang Cao and Sheng Li and Qiang Ma and Masatoshi Yoshikawa},
  title     = {Voice-Indistinguishability -- Protecting Voiceprint with Differential Privacy under an Untrusted Server},
  booktitle = {},
  year      = 2020,
}

@inproceedings{CTT100930711,
  author    = {Hao Shi and Longbiao Wang and Meng Ge and Sheng Li and Jianwu Dang},
  title     = {Spectrograms Fusion with Minimum Difference Masks Estimation for Monaural Speech Dereverberation},
  booktitle = {},
  year      = 2020,
}

@inproceedings{CTT100930712,
  author    = {Yuqin Lin and Longbiao Wang and Jianwu Dang and Sheng Li and Chenchen Ding},
  title     = {End-To-End Articulatory Modeling for Dysarthria Articulatory Attribute Detection},
  booktitle = {},
  year      = 2020,
}

@inproceedings{CTT100930713,
  author    = {Yaowei Han and Sheng Li and Yang Cao and Qiang Ma and Masatoshi Yoshikawa},
  title     = {Voice-Indistinguishability: Protecting Voiceprint in Privacy Preserving Speech Data Release},
  booktitle = {IEEE-ICME},
  year      = 2020,
}

@inproceedings{CTT100931752,
  author    = {Lixin Pan and Sheng Li and Longbiao Wang and Jianwu Dang},
  title     = {Effective Training End-to-End {ASR} systems for Low-resource {Lhasa} Dialect of {Tibetan} Language},
  booktitle = {},
  year      = 2019,
}

@inproceedings{CTT100931753,
  author    = {Kak Soky and Sheng Li and Tatsuya Kawahara and Sopheap Seng},
  title     = {Multi-lingual transformer training for khmer automatic speech recognition},
  booktitle = {},
  year      = 2019,
}

@inproceedings{CTT100930710,
  author    = {Sheng Li},
  title     = {Class-wise Centroid Distance Metric Learning for Acoustic Event Detection},
  booktitle = {},
  year      = 2019,
}

@inproceedings{CTT100930709,
  author    = {Sheng Li and Xugang Lu and Chenchen Ding and Peng Shen and Tatsuya Kawahara and Hisashi Kawai},
  title     = {Investigating Radical-based End-to-End Speech Recognition Systems for {Chinese} Dialects and {Japanese}},
  booktitle = {},
  year      = 2019,
}

@inproceedings{CTT100930708,
  author    = {Sheng Li and Chenchen Ding and Xugang Lu and Peng Shen and Tatsuya Kawahara and Hisashi Kawai},
  title     = {End-to-End Articulatory Attribute Modeling for Low-resource Multilingual Speech Recognition},
  booktitle = {},
  year      = 2019,
}

@inproceedings{CTT100930707,
  author    = {Sheng Li and Raj Dabre and Xugang Lu and Peng Shen and Tatsuya Kawahara and Hisashi Kawai},
  title     = {Improving {Transformer}-based Speech Recognition Systems with Compressed Structure and Speech Attributes Augmentation},
  booktitle = {},
  year      = 2019,
}

@inproceedings{CTT100930705,
  author    = {Ryoichi Takashima and Sheng Li and Hisashi Kawai},
  title     = {Investigation of Sequence-level Knowledge Distillation Methods for {CTC} Acoustic Models},
  booktitle = {},
  year      = 2019,
}

@inproceedings{CTT100930706,
  author    = {Peng Shen and Xugang Lu and Sheng Li and Hisashi Kawai},
  title     = {Interactive learning of teacher-student model for short utterance spoken language identification},
  booktitle = {},
  year      = 2019,
}

@inproceedings{CTT100930704,
  author    = {Sheng Li and Xugang Lu and Ryoichi Takashima and Peng Shen and Tatsuya Kawahara and Hisashi Kawai},
  title     = {Improving very deep time-delay neural network with vertical-attention for effectively training {CTC}-based {ASR} systems},
  booktitle = {},
  year      = 2018,
}

@inproceedings{CTT100930702,
  author    = {Sheng Li and Xugang Lu and Ryoichi Takashima and Peng Shen and Tatsuya Kawahara and Hisashi Kawai},
  title     = {Improving {CTC}-based Acoustic Model with Very Deep Residual Time-delay Neural Networks},
  booktitle = {},
  year      = 2018,
}

@inproceedings{CTT100930703,
  author    = {Sheng Li and Xugang Lu and Ryoichi Takashima and Peng Shen and Tatsuya Kawahara and Hisashi Kawai},
  title     = {Improving {CTC}-based Acoustic Model with Very Deep Residual Time-delay Neural Networks},
  booktitle = {},
  year      = 2018,
}

@inproceedings{CTT100930700,
  author    = {Xugang Lu and Peng Shen and Sheng Li and Yu Tsao and Hisashi Kawai},
  title     = {Temporal Attentive Pooling for Acoustic Event Detection},
  booktitle = {},
  year      = 2018,
}

@inproceedings{CTT100930701,
  author    = {Peng Shen and Xugang Lu and Sheng Li and Hisashi Kawai},
  title     = {Feature Representation of Short Utterances based on Knowledge Distillation for Spoken Language Identification},
  booktitle = {},
  year      = 2018,
}

@inproceedings{CTT100930697,
  author    = {Ryoichi Takashima and Sheng Li and Hisashi Kawai},
  title     = {{CTC} Loss Function with a Unit-level Ambiguity Penalty},
  booktitle = {},
  year      = 2018,
}

@inproceedings{CTT100930696,
  author    = {Sheng Li and Xugang Lu and Peng Shen and Ryoichi Takashima and Tatsuya Kawahara and Hisashi Kawai},
  title     = {Incremental training and constructing the very deep convolutional residual network acoustic models},
  booktitle = {},
  year      = 2017,
}

@inproceedings{CTT100930693,
  author    = {Peng Shen and Xugang Lu and Sheng Li and Hisashi Kawai},
  title     = {Conditional Generative Adversarial Nets Classifier for Spoken Language Identification},
  booktitle = {},
  year      = 2017,
}

@inproceedings{CTT100930699,
  author    = {Ryoichi Takashima and Sheng Li and Hisashi Kawai},
  title     = {An Investigation of a Knowledge Distillation Method for {CTC} Acoustic Models},
  booktitle = {},
  year      = 2017,
}

@inproceedings{CTT100930692,
  author    = {Sheng Li and Xugang Lu and Shinsuke Sakai and Masato Mimura and Tatsuya Kawahara},
  title     = {Semi-supervised ensemble {DNN} acoustic model training},
  booktitle = {},
  year      = 2017,
}

@inproceedings{CTT100931748,
  author    = {Sheng Li and Xugang Lu and Shinsuke Mori and Yuya Akita and Tatsuya Kawahara},
  title     = {Confidence Estimation for Speech Recognition Systems using Conditional Random Fields Trained with Partially Annotated Data},
  booktitle = {},
  year      = 2016,
}

@inproceedings{CTT100930687,
  author    = {Sheng Li and Yuya Akita and Tatsuya Kawahara},
  title     = {Data selection from multiple {ASR} systems' hypotheses for unsupervised acoustic model training},
  booktitle = {},
  year      = 2016,
}

@inproceedings{CTT100930683,
  author    = {Sheng Li and Xugang Lu and Yuya Akita and Tatsuya Kawahara},
  title     = {Ensemble speaker modeling using speaker adaptive training deep neural network for speaker adaptation},
  booktitle = {},
  year      = 2015,
}

@inproceedings{CTT100930685,
  author    = {Sheng Li and Yuya Akita and Tatsuya Kawahara},
  title     = {Discriminative data selection for lightly supervised training of acoustic model using closed caption texts},
  booktitle = {},
  year      = 2015,
}

@inproceedings{CTT100931746,
  author    = {Sheng Li and Yuya Akita and Tatsuya Kawahara},
  title     = {Corpus and Transcription System of {Chinese} Lecture Room},
  booktitle = {},
  year      = 2014,
}

@inproceedings{CTT100930677,
  author    = {Sheng Li and Lan Wang},
  title     = {Cross Linguistic Comparison of {Mandarin} and {English} {EMA} Articulatory Data},
  booktitle = {},
  year      = 2012,
}

@inproceedings{CTT100931745,
  author    = {Sheng Li and Lan Wang and En Qi},
  title     = {The Phoneme-level Articulator Dynamics for Pronunciation Animation},
  booktitle = {},
  year      = 2011,
}

@inproceedings{CTT100931743,
  author    = {Jinyu Chen and Lan Wang and Chongguo Li and Jin Hu and Sheng Li},
  title     = {{IELS}: A Computer-aided Pronunciation Training System for Undergraduate Students},
  booktitle = {},
  year      = 2010,
}

% The thesis was recorded twice under the same key CTT100930445, once as a
% misc-type entry and once as a phdthesis. A citation key must be unique, so
% only the phdthesis record is kept; it contains every field the misc-type
% record had (author and title) plus the school.
% NOTE(review): the year was blank in the source record (a bare "year = ,"
% does not parse). It is left as an empty placeholder; fill it in before
% citing. The school name is kept exactly as recorded (non-ASCII).
@phdthesis{CTT100930445,
  author = {Sheng Li},
  title  = {Speech Recognition Enhanced by Lightly-supervised and Semi-supervised Acoustic Model Training},
  school = {京都大学},
  year   = {},
}