|
@@ -230,7 +230,7 @@ class CosyVoiceModel:
|
|
|
if self.llm_end_dict[this_uuid] is True and len(self.tts_speech_token_dict[this_uuid]) < token_hop_len + self.token_overlap_len:
|
|
if self.llm_end_dict[this_uuid] is True and len(self.tts_speech_token_dict[this_uuid]) < token_hop_len + self.token_overlap_len:
|
|
|
break
|
|
break
|
|
|
# deal with remain tokens, make sure inference remain token len equals token_hop_len when cache_speech is not None
|
|
# deal with remain tokens, make sure inference remain token len equals token_hop_len when cache_speech is not None
|
|
|
- this_tts_speech_token = torch.tensor(self.tts_speech_token_dict[this_uuid], dim=1).unsqueeze(dim=0)
|
|
|
|
|
|
|
+ this_tts_speech_token = torch.tensor(self.tts_speech_token_dict[this_uuid]).unsqueeze(dim=0)
|
|
|
this_tts_speech = self.token2wav(token=this_tts_speech_token,
|
|
this_tts_speech = self.token2wav(token=this_tts_speech_token,
|
|
|
prompt_token=flow_prompt_speech_token,
|
|
prompt_token=flow_prompt_speech_token,
|
|
|
prompt_feat=prompt_speech_feat,
|
|
prompt_feat=prompt_speech_feat,
|