|
|
@@ -26,9 +26,7 @@ For `SenseVoice`, visit [SenseVoice repo](https://github.com/FunAudioLLM/SenseVo
|
|
|
|
|
|
- [ ] 25 Hz LLaMA-based LLM that supports LoRA fine-tuning
|
|
|
- [ ] Support more instruction modes
|
|
|
- - [ ] Voice conversion
|
|
|
- [ ] Music generation
|
|
|
- - [ ] Training script sample based on Mandarin
|
|
|
- [ ] CosyVoice-500M trained with more multi-lingual data
|
|
|
- [ ] More...
|
|
|
|
|
|
@@ -113,7 +111,7 @@ from cosyvoice.cli.cosyvoice import CosyVoice
|
|
|
from cosyvoice.utils.file_utils import load_wav
|
|
|
import torchaudio
|
|
|
|
|
|
-cosyvoice = CosyVoice('pretrained_models/CosyVoice-300M-SFT')
|
|
|
+cosyvoice = CosyVoice('pretrained_models/CosyVoice-300M-SFT', load_jit=True, load_onnx=False, fp16=True)
|
|
|
# sft usage
|
|
|
print(cosyvoice.list_avaliable_spks())
|
|
|
# set stream=True for chunked streaming inference
|