|
|
@@ -131,19 +131,29 @@ export PYTHONPATH=third_party/Matcha-TTS
|
|
|
from cosyvoice.cli.cosyvoice import CosyVoice, CosyVoice2
|
|
|
from cosyvoice.utils.file_utils import load_wav
|
|
|
import torchaudio
|
|
|
+```
|
|
|
|
|
|
-# cosyvoice2
|
|
|
+**CosyVoice2 Usage**
|
|
|
+```python
|
|
|
cosyvoice = CosyVoice2('pretrained_models/CosyVoice2-0.5B', load_jit=True, load_onnx=False, load_trt=False)
|
|
|
|
|
|
# zero_shot usage
|
|
|
prompt_speech_16k = load_wav('zero_shot_prompt.wav', 16000)
|
|
|
for i, j in enumerate(cosyvoice.inference_zero_shot('收到好友从远方寄来的生日礼物,那份意外的惊喜与深深的祝福让我心中充满了甜蜜的快乐,笑容如花儿般绽放。', '希望你以后能够做的比我还好呦。', prompt_speech_16k, stream=False)):
|
|
|
torchaudio.save('zero_shot_{}.wav'.format(i), j['tts_speech'], cosyvoice.sample_rate)
|
|
|
+
|
|
|
+# fine grained control (uses inference_cross_lingual with the same prompt audio)
|
|
|
+prompt_speech_16k = load_wav('zero_shot_prompt.wav', 16000)
|
|
|
+for i, j in enumerate(cosyvoice.inference_cross_lingual('收到好友从远方寄来的生日礼物,那份意外的惊喜与深深的祝福让我心中充满了甜蜜的快乐,笑容如花儿般绽放。', prompt_speech_16k, stream=False)):
|
|
|
+ torchaudio.save('fine_grained_control_{}.wav'.format(i), j['tts_speech'], cosyvoice.sample_rate)
|
|
|
+
|
|
|
# instruct usage
|
|
|
for i, j in enumerate(cosyvoice.inference_instruct2('收到好友从远方寄来的生日礼物,那份意外的惊喜与深深的祝福让我心中充满了甜蜜的快乐,笑容如花儿般绽放。', '用四川话说这句话', prompt_speech_16k, stream=False)):
|
|
|
- torchaudio.save('zero_shot_{}.wav'.format(i), j['tts_speech'], cosyvoice.sample_rate)
|
|
|
+ torchaudio.save('instruct_{}.wav'.format(i), j['tts_speech'], cosyvoice.sample_rate)
|
|
|
+```
|
|
|
|
|
|
-# cosyvoice
|
|
|
+**CosyVoice Usage**
|
|
|
+```python
|
|
|
cosyvoice = CosyVoice('pretrained_models/CosyVoice-300M-SFT', load_jit=True, load_onnx=False, fp16=True)
|
|
|
# sft usage
|
|
|
print(cosyvoice.list_avaliable_spks())
|