Ver Fonte

add download models script and fastapi server to serve tts

iflamed há 1 ano atrás
pai
commit
fff6f9f1e0
4 ficheiros alterados com 61 adições e 9 exclusões
  1. 12 8
      README.md
  2. 6 0
      download.py
  3. 40 0
      main.py
  4. 3 1
      requirements.txt

+ 12 - 8
README.md

@@ -37,17 +37,13 @@ We strongly recommend that you download our pretrained `CosyVoice-300M` `CosyVoi
 
 
 If you are expert in this field, and you are only interested in training your own CosyVoice model from scratch, you can skip this step.
 If you are expert in this field, and you are only interested in training your own CosyVoice model from scratch, you can skip this step.
 
 
-``` python
-# SDK模型下载
-from modelscope import snapshot_download
-snapshot_download('iic/CosyVoice-300M', local_dir='pretrained_models/CosyVoice-300M')
-snapshot_download('iic/CosyVoice-300M-SFT', local_dir='pretrained_models/CosyVoice-300M-SFT')
-snapshot_download('iic/CosyVoice-300M-Instruct', local_dir='pretrained_models/CosyVoice-300M-Instruct')
-snapshot_download('iic/CosyVoice-ttsfrd', local_dir='pretrained_models/CosyVoice-ttsfrd')
+Download the models with the provided Python script.
+``` shell
+python download.py
 ```
 ```
 
 
+To download the models with git, install `git lfs` first.
 ``` sh
 ``` sh
-# git模型下载,请确保已安装git lfs
 mkdir -p pretrained_models
 mkdir -p pretrained_models
 git clone https://www.modelscope.cn/iic/CosyVoice-300M.git pretrained_models/CosyVoice-300M
 git clone https://www.modelscope.cn/iic/CosyVoice-300M.git pretrained_models/CosyVoice-300M
 git clone https://www.modelscope.cn/iic/CosyVoice-300M-SFT.git pretrained_models/CosyVoice-300M-SFT
 git clone https://www.modelscope.cn/iic/CosyVoice-300M-SFT.git pretrained_models/CosyVoice-300M-SFT
@@ -120,6 +116,14 @@ python3 webui.py --port 50000 --model_dir pretrained_models/CosyVoice-300M
 For advanced user, we have provided train and inference scripts in `examples/libritts/cosyvoice/run.sh`.
 For advanced user, we have provided train and inference scripts in `examples/libritts/cosyvoice/run.sh`.
 You can get familiar with CosyVoice following this recipe.
 You can get familiar with CosyVoice following this recipe.
 
 
+**Serve with FastAPI**
+```sh
+# For development
+fastapi dev --port 3003
+# For production
+fastapi run --port 3003
+```
+
 **Build for deployment**
 **Build for deployment**
 
 
 Optionally, if you want to use grpc for service deployment,
 Optionally, if you want to use grpc for service deployment,

+ 6 - 0
download.py

@@ -0,0 +1,6 @@
+# SDK模型下载
+from modelscope import snapshot_download
+snapshot_download('iic/CosyVoice-300M', local_dir='pretrained_models/CosyVoice-300M')
+snapshot_download('iic/CosyVoice-300M-SFT', local_dir='pretrained_models/CosyVoice-300M-SFT')
+snapshot_download('iic/CosyVoice-300M-Instruct', local_dir='pretrained_models/CosyVoice-300M-Instruct')
+snapshot_download('iic/CosyVoice-ttsfrd', local_dir='pretrained_models/CosyVoice-ttsfrd')

+ 40 - 0
main.py

@@ -0,0 +1,40 @@
+import io,time
+from fastapi import FastAPI, Response
+from fastapi.responses import HTMLResponse
+from cosyvoice.cli.cosyvoice import CosyVoice
+import torchaudio
+
+cosyvoice = CosyVoice('pretrained_models/CosyVoice-300M-SFT')
+# sft usage
+print(cosyvoice.list_avaliable_spks())
+app = FastAPI()
+
+@app.get("/api/voice/tts")
+async def tts(query: str, role: str):
+    start = time.process_time()
+    output = cosyvoice.inference_sft(query, role)
+    end = time.process_time()
+    print("infer time:", end-start, "seconds")
+    buffer = io.BytesIO()
+    torchaudio.save(buffer, output['tts_speech'], 22050, format="wav")
+    buffer.seek(0)
+    return Response(content=buffer.read(-1), media_type="audio/wav")
+
+@app.get("/api/voice/roles")
+async def roles():
+    return {"roles": cosyvoice.list_avaliable_spks()}
+
+@app.get("/", response_class=HTMLResponse)
+async def root():
+    return """
+    <!DOCTYPE html>
+    <html lang=zh-cn>
+        <head>
+            <meta charset=utf-8>
+            <title>Api information</title>
+        </head>
+        <body>
+            Get the supported tones from the Roles API first, then enter the tones and textual content in the TTS API for synthesis. <a href='./docs'>Documents of API</a>
+        </body>
+    </html>
+    """

+ 3 - 1
requirements.txt

@@ -25,4 +25,6 @@ soundfile==0.12.1
 tensorboard==2.14.0
 tensorboard==2.14.0
 torch==2.0.1
 torch==2.0.1
 torchaudio==2.0.2
 torchaudio==2.0.2
-wget==3.2
+wget==3.2
+fastapi==0.111.0
+fastapi-cli==0.0.4