
Add a model download script and a FastAPI server to serve TTS

iflamed 1 year ago
parent
commit
fff6f9f1e0
4 changed files with 61 additions and 9 deletions
  1. README.md (+12, -8)
  2. download.py (+6, -0)
  3. main.py (+40, -0)
  4. requirements.txt (+3, -1)

README.md (+12, -8)

@@ -37,17 +37,13 @@ We strongly recommend that you download our pretrained `CosyVoice-300M` `CosyVoi
 
 If you are expert in this field, and you are only interested in training your own CosyVoice model from scratch, you can skip this step.
 
-``` python
-# Download models via the ModelScope SDK
-from modelscope import snapshot_download
-snapshot_download('iic/CosyVoice-300M', local_dir='pretrained_models/CosyVoice-300M')
-snapshot_download('iic/CosyVoice-300M-SFT', local_dir='pretrained_models/CosyVoice-300M-SFT')
-snapshot_download('iic/CosyVoice-300M-Instruct', local_dir='pretrained_models/CosyVoice-300M-Instruct')
-snapshot_download('iic/CosyVoice-ttsfrd', local_dir='pretrained_models/CosyVoice-ttsfrd')
+Download the models with the Python script:
+``` shell
+python download.py
 ```
 
+Alternatively, download the models with git; make sure `git lfs` is installed first:
 ``` sh
-# Download models with git; make sure git lfs is installed
 mkdir -p pretrained_models
 git clone https://www.modelscope.cn/iic/CosyVoice-300M.git pretrained_models/CosyVoice-300M
 git clone https://www.modelscope.cn/iic/CosyVoice-300M-SFT.git pretrained_models/CosyVoice-300M-SFT
@@ -120,6 +116,14 @@ python3 webui.py --port 50000 --model_dir pretrained_models/CosyVoice-300M
 For advanced users, we have provided training and inference scripts in `examples/libritts/cosyvoice/run.sh`.
 You can get familiar with CosyVoice by following this recipe.
 
+**Serve with FastAPI**
+```sh
+# Development server with auto-reload
+fastapi dev main.py --port 3003
+# Production server
+fastapi run main.py --port 3003
+```
+
 **Build for deployment**
 
 Optionally, if you want to use grpc for service deployment,
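
For reference, here is a minimal client sketch for the endpoints added in `main.py` below, assuming the server from the snippet above is running on port 3003 and that the third-party `requests` package is installed (it is not pinned in `requirements.txt`):

```python
import requests

BASE_URL = "http://127.0.0.1:3003"  # assumes `fastapi run --port 3003`

# List the speaker roles exposed by the SFT model
roles = requests.get(f"{BASE_URL}/api/voice/roles").json()["roles"]
print("available roles:", roles)

# Synthesize speech for the first role and save the returned WAV bytes
resp = requests.get(
    f"{BASE_URL}/api/voice/tts",
    params={"query": "Hello, this is CosyVoice.", "role": roles[0]},
)
resp.raise_for_status()
with open("output.wav", "wb") as f:
    f.write(resp.content)
```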

download.py (+6, -0)

@@ -0,0 +1,6 @@
+# Download the pretrained models via the ModelScope SDK
+from modelscope import snapshot_download
+snapshot_download('iic/CosyVoice-300M', local_dir='pretrained_models/CosyVoice-300M')
+snapshot_download('iic/CosyVoice-300M-SFT', local_dir='pretrained_models/CosyVoice-300M-SFT')
+snapshot_download('iic/CosyVoice-300M-Instruct', local_dir='pretrained_models/CosyVoice-300M-Instruct')
+snapshot_download('iic/CosyVoice-ttsfrd', local_dir='pretrained_models/CosyVoice-ttsfrd')
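
The same downloads can also be expressed as a loop that skips models already present on disk; the sketch below reuses the model IDs and target directories from `download.py`, while the skip check is an added convenience, not part of the committed script:

```python
from pathlib import Path
from modelscope import snapshot_download

MODELS = [
    'iic/CosyVoice-300M',
    'iic/CosyVoice-300M-SFT',
    'iic/CosyVoice-300M-Instruct',
    'iic/CosyVoice-ttsfrd',
]

for model_id in MODELS:
    # Mirror download.py's layout: pretrained_models/<model name>
    local_dir = Path('pretrained_models') / model_id.split('/')[-1]
    if local_dir.exists():
        print(f"skipping {model_id}: {local_dir} already exists")
        continue
    snapshot_download(model_id, local_dir=str(local_dir))
```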

main.py (+40, -0)

@@ -0,0 +1,40 @@
+import io, time
+from fastapi import FastAPI, Response
+from fastapi.responses import HTMLResponse
+from cosyvoice.cli.cosyvoice import CosyVoice
+import torchaudio
+
+# Load the SFT model once at startup so every request reuses it
+cosyvoice = CosyVoice('pretrained_models/CosyVoice-300M-SFT')
+print(cosyvoice.list_avaliable_spks())  # print the built-in speaker roles (sft usage)
+app = FastAPI()
+
+@app.get("/api/voice/tts")
+async def tts(query: str, role: str):
+    start = time.process_time()
+    output = cosyvoice.inference_sft(query, role)
+    end = time.process_time()
+    print("infer time:", end-start, "seconds")
+    buffer = io.BytesIO()
+    torchaudio.save(buffer, output['tts_speech'], 22050, format="wav")
+    buffer.seek(0)
+    return Response(content=buffer.read(-1), media_type="audio/wav")
+
+@app.get("/api/voice/roles")
+async def roles():
+    return {"roles": cosyvoice.list_avaliable_spks()}
+
+@app.get("/", response_class=HTMLResponse)
+async def root():
+    return """
+    <!DOCTYPE html>
+    <html lang=zh-cn>
+        <head>
+            <meta charset=utf-8>
+            <title>Api information</title>
+        </head>
+        <body>
+            Get the supported tones from the Roles API first, then enter the tones and textual content in the TTS API for synthesis. <a href='./docs'>Documents of API</a>
+        </body>
+    </html>
+    """

requirements.txt (+3, -1)

@@ -25,4 +25,6 @@ soundfile==0.12.1
 tensorboard==2.14.0
 torch==2.0.1
 torchaudio==2.0.2
-wget==3.2
+wget==3.2
+fastapi==0.111.0
+fastapi-cli==0.0.4