1 hónapja · 3b44913782
--- a/cosyvoice/cli/cosyvoice.py
+++ b/cosyvoice/cli/cosyvoice.py
@@ -89,6 +89,8 @@ class CosyVoice:
 
				                 start_time = time.time()
			
 
				 
			
 
				     def inference_zero_shot(self, tts_text, prompt_text, prompt_wav, zero_shot_spk_id='', stream=False, speed=1.0, text_frontend=True):
			
 
				+        if self.__class__.__name__ == 'CosyVoice3' and '<|endofprompt|>' not in prompt_text + tts_text:
			
 
				+            logging.warning('<|endofprompt|> not found in CosyVoice3 inference, check your input text')
			
 
				         prompt_text = self.frontend.text_normalize(prompt_text, split=False, text_frontend=text_frontend)
			
 
				         for i in tqdm(self.frontend.text_normalize(tts_text, split=True, text_frontend=text_frontend)):
			
 
				             if (not isinstance(i, Generator)) and len(i) < 0.5 * len(prompt_text):
			
--- a/cosyvoice/cli/frontend.py
+++ b/cosyvoice/cli/frontend.py
@@ -183,7 +183,7 @@ class CosyVoiceFrontEnd:
 
				                            'prompt_speech_feat': speech_feat, 'prompt_speech_feat_len': speech_feat_len,
			
 
				                            'llm_embedding': embedding, 'flow_embedding': embedding}
			
 
				         else:
			
 
				-            model_input = self.spk2info[zero_shot_spk_id]
			
 
				+            model_input = {**self.spk2info[zero_shot_spk_id]}
			
 
				         model_input['text'] = tts_text_token
			
 
				         model_input['text_len'] = tts_text_token_len
			
 
				         return model_input
			
--- a/runtime/python/fastapi/server.py
+++ b/runtime/python/fastapi/server.py
@@ -24,7 +24,7 @@ import numpy as np
 
				 ROOT_DIR = os.path.dirname(os.path.abspath(__file__))
			
 
				 sys.path.append('{}/../../..'.format(ROOT_DIR))
			
 
				 sys.path.append('{}/../../../third_party/Matcha-TTS'.format(ROOT_DIR))
			
 
				-from cosyvoice.cli.cosyvoice import CosyVoice, CosyVoice2
			
 
				+from cosyvoice.cli.cosyvoice import AutoModel
			
 
				 from cosyvoice.utils.file_utils import load_wav
			
 
				 
			
 
				 app = FastAPI()
			
@@ -88,14 +88,8 @@ if __name__ == '__main__':
 
				                         default=50000)
			
 
				     parser.add_argument('--model_dir',
			
 
				                         type=str,
			
 
				-                        default='iic/CosyVoice-300M',
			
 
				+                        default='iic/CosyVoice2-0.5B',
			
 
				                         help='local path or modelscope repo id')
			
 
				     args = parser.parse_args()
			
 
				-    try:
			
 
				-        cosyvoice = CosyVoice(args.model_dir)
			
 
				-    except Exception:
			
 
				-        try:
			
 
				-            cosyvoice = CosyVoice2(args.model_dir)
			
 
				-        except Exception:
			
 
				-            raise TypeError('no valid model_type!')
			
 
				+    cosyvoice = AutoModel(model_dir=args.model_dir)
			
 
				     uvicorn.run(app, host="0.0.0.0", port=args.port)
			
--- a/runtime/python/grpc/server.py
+++ b/runtime/python/grpc/server.py
@@ -25,7 +25,7 @@ import numpy as np
 
				 ROOT_DIR = os.path.dirname(os.path.abspath(__file__))
			
 
				 sys.path.append('{}/../../..'.format(ROOT_DIR))
			
 
				 sys.path.append('{}/../../../third_party/Matcha-TTS'.format(ROOT_DIR))
			
 
				-from cosyvoice.cli.cosyvoice import CosyVoice, CosyVoice2
			
 
				+from cosyvoice.cli.cosyvoice import AutoModel
			
 
				 
			
 
				 logging.basicConfig(level=logging.DEBUG,
			
 
				                     format='%(asctime)s %(levelname)s %(message)s')
			
@@ -33,13 +33,7 @@ logging.basicConfig(level=logging.DEBUG,
 
				 
			
 
				 class CosyVoiceServiceImpl(cosyvoice_pb2_grpc.CosyVoiceServicer):
			
 
				     def __init__(self, args):
			
 
				-        try:
			
 
				-            self.cosyvoice = CosyVoice(args.model_dir, trt_concurrent=args.max_conc)
			
 
				-        except Exception:
			
 
				-            try:
			
 
				-                self.cosyvoice = CosyVoice2(args.model_dir, trt_concurrent=args.max_conc)
			
 
				-            except Exception:
			
 
				-                raise TypeError('no valid model_type!')
			
 
				+        self.cosyvoice = AutoModel(model_dir=args.model_dir)
			
 
				         logging.info('grpc service initialized')
			
 
				 
			
 
				     def Inference(self, request, context):
			
@@ -90,7 +84,7 @@ if __name__ == '__main__':
 
				                         default=4)
			
 
				     parser.add_argument('--model_dir',
			
 
				                         type=str,
			
 
				-                        default='iic/CosyVoice-300M',
			
 
				+                        default='iic/CosyVoice2-0.5B',
			
 
				                         help='local path or modelscope repo id')
			
 
				     args = parser.parse_args()
			
 
				     main()
			
--- a/webui.py
+++ b/webui.py
@@ -167,7 +167,7 @@ if __name__ == '__main__':
 
				                         default=8000)
			
 
				     parser.add_argument('--model_dir',
			
 
				                         type=str,
			
 
				-                        default='pretrained_models/CosyVoice3-0.5B',
			
 
				+                        default='pretrained_models/CosyVoice2-0.5B',
			
 
				                         help='local path or modelscope repo id')
			
 
				     args = parser.parse_args()
			
 
				     cosyvoice = AutoModel(model_dir=args.model_dir)