lyuxiang.lx 1 hónapja
szülő
commit
3b44913782

+ 2 - 0
cosyvoice/cli/cosyvoice.py

@@ -89,6 +89,8 @@ class CosyVoice:
                 start_time = time.time()
 
     def inference_zero_shot(self, tts_text, prompt_text, prompt_wav, zero_shot_spk_id='', stream=False, speed=1.0, text_frontend=True):
+        if self.__class__.__name__ == 'CosyVoice3' and '<|endofprompt|>' not in prompt_text + tts_text:
+            logging.warning('<|endofprompt|> not found in CosyVoice3 inference, check your input text')
         prompt_text = self.frontend.text_normalize(prompt_text, split=False, text_frontend=text_frontend)
         for i in tqdm(self.frontend.text_normalize(tts_text, split=True, text_frontend=text_frontend)):
             if (not isinstance(i, Generator)) and len(i) < 0.5 * len(prompt_text):

+ 1 - 1
cosyvoice/cli/frontend.py

@@ -183,7 +183,7 @@ class CosyVoiceFrontEnd:
                            'prompt_speech_feat': speech_feat, 'prompt_speech_feat_len': speech_feat_len,
                            'llm_embedding': embedding, 'flow_embedding': embedding}
         else:
-            model_input = self.spk2info[zero_shot_spk_id]
+            model_input = {**self.spk2info[zero_shot_spk_id]}
         model_input['text'] = tts_text_token
         model_input['text_len'] = tts_text_token_len
         return model_input

+ 3 - 9
runtime/python/fastapi/server.py

@@ -24,7 +24,7 @@ import numpy as np
 ROOT_DIR = os.path.dirname(os.path.abspath(__file__))
 sys.path.append('{}/../../..'.format(ROOT_DIR))
 sys.path.append('{}/../../../third_party/Matcha-TTS'.format(ROOT_DIR))
-from cosyvoice.cli.cosyvoice import CosyVoice, CosyVoice2
+from cosyvoice.cli.cosyvoice import AutoModel
 from cosyvoice.utils.file_utils import load_wav
 
 app = FastAPI()
@@ -88,14 +88,8 @@ if __name__ == '__main__':
                         default=50000)
     parser.add_argument('--model_dir',
                         type=str,
-                        default='iic/CosyVoice-300M',
+                        default='iic/CosyVoice2-0.5B',
                         help='local path or modelscope repo id')
     args = parser.parse_args()
-    try:
-        cosyvoice = CosyVoice(args.model_dir)
-    except Exception:
-        try:
-            cosyvoice = CosyVoice2(args.model_dir)
-        except Exception:
-            raise TypeError('no valid model_type!')
+    cosyvoice = AutoModel(model_dir=args.model_dir)
     uvicorn.run(app, host="0.0.0.0", port=args.port)

+ 3 - 9
runtime/python/grpc/server.py

@@ -25,7 +25,7 @@ import numpy as np
 ROOT_DIR = os.path.dirname(os.path.abspath(__file__))
 sys.path.append('{}/../../..'.format(ROOT_DIR))
 sys.path.append('{}/../../../third_party/Matcha-TTS'.format(ROOT_DIR))
-from cosyvoice.cli.cosyvoice import CosyVoice, CosyVoice2
+from cosyvoice.cli.cosyvoice import AutoModel
 
 logging.basicConfig(level=logging.DEBUG,
                     format='%(asctime)s %(levelname)s %(message)s')
@@ -33,13 +33,7 @@ logging.basicConfig(level=logging.DEBUG,
 
 class CosyVoiceServiceImpl(cosyvoice_pb2_grpc.CosyVoiceServicer):
     def __init__(self, args):
-        try:
-            self.cosyvoice = CosyVoice(args.model_dir, trt_concurrent=args.max_conc)
-        except Exception:
-            try:
-                self.cosyvoice = CosyVoice2(args.model_dir, trt_concurrent=args.max_conc)
-            except Exception:
-                raise TypeError('no valid model_type!')
+        self.cosyvoice = AutoModel(model_dir=args.model_dir)
         logging.info('grpc service initialized')
 
     def Inference(self, request, context):
@@ -90,7 +84,7 @@ if __name__ == '__main__':
                         default=4)
     parser.add_argument('--model_dir',
                         type=str,
-                        default='iic/CosyVoice-300M',
+                        default='iic/CosyVoice2-0.5B',
                         help='local path or modelscope repo id')
     args = parser.parse_args()
     main()

+ 1 - 1
webui.py

@@ -167,7 +167,7 @@ if __name__ == '__main__':
                         default=8000)
     parser.add_argument('--model_dir',
                         type=str,
-                        default='pretrained_models/CosyVoice3-0.5B',
+                        default='pretrained_models/CosyVoice2-0.5B',
                         help='local path or modelscope repo id')
     args = parser.parse_args()
     cosyvoice = AutoModel(model_dir=args.model_dir)