|
|
@@ -135,9 +135,9 @@ class CosyVoice:
|
|
|
logging.info('synthesis text {}'.format(i))
|
|
|
for model_output in self.model.tts(**model_input, stream=stream, speed=speed):
|
|
|
speech_len = model_output['tts_speech'].shape[1] / self.sample_rate
|
|
|
- logging.info('yield speech len {}, rtf {}'.format(speech_len, (time.time() - start_time) / speech_len))
|
|
|
- yield model_output
|
|
|
- start_time = time.time()
|
|
|
+ logging.info('yield speech len {}, rtf {}'.format(speech_len, (time.time() - start_time) / speech_len))
|
|
|
+ yield model_output
|
|
|
+ start_time = time.time()
|
|
|
cuda_stream.synchronize()
|
|
|
self.stream_pool.put(cuda_stream)
|
|
|
|