
move use_spk_embedding to processor

lyuxiang.lx · 1 year ago
commit 6cebcb3410

cosyvoice/dataset/processor.py (+5 -1)

@@ -308,7 +308,7 @@ def batch(data, batch_type='static', batch_size=16, max_frames_in_batch=12000, m
             logging.fatal('Unsupported batch type {}'.format(batch_type))
 
 
-def padding(data, mode='train'):
+def padding(data, use_spk_embedding, mode='train'):
     """ Padding the data into training data
 
         Args:
@@ -362,4 +362,8 @@ def padding(data, mode='train'):
                           'tts_index': tts_index,
                           'tts_text_token': tts_text_token,
                           'tts_text_token_len': tts_text_token_len})
+        if use_spk_embedding is True:
+            batch["embedding"] = batch["spk_embedding"]
+        else:
+            batch["embedding"] = batch["utt_embedding"]
         yield batch
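
The effect of this hunk is that `padding` now resolves the conditioning embedding while batches are assembled, instead of leaving both variants for the trainer to pick from. A minimal sketch of the new branch, using a hypothetical `select_embedding` helper and illustrative tensor shapes (the real `padding` does this inline at the end of the generator):

```python
import torch

def select_embedding(batch: dict, use_spk_embedding: bool) -> dict:
    """Mirror of the new branch in padding(): per the config comments,
    sft training conditions on the per-speaker embedding, otherwise
    the per-utterance embedding is used."""
    if use_spk_embedding:
        batch["embedding"] = batch["spk_embedding"]
    else:
        batch["embedding"] = batch["utt_embedding"]
    return batch

# Toy batch carrying both embedding variants; the 192-dim size is
# illustrative, not taken from the repo.
batch = {
    "spk_embedding": torch.zeros(2, 192),
    "utt_embedding": torch.ones(2, 192),
}
out = select_embedding(batch, use_spk_embedding=False)
assert torch.equal(out["embedding"], batch["utt_embedding"])
```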

cosyvoice/utils/executor.py (+0 -4)

@@ -52,10 +52,6 @@ class Executor:
                 info_dict["batch_idx"] = batch_idx
                 if cosyvoice_join(group_join, info_dict):
                     break
-                if info_dict["use_spk_embedding"] is True:
-                    batch_dict["embedding"] = batch_dict["spk_embedding"]
-                else:
-                    batch_dict["embedding"] = batch_dict["utt_embedding"]
 
                 # Disable gradient synchronizations across DDP processes.
                 # Within this context, gradients will be accumulated on module
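
With the branch deleted, the train loop receives batches whose `embedding` key is already populated by the dataset pipeline. The surviving comment refers to DDP gradient accumulation; for context, a hedged sketch of that standard pattern (the function and field names here are illustrative, not the Executor's actual ones):

```python
import contextlib

def backward_step(model, batch_dict, optimizer, accum_grad, batch_idx):
    # Skip DDP's gradient all-reduce on pure accumulation steps and
    # only synchronize on the step that will call optimizer.step().
    is_sync_step = (batch_idx + 1) % accum_grad == 0
    context = contextlib.nullcontext() if is_sync_step else model.no_sync()
    with context:
        loss = model(batch_dict)["loss"] / accum_grad
        loss.backward()
    if is_sync_step:
        optimizer.step()
        optimizer.zero_grad()
```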

examples/libritts/cosyvoice/conf/cosyvoice.fromscratch.yaml (+1 -1)

@@ -167,6 +167,7 @@ batch: !name:cosyvoice.dataset.processor.batch
     batch_type: 'dynamic'
     max_frames_in_batch: 12000
 padding: !name:cosyvoice.dataset.processor.padding
+    use_spk_embedding: False # change to True during sft
 
 # dataset processor pipeline
 data_pipeline: [
@@ -190,7 +191,6 @@ train_conf:
     scheduler: warmuplr
     scheduler_conf:
         warmup_steps: 25000
-    use_spk_embedding: False # change to True during sft
     max_epoch: 200
     grad_clip: 5
     accum_grad: 2

examples/libritts/cosyvoice/conf/cosyvoice.yaml (+1 -1)

@@ -167,6 +167,7 @@ batch: !name:cosyvoice.dataset.processor.batch
     batch_type: 'dynamic'
     max_frames_in_batch: 2000
 padding: !name:cosyvoice.dataset.processor.padding
+    use_spk_embedding: False # change to True during sft
 
 # dataset processor pipeline
 data_pipeline: [
@@ -190,7 +191,6 @@ train_conf:
     scheduler: warmuplr # change to constantlr during sft
     scheduler_conf:
         warmup_steps: 2500
-    use_spk_embedding: False # change to True during sft
     max_epoch: 200
     grad_clip: 5
     accum_grad: 2
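
Both configs hand `use_spk_embedding` to the processor through hyperpyyaml's `!name:` tag, which, as far as I recall, binds the YAML keys as keyword arguments via functools.partial. A rough plain-Python equivalent of the new config line:

```python
from functools import partial

from cosyvoice.dataset.processor import padding

# Roughly what the YAML
#     padding: !name:cosyvoice.dataset.processor.padding
#         use_spk_embedding: False # change to True during sft
# expands to: a callable with use_spk_embedding pre-bound.
padding_fn = partial(padding, use_spk_embedding=False)

# The data pipeline can then invoke it as before; the bound keyword
# rides along: padding_fn(data, mode='train') resolves to
# padding(data, use_spk_embedding=False, mode='train').
```

Moving the flag out of `train_conf` and into the `padding` hook keeps the choice next to the code that consumes it, which is what lets the branch in `executor.py` be deleted.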