
move use_spk_embedding to processor

lyuxiang.lx · 1 year ago
commit 6cebcb3410

cosyvoice/dataset/processor.py (+5 -1)

@@ -308,7 +308,7 @@ def batch(data, batch_type='static', batch_size=16, max_frames_in_batch=12000, m
             logging.fatal('Unsupported batch type {}'.format(batch_type))
 
 
-def padding(data, mode='train'):
+def padding(data, use_spk_embedding, mode='train'):
     """ Padding the data into training data
 
         Args:
@@ -362,4 +362,8 @@ def padding(data, mode='train'):
                           'tts_index': tts_index,
                           'tts_text_token': tts_text_token,
                           'tts_text_token_len': tts_text_token_len})
+        if use_spk_embedding is True:
+            batch["embedding"] = batch["spk_embedding"]
+        else:
+            batch["embedding"] = batch["utt_embedding"]
         yield batch
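
The effect of this hunk is that `padding` now resolves the conditioning embedding while batches are assembled, instead of leaving both variants for the trainer to pick from. A minimal sketch of the new branch, using a hypothetical `select_embedding` helper and illustrative tensor shapes (the real `padding` does this inline at the end of the generator):

```python
import torch

def select_embedding(batch: dict, use_spk_embedding: bool) -> dict:
    """Mirror of the new branch in padding(): per the config comments,
    sft training conditions on the per-speaker embedding, otherwise
    the per-utterance embedding is used."""
    if use_spk_embedding:
        batch["embedding"] = batch["spk_embedding"]
    else:
        batch["embedding"] = batch["utt_embedding"]
    return batch

# Toy batch carrying both embedding variants; the 192-dim size is
# illustrative, not taken from the repo.
batch = {
    "spk_embedding": torch.zeros(2, 192),
    "utt_embedding": torch.ones(2, 192),
}
out = select_embedding(batch, use_spk_embedding=False)
assert torch.equal(out["embedding"], batch["utt_embedding"])
```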

cosyvoice/utils/executor.py (+0 -4)

@@ -52,10 +52,6 @@ class Executor:
                 info_dict["batch_idx"] = batch_idx
                 if cosyvoice_join(group_join, info_dict):
                     break
-                if info_dict["use_spk_embedding"] is True:
-                    batch_dict["embedding"] = batch_dict["spk_embedding"]
-                else:
-                    batch_dict["embedding"] = batch_dict["utt_embedding"]
 
                 # Disable gradient synchronizations across DDP processes.
                 # Within this context, gradients will be accumulated on module
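
With the branch deleted, the train loop receives batches whose `embedding` key is already populated by the dataset pipeline. The surviving comment refers to DDP gradient accumulation; for context, a hedged sketch of that standard pattern (the function and field names here are illustrative, not the Executor's actual ones):

```python
import contextlib

def backward_step(model, batch_dict, optimizer, accum_grad, batch_idx):
    # Skip DDP's gradient all-reduce on pure accumulation steps and
    # only synchronize on the step that will call optimizer.step().
    is_sync_step = (batch_idx + 1) % accum_grad == 0
    context = contextlib.nullcontext() if is_sync_step else model.no_sync()
    with context:
        loss = model(batch_dict)["loss"] / accum_grad
        loss.backward()
    if is_sync_step:
        optimizer.step()
        optimizer.zero_grad()
```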

examples/libritts/cosyvoice/conf/cosyvoice.fromscratch.yaml (+1 -1)

@@ -167,6 +167,7 @@ batch: !name:cosyvoice.dataset.processor.batch
     batch_type: 'dynamic'
     max_frames_in_batch: 12000
 padding: !name:cosyvoice.dataset.processor.padding
+    use_spk_embedding: False # change to True during sft
 
 # dataset processor pipeline
 data_pipeline: [
@@ -190,7 +191,6 @@ train_conf:
     scheduler: warmuplr
     scheduler_conf:
         warmup_steps: 25000
-    use_spk_embedding: False # change to True during sft
     max_epoch: 200
     grad_clip: 5
     accum_grad: 2

examples/libritts/cosyvoice/conf/cosyvoice.yaml (+1 -1)

@@ -167,6 +167,7 @@ batch: !name:cosyvoice.dataset.processor.batch
     batch_type: 'dynamic'
     max_frames_in_batch: 2000
 padding: !name:cosyvoice.dataset.processor.padding
+    use_spk_embedding: False # change to True during sft
 
 # dataset processor pipeline
 data_pipeline: [
@@ -190,7 +191,6 @@ train_conf:
     scheduler: warmuplr # change to constantlr during sft
     scheduler_conf:
         warmup_steps: 2500
-    use_spk_embedding: False # change to True during sft
     max_epoch: 200
     grad_clip: 5
     accum_grad: 2
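
Both configs hand `use_spk_embedding` to the processor through hyperpyyaml's `!name:` tag, which, as far as I recall, binds the YAML keys as keyword arguments via functools.partial. A rough plain-Python equivalent of the new config line:

```python
from functools import partial

from cosyvoice.dataset.processor import padding

# Roughly what the YAML
#     padding: !name:cosyvoice.dataset.processor.padding
#         use_spk_embedding: False # change to True during sft
# expands to: a callable with use_spk_embedding pre-bound.
padding_fn = partial(padding, use_spk_embedding=False)

# The data pipeline can then invoke it as before; the bound keyword
# rides along: padding_fn(data, mode='train') resolves to
# padding(data, use_spk_embedding=False, mode='train').
```

Moving the flag out of `train_conf` and into the `padding` hook keeps the choice next to the code that consumes it, which is what lets the branch in `executor.py` be deleted.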