فهرست منبع

update dataset

lyuxiang.lx 1 ماه پیش
والد
کامیت
4c19646b9a
3 فایل تغییر یافته به همراه 5 افزوده شده و 3 حذف شده
  1. +5 -1
      cosyvoice/dataset/dataset.py
  2. +0 -1
      examples/libritts/cosyvoice2/run.sh
  3. +0 -1
      examples/libritts/cosyvoice3/run.sh

+ 5 - 1
cosyvoice/dataset/dataset.py

@@ -145,7 +145,11 @@ def Dataset(data_list_file,
                        shuffle=shuffle,
                        partition=partition)
     # map partial arg to padding func
-    data_pipeline[-1] = partial(data_pipeline[-1], gan=gan, dpo=dpo)
+    for i in range(1, len(data_pipeline)):
+        if data_pipeline[i].func.__name__ == 'compute_fbank':
+            data_pipeline[i] = partial(data_pipeline[i], token_mel_ratio=0)
+        if data_pipeline[i].func.__name__ == 'padding':
+            data_pipeline[i] = partial(data_pipeline[i], gan=gan, dpo=dpo)
     for func in data_pipeline:
         dataset = Processor(dataset, func, mode=mode)
     return dataset

+ 0 - 1
examples/libritts/cosyvoice2/run.sh

@@ -66,7 +66,6 @@ if [ ${stage} -le 5 ] && [ ${stop_stage} -ge 5 ]; then
   fi
   cat data/{train-clean-100,train-clean-360,train-other-500}/parquet/data.list > data/train.data.list
   cat data/{dev-clean,dev-other}/parquet/data.list > data/dev.data.list
-  # NOTE will update llm/hift training later
   for model in llm flow hifigan; do
     torchrun --nnodes=1 --nproc_per_node=$num_gpus \
         --rdzv_id=$job_id --rdzv_backend="c10d" --rdzv_endpoint="localhost:1234" \

+ 0 - 1
examples/libritts/cosyvoice3/run.sh

@@ -68,7 +68,6 @@ if [ ${stage} -le 5 ] && [ ${stop_stage} -ge 5 ]; then
   fi
   cat data/{train-clean-100,train-clean-360,train-other-500}/parquet/data.list > data/train.data.list
   cat data/{dev-clean,dev-other}/parquet/data.list > data/dev.data.list
-  # NOTE will update llm/hift training later
   for model in llm flow hifigan; do
     torchrun --nnodes=1 --nproc_per_node=$num_gpus \
         --rdzv_id=$job_id --rdzv_backend="c10d" --rdzv_endpoint="localhost:1234" \