2 months ago · 2145b585f0
--- a/examples/libritts/cosyvoice2/run.sh
+++ b/examples/libritts/cosyvoice2/run.sh
@@ -24,7 +24,23 @@ if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
 
				   done
			
 
				 fi
			
 
				 
			
 
				-# NOTE embedding/token extraction is not necessary now as we support online feature extraction
			
 
				+# NOTE: offline embedding/token extraction is optional now that online feature extraction is supported, but relying on online extraction will affect training speed
			
 
				+# Stage 1: run tools/extract_embedding.py with campplus.onnx on each data/$x subset.
				+# The progress message is printed inside the loop so that $x names the subset
				+# being processed; before the loop $x is unset or left over from an earlier loop.
				+if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
				+  for x in train-clean-100 train-clean-360 train-other-500 dev-clean dev-other test-clean test-other; do
				+    echo "Extract campplus speaker embedding, you will get spk2embedding.pt and utt2embedding.pt in data/$x dir"
				+    # Quote the expansions so a model dir containing spaces is passed as one argument.
				+    tools/extract_embedding.py --dir "data/$x" \
				+      --onnx_path "$pretrained_model_dir/campplus.onnx"
				+  done
				+fi
			
 
				+
			
 
				+if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
			
 
				+  echo "Extract discrete speech token, you will get utt2speech_token.pt in data/$x dir"
			
 
				+  for x in train-clean-100 train-clean-360 train-other-500 dev-clean dev-other test-clean test-other; do
			
 
				+    tools/extract_speech_token.py --dir data/$x \
			
 
				+      --onnx_path $pretrained_model_dir/speech_tokenizer_v3.onnx
			
 
				+  done
			
 
				+fi
			
 
				+
			
 
				 if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
			
 
				   echo "Prepare required parquet format data, you should have prepared wav.scp/text/utt2spk/spk2utt/utt2embedding.pt/spk2embedding.pt/utt2speech_token.pt"
			
 
				   for x in train-clean-100 train-clean-360 train-other-500 dev-clean dev-other test-clean test-other; do
			
--- a/examples/libritts/cosyvoice3/run.sh
+++ b/examples/libritts/cosyvoice3/run.sh
@@ -25,7 +25,23 @@ if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
 
				   done
			
 
				 fi
			
 
				 
			
 
				-# NOTE embedding/token extraction is not necessary now as we support online feature extraction
			
 
				+# NOTE: offline embedding/token extraction is optional now that online feature extraction is supported, but relying on online extraction will affect training speed
			
 
				+# Stage 1: run tools/extract_embedding.py with campplus.onnx on each data/$x subset.
				+# The progress message is printed inside the loop so that $x names the subset
				+# being processed; before the loop $x is unset or left over from an earlier loop.
				+if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
				+  for x in train-clean-100 train-clean-360 train-other-500 dev-clean dev-other test-clean test-other; do
				+    echo "Extract campplus speaker embedding, you will get spk2embedding.pt and utt2embedding.pt in data/$x dir"
				+    # Quote the expansions so a model dir containing spaces is passed as one argument.
				+    tools/extract_embedding.py --dir "data/$x" \
				+      --onnx_path "$pretrained_model_dir/campplus.onnx"
				+  done
				+fi
			
 
				+
			
 
				+# Stage 2: run tools/extract_speech_token.py with speech_tokenizer_v3.onnx on each
				+# data/$x subset. The progress message is printed inside the loop so that $x names
				+# the subset being processed; before the loop $x is unset or left over from an
				+# earlier loop.
				+if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
				+  for x in train-clean-100 train-clean-360 train-other-500 dev-clean dev-other test-clean test-other; do
				+    echo "Extract discrete speech token, you will get utt2speech_token.pt in data/$x dir"
				+    # Quote the expansions so a model dir containing spaces is passed as one argument.
				+    tools/extract_speech_token.py --dir "data/$x" \
				+      --onnx_path "$pretrained_model_dir/speech_tokenizer_v3.onnx"
				+  done
				+fi
			
 
				+
			
 
				 if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
			
 
				   echo "Prepare required parquet format data, you should have prepared wav.scp/text/utt2spk/spk2utt/utt2embedding.pt/spk2embedding.pt/utt2speech_token.pt"
			
 
				   for x in train-clean-100 train-clean-360 train-other-500 dev-clean dev-other test-clean test-other; do