@@ -9,6 +9,8 @@ export PYTHONPATH=${cosyvoice_path}:$PYTHONPATH
 export PYTHONPATH=${cosyvoice_path}/third_party/Matcha-TTS:$PYTHONPATH
 stage=$1
 stop_stage=$2
+N_GPUS=2 # set the number of GPUs to use
+
 
 huggingface_model_local_dir=./cosyvoice2_llm
 model_scope_model_local_dir=./CosyVoice2-0.5B
@@ -128,8 +130,34 @@ if [ $stage -le 2 ] && [ $stop_stage -ge 2 ]; then
 fi
 
 if [ $stage -le 3 ] && [ $stop_stage -ge 3 ]; then
- echo "Starting Triton server"
|
|
|
- tritonserver --model-repository $model_repo --http-port 18000
|
|
|
+ echo "Starting Triton server on $N_GPUS GPUs"
|
|
|
+ for i in $(seq 0 $(($N_GPUS - 1))); do
|
|
|
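+    # Give each instance its own HTTP/gRPC/metrics ports (base port + GPU index) so the servers don't collide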
+ echo "Starting server on GPU $i"
|
|
|
+ http_port=$((19000 + $i))
|
|
|
+ grpc_port=$((18000 + $i))
|
|
|
+ metrics_port=$((17000 + $i))
|
|
|
+ CUDA_VISIBLE_DEVICES=$i tritonserver --model-repository $model_repo --http-port $http_port --grpc-port $grpc_port --metrics-port $metrics_port &
|
|
|
+ done
|
|
|
+
+  echo "Servers are running in the background. Press Ctrl+C to stop them and the script."
+  wait
+fi
+
+if [ $stage -le 30 ] && [ $stop_stage -ge 30 ]; then
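+  # Single-GPU variant of stage 3: same loop, but a single instance pinned to GPU 0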
+ echo "Starting Triton server on $N_GPUS GPUs"
|
|
|
+ N_GPUS=1
|
|
|
+  for i in $(seq 0 $(($N_GPUS - 1))); do
+    echo "Starting server on GPU $i"
+    http_port=$((19000 + $i))
+    grpc_port=$((18000 + $i))
+    metrics_port=$((17000 + $i))
+    CUDA_VISIBLE_DEVICES=0 tritonserver --model-repository $model_repo --http-port $http_port --grpc-port $grpc_port --metrics-port $metrics_port &
+  done
+
+  echo "Servers are running in the background. Press Ctrl+C to stop them and the script."
+  wait
 fi
 
 if [ $stage -le 4 ] && [ $stop_stage -ge 4 ]; then
@@ -142,21 +170,49 @@ if [ $stage -le 4 ] && [ $stop_stage -ge 4 ]; then
 fi
 
 if [ $stage -le 5 ] && [ $stop_stage -ge 5 ]; then
- echo "Running benchmark client grpc"
|
|
|
- num_task=4
|
|
|
+ echo "Running benchmark client grpc on $N_GPUS GPUs"
|
|
|
+ num_task=1
|
|
|
|
|
|
   mode=streaming
   BLS_INSTANCE_NUM=4
 
-  python3 client_grpc.py \
-    --server-addr localhost \
-    --model-name cosyvoice2_dit \
-    --num-tasks $num_task \
-    --mode $mode \
-    --huggingface-dataset yuekai/seed_tts_cosy2 \
-    --log-dir ./log_debug_concurrent_tasks_${num_task}_${mode}_bls_${BLS_INSTANCE_NUM}
+  for i in $(seq 0 $(($N_GPUS - 1))); do
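+    # One benchmark client per server instance, launched in parallel (&); wait below collects them all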
+    grpc_port=$((18000 + $i))
+    echo "Running client for server on localhost:$grpc_port"
+    python3 client_grpc.py \
+      --server-addr localhost \
+      --server-port $grpc_port \
+      --model-name cosyvoice2_dit \
+      --num-tasks $num_task \
+      --mode $mode \
+      --huggingface-dataset yuekai/seed_tts_cosy2 \
+      --log-dir ./log_debug_concurrent_tasks_${num_task}_${mode}_bls_${BLS_INSTANCE_NUM}_gpu${i} &
+  done
+  wait
 fi
+if [ $stage -le 50 ] && [ $stop_stage -ge 50 ]; then
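+  # Single-GPU variant of stage 5: benchmark the one server instance started by stage 30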
+ echo "Running benchmark client grpc on $N_GPUS GPUs"
|
|
|
+ num_task=4
|
|
|
+ N_GPUS=1
|
|
|
+  mode=streaming
+  BLS_INSTANCE_NUM=4
+
+  for i in $(seq 0 $(($N_GPUS - 1))); do
+    grpc_port=$((18000 + $i))
+    echo "Running client for server on localhost:$grpc_port"
+    python3 client_grpc.py \
+      --server-addr localhost \
+      --server-port $grpc_port \
+      --model-name cosyvoice2_dit \
+      --num-tasks $num_task \
+      --mode $mode \
+      --huggingface-dataset yuekai/seed_tts_cosy2 \
+      --log-dir ./log_single_card_concurrent_tasks_${num_task}_${mode}_bls_${BLS_INSTANCE_NUM} &
+  done
+  wait
+fi
 if [ $stage -le 6 ] && [ $stop_stage -ge 6 ]; then
   echo "stage 6: Offline inference benchmark"
   n_gpus=1