add contributor info

lyuxiang.lx · 3 months ago
commit 05bdf4c769
4 changed files with 13 additions and 4 deletions
1. README.md (+4 −0)
2. runtime/triton_trtllm/Dockerfile.server (+2 −0)
3. runtime/triton_trtllm/README.md (+2 −0)
4. runtime/triton_trtllm/run.sh (+5 −4)

+ 4 - 0
README.md

@@ -29,6 +29,10 @@
 
 ## Roadmap
 
+- [x] 2025/08
+
+    - [x] Add Triton TRT-LLM runtime support, thanks to the contribution from Yuekai Zhang of NVIDIA
+
 - [x] 2025/07
 
     - [x] release cosyvoice 3.0 eval set

+ 2 - 0
runtime/triton_trtllm/Dockerfile.server

@@ -1,4 +1,6 @@
 FROM nvcr.io/nvidia/tritonserver:25.06-trtllm-python-py3
+LABEL maintainer="zhangyuekai@foxmail.com"
+
 RUN apt-get update && apt-get install -y cmake
 RUN git clone https://github.com/pytorch/audio.git && cd audio && git checkout c670ad8 && PATH=/usr/local/cuda/bin:$PATH python3 setup.py develop
 COPY ./requirements.txt /workspace/requirements.txt
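
For reference, the image above can be built with a standard `docker build`; a minimal sketch, assuming the repo root as working directory (the tag name `cosyvoice-triton-server` is illustrative, not defined by the repo):

```sh
# Build the Triton + TRT-LLM server image; the tag is a placeholder.
docker build \
  -f runtime/triton_trtllm/Dockerfile.server \
  -t cosyvoice-triton-server:latest \
  runtime/triton_trtllm
```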

+ 2 - 0
runtime/triton_trtllm/README.md

@@ -1,5 +1,7 @@
 ## Best Practices for Serving CosyVoice with NVIDIA Triton Inference Server
 
+Thanks to Yuekai Zhang of NVIDIA for contributing this runtime.
+
 ### Quick Start
 Launch the service directly with Docker Compose:
 ```sh
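
The compose command itself falls outside the diff context; a typical invocation, assuming the compose file sits alongside this README (it is not shown in the diff), would be:

```sh
# Start the service in the background from runtime/triton_trtllm.
docker compose up -d
```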

+ 5 - 4
runtime/triton_trtllm/run.sh

@@ -1,4 +1,5 @@
-
+#!/bin/bash
+# Copyright (c) 2025 NVIDIA (authors: Yuekai Zhang)
 export CUDA_VISIBLE_DEVICES=0
 cosyvoice_path=/workspace/CosyVoice
 export PYTHONPATH=${cosyvoice_path}:$PYTHONPATH
@@ -24,8 +25,8 @@ fi
 
 if [ $stage -le 0 ] && [ $stop_stage -ge 0 ]; then
     echo "Downloading CosyVoice2-0.5B"
-    huggingface-cli download --local-dir $huggingface_model_local_dir yuekai/cosyvoice2_llm 
-    modelscope download --model iic/CosyVoice2-0.5B --local_dir $model_scope_model_local_dir 
+    huggingface-cli download --local-dir $huggingface_model_local_dir yuekai/cosyvoice2_llm
+    modelscope download --model iic/CosyVoice2-0.5B --local_dir $model_scope_model_local_dir
 fi
 
 
@@ -67,7 +68,7 @@ if [ $stage -le 2 ] && [ $stop_stage -ge 2 ]; then
     BLS_INSTANCE_NUM=4
     TRITON_MAX_BATCH_SIZE=16
     DECOUPLED_MODE=False
-   
+
     python3 scripts/fill_template.py -i ${model_repo}/token2wav/config.pbtxt model_dir:${MODEL_DIR},triton_max_batch_size:${TRITON_MAX_BATCH_SIZE},max_queue_delay_microseconds:${MAX_QUEUE_DELAY_MICROSECONDS}
     python3 scripts/fill_template.py -i ${model_repo}/audio_tokenizer/config.pbtxt model_dir:${MODEL_DIR},triton_max_batch_size:${TRITON_MAX_BATCH_SIZE},max_queue_delay_microseconds:${MAX_QUEUE_DELAY_MICROSECONDS}
     python3 scripts/fill_template.py -i ${model_repo}/${cosyvoice2_dir}/config.pbtxt model_dir:${MODEL_DIR},bls_instance_num:${BLS_INSTANCE_NUM},llm_tokenizer_dir:${LLM_TOKENIZER_DIR},triton_max_batch_size:${TRITON_MAX_BATCH_SIZE},decoupled_mode:${DECOUPLED_MODE},max_queue_delay_microseconds:${MAX_QUEUE_DELAY_MICROSECONDS}
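
For context, `scripts/fill_template.py` (not shown in this commit) is invoked with `key:value` pairs that it substitutes into placeholders in the Triton `config.pbtxt` templates. A rough sketch of one equivalent substitution, assuming `${name}`-style placeholders (an assumption about the template format, not taken from the script's source):

```sh
# Illustration only: approximate one fill_template.py call with sed,
# hard-coding triton_max_batch_size=16 into a config.pbtxt template.
sed -i 's/\${triton_max_batch_size}/16/g' model_repo/token2wav/config.pbtxt
```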