1 bulan lalu · 881177287c
--- a/README.md
+++ b/README.md
@@ -152,14 +152,18 @@ python example.py
 
				 ```
			
 
				 
			
 
				 #### CosyVoice2 vllm Usage
			
 
				-If you want to use vllm for inference, please install `vllm==v0.9.0`. Older vllm version do not support CosyVoice2 inference.
			
 
				+CosyVoice2 now supports **vLLM 0.11.x+ (V1 engine)** and **vLLM 0.9.0 (legacy)**.
			
 
				+Older vllm versions (<0.9.0) do not support CosyVoice2 inference, and versions in between (e.g., 0.10.x) are not tested.
			
 
				 
			
 
				 Notice that `vllm==v0.9.0` has a lot of specific requirements, for example `torch==2.7.0`. You can create a new env in case your hardware does not support vllm and the old env is corrupted.
			
 
				 
			
 
				 ``` sh
			
 
				 conda create -n cosyvoice_vllm --clone cosyvoice
			
 
				 conda activate cosyvoice_vllm
			
 
				+# for vllm==0.9.0
			
 
				 pip install vllm==v0.9.0 transformers==4.51.3 numpy==1.26.4 -i https://mirrors.aliyun.com/pypi/simple/ --trusted-host=mirrors.aliyun.com
			
 
				+# for vllm>=0.11.0
			
 
				+pip install vllm==v0.11.0 transformers==4.57.1 numpy==1.26.4 -i https://mirrors.aliyun.com/pypi/simple/ --trusted-host=mirrors.aliyun.com
			
 
				 python vllm_example.py
			
 
				 ```
			
 
				 
			
--- a/cosyvoice/vllm/cosyvoice2.py
+++ b/cosyvoice/vllm/cosyvoice2.py
@@ -23,6 +23,15 @@
 
				 # See the License for the specific language governing permissions and
			
 
				 # limitations under the License.
			
 
				 """Inference-only Qwen2 model compatible with HuggingFace weights."""
			
 
				+from typing import Optional
			
 
				+from packaging.version import parse as vparse
			
 
				+import vllm
			
 
				+
			
 
				+# vLLM 0.11.0+ only supports the V1 engine
			
 
				+VLLM_V1_ENGINE_ONLY: bool = vparse(vllm.__version__) >= vparse("0.11.0")
			
 
				+if VLLM_V1_ENGINE_ONLY:
			
 
				+    from vllm.v1.sample.metadata import SamplingMetadata
			
 
				+
			
 
				 from vllm.model_executor.models.qwen2 import *
			
 
				 
			
 
				 
			
@@ -87,10 +96,14 @@ class CosyVoice2ForCausalLM(nn.Module, SupportsLoRA, SupportsPP):
 
				     def compute_logits(
			
 
				         self,
			
 
				         hidden_states: torch.Tensor,
			
 
				-        sampling_metadata: SamplingMetadata,
			
 
				+        sampling_metadata: Optional[SamplingMetadata] = None,
			
 
				     ) -> Optional[torch.Tensor]:
			
 
				-        logits = self.logits_processor(self.lm_head, hidden_states,
			
 
				-                                       sampling_metadata, self.lm_head.bias)
			
 
				+        if VLLM_V1_ENGINE_ONLY:
			
 
				+            logits = self.logits_processor(self.lm_head, hidden_states, 
			
 
				+                                           self.lm_head.bias)
			
 
				+        else:
			
 
				+            logits = self.logits_processor(self.lm_head, hidden_states,
			
 
				+                                           sampling_metadata, self.lm_head.bias)
			
 
				         return logits
			
 
				 
			
 
				     def load_weights(self, weights: Iterable[tuple[str,