|
@@ -441,6 +441,8 @@ class Qwen2LM(TransformerLM):
|
|
|
# in stream mode, yield token one by one
|
|
# in stream mode, yield token one by one
|
|
|
yield top_ids
|
|
yield top_ids
|
|
|
out_tokens.append(top_ids)
|
|
out_tokens.append(top_ids)
|
|
|
|
|
+ if len(out_tokens) == max_len:
|
|
|
|
|
+ break
|
|
|
time.sleep(0.001)
|
|
time.sleep(0.001)
|
|
|
with self.lock:
|
|
with self.lock:
|
|
|
self.vllm_output_queue.pop(uuid)
|
|
self.vllm_output_queue.pop(uuid)
|