|
|
async def inference_processor(self, task_queue):
|
|
async def inference_processor(self, task_queue):
|
|
|
out_queue, prompt_token_ids, request_id, stop_token_ids, max_tokens = task_queue.get()
|
|
out_queue, prompt_token_ids, request_id, stop_token_ids, max_tokens = task_queue.get()
|