vLLM 推理 Qwen2.5-VL-7B 图像
flyfish
# Single-image inference with a local Qwen2.5-VL-7B-Instruct checkpoint using
# vLLM's offline `LLM` API. The script builds a chat message containing one
# image and one text instruction, renders it to a prompt string with the
# model's chat template, extracts the vision inputs, runs generation and
# prints the generated text.
from transformers import AutoProcessor
from vllm import LLM, SamplingParams
from qwen_vl_utils import process_vision_info

# Local checkpoint directory and the image to describe.
MODEL_PATH = "/media/model/Qwen/Qwen25-VL-7B-Instruct/"
image_path = "/home/images/1.jpg"

# Engine configuration: one GPU, bfloat16 weights, and at most one image and
# one video per prompt.
llm = LLM(
    model=MODEL_PATH,
    gpu_memory_utilization=0.8,
    tensor_parallel_size=1,
    max_model_len=8192,
    dtype="bfloat16",
    enforce_eager=True,
    limit_mm_per_prompt={"image": 1, "video": 1},
)

sampling_params = SamplingParams(
    temperature=0.1,
    top_p=0.001,
    repetition_penalty=1.05,
    max_tokens=8192,
    stop_token_ids=[],
)

# One user turn: the image followed by the instruction (Chinese for
# "Describe this image.").
messages = [
    {
        "role": "user",
        "content": [
            # image_path is absolute, so its leading "/" already supplies the
            # third slash of the URI; prefixing "file:///" would yield
            # "file:////home/...".
            {"type": "image", "image": "file://" + image_path},
            {"type": "text", "text": "描述这张图像。"},
        ],
    }
]

# Render the chat messages to a plain prompt string (not token ids), ending
# with the generation prompt so the model continues as the assistant.
processor = AutoProcessor.from_pretrained(MODEL_PATH)
prompt = processor.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True,
)

# Pull the image/video payloads referenced by the messages.
image_inputs, video_inputs = process_vision_info(messages)

mm_data = {}
if image_inputs is not None:
    mm_data["image"] = image_inputs
# The message above carries no video, so this is presumably None here; it is
# forwarded when present so a video entry added to `messages` is not dropped.
if video_inputs is not None:
    mm_data["video"] = video_inputs

llm_inputs = {
    "prompt": prompt,
    "multi_modal_data": mm_data,
}

# A single request is submitted, so the first completion of the first result
# is the answer.
outputs = llm.generate([llm_inputs], sampling_params=sampling_params)
generated_text = outputs[0].outputs[0].text

print(generated_text)