"""
LLM request module.

Provides multimodal (image + text) and text-only LLM request helpers, plus a
few image utilities (base64 encoding, download with retry, compression and
TOS upload).
"""
import io
import os
import sys
import time
import base64
import logging

import numpy as np
import requests
from PIL import Image
from openai import OpenAI
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry

from logger_setup import logger
from conf import *
# `upload_tos` refers to `tos.exceptions.*`, so the package itself must be
# bound. It is imported after the star import above so that the name `tos`
# is guaranteed to be the SDK package in this module.
import tos
from tos import HttpMethodType


# Image modes that Pillow's JPEG encoder can write directly; any other mode
# (RGBA, P, LA, ...) has to be converted before saving as JPEG.
_JPEG_WRITABLE_MODES = frozenset({"1", "L", "RGB", "RGBX", "CMYK", "YCbCr"})

# Bounds applied to the estimated JPEG quality in `compress_image`. Pillow's
# documentation advises against values above 95.
_MIN_QUALITY = 1
_MAX_QUALITY = 95


def image_to_base64(image):
    """
    Encode a PIL image as a base64 string of its JPEG representation.

    Args:
        image: PIL Image object. Images whose mode the JPEG encoder cannot
            write (for example RGBA or P) are converted to RGB first instead
            of raising.

    Returns:
        Base64-encoded string (UTF-8 decoded) of the JPEG bytes, saved with
        quality 95.
    """
    if image.mode not in _JPEG_WRITABLE_MODES:
        image = image.convert("RGB")
    buffer = io.BytesIO()
    image.save(buffer, format='JPEG', quality=95)
    return base64.b64encode(buffer.getvalue()).decode('utf-8')


def download_image_with_retry(url, max_retries=3, timeout=30):
    """
    Download an image over HTTP(S), retrying on transient failures.

    Args:
        url: Image URL.
        max_retries: Total number of retries handed to urllib3's ``Retry``.
        timeout: Request timeout in seconds.

    Returns:
        PIL Image object on success, ``None`` on any failure (the error is
        logged, not raised).
    """
    retry_strategy = Retry(
        total=max_retries,
        backoff_factor=1,
        status_forcelist=[429, 500, 502, 503, 504],
    )
    adapter = HTTPAdapter(max_retries=retry_strategy)

    # The session is a context manager so its connection pool is released
    # even when the request fails.
    with requests.Session() as session:
        session.mount("http://", adapter)
        session.mount("https://", adapter)
        try:
            logger.info(f"正在下载图片: {url}")
            response = session.get(url, timeout=timeout)
            response.raise_for_status()
            logger.info("图片下载成功")
            # The payload is already fully in memory, so the returned image
            # does not depend on the session staying open.
            return Image.open(io.BytesIO(response.content))
        except Exception as e:
            # Deliberate best-effort: callers check for None.
            logger.error(f"下载图片失败: {e}")
            return None


def image_reader(image):
    """
    Convert an image given in one of several forms into a JPEG data URI.

    Supported inputs:
        - local file path (``str`` or ``os.PathLike``)
        - HTTP/HTTPS URL (``str``)
        - numpy array
        - PIL Image object

    Args:
        image: Image input (path, URL, numpy array or PIL Image).

    Returns:
        ``data:image/jpeg;base64,...`` string.

    Raises:
        Exception: If a URL was given and the download failed.
    """
    if isinstance(image, str) and image.lower().startswith(("http://", "https://")):
        # Remote image. Matching the full scheme keeps local paths that merely
        # begin with "http" (e.g. "http_cache/a.jpg") from being downloaded.
        downloaded = download_image_with_retry(image)
        if downloaded is None:
            raise Exception(f"无法下载图片: {image}")
        rgb_image = downloaded.convert('RGB')
    elif isinstance(image, (str, os.PathLike)):
        # Local file: convert() returns an independent in-memory copy, so the
        # file handle can be closed as soon as the conversion is done.
        with Image.open(image) as local_image:
            rgb_image = local_image.convert('RGB')
    elif isinstance(image, np.ndarray):
        rgb_image = Image.fromarray(image).convert('RGB')
    else:
        # Anything else is treated as a PIL Image.
        rgb_image = image.convert('RGB')

    return f"data:image/jpeg;base64,{image_to_base64(rgb_image)}"


def get_lm_text(sys_prompt, user_prompt):
    """
    Text-only LLM request (deprecated; use the ``llm_request`` class instead).

    Uses ``LMConfig.lm_client`` and ``LMConfig.model``; ``LMConfig`` is not
    defined in this file and is presumably provided by ``from conf import *``.

    Args:
        sys_prompt: System prompt.
        user_prompt: User prompt.

    Returns:
        Text content of the first choice returned by the model.
    """
    completion = LMConfig.lm_client.chat.completions.create(
        messages=[
            {"role": "system", "content": sys_prompt},
            {"role": "user", "content": user_prompt},
        ],
        model=LMConfig.model,
    )
    return completion.choices[0].message.content


# ==================== Image processing utilities ====================

def compress_image(input_path, output_path):
    """
    Compress an image towards ``MMMConfig.target_size`` bytes.

    The first quality is estimated from the ratio between the target size and
    the input file size, clamped to 1..95. While the output is still larger
    than the target, the quality is lowered in steps of 10 and the image is
    re-encoded from the original pixels (not from the previous, already
    degraded output).

    Args:
        input_path: Path of the input image.
        output_path: Path the compressed image is written to.

    Returns:
        The quality value the loop ended on. This is the quality used for the
        last save when the target was reached; it is zero or negative when the
        quality was exhausted without reaching the target (in that case the
        file on disk is the last successfully written attempt).
    """
    target_size = MMMConfig.target_size

    with Image.open(input_path) as img:
        source_size = os.path.getsize(input_path)

        # Rough first guess, clamped so that an already-small input does not
        # produce quality > 95 and a huge input does not produce quality 0.
        estimated = int(float(target_size / source_size) * 100)
        image_quality = max(_MIN_QUALITY, min(_MAX_QUALITY, estimated))
        img.save(output_path, optimize=True, quality=image_quality)

        # Keep lowering the quality until the file fits or quality runs out.
        while os.path.getsize(output_path) > target_size:
            image_quality -= 10
            if image_quality <= 0:
                break
            img.save(output_path, optimize=True, quality=image_quality)

    return image_quality


def upload_tos(filename, tos_object_key):
    """
    Upload a local file to TOS and return a pre-signed GET URL for it.

    Args:
        filename: Full path of the local file (the compressed image).
        tos_object_key: Object key to store the file under in
            ``MMMConfig.tos_bucket_name``.

    Returns:
        Whatever ``inner_tos_client.pre_signed_url`` returns for a GET on the
        uploaded object.

    Raises:
        Exception: Any error raised by the upload or signing call is logged
            and re-raised unchanged.
    """
    tos_client = MMMConfig.tos_client
    inner_tos_client = MMMConfig.inner_tos_client
    try:
        # Upload the local file into the target bucket.
        tos_client.put_object_from_file(MMMConfig.tos_bucket_name, tos_object_key, filename)
        # Build the pre-signed URL for the uploaded object.
        return inner_tos_client.pre_signed_url(
            HttpMethodType.Http_Method_Get, MMMConfig.tos_bucket_name, tos_object_key
        )
    except tos.exceptions.TosClientError as e:
        # Client-side failure: usually invalid request parameters or a
        # network problem.
        logger.error('TOS客户端错误, message:{}, cause: {}'.format(e.message, e.cause))
        raise
    except tos.exceptions.TosServerError as e:
        # Server-side failure: details are carried on the exception.
        logger.error('TOS服务端错误, code: {}'.format(e.code))
        # The request id pinpoints the failing call; always keep it in logs.
        logger.error('error with request id: {}'.format(e.request_id))
        logger.error('error with message: {}'.format(e.message))
        logger.error('error with http code: {}'.format(e.status_code))
        raise
    except Exception as e:
        logger.error('TOS上传失败,未知错误: {}'.format(e))
        raise


# Legacy implementation kept for reference only (not executed):
# def doubao_MMM_request(pre_signed_url_output, prompt):
#     client = MMMConfig.client
#     response = client.chat.completions.create(
#         model=MMMConfig.model,
#         messages=[{"role": "user","content": [
#             {"type": "text", "text": prompt},
#             {"type": "image_url", "image_url": {"url": pre_signed_url_output.signed_url}}
#             ],
#         }],
#         temperature=0.8,
#         extra_headers={"x-ark-beta-vision": "true"}
#     )
#     result = response.choices[0].message.content
#     return result


class llm_request:
    """
    Thin wrapper around an OpenAI-compatible chat-completions endpoint.

    Offers single-image, multi-image and text-only requests. A new ``OpenAI``
    client is built for every request from the current ``api_key`` and
    ``base_url`` attributes.
    """

    # Per-request timeout, in seconds, passed to the completions call.
    _REQUEST_TIMEOUT = 120.0

    def __init__(self, api_key, base_url, model):
        """
        Store the connection settings.

        Args:
            api_key: API key.
            base_url: Base URL of the API.
            model: Model name.
        """
        self.api_key = api_key
        self.base_url = base_url
        self.model = model

    def _chat(self, messages, **params):
        """Send ``messages`` to the model and return the first choice's text."""
        client = OpenAI(
            api_key=self.api_key,
            base_url=self.base_url
        )
        completion = client.chat.completions.create(
            model=self.model,
            messages=messages,
            timeout=self._REQUEST_TIMEOUT,
            **params
        )
        return completion.choices[0].message.content

    def _multimodal_request(self, usr_text, imgs, sys_text):
        """Build an images-then-text user message and send it."""
        user_content = [
            {
                "type": "image_url",
                "image_url": {"url": image_reader(img)},
            }
            for img in imgs
        ]
        user_content.append({"type": "text", "text": usr_text})
        messages = [
            {
                "role": "system",
                "content": [{"type": "text", "text": sys_text}]
            },
            {
                "role": "user",
                "content": user_content,
            }
        ]
        return self._chat(messages, temperature=0.5, top_p=0.7)

    def llm_mm_request(self, usr_text, img, sys_text="You are a helpful assistant."):
        """
        Multimodal request with a single image.

        Args:
            usr_text: User text prompt.
            img: Image (path, URL, numpy array or PIL Image).
            sys_text: System prompt.

        Returns:
            Text content returned by the model.
        """
        return self._multimodal_request(usr_text, [img], sys_text)

    def llm_mm_2_request(self, usr_text, imgs, sys_text="You are a helpful assistant."):
        """
        Multimodal request with several images.

        Args:
            usr_text: User text prompt.
            imgs: Iterable of images (paths, URLs, numpy arrays or PIL
                Images). A single path/URL string or a single PIL Image is
                also accepted and treated as a one-element list; a numpy
                array is always iterated along its first axis.
            sys_text: System prompt.

        Returns:
            Text content returned by the model.
        """
        # A bare string would otherwise be iterated character by character,
        # and a bare PIL Image is not iterable at all.
        if isinstance(imgs, (str, os.PathLike, Image.Image)):
            imgs = [imgs]
        return self._multimodal_request(usr_text, imgs, sys_text)

    def llm_text_request(self, text, sys_text="You are a helpful assistant."):
        """
        Text-only request.

        Args:
            text: User text prompt.
            sys_text: System prompt.

        Returns:
            Text content returned by the model.
        """
        messages = [
            {
                "role": "system",
                "content": sys_text
            },
            {
                "role": "user",
                "content": text,
            }
        ]
        return self._chat(messages, temperature=0.9)


if __name__ == "__main__":
    # SECURITY: API keys used to be hard-coded here. They are now read from
    # the environment; any key that was ever committed to this file should be
    # treated as leaked and revoked.
    #
    # Endpoints / models previously used with this script:
    #   https://api.openaius.com/v1                        gpt-5
    #   https://dashscope.aliyuncs.com/compatible-mode/v1  qwen3-vl-plus
    #   https://ark.cn-beijing.volces.com/api/v3           doubao-1-5-vision-pro-32k-250115
    ky = os.environ.get("LLM_API_KEY")
    if not ky:
        sys.exit("LLM_API_KEY environment variable is not set")
    baseurl = os.environ.get("LLM_BASE_URL", "https://dashscope.aliyuncs.com/compatible-mode/v1")
    model = os.environ.get("LLM_MODEL", "qwen3-vl-plus")

    llm = llm_request(ky, baseurl, model)

    # Optional first CLI argument overrides the default sample image.
    if len(sys.argv) > 1:
        imgs = sys.argv[1]
    else:
        imgs = r"H:\data\线稿图\S1261A097_S1261A097_concatenated.jpg"
    res1 = llm.llm_mm_request("判断一下图2是不是图1的平铺图,纽扣数量是否一致", imgs)
    print(res1)

    # res2 = llm.llm_text_request("你好!你是谁")
    # print(res2)