| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377 |
- """
- LLM请求模块
- 提供多模态和文本LLM请求功能,支持图片和文本输入
- """
- import io
- import os
- import sys
- import time
- import base64
- import logging
- import numpy as np
- import requests
- from PIL import Image
- from openai import OpenAI
- from requests.adapters import HTTPAdapter
- from urllib3.util.retry import Retry
- from logger_setup import logger
- from conf import *
- from tos import HttpMethodType
def image_to_base64(image):
    """Encode an image as base64 text of its JPEG representation.

    Args:
        image: Object exposing a PIL-style ``save(fp, format=..., quality=...)``
            method (a PIL Image everywhere this module calls it).

    Returns:
        The bytes written by ``image.save`` (JPEG, quality 95), base64-encoded
        and decoded to a ``str``.
    """
    buffer = io.BytesIO()
    image.save(buffer, format='JPEG', quality=95)
    # getvalue() yields the whole buffer regardless of the stream position,
    # so no rewind is needed before reading it back.
    jpeg_bytes = buffer.getvalue()
    encoded = base64.b64encode(jpeg_bytes)
    return encoded.decode('utf-8')
def download_image_with_retry(url, max_retries=3, timeout=30):
    """Download an image over HTTP(S), retrying on transient failures.

    Args:
        url: Image URL.
        max_retries: Total number of retries handed to the urllib3 ``Retry``
            policy.
        timeout: Request timeout in seconds.

    Returns:
        A PIL Image built from the response body, or ``None`` if the request
        or the image parsing failed (the error is logged).
    """
    # Responses with these status codes are retried by the transport adapter.
    retry_strategy = Retry(
        total=max_retries,
        backoff_factor=1,
        status_forcelist=[429, 500, 502, 503, 504],
    )
    adapter = HTTPAdapter(max_retries=retry_strategy)

    # The session owns a connection pool; the context manager guarantees it is
    # released on every path instead of lingering until garbage collection.
    with requests.Session() as session:
        session.mount("http://", adapter)
        session.mount("https://", adapter)

        try:
            logger.info(f"正在下载图片: {url}")
            response = session.get(url, timeout=timeout)
            response.raise_for_status()
            logger.info("图片下载成功")
            # The body is already fully in memory, so the returned image does
            # not depend on the session staying open.
            return Image.open(io.BytesIO(response.content))
        except Exception as e:
            # Best-effort contract: callers check for None rather than catching.
            logger.error(f"下载图片失败: {e}")
            return None
def image_reader(image):
    """Convert an image input into a base64-encoded JPEG data URI.

    Supported inputs:
        - local file path (``str``)
        - ``http://`` / ``https://`` URL (``str``)
        - numpy array
        - PIL Image object (or anything else exposing ``convert('RGB')``)

    Args:
        image: Image input (path, URL, numpy array or PIL Image).

    Returns:
        A ``data:image/jpeg;base64,...`` string.

    Raises:
        Exception: If a URL was given and the download failed.
    """
    if isinstance(image, str):
        # Match the full scheme: a bare "http" prefix would also capture local
        # files whose name merely starts with "http".
        if image.startswith(("http://", "https://")):
            downloaded = download_image_with_retry(image)
            if downloaded is None:
                raise Exception(f"无法下载图片: {image}")
            rgb_image = downloaded.convert('RGB')
        else:
            # convert() returns a new in-memory image, so the file handle can
            # be released as soon as the conversion is done.
            with Image.open(image) as local_image:
                rgb_image = local_image.convert('RGB')
    elif isinstance(image, np.ndarray):
        rgb_image = Image.fromarray(image).convert('RGB')
    else:
        # Anything else is assumed to already behave like a PIL Image.
        rgb_image = image.convert('RGB')

    base64_img = image_to_base64(rgb_image)
    return f"data:image/jpeg;base64,{base64_img}"
def get_lm_text(sys_prompt, user_prompt):
    """Send a text-only chat request (deprecated; use the llm_request class).

    Args:
        sys_prompt: System prompt.
        user_prompt: User prompt.

    Returns:
        The message content of the first choice in the completion.
    """
    conversation = [
        {"role": "system", "content": sys_prompt},
        {"role": "user", "content": user_prompt},
    ]
    # Client and model both come from the shared LMConfig object.
    response = LMConfig.lm_client.chat.completions.create(
        messages=conversation,
        model=LMConfig.model,
    )
    first_choice = response.choices[0]
    return first_choice.message.content
- # ==================== 图片处理工具 ====================
def compress_image(input_path, output_path):
    """Re-encode an image so the output file fits ``MMMConfig.target_size``.

    The starting quality is estimated from the ratio between the target size
    and the input file size, then lowered in steps of 10 until the written
    file is no larger than the target or the quality cannot be lowered
    further. This is best effort: at the lowest quality the output may still
    exceed the target.

    Args:
        input_path: Path of the image to compress.
        output_path: Path the compressed image is written to.

    Returns:
        The quality value used for the last write to ``output_path``.
    """
    # Pillow's documentation advises against quality values above 95, and the
    # size-ratio estimate can fall to 0 or exceed 100, so it is clamped.
    min_quality, max_quality, quality_step = 1, 95, 10
    target_size = MMMConfig.target_size

    # Keep an in-memory copy and release the file handle immediately; this
    # also makes it safe for input_path and output_path to be the same file.
    with Image.open(input_path) as source:
        img = source.copy()
    current_size = os.path.getsize(input_path)

    # Rough first guess: scale quality by how far the file is from the target.
    estimated_quality = int(target_size / current_size * 100)
    image_quality = max(min_quality, min(max_quality, estimated_quality))
    img.save(output_path, optimize=True, quality=image_quality)

    # Always re-encode from the original pixels rather than from the previous
    # output, so repeated attempts do not stack lossy compression artifacts.
    while (os.path.getsize(output_path) > target_size
           and image_quality - quality_step >= min_quality):
        image_quality -= quality_step
        img.save(output_path, optimize=True, quality=image_quality)

    return image_quality
def upload_tos(filename, tos_object_key):
    """Upload a local file to TOS and return a pre-signed GET URL for it.

    Args:
        filename: Full path of the local file to upload.
        tos_object_key: Object key to store the file under in the bucket.

    Returns:
        The value returned by ``inner_tos_client.pre_signed_url`` for the
        uploaded object.

    Raises:
        Exception: Any error raised during upload or signing is logged and
            re-raised unchanged.
    """
    # Imported locally because the module header only does
    # ``from tos import HttpMethodType``; without this the bare name ``tos``
    # used below is bound only if ``from conf import *`` happens to export it,
    # and a NameError in the handler would mask the real upload failure.
    import tos

    tos_client, inner_tos_client = MMMConfig.tos_client, MMMConfig.inner_tos_client
    try:
        # Upload the local file into the configured bucket.
        tos_client.put_object_from_file(MMMConfig.tos_bucket_name, tos_object_key, filename)
        # Produce a pre-signed URL for reading the object back.
        return inner_tos_client.pre_signed_url(
            HttpMethodType.Http_Method_Get, MMMConfig.tos_bucket_name, tos_object_key
        )
    except tos.exceptions.TosClientError as e:
        # Client-side failure, typically invalid request parameters or network errors.
        logger.error('TOS客户端错误, message:{}, cause: {}'.format(e.message, e.cause))
        raise
    except tos.exceptions.TosServerError as e:
        # Server-side failure; the request id is logged so the issue can be traced.
        logger.error('TOS服务端错误, code: {}'.format(e.code))
        logger.error('error with request id: {}'.format(e.request_id))
        logger.error('error with message: {}'.format(e.message))
        logger.error('error with http code: {}'.format(e.status_code))
        raise
    except Exception as e:
        logger.error('TOS上传失败,未知错误: {}'.format(e))
        raise
- # def doubao_MMM_request(pre_signed_url_output, prompt):
- # client = MMMConfig.client
-
- # response = client.chat.completions.create(
- # model=MMMConfig.model,
- # messages=[{"role": "user","content": [
- # {"type": "text", "text": prompt},
- # {"type": "image_url", "image_url": {"url": pre_signed_url_output.signed_url}}
- # ],
- # }],
- # temperature=0.8,
- # extra_headers={"x-ark-beta-vision": "true"}
- # )
- # result = response.choices[0].message.content
- # return result
class llm_request:
    """Request helper for OpenAI-compatible chat-completion endpoints.

    Provides single-image, multi-image and text-only requests. A client is
    created per request from the stored credentials and closed as soon as the
    response has been received.
    """

    # Per-request timeout in seconds, shared by every request type.
    _REQUEST_TIMEOUT = 120.0

    def __init__(self, api_key, base_url, model):
        """Store the connection settings used by every request.

        Args:
            api_key: API key.
            base_url: Base URL of the API.
            model: Model name.
        """
        self.api_key = api_key
        self.base_url = base_url
        self.model = model

    def _complete(self, messages, **sampling_params):
        """Send one chat-completion request and return the first choice's text."""
        # The context manager closes the client's HTTP resources right away
        # instead of leaving them open until garbage collection.
        with OpenAI(api_key=self.api_key, base_url=self.base_url) as client:
            completion = client.chat.completions.create(
                model=self.model,
                messages=messages,
                timeout=self._REQUEST_TIMEOUT,
                **sampling_params,
            )
        return completion.choices[0].message.content

    def _multimodal_request(self, usr_text, imgs, sys_text):
        """Build an images-then-text user message and send it."""
        user_content = [
            {
                "type": "image_url",
                "image_url": {"url": image_reader(img)},
            }
            for img in imgs
        ]
        # The text part goes after all images, as in the request layout below.
        user_content.append({"type": "text", "text": usr_text})
        messages = [
            {
                "role": "system",
                "content": [{"type": "text", "text": sys_text}],
            },
            {
                "role": "user",
                "content": user_content,
            },
        ]
        return self._complete(messages, temperature=0.5, top_p=0.7)

    def llm_mm_request(self, usr_text, img, sys_text="You are a helpful assistant."):
        """Send a multimodal request with a single image.

        Args:
            usr_text: User text prompt.
            img: Image (path, URL, numpy array or PIL Image).
            sys_text: System prompt.

        Returns:
            The text content returned by the model.
        """
        return self._multimodal_request(usr_text, [img], sys_text)

    def llm_mm_2_request(self, usr_text, imgs, sys_text="You are a helpful assistant."):
        """Send a multimodal request with several images.

        Args:
            usr_text: User text prompt.
            imgs: Iterable of images (path, URL, numpy array or PIL Image).
            sys_text: System prompt.

        Returns:
            The text content returned by the model.
        """
        return self._multimodal_request(usr_text, imgs, sys_text)

    def llm_text_request(self, text, sys_text="You are a helpful assistant."):
        """Send a text-only request.

        Args:
            text: User text prompt.
            sys_text: System prompt.

        Returns:
            The text content returned by the model.
        """
        messages = [
            {"role": "system", "content": sys_text},
            {"role": "user", "content": text},
        ]
        return self._complete(messages, temperature=0.9)
if __name__ == "__main__":
    # Manual smoke test: send one image plus a question to a vision model.
    #
    # SECURITY: credentials are read from the environment and must never be
    # committed to source control.
    # NOTE(review): API keys were previously hard-coded in this block; treat
    # them as leaked and rotate them.
    #
    # Other OpenAI-compatible providers can be selected through the same
    # variables, e.g.
    #   LLM_BASE_URL=https://ark.cn-beijing.volces.com/api/v3
    #   LLM_MODEL=doubao-1-5-vision-pro-32k-250115
    ky = os.environ.get("LLM_API_KEY")
    if not ky:
        sys.exit("Set the LLM_API_KEY environment variable before running this demo.")
    baseurl = os.environ.get("LLM_BASE_URL", "https://dashscope.aliyuncs.com/compatible-mode/v1")
    model = os.environ.get("LLM_MODEL", "qwen3-vl-plus")

    llm = llm_request(ky, baseurl, model)
    # The image path may be passed as the first CLI argument; the previous
    # hard-coded sample path remains the default.
    if len(sys.argv) > 1:
        imgs = sys.argv[1]
    else:
        imgs = r"H:\data\线稿图\S1261A097_S1261A097_concatenated.jpg"
    res1 = llm.llm_mm_request("判断一下图2是不是图1的平铺图,纽扣数量是否一致", imgs)
    print(res1)
|