| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126 |
- import requests
- import base64
- from PIL import Image
- import json
- import io
- import os
- from dotenv import load_dotenv
- from interfaces.image_output import ImageOutput
- from utils.logger_config import setup_logger
# Module-level logger shared by every function in this file.
logger = setup_logger(__name__)

# Load variables from a local .env file before reading the key below, so
# that GEMINI_API_KEY may come either from the file or from the real
# environment.
load_dotenv()

# Read once at import time; None when the variable is not set.
API_KEY = os.environ.get("GEMINI_API_KEY")
def image_to_base64(image_path):
    """Return the image at ``image_path`` as base64 text of its PNG encoding.

    The file is opened with Pillow, converted to RGB when it is in any
    other mode, written to an in-memory PNG, and that PNG is
    base64-encoded.

    Args:
        image_path: Path (or anything ``Image.open`` accepts) of the image.

    Returns:
        str: Base64 text of the PNG bytes.
    """
    with Image.open(image_path) as source:
        # Normalise to RGB so every reference image is sent in one mode.
        rgb_image = source if source.mode == 'RGB' else source.convert('RGB')

        png_bytes = io.BytesIO()
        rgb_image.save(png_bytes, format='PNG')

    encoded = base64.b64encode(png_bytes.getvalue())
    return encoded.decode('utf-8')
def _extract_image_bytes(result):
    """Log the text parts of the first candidate and decode its first image.

    Args:
        result (dict): Parsed JSON body of a ``generateContent`` response.

    Returns:
        bytes | None: Decoded bytes of the first inline image part of the
        first candidate, or ``None`` when there is no such part.
    """
    candidates = result.get("candidates") or []
    if not candidates:
        return None

    parts = (candidates[0].get("content") or {}).get("parts") or []
    for part in parts:
        if "text" in part:
            logger.info(part["text"])
            continue

        # Accept both the camelCase keys and the snake_case spelling that
        # the request payload itself uses.
        inline = part.get("inlineData") or part.get("inline_data")
        if not inline:
            continue

        # Any image MIME type is a valid result; matching only
        # "image/jpeg" silently dropped PNG (and other) outputs.
        mime_type = inline.get("mimeType") or inline.get("mime_type") or ""
        if mime_type.startswith("image/"):
            return base64.b64decode(inline["data"])

    return None


def generate_image_from_prompt_and_images(
    prompt="An office group photo of these people, they are making funny faces.",
    image_paths=['output_front.png'],
    aspect_ratio="5:4",
    resolution="1K",
    api_key=API_KEY,
    url="https://api.openaius.com/v1beta/models/gemini-3-pro-image-preview:generateContent"
):
    """Generate an image from a prompt and optional reference images.

    Each reference image is base64-encoded and sent, together with the
    prompt, in a single ``generateContent`` POST request. Text parts of
    the first candidate are logged; the first image part is decoded and
    returned.

    Args:
        prompt (str): Text prompt.
        image_paths (list): Paths of reference images. May be empty. The
            default list is only iterated, never mutated.
        aspect_ratio (str): Sent as ``image_config.aspect_ratio``.
        resolution (str): Sent as ``image_config.image_size``.
        api_key (str): Sent as a ``Bearer`` token in the Authorization
            header. The default is bound when the module is imported.
        url (str): Endpoint URL.

    Returns:
        ImageOutput | None: The generated image, or ``None`` when a
        reference image cannot be loaded, the request fails, the response
        cannot be parsed, or the response contains no image. All failures
        are logged and none are raised.
    """
    # Load and encode every reference image; abort on the first failure.
    images_base64 = []
    for img_path in image_paths:
        try:
            images_base64.append(image_to_base64(img_path))
        except Exception as e:
            logger.error(f"Error loading image {img_path}: {e}")
            return None

    # Request body: the prompt first, then one inline PNG per reference.
    payload = {
        "contents": [
            {
                "role": "user",
                "parts": [
                    {"text": prompt},
                    *[
                        {"inline_data": {"mime_type": "image/png", "data": img_b64}}
                        for img_b64 in images_base64
                    ],
                ],
            }
        ],
        "generation_config": {
            "response_modalities": ["TEXT", "IMAGE"],
            "image_config": {
                "aspect_ratio": aspect_ratio,
                "image_size": resolution,
            },
        },
    }

    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json",
    }

    try:
        response = requests.post(url, headers=headers, json=payload, timeout=60)
        response.raise_for_status()  # turn HTTP error statuses into exceptions

        img_data = _extract_image_bytes(response.json())
        if img_data is None:
            # Previously this case returned None without any trace.
            logger.warning("No image part found in the API response")
            return None

        # NOTE(review): ext stays "png" whatever MIME type was returned, and
        # fmt is "b64" although data holds decoded bytes. Both are kept
        # from the original call; confirm against ImageOutput.
        return ImageOutput(fmt="b64", ext="png", data=img_data)

    except requests.exceptions.RequestException as e:
        logger.error(f"Request failed: {e}")
        if e.response is not None:
            logger.error(f"Response status: {e.response.status_code}")
            logger.error(f"Response body: {e.response.text}")
    except json.JSONDecodeError as e:
        logger.error(f"Failed to parse JSON response: {e}")
        logger.error(f"Raw response: {response.text}")
    except Exception as e:
        logger.error(f"Unexpected error: {e}")

    return None
# When this file is run directly, generate one demo image and save it.
if __name__ == "__main__":
    prompt = "生成一张美女图片,表情可爱"
    result = generate_image_from_prompt_and_images(prompt, image_paths=[])
    # The generator returns None on any failure; calling save_img on it
    # would raise AttributeError, so exit with a non-zero status instead.
    if result is None:
        logger.error("Image generation failed; nothing was saved.")
        raise SystemExit(1)
    result.save_img("output_banana_pro.png")
|