import base64
import io
import json
import os

import requests
from dotenv import load_dotenv
from PIL import Image

from interfaces.image_output import ImageOutput
from utils.logger_config import setup_logger

logger = setup_logger(__name__)

load_dotenv()
API_KEY = os.getenv("GEMINI_API_KEY")


def image_to_base64(image_path):
    """Load an image file and return it as a base64-encoded PNG string.

    Args:
        image_path: Path (or file object) accepted by ``PIL.Image.open``.

    Returns:
        str: Base64 text of the image re-encoded as PNG in RGB mode.
    """
    with Image.open(image_path) as img:
        # Normalise to RGB so every reference image is sent in the same mode.
        if img.mode != 'RGB':
            img = img.convert('RGB')

        # Re-encode as PNG in memory; the request declares "image/png".
        buffer = io.BytesIO()
        img.save(buffer, format='PNG')
        img_base64 = base64.b64encode(buffer.getvalue()).decode('utf-8')
        return img_base64


def _extract_image_output(result):
    """Return the first image found in the first candidate of *result*, or None.

    Text parts are logged at INFO level as they are encountered.
    """
    candidates = result.get("candidates") or []
    if not candidates:
        return None

    content = candidates[0].get("content") or {}
    for part in content.get("parts") or []:
        if "text" in part:
            logger.info(part["text"])
            continue

        inline = part.get("inlineData")
        if not inline:
            continue

        # Accept any image MIME type: the previous exact match on
        # "image/jpeg" silently discarded PNG (and other) image responses.
        mime_type = inline.get("mimeType") or ""
        if mime_type.startswith("image/"):
            img_data = base64.b64decode(inline["data"])
            # NOTE(review): ext is kept as "png" to preserve the existing
            # ImageOutput contract even when the API returns JPEG data --
            # confirm how ImageOutput uses `ext`.
            return ImageOutput(fmt="b64", ext="png", data=img_data)

    return None


def generate_image_from_prompt_and_images(
    prompt="An office group photo of these people, they are making funny faces.",
    image_paths=None,
    aspect_ratio="5:4",
    resolution="1K",
    api_key=API_KEY,
    url="https://api.openaius.com/v1beta/models/gemini-3-pro-image-preview:generateContent",
    timeout=60,
):
    """Generate an image with the Gemini API from a prompt and reference images.

    Args:
        prompt (str): Text prompt.
        image_paths (list | None): Paths of reference images. ``None`` uses
            the default ``['output_front.png']``; an empty list sends no
            reference images.
        aspect_ratio (str): Aspect ratio of the generated image.
        resolution (str): Resolution of the generated image.
        api_key (str): API key sent as a Bearer token.
        url (str): API endpoint URL.
        timeout (float): Request timeout in seconds.

    Returns:
        ImageOutput | None: The first generated image, or ``None`` when the
        API key is missing, a reference image cannot be loaded, the request
        fails, the response is not valid JSON, or it contains no image.
        Failures are logged rather than raised.
    """
    # A None sentinel avoids sharing one mutable default list across calls.
    if image_paths is None:
        image_paths = ['output_front.png']

    # Fail fast instead of sending the literal header "Bearer None".
    if not api_key:
        logger.error("Missing API key: set GEMINI_API_KEY or pass api_key.")
        return None

    # Load and encode every reference image; abort on the first failure.
    images_base64 = []
    for img_path in image_paths:
        try:
            images_base64.append(image_to_base64(img_path))
        except Exception as e:
            logger.error(f"Error loading image {img_path}: {e}")
            return None

    # Build the request body: the prompt followed by the inline images.
    payload = {
        "contents": [
            {
                "role": "user",
                "parts": [
                    {"text": prompt},
                    *[
                        {"inline_data": {"mime_type": "image/png", "data": img_b64}}
                        for img_b64 in images_base64
                    ],
                ],
            }
        ],
        "generation_config": {
            "response_modalities": ["TEXT", "IMAGE"],
            "image_config": {
                "aspect_ratio": aspect_ratio,
                "image_size": resolution,
            },
        },
    }

    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json",
    }

    try:
        response = requests.post(url, headers=headers, json=payload, timeout=timeout)
        response.raise_for_status()  # Surface HTTP error statuses.

        # Parse JSON in its own try: the decode error raised by
        # response.json() is also a RequestException in current requests
        # releases, so a later `except json.JSONDecodeError` clause would
        # never run and the raw body would not be logged.
        try:
            result = response.json()
        except ValueError as e:
            logger.error(f"Failed to parse JSON response: {e}")
            logger.error(f"Raw response: {response.text}")
            return None

        image = _extract_image_output(result)
        if image is None:
            logger.warning("No image found in API response")
        return image

    except requests.exceptions.RequestException as e:
        logger.error(f"Request failed: {e}")
        if hasattr(e, 'response') and e.response is not None:
            logger.error(f"Response status: {e.response.status_code}")
            logger.error(f"Response body: {e.response.text}")
    except Exception as e:
        # Last-resort boundary so callers always get None instead of a crash.
        logger.error(f"Unexpected error: {e}")

    return None


# Run a sample generation when executed directly as a script.
if __name__ == "__main__":
    prompt = "生成一张美女图片,表情可爱"
    result = generate_image_from_prompt_and_images(prompt, image_paths=[])
    if result is None:
        # Generation failures return None; avoid AttributeError on save_img.
        logger.error("Image generation failed; nothing to save.")
    else:
        result.save_img("output_banana_pro.png")