gemini3.py 4.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126
  1. import requests
  2. import base64
  3. from PIL import Image
  4. import json
  5. import io
  6. import os
  7. from dotenv import load_dotenv
  8. from interfaces.image_output import ImageOutput
  9. from utils.logger_config import setup_logger
  10. logger = setup_logger(__name__)
  11. load_dotenv()
  12. API_KEY = os.getenv("GEMINI_API_KEY")
  13. def image_to_base64(image_path):
  14. """将图片转换为base64字符串"""
  15. with Image.open(image_path) as img:
  16. # 转换为RGB模式(如果需要)
  17. if img.mode != 'RGB':
  18. img = img.convert('RGB')
  19. # 将图片转换为base64
  20. buffer = io.BytesIO()
  21. img.save(buffer, format='PNG')
  22. img_base64 = base64.b64encode(buffer.getvalue()).decode('utf-8')
  23. return img_base64
  24. def generate_image_from_prompt_and_images(
  25. prompt="An office group photo of these people, they are making funny faces.",
  26. image_paths=['output_front.png'],
  27. aspect_ratio="5:4",
  28. resolution="1K",
  29. api_key=API_KEY,
  30. url="https://api.openaius.com/v1beta/models/gemini-3-pro-image-preview:generateContent"
  31. ):
  32. """
  33. 使用Gemini API根据提示词和参考图片生成新图片
  34. Args:
  35. prompt (str): 提示词
  36. image_paths (list): 参考图片路径列表
  37. aspect_ratio (str): 图片宽高比
  38. resolution (str): 图片分辨率
  39. api_key (str): API密钥
  40. url (str): API端点URL
  41. Returns:
  42. dict: API响应结果
  43. """
  44. # 读取并转换所有图片
  45. images_base64 = []
  46. for img_path in image_paths:
  47. try:
  48. img_b64 = image_to_base64(img_path)
  49. images_base64.append(img_b64)
  50. except Exception as e:
  51. logger.error(f"Error loading image {img_path}: {e}")
  52. return None
  53. # 构建请求体
  54. payload = {
  55. "contents": [
  56. {
  57. "role": "user",
  58. "parts": [
  59. {"text": prompt},
  60. *[{"inline_data": {"mime_type": "image/png", "data": img_b64}} for img_b64 in images_base64]
  61. ]
  62. }
  63. ],
  64. "generation_config": {
  65. "response_modalities": ["TEXT", "IMAGE"],
  66. "image_config": {
  67. "aspect_ratio": aspect_ratio,
  68. "image_size": resolution
  69. }
  70. }
  71. }
  72. # 设置请求头
  73. headers = {
  74. "Authorization": f"Bearer {api_key}",
  75. "Content-Type": "application/json"
  76. }
  77. # 发送请求
  78. try:
  79. response = requests.post(url, headers=headers, json=payload, timeout=60)
  80. response.raise_for_status() # 检查HTTP错误
  81. result = response.json()
  82. # 处理响应
  83. if "candidates" in result and len(result["candidates"]) > 0:
  84. candidate = result["candidates"][0]
  85. if "content" in candidate and "parts" in candidate["content"]:
  86. for part in candidate["content"]["parts"]:
  87. if "text" in part:
  88. logger.info(part["text"])
  89. elif "inlineData" in part and part["inlineData"]["mimeType"] == "image/jpeg":
  90. img_data = base64.b64decode(part["inlineData"]["data"])
  91. return ImageOutput(fmt="b64", ext="png", data=img_data)
  92. except requests.exceptions.RequestException as e:
  93. logger.error(f"Request failed: {e}")
  94. if hasattr(e, 'response') and e.response is not None:
  95. logger.error(f"Response status: {e.response.status_code}")
  96. logger.error(f"Response body: {e.response.text}")
  97. except json.JSONDecodeError as e:
  98. logger.error(f"Failed to parse JSON response: {e}")
  99. logger.error(f"Raw response: {response.text}")
  100. except Exception as e:
  101. logger.error(f"Unexpected error: {e}")
  102. return None
  103. # 如果直接运行此脚本,则执行函数
  104. if __name__ == "__main__":
  105. prompt = "生成一张美女图片,表情可爱"
  106. result = generate_image_from_prompt_and_images(prompt, image_paths=[])
  107. result.save_img("output_banana_pro.png")