script2video_pipeline.py 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432
  1. import os
  2. import json
  3. import time
  4. import asyncio
  5. from typing import Optional
  6. from utils.tools import (
  7. string_to_json,
  8. save_json_file,
  9. setup_logger,
  10. efficient_sort
  11. )
  12. from tools.banana_pro import generate_image_from_prompt_and_images
  13. from tools.text_generator import media_captioner
  14. from tools.image_generator import image_generator
  15. from tools.video_generator import video_generator
  16. from tools.video_composer import video_composer, concat_videos
  17. from mcps.story_create import story_creator
  18. from mcps.character_extract import character_extractor
  19. from mcps.character_portraits_generate import character_portraits_generator
  20. from mcps.storyboard_create import storyboard_creator
  21. from mcps.camera_tree import camera_tree_creator
  22. from mcps.reference_image_select import reference_image_selector
  23. logger = setup_logger(__name__)
  24. class Script2VideoPipeline:
  25. def __init__(
  26. self
  27. ):
  28. pass
  29. def video_create_pipeline(
  30. self,
  31. idea: str,
  32. user_requirement: Optional[str] = None,
  33. style: Optional[str] = None,
  34. ):
  35. # 1. 创建故事
  36. logger.info("Creating story...")
  37. if os.path.exists("./output/story.txt"):
  38. with open("./output/story.txt", "r", encoding='utf-8') as f:
  39. story = f.read()
  40. else:
  41. story = story_creator.develop_story(
  42. idea=idea,
  43. user_requirement=user_requirement
  44. )
  45. with open("./output/story.txt", "w", encoding='utf-8') as f:
  46. f.write(story)
  47. # 2. 创建剧本: 分场景创建
  48. logger.info("Writing script...")
  49. if os.path.exists("./output/script.json"):
  50. with open("./output/script.json", "r", encoding='utf-8') as f:
  51. script = json.load(f)
  52. else:
  53. script = story_creator.write_script_on_story(
  54. story=story,
  55. user_requirement=user_requirement
  56. )
  57. with open("./output/script.json", "w", encoding='utf-8') as f:
  58. json.dump(script, f, ensure_ascii=False, indent=4)
  59. # 3. 抽取角色
  60. logger.info("Extracting characters...")
  61. if os.path.exists("./output/characters.json"):
  62. with open("./output/characters.json", "r", encoding='utf-8') as f:
  63. characters = json.load(f)
  64. else:
  65. characters = character_extractor.extract_characters(
  66. script=script
  67. )
  68. with open("./output/characters.json", "w", encoding='utf-8') as f:
  69. json.dump(characters, f, ensure_ascii=False, indent=4)
  70. # 4. 设计角色稿
  71. logger.info("Designing character portraits...")
  72. if os.path.exists("./output/character_portraits.json"):
  73. with open("./output/character_portraits.json", "r", encoding='utf-8') as f:
  74. character_portraits = json.load(f)
  75. else:
  76. character_portraits = self._character_portraits_generator(
  77. characters=characters,
  78. style=style
  79. )
  80. with open("./output/character_portraits.json", "w", encoding='utf-8') as f:
  81. json.dump(character_portraits, f, ensure_ascii=False, indent=4)
  82. # 5. 为每个场景剧本创建分镜脚本
  83. logger.info("Creating storyboard...")
  84. if os.path.exists("./output/storyboards.json"):
  85. with open("./output/storyboards.json", "r", encoding='utf-8') as f:
  86. storyboards = json.load(f)
  87. else:
  88. storyboards = self._create_storyboard(
  89. script=script,
  90. characters=str(characters),
  91. user_requirement=user_requirement
  92. )
  93. with open("./output/storyboards.json", "w", encoding='utf-8') as f:
  94. json.dump(storyboards, f, ensure_ascii=False, indent=4)
  95. # 6. 构建相机树
  96. logger.info("Building camera tree...")
  97. if os.path.exists("./output/storyboards_with_camera_tree.json"):
  98. with open("./output/storyboards_with_camera_tree.json", "r", encoding='utf-8') as f:
  99. storyboards_with_camera_tree = json.load(f)
  100. else:
  101. storyboards_with_camera_tree = self._create_camera_tree(
  102. storyboards=storyboards
  103. )
  104. with open("./output/storyboards_with_camera_tree.json", "w", encoding='utf-8') as f:
  105. json.dump(storyboards_with_camera_tree, f, ensure_ascii=False, indent=4)
  106. # 7. 视频帧生成
  107. logger.info("Generating video frames...")
  108. if os.path.exists("./output/storyboards_with_frames.json"):
  109. with open("./output/storyboards_with_frames.json", "r", encoding='utf-8') as f:
  110. storyboards_with_frames = json.load(f)
  111. else:
  112. storyboards_with_frames = self._generate_video_frames_for_scene(
  113. storyboards_with_camera_tree=storyboards_with_camera_tree,
  114. character_portraits=character_portraits
  115. )
  116. with open("./output/storyboards_with_frames.json", "w", encoding='utf-8') as f:
  117. json.dump(storyboards_with_frames, f, ensure_ascii=False, indent=4)
  118. # 8. 视频片段生成
  119. logger.info("Generating video segments...")
  120. if os.path.exists("./output/storyboards_with_segments.json"):
  121. with open("./output/storyboards_with_segments.json", "r", encoding='utf-8') as f:
  122. storyboards_with_segments = json.load(f)
  123. else:
  124. storyboards_with_segments = video_generator.generate(
  125. video_script_data=storyboards_with_frames
  126. )
  127. with open("./output/storyboards_with_segments.json", "w", encoding='utf-8') as f:
  128. json.dump(storyboards_with_segments[0], f, ensure_ascii=False, indent=4)
  129. # 9. 拼接视频
  130. logger.info("Splicing video...")
  131. if os.path.exists("./output/final_video.mp4"):
  132. logger.info("Video spliced.")
  133. else:
  134. concat_videos("./output/storyboards_with_segments.json", "./output/final_video.mp4")
  135. logger.info("Video spliced.")
  136. def _create_storyboard(
  137. self,
  138. script: dict,
  139. characters: str,
  140. user_requirement: Optional[str] = None,
  141. ):
  142. scene_storyboard = []
  143. for idx, scene_script in enumerate(script["script"]):
  144. logger.info(f"Creating storyboard for scene {idx}...")
  145. if os.path.exists(f"./output/storyboard_{idx}.json"):
  146. with open(f"./output/storyboard_{idx}.json", "r", encoding='utf-8') as f:
  147. storyboard = json.load(f)
  148. else:
  149. storyboard = storyboard_creator.create_storyboard(
  150. script=scene_script,
  151. characters=characters,
  152. user_requirement=user_requirement
  153. )
  154. with open(f"./output/storyboard_{idx}.json", "w", encoding='utf-8') as f:
  155. json.dump(storyboard, f, ensure_ascii=False, indent=4)
  156. scene_storyboard.append(storyboard)
  157. logger.info(f"Storyboard for scene {idx} created.")
  158. storyboards = {
  159. "storyboards": scene_storyboard
  160. }
  161. return storyboards
  162. def _create_camera_tree(
  163. self,
  164. storyboards: dict
  165. ):
  166. for idx, storyboard in enumerate(storyboards["storyboards"]):
  167. logger.info(f"Creating camera tree for scene {idx}...")
  168. if os.path.exists(f"./output/storyboard_{idx}_with_camera_tree.json"):
  169. with open(f"./output/storyboard_{idx}_with_camera_tree.json", "r", encoding='utf-8') as f:
  170. camera_tree = json.load(f)
  171. else:
  172. camera_tree = camera_tree_creator.create_camera_tree(
  173. shot_descriptions=storyboard["storyboard"]
  174. )
  175. with open(f"./output/storyboard_{idx}_with_camera_tree.json", "w", encoding='utf-8') as f:
  176. json.dump(camera_tree, f, ensure_ascii=False, indent=4)
  177. storyboard |= camera_tree
  178. logger.info(f"Camera tree for scene {idx} created.")
  179. return storyboards
  180. def _character_portraits_generator(
  181. self,
  182. characters: dict,
  183. style: str
  184. ):
  185. for idx, character in enumerate(characters["characters"]):
  186. logger.info(f"Designing portrait for character {idx}...")
  187. if os.path.exists(f"./output/portraits_{idx}.json"):
  188. logger.info(f"Portrait for character {idx} already exists.")
  189. with open(f"./output/portraits_{idx}.json", "r", encoding='utf-8') as f:
  190. portrait_info = json.load(f)
  191. else:
  192. front_image_path = f"./output/front_portrait_{idx}.png"
  193. side_image_path = f"./output/side_portrait_{idx}.png"
  194. back_image_path = f"./output/back_portrait_{idx}.png"
  195. front_portrait = character_portraits_generator.generate_front_portrait(
  196. character=character,
  197. style=style
  198. )
  199. front_portrait.save(front_image_path)
  200. side_portrait = character_portraits_generator.generate_side_portrait(
  201. character=character,
  202. front_image_path=[front_image_path]
  203. )
  204. side_portrait.save(side_image_path)
  205. back_portrait = character_portraits_generator.generate_back_portrait(
  206. character=character,
  207. front_image_path=[front_image_path]
  208. )
  209. back_portrait.save(back_image_path)
  210. portrait_info = {
  211. "front_portrait": front_image_path,
  212. "side_portrait": side_image_path,
  213. "back_portrait": back_image_path
  214. }
  215. with open(f"./output/portraits_{idx}.json", "w", encoding='utf-8') as f:
  216. json.dump(portrait_info, f, ensure_ascii=False, indent=4)
  217. character |= portrait_info
  218. logger.info(f"Portrait for character {idx} designed.")
  219. return characters
  220. def _generate_video_frames_for_scene(
  221. self,
  222. storyboards_with_camera_tree: dict,
  223. character_portraits: dict
  224. ):
  225. shot_num = 0
  226. for scene_idx, storyboard_with_camera_tree in enumerate(storyboards_with_camera_tree["storyboards"]):
  227. logger.info(f"Generating video frames for scene {scene_idx}...")
  228. storyboard = storyboard_with_camera_tree["storyboard"]
  229. camera_tree = storyboard_with_camera_tree["camera_tree"]
  230. parent_shot_idxs = [0]
  231. active_shot_idxs = []
  232. for _, item in enumerate(camera_tree):
  233. if item["parent_shot_idx"] is not None:
  234. parent_shot_idxs.append(item["parent_shot_idx"])
  235. active_shot_idxs.append(item["active_shot_idxs"])
  236. process_order = efficient_sort(parent_shot_idxs, active_shot_idxs)
  237. for cam_idx in process_order:
  238. logger.info(f"Processing scene {scene_idx} - camera {cam_idx}...")
  239. camera_item = camera_tree[cam_idx]
  240. prev_frame_path_and_text_pairs = []
  241. for _, shot_idx in enumerate(camera_item["active_shot_idxs"]):
  242. logger.info(f"Processing scene {scene_idx} - camera {cam_idx} - shot {shot_idx}...")
  243. frame_description = storyboard[shot_idx]["ff_desc"]
  244. vis_char_idxs = storyboard[shot_idx]["ff_vis_char_idxs"]
  245. shot_num += 1
  246. image_path_and_text_pairs = []
  247. frame_save_path = f"./output/frame_scene{scene_idx}_camera{cam_idx}_shot{shot_idx}.png"
  248. if os.path.exists(frame_save_path):
  249. logger.info(f"Frame for scene {scene_idx} - camera {cam_idx} - shot {shot_idx} already exists.")
  250. continue
  251. else:
  252. # 参考可见角色三视图
  253. for vis_char_idx in vis_char_idxs:
  254. logger.info(f"Referencing character {vis_char_idx} portrait...")
  255. image_path_and_text_pairs.append((character_portraits["characters"][vis_char_idx]["front_portrait"], f"{character_portraits['characters'][vis_char_idx]['identifier_in_scene']}的正面肖像"))
  256. image_path_and_text_pairs.append((character_portraits["characters"][vis_char_idx]["side_portrait"], f"{character_portraits['characters'][vis_char_idx]['identifier_in_scene']}的侧面肖像"))
  257. image_path_and_text_pairs.append((character_portraits["characters"][vis_char_idx]["back_portrait"], f"{character_portraits['characters'][vis_char_idx]['identifier_in_scene']}的背面肖像"))
  258. # 参考前序帧
  259. image_path_and_text_pairs.extend(prev_frame_path_and_text_pairs)
  260. # 参考父帧
  261. if camera_item["parent_shot_idx"] is not None:
  262. image_path_and_text_pairs.append((storyboard[camera_item["parent_shot_idx"]]["ff_path"], storyboard[camera_item["parent_shot_idx"]]["ff_desc"]))
  263. # 筛选参考图像,生成生图提示词
  264. info_for_gen_frame = reference_image_selector.select_reference_images_and_generate_prompt(
  265. image_path_and_text_pairs=image_path_and_text_pairs,
  266. frame_description=frame_description
  267. )
  268. logger.info(f"目标帧描述:\n{frame_description}")
  269. logger.info(f"可参考帧:\n{image_path_and_text_pairs}")
  270. logger.info(f"实际参考:\n{info_for_gen_frame}")
  271. # 生成序列帧
  272. frame_prompt = info_for_gen_frame["text_prompt"]
  273. image_urls = [item[0] for item in info_for_gen_frame["reference_image_path_and_text_pairs"]]
  274. logger.info(f"Frame prompt: {frame_prompt}")
  275. logger.info(f"Reference images: {image_urls}")
  276. # 开始生成帧
  277. # if len(image_urls) == 0:
  278. # frame = asyncio.run(image_generator.generate_without_refer(frame_prompt))
  279. # frame.save_url(frame_save_path)
  280. # else:
  281. # frame = asyncio.run(image_generator.generate(frame_prompt, image_urls))
  282. # frame.save_url(frame_save_path)
  283. frame = generate_image_from_prompt_and_images(frame_prompt, image_paths=image_urls)
  284. frame.save(frame_save_path)
  285. # 保存前序帧
  286. prev_frame_path_and_text_pairs.append((frame_save_path, frame_description))
  287. # storyboard[shot_idx]["ff_url"] = frame.data
  288. storyboard[shot_idx]["ff_path"] = frame_save_path
  289. # shot_num += 1
  290. logger.info(f"Generated {shot_num} video frames.")
  291. with open(f"./output/final_storyboards.json", "w", encoding='utf-8') as f:
  292. json.dump(storyboards_with_camera_tree, f, ensure_ascii=False, indent=4)
  293. return storyboards_with_camera_tree
  294. if __name__ == "__main__":
  295. pipeline = Script2VideoPipeline()
  296. pipeline.video_create_pipeline(
  297. idea="身穿时尚服装的美女在街头漫步",
  298. user_requirement="剧情要连贯,最多三个场景",
  299. style="写实风格"
  300. )
  301. # with open("./output/storyboards_with_camera_tree.json", "r") as f:
  302. # storyboards = json.load(f)
  303. # full_items = storyboards["storyboards"]
  304. # for item in full_items:
  305. # camera_tree = item["camera_tree"]
  306. # for camera in camera_tree:
  307. # active_shot_idxs = camera["active_shot_idxs"][0]
  308. # camera["active_shot_idxs"] = active_shot_idxs
  309. # with open("./output/storyboards_with_camera_treess.json", "w") as f:
  310. # json.dump(storyboards, f, ensure_ascii=False, indent=4)
  311. # 生成角色肖像三视图
  312. # with open("./output/characters.json", "r", encoding='utf-8') as f:
  313. # characters = json.load(f)
  314. # character_portraits = pipeline._character_portraits_generator(
  315. # characters=characters,
  316. # style="cartoon"
  317. # )
  318. # with open("./output/character_portraits.json", "w", encoding='utf-8') as f:
  319. # json.dump(character_portraits, f, ensure_ascii=False, indent=4)
  320. # with open("./output/character_portraits.json", "r", encoding='utf-8') as f:
  321. # character_portraits = json.load(f)
  322. # with open("./output/storyboards_with_camera_tree.json", "r", encoding='utf-8') as f:
  323. # storyboards_with_camera_tree = json.load(f)
  324. # shot_num = 0
  325. # for scene_idx, storyboard_with_camera_tree in enumerate(storyboards_with_camera_tree["storyboards"]):
  326. # for shot_idx, shot in enumerate(storyboard_with_camera_tree["storyboard"]):
  327. # ff_path = f"./output/frame_scene{scene_idx}_camera{shot['cam_idx']}_shot{shot_idx}.png"
  328. # if os.path.exists(ff_path):
  329. # shot["ff_path"] = ff_path
  330. # shot_num += 1
  331. # logger.info(f"Total shot number: {shot_num}")
  332. # # 生成视频帧
  333. # result = pipeline._generate_video_frames_for_scene(
  334. # storyboards_with_camera_tree=storyboards_with_camera_tree,
  335. # character_portraits=character_portraits
  336. # )
  337. # with open("./output/storyboards_with_frames.json", "w", encoding='utf-8') as f:
  338. # json.dump(storyboards_with_camera_tree, f, ensure_ascii=False, indent=4)
  339. # # 将指定目录下的所有frame_scene*.png文件重命名为new_frame_scene*.png
  340. # for file in os.listdir("./output"):
  341. # if file.startswith("frame_scene") and file.endswith(".png"):
  342. # new_file = file.replace("frame_scene", "new_frame_scene")
  343. # os.rename(os.path.join("./output", file), os.path.join("./output", new_file))
  344. # 生成视频片段
  345. # with open("./output/storyboards_with_frames.json", "r", encoding='utf-8') as f:
  346. # final_storyboards = json.load(f)
  347. # storyboards_with_segments = video_generator.generate(
  348. # video_script_data=final_storyboards
  349. # )
  350. # with open("./output/storyboards_with_segments.json", "w", encoding='utf-8') as f:
  351. # json.dump(storyboards_with_segments, f, ensure_ascii=False, indent=4)
  352. # concat_videos("./output/storyboards_with_segments.json")