"""LLM helpers for a clothing talk-through video pipeline.

Two chat-completion helpers built on the Volcengine Ark runtime client:

* ``caption_correct`` fixes homophone typos in a caption and optionally
  splits it into two lines.
* ``director`` / ``director_json`` pick clips about fabric, cut and
  craftsmanship from a clip description and return three edit scripts as JSON.
"""

import json
import os
import re

import httpx  # kept from the original file; not referenced in this module
from volcenginesdkarkruntime import Ark

# The API key is read from the ARK_API_KEY environment variable instead of
# being committed to source control. Any key that was previously hard-coded
# in this file should be treated as leaked and rotated.
client = Ark(
    base_url="https://ark.cn-beijing.volces.com/api/v3",
    api_key=os.environ.get("ARK_API_KEY"),
    timeout=1800,
)

# Captions longer than this many characters must be split once.
_SPLIT_THRESHOLD = 15

_NO_SPLIT_RULE = "不需要断句,直接输出纠正错别字后的文本"
_SPLIT_RULE = """必须要断句,且只能断句一处;用符号"-"进行断句,断句位置要合理"""

# Filled in with str.format(user_prompt=..., cut=...).
_CAPTION_PROMPT_TEMPLATE = """
## 对输入文本进行错别字纠正,错别字通常都是因为字词的发音相似而引起的。如"歌莉娅"写成了"哥李呀";"的"写成了"得"。
## 用户输入:{user_prompt}
## 要求:
- 需要去除文本中的标点符号。
- {cut}
- 与"歌莉娅"发音相似的词,都要纠正为"歌莉娅"
- 只输出纠正错别字后的文本,不能有任何多余的文本输出
"""

# The closing ``` after the example was missing in the original prompt, which
# left the instructions that follow inside an unterminated code fence.
_DIRECTOR_SYSTEM_PROMPT = """
## 我需要剪辑一个衣服口播讲解视频,该视频需要包含三个视频片段;请从用户输入中挑选讲解衣服面料、版型、工艺的视频片段各一到三个;并输出三个衣服口播讲解视频脚本(各视频片段组合方式);并以JSON格式进行输出。
## 输出案例:
```json
{
    "面料":["clip_001.mp4", "clip_004.mp4", "clip_006.mp4"],
    "版型":["clip_011.mp4", "clip_002.mp4", "clip_007.mp4"],
    "工艺":["clip_012.mp4", "clip_013.mp4", "clip_014.mp4"],
    "脚本":[["clip_001.mp4", "clip_011.mp4", "clip_013.mp4"], ["clip_004.mp4", "clip_014.mp4", "clip_002.mp4"], ["clip_006.mp4", "clip_011.mp4", "clip_012.mp4"]]
}
```
## 严格按照输出案例的格式输出结果,不能输出任何多余的内容。
## 尽可能找句子比较长的视频片段
"""


def read_input(json_file):
    """Load a JSON file and return its content as a pretty-printed string.

    Args:
        json_file: Path of a UTF-8 encoded JSON file.

    Returns:
        The parsed data re-serialized with ``indent=4`` and non-ASCII
        characters left unescaped.
    """
    with open(json_file, 'r', encoding='utf-8') as file:
        data = json.load(file)
    # Re-serialize so the model receives consistently formatted JSON text.
    return json.dumps(data, ensure_ascii=False, indent=4)


def convert_json_to_dict(json_string, output_file_path=None):
    """Extract the first ``{...}`` span from a string and parse it as JSON.

    Args:
        json_string: Text that contains a JSON object, possibly surrounded by
            other text such as a Markdown code fence. ``None`` or an empty
            string is treated as "no JSON found".
        output_file_path: Optional path; when given, the parsed object is also
            written there as pretty-printed JSON. Missing parent directories
            are created.

    Returns:
        The parsed dictionary, or ``{}`` when no JSON object is found or it
        cannot be decoded (a message is printed in both cases).
    """
    # Greedy match from the first "{" to the last "}" so nested objects and
    # arrays stay inside the captured span.
    match = re.search(r'\{.*\}', json_string, re.DOTALL) if json_string else None
    if not match:
        print("错误: 未找到有效的 JSON 内容")
        return {}

    try:
        data_dict = json.loads(match.group(0))
    except json.JSONDecodeError as e:
        print(f"JSON 解码错误: {e}")
        return {}

    if output_file_path:
        parent = os.path.dirname(output_file_path)
        if parent:
            # The target directory may not exist yet on a first run.
            os.makedirs(parent, exist_ok=True)
        with open(output_file_path, 'w', encoding='utf-8') as file:
            json.dump(data_dict, file, ensure_ascii=False, indent=4)
    return data_dict


def caption_correct(user_prompt):
    """Ask the model to fix homophone typos in a caption.

    Captions longer than 15 characters are also split once: the model is told
    to mark the split with "-", which is converted to a newline here.

    Args:
        user_prompt: The caption text to correct.

    Returns:
        The corrected caption with every "-" replaced by a newline. An empty
        caption returns ``""`` without calling the model.
    """
    if not user_prompt:
        # Nothing to correct; skip the request.
        return ""

    cut = _SPLIT_RULE if len(user_prompt) > _SPLIT_THRESHOLD else _NO_SPLIT_RULE
    system_prompt = _CAPTION_PROMPT_TEMPLATE.format(user_prompt=user_prompt, cut=cut)

    completion = client.chat.completions.create(
        messages=[
            {"role": "system", "content": system_prompt},
        ],
        # Alternatives noted by the original author:
        # ep-20241018084532-cgm84, deepseek-v3-241226, deepseek-r1-250120
        model="deepseek-v3-241226",
        temperature=0.01,
        max_tokens=500,
    )
    # Guard against a missing message body before post-processing it.
    content = completion.choices[0].message.content or ""
    return content.replace("-", "\n")


def director(user_prompt):
    """Ask the model to pick clips and assemble three video scripts.

    Args:
        user_prompt: Clip description text sent as the user message
            (``director_json`` passes the JSON text of the input file).

    Returns:
        The raw model reply as a string (``""`` if the reply has no content).
        It is expected to contain a JSON object in the format shown in the
        system prompt.
    """
    completion = client.chat.completions.create(
        messages=[
            {"role": "system", "content": _DIRECTOR_SYSTEM_PROMPT},
            {"role": "user", "content": user_prompt},
        ],
        # Alternatives noted by the original author:
        # ep-20241018084532-cgm84, deepseek-v3-241226, deepseek-r1-250120
        model="deepseek-r1-250120",
        temperature=0.01,
        max_tokens=500,
    )
    return completion.choices[0].message.content or ""


def _script_output_path(json_path):
    """Derive the script output path, never returning the input path itself."""
    output_path = json_path.replace("filter_4", "script")
    if output_path == json_path:
        # No "filter_4" component to swap: add a suffix so the input file is
        # not overwritten by the result.
        root, ext = os.path.splitext(json_path)
        output_path = f"{root}_script{ext}"
    return output_path


def director_json(json_path):
    """Run ``director`` on a JSON file and save the parsed result.

    The result is written next to the input with ``"filter_4"`` replaced by
    ``"script"`` in the path; if the path has no ``"filter_4"`` part, a
    ``_script`` suffix is added to the file name instead.

    Args:
        json_path: Path of the input JSON file.

    Returns:
        The parsed script dictionary, or ``{}`` when the model reply contains
        no valid JSON.
    """
    user_input = read_input(json_path)
    answer = director(user_input)
    return convert_json_to_dict(answer, _script_output_path(json_path))


if __name__ == "__main__":
    print(director_json("output/filter_4/videoa.json"))