123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113 |
- import os
- import re
- import json
- import httpx
- from volcenginesdkarkruntime import Ark
# Shared Ark client; caption_correct() and director() below both send their
# chat-completion requests through it.
# NOTE(review): the fallback key is committed to source and should be treated
# as leaked. Rotate it and provide the replacement through the ARK_API_KEY
# environment variable, which takes precedence when it is set.
client = Ark(
    base_url="https://ark.cn-beijing.volces.com/api/v3",
    api_key=os.environ.get("ARK_API_KEY", "817dff39-5586-4f9b-acba-55004167c0b1"),
    timeout=1800,
)
def read_input(json_file):
    """Load a UTF-8 JSON file and return its content re-serialized as text.

    Args:
        json_file: Path of the JSON file to read.

    Returns:
        The parsed data dumped back to a string with 4-space indentation and
        non-ASCII characters kept as-is (not escaped).
    """
    with open(json_file, mode='r', encoding='utf-8') as handle:
        parsed = json.load(handle)
    # Serialize the parsed data back into a string.
    return json.dumps(parsed, ensure_ascii=False, indent=4)
def convert_json_to_dict(json_string, output_file_path=None):
    """Extract the JSON object embedded in a text and return it as a dict.

    The span from the first "{" to the last "}" is parsed first. If that span
    is not valid JSON (for example because explanatory text containing braces
    follows the object), the object that starts at the first "{" is decoded on
    its own and anything after it is ignored.

    Args:
        json_string: Text that contains a JSON object somewhere inside it.
        output_file_path: Optional path; when given and parsing succeeds, the
            dict is also written there as indented UTF-8 JSON. Missing parent
            directories are created.

    Returns:
        The decoded dict, or an empty dict when no braces are found or the
        content cannot be decoded (a message is printed in both cases).
    """
    match = re.search(r'\{.*\}', json_string, re.DOTALL)
    if not match:
        print("错误: 未找到有效的 JSON 内容")
        return {}
    json_content = match.group(0)  # text between the first "{" and the last "}"

    try:
        # Try to turn the whole extracted span into a dict.
        data_dict = json.loads(json_content)
    except json.JSONDecodeError as e:
        try:
            # The greedy span may include trailing non-JSON text; decode only
            # the object that begins at the first brace.
            data_dict, _ = json.JSONDecoder().raw_decode(json_content)
        except json.JSONDecodeError:
            print(f"JSON 解码错误: {e}")
            return {}

    if output_file_path:
        parent_dir = os.path.dirname(output_file_path)
        if parent_dir:
            # open(..., 'w') does not create directories, so make them first.
            os.makedirs(parent_dir, exist_ok=True)
        with open(output_file_path, 'w', encoding='utf-8') as file:
            json.dump(data_dict, file, ensure_ascii=False, indent=4)
    return data_dict
-
def caption_correct(user_prompt):
    """Ask the chat model to correct sound-alike typos in a caption.

    The caption is embedded directly in the system prompt. Captions longer
    than 15 characters are requested with a single "-" break; shorter ones
    are requested without any break. Every "-" in the reply is converted to
    a newline before returning.

    Args:
        user_prompt: The caption text to correct.

    Returns:
        The model's reply with "-" replaced by newline characters.
    """
    if len(user_prompt) > 15:
        break_rule = '必须要断句,且只能断句一处;用符号"-"进行断句,断句位置要合理'
    else:
        break_rule = "不需要断句,直接输出纠正错别字后的文本"

    system_prompt = f"""
## 对输入文本进行错别字纠正,错别字通常都是因为字词的发音相似而引起的。如"歌莉娅"写成了"哥李呀";"的"写成了"得"。
## 用户输入:{user_prompt}
## 要求:
- 需要去除文本中的标点符号。
- {break_rule}
- 与"歌莉娅"发音相似的词,都要纠正为"歌莉娅"
- 只输出纠正错别字后的文本,不能有任何多余的文本输出
"""
    # Only a system message is sent; the caption already sits inside it.
    conversation = [{"role": "system", "content": system_prompt}]
    # Other model ids noted next to this call in the original:
    # ep-20241018084532-cgm84, deepseek-v3-241226, deepseek-r1-250120
    response = client.chat.completions.create(
        messages=conversation,
        model="deepseek-v3-241226",
        temperature=0.01,
        max_tokens=500,
    )
    reply_text = response.choices[0].message.content
    return reply_text.replace("-", "\n")
def director(user_prompt):
    """Ask the chat model to pick clips and propose three script combinations.

    The system prompt requests one to three clips each for fabric, cut and
    craftsmanship, plus three clip combinations, all formatted as JSON like
    the embedded example.

    Args:
        user_prompt: Text sent as the user message (the clip descriptions).

    Returns:
        The message content of the first completion choice, unparsed.
    """
    system_prompt = """
## 我需要剪辑一个衣服口播讲解视频,该视频需要包含三个视频片段;请从用户输入中挑选讲解衣服面料、版型、工艺的视频片段各一到三个;并输出三个衣服口播讲解视频脚本(各视频片段组合方式);并以JSON格式进行输出。
## 输出案例:
```json
{
"面料":["clip_001.mp4", "clip_004.mp4", "clip_006.mp4"],
"版型":["clip_011.mp4", "clip_002.mp4", "clip_007.mp4"],
"工艺":["clip_012.mp4", "clip_013.mp4", "clip_014.mp4"],
"脚本":[["clip_001.mp4", "clip_011.mp4", "clip_013.mp4"], ["clip_004.mp4", "clip_014.mp4", "clip_002.mp4"], ["clip_006.mp4", "clip_011.mp4", "clip_012.mp4"]]
}
## 严格按照输出案例的格式输出结果,不能输出任何多余的内容。
## 尽可能找句子比较长的视频片段
"""
    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ]
    # Other model ids noted next to this call in the original:
    # ep-20241018084532-cgm84, deepseek-v3-241226, deepseek-r1-250120
    response = client.chat.completions.create(
        messages=conversation,
        model="deepseek-r1-250120",
        temperature=0.01,
        max_tokens=500,
    )
    first_choice = response.choices[0]
    return first_choice.message.content
def director_json(json_path):
    """Run the director prompt on a JSON file and save the parsed answer.

    The file is read with read_input(), sent to director(), and the reply is
    parsed and written by convert_json_to_dict().

    Args:
        json_path: Path of the input JSON file. Every "filter_4" in the path
            is replaced by "script" to form the output path.

    Returns:
        The dict returned by convert_json_to_dict() (empty when the reply
        could not be parsed).
    """
    user_input = read_input(json_path)
    answer = director(user_input)
    output_path = json_path.replace("filter_4", "script")
    if output_path == json_path:
        # Without "filter_4" in the path the replace is a no-op and the result
        # would be written over the input file; use a sibling file instead.
        stem, ext = os.path.splitext(json_path)
        output_path = f"{stem}_script{ext}"
    return convert_json_to_dict(answer, output_path)
if __name__ == "__main__":
    # Script entry point: build the script plan for the sample input and show it.
    script_plan = director_json("output/filter_4/videoa.json")
    print(script_plan)
|