tools.py 8.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246
  1. import os
  2. import re
  3. import toon
  4. import json
  5. import requests
  6. from taskflow import get_logger
  7. import tos
  8. from dotenv import load_dotenv
# Module-level logger used by the helpers below; obtained from taskflow's
# get_logger under the name "examples.video_create.utils.tools".
logger = get_logger("examples.video_create.utils.tools")
  10. def string_to_json(markdown_string):
  11. try:
  12. json_content = re.sub(r'^```json|\n```$', '', markdown_string, flags=re.MULTILINE).strip()
  13. if not json_content:
  14. json_content = markdown_string
  15. # raise ValueError("字符串中未找到有效的JSON内容")
  16. # 解析JSON内容
  17. json_data = json.loads(json_content)
  18. return json_data
  19. except Exception as e:
  20. logger.error(f"生成结果解析失败:\n{markdown_string}")
  21. raise e
  22. def download_video(video_url, output_path):
  23. """
  24. 根据视频URL下载视频到本地指定路径
  25. 参数:
  26. video_url (str): 视频的URL地址
  27. output_path (str): 本地保存路径(包含文件名和扩展名)
  28. 返回:
  29. bool: 下载成功返回True,失败返回False
  30. """
  31. try:
  32. # 创建目录(如果不存在)
  33. os.makedirs(os.path.dirname(output_path), exist_ok=True)
  34. # 发送HTTP GET请求
  35. headers = {
  36. 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'
  37. }
  38. response = requests.get(video_url, headers=headers, stream=True)
  39. response.raise_for_status() # 检查请求是否成功
  40. # 获取文件总大小(用于进度显示)
  41. total_size = int(response.headers.get('content-length', 0))
  42. # 以二进制写入模式保存视频
  43. with open(output_path, 'wb') as f:
  44. downloaded_size = 0
  45. for chunk in response.iter_content(chunk_size=8192):
  46. if chunk: # 过滤掉保持连接的新块
  47. f.write(chunk)
  48. downloaded_size += len(chunk)
  49. logger.info(f"\n视频已成功保存到: {output_path}")
  50. return True
  51. except requests.exceptions.RequestException as e:
  52. logger.info(f"下载视频时出错: {e}")
  53. except IOError as e:
  54. logger.info(f"保存视频时出错: {e}")
  55. except Exception as e:
  56. logger.info(f"发生未知错误: {e}")
  57. return False
  58. def download_image(image_url, output_path):
  59. """
  60. 根据图片URL下载图片到本地指定路径
  61. 参数:
  62. image_url (str): 图片的URL地址
  63. output_path (str): 本地保存路径(包含文件名和扩展名)
  64. """
  65. try:
  66. # 创建目录(如果不存在)
  67. os.makedirs(os.path.dirname(output_path), exist_ok=True)
  68. # 发送HTTP GET请求
  69. headers = {
  70. 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'
  71. }
  72. response = requests.get(image_url, headers=headers, stream=True)
  73. response.raise_for_status() # 检查请求是否成功
  74. # 以二进制写入模式保存图片
  75. with open(output_path, 'wb') as f:
  76. for chunk in response.iter_content(chunk_size=8192):
  77. if chunk: # 过滤掉保持连接的新块
  78. f.write(chunk)
  79. logger.info(f"\n图片已成功保存到: {output_path}")
  80. return True
  81. except requests.exceptions.RequestException as e:
  82. logger.info(f"下载图片时出错: {e}")
  83. except IOError as e:
  84. logger.info(f"保存图片时出错: {e}")
  85. except Exception as e:
  86. logger.info(f"发生未知错误: {e}")
  87. return False
  88. def efficient_sort(a, b):
  89. """
  90. 高效排序算法,通过贪心策略
  91. """
  92. n = len(a)
  93. selected = [False] * n
  94. result = []
  95. available = set()
  96. # 统计每个元素的依赖
  97. dependencies = []
  98. for i in range(n):
  99. # 如果a[i]在b[i]中,它可以立即被处理
  100. immediate = a[i] in b[i]
  101. dependencies.append((immediate, i))
  102. # 先处理可以立即处理的元素
  103. for immediate, i in dependencies:
  104. if immediate and not selected[i]:
  105. result.append(i)
  106. selected[i] = True
  107. available.update(b[i])
  108. # 然后处理其他元素
  109. while len(result) < n:
  110. progress = False
  111. for i in range(n):
  112. if not selected[i] and a[i] in available:
  113. result.append(i)
  114. selected[i] = True
  115. available.update(b[i])
  116. progress = True
  117. # 如果没有进展,选择第一个未处理的元素
  118. if not progress:
  119. for i in range(n):
  120. if not selected[i]:
  121. result.append(i)
  122. selected[i] = True
  123. available.update(b[i])
  124. break
  125. return result
# Load environment variables (runs once, as an import-time side effect).
load_dotenv()
# Read the TOS access key (AK) and secret key (SK) from the environment;
# os.getenv returns None for a variable that is not set.
ak = os.getenv('TOS_ACCESS_KEY')
sk = os.getenv('TOS_SECRET_KEY')
# Storage bucket configuration, passed to the TOS client in upload_file_to_tos.
endpoint = "https://tos-cn-guangzhou.volces.com"
region = "cn-guangzhou"
bucket_name = "guide-material"
  135. def upload_file_to_tos(file_name: str) -> str:
  136. """
  137. 上传文件到TOS存储桶并返回访问URL
  138. Args:
  139. file_name (str): 本地文件的完整路径
  140. Returns:
  141. str: 上传文件的访问URL
  142. Raises:
  143. Exception: 上传过程中的任何错误
  144. """
  145. try:
  146. # 检查文件是否存在
  147. if not os.path.exists(file_name):
  148. raise FileNotFoundError(f"文件不存在: {file_name}")
  149. # 从文件路径中提取文件名作为object_key,确保使用正斜杠
  150. filename = os.path.basename(file_name)
  151. object_key = f"video-create/{filename}".replace("\\", "/")
  152. logger.info(f'开始上传文件: {file_name}')
  153. logger.info(f'文件将保存为: {object_key}')
  154. # 创建客户端并上传文件
  155. client = tos.TosClientV2(ak, sk, endpoint, region)
  156. client.put_object_from_file(bucket_name, object_key, file_name)
  157. # 验证上传是否成功 - 尝试获取对象元数据
  158. try:
  159. head_response = client.head_object(bucket_name, object_key)
  160. if head_response is None:
  161. raise Exception("无法验证文件是否上传成功:head_object返回None")
  162. logger.info(f'文件上传验证成功,ETag: {getattr(head_response, "etag", "N/A")}')
  163. except Exception as verify_error:
  164. logger.warning(f'验证上传状态时出现警告: {str(verify_error)}')
  165. # 不抛出异常,因为上传可能已经成功,只是验证失败
  166. # 生成访问URL,确保object_key使用正斜杠
  167. object_key_normalized = object_key.replace("\\", "/")
  168. object_url = f"https://testdgxcx-oss.gloria.com.cn/{object_key_normalized}"
  169. logger.info(f'文件上传成功,访问URL: {object_url}')
  170. logger.info(f'Object Key (用于调试): {object_key_normalized}')
  171. return object_url
  172. except tos.exceptions.TosClientError as e:
  173. error_msg = f'上传失败,客户端错误: message={e.message}, cause={e.cause}'
  174. logger.error(error_msg)
  175. raise Exception(error_msg)
  176. except tos.exceptions.TosServerError as e:
  177. error_msg = f'上传失败,服务端错误: code={e.code}, request_id={e.request_id}, message={e.message}, status_code={e.status_code}, ec={e.ec}, request_url={e.request_url}'
  178. logger.error(error_msg)
  179. raise Exception(error_msg)
  180. except Exception as e:
  181. error_msg = f'上传失败,未知错误: {str(e)}'
  182. logger.error(error_msg)
  183. raise Exception(error_msg)
  184. if __name__ == "__main__":
  185. # data = {
  186. # "name": "John",
  187. # "age": 30,
  188. # "city": "New York"
  189. # }
  190. # toon_str = toon.encode(data)
  191. # print(type(toon_str), toon_str)
  192. # decoded_data = toon.decode(toon_str)
  193. # print(type(decoded_data), decoded_data)
  194. # python -m utils.upload
  195. test_file = "./image.jpg"
  196. try:
  197. url = upload_file_to_tos(test_file)
  198. print(f"文件上传成功,访问URL: {url}")
  199. except Exception as e:
  200. print(f"上传失败: {str(e)}")