video_cut.py 3.4 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697
  1. import os
  2. import json
  3. import time
  4. import subprocess
  5. from tqdm import tqdm
  6. from moviepy.video.io.ffmpeg_tools import ffmpeg_extract_subclip
  7. from utils.caption import generate_srt_file, burn_subtitles_ffmpeg
  8. from utils.logger_config import setup_logger
  9. logger = setup_logger(__name__)
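# # Video cutting - ffmpeg (disabled; kept for reference)
# # NOTE(review): '-c:v' is passed twice below (h264_nvenc and copy), which is
# # contradictory; keep only one of them if this function is ever revived.
# def ffmpeg_cut(input_path, output_path, start_time, end_time):
#     cmd = [
#         'ffmpeg',
#         '-hwaccel', 'cuvid',      # enable NVIDIA GPU acceleration (optional)
#         '-i', input_path,
#         '-ss', str(start_time),
#         '-to', str(end_time),
#         '-c:v', 'h264_nvenc',     # use NVIDIA hardware encoding (optional)
#         '-c:a', 'copy',
#         '-c:v', 'copy',           # key point: avoid re-encoding, copy the video stream directly
#         output_path
#     ]
#     subprocess.run(cmd, check=True)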
# # Video cutting - ffmpeg (disabled; kept for reference)
# def cut_video_ffmpeg(input_path, output_path, start_time, duration):
#     command = [
#         'ffmpeg',
#         '-ss', str(start_time),   # start time (format: 00:00:00 or a number of seconds)
#         '-i', input_path,
#         '-t', str(duration),      # duration
#         '-c', 'copy',             # copy the streams directly, no re-encoding
#         '-y',                     # overwrite the output file
#         output_path
#     ]
#     subprocess.run(command, check=True)
  36. # 视频裁切-moviepy
  37. def moviepy_cut(input_path, output_path, start_time, end_time):
  38. """
  39. start_time: xxx sec
  40. end_time: xxx secs
  41. """
  42. logger.info(f"视频裁切中:{output_path} - start: {start_time} - end: {end_time}")
  43. ffmpeg_extract_subclip(input_path, start_time, end_time, targetname=output_path)
  44. def video_cut_by_json(json_path, add_caption=False):
  45. # 读取JSON文件
  46. try:
  47. with open(json_path, 'r', encoding='utf-8') as file:
  48. data = json.load(file)
  49. except FileNotFoundError:
  50. print(f"错误: 文件 '{json_path}' 不存在")
  51. raise
  52. # 获取原视频路径与视频文件名
  53. raw_video_path = data["raw_video"]
  54. raw_video_name = os.path.splitext(os.path.basename(raw_video_path))[0]
  55. # 读取视频片段元数据,进行:视频片段裁切、SRT文件生成、视频加字幕
  56. clips_meta = data["oral_dict_list"]
  57. for i, clip_meta in tqdm(enumerate(clips_meta)):
  58. # 0、去除timestamp字段
  59. clip_meta.pop("timestamp", None)
  60. # 1、提取语句内容,开始时间戳,结束时间戳
  61. clip_text, clip_start_time, clip_end_time = clip_meta["text"], clip_meta["start"] / 1000, clip_meta["end"] / 1000
  62. # 2、裁剪视频片段
  63. clip_name = clip_meta["clip_name"]
  64. clip_path = "./data/clip_video/" + clip_name
  65. moviepy_cut(raw_video_path, clip_path, clip_start_time, clip_end_time)
  66. # 海外线上业务不添加字幕;国内线上业务添加字幕
  67. if add_caption:
  68. # 3、生成字幕SRT文件
  69. srt_list = [(0.0, (clip_end_time - clip_start_time), clip_text)]
  70. srt_path = "./data/clip_srt/" + clip_name
  71. generate_srt_file(srt_list, srt_path)
  72. # 4、视频添加字幕
  73. output_path = "./output/video_clips/" + clip_name
  74. burn_subtitles_ffmpeg(clip_path, srt_path, output_path, style_config=None)
  75. else:
  76. pass
  77. if __name__ == "__main__":
  78. json_path = "new_transcript.json"
  79. video_cut_by_json(json_path)