# call_sencevoice.py (1.4 KB)

  1. import os
  2. import json
  3. import time
  4. from pathlib import Path
  5. from utils.logger_config import setup_logger
  6. from modules.audio_processing.audio_extractor import extract_audio
  7. from modules.audio_processing.voice_recognition import SenseVoiceTranscriber
# Module-level logger shared by the code in this file; obtained from the
# project's setup_logger helper, which is called with this module's __name__.
logger = setup_logger(__name__)
  9. def process_audio(video_path):
  10. """
  11. Process the audio part of the video: extract and transcribe
  12. Args:
  13. video_path (str): Path to the video file
  14. Returns:
  15. str: Audio transcription result
  16. Raises:
  17. Exception: When audio processing fails
  18. """
  19. try:
  20. # 1. Extract audio
  21. audio_path = extract_audio(video_path)
  22. logger.info(f"音频文件提取:{audio_path}")
  23. # 2. Transcribe audio
  24. logger.info("音频文件执行STT。。。")
  25. transcriber = SenseVoiceTranscriber()
  26. transcript = transcriber.transcribe(audio_path)
  27. return transcript
  28. except Exception as e:
  29. logger.error(f"Error processing audio for video {video_path}: {str(e)}")
  30. raise
  31. if __name__ == "__main__":
  32. start = time.time()
  33. video_path = "/data/data/luosy/project/oral/data/raw_video/test_video.flv"
  34. transcript = process_audio(video_path)
  35. with open("transcript.json", "w", encoding="utf-8") as f:
  36. json.dump(transcript, f, ensure_ascii=False, indent=4)
  37. print(f"cost {time.time() - start} secs")