12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849 |
- import os
- import json
- import time
- from pathlib import Path
- from utils.logger_config import setup_logger
- from modules.audio_processing.audio_extractor import extract_audio
- from modules.audio_processing.voice_recognition import SenseVoiceTranscriber
- logger = setup_logger(__name__)
def process_audio(video_path):
    """Run the audio pipeline for one video: extract the audio, then transcribe it.

    Args:
        video_path (str): Path to the video file.

    Returns:
        The transcription produced by ``SenseVoiceTranscriber.transcribe``
        for the extracted audio (documented by the module as a ``str``;
        the exact shape depends on the transcriber).

    Raises:
        Exception: Any error raised while extracting or transcribing is
            logged together with ``video_path`` and then re-raised unchanged.
    """
    try:
        # Step 1: obtain the audio file path via extract_audio.
        extracted_audio = extract_audio(video_path)
        logger.info(f"音频文件提取:{extracted_audio}")

        # Step 2: speech-to-text on the extracted audio.
        logger.info("音频文件执行STT。。。")
        return SenseVoiceTranscriber().transcribe(extracted_audio)
    except Exception as err:
        # Log with the offending video path for context, then let the
        # caller see the original exception.
        logger.error(f"Error processing audio for video {video_path}: {err!s}")
        raise
if __name__ == "__main__":
    # Manual run: transcribe one hard-coded sample video, write the result
    # to transcript.json in the current working directory, and report the
    # wall-clock time taken.
    started_at = time.time()

    video_path = "/data/data/luosy/project/oral/data/raw_video/test_video.flv"
    transcript = process_audio(video_path)

    # ensure_ascii=False keeps non-ASCII text readable in the output file.
    with open("transcript.json", "w", encoding="utf-8") as out_file:
        json.dump(transcript, out_file, ensure_ascii=False, indent=4)

    elapsed = time.time() - started_at
    print(f"cost {elapsed} secs")
|