import os
import json
import sys
import time
from pathlib import Path

from utils.logger_config import setup_logger
from modules.audio_processing.audio_extractor import extract_audio
from modules.audio_processing.voice_recognition import SenseVoiceTranscriber

logger = setup_logger(__name__)

# Sample video used when the script is started without a command-line argument.
_DEFAULT_VIDEO_PATH = "/data/data/luosy/project/oral/data/raw_video/test_video.flv"


def process_audio(video_path):
    """Extract the audio track of a video and run speech-to-text on it.

    The work is delegated to two project helpers: ``extract_audio`` produces
    the audio file, and ``SenseVoiceTranscriber.transcribe`` turns that file
    into a transcript. A new transcriber is created on every call.

    Args:
        video_path (str): Path to the video file.

    Returns:
        The value returned by ``SenseVoiceTranscriber.transcribe``. The
        original documentation describes it as a ``str`` transcription;
        NOTE(review): the script entry point serializes it with ``json.dump``,
        so confirm the actual type against the transcriber.

    Raises:
        Exception: Any error raised during extraction or transcription is
            logged and then re-raised unchanged.
    """
    try:
        # 1. Extract audio
        audio_path = extract_audio(video_path)
        logger.info(f"音频文件提取:{audio_path}")

        # 2. Transcribe audio
        logger.info("音频文件执行STT。。。")
        transcriber = SenseVoiceTranscriber()
        return transcriber.transcribe(audio_path)
    except Exception as e:
        logger.error(f"Error processing audio for video {video_path}: {str(e)}")
        # Bare raise keeps the original exception type and traceback for callers.
        raise


def _main(argv=None):
    """Transcribe one video and write the result to ``transcript.json``.

    Args:
        argv (list[str] | None): Command-line arguments without the program
            name. Defaults to ``sys.argv[1:]``. The first argument, when
            present, is the video path; otherwise the built-in sample path
            is used, which matches the script's previous behavior.
    """
    args = sys.argv[1:] if argv is None else argv
    video_path = args[0] if args else _DEFAULT_VIDEO_PATH

    # perf_counter is monotonic, so the reported duration cannot be skewed by
    # system clock adjustments the way time.time() differences can.
    start = time.perf_counter()
    transcript = process_audio(video_path)

    with open("transcript.json", "w", encoding="utf-8") as f:
        json.dump(transcript, f, ensure_ascii=False, indent=4)

    print(f"cost {time.perf_counter() - start} secs")


if __name__ == "__main__":
    _main()