Spaces:

facebook
/

omniasr-transcriptions

Running on A100

App Files Files Community

omniasr-transcriptions / server /video_utils.py

jeanma

Omnilingual ASR transcription demo

ae238b3 verified about 1 month ago

raw

history blame contribute delete

6.26 kB

	import json
	import logging
	import os
	import subprocess
	import tempfile
	from pathlib import Path

	# Module-level logger, named after this module via __name__.
	logger = logging.getLogger(__name__)


	def combine_video_with_subtitles(
	    video_file_path: str,
	    subtitle_content: str,
	    subtitle_format: str = "srt",
	    output_format: str = "mp4",
	    language: str = "eng",
	) -> str:
	    """
	    Embed a subtitle track into a video file using FFmpeg.

	    The subtitle text is written to a temporary UTF-8 file, muxed together
	    with the input video (video and audio streams are stream-copied, not
	    re-encoded), and the temporary file is removed afterwards.

	    Args:
	        video_file_path: Path to the input video file
	        subtitle_content: String content of the subtitles (SRT or WebVTT)
	        subtitle_format: Format of subtitles ("srt" or "webvtt"); also used
	            as the temporary file's extension
	        output_format: Output container format ("mp4" or "mkv"); any value
	            other than "mkv" (case-insensitive) uses the MP4 command line
	        language: Language code stored as metadata on the subtitle track

	    Returns:
	        Path to the output video file with embedded subtitles, written next
	        to the input as "<stem>_with_subtitles.<output_format>"

	    Raises:
	        RuntimeError: If FFmpeg is not installed or exits with an error
	    """
	    # Generate output filename next to the input file
	    input_path = Path(video_file_path)
	    output_path = (
	        input_path.parent / f"{input_path.stem}_with_subtitles.{output_format}"
	    )

	    # delete=False: FFmpeg has to open the file by name after we close it.
	    # Explicit UTF-8 so non-ASCII subtitles do not depend on the locale.
	    sub_file = tempfile.NamedTemporaryFile(
	        mode="w",
	        suffix=f".{subtitle_format}",
	        encoding="utf-8",
	        delete=False,
	    )
	    subtitle_file_path = sub_file.name

	    try:
	        # Writing happens inside the try so a failed write still reaches the
	        # cleanup in the finally clause.
	        with sub_file:
	            sub_file.write(subtitle_content)

	        if output_format.lower() == "mkv":
	            # MKV has better subtitle support
	            codec = "webvtt" if subtitle_format.lower() == "webvtt" else "srt"

	            cmd = [
	                "ffmpeg",
	                "-y",  # -y to overwrite output file
	                "-i",
	                video_file_path,
	                "-i",
	                subtitle_file_path,
	                "-c:v",
	                "copy",  # Copy video stream
	                "-c:a",
	                "copy",  # Copy audio stream
	                "-c:s",
	                codec,  # Subtitle codec
	                "-metadata:s:s:0",
	                f"language={language}",
	                str(output_path),
	            ]
	        else:
	            # MP4 format
	            cmd = [
	                "ffmpeg",
	                "-y",
	                "-i",
	                video_file_path,
	                "-i",
	                subtitle_file_path,
	                "-c:v",
	                "copy",  # Copy video stream
	                "-c:a",
	                "copy",  # Copy audio stream
	                "-c:s:0",
	                "mov_text",  # MP4 subtitle format
	                "-map",
	                "0:v",  # Map video from first input
	                "-map",
	                # Trailing "?" makes the mapping optional, so a video without
	                # an audio stream is accepted (as in the MKV branch).
	                "0:a?",
	                "-map",
	                "1:s",  # Map subtitles from second input
	                "-metadata:s:s:0",
	                f"language={language}",
	                "-disposition:s:0",
	                "default",  # Make subtitles default
	                str(output_path),
	            ]

	        # Execute FFmpeg command
	        logger.info("Executing FFmpeg command: %s", " ".join(cmd))
	        result = subprocess.run(cmd, capture_output=True, text=True, check=True)

	        # Log FFmpeg output for debugging
	        if result.stdout:
	            logger.debug("FFmpeg stdout: %s", result.stdout)
	        if result.stderr:
	            logger.debug("FFmpeg stderr: %s", result.stderr)

	        logger.info("FFmpeg completed successfully, output file: %s", output_path)

	        return str(output_path)

	    except subprocess.CalledProcessError as e:
	        raise RuntimeError(f"FFmpeg failed: {e.stderr}") from e
	    except FileNotFoundError as e:
	        raise RuntimeError("FFmpeg not found. Please install FFmpeg.") from e
	    finally:
	        # Clean up temporary subtitle file; best-effort, a leftover temp file
	        # must not mask the real result or error.
	        try:
	            os.unlink(subtitle_file_path)
	        except OSError:
	            pass


	def check_ffmpeg_available() -> bool:
	    """
	    Check if FFmpeg is available on the system.

	    Runs `ffmpeg -version` and reports whether it completed successfully.

	    Returns:
	        True if the command ran and exited with status 0, False if the
	        executable could not be started or exited with an error
	    """
	    try:
	        subprocess.run(["ffmpeg", "-version"], capture_output=True, check=True)
	    except (subprocess.CalledProcessError, OSError):
	        # OSError covers FileNotFoundError as well as e.g. PermissionError
	        # for a non-executable binary, so this probe never raises.
	        return False
	    return True


	def extract_audio_from_video(video_file_path: str, output_audio_path: str = None) -> str:
	    """
	    Extract audio from video file using FFmpeg.

	    The audio is written as 16-bit PCM, 16 kHz, mono.

	    Args:
	        video_file_path: Path to the input video file
	        output_audio_path: Path for output audio file (optional). When
	            omitted, the input path with its suffix replaced by ".wav" is
	            used; if that would be the input file itself,
	            "<stem>_extracted.wav" in the same directory is used instead.

	    Returns:
	        Path to the extracted audio file

	    Raises:
	        RuntimeError: If FFmpeg is not installed or the extraction fails
	    """
	    if not check_ffmpeg_available():
	        raise RuntimeError("FFmpeg not found. Please install FFmpeg.")

	    # Generate output filename if not provided
	    if output_audio_path is None:
	        input_path = Path(video_file_path)
	        default_output = input_path.with_suffix(".wav")
	        if default_output == input_path:
	            # FFmpeg cannot edit a file in place: an output path equal to
	            # the input path makes it exit with an error.
	            default_output = input_path.with_name(
	                f"{input_path.stem}_extracted.wav"
	            )
	        output_audio_path = str(default_output)

	    # FFmpeg command to extract audio
	    cmd = [
	        "ffmpeg",
	        "-i", video_file_path,
	        "-vn",  # No video
	        "-acodec", "pcm_s16le",  # 16-bit PCM
	        "-ar", "16000",  # 16kHz sample rate
	        "-ac", "1",  # Mono
	        "-y",  # Overwrite output file if it exists
	        output_audio_path,
	    ]

	    try:
	        subprocess.run(cmd, capture_output=True, text=True, check=True)
	    except subprocess.CalledProcessError as e:
	        raise RuntimeError(f"FFmpeg audio extraction failed: {e.stderr}") from e
	    except FileNotFoundError as e:
	        raise RuntimeError("FFmpeg not found. Please install FFmpeg.") from e

	    logger.info("Audio extracted successfully to: %s", output_audio_path)
	    return output_audio_path


	def get_video_info(video_file_path: str) -> dict:
	    """
	    Get basic information about a video file.

	    Runs `ffprobe` with JSON output, requesting the format and stream
	    sections, and parses the result. This is best-effort: any failure to
	    run ffprobe or to parse its output yields an empty dict instead of an
	    exception.

	    Args:
	        video_file_path: Path to the video file to inspect

	    Returns:
	        The dict parsed from ffprobe's JSON output, or {} on failure
	    """
	    cmd = [
	        "ffprobe",
	        "-v",
	        "quiet",
	        "-print_format",
	        "json",
	        "-show_format",
	        "-show_streams",
	        video_file_path,
	    ]

	    try:
	        result = subprocess.run(cmd, capture_output=True, text=True, check=True)
	        return json.loads(result.stdout)
	    except (subprocess.CalledProcessError, OSError, ValueError):
	        # CalledProcessError: ffprobe exited with an error.
	        # OSError: ffprobe is missing (FileNotFoundError) or not runnable.
	        # ValueError: invalid JSON (JSONDecodeError) or undecodable output.
	        return {}