# File: sdb-back/app/services/sound_normalizer_service.py
# (804 lines, 26 KiB, Python)

"""Sound normalization service using ffmpeg loudnorm filter."""
import hashlib
import json
import logging
import re
from pathlib import Path
import ffmpeg
from app.database import db
from app.models.sound import Sound
logger = logging.getLogger(__name__)
class SoundNormalizerService:
    """Service for normalizing sound files using ffmpeg loudnorm.

    Every method is static. Normalization entry points report their outcome
    as a plain dict carrying a ``"success"`` flag instead of raising, so
    callers can serialize the result directly.
    """

    # Audio file extensions this service declares as supported.
    # NOTE(review): not referenced by any method in this class - confirm
    # whether callers elsewhere rely on it.
    SUPPORTED_EXTENSIONS = {
        ".mp3",
        ".wav",
        ".ogg",
        ".flac",
        ".m4a",
        ".aac",
        ".opus",
    }

    # Source directory for each sound type code (``Sound.type``).
    SOUND_DIRS = {
        "SDB": "sounds/soundboard",
        "SAY": "sounds/say",
        "STR": "sounds/stream",
    }

    # Destination directory for normalized output, keyed like SOUND_DIRS.
    NORMALIZED_DIRS = {
        "SDB": "sounds/normalized/soundboard",
        "SAY": "sounds/normalized/say",
        "STR": "sounds/normalized/stream",
    }

    # Targets handed to the loudnorm filter as I / TP / LRA.
    # ``print_format`` is only used by the single-pass run; the two-pass
    # run forces ``json`` (analysis) and ``summary`` (encode).
    LOUDNORM_PARAMS = {
        "integrated": -16,
        "true_peak": -1.5,
        "lra": 11.0,
        "print_format": "summary",
    }

    # (label in loudnorm's summary output, key in the returned stats dict)
    _SUMMARY_FIELDS = (
        ("Input Integrated:", "input_integrated"),
        ("Input True Peak:", "input_true_peak"),
        ("Input LRA:", "input_lra"),
        ("Output Integrated:", "output_integrated"),
        ("Output True Peak:", "output_true_peak"),
        ("Output LRA:", "output_lra"),
    )

    @staticmethod
    def normalize_sound(
        sound_id: int,
        overwrite: bool = False,
        two_pass: bool = True,
    ) -> dict:
        """Normalize a specific sound file using ffmpeg loudnorm.

        The output is always written as ``<stem>.wav`` inside the normalized
        directory matching the sound's type. On success the sound record is
        updated with the normalized file's metadata and committed.

        Args:
            sound_id: ID of the sound to normalize
            overwrite: Whether to overwrite existing normalized file
            two_pass: Whether to use two-pass normalization (default: True)

        Returns:
            dict: Result of the normalization operation. ``"success"`` is
            always present; failures carry ``"error"``, successes carry the
            paths, metadata and ``"loudnorm_stats"``.
        """
        try:
            sound = Sound.query.get(sound_id)
            if not sound:
                return {
                    "success": False,
                    "error": f"Sound with ID {sound_id} not found",
                }

            # Get directories based on sound type
            sound_dir = SoundNormalizerService.SOUND_DIRS.get(sound.type)
            normalized_dir = SoundNormalizerService.NORMALIZED_DIRS.get(
                sound.type,
            )
            if not sound_dir or not normalized_dir:
                return {
                    "success": False,
                    "error": f"Unsupported sound type: {sound.type}",
                }

            source_path = Path(sound_dir) / sound.filename
            if not source_path.exists():
                return {
                    "success": False,
                    "error": f"Source file not found: {source_path}",
                }

            # Always output as WAV regardless of input format
            normalized_filename = f"{Path(sound.filename).stem}.wav"
            normalized_path = Path(normalized_dir) / normalized_filename
            normalized_path.parent.mkdir(parents=True, exist_ok=True)

            if normalized_path.exists() and not overwrite:
                return {
                    "success": False,
                    "error": f"Normalized file already exists: {normalized_path}. Use overwrite=True to replace it.",
                }

            logger.info(
                f"Starting normalization of {sound.name} ({sound.filename})",
            )

            if two_pass:
                run_normalization = (
                    SoundNormalizerService._normalize_with_ffmpeg
                )
            else:
                run_normalization = (
                    SoundNormalizerService._normalize_with_ffmpeg_single_pass
                )
            result = run_normalization(str(source_path), str(normalized_path))
            if not result["success"]:
                return result

            # Calculate normalized file metadata
            normalized_metadata = (
                SoundNormalizerService._get_normalized_metadata(
                    str(normalized_path),
                )
            )

            # Update sound record with normalized information
            sound.set_normalized_info(
                normalized_filename=normalized_filename,
                normalized_duration=normalized_metadata["duration"],
                normalized_size=normalized_metadata["size"],
                normalized_hash=normalized_metadata["hash"],
            )

            # A failed commit leaves the session unusable until it is rolled
            # back, which would break every later sound in a bulk run.
            try:
                db.session.commit()
            except Exception:
                db.session.rollback()
                raise

            logger.info(f"Successfully normalized {sound.name}")
            return {
                "success": True,
                "sound_id": sound_id,
                "sound_name": sound.name,
                "source_path": str(source_path),
                "normalized_path": str(normalized_path),
                "normalized_filename": normalized_filename,
                "normalized_duration": normalized_metadata["duration"],
                "normalized_size": normalized_metadata["size"],
                "normalized_hash": normalized_metadata["hash"],
                "loudnorm_stats": result.get("stats", {}),
            }
        except Exception as e:
            logger.error(f"Error normalizing sound {sound_id}: {e}")
            return {"success": False, "error": str(e)}

    @staticmethod
    def normalize_all_sounds(
        overwrite: bool = False,
        limit: int = None,
        two_pass: bool = True,
    ) -> dict:
        """Normalize all soundboard files.

        Only sounds of type ``"SDB"`` are selected. A sound whose normalized
        file already exists (and ``overwrite`` is false) counts as skipped,
        not failed.

        Args:
            overwrite: Whether to overwrite existing normalized files
            limit: Maximum number of files to process (None or 0 for all)
            two_pass: Whether to use two-pass normalization (default: True)

        Returns:
            dict: Summary of the normalization operation with ``processed``,
            ``successful``, ``failed`` and ``skipped`` counters, plus an
            ``errors`` list when at least one sound was processed.
        """
        try:
            query = Sound.query.filter_by(type="SDB")
            if limit:
                query = query.limit(limit)
            sounds = query.all()

            if not sounds:
                return {
                    "success": True,
                    "message": "No soundboard files found to normalize",
                    "processed": 0,
                    "successful": 0,
                    "failed": 0,
                    "skipped": 0,
                }

            logger.info(f"Starting bulk normalization of {len(sounds)} sounds")

            processed = 0
            successful = 0
            failed = 0
            skipped = 0
            errors = []

            for sound in sounds:
                result = SoundNormalizerService.normalize_sound(
                    sound.id,
                    overwrite,
                    two_pass,
                )
                processed += 1
                if result["success"]:
                    successful += 1
                elif "already exists" in result.get("error", ""):
                    # normalize_sound's "already exists" message marks a skip.
                    skipped += 1
                else:
                    failed += 1
                    errors.append(f"{sound.name}: {result['error']}")

            logger.info(
                f"Bulk normalization completed: {successful} successful, {failed} failed, {skipped} skipped",
            )
            return {
                "success": True,
                "message": f"Processed {processed} sounds: {successful} successful, {failed} failed, {skipped} skipped",
                "processed": processed,
                "successful": successful,
                "failed": failed,
                "skipped": skipped,
                "errors": errors,
            }
        except Exception as e:
            logger.error(f"Error during bulk normalization: {e}")
            return {
                "success": False,
                "error": str(e),
                "processed": 0,
                "successful": 0,
                "failed": 0,
                "skipped": 0,
            }

    @staticmethod
    def _decode_stderr(raw) -> str:
        """Decode captured ffmpeg output without failing on odd bytes.

        ffmpeg echoes file metadata to stderr, which is not guaranteed to be
        valid UTF-8; undecodable bytes are replaced instead of raising.

        Args:
            raw: Captured bytes, or None/empty when nothing was captured

        Returns:
            str: Decoded text, empty string when there is no output
        """
        return raw.decode(errors="replace") if raw else ""

    @staticmethod
    def _ffmpeg_error_message(prefix: str, error) -> str:
        """Build the ``"<prefix>: <detail>"`` message for an ffmpeg failure.

        Args:
            prefix: Leading label, e.g. ``"FFmpeg error"``
            error: The caught ``ffmpeg.Error``

        Returns:
            str: Message using ffmpeg's stderr when available, otherwise the
            exception's own text
        """
        if error.stderr:
            detail = SoundNormalizerService._decode_stderr(error.stderr)
        else:
            detail = str(error)
        return f"{prefix}: {detail}"

    @staticmethod
    def _encode_wav(
        source_path: str,
        output_path: str,
        loudnorm_filter: str,
    ) -> str:
        """Run ffmpeg with the given audio filter, writing a WAV file.

        Args:
            source_path: Path to source audio file
            output_path: Path of the WAV file to write (overwritten)
            loudnorm_filter: Complete ``loudnorm=...`` filter string

        Returns:
            str: Decoded ffmpeg stderr, which holds the loudnorm report

        Raises:
            ffmpeg.Error: If the ffmpeg process exits with an error
        """
        output_stream = ffmpeg.output(
            ffmpeg.input(source_path),
            output_path,
            acodec="pcm_s16le",  # 16-bit PCM for WAV
            ar=44100,  # 44.1kHz sample rate
            af=loudnorm_filter,
            y=None,  # Overwrite output file
        )
        _, err = ffmpeg.run(
            output_stream,
            capture_stdout=True,
            capture_stderr=True,
        )
        return SoundNormalizerService._decode_stderr(err)

    @staticmethod
    def _normalize_with_ffmpeg(source_path: str, output_path: str) -> dict:
        """Run ffmpeg loudnorm on a single file using two-pass normalization.

        Two-pass normalization provides better quality by:
        1. First pass: Analyze the audio to measure its characteristics
        2. Second pass: Apply normalization using the measured parameters

        Args:
            source_path: Path to source audio file
            output_path: Path for normalized output file (will be WAV format)

        Returns:
            dict: Result with success status and loudnorm statistics
        """
        try:
            params = SoundNormalizerService.LOUDNORM_PARAMS
            logger.debug(
                f"Running two-pass ffmpeg normalization: {source_path} -> {output_path}",
            )

            # FIRST PASS: Analyze the audio to get optimal parameters
            logger.debug("Starting first pass (analysis)")
            first_pass_result = SoundNormalizerService._run_first_pass(
                source_path,
                params,
            )
            if not first_pass_result["success"]:
                return first_pass_result
            measured_params = first_pass_result["measured_params"]

            # SECOND PASS: Apply normalization using measured parameters
            logger.debug("Starting second pass (normalization)")
            second_pass_result = SoundNormalizerService._run_second_pass(
                source_path,
                output_path,
                params,
                measured_params,
            )
            if not second_pass_result["success"]:
                return second_pass_result

            if not Path(output_path).exists():
                return {
                    "success": False,
                    "error": "Output file was not created after second pass",
                }

            # Combine statistics from both passes (second pass wins on clash)
            stats = {
                **first_pass_result.get("stats", {}),
                **second_pass_result.get("stats", {}),
                "two_pass": True,
                "measured_params": measured_params,
            }
            logger.debug("Two-pass normalization completed successfully")
            return {"success": True, "stats": stats}
        except ffmpeg.Error as e:
            error_msg = SoundNormalizerService._ffmpeg_error_message(
                "FFmpeg error",
                e,
            )
            logger.error(error_msg)
            return {"success": False, "error": error_msg}
        except Exception as e:
            logger.error(f"Error running two-pass ffmpeg normalization: {e}")
            return {"success": False, "error": str(e)}

    @staticmethod
    def _normalize_with_ffmpeg_single_pass(
        source_path: str,
        output_path: str,
    ) -> dict:
        """Run ffmpeg loudnorm on a single file using single-pass normalization.

        This is the legacy single-pass method for backward compatibility.

        Args:
            source_path: Path to source audio file
            output_path: Path for normalized output file (will be WAV format)

        Returns:
            dict: Result with success status and loudnorm statistics
        """
        try:
            params = SoundNormalizerService.LOUDNORM_PARAMS
            logger.debug(
                f"Running single-pass ffmpeg normalization: {source_path} -> {output_path}",
            )
            loudnorm_filter = f"loudnorm=I={params['integrated']}:TP={params['true_peak']}:LRA={params['lra']}:print_format={params['print_format']}"
            stderr_text = SoundNormalizerService._encode_wav(
                source_path,
                output_path,
                loudnorm_filter,
            )

            # Parse loudnorm statistics from stderr
            stats = SoundNormalizerService._parse_loudnorm_stats(stderr_text)

            if not Path(output_path).exists():
                return {
                    "success": False,
                    "error": "Output file was not created",
                }
            return {"success": True, "stats": stats}
        except ffmpeg.Error as e:
            error_msg = SoundNormalizerService._ffmpeg_error_message(
                "FFmpeg error",
                e,
            )
            logger.error(error_msg)
            return {"success": False, "error": error_msg}
        except Exception as e:
            logger.error(f"Error running single-pass ffmpeg: {e}")
            return {"success": False, "error": str(e)}

    @staticmethod
    def _run_first_pass(source_path: str, params: dict) -> dict:
        """Run first pass of loudnorm to analyze audio characteristics.

        Nothing is written: the audio is decoded into ffmpeg's null muxer and
        only the JSON report printed on stderr is used.

        Args:
            source_path: Path to source audio file
            params: Loudnorm target parameters

        Returns:
            dict: Result with measured parameters and analysis stats
        """
        try:
            # First pass: analyze only, JSON report requested
            loudnorm_filter = (
                f"loudnorm=I={params['integrated']}:"
                f"TP={params['true_peak']}:"
                f"LRA={params['lra']}:"
                f"print_format=json"
            )

            # Output to null device for analysis
            output_stream = ffmpeg.output(
                ffmpeg.input(source_path),
                "/dev/null",
                af=loudnorm_filter,
                f="null",
            )
            _, err = ffmpeg.run(
                output_stream,
                capture_stdout=True,
                capture_stderr=True,
            )
            stderr_text = SoundNormalizerService._decode_stderr(err)

            # Parse measured parameters from JSON output
            measured_params = SoundNormalizerService._parse_measured_params(
                stderr_text,
            )
            if not measured_params:
                return {
                    "success": False,
                    "error": "Failed to parse measured parameters from first pass",
                }

            # Parse basic stats
            stats = SoundNormalizerService._parse_loudnorm_stats(stderr_text)
            return {
                "success": True,
                "measured_params": measured_params,
                "stats": stats,
            }
        except ffmpeg.Error as e:
            error_msg = SoundNormalizerService._ffmpeg_error_message(
                "First pass FFmpeg error",
                e,
            )
            logger.error(error_msg)
            return {"success": False, "error": error_msg}
        except Exception as e:
            logger.error(f"Error in first pass: {e}")
            return {"success": False, "error": str(e)}

    @staticmethod
    def _run_second_pass(
        source_path: str,
        output_path: str,
        target_params: dict,
        measured_params: dict,
    ) -> dict:
        """Run second pass of loudnorm using measured parameters.

        Args:
            source_path: Path to source audio file
            output_path: Path for normalized output file
            target_params: Target loudnorm parameters
            measured_params: Parameters measured from first pass

        Returns:
            dict: Result with normalization stats
        """
        try:
            # Second pass: normalize using measured parameters
            loudnorm_filter = (
                f"loudnorm=I={target_params['integrated']}:"
                f"TP={target_params['true_peak']}:"
                f"LRA={target_params['lra']}:"
                f"measured_I={measured_params['input_i']}:"
                f"measured_TP={measured_params['input_tp']}:"
                f"measured_LRA={measured_params['input_lra']}:"
                f"measured_thresh={measured_params['input_thresh']}:"
                f"offset={measured_params['target_offset']}:"
                f"linear=true:"
                f"print_format=summary"
            )
            stderr_text = SoundNormalizerService._encode_wav(
                source_path,
                output_path,
                loudnorm_filter,
            )

            # Parse final statistics
            stats = SoundNormalizerService._parse_loudnorm_stats(stderr_text)
            return {"success": True, "stats": stats}
        except ffmpeg.Error as e:
            error_msg = SoundNormalizerService._ffmpeg_error_message(
                "Second pass FFmpeg error",
                e,
            )
            logger.error(error_msg)
            return {"success": False, "error": error_msg}
        except Exception as e:
            logger.error(f"Error in second pass: {e}")
            return {"success": False, "error": str(e)}

    @staticmethod
    def _parse_measured_params(stderr_output: str) -> dict:
        """Parse measured parameters from first pass JSON output.

        Args:
            stderr_output: ffmpeg stderr output containing JSON data

        Returns:
            dict: Parsed measured parameters, empty if parsing fails. Values
            are returned exactly as they appear in the JSON report.
        """
        try:
            # Find the flat JSON object that contains "input_i"
            json_match = re.search(
                r'\{[^}]*"input_i"[^}]*\}',
                stderr_output,
                re.DOTALL,
            )
            if not json_match:
                logger.warning("No JSON block found in first pass output")
                return {}

            measured_data = json.loads(json_match.group(0))

            # Extract required parameters
            return {
                "input_i": measured_data.get("input_i", 0),
                "input_tp": measured_data.get("input_tp", 0),
                "input_lra": measured_data.get("input_lra", 0),
                "input_thresh": measured_data.get("input_thresh", 0),
                "target_offset": measured_data.get("target_offset", 0),
            }
        except (json.JSONDecodeError, KeyError, AttributeError) as e:
            logger.warning(f"Failed to parse measured parameters: {e}")
            return {}

    @staticmethod
    def _parse_loudnorm_stats(stderr_output: str) -> dict:
        """Parse loudnorm statistics from ffmpeg stderr output.

        Each summary line looks like ``Input LRA:   18.1 LU``: a label, the
        value, then a unit. The value is the first token after the label, so
        the trailing unit is never mistaken for the number.

        Args:
            stderr_output: ffmpeg stderr output containing loudnorm stats

        Returns:
            dict: Parsed loudnorm statistics as floats; a field is omitted
            when its line is absent or its value is not numeric
        """
        stats = {}
        if not stderr_output:
            return stats

        for raw_line in stderr_output.splitlines():
            line = raw_line.strip()
            for label, key in SoundNormalizerService._SUMMARY_FIELDS:
                if label not in line:
                    continue
                value_tokens = line.partition(label)[2].split()
                try:
                    stats[key] = float(value_tokens[0])
                except (ValueError, IndexError):
                    # Best effort: an unparseable field is simply left out.
                    pass
                break
        return stats

    @staticmethod
    def _get_normalized_metadata(file_path: str) -> dict:
        """Calculate metadata for normalized file.

        Args:
            file_path: Path to the normalized audio file

        Returns:
            dict: ``duration`` in milliseconds, ``size`` in bytes and the
            SHA256 ``hash``. If anything fails, duration is 0, hash is empty
            and size is whatever can still be read (0 if the file is gone).
        """
        path = Path(file_path)
        try:
            file_size = path.stat().st_size
            file_hash = SoundNormalizerService._calculate_file_hash(file_path)

            # Get duration using ffmpeg
            probe = ffmpeg.probe(file_path)
            audio_stream = next(
                (s for s in probe["streams"] if s["codec_type"] == "audio"),
                None,
            )
            if audio_stream and "duration" in audio_stream:
                # Convert seconds to milliseconds
                duration = int(float(audio_stream["duration"]) * 1000)
            else:
                duration = 0

            return {
                "duration": duration,
                "size": file_size,
                "hash": file_hash,
            }
        except Exception as e:
            logger.error(f"Error calculating metadata for {file_path}: {e}")
            # stat() may be what failed in the first place; do not let the
            # fallback raise out of the handler.
            try:
                fallback_size = path.stat().st_size
            except OSError:
                fallback_size = 0
            return {
                "duration": 0,
                "size": fallback_size,
                "hash": "",
            }

    @staticmethod
    def _calculate_file_hash(file_path: str) -> str:
        """Calculate SHA256 hash of file contents.

        Args:
            file_path: Path of the file to hash

        Returns:
            str: Hex digest of the file's SHA256 hash
        """
        sha256_hash = hashlib.sha256()
        with Path(file_path).open("rb") as f:
            # Read file in chunks to handle large files
            for chunk in iter(lambda: f.read(4096), b""):
                sha256_hash.update(chunk)
        return sha256_hash.hexdigest()

    @staticmethod
    def get_normalization_status() -> dict:
        """Get statistics about normalized vs original files.

        Only soundboard (``"SDB"``) sounds are counted. Whether a sound is
        normalized is decided from its database fields; file sizes are added
        only for files that actually exist on disk.

        Returns:
            dict: Statistics about normalization status. On failure the dict
            carries ``"error"`` and zeroed counters.
        """
        try:
            original_dir = Path(SoundNormalizerService.SOUND_DIRS["SDB"])
            normalized_dir = Path(SoundNormalizerService.NORMALIZED_DIRS["SDB"])

            sounds = Sound.query.filter_by(type="SDB").all()
            total_sounds = len(sounds)
            normalized_count = 0
            total_original_size = 0
            total_normalized_size = 0

            for sound in sounds:
                original_path = original_dir / sound.filename
                if original_path.exists():
                    total_original_size += original_path.stat().st_size

                # Use database field to check if normalized, not file existence
                if sound.is_normalized and sound.normalized_filename:
                    normalized_count += 1
                    normalized_path = normalized_dir / sound.normalized_filename
                    if normalized_path.exists():
                        total_normalized_size += normalized_path.stat().st_size

            return {
                "total_sounds": total_sounds,
                "normalized_count": normalized_count,
                "normalization_percentage": (
                    (normalized_count / total_sounds * 100)
                    if total_sounds > 0
                    else 0
                ),
                "total_original_size": total_original_size,
                "total_normalized_size": total_normalized_size,
                "size_difference": (
                    total_normalized_size - total_original_size
                    if normalized_count > 0
                    else 0
                ),
            }
        except Exception as e:
            logger.error(f"Error getting normalization status: {e}")
            return {
                "error": str(e),
                "total_sounds": 0,
                "normalized_count": 0,
                "normalization_percentage": 0,
                "total_original_size": 0,
                "total_normalized_size": 0,
                "size_difference": 0,
            }

    @staticmethod
    def check_ffmpeg_availability() -> dict:
        """Check if ffmpeg is available and supports loudnorm filter.

        A short silent clip is generated into a temporary WAV file, then
        analyzed with loudnorm. The temporary file is always removed.

        Returns:
            dict: Information about ffmpeg availability and capabilities
        """
        try:
            # Create a minimal test audio file to check ffmpeg
            import tempfile

            with tempfile.NamedTemporaryFile(
                suffix=".wav",
                delete=False,
            ) as temp_file:
                temp_path = temp_file.name

            try:
                # Try a simple ffmpeg operation to check availability
                test_input = ffmpeg.input(
                    "anullsrc=channel_layout=stereo:sample_rate=44100",
                    f="lavfi",
                    t=0.1,
                )
                # The temp file already exists on disk, so ffmpeg must be
                # told to overwrite it or it refuses (or prompts) and the
                # probe wrongly reports ffmpeg as unavailable.
                test_output = ffmpeg.output(test_input, temp_path, y=None)
                ffmpeg.run(
                    test_output,
                    capture_stdout=True,
                    capture_stderr=True,
                    quiet=True,
                )

                # If we get here, basic ffmpeg is working
                # Now test loudnorm filter
                try:
                    norm_input = ffmpeg.input(temp_path)
                    norm_output = ffmpeg.output(
                        norm_input,
                        "/dev/null",
                        af="loudnorm=I=-16:TP=-1.5:LRA=11.0",
                        f="null",
                    )
                    ffmpeg.run(
                        norm_output,
                        capture_stdout=True,
                        capture_stderr=True,
                        quiet=True,
                    )
                    has_loudnorm = True
                except ffmpeg.Error:
                    has_loudnorm = False

                return {
                    "available": True,
                    "version": "ffmpeg-python wrapper available",
                    "has_loudnorm": has_loudnorm,
                    "ready": has_loudnorm,
                }
            finally:
                # Clean up temp file
                temp_file_path = Path(temp_path)
                if temp_file_path.exists():
                    temp_file_path.unlink()
        except Exception as e:
            return {
                "available": False,
                "error": f"ffmpeg not available via python-ffmpeg: {e!s}",
            }