feat(sound_normalizer): implement two-pass normalization and enhance error handling

This commit is contained in:
JSC
2025-07-02 17:29:28 +02:00
parent 7128ca727b
commit c241a72c60

View File

@@ -1,7 +1,9 @@
"""Sound normalization service using ffmpeg loudnorm filter."""
import hashlib
import json
import logging
import re
from pathlib import Path
import ffmpeg
@@ -36,12 +38,13 @@ class SoundNormalizerService:
}
@staticmethod
def normalize_sound(sound_id: int, overwrite: bool = False) -> dict:
def normalize_sound(sound_id: int, overwrite: bool = False, two_pass: bool = True) -> dict:
"""Normalize a specific sound file using ffmpeg loudnorm.
Args:
sound_id: ID of the sound to normalize
overwrite: Whether to overwrite existing normalized file
two_pass: Whether to use two-pass normalization (default: True)
Returns:
dict: Result of the normalization operation
@@ -79,9 +82,14 @@ class SoundNormalizerService:
f"Starting normalization of {sound.name} ({sound.filename})",
)
result = SoundNormalizerService._normalize_with_ffmpeg(
str(source_path), str(normalized_path),
)
if two_pass:
result = SoundNormalizerService._normalize_with_ffmpeg(
str(source_path), str(normalized_path),
)
else:
result = SoundNormalizerService._normalize_with_ffmpeg_single_pass(
str(source_path), str(normalized_path),
)
if result["success"]:
# Calculate normalized file metadata
@@ -123,13 +131,14 @@ class SoundNormalizerService:
@staticmethod
def normalize_all_sounds(
overwrite: bool = False, limit: int = None,
overwrite: bool = False, limit: int = None, two_pass: bool = True,
) -> dict:
"""Normalize all soundboard files.
Args:
overwrite: Whether to overwrite existing normalized files
limit: Maximum number of files to process (None for all)
two_pass: Whether to use two-pass normalization (default: True)
Returns:
dict: Summary of the normalization operation
@@ -162,7 +171,7 @@ class SoundNormalizerService:
for sound in sounds:
result = SoundNormalizerService.normalize_sound(
sound.id, overwrite,
sound.id, overwrite, two_pass,
)
processed += 1
@@ -201,7 +210,11 @@ class SoundNormalizerService:
@staticmethod
def _normalize_with_ffmpeg(source_path: str, output_path: str) -> dict:
"""Run ffmpeg loudnorm on a single file using python-ffmpeg.
"""Run ffmpeg loudnorm on a single file using two-pass normalization.
Two-pass normalization provides better quality by:
1. First pass: Analyze the audio to measure its characteristics
2. Second pass: Apply normalization using the measured parameters
Args:
source_path: Path to source audio file
@@ -215,7 +228,77 @@ class SoundNormalizerService:
params = SoundNormalizerService.LOUDNORM_PARAMS
logger.debug(
f"Running ffmpeg normalization: {source_path} -> {output_path}",
f"Running two-pass ffmpeg normalization: {source_path} -> {output_path}",
)
# FIRST PASS: Analyze the audio to get optimal parameters
logger.debug("Starting first pass (analysis)")
first_pass_result = SoundNormalizerService._run_first_pass(
source_path, params
)
if not first_pass_result["success"]:
return first_pass_result
measured_params = first_pass_result["measured_params"]
# SECOND PASS: Apply normalization using measured parameters
logger.debug("Starting second pass (normalization)")
second_pass_result = SoundNormalizerService._run_second_pass(
source_path, output_path, params, measured_params
)
if not second_pass_result["success"]:
return second_pass_result
# Combine statistics from both passes
stats = {
**first_pass_result.get("stats", {}),
**second_pass_result.get("stats", {}),
"two_pass": True,
"measured_params": measured_params,
}
if not Path(output_path).exists():
return {
"success": False,
"error": "Output file was not created after second pass",
}
logger.debug("Two-pass normalization completed successfully")
return {"success": True, "stats": stats}
except ffmpeg.Error as e:
error_msg = (
f"FFmpeg error: {e.stderr.decode() if e.stderr else str(e)}"
)
logger.error(error_msg)
return {"success": False, "error": error_msg}
except Exception as e:
logger.error(f"Error running two-pass ffmpeg normalization: {e}")
return {"success": False, "error": str(e)}
@staticmethod
def _normalize_with_ffmpeg_single_pass(source_path: str, output_path: str) -> dict:
"""Run ffmpeg loudnorm on a single file using single-pass normalization.
This is the legacy single-pass method for backward compatibility.
Args:
source_path: Path to source audio file
output_path: Path for normalized output file (will be WAV format)
Returns:
dict: Result with success status and loudnorm statistics
"""
try:
params = SoundNormalizerService.LOUDNORM_PARAMS
logger.debug(
f"Running single-pass ffmpeg normalization: {source_path} -> {output_path}",
)
# Create ffmpeg input stream
@@ -259,9 +342,170 @@ class SoundNormalizerService:
logger.error(error_msg)
return {"success": False, "error": error_msg}
except Exception as e:
logger.error(f"Error running ffmpeg: {e}")
logger.error(f"Error running single-pass ffmpeg: {e}")
return {"success": False, "error": str(e)}
@staticmethod
def _run_first_pass(source_path: str, params: dict) -> dict:
    """Run the loudnorm analysis pass and collect the measured values.

    The source is run through the ``loudnorm`` filter with
    ``print_format=json`` while the output is sent to the null device using
    the ``null`` format, so no audio file is produced. The measurements are
    then parsed out of ffmpeg's stderr.

    Args:
        source_path: Path to source audio file
        params: Loudnorm target parameters; the keys ``integrated``,
            ``true_peak`` and ``lra`` are read

    Returns:
        dict: ``{"success": True, "measured_params": ..., "stats": ...}``
        on success, otherwise ``{"success": False, "error": <message>}``
    """
    import os  # local import: only needed for the portable null device path

    try:
        input_stream = ffmpeg.input(source_path)

        # Analysis only: ask loudnorm to print its measurements as JSON.
        loudnorm_filter = (
            f"loudnorm=I={params['integrated']}:"
            f"TP={params['true_peak']}:"
            f"LRA={params['lra']}:"
            "print_format=json"
        )

        # os.devnull is "/dev/null" on POSIX and "nul" on Windows, so the
        # analysis pass does not depend on a POSIX-only path.
        output_stream = ffmpeg.output(
            input_stream,
            os.devnull,
            af=loudnorm_filter,
            f="null",
        )

        _, err = ffmpeg.run(
            output_stream, capture_stdout=True, capture_stderr=True,
        )

        # ffmpeg may echo metadata that is not valid UTF-8; a strict decode
        # would fail the whole pass over bytes we do not care about.
        stderr_text = err.decode("utf-8", errors="replace") if err else ""

        measured_params = SoundNormalizerService._parse_measured_params(
            stderr_text
        )
        if not measured_params:
            return {
                "success": False,
                "error": "Failed to parse measured parameters from first pass",
            }

        stats = SoundNormalizerService._parse_loudnorm_stats(stderr_text)

        return {
            "success": True,
            "measured_params": measured_params,
            "stats": stats,
        }

    except ffmpeg.Error as e:
        # Decode leniently: an exception raised inside this handler would
        # escape the function instead of producing an error result.
        details = (
            e.stderr.decode("utf-8", errors="replace") if e.stderr else str(e)
        )
        error_msg = f"First pass FFmpeg error: {details}"
        logger.error(error_msg)
        return {"success": False, "error": error_msg}
    except Exception as e:
        logger.error(f"Error in first pass: {e}")
        return {"success": False, "error": str(e)}
@staticmethod
def _run_second_pass(source_path: str, output_path: str, target_params: dict, measured_params: dict) -> dict:
    """Run the loudnorm normalization pass using first-pass measurements.

    Builds a ``loudnorm`` filter from the target values plus the measured
    values and writes the result to ``output_path`` as 16-bit PCM at
    44.1 kHz, overwriting any existing file.

    Args:
        source_path: Path to source audio file
        output_path: Path for normalized output file
        target_params: Target loudnorm parameters; the keys ``integrated``,
            ``true_peak`` and ``lra`` are read
        measured_params: Parameters measured from first pass; the keys
            ``input_i``, ``input_tp``, ``input_lra``, ``input_thresh`` and
            ``target_offset`` are required

    Returns:
        dict: ``{"success": True, "stats": ...}`` on success, otherwise
        ``{"success": False, "error": <message>}``
    """
    required_keys = (
        "input_i",
        "input_tp",
        "input_lra",
        "input_thresh",
        "target_offset",
    )

    try:
        # Fail with a readable message instead of a bare KeyError text such
        # as "'input_i'" when the analysis result is incomplete.
        missing = [key for key in required_keys if key not in measured_params]
        if missing:
            error_msg = (
                "Second pass is missing measured parameters: "
                + ", ".join(missing)
            )
            logger.error(error_msg)
            return {"success": False, "error": error_msg}

        input_stream = ffmpeg.input(source_path)

        # Normalize using the values measured during the analysis pass.
        loudnorm_filter = (
            f"loudnorm=I={target_params['integrated']}:"
            f"TP={target_params['true_peak']}:"
            f"LRA={target_params['lra']}:"
            f"measured_I={measured_params['input_i']}:"
            f"measured_TP={measured_params['input_tp']}:"
            f"measured_LRA={measured_params['input_lra']}:"
            f"measured_thresh={measured_params['input_thresh']}:"
            f"offset={measured_params['target_offset']}:"
            "linear=true:"
            "print_format=summary"
        )

        output_stream = ffmpeg.output(
            input_stream,
            output_path,
            acodec="pcm_s16le",  # 16-bit PCM for WAV
            ar=44100,  # 44.1kHz sample rate
            af=loudnorm_filter,
            y=None,  # Overwrite output file
        )

        _, err = ffmpeg.run(
            output_stream, capture_stdout=True, capture_stderr=True,
        )

        # Lenient decode: stderr may contain bytes that are not valid UTF-8.
        stderr_text = err.decode("utf-8", errors="replace") if err else ""

        stats = SoundNormalizerService._parse_loudnorm_stats(stderr_text)

        return {
            "success": True,
            "stats": stats,
        }

    except ffmpeg.Error as e:
        # Decode leniently: an exception raised inside this handler would
        # escape the function instead of producing an error result.
        details = (
            e.stderr.decode("utf-8", errors="replace") if e.stderr else str(e)
        )
        error_msg = f"Second pass FFmpeg error: {details}"
        logger.error(error_msg)
        return {"success": False, "error": error_msg}
    except Exception as e:
        logger.error(f"Error in second pass: {e}")
        return {"success": False, "error": str(e)}
@staticmethod
def _parse_measured_params(stderr_output: str) -> dict:
"""Parse measured parameters from first pass JSON output.
Args:
stderr_output: ffmpeg stderr output containing JSON data
Returns:
dict: Parsed measured parameters, empty if parsing fails
"""
try:
# Find JSON block in stderr output
json_match = re.search(r'\{[^}]*"input_i"[^}]*\}', stderr_output, re.DOTALL)
if not json_match:
logger.warning("No JSON block found in first pass output")
return {}
json_str = json_match.group(0)
measured_data = json.loads(json_str)
# Extract required parameters
return {
"input_i": measured_data.get("input_i", 0),
"input_tp": measured_data.get("input_tp", 0),
"input_lra": measured_data.get("input_lra", 0),
"input_thresh": measured_data.get("input_thresh", 0),
"target_offset": measured_data.get("target_offset", 0),
}
except (json.JSONDecodeError, KeyError, AttributeError) as e:
logger.warning(f"Failed to parse measured parameters: {e}")
return {}
@staticmethod
def _parse_loudnorm_stats(stderr_output: str) -> dict:
"""Parse loudnorm statistics from ffmpeg stderr output.