feat(sound_normalizer): implement two-pass normalization and enhance error handling

This commit is contained in:
JSC
2025-07-02 17:29:28 +02:00
parent 7128ca727b
commit c241a72c60

View File

@@ -1,7 +1,9 @@
"""Sound normalization service using ffmpeg loudnorm filter.""" """Sound normalization service using ffmpeg loudnorm filter."""
import hashlib import hashlib
import json
import logging import logging
import re
from pathlib import Path from pathlib import Path
import ffmpeg import ffmpeg
@@ -36,12 +38,13 @@ class SoundNormalizerService:
} }
@staticmethod @staticmethod
def normalize_sound(sound_id: int, overwrite: bool = False) -> dict: def normalize_sound(sound_id: int, overwrite: bool = False, two_pass: bool = True) -> dict:
"""Normalize a specific sound file using ffmpeg loudnorm. """Normalize a specific sound file using ffmpeg loudnorm.
Args: Args:
sound_id: ID of the sound to normalize sound_id: ID of the sound to normalize
overwrite: Whether to overwrite existing normalized file overwrite: Whether to overwrite existing normalized file
two_pass: Whether to use two-pass normalization (default: True)
Returns: Returns:
dict: Result of the normalization operation dict: Result of the normalization operation
@@ -79,9 +82,14 @@ class SoundNormalizerService:
f"Starting normalization of {sound.name} ({sound.filename})", f"Starting normalization of {sound.name} ({sound.filename})",
) )
if two_pass:
result = SoundNormalizerService._normalize_with_ffmpeg( result = SoundNormalizerService._normalize_with_ffmpeg(
str(source_path), str(normalized_path), str(source_path), str(normalized_path),
) )
else:
result = SoundNormalizerService._normalize_with_ffmpeg_single_pass(
str(source_path), str(normalized_path),
)
if result["success"]: if result["success"]:
# Calculate normalized file metadata # Calculate normalized file metadata
@@ -123,13 +131,14 @@ class SoundNormalizerService:
@staticmethod @staticmethod
def normalize_all_sounds( def normalize_all_sounds(
overwrite: bool = False, limit: int = None, overwrite: bool = False, limit: int = None, two_pass: bool = True,
) -> dict: ) -> dict:
"""Normalize all soundboard files. """Normalize all soundboard files.
Args: Args:
overwrite: Whether to overwrite existing normalized files overwrite: Whether to overwrite existing normalized files
limit: Maximum number of files to process (None for all) limit: Maximum number of files to process (None for all)
two_pass: Whether to use two-pass normalization (default: True)
Returns: Returns:
dict: Summary of the normalization operation dict: Summary of the normalization operation
@@ -162,7 +171,7 @@ class SoundNormalizerService:
for sound in sounds: for sound in sounds:
result = SoundNormalizerService.normalize_sound( result = SoundNormalizerService.normalize_sound(
sound.id, overwrite, sound.id, overwrite, two_pass,
) )
processed += 1 processed += 1
@@ -201,7 +210,11 @@ class SoundNormalizerService:
@staticmethod @staticmethod
def _normalize_with_ffmpeg(source_path: str, output_path: str) -> dict: def _normalize_with_ffmpeg(source_path: str, output_path: str) -> dict:
"""Run ffmpeg loudnorm on a single file using python-ffmpeg. """Run ffmpeg loudnorm on a single file using two-pass normalization.
Two-pass normalization provides better quality by:
1. First pass: Analyze the audio to measure its characteristics
2. Second pass: Apply normalization using the measured parameters
Args: Args:
source_path: Path to source audio file source_path: Path to source audio file
@@ -215,7 +228,77 @@ class SoundNormalizerService:
params = SoundNormalizerService.LOUDNORM_PARAMS params = SoundNormalizerService.LOUDNORM_PARAMS
logger.debug( logger.debug(
f"Running ffmpeg normalization: {source_path} -> {output_path}", f"Running two-pass ffmpeg normalization: {source_path} -> {output_path}",
)
# FIRST PASS: Analyze the audio to get optimal parameters
logger.debug("Starting first pass (analysis)")
first_pass_result = SoundNormalizerService._run_first_pass(
source_path, params
)
if not first_pass_result["success"]:
return first_pass_result
measured_params = first_pass_result["measured_params"]
# SECOND PASS: Apply normalization using measured parameters
logger.debug("Starting second pass (normalization)")
second_pass_result = SoundNormalizerService._run_second_pass(
source_path, output_path, params, measured_params
)
if not second_pass_result["success"]:
return second_pass_result
# Combine statistics from both passes
stats = {
**first_pass_result.get("stats", {}),
**second_pass_result.get("stats", {}),
"two_pass": True,
"measured_params": measured_params,
}
if not Path(output_path).exists():
return {
"success": False,
"error": "Output file was not created after second pass",
}
logger.debug("Two-pass normalization completed successfully")
return {"success": True, "stats": stats}
except ffmpeg.Error as e:
error_msg = (
f"FFmpeg error: {e.stderr.decode() if e.stderr else str(e)}"
)
logger.error(error_msg)
return {"success": False, "error": error_msg}
except Exception as e:
logger.error(f"Error running two-pass ffmpeg normalization: {e}")
return {"success": False, "error": str(e)}
@staticmethod
def _normalize_with_ffmpeg_single_pass(source_path: str, output_path: str) -> dict:
"""Run ffmpeg loudnorm on a single file using single-pass normalization.
This is the legacy single-pass method for backward compatibility.
Args:
source_path: Path to source audio file
output_path: Path for normalized output file (will be WAV format)
Returns:
dict: Result with success status and loudnorm statistics
"""
try:
params = SoundNormalizerService.LOUDNORM_PARAMS
logger.debug(
f"Running single-pass ffmpeg normalization: {source_path} -> {output_path}",
) )
# Create ffmpeg input stream # Create ffmpeg input stream
@@ -259,9 +342,170 @@ class SoundNormalizerService:
logger.error(error_msg) logger.error(error_msg)
return {"success": False, "error": error_msg} return {"success": False, "error": error_msg}
except Exception as e: except Exception as e:
logger.error(f"Error running ffmpeg: {e}") logger.error(f"Error running single-pass ffmpeg: {e}")
return {"success": False, "error": str(e)} return {"success": False, "error": str(e)}
@staticmethod
def _run_first_pass(source_path: str, params: dict) -> dict:
    """Run the loudnorm analysis pass and collect the measured values.

    ffmpeg is run with the ``loudnorm`` filter in JSON print mode and the
    ``null`` output format, so this pass only produces the text ffmpeg
    prints on stderr. That text is handed to ``_parse_measured_params``
    and ``_parse_loudnorm_stats``.

    Args:
        source_path: Path to source audio file
        params: Loudnorm target parameters; the keys ``integrated``,
            ``true_peak`` and ``lra`` are read

    Returns:
        dict: ``{"success": True, "measured_params": ..., "stats": ...}``
        when the measurements could be parsed, otherwise
        ``{"success": False, "error": <message>}``. Failures are reported
        through the result dict rather than raised.
    """
    try:
        # Create ffmpeg input stream
        input_stream = ffmpeg.input(source_path)

        # First pass: analyze only, output to null
        loudnorm_filter = (
            f"loudnorm=I={params['integrated']}:"
            f"TP={params['true_peak']}:"
            f"LRA={params['lra']}:"
            f"print_format=json"
        )

        # Output to null device for analysis
        output_stream = ffmpeg.output(
            input_stream,
            "/dev/null",
            af=loudnorm_filter,
            f="null",
        )

        # Only stderr is needed; captured stdout is discarded.
        _, err = ffmpeg.run(
            output_stream, capture_stdout=True, capture_stderr=True,
        )

        # stderr is not guaranteed to be valid UTF-8. A strict decode would
        # turn a successful analysis into a failure, so undecodable bytes
        # are replaced instead.
        stderr_text = err.decode(errors="replace") if err else ""

        # Parse measured parameters from JSON output
        measured_params = SoundNormalizerService._parse_measured_params(
            stderr_text,
        )

        if not measured_params:
            return {
                "success": False,
                "error": "Failed to parse measured parameters from first pass",
            }

        # Parse basic stats
        stats = SoundNormalizerService._parse_loudnorm_stats(stderr_text)

        return {
            "success": True,
            "measured_params": measured_params,
            "stats": stats,
        }

    except ffmpeg.Error as e:
        # Decode leniently: an exception raised inside this handler would
        # escape the function instead of producing a result dict.
        detail = e.stderr.decode(errors="replace") if e.stderr else str(e)
        error_msg = f"First pass FFmpeg error: {detail}"
        logger.error(error_msg)
        return {"success": False, "error": error_msg}
    except Exception as e:
        logger.error(f"Error in first pass: {e}")
        return {"success": False, "error": str(e)}
@staticmethod
def _run_second_pass(
    source_path: str,
    output_path: str,
    target_params: dict,
    measured_params: dict,
) -> dict:
    """Run second pass of loudnorm using measured parameters.

    Builds a ``loudnorm`` filter from the targets plus the values measured
    in the first pass, and writes the result to ``output_path`` as 16-bit
    PCM at 44.1 kHz.

    Args:
        source_path: Path to source audio file
        output_path: Path for normalized output file
        target_params: Target loudnorm parameters; the keys ``integrated``,
            ``true_peak`` and ``lra`` are read
        measured_params: Parameters measured from first pass; the keys
            ``input_i``, ``input_tp``, ``input_lra``, ``input_thresh`` and
            ``target_offset`` are read

    Returns:
        dict: ``{"success": True, "stats": ...}`` on success, otherwise
        ``{"success": False, "error": <message>}``. Failures (including a
        missing key in either parameter dict) are reported through the
        result dict rather than raised.
    """
    try:
        # Create ffmpeg input stream
        input_stream = ffmpeg.input(source_path)

        # Second pass: normalize using measured parameters
        loudnorm_filter = (
            f"loudnorm=I={target_params['integrated']}:"
            f"TP={target_params['true_peak']}:"
            f"LRA={target_params['lra']}:"
            f"measured_I={measured_params['input_i']}:"
            f"measured_TP={measured_params['input_tp']}:"
            f"measured_LRA={measured_params['input_lra']}:"
            f"measured_thresh={measured_params['input_thresh']}:"
            f"offset={measured_params['target_offset']}:"
            f"linear=true:"
            f"print_format=summary"
        )

        # Create output stream with WAV format
        output_stream = ffmpeg.output(
            input_stream,
            output_path,
            acodec="pcm_s16le",  # 16-bit PCM for WAV
            ar=44100,  # 44.1kHz sample rate
            af=loudnorm_filter,
            y=None,  # Overwrite output file
        )

        # Only stderr is needed; captured stdout is discarded.
        _, err = ffmpeg.run(
            output_stream, capture_stdout=True, capture_stderr=True,
        )

        # stderr is not guaranteed to be valid UTF-8. A strict decode would
        # report a failure after the output file was already written, so
        # undecodable bytes are replaced instead.
        stderr_text = err.decode(errors="replace") if err else ""

        # Parse final statistics
        stats = SoundNormalizerService._parse_loudnorm_stats(stderr_text)

        return {
            "success": True,
            "stats": stats,
        }

    except ffmpeg.Error as e:
        # Decode leniently: an exception raised inside this handler would
        # escape the function instead of producing a result dict.
        detail = e.stderr.decode(errors="replace") if e.stderr else str(e)
        error_msg = f"Second pass FFmpeg error: {detail}"
        logger.error(error_msg)
        return {"success": False, "error": error_msg}
    except Exception as e:
        logger.error(f"Error in second pass: {e}")
        return {"success": False, "error": str(e)}
@staticmethod
def _parse_measured_params(stderr_output: str) -> dict:
    """Parse measured parameters from first pass JSON output.

    Finds the brace-delimited block containing ``"input_i"`` in the given
    text, decodes it as JSON and extracts the five values the second pass
    needs. Values are returned exactly as decoded from the JSON.

    Args:
        stderr_output: ffmpeg stderr output containing JSON data

    Returns:
        dict: The keys ``input_i``, ``input_tp``, ``input_lra``,
        ``input_thresh`` and ``target_offset``. Empty if no block is
        found, the block is not valid JSON, or any of those keys is
        missing.
    """
    required_keys = (
        "input_i",
        "input_tp",
        "input_lra",
        "input_thresh",
        "target_offset",
    )

    try:
        # Exclude "{" as well as "}" inside the match so it starts at the
        # brace that opens the block. Otherwise a stray "{" earlier in the
        # text would be taken as the start and the span would not be JSON.
        json_match = re.search(
            r'\{[^{}]*"input_i"[^{}]*\}', stderr_output, re.DOTALL,
        )

        if not json_match:
            logger.warning("No JSON block found in first pass output")
            return {}

        measured_data = json.loads(json_match.group(0))

        # Strict lookups: a missing measurement must fail the parse rather
        # than be replaced by 0, which would silently feed a bogus value
        # into the second pass.
        return {key: measured_data[key] for key in required_keys}

    except (json.JSONDecodeError, KeyError, AttributeError) as e:
        logger.warning(f"Failed to parse measured parameters: {e}")
        return {}
@staticmethod @staticmethod
def _parse_loudnorm_stats(stderr_output: str) -> dict: def _parse_loudnorm_stats(stderr_output: str) -> dict:
"""Parse loudnorm statistics from ffmpeg stderr output. """Parse loudnorm statistics from ffmpeg stderr output.