feat(sound_normalizer): implement two-pass normalization and enhance error handling
This commit is contained in:
@@ -1,7 +1,9 @@
|
||||
"""Sound normalization service using ffmpeg loudnorm filter."""
|
||||
|
||||
import hashlib
|
||||
import json
|
||||
import logging
|
||||
import re
|
||||
from pathlib import Path
|
||||
|
||||
import ffmpeg
|
||||
@@ -36,12 +38,13 @@ class SoundNormalizerService:
|
||||
}
|
||||
|
||||
@staticmethod
|
||||
def normalize_sound(sound_id: int, overwrite: bool = False) -> dict:
|
||||
def normalize_sound(sound_id: int, overwrite: bool = False, two_pass: bool = True) -> dict:
|
||||
"""Normalize a specific sound file using ffmpeg loudnorm.
|
||||
|
||||
Args:
|
||||
sound_id: ID of the sound to normalize
|
||||
overwrite: Whether to overwrite existing normalized file
|
||||
two_pass: Whether to use two-pass normalization (default: True)
|
||||
|
||||
Returns:
|
||||
dict: Result of the normalization operation
|
||||
@@ -79,9 +82,14 @@ class SoundNormalizerService:
|
||||
f"Starting normalization of {sound.name} ({sound.filename})",
|
||||
)
|
||||
|
||||
if two_pass:
|
||||
result = SoundNormalizerService._normalize_with_ffmpeg(
|
||||
str(source_path), str(normalized_path),
|
||||
)
|
||||
else:
|
||||
result = SoundNormalizerService._normalize_with_ffmpeg_single_pass(
|
||||
str(source_path), str(normalized_path),
|
||||
)
|
||||
|
||||
if result["success"]:
|
||||
# Calculate normalized file metadata
|
||||
@@ -123,13 +131,14 @@ class SoundNormalizerService:
|
||||
|
||||
@staticmethod
|
||||
def normalize_all_sounds(
|
||||
overwrite: bool = False, limit: int = None,
|
||||
overwrite: bool = False, limit: int = None, two_pass: bool = True,
|
||||
) -> dict:
|
||||
"""Normalize all soundboard files.
|
||||
|
||||
Args:
|
||||
overwrite: Whether to overwrite existing normalized files
|
||||
limit: Maximum number of files to process (None for all)
|
||||
two_pass: Whether to use two-pass normalization (default: True)
|
||||
|
||||
Returns:
|
||||
dict: Summary of the normalization operation
|
||||
@@ -162,7 +171,7 @@ class SoundNormalizerService:
|
||||
|
||||
for sound in sounds:
|
||||
result = SoundNormalizerService.normalize_sound(
|
||||
sound.id, overwrite,
|
||||
sound.id, overwrite, two_pass,
|
||||
)
|
||||
processed += 1
|
||||
|
||||
@@ -201,7 +210,11 @@ class SoundNormalizerService:
|
||||
|
||||
@staticmethod
|
||||
def _normalize_with_ffmpeg(source_path: str, output_path: str) -> dict:
|
||||
"""Run ffmpeg loudnorm on a single file using python-ffmpeg.
|
||||
"""Run ffmpeg loudnorm on a single file using two-pass normalization.
|
||||
|
||||
Two-pass normalization provides better quality by:
|
||||
1. First pass: Analyze the audio to measure its characteristics
|
||||
2. Second pass: Apply normalization using the measured parameters
|
||||
|
||||
Args:
|
||||
source_path: Path to source audio file
|
||||
@@ -215,7 +228,77 @@ class SoundNormalizerService:
|
||||
params = SoundNormalizerService.LOUDNORM_PARAMS
|
||||
|
||||
logger.debug(
|
||||
f"Running ffmpeg normalization: {source_path} -> {output_path}",
|
||||
f"Running two-pass ffmpeg normalization: {source_path} -> {output_path}",
|
||||
)
|
||||
|
||||
# FIRST PASS: Analyze the audio to get optimal parameters
|
||||
logger.debug("Starting first pass (analysis)")
|
||||
|
||||
first_pass_result = SoundNormalizerService._run_first_pass(
|
||||
source_path, params
|
||||
)
|
||||
|
||||
if not first_pass_result["success"]:
|
||||
return first_pass_result
|
||||
|
||||
measured_params = first_pass_result["measured_params"]
|
||||
|
||||
# SECOND PASS: Apply normalization using measured parameters
|
||||
logger.debug("Starting second pass (normalization)")
|
||||
|
||||
second_pass_result = SoundNormalizerService._run_second_pass(
|
||||
source_path, output_path, params, measured_params
|
||||
)
|
||||
|
||||
if not second_pass_result["success"]:
|
||||
return second_pass_result
|
||||
|
||||
# Combine statistics from both passes
|
||||
stats = {
|
||||
**first_pass_result.get("stats", {}),
|
||||
**second_pass_result.get("stats", {}),
|
||||
"two_pass": True,
|
||||
"measured_params": measured_params,
|
||||
}
|
||||
|
||||
if not Path(output_path).exists():
|
||||
return {
|
||||
"success": False,
|
||||
"error": "Output file was not created after second pass",
|
||||
}
|
||||
|
||||
logger.debug("Two-pass normalization completed successfully")
|
||||
return {"success": True, "stats": stats}
|
||||
|
||||
except ffmpeg.Error as e:
|
||||
error_msg = (
|
||||
f"FFmpeg error: {e.stderr.decode() if e.stderr else str(e)}"
|
||||
)
|
||||
logger.error(error_msg)
|
||||
return {"success": False, "error": error_msg}
|
||||
except Exception as e:
|
||||
logger.error(f"Error running two-pass ffmpeg normalization: {e}")
|
||||
return {"success": False, "error": str(e)}
|
||||
|
||||
@staticmethod
|
||||
def _normalize_with_ffmpeg_single_pass(source_path: str, output_path: str) -> dict:
|
||||
"""Run ffmpeg loudnorm on a single file using single-pass normalization.
|
||||
|
||||
This is the legacy single-pass method for backward compatibility.
|
||||
|
||||
Args:
|
||||
source_path: Path to source audio file
|
||||
output_path: Path for normalized output file (will be WAV format)
|
||||
|
||||
Returns:
|
||||
dict: Result with success status and loudnorm statistics
|
||||
|
||||
"""
|
||||
try:
|
||||
params = SoundNormalizerService.LOUDNORM_PARAMS
|
||||
|
||||
logger.debug(
|
||||
f"Running single-pass ffmpeg normalization: {source_path} -> {output_path}",
|
||||
)
|
||||
|
||||
# Create ffmpeg input stream
|
||||
@@ -259,9 +342,170 @@ class SoundNormalizerService:
|
||||
logger.error(error_msg)
|
||||
return {"success": False, "error": error_msg}
|
||||
except Exception as e:
|
||||
logger.error(f"Error running ffmpeg: {e}")
|
||||
logger.error(f"Error running single-pass ffmpeg: {e}")
|
||||
return {"success": False, "error": str(e)}
|
||||
|
||||
@staticmethod
def _run_first_pass(source_path: str, params: dict) -> dict:
    """Run the loudnorm analysis pass and collect the measured values.

    No audio file is produced: the input is run through ``loudnorm`` with
    ``print_format=json`` and the result is sent to ffmpeg's ``null``
    output format. The measurements ffmpeg prints on stderr are then
    parsed.

    Args:
        source_path: Path to source audio file
        params: Loudnorm target parameters; the keys ``integrated``,
            ``true_peak`` and ``lra`` are read.

    Returns:
        dict: ``{"success": True, "measured_params": ..., "stats": ...}``
        when the analysis ran and its output could be parsed, otherwise
        ``{"success": False, "error": <message>}``. Errors raised while
        running ffmpeg or parsing its output are caught, logged and
        reported through the failure form.

    """
    try:
        # First pass: analyze only, nothing is encoded
        loudnorm_filter = (
            f"loudnorm=I={params['integrated']}:"
            f"TP={params['true_peak']}:"
            f"LRA={params['lra']}:"
            "print_format=json"
        )

        # Output to null device for analysis
        output_stream = ffmpeg.output(
            ffmpeg.input(source_path),
            "/dev/null",
            af=loudnorm_filter,
            f="null",
        )

        _, err = ffmpeg.run(
            output_stream, capture_stdout=True, capture_stderr=True,
        )

        # ffmpeg echoes file names and metadata tags on stderr, and those are
        # not guaranteed to be valid UTF-8. A strict decode would turn a
        # successful analysis into a failure, so undecodable bytes are
        # replaced instead.
        stderr_text = err.decode(errors="replace") if err else ""

        # Parse measured parameters from JSON output
        measured_params = SoundNormalizerService._parse_measured_params(
            stderr_text
        )
        if not measured_params:
            return {
                "success": False,
                "error": "Failed to parse measured parameters from first pass",
            }

        return {
            "success": True,
            "measured_params": measured_params,
            "stats": SoundNormalizerService._parse_loudnorm_stats(stderr_text),
        }

    except ffmpeg.Error as e:
        # Lenient decode here as well: a UnicodeDecodeError raised inside this
        # handler would escape to the caller instead of becoming a result dict.
        detail = e.stderr.decode(errors="replace") if e.stderr else str(e)
        error_msg = f"First pass FFmpeg error: {detail}"
        logger.error(error_msg)
        return {"success": False, "error": error_msg}
    except Exception as e:
        logger.error(f"Error in first pass: {e}")
        return {"success": False, "error": str(e)}
|
||||
|
||||
@staticmethod
def _run_second_pass(
    source_path: str,
    output_path: str,
    target_params: dict,
    measured_params: dict,
) -> dict:
    """Run the loudnorm normalization pass using first-pass measurements.

    The source is re-read and filtered with ``loudnorm``, which is given
    both the targets and the values measured during analysis
    (``linear=true``, ``print_format=summary``). The result is written to
    ``output_path`` as 16-bit PCM at 44.1 kHz, overwriting any existing
    file.

    Args:
        source_path: Path to source audio file
        output_path: Path for normalized output file
        target_params: Target loudnorm parameters; the keys ``integrated``,
            ``true_peak`` and ``lra`` are read.
        measured_params: Parameters measured from first pass; the keys
            ``input_i``, ``input_tp``, ``input_lra``, ``input_thresh`` and
            ``target_offset`` are read.

    Returns:
        dict: ``{"success": True, "stats": ...}`` when ffmpeg completed,
        otherwise ``{"success": False, "error": <message>}``. Errors raised
        while building or running the command are caught, logged and
        reported through the failure form.

    """
    try:
        # Second pass: normalize using measured parameters
        loudnorm_filter = (
            f"loudnorm=I={target_params['integrated']}:"
            f"TP={target_params['true_peak']}:"
            f"LRA={target_params['lra']}:"
            f"measured_I={measured_params['input_i']}:"
            f"measured_TP={measured_params['input_tp']}:"
            f"measured_LRA={measured_params['input_lra']}:"
            f"measured_thresh={measured_params['input_thresh']}:"
            f"offset={measured_params['target_offset']}:"
            "linear=true:"
            "print_format=summary"
        )

        # Create output stream with WAV format
        output_stream = ffmpeg.output(
            ffmpeg.input(source_path),
            output_path,
            acodec="pcm_s16le",  # 16-bit PCM for WAV
            ar=44100,  # 44.1kHz sample rate
            af=loudnorm_filter,
            y=None,  # Overwrite output file
        )

        _, err = ffmpeg.run(
            output_stream, capture_stdout=True, capture_stderr=True,
        )

        # ffmpeg echoes file names and metadata tags on stderr, and those are
        # not guaranteed to be valid UTF-8. A strict decode would report a
        # failure after the output file was already written, so undecodable
        # bytes are replaced instead.
        stderr_text = err.decode(errors="replace") if err else ""

        # Parse final statistics
        return {
            "success": True,
            "stats": SoundNormalizerService._parse_loudnorm_stats(stderr_text),
        }

    except ffmpeg.Error as e:
        # Lenient decode here as well: a UnicodeDecodeError raised inside this
        # handler would escape to the caller instead of becoming a result dict.
        detail = e.stderr.decode(errors="replace") if e.stderr else str(e)
        error_msg = f"Second pass FFmpeg error: {detail}"
        logger.error(error_msg)
        return {"success": False, "error": error_msg}
    except Exception as e:
        logger.error(f"Error in second pass: {e}")
        return {"success": False, "error": str(e)}
|
||||
|
||||
@staticmethod
def _parse_measured_params(stderr_output: str) -> dict:
    """Parse measured parameters from first pass JSON output.

    The JSON object is located by finding the ``"input_i"`` key and taking
    the text from the nearest ``{`` before it to the first ``}`` after it.
    Anchoring on the nearest brace keeps a stray ``{`` earlier in the
    output (for example inside a metadata tag) from being mistaken for the
    start of the object.

    Args:
        stderr_output: ffmpeg stderr output containing JSON data

    Returns:
        dict: The values of ``input_i``, ``input_tp``, ``input_lra``,
        ``input_thresh`` and ``target_offset`` exactly as they appear in
        the JSON. An empty dict is returned when the block cannot be found
        or decoded, or when any of the five values is missing or is not a
        finite number.

    """
    import math

    required_keys = (
        "input_i",
        "input_tp",
        "input_lra",
        "input_thresh",
        "target_offset",
    )

    try:
        # Find JSON block in stderr output
        key_pos = stderr_output.find('"input_i"')
        start = stderr_output.rfind("{", 0, key_pos) if key_pos != -1 else -1
        end = stderr_output.find("}", key_pos) if start != -1 else -1
        if end == -1:
            logger.warning("No JSON block found in first pass output")
            return {}

        measured_data = json.loads(stderr_output[start:end + 1])

        # Extract required parameters. A missing or non-finite measurement
        # (for example "-inf") is treated as a parse failure rather than
        # being replaced by 0, so the second pass never runs with
        # fabricated values.
        measured_params = {}
        for key in required_keys:
            value = measured_data[key]
            if not math.isfinite(float(value)):
                logger.warning(
                    f"Failed to parse measured parameters: {key}={value!r} is not finite"
                )
                return {}
            measured_params[key] = value
        return measured_params

    except (
        json.JSONDecodeError,
        KeyError,
        AttributeError,
        TypeError,
        ValueError,
    ) as e:
        logger.warning(f"Failed to parse measured parameters: {e}")
        return {}
|
||||
|
||||
@staticmethod
|
||||
def _parse_loudnorm_stats(stderr_output: str) -> dict:
|
||||
"""Parse loudnorm statistics from ffmpeg stderr output.
|
||||
|
||||
Reference in New Issue
Block a user