feat(sound_normalizer): implement two-pass normalization and enhance error handling
This commit is contained in:
@@ -1,7 +1,9 @@
|
|||||||
"""Sound normalization service using ffmpeg loudnorm filter."""
|
"""Sound normalization service using ffmpeg loudnorm filter."""
|
||||||
|
|
||||||
import hashlib
|
import hashlib
|
||||||
|
import json
|
||||||
import logging
|
import logging
|
||||||
|
import re
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
import ffmpeg
|
import ffmpeg
|
||||||
@@ -36,12 +38,13 @@ class SoundNormalizerService:
|
|||||||
}
|
}
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def normalize_sound(sound_id: int, overwrite: bool = False) -> dict:
|
def normalize_sound(sound_id: int, overwrite: bool = False, two_pass: bool = True) -> dict:
|
||||||
"""Normalize a specific sound file using ffmpeg loudnorm.
|
"""Normalize a specific sound file using ffmpeg loudnorm.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
sound_id: ID of the sound to normalize
|
sound_id: ID of the sound to normalize
|
||||||
overwrite: Whether to overwrite existing normalized file
|
overwrite: Whether to overwrite existing normalized file
|
||||||
|
two_pass: Whether to use two-pass normalization (default: True)
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
dict: Result of the normalization operation
|
dict: Result of the normalization operation
|
||||||
@@ -79,9 +82,14 @@ class SoundNormalizerService:
|
|||||||
f"Starting normalization of {sound.name} ({sound.filename})",
|
f"Starting normalization of {sound.name} ({sound.filename})",
|
||||||
)
|
)
|
||||||
|
|
||||||
result = SoundNormalizerService._normalize_with_ffmpeg(
|
if two_pass:
|
||||||
str(source_path), str(normalized_path),
|
result = SoundNormalizerService._normalize_with_ffmpeg(
|
||||||
)
|
str(source_path), str(normalized_path),
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
result = SoundNormalizerService._normalize_with_ffmpeg_single_pass(
|
||||||
|
str(source_path), str(normalized_path),
|
||||||
|
)
|
||||||
|
|
||||||
if result["success"]:
|
if result["success"]:
|
||||||
# Calculate normalized file metadata
|
# Calculate normalized file metadata
|
||||||
@@ -123,13 +131,14 @@ class SoundNormalizerService:
|
|||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def normalize_all_sounds(
|
def normalize_all_sounds(
|
||||||
overwrite: bool = False, limit: int = None,
|
overwrite: bool = False, limit: int = None, two_pass: bool = True,
|
||||||
) -> dict:
|
) -> dict:
|
||||||
"""Normalize all soundboard files.
|
"""Normalize all soundboard files.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
overwrite: Whether to overwrite existing normalized files
|
overwrite: Whether to overwrite existing normalized files
|
||||||
limit: Maximum number of files to process (None for all)
|
limit: Maximum number of files to process (None for all)
|
||||||
|
two_pass: Whether to use two-pass normalization (default: True)
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
dict: Summary of the normalization operation
|
dict: Summary of the normalization operation
|
||||||
@@ -162,7 +171,7 @@ class SoundNormalizerService:
|
|||||||
|
|
||||||
for sound in sounds:
|
for sound in sounds:
|
||||||
result = SoundNormalizerService.normalize_sound(
|
result = SoundNormalizerService.normalize_sound(
|
||||||
sound.id, overwrite,
|
sound.id, overwrite, two_pass,
|
||||||
)
|
)
|
||||||
processed += 1
|
processed += 1
|
||||||
|
|
||||||
@@ -201,7 +210,11 @@ class SoundNormalizerService:
|
|||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _normalize_with_ffmpeg(source_path: str, output_path: str) -> dict:
|
def _normalize_with_ffmpeg(source_path: str, output_path: str) -> dict:
|
||||||
"""Run ffmpeg loudnorm on a single file using python-ffmpeg.
|
"""Run ffmpeg loudnorm on a single file using two-pass normalization.
|
||||||
|
|
||||||
|
Two-pass normalization provides better quality by:
|
||||||
|
1. First pass: Analyze the audio to measure its characteristics
|
||||||
|
2. Second pass: Apply normalization using the measured parameters
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
source_path: Path to source audio file
|
source_path: Path to source audio file
|
||||||
@@ -215,7 +228,77 @@ class SoundNormalizerService:
|
|||||||
params = SoundNormalizerService.LOUDNORM_PARAMS
|
params = SoundNormalizerService.LOUDNORM_PARAMS
|
||||||
|
|
||||||
logger.debug(
|
logger.debug(
|
||||||
f"Running ffmpeg normalization: {source_path} -> {output_path}",
|
f"Running two-pass ffmpeg normalization: {source_path} -> {output_path}",
|
||||||
|
)
|
||||||
|
|
||||||
|
# FIRST PASS: Analyze the audio to get optimal parameters
|
||||||
|
logger.debug("Starting first pass (analysis)")
|
||||||
|
|
||||||
|
first_pass_result = SoundNormalizerService._run_first_pass(
|
||||||
|
source_path, params
|
||||||
|
)
|
||||||
|
|
||||||
|
if not first_pass_result["success"]:
|
||||||
|
return first_pass_result
|
||||||
|
|
||||||
|
measured_params = first_pass_result["measured_params"]
|
||||||
|
|
||||||
|
# SECOND PASS: Apply normalization using measured parameters
|
||||||
|
logger.debug("Starting second pass (normalization)")
|
||||||
|
|
||||||
|
second_pass_result = SoundNormalizerService._run_second_pass(
|
||||||
|
source_path, output_path, params, measured_params
|
||||||
|
)
|
||||||
|
|
||||||
|
if not second_pass_result["success"]:
|
||||||
|
return second_pass_result
|
||||||
|
|
||||||
|
# Combine statistics from both passes
|
||||||
|
stats = {
|
||||||
|
**first_pass_result.get("stats", {}),
|
||||||
|
**second_pass_result.get("stats", {}),
|
||||||
|
"two_pass": True,
|
||||||
|
"measured_params": measured_params,
|
||||||
|
}
|
||||||
|
|
||||||
|
if not Path(output_path).exists():
|
||||||
|
return {
|
||||||
|
"success": False,
|
||||||
|
"error": "Output file was not created after second pass",
|
||||||
|
}
|
||||||
|
|
||||||
|
logger.debug("Two-pass normalization completed successfully")
|
||||||
|
return {"success": True, "stats": stats}
|
||||||
|
|
||||||
|
except ffmpeg.Error as e:
|
||||||
|
error_msg = (
|
||||||
|
f"FFmpeg error: {e.stderr.decode() if e.stderr else str(e)}"
|
||||||
|
)
|
||||||
|
logger.error(error_msg)
|
||||||
|
return {"success": False, "error": error_msg}
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error running two-pass ffmpeg normalization: {e}")
|
||||||
|
return {"success": False, "error": str(e)}
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _normalize_with_ffmpeg_single_pass(source_path: str, output_path: str) -> dict:
|
||||||
|
"""Run ffmpeg loudnorm on a single file using single-pass normalization.
|
||||||
|
|
||||||
|
This is the legacy single-pass method for backward compatibility.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
source_path: Path to source audio file
|
||||||
|
output_path: Path for normalized output file (will be WAV format)
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
dict: Result with success status and loudnorm statistics
|
||||||
|
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
params = SoundNormalizerService.LOUDNORM_PARAMS
|
||||||
|
|
||||||
|
logger.debug(
|
||||||
|
f"Running single-pass ffmpeg normalization: {source_path} -> {output_path}",
|
||||||
)
|
)
|
||||||
|
|
||||||
# Create ffmpeg input stream
|
# Create ffmpeg input stream
|
||||||
@@ -259,9 +342,170 @@ class SoundNormalizerService:
|
|||||||
logger.error(error_msg)
|
logger.error(error_msg)
|
||||||
return {"success": False, "error": error_msg}
|
return {"success": False, "error": error_msg}
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error running ffmpeg: {e}")
|
logger.error(f"Error running single-pass ffmpeg: {e}")
|
||||||
return {"success": False, "error": str(e)}
|
return {"success": False, "error": str(e)}
|
||||||
|
|
||||||
|
@staticmethod
def _run_first_pass(source_path: str, params: dict) -> dict:
    """Run the analysis pass of loudnorm and collect the measured values.

    Builds a loudnorm filter from the target values in ``params`` with
    ``print_format=json``, runs ffmpeg with the ``null`` output format,
    and parses the JSON report found in ffmpeg's stderr.

    Args:
        source_path: Path to source audio file
        params: Loudnorm target parameters; the keys ``integrated``,
            ``true_peak`` and ``lra`` are read

    Returns:
        dict: ``{"success": True, "measured_params": ..., "stats": ...}``
            when the analysis succeeds, otherwise
            ``{"success": False, "error": <message>}``
    """
    try:
        # Create ffmpeg input stream
        input_stream = ffmpeg.input(source_path)

        # First pass: analyze only, report the measurements as JSON
        loudnorm_filter = (
            f"loudnorm=I={params['integrated']}:"
            f"TP={params['true_peak']}:"
            f"LRA={params['lra']}:"
            "print_format=json"
        )

        # Output to null device for analysis
        output_stream = ffmpeg.output(
            input_stream,
            "/dev/null",
            af=loudnorm_filter,
            f="null",
        )

        # Run the first pass; only stderr is needed
        _, err = ffmpeg.run(
            output_stream, capture_stdout=True, capture_stderr=True,
        )

        # ffmpeg echoes file names and tags that are not guaranteed to be
        # valid UTF-8, so decode leniently instead of failing the pass.
        stderr_text = err.decode(errors="replace") if err else ""

        # Parse measured parameters from JSON output
        measured_params = SoundNormalizerService._parse_measured_params(
            stderr_text,
        )

        if not measured_params:
            return {
                "success": False,
                "error": "Failed to parse measured parameters from first pass",
            }

        # Parse basic stats
        stats = SoundNormalizerService._parse_loudnorm_stats(stderr_text)

        return {
            "success": True,
            "measured_params": measured_params,
            "stats": stats,
        }

    except ffmpeg.Error as e:
        # A strict decode here could raise inside the handler and escape
        # the function, so undecodable bytes are replaced instead.
        details = e.stderr.decode(errors="replace") if e.stderr else str(e)
        error_msg = f"First pass FFmpeg error: {details}"
        logger.error(error_msg)
        return {"success": False, "error": error_msg}
    except Exception as e:
        logger.error(f"Error in first pass: {e}")
        return {"success": False, "error": str(e)}
|
||||||
|
|
||||||
|
@staticmethod
def _run_second_pass(source_path: str, output_path: str, target_params: dict, measured_params: dict) -> dict:
    """Run the normalization pass of loudnorm using measured parameters.

    Combines the target values with the measurements from the first pass
    into a loudnorm filter (``linear=true``, ``print_format=summary``) and
    encodes the result to ``output_path`` as 16-bit PCM at 44.1 kHz,
    overwriting any existing file.

    Args:
        source_path: Path to source audio file
        output_path: Path for normalized output file
        target_params: Target loudnorm parameters; the keys
            ``integrated``, ``true_peak`` and ``lra`` are read
        measured_params: Parameters measured from first pass; the keys
            ``input_i``, ``input_tp``, ``input_lra``, ``input_thresh``
            and ``target_offset`` are read

    Returns:
        dict: ``{"success": True, "stats": ...}`` on success, otherwise
            ``{"success": False, "error": <message>}``
    """
    try:
        # Create ffmpeg input stream
        input_stream = ffmpeg.input(source_path)

        # Second pass: normalize using measured parameters
        loudnorm_filter = (
            f"loudnorm=I={target_params['integrated']}:"
            f"TP={target_params['true_peak']}:"
            f"LRA={target_params['lra']}:"
            f"measured_I={measured_params['input_i']}:"
            f"measured_TP={measured_params['input_tp']}:"
            f"measured_LRA={measured_params['input_lra']}:"
            f"measured_thresh={measured_params['input_thresh']}:"
            f"offset={measured_params['target_offset']}:"
            "linear=true:"
            "print_format=summary"
        )

        # Create output stream with WAV format
        output_stream = ffmpeg.output(
            input_stream,
            output_path,
            acodec="pcm_s16le",  # 16-bit PCM for WAV
            ar=44100,  # 44.1kHz sample rate
            af=loudnorm_filter,
            y=None,  # Overwrite output file
        )

        # Run the second pass; only stderr is needed
        _, err = ffmpeg.run(
            output_stream, capture_stdout=True, capture_stderr=True,
        )

        # ffmpeg echoes file names and tags that are not guaranteed to be
        # valid UTF-8, so decode leniently instead of failing the pass.
        stderr_text = err.decode(errors="replace") if err else ""

        # Parse final statistics
        stats = SoundNormalizerService._parse_loudnorm_stats(stderr_text)

        return {
            "success": True,
            "stats": stats,
        }

    except ffmpeg.Error as e:
        # A strict decode here could raise inside the handler and escape
        # the function, so undecodable bytes are replaced instead.
        details = e.stderr.decode(errors="replace") if e.stderr else str(e)
        error_msg = f"Second pass FFmpeg error: {details}"
        logger.error(error_msg)
        return {"success": False, "error": error_msg}
    except Exception as e:
        logger.error(f"Error in second pass: {e}")
        return {"success": False, "error": str(e)}
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _parse_measured_params(stderr_output: str) -> dict:
|
||||||
|
"""Parse measured parameters from first pass JSON output.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
stderr_output: ffmpeg stderr output containing JSON data
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
dict: Parsed measured parameters, empty if parsing fails
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
# Find JSON block in stderr output
|
||||||
|
json_match = re.search(r'\{[^}]*"input_i"[^}]*\}', stderr_output, re.DOTALL)
|
||||||
|
if not json_match:
|
||||||
|
logger.warning("No JSON block found in first pass output")
|
||||||
|
return {}
|
||||||
|
|
||||||
|
json_str = json_match.group(0)
|
||||||
|
measured_data = json.loads(json_str)
|
||||||
|
|
||||||
|
# Extract required parameters
|
||||||
|
return {
|
||||||
|
"input_i": measured_data.get("input_i", 0),
|
||||||
|
"input_tp": measured_data.get("input_tp", 0),
|
||||||
|
"input_lra": measured_data.get("input_lra", 0),
|
||||||
|
"input_thresh": measured_data.get("input_thresh", 0),
|
||||||
|
"target_offset": measured_data.get("target_offset", 0),
|
||||||
|
}
|
||||||
|
|
||||||
|
except (json.JSONDecodeError, KeyError, AttributeError) as e:
|
||||||
|
logger.warning(f"Failed to parse measured parameters: {e}")
|
||||||
|
return {}
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _parse_loudnorm_stats(stderr_output: str) -> dict:
|
def _parse_loudnorm_stats(stderr_output: str) -> dict:
|
||||||
"""Parse loudnorm statistics from ffmpeg stderr output.
|
"""Parse loudnorm statistics from ffmpeg stderr output.
|
||||||
|
|||||||
Reference in New Issue
Block a user