feat(sound_normalizer): implement two-pass normalization and enhance error handling

2025-07-02 17:29:28 +02:00
parent 7128ca727b
commit c241a72c60
1 changed files with 253 additions and 9 deletions
--- a/app/services/sound_normalizer_service.py
+++ b/app/services/sound_normalizer_service.py
@@ -1,7 +1,9 @@
 """Sound normalization service using ffmpeg loudnorm filter."""
 import hashlib
 import json
 import logging
 import re
 from pathlib import Path
 import ffmpeg
@@ -36,12 +38,13 @@ class SoundNormalizerService:
    }
    @staticmethod
-    def normalize_sound(sound_id: int, overwrite: bool = False) -> dict:
+    def normalize_sound(sound_id: int, overwrite: bool = False, two_pass: bool = True) -> dict:
        """Normalize a specific sound file using ffmpeg loudnorm.
        Args:
            sound_id: ID of the sound to normalize
            overwrite: Whether to overwrite existing normalized file
            two_pass: Whether to use two-pass normalization (default: True)
        Returns:
            dict: Result of the normalization operation
@@ -79,9 +82,14 @@ class SoundNormalizerService:
                f"Starting normalization of {sound.name} ({sound.filename})",
            )
            if two_pass:
                result = SoundNormalizerService._normalize_with_ffmpeg(
                    str(source_path), str(normalized_path),
                )
            else:
                result = SoundNormalizerService._normalize_with_ffmpeg_single_pass(
                    str(source_path), str(normalized_path),
                )
            if result["success"]:
                # Calculate normalized file metadata
@@ -123,13 +131,14 @@ class SoundNormalizerService:
    @staticmethod
    def normalize_all_sounds(
-        overwrite: bool = False, limit: int = None,
+        overwrite: bool = False, limit: int = None, two_pass: bool = True,
    ) -> dict:
        """Normalize all soundboard files.
        Args:
            overwrite: Whether to overwrite existing normalized files
            limit: Maximum number of files to process (None for all)
            two_pass: Whether to use two-pass normalization (default: True)
        Returns:
            dict: Summary of the normalization operation
@@ -162,7 +171,7 @@ class SoundNormalizerService:
            for sound in sounds:
                result = SoundNormalizerService.normalize_sound(
-                    sound.id, overwrite,
+                    sound.id, overwrite, two_pass,
                )
                processed += 1
@@ -201,7 +210,11 @@ class SoundNormalizerService:
    @staticmethod
    def _normalize_with_ffmpeg(source_path: str, output_path: str) -> dict:
-        """Run ffmpeg loudnorm on a single file using python-ffmpeg.
+        """Run ffmpeg loudnorm on a single file using two-pass normalization.
        Two-pass normalization provides better quality by:
        1. First pass: Analyze the audio to measure its characteristics
        2. Second pass: Apply normalization using the measured parameters
        Args:
            source_path: Path to source audio file
@@ -215,7 +228,77 @@ class SoundNormalizerService:
            params = SoundNormalizerService.LOUDNORM_PARAMS
            logger.debug(
-                f"Running ffmpeg normalization: {source_path} -> {output_path}",
+                f"Running two-pass ffmpeg normalization: {source_path} -> {output_path}",
            )
            # FIRST PASS: Analyze the audio to get optimal parameters
            logger.debug("Starting first pass (analysis)")
            first_pass_result = SoundNormalizerService._run_first_pass(
                source_path, params
            )
            if not first_pass_result["success"]:
                return first_pass_result
            measured_params = first_pass_result["measured_params"]
            # SECOND PASS: Apply normalization using measured parameters
            logger.debug("Starting second pass (normalization)")
            second_pass_result = SoundNormalizerService._run_second_pass(
                source_path, output_path, params, measured_params
            )
            if not second_pass_result["success"]:
                return second_pass_result
            # Combine statistics from both passes
            stats = {
                **first_pass_result.get("stats", {}),
                **second_pass_result.get("stats", {}),
                "two_pass": True,
                "measured_params": measured_params,
            }
            if not Path(output_path).exists():
                return {
                    "success": False,
                    "error": "Output file was not created after second pass",
                }
            logger.debug("Two-pass normalization completed successfully")
            return {"success": True, "stats": stats}
        except ffmpeg.Error as e:
            error_msg = (
                f"FFmpeg error: {e.stderr.decode() if e.stderr else str(e)}"
            )
            logger.error(error_msg)
            return {"success": False, "error": error_msg}
        except Exception as e:
            logger.error(f"Error running two-pass ffmpeg normalization: {e}")
            return {"success": False, "error": str(e)}
    @staticmethod
    def _normalize_with_ffmpeg_single_pass(source_path: str, output_path: str) -> dict:
        """Run ffmpeg loudnorm on a single file using single-pass normalization.
        This is the legacy single-pass method for backward compatibility.
        Args:
            source_path: Path to source audio file
            output_path: Path for normalized output file (will be WAV format)
        Returns:
            dict: Result with success status and loudnorm statistics
        """
        try:
            params = SoundNormalizerService.LOUDNORM_PARAMS
            logger.debug(
                f"Running single-pass ffmpeg normalization: {source_path} -> {output_path}",
            )
            # Create ffmpeg input stream
@@ -259,9 +342,170 @@ class SoundNormalizerService:
            logger.error(error_msg)
            return {"success": False, "error": error_msg}
        except Exception as e:
-            logger.error(f"Error running ffmpeg: {e}")
+            logger.error(f"Error running single-pass ffmpeg: {e}")
            return {"success": False, "error": str(e)}
    @staticmethod
    def _run_first_pass(source_path: str, params: dict) -> dict:
        """Run the loudnorm analysis pass and collect the measured values.

        The source is fed through the loudnorm filter with
        ``print_format=json`` and the result is sent to ffmpeg's ``null``
        format, so no audio file is produced. The measurements are then
        parsed out of whatever ffmpeg wrote to stderr.

        Args:
            source_path: Path to source audio file
            params: Loudnorm target parameters; the ``integrated``,
                ``true_peak`` and ``lra`` keys are read

        Returns:
            dict: ``{"success": True, "measured_params": ..., "stats": ...}``
                when the analysis output could be parsed, otherwise
                ``{"success": False, "error": ...}``

        """
        try:
            # Analysis-only filter: targets plus JSON report, no measured_*
            # inputs yet.
            loudnorm_filter = (
                f"loudnorm=I={params['integrated']}:"
                f"TP={params['true_peak']}:"
                f"LRA={params['lra']}:"
                f"print_format=json"
            )

            # Output to null device for analysis
            output_stream = ffmpeg.output(
                ffmpeg.input(source_path),
                "/dev/null",
                af=loudnorm_filter,
                f="null",
            )

            # Only stderr is needed; stdout is captured just to keep it quiet.
            _, err = ffmpeg.run(
                output_stream, capture_stdout=True, capture_stderr=True,
            )

            # ffmpeg echoes input metadata on stderr, which is not guaranteed
            # to be valid UTF-8. Replace undecodable bytes instead of letting
            # a stray tag byte abort an otherwise successful analysis.
            stderr_text = err.decode(errors="replace") if err else ""

            # Parse measured parameters from JSON output
            measured_params = SoundNormalizerService._parse_measured_params(
                stderr_text,
            )
            if not measured_params:
                return {
                    "success": False,
                    "error": "Failed to parse measured parameters from first pass",
                }

            # Parse basic stats
            stats = SoundNormalizerService._parse_loudnorm_stats(stderr_text)

            return {
                "success": True,
                "measured_params": measured_params,
                "stats": stats,
            }

        except ffmpeg.Error as e:
            # Tolerant decode: a UnicodeDecodeError raised inside this handler
            # would escape the method instead of being reported as a failure.
            detail = e.stderr.decode(errors="replace") if e.stderr else str(e)
            error_msg = f"First pass FFmpeg error: {detail}"
            logger.error(error_msg)
            return {"success": False, "error": error_msg}
        except Exception as e:
            logger.error(f"Error in first pass: {e}")
            return {"success": False, "error": str(e)}
    @staticmethod
    def _run_second_pass(source_path: str, output_path: str, target_params: dict, measured_params: dict) -> dict:
        """Run second pass of loudnorm using measured parameters.

        Builds a loudnorm filter from the targets plus the first-pass
        measurements and writes the result as 16-bit PCM at 44.1 kHz.

        Args:
            source_path: Path to source audio file
            output_path: Path for normalized output file
            target_params: Target loudnorm parameters; the ``integrated``,
                ``true_peak`` and ``lra`` keys are read
            measured_params: Parameters measured from first pass; the
                ``input_i``, ``input_tp``, ``input_lra``, ``input_thresh``
                and ``target_offset`` keys are read

        Returns:
            dict: ``{"success": True, "stats": ...}`` when ffmpeg completes,
                otherwise ``{"success": False, "error": ...}``

        """
        try:
            # Second pass: normalize using measured parameters
            loudnorm_filter = (
                f"loudnorm=I={target_params['integrated']}:"
                f"TP={target_params['true_peak']}:"
                f"LRA={target_params['lra']}:"
                f"measured_I={measured_params['input_i']}:"
                f"measured_TP={measured_params['input_tp']}:"
                f"measured_LRA={measured_params['input_lra']}:"
                f"measured_thresh={measured_params['input_thresh']}:"
                f"offset={measured_params['target_offset']}:"
                f"linear=true:"
                f"print_format=summary"
            )

            # Create output stream with WAV format
            output_stream = ffmpeg.output(
                ffmpeg.input(source_path),
                output_path,
                acodec="pcm_s16le",  # 16-bit PCM for WAV
                ar=44100,  # 44.1kHz sample rate
                af=loudnorm_filter,
                y=None,  # Overwrite output file
            )

            # Only stderr is needed; stdout is captured just to keep it quiet.
            _, err = ffmpeg.run(
                output_stream, capture_stdout=True, capture_stderr=True,
            )

            # ffmpeg stderr may carry non-UTF-8 metadata bytes; replace them
            # so a finished encode is not reported as a failure.
            stderr_text = err.decode(errors="replace") if err else ""

            # Parse final statistics
            stats = SoundNormalizerService._parse_loudnorm_stats(stderr_text)

            return {
                "success": True,
                "stats": stats,
            }

        except ffmpeg.Error as e:
            # Tolerant decode: a UnicodeDecodeError raised inside this handler
            # would escape the method instead of being reported as a failure.
            detail = e.stderr.decode(errors="replace") if e.stderr else str(e)
            error_msg = f"Second pass FFmpeg error: {detail}"
            logger.error(error_msg)
            return {"success": False, "error": error_msg}
        except Exception as e:
            logger.error(f"Error in second pass: {e}")
            return {"success": False, "error": str(e)}
    @staticmethod
    def _parse_measured_params(stderr_output: str) -> dict:
        """Parse measured parameters from first pass JSON output.

        Looks for the flat JSON object containing ``"input_i"`` in the given
        text and returns the five values needed to build the second-pass
        filter. Values are returned exactly as they appear in the JSON.

        Args:
            stderr_output: ffmpeg stderr output containing JSON data

        Returns:
            dict: ``input_i``, ``input_tp``, ``input_lra``, ``input_thresh``
                and ``target_offset`` (each defaulting to 0 when absent from
                the JSON); empty dict if no block is found or parsing fails

        """
        try:
            # Exclude "{" as well as "}" on both sides of the key so the match
            # starts at the JSON object's own opening brace. Otherwise a stray
            # "{" earlier in the output (a filename or metadata line, say)
            # would be pulled into the match and break json.loads.
            json_match = re.search(r'\{[^{}]*"input_i"[^{}]*\}', stderr_output)
            if not json_match:
                logger.warning("No JSON block found in first pass output")
                return {}

            measured_data = json.loads(json_match.group(0))

            # Extract required parameters
            wanted_keys = (
                "input_i",
                "input_tp",
                "input_lra",
                "input_thresh",
                "target_offset",
            )
            return {key: measured_data.get(key, 0) for key in wanted_keys}

        except (json.JSONDecodeError, KeyError, AttributeError) as e:
            logger.warning(f"Failed to parse measured parameters: {e}")
            return {}
    @staticmethod
    def _parse_loudnorm_stats(stderr_output: str) -> dict:
        """Parse loudnorm statistics from ffmpeg stderr output.