feat(sound_normalizer): implement two-pass normalization and enhance error handling

2025-07-02 17:29:28 +02:00
parent 7128ca727b
commit c241a72c60
1 changed files with 253 additions and 9 deletions
--- a/app/services/sound_normalizer_service.py
+++ b/app/services/sound_normalizer_service.py
@@ -1,7 +1,9 @@
 """Sound normalization service using ffmpeg loudnorm filter."""

 import hashlib
+import json
 import logging
+import re
 from pathlib import Path

 import ffmpeg
@@ -36,12 +38,13 @@ class SoundNormalizerService:
    }

    @staticmethod
-    def normalize_sound(sound_id: int, overwrite: bool = False) -> dict:
+    def normalize_sound(sound_id: int, overwrite: bool = False, two_pass: bool = True) -> dict:
        """Normalize a specific sound file using ffmpeg loudnorm.

        Args:
            sound_id: ID of the sound to normalize
            overwrite: Whether to overwrite existing normalized file
+            two_pass: Whether to use two-pass normalization (default: True)

        Returns:
            dict: Result of the normalization operation
@@ -79,9 +82,14 @@ class SoundNormalizerService:
                f"Starting normalization of {sound.name} ({sound.filename})",
            )

+            if two_pass:
                result = SoundNormalizerService._normalize_with_ffmpeg(
                    str(source_path), str(normalized_path),
                )
+            else:
+                result = SoundNormalizerService._normalize_with_ffmpeg_single_pass(
+                    str(source_path), str(normalized_path),
+                )

            if result["success"]:
                # Calculate normalized file metadata
@@ -123,13 +131,14 @@ class SoundNormalizerService:

    @staticmethod
    def normalize_all_sounds(
-        overwrite: bool = False, limit: int = None,
+        overwrite: bool = False, limit: int = None, two_pass: bool = True,
    ) -> dict:
        """Normalize all soundboard files.

        Args:
            overwrite: Whether to overwrite existing normalized files
            limit: Maximum number of files to process (None for all)
+            two_pass: Whether to use two-pass normalization (default: True)

        Returns:
            dict: Summary of the normalization operation
@@ -162,7 +171,7 @@ class SoundNormalizerService:

            for sound in sounds:
                result = SoundNormalizerService.normalize_sound(
-                    sound.id, overwrite,
+                    sound.id, overwrite, two_pass,
                )
                processed += 1

@@ -201,7 +210,11 @@ class SoundNormalizerService:

    @staticmethod
    def _normalize_with_ffmpeg(source_path: str, output_path: str) -> dict:
-        """Run ffmpeg loudnorm on a single file using python-ffmpeg.
+        """Run ffmpeg loudnorm on a single file using two-pass normalization.
+
+        Two-pass normalization provides better quality by:
+        1. First pass: Analyze the audio to measure its characteristics
+        2. Second pass: Apply normalization using the measured parameters

        Args:
            source_path: Path to source audio file
@@ -215,7 +228,77 @@ class SoundNormalizerService:
            params = SoundNormalizerService.LOUDNORM_PARAMS

            logger.debug(
-                f"Running ffmpeg normalization: {source_path} -> {output_path}",
+                f"Running two-pass ffmpeg normalization: {source_path} -> {output_path}",
+            )
+
+            # FIRST PASS: Analyze the audio to get optimal parameters
+            logger.debug("Starting first pass (analysis)")
+            
+            first_pass_result = SoundNormalizerService._run_first_pass(
+                source_path, params
+            )
+            
+            if not first_pass_result["success"]:
+                return first_pass_result
+
+            measured_params = first_pass_result["measured_params"]
+            
+            # SECOND PASS: Apply normalization using measured parameters
+            logger.debug("Starting second pass (normalization)")
+            
+            second_pass_result = SoundNormalizerService._run_second_pass(
+                source_path, output_path, params, measured_params
+            )
+
+            if not second_pass_result["success"]:
+                return second_pass_result
+
+            # Combine statistics from both passes
+            stats = {
+                **first_pass_result.get("stats", {}),
+                **second_pass_result.get("stats", {}),
+                "two_pass": True,
+                "measured_params": measured_params,
+            }
+
+            if not Path(output_path).exists():
+                return {
+                    "success": False,
+                    "error": "Output file was not created after second pass",
+                }
+
+            logger.debug("Two-pass normalization completed successfully")
+            return {"success": True, "stats": stats}
+
+        except ffmpeg.Error as e:
+            error_msg = (
+                f"FFmpeg error: {e.stderr.decode() if e.stderr else str(e)}"
+            )
+            logger.error(error_msg)
+            return {"success": False, "error": error_msg}
+        except Exception as e:
+            logger.error(f"Error running two-pass ffmpeg normalization: {e}")
+            return {"success": False, "error": str(e)}
+
+    @staticmethod
+    def _normalize_with_ffmpeg_single_pass(source_path: str, output_path: str) -> dict:
+        """Run ffmpeg loudnorm on a single file using single-pass normalization.
+        
+        This is the legacy single-pass method for backward compatibility.
+
+        Args:
+            source_path: Path to source audio file
+            output_path: Path for normalized output file (will be WAV format)
+
+        Returns:
+            dict: Result with success status and loudnorm statistics
+
+        """
+        try:
+            params = SoundNormalizerService.LOUDNORM_PARAMS
+
+            logger.debug(
+                f"Running single-pass ffmpeg normalization: {source_path} -> {output_path}",
            )

            # Create ffmpeg input stream
@@ -259,9 +342,170 @@ class SoundNormalizerService:
            logger.error(error_msg)
            return {"success": False, "error": error_msg}
        except Exception as e:
-            logger.error(f"Error running ffmpeg: {e}")
+            logger.error(f"Error running single-pass ffmpeg: {e}")
            return {"success": False, "error": str(e)}

+    @staticmethod
+    def _run_first_pass(source_path: str, params: dict) -> dict:
+        """Run first pass of loudnorm to analyze audio characteristics.
+
+        The filter runs with ``print_format=json`` and the audio is sent to
+        ffmpeg's ``null`` muxer, so only the report printed on stderr is used.
+
+        Args:
+            source_path: Path to source audio file
+            params: Loudnorm target parameters; the ``integrated``,
+                ``true_peak`` and ``lra`` keys are read
+
+        Returns:
+            dict: ``success`` plus ``measured_params`` and ``stats`` when the
+            analysis worked, or ``success`` plus ``error`` when it did not
+        """
+        try:
+            # Create ffmpeg input stream
+            input_stream = ffmpeg.input(source_path)
+
+            # First pass: analyze only, output to null
+            loudnorm_filter = (
+                f"loudnorm=I={params['integrated']}:"
+                f"TP={params['true_peak']}:"
+                f"LRA={params['lra']}:"
+                f"print_format=json"
+            )
+
+            # Output to null device for analysis
+            output_stream = ffmpeg.output(
+                input_stream,
+                "/dev/null",
+                af=loudnorm_filter,
+                f="null"
+            )
+
+            # Run the first pass; the loudnorm report is printed on stderr,
+            # stdout is captured only to keep it off the console
+            _, err = ffmpeg.run(
+                output_stream, capture_stdout=True, capture_stderr=True,
+            )
+
+            # Decode leniently: stderr may contain bytes that are not valid
+            # UTF-8, and a strict decode would turn a successful analysis
+            # into a failure.
+            stderr_text = err.decode(errors="replace") if err else ""
+
+            # Parse measured parameters from JSON output
+            measured_params = SoundNormalizerService._parse_measured_params(stderr_text)
+
+            if not measured_params:
+                return {
+                    "success": False,
+                    "error": "Failed to parse measured parameters from first pass"
+                }
+
+            # Parse basic stats
+            stats = SoundNormalizerService._parse_loudnorm_stats(stderr_text)
+
+            return {
+                "success": True,
+                "measured_params": measured_params,
+                "stats": stats
+            }
+
+        except ffmpeg.Error as e:
+            # Lenient decode here as well: an exception raised inside this
+            # handler would escape the method instead of yielding an error dict.
+            error_msg = f"First pass FFmpeg error: {e.stderr.decode(errors='replace') if e.stderr else str(e)}"
+            logger.error(error_msg)
+            return {"success": False, "error": error_msg}
+        except Exception as e:
+            logger.error(f"Error in first pass: {e}")
+            return {"success": False, "error": str(e)}
+
+    @staticmethod
+    def _run_second_pass(source_path: str, output_path: str, target_params: dict, measured_params: dict) -> dict:
+        """Run second pass of loudnorm using measured parameters.
+
+        The targets and the first-pass measurements are combined into one
+        loudnorm filter string, and the result is encoded as 16-bit PCM at
+        44.1 kHz to ``output_path``.
+
+        Args:
+            source_path: Path to source audio file
+            output_path: Path for normalized output file
+            target_params: Target loudnorm parameters; the ``integrated``,
+                ``true_peak`` and ``lra`` keys are read
+            measured_params: Parameters measured from first pass; the
+                ``input_i``, ``input_tp``, ``input_lra``, ``input_thresh``
+                and ``target_offset`` keys are read
+
+        Returns:
+            dict: ``success`` plus ``stats`` when ffmpeg ran cleanly, or
+            ``success`` plus ``error`` when it did not
+        """
+        try:
+            # Create ffmpeg input stream
+            input_stream = ffmpeg.input(source_path)
+
+            # Second pass: normalize using measured parameters
+            loudnorm_filter = (
+                f"loudnorm=I={target_params['integrated']}:"
+                f"TP={target_params['true_peak']}:"
+                f"LRA={target_params['lra']}:"
+                f"measured_I={measured_params['input_i']}:"
+                f"measured_TP={measured_params['input_tp']}:"
+                f"measured_LRA={measured_params['input_lra']}:"
+                f"measured_thresh={measured_params['input_thresh']}:"
+                f"offset={measured_params['target_offset']}:"
+                f"linear=true:"
+                f"print_format=summary"
+            )
+
+            # Create output stream with WAV format
+            output_stream = ffmpeg.output(
+                input_stream,
+                output_path,
+                acodec="pcm_s16le",  # 16-bit PCM for WAV
+                ar=44100,  # 44.1kHz sample rate
+                af=loudnorm_filter,
+                y=None,  # Overwrite output file
+            )
+
+            # Run the second pass; the summary is printed on stderr, stdout
+            # is captured only to keep it off the console
+            _, err = ffmpeg.run(
+                output_stream, capture_stdout=True, capture_stderr=True,
+            )
+
+            # Decode leniently: stderr may contain bytes that are not valid
+            # UTF-8, and a strict decode would report a failure even though
+            # the output file was written.
+            stderr_text = err.decode(errors="replace") if err else ""
+
+            # Parse final statistics
+            stats = SoundNormalizerService._parse_loudnorm_stats(stderr_text)
+
+            return {
+                "success": True,
+                "stats": stats
+            }
+
+        except ffmpeg.Error as e:
+            # Lenient decode here as well: an exception raised inside this
+            # handler would escape the method instead of yielding an error dict.
+            error_msg = f"Second pass FFmpeg error: {e.stderr.decode(errors='replace') if e.stderr else str(e)}"
+            logger.error(error_msg)
+            return {"success": False, "error": error_msg}
+        except Exception as e:
+            logger.error(f"Error in second pass: {e}")
+            return {"success": False, "error": str(e)}
+
+    @staticmethod
+    def _parse_measured_params(stderr_output: str) -> dict:
+        """Parse measured parameters from first pass JSON output.
+
+        All five measurements must be present and must be finite numbers.
+        A missing key is treated as a parse failure instead of being replaced
+        by ``0``, because ``0`` would be handed to the second pass as if it
+        were a real measurement. Values are returned exactly as they appear
+        in the JSON block.
+
+        Args:
+            stderr_output: ffmpeg stderr output containing JSON data
+
+        Returns:
+            dict: Parsed measured parameters, empty if parsing fails
+        """
+        import math
+
+        required_keys = (
+            "input_i",
+            "input_tp",
+            "input_lra",
+            "input_thresh",
+            "target_offset",
+        )
+
+        try:
+            # Find JSON block in stderr output
+            json_match = re.search(r'\{[^}]*"input_i"[^}]*\}', stderr_output, re.DOTALL)
+            if not json_match:
+                logger.warning("No JSON block found in first pass output")
+                return {}
+
+            measured_data = json.loads(json_match.group(0))
+
+            # Extract required parameters; a missing key raises KeyError
+            measured_params = {key: measured_data[key] for key in required_keys}
+
+            # Reject values the second pass cannot use, such as "-inf" or
+            # "nan"; float() raises on anything that is not numeric at all.
+            for key, value in measured_params.items():
+                if not math.isfinite(float(value)):
+                    logger.warning(
+                        f"Failed to parse measured parameters: {key} is not finite ({value})"
+                    )
+                    return {}
+
+            return measured_params
+
+        except (json.JSONDecodeError, KeyError, AttributeError, TypeError, ValueError) as e:
+            logger.warning(f"Failed to parse measured parameters: {e}")
+            return {}
+
    @staticmethod
    def _parse_loudnorm_stats(stderr_output: str) -> dict:
        """Parse loudnorm statistics from ffmpeg stderr output.