# sdb2-backend/app/services/sound_normalizer.py

"""Sound normalizer service for normalizing audio files using ffmpeg loudnorm."""

import asyncio
import hashlib
import json
import os
import re
from pathlib import Path
from typing import TypedDict

import ffmpeg  # type: ignore[import-untyped]
from sqlmodel.ext.asyncio.session import AsyncSession

from app.core.config import settings
from app.core.logging import get_logger
from app.models.sound import Sound
from app.repositories.sound import SoundRepository

logger = get_logger(__name__)


class NormalizationInfo(TypedDict):
    """Per-sound outcome of a single normalization attempt.

    One entry is produced for every sound handled by the service, whether it
    was normalized, skipped, or failed. Fields that do not apply to a given
    outcome are set to ``None``.
    """

    # Filename of the original sound file.
    filename: str
    # Outcome of the attempt: "normalized", "skipped" or "error".
    status: str
    # Why the sound was skipped (e.g. "already normalized"); None otherwise.
    reason: str | None
    # String form of the original file path, when it was resolved.
    original_path: str | None
    # String form of the normalized output path, when it was resolved.
    normalized_path: str | None
    # Basename of the normalized output file; set only on success.
    normalized_filename: str | None
    # Duration of the normalized file in milliseconds (0 if probing failed).
    normalized_duration: int | None
    # Size of the normalized file in bytes.
    normalized_size: int | None
    # Hex SHA-256 digest of the normalized file.
    normalized_hash: str | None
    # Identifier of the sound record.
    id: int | None
    # Error message when status is "error"; None otherwise.
    error: str | None


class NormalizationResults(TypedDict):
    """Aggregate outcome of a batch normalization run."""

    # Number of sounds the batch iterated over (incremented once per sound).
    processed: int
    # Number of sounds whose per-file status was "normalized".
    normalized: int
    # Number of sounds whose per-file status was "skipped".
    skipped: int
    # Number of sounds that ended with status "error" or raised unexpectedly.
    errors: int
    # Per-sound entries, appended in processing order.
    files: list[NormalizationInfo]


class SoundNormalizerService:
    """Service for normalizing audio files using ffmpeg loudnorm."""

    def __init__(self, session: AsyncSession) -> None:
        """Set up the service around a database session.

        Binds a ``SoundRepository`` to ``session``, reads the output format,
        bitrate and pass count from application settings, and creates the
        normalized-output directories on disk if they are missing.
        """
        self.session = session
        self.sound_repo = SoundRepository(session)

        # Encoder configuration comes straight from application settings.
        self.output_format = settings.NORMALIZED_AUDIO_FORMAT
        self.output_bitrate = settings.NORMALIZED_AUDIO_BITRATE
        self.passes = settings.NORMALIZED_AUDIO_PASSES

        # Sound type code -> directory receiving its normalized files.
        normalized_dirs: dict[str, str] = {}
        normalized_dirs["SDB"] = "sounds/normalized/soundboard"
        normalized_dirs["TTS"] = "sounds/normalized/text_to_speech"
        normalized_dirs["EXT"] = "sounds/normalized/extracted"
        self.type_directories = normalized_dirs

        # Create the output directories eagerly so later writes cannot fail
        # on a missing folder.
        self._ensure_directories()

    def _ensure_directories(self) -> None:
        """Create each configured normalized-output directory if it is missing."""
        for dir_name in self.type_directories.values():
            target = Path(dir_name)
            # parents/exist_ok make this safe to call repeatedly.
            target.mkdir(parents=True, exist_ok=True)
            logger.debug("Ensured directory exists: %s", dir_name)

    def _get_normalized_path(self, sound: Sound) -> Path:
        """Get the normalized file path for a sound."""
        return self._get_normalized_path_from_data(sound.type, sound.filename)

    def _get_normalized_path_from_data(self, sound_type: str, filename: str) -> Path:
        """Get the normalized file path from sound data."""
        # Get the appropriate directory for the sound type
        directory = self.type_directories.get(sound_type, "sounds/normalized/other")

        # Create the directory if it doesn't exist
        Path(directory).mkdir(parents=True, exist_ok=True)

        # Generate filename: original_name.{format}
        original_stem = Path(filename).stem
        normalized_filename = f"{original_stem}.{self.output_format}"

        return Path(directory) / normalized_filename

    def _get_original_path(self, sound: Sound) -> Path:
        """Get the original file path for a sound."""
        return self._get_original_path_from_data(sound.type, sound.filename)

    def _get_original_path_from_data(self, sound_type: str, filename: str) -> Path:
        """Get the original file path from sound data."""
        # Map sound types to their original directories
        type_to_original_dir = {
            "SDB": "sounds/originals/soundboard",
            "TTS": "sounds/originals/text_to_speech",
            "EXT": "sounds/originals/extracted",
        }

        original_dir = type_to_original_dir.get(sound_type, "sounds/originals/other")
        return Path(original_dir) / filename

    def _get_file_hash(self, file_path: Path) -> str:
        """Calculate SHA-256 hash of a file."""
        hash_sha256 = hashlib.sha256()
        with open(file_path, "rb") as f:
            for chunk in iter(lambda: f.read(4096), b""):
                hash_sha256.update(chunk)
        return hash_sha256.hexdigest()

    def _get_file_size(self, file_path: Path) -> int:
        """Get file size in bytes."""
        return file_path.stat().st_size

    def _get_audio_duration(self, file_path: Path) -> int:
        """Return the audio duration in milliseconds, or 0 if it cannot be read.

        The duration is taken from the ``format.duration`` field of the ffmpeg
        probe output. Any failure is logged as a warning and reported as 0
        rather than raised.
        """
        try:
            format_info = ffmpeg.probe(str(file_path))["format"]
            # The probe reports seconds; callers expect milliseconds.
            millis = int(float(format_info["duration"]) * 1000)
        except Exception as exc:
            logger.warning("Failed to get duration for %s: %s", file_path, exc)
            return 0
        return millis

    async def _normalize_audio_one_pass(
        self,
        input_path: Path,
        output_path: Path,
    ) -> None:
        """Normalize audio with a single loudnorm pass.

        Applies ffmpeg's ``loudnorm`` filter (I=-23, TP=-2, LRA=7) to
        ``input_path`` and writes the result to ``output_path``, overwriting
        any existing file. For the mp3/aac/opus output formats an explicit
        codec and the configured bitrate are passed; other formats use
        ffmpeg's defaults.

        Args:
            input_path: Source audio file.
            output_path: Destination file for the normalized audio.

        Raises:
            Exception: Any error raised while building or running the ffmpeg
                command is logged and re-raised unchanged.
        """
        try:
            logger.info(
                "Starting one-pass normalization: %s -> %s",
                input_path,
                output_path,
            )

            stream = ffmpeg.input(str(input_path))
            stream = ffmpeg.filter(stream, "loudnorm", I=-23, TP=-2, LRA=7)

            # Output format -> encoder; formats not listed get no explicit
            # codec or bitrate.
            codec_by_format = {"mp3": "libmp3lame", "aac": "aac", "opus": "libopus"}
            output_args = {}
            codec = codec_by_format.get(self.output_format)
            if codec is not None:
                output_args["acodec"] = codec
                output_args["audio_bitrate"] = self.output_bitrate

            stream = ffmpeg.output(stream, str(output_path), **output_args)
            stream = ffmpeg.overwrite_output(stream)

            # ffmpeg.run blocks until the subprocess exits; run it in a worker
            # thread so the event loop stays responsive during encoding.
            await asyncio.to_thread(
                ffmpeg.run,
                stream,
                quiet=True,
                overwrite_output=True,
            )
            logger.info("One-pass normalization completed: %s", output_path)

        except Exception:
            logger.exception("One-pass normalization failed for %s", input_path)
            raise

    async def _normalize_audio_two_pass(
        self,
        input_path: Path,
        output_path: Path,
    ) -> None:
        """Normalize audio with two loudnorm passes (measure, then apply).

        The first pass runs ``loudnorm`` with ``print_format=json`` against a
        null output and parses the measurements from ffmpeg's stderr. The
        second pass feeds those measurements back to ``loudnorm`` together
        with the same loudness targets and writes ``output_path``,
        overwriting any existing file.

        If any measurement is ``inf``/``-inf``/``nan``, the method falls back
        to one-pass normalization instead.

        Args:
            input_path: Source audio file.
            output_path: Destination file for the normalized audio.

        Raises:
            ValueError: If no loudnorm JSON block is found in the first-pass
                output.
            ffmpeg.Error: If either ffmpeg invocation fails.
            Exception: Any other error is logged and re-raised unchanged.
        """
        # Targets shared by BOTH passes. The second pass must repeat them:
        # loudnorm otherwise falls back to its own defaults, whose integrated
        # loudness target differs from the one the measurements were taken for.
        targets = {"I": -23, "TP": -2, "LRA": 7}

        try:
            logger.info(
                "Starting two-pass normalization: %s -> %s", input_path, output_path
            )

            # First pass: analyze
            logger.debug("First pass: analyzing %s", input_path)

            stream = ffmpeg.input(str(input_path))
            stream = ffmpeg.filter(
                stream,
                "loudnorm",
                print_format="json",
                **targets,
            )
            # Output to null device with explicit format
            null_output = "/dev/null" if os.name != "nt" else "NUL"
            stream = ffmpeg.output(stream, null_output, format="null")

            # Run first pass off the event loop and capture its stderr.
            try:
                result = await asyncio.to_thread(
                    ffmpeg.run,
                    stream,
                    capture_stderr=True,
                    quiet=True,
                )
            except ffmpeg.Error as e:
                logger.error("FFmpeg first pass failed for %s. Stdout: %s, Stderr: %s",
                           input_path,
                           e.stdout.decode(errors="replace") if e.stdout else "None",
                           e.stderr.decode(errors="replace") if e.stderr else "None")
                raise

            # ffmpeg may echo non-UTF-8 bytes (e.g. file names); never let
            # decoding abort the normalization.
            analysis_output = result[1].decode("utf-8", errors="replace")

            # The loudnorm JSON report is printed on stderr.
            logger.debug("Loudnorm analysis output: %s", analysis_output)

            # Find the flat JSON object that contains the "input_i" key.
            json_match = re.search(r'\{[^{}]*"input_i"[^{}]*\}', analysis_output)
            if not json_match:
                logger.error("Could not find JSON in loudnorm output: %s", analysis_output)
                raise ValueError("Could not extract loudnorm analysis data")

            logger.debug("Found JSON match: %s", json_match.group())
            analysis_data = json.loads(json_match.group())

            # Check for invalid values that would cause second pass to fail
            invalid_values = {"-inf", "inf", "nan"}
            for key in ("input_i", "input_lra", "input_tp", "input_thresh", "target_offset"):
                if str(analysis_data.get(key, "")).lower() in invalid_values:
                    logger.warning(
                        "Invalid analysis value for %s: %s. Falling back to one-pass normalization.",
                        key, analysis_data.get(key)
                    )
                    # Fall back to one-pass normalization
                    await self._normalize_audio_one_pass(input_path, output_path)
                    return

            # Second pass: normalize with measured values
            logger.debug("Second pass: normalizing %s with measured values", input_path)

            stream = ffmpeg.input(str(input_path))
            stream = ffmpeg.filter(
                stream,
                "loudnorm",
                measured_I=analysis_data["input_i"],
                measured_LRA=analysis_data["input_lra"],
                measured_TP=analysis_data["input_tp"],
                measured_thresh=analysis_data["input_thresh"],
                offset=analysis_data["target_offset"],
                **targets,
            )

            # Output format -> encoder; formats not listed get no explicit
            # codec or bitrate.
            codec_by_format = {"mp3": "libmp3lame", "aac": "aac", "opus": "libopus"}
            output_args = {}
            codec = codec_by_format.get(self.output_format)
            if codec is not None:
                output_args["acodec"] = codec
                output_args["audio_bitrate"] = self.output_bitrate

            stream = ffmpeg.output(stream, str(output_path), **output_args)
            stream = ffmpeg.overwrite_output(stream)

            try:
                await asyncio.to_thread(
                    ffmpeg.run,
                    stream,
                    quiet=True,
                    overwrite_output=True,
                )
                logger.info("Two-pass normalization completed: %s", output_path)
            except ffmpeg.Error as e:
                logger.error("FFmpeg second pass failed for %s. Stdout: %s, Stderr: %s",
                           input_path,
                           e.stdout.decode(errors="replace") if e.stdout else "None",
                           e.stderr.decode(errors="replace") if e.stderr else "None")
                raise

        except Exception:
            logger.exception("Two-pass normalization failed for %s", input_path)
            raise

    async def normalize_sound(
        self,
        sound: Sound,
        force: bool = False,
        one_pass: bool | None = None,
        sound_data: dict | None = None,
    ) -> NormalizationInfo:
        """Normalize a single sound."""
        # Use provided sound_data to avoid detached instance issues, or capture from sound
        if sound_data:
            filename = sound_data["filename"]
            sound_id = sound_data["id"]
            is_normalized = sound_data["is_normalized"]
            sound_type = sound_data["type"]
        else:
            # Fallback to accessing sound properties (for single sound normalization)
            filename = sound.filename
            sound_id = sound.id
            is_normalized = sound.is_normalized
            sound_type = sound.type

        # Check if already normalized and not forcing
        if is_normalized and not force:
            return {
                "filename": filename,
                "status": "skipped",
                "reason": "already normalized",
                "original_path": None,
                "normalized_path": None,
                "normalized_filename": None,
                "normalized_duration": None,
                "normalized_size": None,
                "normalized_hash": None,
                "id": sound_id,
                "error": None,
            }

        try:
            # Get paths using captured data to avoid accessing sound properties
            original_path = self._get_original_path_from_data(sound_type, filename)
            normalized_path = self._get_normalized_path_from_data(sound_type, filename)

            # Check if original file exists
            if not original_path.exists():
                error_msg = f"Original file not found: {original_path}"
                logger.error(error_msg)
                return {
                    "filename": filename,
                    "status": "error",
                    "reason": None,
                    "original_path": str(original_path),
                    "normalized_path": None,
                    "normalized_filename": None,
                    "normalized_duration": None,
                    "normalized_size": None,
                    "normalized_hash": None,
                    "id": sound_id,
                    "error": error_msg,
                }

            # Determine which normalization method to use
            use_one_pass = one_pass if one_pass is not None else (self.passes == 1)

            # Perform normalization
            if use_one_pass:
                await self._normalize_audio_one_pass(original_path, normalized_path)
            else:
                await self._normalize_audio_two_pass(original_path, normalized_path)

            # Get normalized file info
            normalized_duration = self._get_audio_duration(normalized_path)
            normalized_size = self._get_file_size(normalized_path)
            normalized_hash = self._get_file_hash(normalized_path)
            normalized_filename = normalized_path.name

            # Update sound in database
            update_data = {
                "normalized_filename": normalized_filename,
                "normalized_duration": normalized_duration,
                "normalized_size": normalized_size,
                "normalized_hash": normalized_hash,
                "is_normalized": True,
            }

            await self.sound_repo.update(sound, update_data)
            logger.info("Normalized sound: %s -> %s", filename, normalized_filename)

            return {
                "filename": filename,
                "status": "normalized",
                "reason": None,
                "original_path": str(original_path),
                "normalized_path": str(normalized_path),
                "normalized_filename": normalized_filename,
                "normalized_duration": normalized_duration,
                "normalized_size": normalized_size,
                "normalized_hash": normalized_hash,
                "id": sound_id,
                "error": None,
            }

        except Exception as e:
            error_msg = str(e)
            logger.exception(
                "Failed to normalize sound %s",
                filename,
            )
            return {
                "filename": filename,
                "status": "error",
                "reason": None,
                "original_path": (
                    str(original_path) if "original_path" in locals() else None
                ),
                "normalized_path": (
                    str(normalized_path) if "normalized_path" in locals() else None
                ),
                "normalized_filename": None,
                "normalized_duration": None,
                "normalized_size": None,
                "normalized_hash": None,
                "id": sound_id,
                "error": error_msg,
            }

    async def normalize_all_sounds(
        self,
        force: bool = False,
        one_pass: bool | None = None,
    ) -> NormalizationResults:
        """Normalize every sound that still needs it.

        Args:
            force: When ``True``, re-normalize all sounds of the types listed
                in ``self.type_directories``; otherwise only sounds the
                repository reports as unnormalized are processed.
            one_pass: Forwarded to ``normalize_sound`` (``None`` uses the
                configured number of passes).

        Returns:
            Aggregate counters plus one entry per processed sound.
        """
        logger.info("Starting normalization of all sounds")

        if force:
            # Forcing: gather every sound of each known type.
            sounds = []
            for sound_type in self.type_directories:
                sounds.extend(await self.sound_repo.get_by_type(sound_type))
        else:
            # Get only unnormalized sounds
            sounds = await self.sound_repo.get_unnormalized_sounds()

        logger.info("Found %d sounds to process", len(sounds))

        results = await self._normalize_batch(sounds, force=force, one_pass=one_pass)

        logger.info("Normalization completed: %s", results)
        return results

    async def normalize_sounds_by_type(
        self,
        sound_type: str,
        force: bool = False,
        one_pass: bool | None = None,
    ) -> NormalizationResults:
        """Normalize the sounds of one specific type.

        Args:
            sound_type: Type code of the sounds to process.
            force: When ``True``, re-normalize all sounds of that type;
                otherwise only the unnormalized ones.
            one_pass: Forwarded to ``normalize_sound`` (``None`` uses the
                configured number of passes).

        Returns:
            Aggregate counters plus one entry per processed sound.
        """
        logger.info("Starting normalization of %s sounds", sound_type)

        if force:
            sounds = await self.sound_repo.get_by_type(sound_type)
        else:
            sounds = await self.sound_repo.get_unnormalized_sounds_by_type(sound_type)

        logger.info("Found %d %s sounds to process", len(sounds), sound_type)

        results = await self._normalize_batch(sounds, force=force, one_pass=one_pass)

        logger.info("Type normalization completed: %s", results)
        return results

    async def _normalize_batch(
        self,
        sounds: list[Sound],
        force: bool,
        one_pass: bool | None,
    ) -> NormalizationResults:
        """Run ``normalize_sound`` over ``sounds`` and tally the outcomes."""
        results: NormalizationResults = {
            "processed": 0,
            "normalized": 0,
            "skipped": 0,
            "errors": 0,
            "files": [],
        }

        # Capture all sound data upfront to avoid session detachment issues
        # once the session is used for updates.
        snapshots = [
            {
                "id": sound.id,
                "filename": sound.filename,
                "type": sound.type,
                "is_normalized": sound.is_normalized,
                "name": sound.name,
            }
            for sound in sounds
        ]

        # Per-file status -> name of the counter it increments.
        counter_for_status = {
            "normalized": "normalized",
            "skipped": "skipped",
            "error": "errors",
        }

        for sound, sound_data in zip(sounds, snapshots):
            results["processed"] += 1

            try:
                normalization_info = await self.normalize_sound(
                    sound,
                    force=force,
                    one_pass=one_pass,
                    sound_data=sound_data,
                )

                results["files"].append(normalization_info)

                counter = counter_for_status.get(normalization_info["status"])
                if counter is not None:
                    results[counter] += 1

            except Exception as e:
                # Batch boundary: one failing sound must not abort the others.
                logger.exception(
                    "Unexpected error processing sound %s",
                    sound_data["filename"],
                )
                results["errors"] += 1
                results["files"].append(
                    {
                        "filename": sound_data["filename"],
                        "status": "error",
                        "reason": None,
                        "original_path": None,
                        "normalized_path": None,
                        "normalized_filename": None,
                        "normalized_duration": None,
                        "normalized_size": None,
                        "normalized_hash": None,
                        "id": sound_data["id"],
                        "error": str(e),
                    }
                )

        return results