Files
sdb2-backend/app/services/sound_normalizer.py
2025-08-01 02:08:36 +02:00

589 lines
21 KiB
Python

"""Sound normalizer service for normalizing audio files using ffmpeg loudnorm."""
import asyncio
import json
import os
import re
from pathlib import Path
from typing import TypedDict
import ffmpeg # type: ignore[import-untyped]
from sqlmodel.ext.asyncio.session import AsyncSession
from app.core.config import settings
from app.core.logging import get_logger
from app.models.sound import Sound
from app.repositories.sound import SoundRepository
from app.utils.audio import get_audio_duration, get_file_hash, get_file_size
# Module-level logger, named after this module.
logger = get_logger(__name__)
class NormalizationInfo(TypedDict):
    """Type definition for normalization information in results.

    One instance describes the outcome of processing a single sound.
    """

    # File name of the sound that was processed.
    filename: str
    # Outcome of the attempt: "normalized", "skipped" or "error".
    status: str
    # Why the sound was skipped (e.g. "already normalized"); None otherwise.
    reason: str | None
    # Path of the source file as a string; None when it was never resolved.
    original_path: str | None
    # Path of the normalized output as a string; only set once it is known.
    normalized_path: str | None
    # File name (without directory) of the normalized output on success.
    normalized_filename: str | None
    # Duration reported by get_audio_duration for the normalized file.
    normalized_duration: int | None
    # Size reported by get_file_size for the normalized file.
    normalized_size: int | None
    # Hash reported by get_file_hash for the normalized file.
    normalized_hash: str | None
    # Identifier of the sound record this entry refers to.
    id: int | None
    # Error message when status is "error"; None otherwise.
    error: str | None
class NormalizationResults(TypedDict):
    """Type definition for normalization results.

    Aggregated counters for one batch run plus the per-sound details.
    """

    # Number of sounds the batch attempted, whatever the outcome.
    processed: int
    # Number of sounds whose status came back as "normalized".
    normalized: int
    # Number of sounds whose status came back as "skipped".
    skipped: int
    # Number of sounds that ended in an "error" status or raised.
    errors: int
    # One NormalizationInfo per processed sound, in processing order.
    files: list[NormalizationInfo]
class SoundNormalizerService:
"""Service for normalizing audio files using ffmpeg loudnorm."""
def __init__(self, session: AsyncSession) -> None:
"""Initialize the sound normalizer service."""
self.session = session
self.sound_repo = SoundRepository(session)
# Normalization settings from config
self.output_format = settings.NORMALIZED_AUDIO_FORMAT
self.output_bitrate = settings.NORMALIZED_AUDIO_BITRATE
self.passes = settings.NORMALIZED_AUDIO_PASSES
# Directory mappings for different sound types
self.type_directories = {
"SDB": "sounds/normalized/soundboard",
"TTS": "sounds/normalized/text_to_speech",
"EXT": "sounds/normalized/extracted",
}
# Ensure normalized directories exist
self._ensure_directories()
def _ensure_directories(self) -> None:
"""Ensure all normalized sound directories exist."""
for directory in self.type_directories.values():
Path(directory).mkdir(parents=True, exist_ok=True)
logger.debug("Ensured directory exists: %s", directory)
def _get_normalized_path(self, sound: Sound) -> Path:
"""Get the normalized file path for a sound."""
return self._get_normalized_path_from_data(sound.type, sound.filename)
def _get_normalized_path_from_data(self, sound_type: str, filename: str) -> Path:
"""Get the normalized file path from sound data."""
# Get the appropriate directory for the sound type
directory = self.type_directories.get(sound_type, "sounds/normalized/other")
# Create the directory if it doesn't exist
Path(directory).mkdir(parents=True, exist_ok=True)
# Generate filename: original_name.{format}
original_stem = Path(filename).stem
normalized_filename = f"{original_stem}.{self.output_format}"
return Path(directory) / normalized_filename
def _get_original_path(self, sound: Sound) -> Path:
"""Get the original file path for a sound."""
return self._get_original_path_from_data(sound.type, sound.filename)
def _get_original_path_from_data(self, sound_type: str, filename: str) -> Path:
"""Get the original file path from sound data."""
# Map sound types to their original directories
type_to_original_dir = {
"SDB": "sounds/originals/soundboard",
"TTS": "sounds/originals/text_to_speech",
"EXT": "sounds/originals/extracted",
}
original_dir = type_to_original_dir.get(sound_type, "sounds/originals/other")
return Path(original_dir) / filename
async def _normalize_audio_one_pass(
self,
input_path: Path,
output_path: Path,
) -> None:
"""Normalize audio using one-pass loudnorm."""
try:
logger.info(
"Starting one-pass normalization: %s -> %s",
input_path,
output_path,
)
stream = ffmpeg.input(str(input_path))
stream = ffmpeg.filter(stream, "loudnorm", I=-23, TP=-2, LRA=7)
# Apply output format and bitrate
output_args = {}
if self.output_format == "mp3":
output_args["acodec"] = "libmp3lame"
output_args["audio_bitrate"] = self.output_bitrate
elif self.output_format == "aac":
output_args["acodec"] = "aac"
output_args["audio_bitrate"] = self.output_bitrate
elif self.output_format == "opus":
output_args["acodec"] = "libopus"
output_args["audio_bitrate"] = self.output_bitrate
stream = ffmpeg.output(stream, str(output_path), **output_args)
stream = ffmpeg.overwrite_output(stream)
await asyncio.to_thread(
ffmpeg.run, stream, quiet=True, overwrite_output=True,
)
logger.info("One-pass normalization completed: %s", output_path)
except Exception:
logger.exception("One-pass normalization failed for %s", input_path)
raise
async def _normalize_audio_two_pass(
self,
input_path: Path,
output_path: Path,
) -> None:
"""Normalize audio using two-pass loudnorm for better quality."""
try:
logger.info(
"Starting two-pass normalization: %s -> %s",
input_path,
output_path,
)
# First pass: analyze
logger.debug("First pass: analyzing %s", input_path)
stream = ffmpeg.input(str(input_path))
stream = ffmpeg.filter(
stream,
"loudnorm",
I=-23,
TP=-2,
LRA=7,
print_format="json",
)
# Output to null device with explicit format
null_output = "/dev/null" if os.name != "nt" else "NUL"
stream = ffmpeg.output(stream, null_output, format="null")
# Run first pass and capture output
try:
result = await asyncio.to_thread(
ffmpeg.run, stream, capture_stderr=True, quiet=True,
)
analysis_output = result[1].decode("utf-8")
except ffmpeg.Error as e:
logger.exception(
"FFmpeg first pass failed for %s. Stdout: %s, Stderr: %s",
input_path,
e.stdout.decode() if e.stdout else "None",
e.stderr.decode() if e.stderr else "None",
)
raise
# Extract loudnorm measurements from the output
# The JSON output is at the end of stderr
logger.debug("Loudnorm analysis output: %s", analysis_output)
# Find JSON in the output
json_match = re.search(r'\{[^{}]*"input_i"[^{}]*\}', analysis_output)
if not json_match:
logger.error(
"Could not find JSON in loudnorm output: %s",
analysis_output,
)
msg = "Could not find JSON in loudnorm output"
raise ValueError(msg)
logger.debug("Found JSON match: %s", json_match.group())
analysis_data = json.loads(json_match.group())
# Check for invalid values that would cause second pass to fail
invalid_values = ["-inf", "inf", "nan"]
for key in [
"input_i",
"input_lra",
"input_tp",
"input_thresh",
"target_offset",
]:
if str(analysis_data.get(key, "")).lower() in invalid_values:
logger.warning(
(
"Invalid analysis value for %s: %s. "
"Falling back to one-pass normalization."
),
key,
analysis_data.get(key),
)
# Fall back to one-pass normalization
await self._normalize_audio_one_pass(input_path, output_path)
return
# Second pass: normalize with measured values
logger.debug("Second pass: normalizing %s with measured values", input_path)
stream = ffmpeg.input(str(input_path))
stream = ffmpeg.filter(
stream,
"loudnorm",
measured_I=analysis_data["input_i"],
measured_LRA=analysis_data["input_lra"],
measured_TP=analysis_data["input_tp"],
measured_thresh=analysis_data["input_thresh"],
offset=analysis_data["target_offset"],
)
# Apply output format and bitrate
output_args = {}
if self.output_format == "mp3":
output_args["acodec"] = "libmp3lame"
output_args["audio_bitrate"] = self.output_bitrate
elif self.output_format == "aac":
output_args["acodec"] = "aac"
output_args["audio_bitrate"] = self.output_bitrate
elif self.output_format == "opus":
output_args["acodec"] = "libopus"
output_args["audio_bitrate"] = self.output_bitrate
stream = ffmpeg.output(stream, str(output_path), **output_args)
stream = ffmpeg.overwrite_output(stream)
try:
await asyncio.to_thread(
ffmpeg.run, stream, quiet=True, overwrite_output=True,
)
logger.info("Two-pass normalization completed: %s", output_path)
except ffmpeg.Error as e:
logger.exception(
"FFmpeg second pass failed for %s. Stdout: %s, Stderr: %s",
input_path,
e.stdout.decode() if e.stdout else "None",
e.stderr.decode() if e.stderr else "None",
)
raise
except Exception:
logger.exception("Two-pass normalization failed for %s", input_path)
raise
async def normalize_sound(
self,
sound: Sound,
*,
force: bool = False,
one_pass: bool | None = None,
sound_data: dict | None = None,
) -> NormalizationInfo:
"""Normalize a single sound."""
# Use provided sound_data to avoid detached instance issues,
# or capture from sound
if sound_data:
filename = sound_data["filename"]
sound_id = sound_data["id"]
is_normalized = sound_data["is_normalized"]
sound_type = sound_data["type"]
else:
# Fallback to accessing sound properties (for single sound normalization)
filename = sound.filename
sound_id = sound.id
is_normalized = sound.is_normalized
sound_type = sound.type
# Check if already normalized and not forcing
if is_normalized and not force:
return {
"filename": filename,
"status": "skipped",
"reason": "already normalized",
"original_path": None,
"normalized_path": None,
"normalized_filename": None,
"normalized_duration": None,
"normalized_size": None,
"normalized_hash": None,
"id": sound_id,
"error": None,
}
try:
# Get paths using captured data to avoid accessing sound properties
original_path = self._get_original_path_from_data(sound_type, filename)
normalized_path = self._get_normalized_path_from_data(sound_type, filename)
# Check if original file exists
if not original_path.exists():
error_msg = f"Original file not found: {original_path}"
logger.error(error_msg)
return {
"filename": filename,
"status": "error",
"reason": None,
"original_path": str(original_path),
"normalized_path": None,
"normalized_filename": None,
"normalized_duration": None,
"normalized_size": None,
"normalized_hash": None,
"id": sound_id,
"error": error_msg,
}
# Determine which normalization method to use
use_one_pass = one_pass if one_pass is not None else (self.passes == 1)
# Perform normalization
if use_one_pass:
await self._normalize_audio_one_pass(original_path, normalized_path)
else:
await self._normalize_audio_two_pass(original_path, normalized_path)
# Get normalized file info
normalized_duration = get_audio_duration(normalized_path)
normalized_size = get_file_size(normalized_path)
normalized_hash = get_file_hash(normalized_path)
normalized_filename = normalized_path.name
# Update sound in database
update_data = {
"normalized_filename": normalized_filename,
"normalized_duration": normalized_duration,
"normalized_size": normalized_size,
"normalized_hash": normalized_hash,
"is_normalized": True,
}
await self.sound_repo.update(sound, update_data)
logger.info("Normalized sound: %s -> %s", filename, normalized_filename)
return {
"filename": filename,
"status": "normalized",
"reason": None,
"original_path": str(original_path),
"normalized_path": str(normalized_path),
"normalized_filename": normalized_filename,
"normalized_duration": normalized_duration,
"normalized_size": normalized_size,
"normalized_hash": normalized_hash,
"id": sound_id,
"error": None,
}
except Exception as e:
error_msg = str(e)
logger.exception(
"Failed to normalize sound %s",
filename,
)
return {
"filename": filename,
"status": "error",
"reason": None,
"original_path": (
str(original_path) if "original_path" in locals() else None
),
"normalized_path": (
str(normalized_path) if "normalized_path" in locals() else None
),
"normalized_filename": None,
"normalized_duration": None,
"normalized_size": None,
"normalized_hash": None,
"id": sound_id,
"error": error_msg,
}
async def normalize_all_sounds(
self,
*,
force: bool = False,
one_pass: bool | None = None,
) -> NormalizationResults:
"""Normalize all unnormalized sounds."""
logger.info("Starting normalization of all sounds")
results: NormalizationResults = {
"processed": 0,
"normalized": 0,
"skipped": 0,
"errors": 0,
"files": [],
}
# Get sounds to normalize
if force:
# Get all sounds if forcing
sounds = []
for sound_type in self.type_directories:
type_sounds = await self.sound_repo.get_by_type(sound_type)
sounds.extend(type_sounds)
else:
# Get only unnormalized sounds
sounds = await self.sound_repo.get_unnormalized_sounds()
logger.info("Found %d sounds to process", len(sounds))
# Capture all sound data upfront to avoid session detachment issues
sound_data_list = [
{
"id": sound.id,
"filename": sound.filename,
"type": sound.type,
"is_normalized": sound.is_normalized,
"name": sound.name,
}
for sound in sounds
]
# Process each sound using captured data
for i, sound in enumerate(sounds):
results["processed"] += 1
# Use captured data to avoid detached instance issues
sound_data = sound_data_list[i]
sound_id = sound_data["id"]
sound_filename = sound_data["filename"]
try:
normalization_info = await self.normalize_sound(
sound,
force=force,
one_pass=one_pass,
sound_data=sound_data,
)
results["files"].append(normalization_info)
if normalization_info["status"] == "normalized":
results["normalized"] += 1
elif normalization_info["status"] == "skipped":
results["skipped"] += 1
elif normalization_info["status"] == "error":
results["errors"] += 1
except Exception as e:
logger.exception(
"Unexpected error processing sound %s",
sound_filename,
)
results["errors"] += 1
results["files"].append(
{
"filename": sound_filename,
"status": "error",
"reason": None,
"original_path": None,
"normalized_path": None,
"normalized_filename": None,
"normalized_duration": None,
"normalized_size": None,
"normalized_hash": None,
"id": sound_id,
"error": str(e),
},
)
logger.info("Normalization completed: %s", results)
return results
async def normalize_sounds_by_type(
self,
sound_type: str,
*,
force: bool = False,
one_pass: bool | None = None,
) -> NormalizationResults:
"""Normalize all sounds of a specific type."""
logger.info("Starting normalization of %s sounds", sound_type)
results: NormalizationResults = {
"processed": 0,
"normalized": 0,
"skipped": 0,
"errors": 0,
"files": [],
}
# Get sounds to normalize
if force:
sounds = await self.sound_repo.get_by_type(sound_type)
else:
sounds = await self.sound_repo.get_unnormalized_sounds_by_type(sound_type)
logger.info("Found %d %s sounds to process", len(sounds), sound_type)
# Capture all sound data upfront to avoid session detachment issues
sound_data_list = [
{
"id": sound.id,
"filename": sound.filename,
"type": sound.type,
"is_normalized": sound.is_normalized,
"name": sound.name,
}
for sound in sounds
]
# Process each sound using captured data
for i, sound in enumerate(sounds):
results["processed"] += 1
# Use captured data to avoid detached instance issues
sound_data = sound_data_list[i]
sound_id = sound_data["id"]
sound_filename = sound_data["filename"]
try:
normalization_info = await self.normalize_sound(
sound,
force=force,
one_pass=one_pass,
sound_data=sound_data,
)
results["files"].append(normalization_info)
if normalization_info["status"] == "normalized":
results["normalized"] += 1
elif normalization_info["status"] == "skipped":
results["skipped"] += 1
elif normalization_info["status"] == "error":
results["errors"] += 1
except Exception as e:
logger.exception(
"Unexpected error processing sound %s",
sound_filename,
)
results["errors"] += 1
results["files"].append(
{
"filename": sound_filename,
"status": "error",
"reason": None,
"original_path": None,
"normalized_path": None,
"normalized_filename": None,
"normalized_duration": None,
"normalized_size": None,
"normalized_hash": None,
"id": sound_id,
"error": str(e),
},
)
logger.info("Type normalization completed: %s", results)
return results