"""Sound normalization service using ffmpeg loudnorm filter.""" import hashlib import json import logging import re from pathlib import Path import ffmpeg from pydub import AudioSegment from app.database import db from app.models.sound import Sound logger = logging.getLogger(__name__) class SoundNormalizerService: """Service for normalizing sound files using ffmpeg loudnorm.""" SUPPORTED_EXTENSIONS = { ".mp3", ".wav", ".ogg", ".flac", ".m4a", ".aac", ".opus", } SOUNDS_DIR = "sounds/soundboard" NORMALIZED_DIR = "sounds/normalized/soundboard" LOUDNORM_PARAMS = { "integrated": -16, "true_peak": -1.5, "lra": 11.0, "print_format": "summary", } @staticmethod def normalize_sound( sound_id: int, overwrite: bool = False, two_pass: bool = True, ) -> dict: """Normalize a specific sound file using ffmpeg loudnorm. Args: sound_id: ID of the sound to normalize overwrite: Whether to overwrite existing normalized file two_pass: Whether to use two-pass normalization (default: True) Returns: dict: Result of the normalization operation """ try: sound = Sound.query.get(sound_id) if not sound: return { "success": False, "error": f"Sound with ID {sound_id} not found", } source_path = ( Path(SoundNormalizerService.SOUNDS_DIR) / sound.filename ) if not source_path.exists(): return { "success": False, "error": f"Source file not found: {source_path}", } # Always output as WAV regardless of input format filename_without_ext = Path(sound.filename).stem normalized_filename = f"{filename_without_ext}.wav" normalized_path = ( Path(SoundNormalizerService.NORMALIZED_DIR) / normalized_filename ) normalized_path.parent.mkdir(parents=True, exist_ok=True) if normalized_path.exists() and not overwrite: return { "success": False, "error": f"Normalized file already exists: {normalized_path}. Use overwrite=True to replace it.", } logger.info( f"Starting normalization of {sound.name} ({sound.filename})", ) if two_pass: result = SoundNormalizerService._normalize_with_ffmpeg( str(source_path), str(normalized_path), ) else: result = ( SoundNormalizerService._normalize_with_ffmpeg_single_pass( str(source_path), str(normalized_path), ) ) if result["success"]: # Calculate normalized file metadata normalized_metadata = ( SoundNormalizerService._get_normalized_metadata( str(normalized_path), ) ) # Update sound record with normalized information sound.set_normalized_info( normalized_filename=normalized_filename, normalized_duration=normalized_metadata["duration"], normalized_size=normalized_metadata["size"], normalized_hash=normalized_metadata["hash"], ) # Commit the database changes db.session.commit() logger.info(f"Successfully normalized {sound.name}") return { "success": True, "sound_id": sound_id, "sound_name": sound.name, "source_path": str(source_path), "normalized_path": str(normalized_path), "normalized_filename": normalized_filename, "normalized_duration": normalized_metadata["duration"], "normalized_size": normalized_metadata["size"], "normalized_hash": normalized_metadata["hash"], "loudnorm_stats": result.get("stats", {}), } return result except Exception as e: logger.error(f"Error normalizing sound {sound_id}: {e}") return {"success": False, "error": str(e)} @staticmethod def normalize_all_sounds( overwrite: bool = False, limit: int = None, two_pass: bool = True, ) -> dict: """Normalize all soundboard files. Args: overwrite: Whether to overwrite existing normalized files limit: Maximum number of files to process (None for all) two_pass: Whether to use two-pass normalization (default: True) Returns: dict: Summary of the normalization operation """ try: query = Sound.query.filter_by(type="SDB") if limit: query = query.limit(limit) sounds = query.all() if not sounds: return { "success": True, "message": "No soundboard files found to normalize", "processed": 0, "successful": 0, "failed": 0, "skipped": 0, } logger.info(f"Starting bulk normalization of {len(sounds)} sounds") processed = 0 successful = 0 failed = 0 skipped = 0 errors = [] for sound in sounds: result = SoundNormalizerService.normalize_sound( sound.id, overwrite, two_pass, ) processed += 1 if result["success"]: successful += 1 elif "already exists" in result.get("error", ""): skipped += 1 else: failed += 1 errors.append(f"{sound.name}: {result['error']}") logger.info( f"Bulk normalization completed: {successful} successful, {failed} failed, {skipped} skipped", ) return { "success": True, "message": f"Processed {processed} sounds: {successful} successful, {failed} failed, {skipped} skipped", "processed": processed, "successful": successful, "failed": failed, "skipped": skipped, "errors": errors, } except Exception as e: logger.error(f"Error during bulk normalization: {e}") return { "success": False, "error": str(e), "processed": 0, "successful": 0, "failed": 0, "skipped": 0, } @staticmethod def _normalize_with_ffmpeg(source_path: str, output_path: str) -> dict: """Run ffmpeg loudnorm on a single file using two-pass normalization. Two-pass normalization provides better quality by: 1. First pass: Analyze the audio to measure its characteristics 2. Second pass: Apply normalization using the measured parameters Args: source_path: Path to source audio file output_path: Path for normalized output file (will be WAV format) Returns: dict: Result with success status and loudnorm statistics """ try: params = SoundNormalizerService.LOUDNORM_PARAMS logger.debug( f"Running two-pass ffmpeg normalization: {source_path} -> {output_path}", ) # FIRST PASS: Analyze the audio to get optimal parameters logger.debug("Starting first pass (analysis)") first_pass_result = SoundNormalizerService._run_first_pass( source_path, params, ) if not first_pass_result["success"]: return first_pass_result measured_params = first_pass_result["measured_params"] # SECOND PASS: Apply normalization using measured parameters logger.debug("Starting second pass (normalization)") second_pass_result = SoundNormalizerService._run_second_pass( source_path, output_path, params, measured_params, ) if not second_pass_result["success"]: return second_pass_result # Combine statistics from both passes stats = { **first_pass_result.get("stats", {}), **second_pass_result.get("stats", {}), "two_pass": True, "measured_params": measured_params, } if not Path(output_path).exists(): return { "success": False, "error": "Output file was not created after second pass", } logger.debug("Two-pass normalization completed successfully") return {"success": True, "stats": stats} except ffmpeg.Error as e: error_msg = ( f"FFmpeg error: {e.stderr.decode() if e.stderr else str(e)}" ) logger.error(error_msg) return {"success": False, "error": error_msg} except Exception as e: logger.error(f"Error running two-pass ffmpeg normalization: {e}") return {"success": False, "error": str(e)} @staticmethod def _normalize_with_ffmpeg_single_pass( source_path: str, output_path: str, ) -> dict: """Run ffmpeg loudnorm on a single file using single-pass normalization. This is the legacy single-pass method for backward compatibility. Args: source_path: Path to source audio file output_path: Path for normalized output file (will be WAV format) Returns: dict: Result with success status and loudnorm statistics """ try: params = SoundNormalizerService.LOUDNORM_PARAMS logger.debug( f"Running single-pass ffmpeg normalization: {source_path} -> {output_path}", ) # Create ffmpeg input stream input_stream = ffmpeg.input(source_path) # Apply loudnorm filter loudnorm_filter = f"loudnorm=I={params['integrated']}:TP={params['true_peak']}:LRA={params['lra']}:print_format={params['print_format']}" # Create output stream with WAV format output_stream = ffmpeg.output( input_stream, output_path, acodec="pcm_s16le", # 16-bit PCM for WAV ar=44100, # 44.1kHz sample rate af=loudnorm_filter, y=None, # Overwrite output file ) # Run the ffmpeg process out, err = ffmpeg.run( output_stream, capture_stdout=True, capture_stderr=True, ) # Parse loudnorm statistics from stderr stats = SoundNormalizerService._parse_loudnorm_stats( err.decode() if err else "", ) if not Path(output_path).exists(): return { "success": False, "error": "Output file was not created", } return {"success": True, "stats": stats} except ffmpeg.Error as e: error_msg = ( f"FFmpeg error: {e.stderr.decode() if e.stderr else str(e)}" ) logger.error(error_msg) return {"success": False, "error": error_msg} except Exception as e: logger.error(f"Error running single-pass ffmpeg: {e}") return {"success": False, "error": str(e)} @staticmethod def _run_first_pass(source_path: str, params: dict) -> dict: """Run first pass of loudnorm to analyze audio characteristics. Args: source_path: Path to source audio file params: Loudnorm target parameters Returns: dict: Result with measured parameters and analysis stats """ try: # Create ffmpeg input stream input_stream = ffmpeg.input(source_path) # First pass: analyze only, output to null loudnorm_filter = ( f"loudnorm=I={params['integrated']}:" f"TP={params['true_peak']}:" f"LRA={params['lra']}:" f"print_format=json" ) # Output to null device for analysis output_stream = ffmpeg.output( input_stream, "/dev/null", af=loudnorm_filter, f="null", ) # Run the first pass out, err = ffmpeg.run( output_stream, capture_stdout=True, capture_stderr=True, ) stderr_text = err.decode() if err else "" # Parse measured parameters from JSON output measured_params = SoundNormalizerService._parse_measured_params( stderr_text, ) if not measured_params: return { "success": False, "error": "Failed to parse measured parameters from first pass", } # Parse basic stats stats = SoundNormalizerService._parse_loudnorm_stats(stderr_text) return { "success": True, "measured_params": measured_params, "stats": stats, } except ffmpeg.Error as e: error_msg = f"First pass FFmpeg error: {e.stderr.decode() if e.stderr else str(e)}" logger.error(error_msg) return {"success": False, "error": error_msg} except Exception as e: logger.error(f"Error in first pass: {e}") return {"success": False, "error": str(e)} @staticmethod def _run_second_pass( source_path: str, output_path: str, target_params: dict, measured_params: dict, ) -> dict: """Run second pass of loudnorm using measured parameters. Args: source_path: Path to source audio file output_path: Path for normalized output file target_params: Target loudnorm parameters measured_params: Parameters measured from first pass Returns: dict: Result with normalization stats """ try: # Create ffmpeg input stream input_stream = ffmpeg.input(source_path) # Second pass: normalize using measured parameters loudnorm_filter = ( f"loudnorm=I={target_params['integrated']}:" f"TP={target_params['true_peak']}:" f"LRA={target_params['lra']}:" f"measured_I={measured_params['input_i']}:" f"measured_TP={measured_params['input_tp']}:" f"measured_LRA={measured_params['input_lra']}:" f"measured_thresh={measured_params['input_thresh']}:" f"offset={measured_params['target_offset']}:" f"linear=true:" f"print_format=summary" ) # Create output stream with WAV format output_stream = ffmpeg.output( input_stream, output_path, acodec="pcm_s16le", # 16-bit PCM for WAV ar=44100, # 44.1kHz sample rate af=loudnorm_filter, y=None, # Overwrite output file ) # Run the second pass out, err = ffmpeg.run( output_stream, capture_stdout=True, capture_stderr=True, ) stderr_text = err.decode() if err else "" # Parse final statistics stats = SoundNormalizerService._parse_loudnorm_stats(stderr_text) return {"success": True, "stats": stats} except ffmpeg.Error as e: error_msg = f"Second pass FFmpeg error: {e.stderr.decode() if e.stderr else str(e)}" logger.error(error_msg) return {"success": False, "error": error_msg} except Exception as e: logger.error(f"Error in second pass: {e}") return {"success": False, "error": str(e)} @staticmethod def _parse_measured_params(stderr_output: str) -> dict: """Parse measured parameters from first pass JSON output. Args: stderr_output: ffmpeg stderr output containing JSON data Returns: dict: Parsed measured parameters, empty if parsing fails """ try: # Find JSON block in stderr output json_match = re.search( r'\{[^}]*"input_i"[^}]*\}', stderr_output, re.DOTALL, ) if not json_match: logger.warning("No JSON block found in first pass output") return {} json_str = json_match.group(0) measured_data = json.loads(json_str) # Extract required parameters return { "input_i": measured_data.get("input_i", 0), "input_tp": measured_data.get("input_tp", 0), "input_lra": measured_data.get("input_lra", 0), "input_thresh": measured_data.get("input_thresh", 0), "target_offset": measured_data.get("target_offset", 0), } except (json.JSONDecodeError, KeyError, AttributeError) as e: logger.warning(f"Failed to parse measured parameters: {e}") return {} @staticmethod def _parse_loudnorm_stats(stderr_output: str) -> dict: """Parse loudnorm statistics from ffmpeg stderr output. Args: stderr_output: ffmpeg stderr output containing loudnorm stats Returns: dict: Parsed loudnorm statistics """ stats = {} if not stderr_output: return stats lines = stderr_output.split("\n") for line in lines: line = line.strip() if "Input Integrated:" in line: try: stats["input_integrated"] = float(line.split()[-2]) except (ValueError, IndexError): pass elif "Input True Peak:" in line: try: stats["input_true_peak"] = float(line.split()[-2]) except (ValueError, IndexError): pass elif "Input LRA:" in line: try: stats["input_lra"] = float(line.split()[-1]) except (ValueError, IndexError): pass elif "Output Integrated:" in line: try: stats["output_integrated"] = float(line.split()[-2]) except (ValueError, IndexError): pass elif "Output True Peak:" in line: try: stats["output_true_peak"] = float(line.split()[-2]) except (ValueError, IndexError): pass elif "Output LRA:" in line: try: stats["output_lra"] = float(line.split()[-1]) except (ValueError, IndexError): pass return stats @staticmethod def _get_normalized_metadata(file_path: str) -> dict: """Calculate metadata for normalized file. Args: file_path: Path to the normalized audio file Returns: dict: Metadata including duration and hash """ try: # Get file size file_size = Path(file_path).stat().st_size # Calculate file hash file_hash = SoundNormalizerService._calculate_file_hash(file_path) # Get duration using pydub audio = AudioSegment.from_wav(file_path) duration = len(audio) # Duration in milliseconds return { "duration": duration, "size": file_size, "hash": file_hash, } except Exception as e: logger.error(f"Error calculating metadata for {file_path}: {e}") return { "duration": 0, "size": Path(file_path).stat().st_size, "hash": "", } @staticmethod def _calculate_file_hash(file_path: str) -> str: """Calculate SHA256 hash of file contents.""" sha256_hash = hashlib.sha256() with Path(file_path).open("rb") as f: # Read file in chunks to handle large files for chunk in iter(lambda: f.read(4096), b""): sha256_hash.update(chunk) return sha256_hash.hexdigest() @staticmethod def get_normalization_status() -> dict: """Get statistics about normalized vs original files. Returns: dict: Statistics about normalization status """ try: total_sounds = Sound.query.filter_by(type="SDB").count() normalized_count = 0 total_original_size = 0 total_normalized_size = 0 sounds = Sound.query.filter_by(type="SDB").all() for sound in sounds: original_path = ( Path(SoundNormalizerService.SOUNDS_DIR) / sound.filename ) if original_path.exists(): total_original_size += original_path.stat().st_size # Use database field to check if normalized, not file existence if sound.is_normalized and sound.normalized_filename: normalized_count += 1 normalized_path = ( Path(SoundNormalizerService.NORMALIZED_DIR) / sound.normalized_filename ) if normalized_path.exists(): total_normalized_size += normalized_path.stat().st_size return { "total_sounds": total_sounds, "normalized_count": normalized_count, "normalization_percentage": ( (normalized_count / total_sounds * 100) if total_sounds > 0 else 0 ), "total_original_size": total_original_size, "total_normalized_size": total_normalized_size, "size_difference": ( total_normalized_size - total_original_size if normalized_count > 0 else 0 ), } except Exception as e: logger.error(f"Error getting normalization status: {e}") return { "error": str(e), "total_sounds": 0, "normalized_count": 0, "normalization_percentage": 0, } @staticmethod def check_ffmpeg_availability() -> dict: """Check if ffmpeg is available and supports loudnorm filter. Returns: dict: Information about ffmpeg availability and capabilities """ try: # Create a minimal test audio file to check ffmpeg import tempfile with tempfile.NamedTemporaryFile( suffix=".wav", delete=False, ) as temp_file: temp_path = temp_file.name try: # Try a simple ffmpeg operation to check availability test_input = ffmpeg.input( "anullsrc=channel_layout=stereo:sample_rate=44100", f="lavfi", t=0.1, ) test_output = ffmpeg.output(test_input, temp_path) ffmpeg.run( test_output, capture_stdout=True, capture_stderr=True, quiet=True, ) # If we get here, basic ffmpeg is working # Now test loudnorm filter try: norm_input = ffmpeg.input(temp_path) norm_output = ffmpeg.output( norm_input, "/dev/null", af="loudnorm=I=-16:TP=-1.5:LRA=11.0", f="null", ) ffmpeg.run( norm_output, capture_stdout=True, capture_stderr=True, quiet=True, ) has_loudnorm = True except ffmpeg.Error: has_loudnorm = False return { "available": True, "version": "ffmpeg-python wrapper available", "has_loudnorm": has_loudnorm, "ready": has_loudnorm, } finally: # Clean up temp file temp_file_path = Path(temp_path) if temp_file_path.exists(): temp_file_path.unlink() except Exception as e: return { "available": False, "error": f"ffmpeg not available via python-ffmpeg: {e!s}", }