"""Sound normalization service using ffmpeg loudnorm filter."""

import hashlib
import json
import logging
import re
import tempfile
from pathlib import Path
from typing import Optional

import ffmpeg

from app.database import db
from app.models.sound import Sound

logger = logging.getLogger(__name__)


class SoundNormalizerService:
    """Service for normalizing sound files using ffmpeg loudnorm.

    All methods are static and report their outcome as a plain ``dict``
    carrying a ``"success"`` flag (plus ``"error"`` on failure) instead of
    raising, so callers can forward the result directly.
    """

    SUPPORTED_EXTENSIONS = {
        ".mp3",
        ".wav",
        ".ogg",
        ".flac",
        ".m4a",
        ".aac",
        ".opus",
    }

    # Sound directories by type
    SOUND_DIRS = {
        "SDB": "sounds/soundboard",
        "SAY": "sounds/say",
        "STR": "sounds/stream",
    }

    # Output directories for normalized files, keyed by the same sound types
    NORMALIZED_DIRS = {
        "SDB": "sounds/normalized/soundboard",
        "SAY": "sounds/normalized/say",
        "STR": "sounds/normalized/stream",
    }

    # Target values handed to the loudnorm filter (I / TP / LRA options)
    LOUDNORM_PARAMS = {
        "integrated": -16,
        "true_peak": -1.5,
        "lra": 11.0,
        "print_format": "summary",
    }

    # (label in ffmpeg's loudnorm summary, key in the parsed stats dict)
    _STAT_LABELS = (
        ("Input Integrated:", "input_integrated"),
        ("Input True Peak:", "input_true_peak"),
        ("Input LRA:", "input_lra"),
        ("Output Integrated:", "output_integrated"),
        ("Output True Peak:", "output_true_peak"),
        ("Output LRA:", "output_lra"),
    )

    @staticmethod
    def normalize_sound(
        sound_id: int,
        overwrite: bool = False,
        two_pass: bool = True,
    ) -> dict:
        """Normalize a specific sound file using ffmpeg loudnorm.

        Args:
            sound_id: ID of the sound to normalize
            overwrite: Whether to overwrite existing normalized file
            two_pass: Whether to use two-pass normalization (default: True)

        Returns:
            dict: Result of the normalization operation. On success it holds
                the source/normalized paths, the normalized file metadata and
                the loudnorm statistics; on failure ``"success"`` is False
                and ``"error"`` describes the problem.

        """
        try:
            sound = Sound.query.get(sound_id)
            if not sound:
                return {
                    "success": False,
                    "error": f"Sound with ID {sound_id} not found",
                }

            # Get directories based on sound type
            sound_dir = SoundNormalizerService.SOUND_DIRS.get(sound.type)
            normalized_dir = SoundNormalizerService.NORMALIZED_DIRS.get(
                sound.type,
            )
            if not sound_dir or not normalized_dir:
                return {
                    "success": False,
                    "error": f"Unsupported sound type: {sound.type}",
                }

            source_path = Path(sound_dir) / sound.filename
            if not source_path.exists():
                return {
                    "success": False,
                    "error": f"Source file not found: {source_path}",
                }

            # Always output as WAV regardless of input format
            filename_without_ext = Path(sound.filename).stem
            normalized_filename = f"{filename_without_ext}.wav"
            normalized_path = Path(normalized_dir) / normalized_filename
            normalized_path.parent.mkdir(parents=True, exist_ok=True)

            # NOTE: normalize_all_sounds() recognises this case by the
            # "already exists" wording, so keep the message in sync with it.
            if normalized_path.exists() and not overwrite:
                return {
                    "success": False,
                    "error": f"Normalized file already exists: {normalized_path}. Use overwrite=True to replace it.",
                }

            logger.info(
                f"Starting normalization of {sound.name} ({sound.filename})",
            )

            if two_pass:
                result = SoundNormalizerService._normalize_with_ffmpeg(
                    str(source_path),
                    str(normalized_path),
                )
            else:
                result = (
                    SoundNormalizerService._normalize_with_ffmpeg_single_pass(
                        str(source_path),
                        str(normalized_path),
                    )
                )

            if not result["success"]:
                return result

            # Calculate normalized file metadata
            normalized_metadata = (
                SoundNormalizerService._get_normalized_metadata(
                    str(normalized_path),
                )
            )

            # Update sound record with normalized information
            sound.set_normalized_info(
                normalized_filename=normalized_filename,
                normalized_duration=normalized_metadata["duration"],
                normalized_size=normalized_metadata["size"],
                normalized_hash=normalized_metadata["hash"],
            )

            # Commit the database changes. Roll back on failure so the
            # session stays usable for the next sound in a bulk run; the
            # re-raised error is reported by the handler below.
            try:
                db.session.commit()
            except Exception:
                db.session.rollback()
                raise

            logger.info(f"Successfully normalized {sound.name}")
            return {
                "success": True,
                "sound_id": sound_id,
                "sound_name": sound.name,
                "source_path": str(source_path),
                "normalized_path": str(normalized_path),
                "normalized_filename": normalized_filename,
                "normalized_duration": normalized_metadata["duration"],
                "normalized_size": normalized_metadata["size"],
                "normalized_hash": normalized_metadata["hash"],
                "loudnorm_stats": result.get("stats", {}),
            }

        except Exception as e:
            logger.error(f"Error normalizing sound {sound_id}: {e}")
            return {"success": False, "error": str(e)}

    @staticmethod
    def normalize_all_sounds(
        overwrite: bool = False,
        limit: Optional[int] = None,
        two_pass: bool = True,
    ) -> dict:
        """Normalize all soundboard files.

        Only sounds of type ``"SDB"`` are processed.

        Args:
            overwrite: Whether to overwrite existing normalized files
            limit: Maximum number of files to process (None for all)
            two_pass: Whether to use two-pass normalization (default: True)

        Returns:
            dict: Summary of the normalization operation with the
                ``processed`` / ``successful`` / ``failed`` / ``skipped``
                counters and, when sounds were processed, an ``errors`` list.

        """
        try:
            query = Sound.query.filter_by(type="SDB")
            if limit:
                query = query.limit(limit)

            sounds = query.all()
            if not sounds:
                return {
                    "success": True,
                    "message": "No soundboard files found to normalize",
                    "processed": 0,
                    "successful": 0,
                    "failed": 0,
                    "skipped": 0,
                }

            logger.info(f"Starting bulk normalization of {len(sounds)} sounds")

            processed = 0
            successful = 0
            failed = 0
            skipped = 0
            errors = []

            for sound in sounds:
                result = SoundNormalizerService.normalize_sound(
                    sound.id,
                    overwrite,
                    two_pass,
                )
                processed += 1

                if result["success"]:
                    successful += 1
                elif "already exists" in result.get("error", ""):
                    # An existing normalized file is a skip, not a failure.
                    skipped += 1
                else:
                    failed += 1
                    errors.append(f"{sound.name}: {result['error']}")

            logger.info(
                f"Bulk normalization completed: {successful} successful, {failed} failed, {skipped} skipped",
            )

            return {
                "success": True,
                "message": f"Processed {processed} sounds: {successful} successful, {failed} failed, {skipped} skipped",
                "processed": processed,
                "successful": successful,
                "failed": failed,
                "skipped": skipped,
                "errors": errors,
            }

        except Exception as e:
            logger.error(f"Error during bulk normalization: {e}")
            return {
                "success": False,
                "error": str(e),
                "processed": 0,
                "successful": 0,
                "failed": 0,
                "skipped": 0,
            }

    @staticmethod
    def _normalize_with_ffmpeg(source_path: str, output_path: str) -> dict:
        """Run ffmpeg loudnorm on a single file using two-pass normalization.

        Two-pass normalization provides better quality by:
        1. First pass: Analyze the audio to measure its characteristics
        2. Second pass: Apply normalization using the measured parameters

        Args:
            source_path: Path to source audio file
            output_path: Path for normalized output file (will be WAV format)

        Returns:
            dict: Result with success status and loudnorm statistics

        """
        try:
            params = SoundNormalizerService.LOUDNORM_PARAMS

            logger.debug(
                f"Running two-pass ffmpeg normalization: {source_path} -> {output_path}",
            )

            # FIRST PASS: Analyze the audio to get optimal parameters
            logger.debug("Starting first pass (analysis)")
            first_pass_result = SoundNormalizerService._run_first_pass(
                source_path,
                params,
            )
            if not first_pass_result["success"]:
                return first_pass_result

            measured_params = first_pass_result["measured_params"]

            # SECOND PASS: Apply normalization using measured parameters
            logger.debug("Starting second pass (normalization)")
            second_pass_result = SoundNormalizerService._run_second_pass(
                source_path,
                output_path,
                params,
                measured_params,
            )
            if not second_pass_result["success"]:
                return second_pass_result

            # Combine statistics from both passes (second pass wins on
            # duplicate keys)
            stats = {
                **first_pass_result.get("stats", {}),
                **second_pass_result.get("stats", {}),
                "two_pass": True,
                "measured_params": measured_params,
            }

            if not Path(output_path).exists():
                return {
                    "success": False,
                    "error": "Output file was not created after second pass",
                }

            logger.debug("Two-pass normalization completed successfully")
            return {"success": True, "stats": stats}

        except ffmpeg.Error as e:
            error_msg = (
                f"FFmpeg error: {e.stderr.decode() if e.stderr else str(e)}"
            )
            logger.error(error_msg)
            return {"success": False, "error": error_msg}
        except Exception as e:
            logger.error(f"Error running two-pass ffmpeg normalization: {e}")
            return {"success": False, "error": str(e)}

    @staticmethod
    def _normalize_with_ffmpeg_single_pass(
        source_path: str,
        output_path: str,
    ) -> dict:
        """Run ffmpeg loudnorm on a single file using single-pass normalization.

        This is the legacy single-pass method for backward compatibility.

        Args:
            source_path: Path to source audio file
            output_path: Path for normalized output file (will be WAV format)

        Returns:
            dict: Result with success status and loudnorm statistics

        """
        try:
            params = SoundNormalizerService.LOUDNORM_PARAMS

            logger.debug(
                f"Running single-pass ffmpeg normalization: {source_path} -> {output_path}",
            )

            # Create ffmpeg input stream
            input_stream = ffmpeg.input(source_path)

            # Apply loudnorm filter
            loudnorm_filter = (
                f"loudnorm=I={params['integrated']}:"
                f"TP={params['true_peak']}:"
                f"LRA={params['lra']}:"
                f"print_format={params['print_format']}"
            )

            # Create output stream with WAV format
            output_stream = ffmpeg.output(
                input_stream,
                output_path,
                acodec="pcm_s16le",  # 16-bit PCM for WAV
                ar=44100,  # 44.1kHz sample rate
                af=loudnorm_filter,
                y=None,  # Overwrite output file
            )

            # Run the ffmpeg process; loudnorm reports on stderr
            _out, err = ffmpeg.run(
                output_stream,
                capture_stdout=True,
                capture_stderr=True,
            )

            # Parse loudnorm statistics from stderr
            stats = SoundNormalizerService._parse_loudnorm_stats(
                err.decode() if err else "",
            )

            if not Path(output_path).exists():
                return {
                    "success": False,
                    "error": "Output file was not created",
                }

            return {"success": True, "stats": stats}

        except ffmpeg.Error as e:
            error_msg = (
                f"FFmpeg error: {e.stderr.decode() if e.stderr else str(e)}"
            )
            logger.error(error_msg)
            return {"success": False, "error": error_msg}
        except Exception as e:
            logger.error(f"Error running single-pass ffmpeg: {e}")
            return {"success": False, "error": str(e)}

    @staticmethod
    def _run_first_pass(source_path: str, params: dict) -> dict:
        """Run first pass of loudnorm to analyze audio characteristics.

        Args:
            source_path: Path to source audio file
            params: Loudnorm target parameters

        Returns:
            dict: Result with measured parameters and analysis stats

        """
        try:
            # Create ffmpeg input stream
            input_stream = ffmpeg.input(source_path)

            # First pass: analyze only, output to null
            loudnorm_filter = (
                f"loudnorm=I={params['integrated']}:"
                f"TP={params['true_peak']}:"
                f"LRA={params['lra']}:"
                f"print_format=json"
            )

            # Output to null device for analysis
            output_stream = ffmpeg.output(
                input_stream,
                "/dev/null",
                af=loudnorm_filter,
                f="null",
            )

            # Run the first pass
            _out, err = ffmpeg.run(
                output_stream,
                capture_stdout=True,
                capture_stderr=True,
            )

            stderr_text = err.decode() if err else ""

            # Parse measured parameters from JSON output
            measured_params = SoundNormalizerService._parse_measured_params(
                stderr_text,
            )
            if not measured_params:
                return {
                    "success": False,
                    "error": "Failed to parse measured parameters from first pass",
                }

            # Parse basic stats
            stats = SoundNormalizerService._parse_loudnorm_stats(stderr_text)

            return {
                "success": True,
                "measured_params": measured_params,
                "stats": stats,
            }

        except ffmpeg.Error as e:
            error_msg = f"First pass FFmpeg error: {e.stderr.decode() if e.stderr else str(e)}"
            logger.error(error_msg)
            return {"success": False, "error": error_msg}
        except Exception as e:
            logger.error(f"Error in first pass: {e}")
            return {"success": False, "error": str(e)}

    @staticmethod
    def _run_second_pass(
        source_path: str,
        output_path: str,
        target_params: dict,
        measured_params: dict,
    ) -> dict:
        """Run second pass of loudnorm using measured parameters.

        Args:
            source_path: Path to source audio file
            output_path: Path for normalized output file
            target_params: Target loudnorm parameters
            measured_params: Parameters measured from first pass

        Returns:
            dict: Result with normalization stats

        """
        try:
            # Create ffmpeg input stream
            input_stream = ffmpeg.input(source_path)

            # Second pass: normalize using measured parameters
            loudnorm_filter = (
                f"loudnorm=I={target_params['integrated']}:"
                f"TP={target_params['true_peak']}:"
                f"LRA={target_params['lra']}:"
                f"measured_I={measured_params['input_i']}:"
                f"measured_TP={measured_params['input_tp']}:"
                f"measured_LRA={measured_params['input_lra']}:"
                f"measured_thresh={measured_params['input_thresh']}:"
                f"offset={measured_params['target_offset']}:"
                f"linear=true:"
                f"print_format=summary"
            )

            # Create output stream with WAV format
            output_stream = ffmpeg.output(
                input_stream,
                output_path,
                acodec="pcm_s16le",  # 16-bit PCM for WAV
                ar=44100,  # 44.1kHz sample rate
                af=loudnorm_filter,
                y=None,  # Overwrite output file
            )

            # Run the second pass
            _out, err = ffmpeg.run(
                output_stream,
                capture_stdout=True,
                capture_stderr=True,
            )

            stderr_text = err.decode() if err else ""

            # Parse final statistics
            stats = SoundNormalizerService._parse_loudnorm_stats(stderr_text)

            return {"success": True, "stats": stats}

        except ffmpeg.Error as e:
            error_msg = f"Second pass FFmpeg error: {e.stderr.decode() if e.stderr else str(e)}"
            logger.error(error_msg)
            return {"success": False, "error": error_msg}
        except Exception as e:
            logger.error(f"Error in second pass: {e}")
            return {"success": False, "error": str(e)}

    @staticmethod
    def _parse_measured_params(stderr_output: str) -> dict:
        """Parse measured parameters from first pass JSON output.

        Args:
            stderr_output: ffmpeg stderr output containing JSON data

        Returns:
            dict: Parsed measured parameters, empty if parsing fails

        """
        try:
            # Find JSON block in stderr output: the first brace-delimited
            # span (without nested braces) that mentions "input_i"
            json_match = re.search(
                r'\{[^}]*"input_i"[^}]*\}',
                stderr_output,
                re.DOTALL,
            )
            if not json_match:
                logger.warning("No JSON block found in first pass output")
                return {}

            measured_data = json.loads(json_match.group(0))

            # Extract required parameters
            return {
                "input_i": measured_data.get("input_i", 0),
                "input_tp": measured_data.get("input_tp", 0),
                "input_lra": measured_data.get("input_lra", 0),
                "input_thresh": measured_data.get("input_thresh", 0),
                "target_offset": measured_data.get("target_offset", 0),
            }

        except (json.JSONDecodeError, KeyError, AttributeError) as e:
            logger.warning(f"Failed to parse measured parameters: {e}")
            return {}

    @staticmethod
    def _parse_loudnorm_stats(stderr_output: str) -> dict:
        """Parse loudnorm statistics from ffmpeg stderr output.

        Each recognised summary line has the form ``<label> <value> <unit>``
        (for example ``Input LRA:  0.1 LU``). The value is taken as the first
        token after the label so that the trailing unit is never mistaken
        for the number.

        Args:
            stderr_output: ffmpeg stderr output containing loudnorm stats

        Returns:
            dict: Parsed loudnorm statistics; labels that are absent or whose
                value is not a number are left out

        """
        stats = {}

        if not stderr_output:
            return stats

        for raw_line in stderr_output.splitlines():
            line = raw_line.strip()
            for label, key in SoundNormalizerService._STAT_LABELS:
                if label not in line:
                    continue
                tokens = line.split(label, 1)[1].split()
                try:
                    stats[key] = float(tokens[0])
                except (ValueError, IndexError):
                    # Statistics are informational only: skip a malformed
                    # line rather than fail the whole normalization.
                    pass
                break

        return stats

    @staticmethod
    def _get_normalized_metadata(file_path: str) -> dict:
        """Calculate metadata for normalized file.

        Args:
            file_path: Path to the normalized audio file

        Returns:
            dict: Metadata with ``duration`` (milliseconds), ``size`` (bytes)
                and ``hash`` (SHA256 hex digest). If anything fails, duration
                is 0, hash is empty and size is the file size when it can
                still be read, else 0.

        """
        try:
            # Get file size
            file_size = Path(file_path).stat().st_size

            # Calculate file hash
            file_hash = SoundNormalizerService._calculate_file_hash(file_path)

            # Get duration using ffmpeg
            probe = ffmpeg.probe(file_path)
            audio_stream = next(
                (s for s in probe["streams"] if s["codec_type"] == "audio"),
                None,
            )

            if audio_stream and "duration" in audio_stream:
                # Convert to milliseconds
                duration = int(float(audio_stream["duration"]) * 1000)
            else:
                duration = 0

            return {
                "duration": duration,
                "size": file_size,
                "hash": file_hash,
            }
        except Exception as e:
            logger.error(f"Error calculating metadata for {file_path}: {e}")
            # The file itself may be what is missing, so the fallback size
            # lookup must not raise out of this handler.
            try:
                fallback_size = Path(file_path).stat().st_size
            except OSError:
                fallback_size = 0
            return {
                "duration": 0,
                "size": fallback_size,
                "hash": "",
            }

    @staticmethod
    def _calculate_file_hash(file_path: str) -> str:
        """Calculate SHA256 hash of file contents."""
        sha256_hash = hashlib.sha256()
        with Path(file_path).open("rb") as f:
            # Read file in chunks to handle large files
            for chunk in iter(lambda: f.read(4096), b""):
                sha256_hash.update(chunk)
        return sha256_hash.hexdigest()

    @staticmethod
    def get_normalization_status() -> dict:
        """Get statistics about normalized vs original files.

        Only soundboard (``"SDB"``) sounds are considered.

        Returns:
            dict: Statistics about normalization status (counts, percentage
                and byte sizes). On failure the same keys are returned zeroed
                together with an ``"error"`` message.

        """
        try:
            # This report only covers soundboard sounds, so resolve their
            # source/normalized directories once up front.
            sound_dir = Path(SoundNormalizerService.SOUND_DIRS["SDB"])
            normalized_dir = Path(SoundNormalizerService.NORMALIZED_DIRS["SDB"])

            sounds = Sound.query.filter_by(type="SDB").all()
            total_sounds = len(sounds)

            normalized_count = 0
            total_original_size = 0
            total_normalized_size = 0

            for sound in sounds:
                original_path = sound_dir / sound.filename
                if original_path.exists():
                    total_original_size += original_path.stat().st_size

                # Use database field to check if normalized, not file existence
                if sound.is_normalized and sound.normalized_filename:
                    normalized_count += 1
                    normalized_path = normalized_dir / sound.normalized_filename
                    if normalized_path.exists():
                        total_normalized_size += normalized_path.stat().st_size

            return {
                "total_sounds": total_sounds,
                "normalized_count": normalized_count,
                "normalization_percentage": (
                    (normalized_count / total_sounds * 100)
                    if total_sounds > 0
                    else 0
                ),
                "total_original_size": total_original_size,
                "total_normalized_size": total_normalized_size,
                "size_difference": (
                    total_normalized_size - total_original_size
                    if normalized_count > 0
                    else 0
                ),
            }

        except Exception as e:
            logger.error(f"Error getting normalization status: {e}")
            return {
                "error": str(e),
                "total_sounds": 0,
                "normalized_count": 0,
                "normalization_percentage": 0,
                "total_original_size": 0,
                "total_normalized_size": 0,
                "size_difference": 0,
            }

    @staticmethod
    def check_ffmpeg_availability() -> dict:
        """Check if ffmpeg is available and supports loudnorm filter.

        Generates a short silent WAV file with ffmpeg, then runs the loudnorm
        filter over it with the output discarded.

        Returns:
            dict: Information about ffmpeg availability and capabilities

        """
        try:
            # Create a minimal test audio file to check ffmpeg
            with tempfile.NamedTemporaryFile(
                suffix=".wav",
                delete=False,
            ) as temp_file:
                temp_path = temp_file.name

            try:
                # Try a simple ffmpeg operation to check availability
                test_input = ffmpeg.input(
                    "anullsrc=channel_layout=stereo:sample_rate=44100",
                    f="lavfi",
                    t=0.1,
                )
                test_output = ffmpeg.output(
                    test_input,
                    temp_path,
                    # The temp file already exists on disk; without -y ffmpeg
                    # refuses to replace it (or waits on a prompt nobody
                    # sees), which would wrongly report ffmpeg as missing.
                    y=None,
                )
                ffmpeg.run(
                    test_output,
                    capture_stdout=True,
                    capture_stderr=True,
                    quiet=True,
                )

                # If we get here, basic ffmpeg is working
                # Now test loudnorm filter
                try:
                    norm_input = ffmpeg.input(temp_path)
                    norm_output = ffmpeg.output(
                        norm_input,
                        "/dev/null",
                        af="loudnorm=I=-16:TP=-1.5:LRA=11.0",
                        f="null",
                    )
                    ffmpeg.run(
                        norm_output,
                        capture_stdout=True,
                        capture_stderr=True,
                        quiet=True,
                    )
                    has_loudnorm = True
                except ffmpeg.Error:
                    has_loudnorm = False

                return {
                    "available": True,
                    "version": "ffmpeg-python wrapper available",
                    "has_loudnorm": has_loudnorm,
                    "ready": has_loudnorm,
                }

            finally:
                # Clean up temp file
                temp_file_path = Path(temp_path)
                if temp_file_path.exists():
                    temp_file_path.unlink()

        except Exception as e:
            return {
                "available": False,
                "error": f"ffmpeg not available via python-ffmpeg: {e!s}",
            }