Add new sound files and update dependencies

- Added various sound files to the soundboard, including insults, quotes, and sound effects.
- Introduced new dependencies: ffmpeg-python (version 0.2.0) and pydub (version 0.25.1) for audio processing.
- Updated the lock file to reflect the new packages and their respective versions.
- Added .gitignore files in the sounds/stream and sounds/temp directories to exclude unnecessary files.
2025-07-02 17:09:43 +02:00
parent 1b597f4047
commit 7128ca727b
181 changed files with 1278 additions and 62 deletions
--- a/app/services/sound_scanner_service.py
+++ b/app/services/sound_scanner_service.py
@@ -0,0 +1,316 @@
+"""Sound file scanning service for discovering and importing audio files."""
+
+import hashlib
+import logging
+from pathlib import Path
+
+from pydub import AudioSegment
+from pydub.utils import mediainfo
+
+from app.database import db
+from app.models.sound import Sound
+
+logger = logging.getLogger(__name__)
+
+
+class SoundScannerService:
+    """Service for scanning and importing sound files."""
+
+    # Supported audio file extensions
+    SUPPORTED_EXTENSIONS = {".mp3", ".wav", ".ogg", ".flac", ".m4a", ".aac"}
+
+    # Default soundboard directory
+    DEFAULT_SOUNDBOARD_DIR = "sounds/soundboard"
+
+    @staticmethod
+    def scan_soundboard_directory(
+        directory: str | None = None,
+    ) -> dict:
+        """Scan a directory tree and add new audio files to the database.
+
+        Every regular file below the directory whose extension is listed in
+        ``SUPPORTED_EXTENSIONS`` is handed to ``_process_audio_file``. A
+        failure on a single file is logged, recorded in ``errors`` and counted
+        as skipped; the scan then continues with the next file. The database
+        session is committed once after the walk and rolled back if the scan
+        as a whole fails.
+
+        Args:
+            directory: Directory to scan (defaults to sounds/soundboard)
+
+        Returns:
+            dict: Summary of the scan operation. ``success`` and the
+            ``files_found`` / ``files_added`` / ``files_skipped`` counters are
+            always present. A successful scan also carries ``directory``,
+            ``errors`` and ``message``; a failed one carries ``error``.
+
+        """
+        scan_dir = directory or SoundScannerService.DEFAULT_SOUNDBOARD_DIR
+
+        try:
+            scan_path = Path(scan_dir)
+            # is_dir() rejects a missing path as well as a path that points
+            # at a regular file, which could never contain sounds to import.
+            if not scan_path.is_dir():
+                logger.warning(
+                    f"Soundboard directory does not exist: {scan_dir}",
+                )
+                return {
+                    "success": False,
+                    "error": f"Directory not found: {scan_dir}",
+                    "files_found": 0,
+                    "files_added": 0,
+                    "files_skipped": 0,
+                }
+
+            logger.info(f"Starting soundboard scan in: {scan_dir}")
+
+            files_found = 0
+            files_added = 0
+            files_skipped = 0
+            errors = []
+
+            # Walk in sorted order so repeated scans of the same tree visit
+            # files in the same sequence; the numeric suffix given to
+            # duplicate sound names depends on which file is seen first.
+            for file_path in sorted(scan_path.rglob("*")):
+                if not file_path.is_file():
+                    continue
+
+                filename = file_path.name
+                if not SoundScannerService._is_supported_audio_file(filename):
+                    continue
+
+                files_found += 1
+
+                try:
+                    result = SoundScannerService._process_audio_file(
+                        str(file_path),
+                        scan_dir,
+                    )
+                except Exception as e:
+                    # One broken file must not abort the whole scan;
+                    # logger.exception keeps the traceback for diagnosis.
+                    error_msg = f"Error processing {filename}: {e!s}"
+                    logger.exception(error_msg)
+                    errors.append(error_msg)
+                    files_skipped += 1
+                else:
+                    if result["added"]:
+                        files_added += 1
+                        logger.debug(f"Added sound: {filename}")
+                    elif result.get("updated"):
+                        files_added += 1  # Count updates as additions for reporting
+                        logger.debug(f"Updated sound: {filename}")
+                    else:
+                        files_skipped += 1
+                        logger.debug(
+                            f"Skipped sound: {filename} ({result['reason']})",
+                        )
+
+            # Commit all changes
+            db.session.commit()
+
+            logger.info(
+                f"Soundboard scan completed: {files_found} files found, "
+                f"{files_added} added, {files_skipped} skipped",
+            )
+
+            return {
+                "success": True,
+                "directory": scan_dir,
+                "files_found": files_found,
+                "files_added": files_added,
+                "files_skipped": files_skipped,
+                "errors": errors,
+                "message": f"Scan completed: {files_added} new sounds added",
+            }
+
+        except Exception as e:
+            # Discard whatever was staged before the failure
+            db.session.rollback()
+            logger.exception(f"Error during soundboard scan: {e!s}")
+
+            return {
+                "success": False,
+                "error": str(e),
+                "files_found": 0,
+                "files_added": 0,
+                "files_skipped": 0,
+                "message": "Soundboard scan failed",
+            }
+
+    @staticmethod
+    def _is_supported_audio_file(filename: str) -> bool:
+        """Return True when the filename ends in a supported audio extension.
+
+        The extension is compared case-insensitively against
+        ``SUPPORTED_EXTENSIONS``.
+        """
+        extension = Path(filename).suffix.lower()
+        return extension in SoundScannerService.SUPPORTED_EXTENSIONS
+
+    @staticmethod
+    def _process_audio_file(file_path: str, base_dir: str) -> dict:
+        """Process a single audio file and add it to database if new.
+
+        The file is matched against existing records first by content hash,
+        then by its path relative to ``base_dir``:
+
+        * same hash: nothing is changed and the file is reported as skipped;
+        * same relative filename but a different hash: the existing record's
+          normalized file and normalized info are cleared and its file
+          information is replaced with the new data;
+        * otherwise: a new "SDB" record is created with ``commit=False``,
+          named after the file stem, with ``_<n>`` appended while that name
+          is already taken.
+
+        Args:
+            file_path: Full path to the audio file
+            base_dir: Base directory for relative path calculation
+
+        Returns:
+            dict: Processing result with ``added`` flag and ``reason``.
+            ``updated`` is set when an existing record was refreshed, and
+            ``sound_id`` is included for added and updated records.
+
+        Raises:
+            ValueError: If ``file_path`` is not located under ``base_dir``.
+
+        """
+        # Calculate file hash for deduplication
+        file_hash = SoundScannerService._calculate_file_hash(file_path)
+
+        # Filename stored in the database is relative to the scanned directory
+        relative_filename = str(Path(file_path).relative_to(Path(base_dir)))
+
+        # Check if file already exists in database by hash
+        existing_sound = Sound.find_by_hash(file_hash)
+        if existing_sound:
+            return {
+                "added": False,
+                "reason": f"File already exists as '{existing_sound.name}'",
+            }
+
+        # Extracting metadata decodes the audio, which is the expensive step.
+        # It is deferred until here so that rescanning unchanged files only
+        # costs a hash and a lookup.
+        metadata = SoundScannerService._extract_audio_metadata(file_path)
+
+        # Check if filename already exists in database
+        existing_filename_sound = Sound.find_by_filename(relative_filename)
+        if existing_filename_sound:
+            # The normalized copy was derived from the old file contents
+            SoundScannerService._clear_normalized_files(existing_filename_sound)
+            existing_filename_sound.clear_normalized_info()
+
+            # Update existing sound with new file information
+            existing_filename_sound.update_file_info(
+                filename=relative_filename,
+                duration=metadata["duration"],
+                size=metadata["size"],
+                hash_value=file_hash,
+            )
+
+            return {
+                "added": False,
+                "updated": True,
+                "sound_id": existing_filename_sound.id,
+                "reason": f"Updated existing sound '{existing_filename_sound.name}' with new file data",
+            }
+
+        # Generate sound name from filename (without extension)
+        original_name = Path(file_path).stem
+        sound_name = original_name
+
+        # Append an increasing counter until the name is unused
+        counter = 1
+        while Sound.find_by_name(sound_name):
+            sound_name = f"{original_name}_{counter}"
+            counter += 1
+
+        # Create new sound record
+        sound = Sound.create_sound(
+            sound_type="SDB",  # Soundboard type
+            name=sound_name,
+            filename=relative_filename,
+            duration=metadata["duration"],
+            size=metadata["size"],
+            hash_value=file_hash,
+            is_music=False,
+            is_deletable=False,
+            commit=False,  # Don't commit individually, let scanner handle transaction
+        )
+
+        return {
+            "added": True,
+            "sound_id": sound.id,
+            "reason": "New file added successfully",
+        }
+
+    @staticmethod
+    def _calculate_file_hash(file_path: str) -> str:
+        """Return the hex-encoded SHA256 digest of a file's contents."""
+        block_size = 4096
+        digest = hashlib.sha256()
+
+        # Feed the hasher block by block so a large file is never held in
+        # memory in one piece
+        with Path(file_path).open("rb") as stream:
+            while block := stream.read(block_size):
+                digest.update(block)
+
+        return digest.hexdigest()
+
+    @staticmethod
+    def _clear_normalized_files(sound: Sound) -> None:
+        """Delete the normalized file belonging to a sound, if there is one.
+
+        Does nothing unless the sound is flagged as normalized and has a
+        normalized filename. A removal that fails is logged as a warning and
+        not raised.
+        """
+        if not (sound.is_normalized and sound.normalized_filename):
+            return
+
+        # Imported locally to avoid circular imports
+        from app.services.sound_normalizer_service import SoundNormalizerService
+
+        normalized_dir = Path(SoundNormalizerService.NORMALIZED_DIR)
+        normalized_path = normalized_dir / sound.normalized_filename
+        if not normalized_path.exists():
+            return
+
+        try:
+            normalized_path.unlink()
+            logger.info(f"Removed normalized file: {normalized_path}")
+        except Exception as e:
+            logger.warning(f"Could not remove normalized file {normalized_path}: {e}")
+
+    @staticmethod
+    def _extract_audio_metadata(file_path: str) -> dict:
+        """Extract metadata from audio file using pydub and mediainfo.
+
+        Args:
+            file_path: Path to the audio file
+
+        Returns:
+            dict: ``duration`` (``len()`` of the pydub segment, which pydub
+            documents as milliseconds), ``size`` (bytes), ``bitrate`` (bits
+            per second, or None when it cannot be determined), ``channels``
+            and ``sample_rate``. When the file cannot be decoded, ``duration``
+            is 0 and ``bitrate``, ``channels`` and ``sample_rate`` are None.
+
+        Raises:
+            OSError: If the file size cannot be read.
+
+        """
+        # Read the size once, outside the decode guard: the fallback result
+        # needs it as well, and a file that cannot be stat'ed is a real error.
+        file_size = Path(file_path).stat().st_size
+
+        try:
+            # Load audio file with pydub for basic info
+            audio = AudioSegment.from_file(file_path)
+            duration = len(audio)
+            channels = audio.channels
+            sample_rate = audio.frame_rate
+        except Exception as e:
+            logger.warning(f"Could not extract metadata from {file_path}: {e}")
+            return {
+                "duration": 0,
+                "size": file_size,
+                "bitrate": None,
+                "channels": None,
+                "sample_rate": None,
+            }
+
+        # Use mediainfo for more accurate bitrate information. The bitrate is
+        # optional, so a failing probe must not discard the decoded duration.
+        bitrate = None
+        try:
+            info = mediainfo(file_path)
+            if info and "bit_rate" in info:
+                bitrate = int(info["bit_rate"])
+            elif info and "bitrate" in info:
+                bitrate = int(info["bitrate"])
+        except Exception as e:
+            # Covers non-numeric values as well as a probe that cannot run
+            logger.debug(f"mediainfo gave no usable bitrate for {file_path}: {e}")
+            bitrate = None
+
+        # Estimate the bitrate whenever the probe produced none (missing key
+        # or failure). duration is in milliseconds, so multiply by 1000 to get
+        # bits per second, the unit ffprobe uses for bit_rate.
+        if bitrate is None and duration > 0:
+            bitrate = file_size * 8 * 1000 // duration
+
+        return {
+            "duration": duration,
+            "size": file_size,
+            "bitrate": bitrate,
+            "channels": channels,
+            "sample_rate": sample_rate,
+        }
+
+    @staticmethod
+    def get_scan_statistics() -> dict:
+        """Summarize the sounds currently stored in the database.
+
+        Returns:
+            dict: Record counts (all sounds, sounds of type "SDB", sounds
+            flagged as music), the size, duration and play count summed over
+            every sound, and the result of ``Sound.get_most_played(5)`` with
+            each entry converted through ``to_dict()``.
+
+        """
+        counts = {
+            "total_sounds": Sound.query.count(),
+            "soundboard_sounds": Sound.query.filter_by(type="SDB").count(),
+            "music_sounds": Sound.query.filter_by(is_music=True).count(),
+        }
+
+        # Accumulate the three totals in a single pass over all records
+        size_sum = 0
+        duration_sum = 0
+        play_sum = 0
+        for entry in Sound.query.all():
+            size_sum += entry.size
+            duration_sum += entry.duration
+            play_sum += entry.play_count
+
+        top_played = [entry.to_dict() for entry in Sound.get_most_played(5)]
+
+        return {
+            **counts,
+            "total_size_bytes": size_sum,
+            "total_duration": duration_sum,
+            "total_plays": play_sum,
+            "most_played": top_played,
+        }