Add new sound files and update dependencies
- Added various sound files to the soundboard, including insults, quotes, and sound effects.
- Introduced new dependencies: ffmpeg-python (version 0.2.0) and pydub (version 0.25.1) for audio processing.
- Updated the lock file to reflect the new packages and their respective versions.
- Added .gitignore files in the sounds/stream and sounds/temp directories to exclude unnecessary files.
This commit is contained in:
316
app/services/sound_scanner_service.py
Normal file
316
app/services/sound_scanner_service.py
Normal file
@@ -0,0 +1,316 @@
|
||||
"""Sound file scanning service for discovering and importing audio files."""
|
||||
|
||||
import hashlib
|
||||
import logging
|
||||
from pathlib import Path
|
||||
|
||||
from pydub import AudioSegment
|
||||
from pydub.utils import mediainfo
|
||||
|
||||
from app.database import db
|
||||
from app.models.sound import Sound
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class SoundScannerService:
|
||||
"""Service for scanning and importing sound files."""
|
||||
|
||||
# Supported audio file extensions
|
||||
SUPPORTED_EXTENSIONS = {".mp3", ".wav", ".ogg", ".flac", ".m4a", ".aac"}
|
||||
|
||||
# Default soundboard directory
|
||||
DEFAULT_SOUNDBOARD_DIR = "sounds/soundboard"
|
||||
|
||||
@staticmethod
|
||||
def scan_soundboard_directory(
|
||||
directory: str | None = None,
|
||||
) -> dict:
|
||||
"""Scan the soundboard directory and add new files to the database.
|
||||
|
||||
Args:
|
||||
directory: Directory to scan (defaults to sounds/soundboard)
|
||||
|
||||
Returns:
|
||||
dict: Summary of the scan operation
|
||||
|
||||
"""
|
||||
scan_dir = directory or SoundScannerService.DEFAULT_SOUNDBOARD_DIR
|
||||
|
||||
try:
|
||||
# Ensure directory exists
|
||||
scan_path = Path(scan_dir)
|
||||
if not scan_path.exists():
|
||||
logger.warning(
|
||||
f"Soundboard directory does not exist: {scan_dir}",
|
||||
)
|
||||
return {
|
||||
"success": False,
|
||||
"error": f"Directory not found: {scan_dir}",
|
||||
"files_found": 0,
|
||||
"files_added": 0,
|
||||
"files_skipped": 0,
|
||||
}
|
||||
|
||||
logger.info(f"Starting soundboard scan in: {scan_dir}")
|
||||
|
||||
files_found = 0
|
||||
files_added = 0
|
||||
files_skipped = 0
|
||||
errors = []
|
||||
|
||||
# Walk through directory and subdirectories
|
||||
for file_path in scan_path.rglob("*"):
|
||||
if file_path.is_file():
|
||||
filename = file_path.name
|
||||
|
||||
# Check if file has supported extension
|
||||
if not SoundScannerService._is_supported_audio_file(
|
||||
filename,
|
||||
):
|
||||
continue
|
||||
|
||||
files_found += 1
|
||||
|
||||
try:
|
||||
# Process the audio file
|
||||
result = SoundScannerService._process_audio_file(
|
||||
str(file_path),
|
||||
scan_dir,
|
||||
)
|
||||
|
||||
if result["added"]:
|
||||
files_added += 1
|
||||
logger.debug(f"Added sound: {filename}")
|
||||
elif result.get("updated"):
|
||||
files_added += 1 # Count updates as additions for reporting
|
||||
logger.debug(f"Updated sound: {filename}")
|
||||
else:
|
||||
files_skipped += 1
|
||||
logger.debug(
|
||||
f"Skipped sound: {filename} ({result['reason']})",
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
error_msg = f"Error processing {filename}: {e!s}"
|
||||
logger.error(error_msg)
|
||||
errors.append(error_msg)
|
||||
files_skipped += 1
|
||||
|
||||
# Commit all changes
|
||||
db.session.commit()
|
||||
|
||||
logger.info(
|
||||
f"Soundboard scan completed: {files_found} files found, "
|
||||
f"{files_added} added, {files_skipped} skipped",
|
||||
)
|
||||
|
||||
return {
|
||||
"success": True,
|
||||
"directory": scan_dir,
|
||||
"files_found": files_found,
|
||||
"files_added": files_added,
|
||||
"files_skipped": files_skipped,
|
||||
"errors": errors,
|
||||
"message": f"Scan completed: {files_added} new sounds added",
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
db.session.rollback()
|
||||
logger.error(f"Error during soundboard scan: {e!s}")
|
||||
|
||||
return {
|
||||
"success": False,
|
||||
"error": str(e),
|
||||
"files_found": 0,
|
||||
"files_added": 0,
|
||||
"files_skipped": 0,
|
||||
"message": "Soundboard scan failed",
|
||||
}
|
||||
|
||||
@staticmethod
|
||||
def _is_supported_audio_file(filename: str) -> bool:
|
||||
"""Check if file has a supported audio extension."""
|
||||
return (
|
||||
Path(filename).suffix.lower()
|
||||
in SoundScannerService.SUPPORTED_EXTENSIONS
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def _process_audio_file(file_path: str, base_dir: str) -> dict:
|
||||
"""Process a single audio file and add it to database if new.
|
||||
|
||||
Args:
|
||||
file_path: Full path to the audio file
|
||||
base_dir: Base directory for relative path calculation
|
||||
|
||||
Returns:
|
||||
dict: Processing result with added flag and reason
|
||||
|
||||
"""
|
||||
# Calculate file hash for deduplication
|
||||
file_hash = SoundScannerService._calculate_file_hash(file_path)
|
||||
|
||||
# Get file metadata
|
||||
metadata = SoundScannerService._extract_audio_metadata(file_path)
|
||||
|
||||
# Calculate relative filename from base directory
|
||||
relative_path = Path(file_path).relative_to(Path(base_dir))
|
||||
|
||||
# Check if file already exists in database by hash
|
||||
existing_sound = Sound.find_by_hash(file_hash)
|
||||
if existing_sound:
|
||||
return {
|
||||
"added": False,
|
||||
"reason": f"File already exists as '{existing_sound.name}'",
|
||||
}
|
||||
|
||||
# Check if filename already exists in database
|
||||
existing_filename_sound = Sound.find_by_filename(str(relative_path))
|
||||
if existing_filename_sound:
|
||||
# Remove normalized files and clear normalized info
|
||||
SoundScannerService._clear_normalized_files(existing_filename_sound)
|
||||
existing_filename_sound.clear_normalized_info()
|
||||
|
||||
# Update existing sound with new file information
|
||||
existing_filename_sound.update_file_info(
|
||||
filename=str(relative_path),
|
||||
duration=metadata["duration"],
|
||||
size=metadata["size"],
|
||||
hash_value=file_hash,
|
||||
)
|
||||
|
||||
return {
|
||||
"added": False,
|
||||
"updated": True,
|
||||
"sound_id": existing_filename_sound.id,
|
||||
"reason": f"Updated existing sound '{existing_filename_sound.name}' with new file data",
|
||||
}
|
||||
|
||||
# Generate sound name from filename (without extension)
|
||||
sound_name = Path(file_path).stem
|
||||
|
||||
# Check if name already exists and make it unique if needed
|
||||
counter = 1
|
||||
original_name = sound_name
|
||||
while Sound.find_by_name(sound_name):
|
||||
sound_name = f"{original_name}_{counter}"
|
||||
counter += 1
|
||||
|
||||
# Create new sound record
|
||||
sound = Sound.create_sound(
|
||||
sound_type="SDB", # Soundboard type
|
||||
name=sound_name,
|
||||
filename=str(relative_path),
|
||||
duration=metadata["duration"],
|
||||
size=metadata["size"],
|
||||
hash_value=file_hash,
|
||||
is_music=False,
|
||||
is_deletable=False,
|
||||
commit=False, # Don't commit individually, let scanner handle transaction
|
||||
)
|
||||
|
||||
return {
|
||||
"added": True,
|
||||
"sound_id": sound.id,
|
||||
"reason": "New file added successfully",
|
||||
}
|
||||
|
||||
@staticmethod
|
||||
def _calculate_file_hash(file_path: str) -> str:
|
||||
"""Calculate SHA256 hash of file contents."""
|
||||
sha256_hash = hashlib.sha256()
|
||||
|
||||
with Path(file_path).open("rb") as f:
|
||||
# Read file in chunks to handle large files
|
||||
for chunk in iter(lambda: f.read(4096), b""):
|
||||
sha256_hash.update(chunk)
|
||||
|
||||
return sha256_hash.hexdigest()
|
||||
|
||||
@staticmethod
|
||||
def _clear_normalized_files(sound: Sound) -> None:
|
||||
"""Remove normalized files for a sound if they exist."""
|
||||
if sound.is_normalized and sound.normalized_filename:
|
||||
# Import here to avoid circular imports
|
||||
from app.services.sound_normalizer_service import SoundNormalizerService
|
||||
|
||||
normalized_path = Path(SoundNormalizerService.NORMALIZED_DIR) / sound.normalized_filename
|
||||
if normalized_path.exists():
|
||||
try:
|
||||
normalized_path.unlink()
|
||||
logger.info(f"Removed normalized file: {normalized_path}")
|
||||
except Exception as e:
|
||||
logger.warning(f"Could not remove normalized file {normalized_path}: {e}")
|
||||
|
||||
@staticmethod
|
||||
def _extract_audio_metadata(file_path: str) -> dict:
|
||||
"""Extract metadata from audio file using pydub and mediainfo."""
|
||||
try:
|
||||
# Get file size
|
||||
file_size = Path(file_path).stat().st_size
|
||||
|
||||
# Load audio file with pydub for basic info
|
||||
audio = AudioSegment.from_file(file_path)
|
||||
|
||||
# Extract basic metadata from AudioSegment
|
||||
duration = len(audio)
|
||||
channels = audio.channels
|
||||
sample_rate = audio.frame_rate
|
||||
|
||||
# Use mediainfo for more accurate bitrate information
|
||||
bitrate = None
|
||||
try:
|
||||
info = mediainfo(file_path)
|
||||
if info and "bit_rate" in info:
|
||||
bitrate = int(info["bit_rate"])
|
||||
elif info and "bitrate" in info:
|
||||
bitrate = int(info["bitrate"])
|
||||
except (ValueError, KeyError, TypeError):
|
||||
# Fallback to calculated bitrate if mediainfo fails
|
||||
if duration > 0:
|
||||
file_size_bits = file_size * 8
|
||||
bitrate = int(file_size_bits / duration / 1000)
|
||||
|
||||
return {
|
||||
"duration": duration,
|
||||
"size": file_size,
|
||||
"bitrate": bitrate,
|
||||
"channels": channels,
|
||||
"sample_rate": sample_rate,
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
logger.warning(f"Could not extract metadata from {file_path}: {e}")
|
||||
return {
|
||||
"duration": 0,
|
||||
"size": Path(file_path).stat().st_size,
|
||||
"bitrate": None,
|
||||
"channels": None,
|
||||
"sample_rate": None,
|
||||
}
|
||||
|
||||
@staticmethod
|
||||
def get_scan_statistics() -> dict:
|
||||
"""Get statistics about sounds in the database."""
|
||||
total_sounds = Sound.query.count()
|
||||
sdb_sounds = Sound.query.filter_by(type="SDB").count()
|
||||
music_sounds = Sound.query.filter_by(is_music=True).count()
|
||||
|
||||
# Calculate total size and duration
|
||||
sounds = Sound.query.all()
|
||||
total_size = sum(sound.size for sound in sounds)
|
||||
total_duration = sum(sound.duration for sound in sounds)
|
||||
total_plays = sum(sound.play_count for sound in sounds)
|
||||
|
||||
return {
|
||||
"total_sounds": total_sounds,
|
||||
"soundboard_sounds": sdb_sounds,
|
||||
"music_sounds": music_sounds,
|
||||
"total_size_bytes": total_size,
|
||||
"total_duration": total_duration,
|
||||
"total_plays": total_plays,
|
||||
"most_played": [
|
||||
sound.to_dict() for sound in Sound.get_most_played(5)
|
||||
],
|
||||
}
|
||||
Reference in New Issue
Block a user