Add tests for extraction API endpoints and enhance existing tests
- Implement tests for admin extraction API endpoints including status retrieval, deletion of extractions, and permission checks.
- Add tests for user extraction deletion, ensuring proper handling of permissions and non-existent extractions.
- Enhance sound endpoint tests to include duplicate handling in responses.
- Refactor favorite service tests to utilize mock dependencies for better maintainability and clarity.
- Update sound scanner tests to improve file handling and ensure proper deletion of associated files.
This commit is contained in:
@@ -2,14 +2,16 @@
|
||||
|
||||
import asyncio
|
||||
import shutil
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
from typing import TypedDict
|
||||
from typing import Any, TypedDict
|
||||
|
||||
import yt_dlp
|
||||
from sqlmodel.ext.asyncio.session import AsyncSession
|
||||
|
||||
from app.core.config import settings
|
||||
from app.core.logging import get_logger
|
||||
from app.models.extraction import Extraction
|
||||
from app.models.sound import Sound
|
||||
from app.repositories.extraction import ExtractionRepository
|
||||
from app.repositories.sound import SoundRepository
|
||||
@@ -21,6 +23,18 @@ from app.utils.audio import get_audio_duration, get_file_hash, get_file_size
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
@dataclass
class ExtractionContext:
    """Context data for extraction processing.

    Bundles the per-job values that the extraction helper methods read, so
    they can be passed around as one object. The instance is mutable: the
    service/title fields are reassigned after service detection.
    """

    # ID of the extraction being processed.
    extraction_id: int
    # Source URL of the extraction.
    extraction_url: str
    # Service name; None while it has not been detected yet.
    extraction_service: str | None
    # Identifier of the media on that service; None while not detected yet.
    extraction_service_id: str | None
    # Title of the extraction; may be None.
    extraction_title: str | None
    # ID of the user the extraction events are emitted for.
    user_id: int
|
||||
|
||||
|
||||
class ExtractionInfo(TypedDict):
|
||||
"""Type definition for extraction information."""
|
||||
|
||||
@@ -150,8 +164,8 @@ class ExtractionService:
|
||||
logger.exception("Failed to detect service info for URL: %s", url)
|
||||
return None
|
||||
|
||||
async def process_extraction(self, extraction_id: int) -> ExtractionInfo:
|
||||
"""Process an extraction job."""
|
||||
async def _validate_extraction(self, extraction_id: int) -> tuple:
|
||||
"""Validate extraction and return extraction data."""
|
||||
extraction = await self.extraction_repo.get_by_id(extraction_id)
|
||||
if not extraction:
|
||||
msg = f"Extraction {extraction_id} not found"
|
||||
@@ -173,9 +187,183 @@ class ExtractionService:
|
||||
user = await self.user_repo.get_by_id(user_id)
|
||||
user_name = user.name if user else None
|
||||
except Exception:
|
||||
logger.warning("Failed to get user %d for extraction", user_id)
|
||||
logger.exception("Failed to get user %d for extraction", user_id)
|
||||
user_name = None
|
||||
|
||||
return (
|
||||
extraction,
|
||||
user_id,
|
||||
extraction_url,
|
||||
extraction_service,
|
||||
extraction_service_id,
|
||||
extraction_title,
|
||||
user_name,
|
||||
)
|
||||
|
||||
async def _handle_service_detection(
    self,
    extraction: Extraction,
    context: ExtractionContext,
) -> tuple:
    """Resolve the service, service ID and title for an extraction.

    When the context already carries both a service and a service ID they
    are returned untouched. Otherwise the service is detected from the URL,
    checked against existing extractions, persisted on ``extraction`` and
    announced through a "processing" event.

    Args:
        extraction: The extraction record to update with detected values.
        context: Per-job values (IDs, URL, current service/title, user).

    Returns:
        A ``(service, service_id, title)`` tuple.

    Raises:
        ValueError: If detection yields nothing, yields incomplete data, or
            another extraction already exists for the same service and ID.

    """
    # Nothing to detect when both identifiers are already known.
    if context.extraction_service and context.extraction_service_id:
        return (
            context.extraction_service,
            context.extraction_service_id,
            context.extraction_title,
        )

    logger.info("Detecting service info for extraction %d", context.extraction_id)
    detected = await self._detect_service_info(context.extraction_url)
    if not detected:
        msg = "Unable to detect service information from URL"
        raise ValueError(msg)

    detected_service = detected["service"]
    detected_service_id = detected["service_id"]
    if not (detected_service and detected_service_id):
        msg = "Service info is incomplete"
        raise ValueError(msg)

    # A different extraction with the same service/ID pair is a duplicate.
    duplicate = await self.extraction_repo.get_by_service_and_id(
        detected_service,
        detected_service_id,
    )
    if duplicate and duplicate.id != context.extraction_id:
        error_msg = (
            f"Extraction already exists for "
            f"{detected_service}:{detected_service_id}"
        )
        logger.warning(error_msg)
        raise ValueError(error_msg)

    # Prefer the detected title, falling back to the one already stored.
    resolved_title = detected.get("title") or context.extraction_title

    await self.extraction_repo.update(
        extraction,
        {
            "service": detected_service,
            "service_id": detected_service_id,
            "title": resolved_title,
        },
    )

    progress_event = {
        "extraction_id": context.extraction_id,
        "status": "processing",
        "title": resolved_title,
        "url": context.extraction_url,
    }
    await self._emit_extraction_event(context.user_id, progress_event)

    return detected_service, detected_service_id, resolved_title
|
||||
|
||||
async def _process_media_files(
|
||||
self,
|
||||
extraction_id: int,
|
||||
extraction_url: str,
|
||||
extraction_title: str | None,
|
||||
extraction_service: str,
|
||||
extraction_service_id: str,
|
||||
) -> int:
|
||||
"""Process media files and create sound record."""
|
||||
# Extract audio and thumbnail
|
||||
audio_file, thumbnail_file = await self._extract_media(
|
||||
extraction_id,
|
||||
extraction_url,
|
||||
)
|
||||
|
||||
# Move files to final locations
|
||||
final_audio_path, final_thumbnail_path = (
|
||||
await self._move_files_to_final_location(
|
||||
audio_file,
|
||||
thumbnail_file,
|
||||
extraction_title,
|
||||
extraction_service,
|
||||
extraction_service_id,
|
||||
)
|
||||
)
|
||||
|
||||
# Create Sound record
|
||||
sound = await self._create_sound_record(
|
||||
final_audio_path,
|
||||
final_thumbnail_path,
|
||||
extraction_title,
|
||||
extraction_service,
|
||||
extraction_service_id,
|
||||
)
|
||||
|
||||
if not sound.id:
|
||||
msg = "Sound creation failed - no ID returned"
|
||||
raise RuntimeError(msg)
|
||||
|
||||
return sound.id
|
||||
|
||||
async def _complete_extraction(
|
||||
self,
|
||||
extraction: Extraction,
|
||||
context: ExtractionContext,
|
||||
sound_id: int,
|
||||
) -> None:
|
||||
"""Complete extraction processing."""
|
||||
# Normalize the sound
|
||||
await self._normalize_sound(sound_id)
|
||||
|
||||
# Add to main playlist
|
||||
await self._add_to_main_playlist(sound_id, context.user_id)
|
||||
|
||||
# Update extraction with success
|
||||
await self.extraction_repo.update(
|
||||
extraction,
|
||||
{
|
||||
"status": "completed",
|
||||
"sound_id": sound_id,
|
||||
"error": None,
|
||||
},
|
||||
)
|
||||
|
||||
# Emit WebSocket event for completion
|
||||
await self._emit_extraction_event(
|
||||
context.user_id,
|
||||
{
|
||||
"extraction_id": context.extraction_id,
|
||||
"status": "completed",
|
||||
"title": context.extraction_title,
|
||||
"url": context.extraction_url,
|
||||
"sound_id": sound_id,
|
||||
},
|
||||
)
|
||||
|
||||
async def process_extraction(self, extraction_id: int) -> ExtractionInfo:
|
||||
"""Process an extraction job."""
|
||||
# Validate extraction and get context data
|
||||
(
|
||||
extraction,
|
||||
user_id,
|
||||
extraction_url,
|
||||
extraction_service,
|
||||
extraction_service_id,
|
||||
extraction_title,
|
||||
user_name,
|
||||
) = await self._validate_extraction(extraction_id)
|
||||
|
||||
# Create context object for helper methods
|
||||
context = ExtractionContext(
|
||||
extraction_id=extraction_id,
|
||||
extraction_url=extraction_url,
|
||||
extraction_service=extraction_service,
|
||||
extraction_service_id=extraction_service_id,
|
||||
extraction_title=extraction_title,
|
||||
user_id=user_id,
|
||||
)
|
||||
|
||||
logger.info("Processing extraction %d: %s", extraction_id, extraction_url)
|
||||
|
||||
try:
|
||||
@@ -184,142 +372,53 @@ class ExtractionService:
|
||||
|
||||
# Emit WebSocket event for processing start
|
||||
await self._emit_extraction_event(
|
||||
user_id,
|
||||
context.user_id,
|
||||
{
|
||||
"extraction_id": extraction_id,
|
||||
"extraction_id": context.extraction_id,
|
||||
"status": "processing",
|
||||
"title": extraction_title or "Processing extraction...",
|
||||
"url": extraction_url,
|
||||
"title": context.extraction_title or "Processing extraction...",
|
||||
"url": context.extraction_url,
|
||||
},
|
||||
)
|
||||
|
||||
# Detect service info if not already available
|
||||
if not extraction_service or not extraction_service_id:
|
||||
logger.info("Detecting service info for extraction %d", extraction_id)
|
||||
service_info = await self._detect_service_info(extraction_url)
|
||||
|
||||
if not service_info:
|
||||
msg = "Unable to detect service information from URL"
|
||||
raise ValueError(msg)
|
||||
|
||||
# Check if extraction already exists for this service
|
||||
service_name = service_info["service"]
|
||||
service_id_val = service_info["service_id"]
|
||||
|
||||
if not service_name or not service_id_val:
|
||||
msg = "Service info is incomplete"
|
||||
raise ValueError(msg)
|
||||
|
||||
existing = await self.extraction_repo.get_by_service_and_id(
|
||||
service_name,
|
||||
service_id_val,
|
||||
)
|
||||
if existing and existing.id != extraction_id:
|
||||
error_msg = (
|
||||
f"Extraction already exists for "
|
||||
f"{service_info['service']}:{service_info['service_id']}"
|
||||
)
|
||||
logger.warning(error_msg)
|
||||
raise ValueError(error_msg)
|
||||
|
||||
# Update extraction with service info
|
||||
update_data = {
|
||||
"service": service_info["service"],
|
||||
"service_id": service_info["service_id"],
|
||||
"title": service_info.get("title") or extraction_title,
|
||||
}
|
||||
await self.extraction_repo.update(extraction, update_data)
|
||||
|
||||
# Update values for processing
|
||||
extraction_service = service_info["service"]
|
||||
extraction_service_id = service_info["service_id"]
|
||||
extraction_title = service_info.get("title") or extraction_title
|
||||
|
||||
await self._emit_extraction_event(
|
||||
user_id,
|
||||
{
|
||||
"extraction_id": extraction_id,
|
||||
"status": "processing",
|
||||
"title": extraction_title,
|
||||
"url": extraction_url,
|
||||
},
|
||||
)
|
||||
|
||||
# Extract audio and thumbnail
|
||||
audio_file, thumbnail_file = await self._extract_media(
|
||||
extraction_id,
|
||||
extraction_url,
|
||||
# Handle service detection and duplicate checking
|
||||
extraction_service, extraction_service_id, extraction_title = (
|
||||
await self._handle_service_detection(extraction, context)
|
||||
)
|
||||
|
||||
# Move files to final locations
|
||||
(
|
||||
final_audio_path,
|
||||
final_thumbnail_path,
|
||||
) = await self._move_files_to_final_location(
|
||||
audio_file,
|
||||
thumbnail_file,
|
||||
extraction_title,
|
||||
# Update context with potentially new values
|
||||
context.extraction_service = extraction_service
|
||||
context.extraction_service_id = extraction_service_id
|
||||
context.extraction_title = extraction_title
|
||||
|
||||
# Process media files and create sound record
|
||||
sound_id = await self._process_media_files(
|
||||
context.extraction_id,
|
||||
context.extraction_url,
|
||||
context.extraction_title,
|
||||
extraction_service,
|
||||
extraction_service_id,
|
||||
)
|
||||
|
||||
# Create Sound record
|
||||
sound = await self._create_sound_record(
|
||||
final_audio_path,
|
||||
final_thumbnail_path,
|
||||
extraction_title,
|
||||
extraction_service,
|
||||
extraction_service_id,
|
||||
)
|
||||
# Complete extraction processing
|
||||
await self._complete_extraction(extraction, context, sound_id)
|
||||
|
||||
# Store sound_id early to avoid session detachment issues
|
||||
sound_id = sound.id
|
||||
if not sound_id:
|
||||
msg = "Sound creation failed - no ID returned"
|
||||
raise RuntimeError(msg)
|
||||
|
||||
# Normalize the sound
|
||||
await self._normalize_sound(sound_id)
|
||||
|
||||
# Add to main playlist
|
||||
await self._add_to_main_playlist(sound_id, user_id)
|
||||
|
||||
# Update extraction with success
|
||||
await self.extraction_repo.update(
|
||||
extraction,
|
||||
{
|
||||
"status": "completed",
|
||||
"sound_id": sound_id,
|
||||
"error": None,
|
||||
},
|
||||
)
|
||||
|
||||
# Emit WebSocket event for completion
|
||||
await self._emit_extraction_event(
|
||||
user_id,
|
||||
{
|
||||
"extraction_id": extraction_id,
|
||||
"status": "completed",
|
||||
"title": extraction_title,
|
||||
"url": extraction_url,
|
||||
"sound_id": sound_id,
|
||||
},
|
||||
)
|
||||
|
||||
logger.info("Successfully processed extraction %d", extraction_id)
|
||||
logger.info("Successfully processed extraction %d", context.extraction_id)
|
||||
|
||||
# Get updated extraction to get latest timestamps
|
||||
updated_extraction = await self.extraction_repo.get_by_id(extraction_id)
|
||||
updated_extraction = await self.extraction_repo.get_by_id(
|
||||
context.extraction_id,
|
||||
)
|
||||
return {
|
||||
"id": extraction_id,
|
||||
"url": extraction_url,
|
||||
"id": context.extraction_id,
|
||||
"url": context.extraction_url,
|
||||
"service": extraction_service,
|
||||
"service_id": extraction_service_id,
|
||||
"title": extraction_title,
|
||||
"status": "completed",
|
||||
"error": None,
|
||||
"sound_id": sound_id,
|
||||
"user_id": user_id,
|
||||
"user_id": context.user_id,
|
||||
"user_name": user_name,
|
||||
"created_at": (
|
||||
updated_extraction.created_at.isoformat()
|
||||
@@ -337,18 +436,18 @@ class ExtractionService:
|
||||
error_msg = str(e)
|
||||
logger.exception(
|
||||
"Failed to process extraction %d: %s",
|
||||
extraction_id,
|
||||
context.extraction_id,
|
||||
error_msg,
|
||||
)
|
||||
|
||||
# Emit WebSocket event for failure
|
||||
await self._emit_extraction_event(
|
||||
user_id,
|
||||
context.user_id,
|
||||
{
|
||||
"extraction_id": extraction_id,
|
||||
"extraction_id": context.extraction_id,
|
||||
"status": "failed",
|
||||
"title": extraction_title or "Extraction failed",
|
||||
"url": extraction_url,
|
||||
"title": context.extraction_title or "Extraction failed",
|
||||
"url": context.extraction_url,
|
||||
"error": error_msg,
|
||||
},
|
||||
)
|
||||
@@ -363,17 +462,19 @@ class ExtractionService:
|
||||
)
|
||||
|
||||
# Get updated extraction to get latest timestamps
|
||||
updated_extraction = await self.extraction_repo.get_by_id(extraction_id)
|
||||
updated_extraction = await self.extraction_repo.get_by_id(
|
||||
context.extraction_id,
|
||||
)
|
||||
return {
|
||||
"id": extraction_id,
|
||||
"url": extraction_url,
|
||||
"service": extraction_service,
|
||||
"service_id": extraction_service_id,
|
||||
"title": extraction_title,
|
||||
"id": context.extraction_id,
|
||||
"url": context.extraction_url,
|
||||
"service": context.extraction_service,
|
||||
"service_id": context.extraction_service_id,
|
||||
"title": context.extraction_title,
|
||||
"status": "failed",
|
||||
"error": error_msg,
|
||||
"sound_id": None,
|
||||
"user_id": user_id,
|
||||
"user_id": context.user_id,
|
||||
"user_name": user_name,
|
||||
"created_at": (
|
||||
updated_extraction.created_at.isoformat()
|
||||
@@ -780,3 +881,174 @@ class ExtractionService:
|
||||
}
|
||||
for extraction, user in extraction_user_tuples
|
||||
]
|
||||
|
||||
async def delete_extraction(
    self,
    extraction_id: int,
    user_id: int | None = None,
) -> bool:
    """Delete an extraction and its associated sound and files.

    The extraction record is deleted first, then the associated sound (if
    any), and only then is the session committed. Any exception triggers a
    session rollback and is re-raised.

    Args:
        extraction_id: The ID of the extraction to delete
        user_id: Optional user ID for ownership verification (None for admin)

    Returns:
        True if deletion was successful, False if extraction not found

    Raises:
        ValueError: If user doesn't own the extraction (when user_id is provided)

    """
    # Only a missing user_id means "admin"; a falsy-but-valid ID such as 0
    # must still be logged as that user, matching the ownership check below.
    requester = "admin" if user_id is None else user_id
    logger.info(
        "Deleting extraction: %d (user: %s)",
        extraction_id,
        requester,
    )

    # Get the extraction record
    extraction = await self.extraction_repo.get_by_id(extraction_id)
    if not extraction:
        logger.warning("Extraction %d not found", extraction_id)
        return False

    # Check ownership if user_id is provided (non-admin request)
    if user_id is not None and extraction.user_id != user_id:
        msg = "You don't have permission to delete this extraction"
        raise ValueError(msg)

    # Get associated sound if it exists and capture its attributes immediately
    sound_data = None
    sound_object = None
    if extraction.sound_id:
        sound_object = await self.sound_repo.get_by_id(extraction.sound_id)
        if sound_object:
            # Capture attributes immediately while session is valid
            sound_data = {
                "id": sound_object.id,
                "type": sound_object.type,
                "filename": sound_object.filename,
                "is_normalized": sound_object.is_normalized,
                "normalized_filename": sound_object.normalized_filename,
                "thumbnail": sound_object.thumbnail,
            }

    try:
        # Delete the extraction record first
        await self.extraction_repo.delete(extraction)
        logger.info("Deleted extraction record: %d", extraction_id)

        sound_was_in_current_playlist = False
        if sound_object and sound_data:
            # Playlist membership has to be read before the sound is deleted.
            sound_was_in_current_playlist = (
                await self._check_sound_in_current_playlist(sound_data["id"])
            )

            # NOTE(review): this runs before the commit below; if it removes
            # files from disk, a later rollback would not restore them —
            # confirm this ordering is acceptable.
            await self._delete_sound_and_files(sound_object, sound_data)
            logger.info(
                "Deleted associated sound: %d (%s)",
                sound_data["id"],
                sound_data["filename"],
            )

        # Commit the transaction
        await self.session.commit()

        # Reload player playlist if deleted sound was in current playlist
        if sound_was_in_current_playlist and sound_data:
            await self._reload_player_playlist()
            logger.info(
                "Reloaded player playlist after deleting sound %d "
                "from current playlist",
                sound_data["id"],
            )

    except Exception:
        # Rollback on any error
        await self.session.rollback()
        logger.exception("Failed to delete extraction %d", extraction_id)
        raise
    else:
        return True
|
||||
|
||||
async def _delete_sound_and_files(
    self,
    sound: Sound,
    sound_data: dict[str, Any],
) -> None:
    """Delete a sound record, then remove its files from disk.

    File paths are built from the captured ``sound_data`` values rather
    than from ``sound``. Only files that exist before the database delete
    are removed afterwards; a failure on one file is logged and does not
    stop the remaining removals.

    Args:
        sound: The sound record to delete through the sound repository.
        sound_data: Captured attributes with the keys ``type``,
            ``filename``, ``is_normalized``, ``normalized_filename`` and
            ``thumbnail``.

    """
    candidate_paths = []

    # Original audio file, only for extracted ("EXT") sounds.
    if sound_data["type"] == "EXT":
        candidate_paths.append(
            Path("sounds/originals/extracted") / sound_data["filename"],
        )

    # Normalized audio file, when one was produced.
    if sound_data["is_normalized"] and sound_data["normalized_filename"]:
        candidate_paths.append(
            Path("sounds/normalized/extracted") / sound_data["normalized_filename"],
        )

    # Thumbnail image, when one is recorded.
    if sound_data["thumbnail"]:
        candidate_paths.append(
            Path(settings.EXTRACTION_THUMBNAILS_DIR) / sound_data["thumbnail"],
        )

    # Existence is checked before the database row is removed.
    files_to_delete = [path for path in candidate_paths if path.exists()]

    await self.sound_repo.delete(sound)

    for doomed in files_to_delete:
        try:
            doomed.unlink()
        except OSError:
            # Keep going: one stubborn file must not block the others.
            logger.exception("Failed to delete file %s", doomed)
        else:
            logger.info("Deleted file: %s", doomed)
|
||||
|
||||
async def _check_sound_in_current_playlist(self, sound_id: int) -> bool:
    """Report whether a sound belongs to the current playlist.

    Args:
        sound_id: ID of the sound to look up.

    Returns:
        The repository's answer when a current playlist with an ID exists;
        False when there is none or when one of the handled errors occurs
        (the error is logged as a warning).

    """
    try:
        from app.repositories.playlist import PlaylistRepository  # noqa: PLC0415

        repo = PlaylistRepository(self.session)
        playlist = await repo.get_current_playlist()

        if playlist and playlist.id:
            return await repo.is_sound_in_playlist(playlist.id, sound_id)
    except (ImportError, AttributeError, ValueError, RuntimeError) as exc:
        logger.warning(
            "Failed to check if sound %s is in current playlist: %s",
            sound_id,
            exc,
            exc_info=True,
        )
    return False
|
||||
|
||||
async def _reload_player_playlist(self) -> None:
    """Ask the player service to reload its playlist.

    Best effort: the handled error types are logged as a warning and
    swallowed so that a failed reload does not fail the deletion.
    """
    try:
        # Imported lazily to avoid circular import issues.
        from app.services.player import get_player_service  # noqa: PLC0415

        player_service = get_player_service()
        await player_service.reload_playlist()
    except (ImportError, AttributeError, ValueError, RuntimeError) as exc:
        logger.warning("Failed to reload player playlist: %s", exc, exc_info=True)
    else:
        logger.debug("Player playlist reloaded after sound deletion")
|
||||
|
||||
@@ -201,7 +201,7 @@ class ExtractionProcessor:
|
||||
for extraction in stuck_extractions:
|
||||
try:
|
||||
await extraction_service.extraction_repo.update(
|
||||
extraction, {"status": "pending", "error": None}
|
||||
extraction, {"status": "pending", "error": None},
|
||||
)
|
||||
reset_count += 1
|
||||
logger.info(
|
||||
@@ -210,12 +210,13 @@ class ExtractionProcessor:
|
||||
)
|
||||
except Exception:
|
||||
logger.exception(
|
||||
"Failed to reset extraction %d", extraction.id
|
||||
"Failed to reset extraction %d", extraction.id,
|
||||
)
|
||||
|
||||
await session.commit()
|
||||
logger.info(
|
||||
"Successfully reset %d stuck extractions from processing to pending",
|
||||
"Successfully reset %d stuck extractions from processing to "
|
||||
"pending",
|
||||
reset_count,
|
||||
)
|
||||
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
"""Sound scanner service for scanning and importing audio files."""
|
||||
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
from typing import TypedDict
|
||||
|
||||
@@ -13,6 +14,28 @@ from app.utils.audio import get_audio_duration, get_file_hash, get_file_size
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
@dataclass
class AudioFileInfo:
    """Data class for audio file metadata.

    Carries the values the scanner handlers write to a sound record and to
    the scan results for one audio file.
    """

    # File name of the audio file.
    filename: str
    # Display name stored as the sound's name.
    name: str
    # Duration of the audio (unit not visible here — TODO confirm).
    duration: int
    # File size (presumably bytes — TODO confirm).
    size: int
    # Content hash; stored under the sound's "hash" field.
    file_hash: str
|
||||
|
||||
|
||||
@dataclass
class SyncContext:
    """Context data for audio file synchronization."""

    # Path of the audio file being synchronized.
    file_path: Path
    # Sound type code (e.g. "SDB", as used for the normalized directories).
    sound_type: str
    # Existing sound matched by hash: a captured-attribute dict, a Sound
    # object, or None when nothing matched.
    existing_sound_by_hash: dict | Sound | None
    # Existing sound matched by filename, in the same dict/Sound/None forms.
    existing_sound_by_filename: dict | Sound | None
    # Content hash of the file.
    file_hash: str
|
||||
|
||||
|
||||
class FileInfo(TypedDict):
|
||||
"""Type definition for file information in scan results."""
|
||||
|
||||
@@ -56,7 +79,7 @@ class SoundScannerService:
|
||||
".m4a",
|
||||
".aac",
|
||||
}
|
||||
|
||||
|
||||
# Directory mappings for normalized files (matching sound_normalizer)
|
||||
self.normalized_directories = {
|
||||
"SDB": "sounds/normalized/soundboard",
|
||||
@@ -72,43 +95,416 @@ class SoundScannerService:
|
||||
name = name.replace("_", " ").replace("-", " ")
|
||||
# Capitalize words
|
||||
return " ".join(word.capitalize() for word in name.split())
|
||||
|
||||
|
||||
def _get_normalized_path(self, sound_type: str, filename: str) -> Path:
|
||||
"""Get the normalized file path for a sound."""
|
||||
directory = self.normalized_directories.get(sound_type, "sounds/normalized/other")
|
||||
directory = self.normalized_directories.get(
|
||||
sound_type, "sounds/normalized/other",
|
||||
)
|
||||
return Path(directory) / filename
|
||||
|
||||
def _rename_normalized_file(self, sound_type: str, old_filename: str, new_filename: str) -> bool:
|
||||
"""Rename a normalized file if it exists. Returns True if renamed, False if not found."""
|
||||
|
||||
def _rename_normalized_file(
|
||||
self, sound_type: str, old_filename: str, new_filename: str,
|
||||
) -> bool:
|
||||
"""Rename normalized file if exists. Returns True if renamed, else False."""
|
||||
old_path = self._get_normalized_path(sound_type, old_filename)
|
||||
new_path = self._get_normalized_path(sound_type, new_filename)
|
||||
|
||||
|
||||
if old_path.exists():
|
||||
try:
|
||||
# Ensure the directory exists
|
||||
new_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
old_path.rename(new_path)
|
||||
logger.info("Renamed normalized file: %s -> %s", old_path, new_path)
|
||||
return True
|
||||
except Exception as e:
|
||||
logger.error("Failed to rename normalized file %s -> %s: %s", old_path, new_path, e)
|
||||
except OSError:
|
||||
logger.exception(
|
||||
"Failed to rename normalized file %s -> %s",
|
||||
old_path,
|
||||
new_path,
|
||||
)
|
||||
return False
|
||||
else:
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
def _delete_normalized_file(self, sound_type: str, filename: str) -> bool:
|
||||
"""Delete a normalized file if it exists. Returns True if deleted, False if not found."""
|
||||
"""Delete normalized file if exists. Returns True if deleted, else False."""
|
||||
normalized_path = self._get_normalized_path(sound_type, filename)
|
||||
|
||||
|
||||
if normalized_path.exists():
|
||||
try:
|
||||
normalized_path.unlink()
|
||||
logger.info("Deleted normalized file: %s", normalized_path)
|
||||
return True
|
||||
except Exception as e:
|
||||
logger.error("Failed to delete normalized file %s: %s", normalized_path, e)
|
||||
except OSError:
|
||||
logger.exception(
|
||||
"Failed to delete normalized file %s", normalized_path,
|
||||
)
|
||||
return False
|
||||
else:
|
||||
return True
|
||||
return False
|
||||
|
||||
def _extract_sound_attributes(self, sound_data: dict | Sound | None) -> dict:
|
||||
"""Extract attributes from sound data (dict or Sound object)."""
|
||||
if sound_data is None:
|
||||
return {}
|
||||
|
||||
if isinstance(sound_data, dict):
|
||||
return {
|
||||
"filename": sound_data.get("filename"),
|
||||
"name": sound_data.get("name"),
|
||||
"duration": sound_data.get("duration"),
|
||||
"size": sound_data.get("size"),
|
||||
"id": sound_data.get("id"),
|
||||
"object": sound_data.get("sound_object"),
|
||||
"type": sound_data.get("type"),
|
||||
"is_normalized": sound_data.get("is_normalized"),
|
||||
"normalized_filename": sound_data.get("normalized_filename"),
|
||||
}
|
||||
# Sound object (for tests)
|
||||
return {
|
||||
"filename": sound_data.filename,
|
||||
"name": sound_data.name,
|
||||
"duration": sound_data.duration,
|
||||
"size": sound_data.size,
|
||||
"id": sound_data.id,
|
||||
"object": sound_data,
|
||||
"type": sound_data.type,
|
||||
"is_normalized": sound_data.is_normalized,
|
||||
"normalized_filename": sound_data.normalized_filename,
|
||||
}
|
||||
|
||||
def _handle_unchanged_file(
    self,
    filename: str,
    existing_attrs: dict,
    results: ScanResults,
) -> None:
    """Record a file that matches an existing sound by hash and filename.

    Increments the ``skipped`` counter and appends a "skipped" entry built
    from the existing sound's attributes.

    Args:
        filename: Name of the scanned file.
        existing_attrs: Attributes of the matching sound (``name``,
            ``duration``, ``size`` and ``id`` are read).
        results: Scan results, updated in place.

    """
    logger.debug("Sound unchanged: %s", filename)

    results["skipped"] += 1
    file_entries = results["files"]
    entry = {
        "filename": filename,
        "status": "skipped",
        "reason": "file unchanged",
        "name": existing_attrs["name"],
        "duration": existing_attrs["duration"],
        "size": existing_attrs["size"],
        "id": existing_attrs["id"],
        "error": None,
        "changes": None,
    }
    file_entries.append(entry)
|
||||
|
||||
def _handle_duplicate_file(
    self,
    filename: str,
    existing_filename: str,
    file_hash: str,
    existing_attrs: dict,
    results: ScanResults,
) -> None:
    """Record a file whose content duplicates an existing sound.

    Logs a warning with the first eight characters of the hash, increments
    the ``skipped`` and ``duplicates`` counters, and appends a "skipped"
    entry built from the existing sound's attributes.

    Args:
        filename: Name of the duplicate file found by the scan.
        existing_filename: Filename of the sound that has the same content.
        file_hash: Content hash shared by both files.
        existing_attrs: Attributes of the existing sound (``name``,
            ``duration``, ``size`` and ``id`` are read).
        results: Scan results, updated in place.

    """
    short_hash = file_hash[:8] + "..."
    logger.warning(
        "Duplicate file detected: '%s' has same content as existing "
        "'%s' (hash: %s). Skipping duplicate file.",
        filename,
        existing_filename,
        short_hash,
    )

    for counter in ("skipped", "duplicates"):
        results[counter] += 1

    file_entries = results["files"]
    entry = {
        "filename": filename,
        "status": "skipped",
        "reason": "duplicate content",
        "name": existing_attrs["name"],
        "duration": existing_attrs["duration"],
        "size": existing_attrs["size"],
        "id": existing_attrs["id"],
        "error": None,
        "changes": None,
    }
    file_entries.append(entry)
|
||||
|
||||
async def _handle_file_rename(
    self,
    file_info: AudioFileInfo,
    existing_attrs: dict,
    results: ScanResults,
) -> None:
    """Apply a rename to an existing sound (same hash, different filename).

    Updates the sound's ``filename`` and ``name``. When the sound has a
    normalized file, that file is renamed to the new stem (keeping its own
    suffix) and ``normalized_filename`` is updated only if the rename
    succeeded. An "updated" entry is appended to the results.

    Args:
        file_info: Metadata of the file under its new name.
        existing_attrs: Attributes of the existing sound, including the
            ``object`` handed to the repository update.
        results: Scan results, updated in place.

    """
    update_data = {
        "filename": file_info.filename,
        "name": file_info.name,
    }
    changed_fields = ["filename", "name"]

    # If the sound has a normalized file, rename it too
    if existing_attrs["is_normalized"] and existing_attrs["normalized_filename"]:
        current_normalized = Path(existing_attrs["normalized_filename"])
        old_normalized_base = current_normalized.name
        new_normalized_base = Path(file_info.filename).stem + current_normalized.suffix

        if self._rename_normalized_file(
            existing_attrs["type"],
            old_normalized_base,
            new_normalized_base,
        ):
            update_data["normalized_filename"] = new_normalized_base
            changed_fields.append("normalized_filename")
            logger.info(
                "Renamed normalized file: %s -> %s",
                old_normalized_base,
                new_normalized_base,
            )

    await self.sound_repo.update(existing_attrs["object"], update_data)
    logger.info(
        "Detected rename: %s -> %s (ID: %s)",
        existing_attrs["filename"],
        file_info.filename,
        existing_attrs["id"],
    )

    results["updated"] += 1
    file_entries = results["files"]
    entry = {
        "filename": file_info.filename,
        "status": "updated",
        "reason": "file was renamed",
        "name": file_info.name,
        "duration": existing_attrs["duration"],
        "size": existing_attrs["size"],
        "id": existing_attrs["id"],
        "error": None,
        "changes": changed_fields,
        # Store old filename to prevent deletion
        "old_filename": existing_attrs["filename"],
    }
    file_entries.append(entry)
|
||||
|
||||
async def _handle_file_modification(
    self,
    file_info: AudioFileInfo,
    existing_attrs: dict,
    results: ScanResults,
) -> None:
    """Apply a content change to an existing sound (same filename, new hash).

    Updates the sound's name, duration, size and hash from ``file_info``
    and appends an "updated" entry to the results.

    Args:
        file_info: Fresh metadata of the modified file.
        existing_attrs: Attributes of the existing sound; ``object`` is
            handed to the repository update and ``id`` is reported.
        results: Scan results, updated in place.

    """
    refreshed_fields = {
        "name": file_info.name,
        "duration": file_info.duration,
        "size": file_info.size,
        "hash": file_info.file_hash,
    }
    await self.sound_repo.update(existing_attrs["object"], refreshed_fields)

    sound_id = existing_attrs["id"]
    logger.info(
        "Updated modified sound: %s (ID: %s)",
        file_info.name,
        sound_id,
    )

    results["updated"] += 1
    file_entries = results["files"]
    entry = {
        "filename": file_info.filename,
        "status": "updated",
        "reason": "file was modified",
        "name": file_info.name,
        "duration": file_info.duration,
        "size": file_info.size,
        "id": existing_attrs["id"],
        "error": None,
        "changes": ["hash", "duration", "size", "name"],
    }
    file_entries.append(entry)
|
||||
|
||||
async def _handle_new_file(
    self,
    file_info: AudioFileInfo,
    sound_type: str,
    results: ScanResults,
) -> None:
    """Handle new file (neither hash nor filename exists).

    Creates a sound record for the file and records the addition in
    ``results``.

    Args:
        file_info: Metadata measured from the file on disk.
        sound_type: Value stored in the new record's ``type`` field.
        results: Running scan summary; mutated in place.
    """
    # Newly scanned files start non-deletable, non-music, not normalized
    # and with a zero play count.
    sound_data = {
        "type": sound_type,
        "name": file_info.name,
        "filename": file_info.filename,
        "duration": file_info.duration,
        "size": file_info.size,
        "hash": file_info.file_hash,
        "is_deletable": False,
        "is_music": False,
        "is_normalized": False,
        "play_count": 0,
    }

    sound = await self.sound_repo.create(sound_data)
    logger.info("Added new sound: %s (ID: %s)", sound.name, sound.id)

    results["added"] += 1
    results["files"].append({
        "filename": file_info.filename,
        "status": "added",
        "reason": None,
        "name": file_info.name,
        "duration": file_info.duration,
        "size": file_info.size,
        # The ID comes from the created record, not from file_info.
        "id": sound.id,
        "error": None,
        "changes": None,
    })
|
||||
|
||||
async def _load_existing_sounds(self, sound_type: str) -> tuple[dict, dict]:
    """Load existing sounds and create lookup dictionaries.

    Args:
        sound_type: Type passed to ``sound_repo.get_by_type``.

    Returns:
        A ``(sounds_by_hash, sounds_by_filename)`` pair. Both dictionaries
        map to the *same* snapshot dict per sound, keyed by the sound's hash
        and filename respectively.
    """
    existing_sounds = await self.sound_repo.get_by_type(sound_type)

    sounds_by_hash: dict = {}
    sounds_by_filename: dict = {}

    for sound in existing_sounds:
        # Capture all attributes immediately while the session is valid,
        # to avoid session detachment on later attribute access.
        sound_data = {
            "id": sound.id,
            "hash": sound.hash,
            "filename": sound.filename,
            "name": sound.name,
            "duration": sound.duration,
            "size": sound.size,
            "type": sound.type,
            "is_normalized": sound.is_normalized,
            "normalized_filename": sound.normalized_filename,
            "sound_object": sound,  # Keep reference for database operations
        }
        sounds_by_hash[sound.hash] = sound_data
        sounds_by_filename[sound.filename] = sound_data

    return sounds_by_hash, sounds_by_filename
|
||||
|
||||
async def _process_audio_files(
    self,
    scan_path: Path,
    sound_type: str,
    sounds_by_hash: dict,
    sounds_by_filename: dict,
    results: ScanResults,
) -> set[str]:
    """Process all audio files in directory and return processed filenames.

    Each file directly inside ``scan_path`` whose lowercase suffix is in
    ``self.supported_extensions`` is hashed, matched against the lookup
    dictionaries and passed to ``_sync_audio_file``. A failure on one file
    is logged and recorded as an ``"error"`` entry; the scan continues.

    Args:
        scan_path: Directory to list (not traversed recursively).
        sound_type: Sound type forwarded in the sync context.
        sounds_by_hash: Existing sound snapshots keyed by file hash.
        sounds_by_filename: Existing sound snapshots keyed by filename.
        results: Running scan summary; mutated in place.

    Returns:
        Filenames seen on disk, plus the previous filename of every sound
        detected as renamed.
    """
    # Get all audio files from directory
    audio_files = [
        f
        for f in scan_path.iterdir()
        if f.is_file() and f.suffix.lower() in self.supported_extensions
    ]

    # Process each file in directory
    processed_filenames: set[str] = set()
    for file_path in audio_files:
        results["scanned"] += 1
        filename = file_path.name
        processed_filenames.add(filename)

        try:
            # Calculate hash first to enable hash-based lookup
            file_hash = get_file_hash(file_path)
            existing_sound_by_hash = sounds_by_hash.get(file_hash)
            existing_sound_by_filename = sounds_by_filename.get(filename)

            # Create sync context
            sync_context = SyncContext(
                file_path=file_path,
                sound_type=sound_type,
                existing_sound_by_hash=existing_sound_by_hash,
                existing_sound_by_filename=existing_sound_by_filename,
                file_hash=file_hash,
            )

            await self._sync_audio_file(sync_context, results)

            # Check if this was a rename and mark old filename as processed,
            # so the old name is not treated as missing afterwards.
            if results["files"] and results["files"][-1].get("old_filename"):
                old_filename = results["files"][-1]["old_filename"]
                processed_filenames.add(old_filename)
                logger.debug("Marked old filename as processed: %s", old_filename)
                # Remove temporary tracking field from results
                del results["files"][-1]["old_filename"]
        except Exception as e:
            # Deliberately broad: one bad file must not abort the scan.
            logger.exception("Error processing file %s", file_path)
            results["errors"] += 1
            results["files"].append({
                "filename": filename,
                "status": "error",
                "reason": None,
                "name": None,
                "duration": None,
                "size": None,
                "id": None,
                "error": str(e),
                "changes": None,
            })

    return processed_filenames
|
||||
|
||||
async def _delete_missing_sounds(
    self,
    sounds_by_filename: dict,
    processed_filenames: set[str],
    results: ScanResults,
) -> None:
    """Delete sounds that no longer exist in directory.

    Every stored sound whose filename is absent from
    ``processed_filenames`` is deleted through the repository, followed by
    its normalized file when one is recorded. A failure is logged and
    recorded as an ``"error"`` entry; the remaining sounds are still
    processed.

    Args:
        sounds_by_filename: Existing sound snapshots keyed by filename.
        processed_filenames: Filenames accounted for by the directory scan.
        results: Running scan summary; mutated in place.
    """
    for filename, sound_data in sounds_by_filename.items():
        if filename in processed_filenames:
            continue

        # Attributes already captured in sound_data dictionary
        sound_name = sound_data["name"]
        sound_duration = sound_data["duration"]
        sound_size = sound_data["size"]
        sound_id = sound_data["id"]
        sound_object = sound_data["sound_object"]
        sound_type = sound_data["type"]
        sound_is_normalized = sound_data["is_normalized"]
        sound_normalized_filename = sound_data["normalized_filename"]

        try:
            # Delete the sound from database first
            await self.sound_repo.delete(sound_object)
            logger.info("Deleted sound no longer in directory: %s", filename)

            # If the sound had a normalized file, delete it too
            if sound_is_normalized and sound_normalized_filename:
                normalized_base = Path(sound_normalized_filename).name
                self._delete_normalized_file(sound_type, normalized_base)

            results["deleted"] += 1
            results["files"].append({
                "filename": filename,
                "status": "deleted",
                "reason": "file no longer exists",
                "name": sound_name,
                "duration": sound_duration,
                "size": sound_size,
                "id": sound_id,
                "error": None,
                "changes": None,
            })
        except Exception as e:
            # Deliberately broad: one failed deletion must not stop the rest.
            logger.exception("Error deleting sound %s", filename)
            results["errors"] += 1
            results["files"].append({
                "filename": filename,
                "status": "error",
                "reason": "failed to delete",
                "name": sound_name,
                "duration": sound_duration,
                "size": sound_size,
                "id": sound_id,
                "error": str(e),
                "changes": None,
            })
|
||||
|
||||
async def scan_directory(
|
||||
self,
|
||||
directory_path: str,
|
||||
@@ -138,368 +534,84 @@ class SoundScannerService:
|
||||
|
||||
logger.info("Starting sync of directory: %s", directory_path)
|
||||
|
||||
# Get all existing sounds of this type from database
|
||||
existing_sounds = await self.sound_repo.get_by_type(sound_type)
|
||||
# Load existing sounds from database
|
||||
sounds_by_hash, sounds_by_filename = await self._load_existing_sounds(
|
||||
sound_type,
|
||||
)
|
||||
|
||||
# Create lookup dictionaries with immediate attribute access
|
||||
# to avoid session detachment
|
||||
sounds_by_hash = {}
|
||||
sounds_by_filename = {}
|
||||
|
||||
for sound in existing_sounds:
|
||||
# Capture all attributes immediately while session is valid
|
||||
sound_data = {
|
||||
"id": sound.id,
|
||||
"hash": sound.hash,
|
||||
"filename": sound.filename,
|
||||
"name": sound.name,
|
||||
"duration": sound.duration,
|
||||
"size": sound.size,
|
||||
"type": sound.type,
|
||||
"is_normalized": sound.is_normalized,
|
||||
"normalized_filename": sound.normalized_filename,
|
||||
"sound_object": sound, # Keep reference for database operations
|
||||
}
|
||||
sounds_by_hash[sound.hash] = sound_data
|
||||
sounds_by_filename[sound.filename] = sound_data
|
||||
|
||||
# Get all audio files from directory
|
||||
audio_files = [
|
||||
f
|
||||
for f in scan_path.iterdir()
|
||||
if f.is_file() and f.suffix.lower() in self.supported_extensions
|
||||
]
|
||||
|
||||
# Process each file in directory
|
||||
processed_filenames = set()
|
||||
for file_path in audio_files:
|
||||
results["scanned"] += 1
|
||||
filename = file_path.name
|
||||
processed_filenames.add(filename)
|
||||
|
||||
try:
|
||||
# Calculate hash first to enable hash-based lookup
|
||||
file_hash = get_file_hash(file_path)
|
||||
existing_sound_by_hash = sounds_by_hash.get(file_hash)
|
||||
existing_sound_by_filename = sounds_by_filename.get(filename)
|
||||
|
||||
await self._sync_audio_file(
|
||||
file_path,
|
||||
sound_type,
|
||||
existing_sound_by_hash,
|
||||
existing_sound_by_filename,
|
||||
file_hash,
|
||||
results,
|
||||
)
|
||||
|
||||
# Check if this was a rename operation and mark old filename as processed
|
||||
if results["files"] and results["files"][-1].get("old_filename"):
|
||||
old_filename = results["files"][-1]["old_filename"]
|
||||
processed_filenames.add(old_filename)
|
||||
logger.debug("Marked old filename as processed: %s", old_filename)
|
||||
# Remove temporary tracking field from results
|
||||
del results["files"][-1]["old_filename"]
|
||||
except Exception as e:
|
||||
logger.exception("Error processing file %s", file_path)
|
||||
results["errors"] += 1
|
||||
results["files"].append(
|
||||
{
|
||||
"filename": filename,
|
||||
"status": "error",
|
||||
"reason": None,
|
||||
"name": None,
|
||||
"duration": None,
|
||||
"size": None,
|
||||
"id": None,
|
||||
"error": str(e),
|
||||
"changes": None,
|
||||
},
|
||||
)
|
||||
# Process audio files in directory
|
||||
processed_filenames = await self._process_audio_files(
|
||||
scan_path,
|
||||
sound_type,
|
||||
sounds_by_hash,
|
||||
sounds_by_filename,
|
||||
results,
|
||||
)
|
||||
|
||||
# Delete sounds that no longer exist in directory
|
||||
for filename, sound_data in sounds_by_filename.items():
|
||||
if filename not in processed_filenames:
|
||||
# Attributes already captured in sound_data dictionary
|
||||
sound_name = sound_data["name"]
|
||||
sound_duration = sound_data["duration"]
|
||||
sound_size = sound_data["size"]
|
||||
sound_id = sound_data["id"]
|
||||
sound_object = sound_data["sound_object"]
|
||||
sound_type = sound_data["type"]
|
||||
sound_is_normalized = sound_data["is_normalized"]
|
||||
sound_normalized_filename = sound_data["normalized_filename"]
|
||||
|
||||
try:
|
||||
# Delete the sound from database first
|
||||
await self.sound_repo.delete(sound_object)
|
||||
logger.info("Deleted sound no longer in directory: %s", filename)
|
||||
|
||||
# If the sound had a normalized file, delete it too
|
||||
if sound_is_normalized and sound_normalized_filename:
|
||||
normalized_base = Path(sound_normalized_filename).name
|
||||
self._delete_normalized_file(sound_type, normalized_base)
|
||||
|
||||
results["deleted"] += 1
|
||||
results["files"].append(
|
||||
{
|
||||
"filename": filename,
|
||||
"status": "deleted",
|
||||
"reason": "file no longer exists",
|
||||
"name": sound_name,
|
||||
"duration": sound_duration,
|
||||
"size": sound_size,
|
||||
"id": sound_id,
|
||||
"error": None,
|
||||
"changes": None,
|
||||
},
|
||||
)
|
||||
except Exception as e:
|
||||
logger.exception("Error deleting sound %s", filename)
|
||||
results["errors"] += 1
|
||||
results["files"].append(
|
||||
{
|
||||
"filename": filename,
|
||||
"status": "error",
|
||||
"reason": "failed to delete",
|
||||
"name": sound_name,
|
||||
"duration": sound_duration,
|
||||
"size": sound_size,
|
||||
"id": sound_id,
|
||||
"error": str(e),
|
||||
"changes": None,
|
||||
},
|
||||
)
|
||||
await self._delete_missing_sounds(
|
||||
sounds_by_filename,
|
||||
processed_filenames,
|
||||
results,
|
||||
)
|
||||
|
||||
logger.info("Sync completed: %s", results)
|
||||
return results
|
||||
|
||||
async def _sync_audio_file(
    self,
    sync_context: SyncContext,
    results: ScanResults,
) -> None:
    """Sync a single audio file using hash-first identification strategy.

    The file is classified and dispatched to one handler:

    * hash known, same filename: unchanged
    * hash known, other filename, old file still on disk: duplicate
    * hash known, other filename, old file gone: rename
    * hash unknown, filename known: modified
    * neither known: new file

    Args:
        sync_context: The file path, sound type, file hash and the existing
            sound entries matched by hash and by filename (either may be
            ``None``).
        results: Running scan summary; mutated in place by the handlers.
    """
    filename = sync_context.file_path.name
    duration = get_audio_duration(sync_context.file_path)
    size = get_file_size(sync_context.file_path)
    name = self.extract_name_from_filename(filename)

    # Create file info object
    file_info = AudioFileInfo(
        filename=filename,
        name=name,
        duration=duration,
        size=size,
        file_hash=sync_context.file_hash,
    )

    # Extract attributes from existing sounds.
    # NOTE(review): both calls run even when the entry is None, so
    # _extract_sound_attributes is assumed to accept None - confirm.
    hash_attrs = self._extract_sound_attributes(sync_context.existing_sound_by_hash)
    filename_attrs = self._extract_sound_attributes(
        sync_context.existing_sound_by_filename,
    )

    # Hash-first identification strategy
    if sync_context.existing_sound_by_hash is not None:
        # Content exists in database (same hash)
        if hash_attrs["filename"] == filename:
            # Same hash, same filename - file unchanged
            self._handle_unchanged_file(filename, hash_attrs, results)
        else:
            # Same hash, different filename - could be rename or duplicate
            # Check if both files exist to determine if it's a duplicate
            old_file_path = sync_context.file_path.parent / hash_attrs["filename"]
            if old_file_path.exists():
                # Both files exist with same hash - this is a duplicate
                self._handle_duplicate_file(
                    filename,
                    hash_attrs["filename"],
                    sync_context.file_hash,
                    hash_attrs,
                    results,
                )
            else:
                # Old file doesn't exist - this is a genuine rename
                await self._handle_file_rename(file_info, hash_attrs, results)

    elif sync_context.existing_sound_by_filename is not None:
        # Same filename but different hash - file was modified
        await self._handle_file_modification(file_info, filename_attrs, results)
    else:
        # New file - neither hash nor filename exists
        await self._handle_new_file(file_info, sync_context.sound_type, results)
|
||||
|
||||
async def scan_soundboard_directory(self) -> ScanResults:
|
||||
"""Sync the default soundboard directory."""
|
||||
|
||||
Reference in New Issue
Block a user