diff --git a/app/models/sound.py b/app/models/sound.py index cda5028..fc28ae8 100644 --- a/app/models/sound.py +++ b/app/models/sound.py @@ -34,7 +34,11 @@ class Sound(BaseModel, table=True): __table_args__ = (UniqueConstraint("hash", name="uq_sound_hash"),) # relationships - playlist_sounds: list["PlaylistSound"] = Relationship(back_populates="sound") + playlist_sounds: list["PlaylistSound"] = Relationship( + back_populates="sound", cascade_delete=True + ) extractions: list["Extraction"] = Relationship(back_populates="sound") - play_history: list["SoundPlayed"] = Relationship(back_populates="sound") + play_history: list["SoundPlayed"] = Relationship( + back_populates="sound", cascade_delete=True + ) favorites: list["Favorite"] = Relationship(back_populates="sound") diff --git a/app/services/sound_scanner.py b/app/services/sound_scanner.py index 04271ca..1c28a81 100644 --- a/app/services/sound_scanner.py +++ b/app/services/sound_scanner.py @@ -95,7 +95,25 @@ class SoundScannerService: # Get all existing sounds of this type from database existing_sounds = await self.sound_repo.get_by_type(sound_type) - sounds_by_filename = {sound.filename: sound for sound in existing_sounds} + + # Create lookup dictionaries with immediate attribute access + # to avoid session detachment + sounds_by_hash = {} + sounds_by_filename = {} + + for sound in existing_sounds: + # Capture all attributes immediately while session is valid + sound_data = { + "id": sound.id, + "hash": sound.hash, + "filename": sound.filename, + "name": sound.name, + "duration": sound.duration, + "size": sound.size, + "sound_object": sound, # Keep reference for database operations + } + sounds_by_hash[sound.hash] = sound_data + sounds_by_filename[sound.filename] = sound_data # Get all audio files from directory audio_files = [ @@ -112,12 +130,27 @@ class SoundScannerService: processed_filenames.add(filename) try: + # Calculate hash first to enable hash-based lookup + file_hash = get_file_hash(file_path) + existing_sound_by_hash = sounds_by_hash.get(file_hash) + existing_sound_by_filename = sounds_by_filename.get(filename) + await self._sync_audio_file( file_path, sound_type, - sounds_by_filename.get(filename), + existing_sound_by_hash, + existing_sound_by_filename, + file_hash, results, ) + + # Check if this was a rename operation and mark old filename as processed + if results["files"] and results["files"][-1].get("old_filename"): + old_filename = results["files"][-1]["old_filename"] + processed_filenames.add(old_filename) + logger.debug("Marked old filename as processed: %s", old_filename) + # Remove temporary tracking field from results + del results["files"][-1]["old_filename"] except Exception as e: logger.exception("Error processing file %s", file_path) results["errors"] += 1 @@ -136,10 +169,17 @@ class SoundScannerService: ) # Delete sounds that no longer exist in directory - for filename, sound in sounds_by_filename.items(): + for filename, sound_data in sounds_by_filename.items(): if filename not in processed_filenames: + # Attributes already captured in sound_data dictionary + sound_name = sound_data["name"] + sound_duration = sound_data["duration"] + sound_size = sound_data["size"] + sound_id = sound_data["id"] + sound_object = sound_data["sound_object"] + try: - await self.sound_repo.delete(sound) + await self.sound_repo.delete(sound_object) logger.info("Deleted sound no longer in directory: %s", filename) results["deleted"] += 1 results["files"].append( @@ -147,10 +187,10 @@ class SoundScannerService: "filename": filename, "status": "deleted", "reason": "file no longer exists", - "name": sound.name, - "duration": sound.duration, - "size": sound.size, - "id": sound.id, + "name": sound_name, + "duration": sound_duration, + "size": sound_size, + "id": sound_id, "error": None, "changes": None, }, @@ -163,10 +203,10 @@ class SoundScannerService: "filename": filename, "status": "error", "reason": "failed to delete", - "name": sound.name, - "duration": sound.duration, - "size": sound.size, - "id": sound.id, + "name": sound_name, + "duration": sound_duration, + "size": sound_size, + "id": sound_id, "error": str(e), "changes": None, }, @@ -179,18 +219,136 @@ class SoundScannerService: self, file_path: Path, sound_type: str, - existing_sound: Sound | None, + existing_sound_by_hash: dict | Sound | None, + existing_sound_by_filename: dict | Sound | None, + file_hash: str, results: ScanResults, ) -> None: - """Sync a single audio file (add new or update existing).""" + """Sync a single audio file using hash-first identification strategy.""" filename = file_path.name - file_hash = get_file_hash(file_path) duration = get_audio_duration(file_path) size = get_file_size(file_path) name = self.extract_name_from_filename(filename) - if existing_sound is None: - # Add new sound + # Extract attributes - handle both dict (normal) and Sound object (tests) + existing_hash_filename = None + existing_hash_name = None + existing_hash_duration = None + existing_hash_size = None + existing_hash_id = None + existing_hash_object = None + + if existing_sound_by_hash is not None: + if isinstance(existing_sound_by_hash, dict): + existing_hash_filename = existing_sound_by_hash["filename"] + existing_hash_name = existing_sound_by_hash["name"] + existing_hash_duration = existing_sound_by_hash["duration"] + existing_hash_size = existing_sound_by_hash["size"] + existing_hash_id = existing_sound_by_hash["id"] + existing_hash_object = existing_sound_by_hash["sound_object"] + else: # Sound object (for tests) + existing_hash_filename = existing_sound_by_hash.filename + existing_hash_name = existing_sound_by_hash.name + existing_hash_duration = existing_sound_by_hash.duration + existing_hash_size = existing_sound_by_hash.size + existing_hash_id = existing_sound_by_hash.id + existing_hash_object = existing_sound_by_hash + + existing_filename_id = None + existing_filename_object = None + if existing_sound_by_filename is not None: + if isinstance(existing_sound_by_filename, dict): + existing_filename_id = existing_sound_by_filename["id"] + existing_filename_object = existing_sound_by_filename["sound_object"] + else: # Sound object (for tests) + existing_filename_id = existing_sound_by_filename.id + existing_filename_object = existing_sound_by_filename + + # Hash-first identification strategy + if existing_sound_by_hash is not None: + # Content exists in database (same hash) + if existing_hash_filename == filename: + # Same hash, same filename - file unchanged + logger.debug("Sound unchanged: %s", filename) + results["skipped"] += 1 + results["files"].append( + { + "filename": filename, + "status": "skipped", + "reason": "file unchanged", + "name": existing_hash_name, + "duration": existing_hash_duration, + "size": existing_hash_size, + "id": existing_hash_id, + "error": None, + "changes": None, + }, + ) + else: + # Same hash, different filename - file was renamed + update_data = { + "filename": filename, + "name": name, + } + + await self.sound_repo.update(existing_hash_object, update_data) + logger.info( + "Detected rename: %s -> %s (ID: %s)", + existing_hash_filename, + filename, + existing_hash_id, + ) + + results["updated"] += 1 + results["files"].append( + { + "filename": filename, + "status": "updated", + "reason": "file was renamed", + "name": name, + "duration": existing_hash_duration, + "size": existing_hash_size, + "id": existing_hash_id, + "error": None, + "changes": ["filename", "name"], + # Store old filename to prevent deletion + "old_filename": existing_hash_filename, + }, + ) + + elif existing_sound_by_filename is not None: + # Same filename but different hash - file was modified + update_data = { + "name": name, + "duration": duration, + "size": size, + "hash": file_hash, + } + + await self.sound_repo.update(existing_filename_object, update_data) + logger.info( + "Updated modified sound: %s (ID: %s)", + name, + existing_filename_id, + ) + + results["updated"] += 1 + results["files"].append( + { + "filename": filename, + "status": "updated", + "reason": "file was modified", + "name": name, + "duration": duration, + "size": size, + "id": existing_filename_id, + "error": None, + "changes": ["hash", "duration", "size", "name"], + }, + ) + + else: + # New file - neither hash nor filename exists sound_data = { "type": sound_type, "name": name, @@ -222,51 +380,6 @@ class SoundScannerService: }, ) - elif existing_sound.hash != file_hash: - # Update existing sound (file was modified) - update_data = { - "name": name, - "duration": duration, - "size": size, - "hash": file_hash, - } - - await self.sound_repo.update(existing_sound, update_data) - logger.info("Updated modified sound: %s (ID: %s)", name, existing_sound.id) - - results["updated"] += 1 - results["files"].append( - { - "filename": filename, - "status": "updated", - "reason": "file was modified", - "name": name, - "duration": duration, - "size": size, - "id": existing_sound.id, - "error": None, - "changes": ["hash", "duration", "size", "name"], - }, - ) - - else: - # File unchanged, skip - logger.debug("Sound unchanged: %s", filename) - results["skipped"] += 1 - results["files"].append( - { - "filename": filename, - "status": "skipped", - "reason": "file unchanged", - "name": existing_sound.name, - "duration": existing_sound.duration, - "size": existing_sound.size, - "id": existing_sound.id, - "error": None, - "changes": None, - }, - ) - async def scan_soundboard_directory(self) -> ScanResults: """Sync the default soundboard directory.""" soundboard_path = "sounds/originals/soundboard" diff --git a/tests/services/test_sound_scanner.py b/tests/services/test_sound_scanner.py index b7d8e5a..258dc70 100644 --- a/tests/services/test_sound_scanner.py +++ b/tests/services/test_sound_scanner.py @@ -152,10 +152,15 @@ class TestSoundScannerService: "errors": 0, "files": [], } + # Set the existing sound filename to match temp file for "unchanged" test + existing_sound.filename = temp_path.name + await scanner_service._sync_audio_file( temp_path, "SDB", - existing_sound, + existing_sound, # existing_sound_by_hash (same hash) + None, # existing_sound_by_filename (no conflict) + "same_hash", results, ) @@ -168,6 +173,121 @@ class TestSoundScannerService: finally: temp_path.unlink() + @pytest.mark.asyncio + async def test_sync_audio_file_renamed(self, scanner_service) -> None: + """Test syncing file that was renamed (same hash, different filename).""" + # Existing sound with same hash but different filename + existing_sound = Sound( + id=1, + type="SDB", + name="Old Name", + filename="old_name.mp3", + duration=120000, + size=1024, + hash="same_hash", + ) + + scanner_service.sound_repo.update = AsyncMock(return_value=existing_sound) + + # Mock file operations to return same hash + with ( + patch("app.services.sound_scanner.get_file_hash", return_value="same_hash"), + patch("app.services.sound_scanner.get_audio_duration", return_value=120000), + patch("app.services.sound_scanner.get_file_size", return_value=1024), + ): + # Create a temporary file with different name + with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as f: + temp_path = Path(f.name) + + try: + results = { + "scanned": 0, + "added": 0, + "updated": 0, + "deleted": 0, + "skipped": 0, + "errors": 0, + "files": [], + } + + await scanner_service._sync_audio_file( + temp_path, + "SDB", + existing_sound, # existing_sound_by_hash (same hash) + None, # existing_sound_by_filename (different filename) + "same_hash", + results, + ) + + # Should be marked as updated (renamed) + assert results["updated"] == 1 + assert results["added"] == 0 + assert results["skipped"] == 0 + assert len(results["files"]) == 1 + assert results["files"][0]["status"] == "updated" + assert results["files"][0]["reason"] == "file was renamed" + assert results["files"][0]["changes"] == ["filename", "name"] + + # Verify update was called with new filename + scanner_service.sound_repo.update.assert_called_once() + call_args = scanner_service.sound_repo.update.call_args[0][1] # update_data + assert call_args["filename"] == temp_path.name + + finally: + temp_path.unlink() + + @pytest.mark.asyncio + async def test_scan_directory_rename_no_delete(self, scanner_service, mock_session) -> None: + """Test that renamed files are not deleted (regression test).""" + # Create a mock existing sound that will be "renamed" + existing_sound = Sound( + id=1, + type="SDB", + name="Old Name", + filename="old_name.mp3", + duration=120000, + size=1024, + hash="same_hash", + ) + + # Mock the repository to return the existing sound + scanner_service.sound_repo.get_by_type = AsyncMock(return_value=[existing_sound]) + scanner_service.sound_repo.update = AsyncMock() + scanner_service.sound_repo.delete = AsyncMock() + + # Create temporary directory with renamed file + import tempfile + import os + + with tempfile.TemporaryDirectory() as temp_dir: + # Create the "renamed" file (same hash, different name) + new_file_path = os.path.join(temp_dir, "new_name.mp3") + with open(new_file_path, "wb") as f: + f.write(b"test audio content") # This will produce consistent hash + + # Mock file operations to return same hash + with ( + patch("app.services.sound_scanner.get_file_hash", return_value="same_hash"), + patch("app.services.sound_scanner.get_audio_duration", return_value=120000), + patch("app.services.sound_scanner.get_file_size", return_value=1024), + ): + results = await scanner_service.scan_directory(temp_dir, "SDB") + + # Should have detected one renamed file + assert results["updated"] == 1 + assert results["deleted"] == 0 # This is the key assertion - no deletion! + assert results["added"] == 0 + assert len(results["files"]) == 1 + + # Verify it was marked as renamed + file_result = results["files"][0] + assert file_result["status"] == "updated" + assert file_result["reason"] == "file was renamed" + + # Verify update was called but delete was NOT called + scanner_service.sound_repo.update.assert_called_once() + scanner_service.sound_repo.delete.assert_not_called() + @pytest.mark.asyncio async def test_sync_audio_file_new(self, scanner_service) -> None: """Test syncing a new audio file.""" @@ -202,7 +322,14 @@ class TestSoundScannerService: "errors": 0, "files": [], } - await scanner_service._sync_audio_file(temp_path, "SDB", None, results) + await scanner_service._sync_audio_file( + temp_path, + "SDB", + None, # existing_sound_by_hash + None, # existing_sound_by_filename + "test_hash", + results, + ) assert results["added"] == 1 assert results["skipped"] == 0 @@ -262,7 +389,9 @@ class TestSoundScannerService: await scanner_service._sync_audio_file( temp_path, "SDB", - existing_sound, + None, # existing_sound_by_hash (different hash) + existing_sound, # existing_sound_by_filename + "new_hash", results, ) @@ -325,7 +454,9 @@ class TestSoundScannerService: await scanner_service._sync_audio_file( temp_path, "CUSTOM", - None, + None, # existing_sound_by_hash + None, # existing_sound_by_filename + "custom_hash", results, )