feat: Enhance SoundScannerService with duplicate detection and normalized file handling
Some checks failed
Backend CI / lint (push) Failing after 4m52s
Backend CI / test (push) Failing after 4m39s

This commit is contained in:
JSC
2025-08-25 12:33:10 +02:00
parent da66516bb3
commit d3ce17f10d
2 changed files with 373 additions and 27 deletions

View File

@@ -288,6 +288,59 @@ class TestSoundScannerService:
scanner_service.sound_repo.update.assert_called_once()
scanner_service.sound_repo.delete.assert_not_called()
@pytest.mark.asyncio
async def test_scan_directory_duplicate_detection(
    self, scanner_service, mock_session
) -> None:
    """Test that files sharing one content hash are reported as duplicates.

    Two files are written to a temporary directory and ``get_file_hash`` is
    patched to return the hash of a sound the repository already knows. The
    scan is expected to add, update and delete nothing, to count both files
    as skipped, and to flag exactly one of them with the reason
    ``"duplicate content"``.
    """
    import tempfile
    from pathlib import Path

    # The sound the repository already holds; its hash matches the patched
    # ``get_file_hash`` return value below.
    existing_sound = Sound(
        id=1,
        type="SDB",
        name="Original Song",
        filename="original.mp3",
        duration=120000,
        size=1024,
        hash="same_hash",
    )

    # Mock the repository
    scanner_service.sound_repo.get_by_type = AsyncMock(return_value=[existing_sound])
    scanner_service.sound_repo.update = AsyncMock()

    with tempfile.TemporaryDirectory() as temp_dir:
        # Write the same bytes under two names to simulate duplicate content.
        audio_content = b"test audio content"
        for filename in ("original.mp3", "duplicate.mp3"):
            (Path(temp_dir) / filename).write_bytes(audio_content)

        # Mock file operations so every scanned file looks identical to the
        # stored sound (same hash, duration and size).
        with (
            patch("app.services.sound_scanner.get_file_hash", return_value="same_hash"),
            patch("app.services.sound_scanner.get_audio_duration", return_value=120000),
            patch("app.services.sound_scanner.get_file_size", return_value=1024),
        ):
            results = await scanner_service.scan_directory(temp_dir, "SDB")

        # Both files carry the stored hash, so both are counted as skipped;
        # only one of them is additionally counted as a duplicate.
        # NOTE(review): presumably "original.mp3" is the unchanged match and
        # "duplicate.mp3" is the flagged one - confirm against the service.
        assert results["skipped"] == 2
        assert results["duplicates"] == 1
        assert results["updated"] == 0
        assert results["added"] == 0
        assert results["deleted"] == 0

        # Check that the duplicate was reported with the expected reason
        skipped_files = [f for f in results["files"] if f["status"] == "skipped"]
        duplicate_file = next(
            (f for f in skipped_files if "duplicate" in f["reason"]),
            None,
        )
        assert duplicate_file is not None
        assert duplicate_file["reason"] == "duplicate content"
@pytest.mark.asyncio
async def test_sync_audio_file_new(self, scanner_service) -> None:
"""Test syncing a new audio file."""
@@ -477,3 +530,175 @@ class TestSoundScannerService:
) # All sounds are set to not deletable
finally:
temp_path.unlink()
@pytest.mark.asyncio
async def test_sync_audio_file_rename_with_normalized_file(
    self, test_session, scanner_service
) -> None:
    """Test that renaming a sound file also renames its normalized file.

    A normalized file named after the old filename is created on disk, then
    ``_sync_audio_file`` is called with a sound matched by hash but not by
    filename. The test checks the reported result, the data passed to
    ``sound_repo.update`` and that the normalized file was moved on disk.
    """
    import tempfile
    from pathlib import Path
    from unittest.mock import patch

    with tempfile.TemporaryDirectory() as temp_dir:
        temp_dir_path = Path(temp_dir)

        # Point the scanner's normalized directory for "SDB" at the temp dir
        normalized_dir = temp_dir_path / "normalized" / "soundboard"
        normalized_dir.mkdir(parents=True)
        scanner_service.normalized_directories = {"SDB": str(normalized_dir)}

        # Create the old normalized file
        old_normalized_file = normalized_dir / "old_sound.mp3"
        old_normalized_file.write_text("normalized audio content")

        # Only the renamed source file has to exist on disk; the old name
        # lives on solely in the database record below.
        new_path = temp_dir_path / "new_sound.mp3"
        new_path.write_bytes(b"fake audio data for testing")

        # Existing sound that still references the old filename
        existing_sound = Sound(
            id=1,
            type="SDB",
            name="Old Sound",
            filename="old_sound.mp3",
            duration=60000,
            size=2048,
            hash="test_hash",
            is_normalized=True,
            normalized_filename="old_sound.mp3",
            normalized_duration=60000,
            normalized_size=1024,
            normalized_hash="normalized_hash",
            play_count=5,
            is_deletable=False,
            is_music=False,
        )

        results = {
            "scanned": 0,
            "added": 0,
            "updated": 0,
            "deleted": 0,
            "skipped": 0,
            "duplicates": 0,
            "errors": 0,
            "files": [],
        }

        # Mock the sound repository update
        scanner_service.sound_repo.update = AsyncMock()

        # Mock the audio utility functions since we're using fake files
        with (
            patch("app.services.sound_scanner.get_audio_duration", return_value=60000),
            patch("app.services.sound_scanner.get_file_size", return_value=2048),
        ):
            # Simulate rename detection by calling _sync_audio_file
            await scanner_service._sync_audio_file(
                new_path,
                "SDB",
                existing_sound,  # existing_sound_by_hash (same hash, different filename)
                None,  # existing_sound_by_filename (no file with new name exists)
                "test_hash",
                results,
            )

        # Verify the results
        assert results["updated"] == 1
        assert len(results["files"]) == 1
        assert results["files"][0]["status"] == "updated"
        assert results["files"][0]["reason"] == "file was renamed"
        assert "normalized_filename" in results["files"][0]["changes"]

        # Verify sound_repo.update was called with normalized filename update.
        # Fail with a clear message instead of a TypeError on ``None`` when
        # the repository was never called.
        update_call = scanner_service.sound_repo.update.call_args
        assert update_call is not None, "sound_repo.update was not called"
        update_data = update_call.args[1]  # Second argument is the update data
        assert "filename" in update_data
        assert "name" in update_data
        assert "normalized_filename" in update_data
        assert update_data["normalized_filename"] == "new_sound.mp3"

        # Verify the normalized file was actually renamed
        new_normalized_file = normalized_dir / "new_sound.mp3"
        assert new_normalized_file.exists()
        assert not old_normalized_file.exists()
        assert new_normalized_file.read_text() == "normalized audio content"
@pytest.mark.asyncio
async def test_scan_directory_delete_with_normalized_file(
    self, test_session, scanner_service
) -> None:
    """Test that deleting a sound also deletes its normalized file.

    The repository reports one normalized sound while the scanned directory
    is empty. The scan is expected to report the sound as deleted, call
    ``sound_repo.delete`` with it and remove the normalized file from disk.
    """
    import tempfile
    from pathlib import Path
    from unittest.mock import patch

    with tempfile.TemporaryDirectory() as temp_dir:
        temp_dir_path = Path(temp_dir)

        # Directory to scan; intentionally left empty
        scan_dir = temp_dir_path / "sounds"
        scan_dir.mkdir()

        # Point the scanner's normalized directory for "SDB" at the temp dir
        # and create the normalized file that should be cleaned up.
        normalized_dir = temp_dir_path / "normalized" / "soundboard"
        normalized_dir.mkdir(parents=True)
        scanner_service.normalized_directories = {"SDB": str(normalized_dir)}
        normalized_file = normalized_dir / "test_sound.mp3"
        normalized_file.write_text("normalized audio content")

        # Create existing sound with normalized file info
        existing_sound = Sound(
            id=1,
            type="SDB",
            name="Test Sound",
            filename="test_sound.mp3",
            duration=60000,
            size=2048,
            hash="test_hash",
            is_normalized=True,
            normalized_filename="test_sound.mp3",
            normalized_duration=60000,
            normalized_size=1024,
            normalized_hash="normalized_hash",
            play_count=5,
            is_deletable=False,
            is_music=False,
        )

        # Mock sound repository methods
        scanner_service.sound_repo.get_by_type = AsyncMock(return_value=[existing_sound])
        scanner_service.sound_repo.delete = AsyncMock()

        # Mock audio utility functions; no return values are configured
        # because the scan directory contains no files.
        with (
            patch("app.services.sound_scanner.get_audio_duration"),
            patch("app.services.sound_scanner.get_file_size"),
        ):
            # Run scan with empty directory (should trigger deletion)
            results = await scanner_service.scan_directory(str(scan_dir), "SDB")

        # Verify the results
        assert results["deleted"] == 1
        assert results["added"] == 0
        assert results["updated"] == 0
        assert len(results["files"]) == 1
        assert results["files"][0]["status"] == "deleted"
        assert results["files"][0]["reason"] == "file no longer exists"

        # Verify sound_repo.delete was called
        scanner_service.sound_repo.delete.assert_called_once_with(existing_sound)

        # Verify the normalized file was actually deleted
        assert not normalized_file.exists()