Files
sdb2-backend/tests/services/test_sound_scanner.py
JSC da66516bb3
Some checks failed
Backend CI / lint (push) Failing after 4m55s
Backend CI / test (push) Failing after 4m32s
feat: Implement hash-first identification strategy in audio file syncing and enhance tests for renamed files
2025-08-25 11:56:07 +02:00

480 lines
19 KiB
Python

"""Tests for sound scanner service."""
import tempfile
from pathlib import Path
from unittest.mock import AsyncMock, Mock, patch
import pytest
from sqlmodel.ext.asyncio.session import AsyncSession
from app.models.sound import Sound
from app.services.sound_scanner import SoundScannerService
class TestSoundScannerService:
    """Tests for ``SoundScannerService`` and the audio helpers it relies on.

    The service is always built around a mocked ``AsyncSession``; repository
    methods that a test needs are replaced with ``AsyncMock`` objects, and the
    hash/duration/size helpers are patched where they are looked up
    (``app.services.sound_scanner``) so no real audio decoding takes place.
    """

    @staticmethod
    def _new_results() -> dict:
        """Return a fresh, zeroed results accumulator.

        A new dict (with a new ``files`` list) is built on every call so that
        tests never share mutable state.
        """
        return {
            "scanned": 0,
            "added": 0,
            "updated": 0,
            "deleted": 0,
            "skipped": 0,
            "errors": 0,
            "files": [],
        }

    @pytest.fixture
    def mock_session(self) -> Mock:
        """Create a mock session restricted to the ``AsyncSession`` API."""
        return Mock(spec=AsyncSession)

    @pytest.fixture
    def scanner_service(self, mock_session: Mock) -> SoundScannerService:
        """Create a scanner service backed by the mock session."""
        return SoundScannerService(mock_session)

    def test_init(self, scanner_service: SoundScannerService) -> None:
        """Test scanner service initialization."""
        assert scanner_service.session is not None
        assert scanner_service.sound_repo is not None
        assert len(scanner_service.supported_extensions) > 0
        assert ".mp3" in scanner_service.supported_extensions
        assert ".wav" in scanner_service.supported_extensions

    def test_get_file_hash(self, scanner_service: SoundScannerService) -> None:
        """Test file hash calculation on a real temporary file."""
        # delete=False keeps the file on disk after the handle is closed so
        # the helper can reopen it by path; it is removed in ``finally``.
        with tempfile.NamedTemporaryFile(mode="w", delete=False) as f:
            f.write("test content")
            temp_path = Path(f.name)

        try:
            from app.utils.audio import get_file_hash

            hash_value = get_file_hash(temp_path)
            assert len(hash_value) == 64  # SHA-256 hash length
            assert isinstance(hash_value, str)
        finally:
            temp_path.unlink()

    def test_get_file_size(self, scanner_service: SoundScannerService) -> None:
        """Test file size calculation on a real temporary file."""
        with tempfile.NamedTemporaryFile(mode="w", delete=False) as f:
            f.write("test content for size calculation")
            temp_path = Path(f.name)

        try:
            from app.utils.audio import get_file_size

            size = get_file_size(temp_path)
            assert size > 0
            assert isinstance(size, int)
        finally:
            temp_path.unlink()

    def test_extract_name_from_filename(
        self,
        scanner_service: SoundScannerService,
    ) -> None:
        """Test name extraction from filename.

        Each case maps a filename to the display name the service is expected
        to derive from it.
        """
        test_cases = [
            ("hello_world.mp3", "Hello World"),
            ("my-awesome-sound.wav", "My Awesome Sound"),
            ("TEST_FILE_NAME.opus", "Test File Name"),
            ("single.mp3", "Single"),
            ("multiple_words_here.flac", "Multiple Words Here"),
        ]

        for filename, expected_name in test_cases:
            result = scanner_service.extract_name_from_filename(filename)
            assert result == expected_name

    @patch("app.utils.audio.ffmpeg.probe")
    def test_get_audio_duration_success(
        self,
        mock_probe: Mock,
        scanner_service: SoundScannerService,
    ) -> None:
        """Test successful audio duration extraction."""
        mock_probe.return_value = {"format": {"duration": "123.456"}}

        # The path is never opened: ffmpeg.probe is mocked.
        temp_path = Path("/fake/path/test.mp3")

        from app.utils.audio import get_audio_duration

        duration = get_audio_duration(temp_path)

        assert duration == 123456  # 123.456 seconds * 1000 = 123456 ms
        mock_probe.assert_called_once_with(str(temp_path))

    @patch("app.utils.audio.ffmpeg.probe")
    def test_get_audio_duration_failure(
        self,
        mock_probe: Mock,
        scanner_service: SoundScannerService,
    ) -> None:
        """Test that a probe error yields a duration of 0 instead of raising."""
        mock_probe.side_effect = Exception("FFmpeg error")

        temp_path = Path("/fake/path/test.mp3")

        from app.utils.audio import get_audio_duration

        duration = get_audio_duration(temp_path)

        assert duration == 0
        mock_probe.assert_called_once_with(str(temp_path))

    @pytest.mark.asyncio
    async def test_scan_directory_nonexistent(
        self,
        scanner_service: SoundScannerService,
    ) -> None:
        """Test scanning a non-existent directory."""
        with pytest.raises(ValueError, match="Directory does not exist"):
            await scanner_service.scan_directory("/non/existent/path")

    @pytest.mark.asyncio
    async def test_scan_directory_not_directory(
        self,
        scanner_service: SoundScannerService,
    ) -> None:
        """Test scanning a path that is a regular file, not a directory."""
        with (
            tempfile.NamedTemporaryFile() as f,
            pytest.raises(ValueError, match="Path is not a directory"),
        ):
            await scanner_service.scan_directory(f.name)

    @pytest.mark.asyncio
    async def test_sync_audio_file_unchanged(
        self,
        scanner_service: SoundScannerService,
    ) -> None:
        """Test syncing a file whose hash and filename both match a record.

        The file must be reported as skipped with reason ``file unchanged``.
        """
        # Existing sound with same hash as file
        existing_sound = Sound(
            id=1,
            type="SDB",
            name="Test Sound",
            filename="test.mp3",
            duration=120000,  # 120 seconds = 120000 ms
            size=1024,
            hash="same_hash",
        )

        # Mock file operations to return same hash
        with (
            patch("app.services.sound_scanner.get_file_hash", return_value="same_hash"),
            patch("app.services.sound_scanner.get_audio_duration", return_value=120000),
            patch("app.services.sound_scanner.get_file_size", return_value=1024),
        ):
            with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as f:
                temp_path = Path(f.name)

            try:
                results = self._new_results()

                # Make the record's filename match the temp file so that only
                # the "unchanged" path can apply.
                existing_sound.filename = temp_path.name

                await scanner_service._sync_audio_file(
                    temp_path,
                    "SDB",
                    existing_sound,  # existing_sound_by_hash (same hash)
                    None,  # existing_sound_by_filename (no conflict)
                    "same_hash",
                    results,
                )

                assert results["skipped"] == 1
                assert results["added"] == 0
                assert results["updated"] == 0
                assert len(results["files"]) == 1
                assert results["files"][0]["status"] == "skipped"
                assert results["files"][0]["reason"] == "file unchanged"
            finally:
                temp_path.unlink()

    @pytest.mark.asyncio
    async def test_sync_audio_file_renamed(
        self,
        scanner_service: SoundScannerService,
    ) -> None:
        """Test syncing file that was renamed (same hash, different filename)."""
        # Existing sound with same hash but different filename
        existing_sound = Sound(
            id=1,
            type="SDB",
            name="Old Name",
            filename="old_name.mp3",
            duration=120000,
            size=1024,
            hash="same_hash",
        )

        scanner_service.sound_repo.update = AsyncMock(return_value=existing_sound)

        # Mock file operations to return same hash
        with (
            patch("app.services.sound_scanner.get_file_hash", return_value="same_hash"),
            patch("app.services.sound_scanner.get_audio_duration", return_value=120000),
            patch("app.services.sound_scanner.get_file_size", return_value=1024),
        ):
            # The generated temp name differs from "old_name.mp3".
            with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as f:
                temp_path = Path(f.name)

            try:
                results = self._new_results()

                await scanner_service._sync_audio_file(
                    temp_path,
                    "SDB",
                    existing_sound,  # existing_sound_by_hash (same hash)
                    None,  # existing_sound_by_filename (different filename)
                    "same_hash",
                    results,
                )

                # Should be marked as updated (renamed)
                assert results["updated"] == 1
                assert results["added"] == 0
                assert results["skipped"] == 0
                assert len(results["files"]) == 1
                assert results["files"][0]["status"] == "updated"
                assert results["files"][0]["reason"] == "file was renamed"
                assert results["files"][0]["changes"] == ["filename", "name"]

                # Verify update was called with new filename
                scanner_service.sound_repo.update.assert_called_once()
                # Second positional argument of update() is the update data.
                update_data = scanner_service.sound_repo.update.call_args[0][1]
                assert update_data["filename"] == temp_path.name
            finally:
                temp_path.unlink()

    @pytest.mark.asyncio
    async def test_scan_directory_rename_no_delete(
        self,
        scanner_service: SoundScannerService,
        mock_session: Mock,
    ) -> None:
        """Test that renamed files are not deleted (regression test)."""
        # Create a mock existing sound that will be "renamed"
        existing_sound = Sound(
            id=1,
            type="SDB",
            name="Old Name",
            filename="old_name.mp3",
            duration=120000,
            size=1024,
            hash="same_hash",
        )

        # Mock the repository to return the existing sound
        scanner_service.sound_repo.get_by_type = AsyncMock(
            return_value=[existing_sound],
        )
        scanner_service.sound_repo.update = AsyncMock()
        scanner_service.sound_repo.delete = AsyncMock()

        with tempfile.TemporaryDirectory() as temp_dir:
            # Create the "renamed" file. Its bytes are irrelevant because
            # get_file_hash is patched below to return the stored hash.
            new_file_path = Path(temp_dir) / "new_name.mp3"
            new_file_path.write_bytes(b"test audio content")

            # Mock file operations to return same hash
            with (
                patch(
                    "app.services.sound_scanner.get_file_hash",
                    return_value="same_hash",
                ),
                patch(
                    "app.services.sound_scanner.get_audio_duration",
                    return_value=120000,
                ),
                patch("app.services.sound_scanner.get_file_size", return_value=1024),
            ):
                results = await scanner_service.scan_directory(temp_dir, "SDB")

            # Should have detected one renamed file
            assert results["updated"] == 1
            assert results["deleted"] == 0  # This is the key assertion - no deletion!
            assert results["added"] == 0
            assert len(results["files"]) == 1

            # Verify it was marked as renamed
            file_result = results["files"][0]
            assert file_result["status"] == "updated"
            assert file_result["reason"] == "file was renamed"

            # Verify update was called but delete was NOT called
            scanner_service.sound_repo.update.assert_called_once()
            scanner_service.sound_repo.delete.assert_not_called()

    @pytest.mark.asyncio
    async def test_sync_audio_file_new(
        self,
        scanner_service: SoundScannerService,
    ) -> None:
        """Test syncing a new audio file (no record by hash or by filename)."""
        created_sound = Sound(
            id=1,
            type="SDB",
            name="Test Sound",
            filename="test.mp3",
            duration=120000,  # 120 seconds = 120000 ms
            size=1024,
            hash="test_hash",
        )
        scanner_service.sound_repo.create = AsyncMock(return_value=created_sound)

        # Mock file operations
        with (
            patch("app.services.sound_scanner.get_file_hash", return_value="test_hash"),
            patch("app.services.sound_scanner.get_audio_duration", return_value=120000),
            patch("app.services.sound_scanner.get_file_size", return_value=1024),
        ):
            with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as f:
                temp_path = Path(f.name)

            try:
                results = self._new_results()

                await scanner_service._sync_audio_file(
                    temp_path,
                    "SDB",
                    None,  # existing_sound_by_hash
                    None,  # existing_sound_by_filename
                    "test_hash",
                    results,
                )

                assert results["added"] == 1
                assert results["skipped"] == 0
                assert results["updated"] == 0
                assert len(results["files"]) == 1
                assert results["files"][0]["status"] == "added"

                # Verify sound_repo.create was called with correct data
                create_data = scanner_service.sound_repo.create.call_args[0][0]
                assert create_data["type"] == "SDB"
                assert create_data["filename"] == temp_path.name
                assert create_data["duration"] == 120000  # Duration in ms
                assert create_data["size"] == 1024
                assert create_data["hash"] == "test_hash"
                # SDB sounds are not deletable
                assert create_data["is_deletable"] is False
            finally:
                temp_path.unlink()

    @pytest.mark.asyncio
    async def test_sync_audio_file_updated(
        self,
        scanner_service: SoundScannerService,
    ) -> None:
        """Test syncing a file that was modified (different hash)."""
        # Existing sound with different hash than file
        existing_sound = Sound(
            id=1,
            type="SDB",
            name="Old Sound",
            filename="test.mp3",
            duration=60000,  # Old duration
            size=512,  # Old size
            hash="old_hash",  # Old hash
        )

        scanner_service.sound_repo.update = AsyncMock(return_value=existing_sound)

        # Mock file operations to return new values
        with (
            patch("app.services.sound_scanner.get_file_hash", return_value="new_hash"),
            patch("app.services.sound_scanner.get_audio_duration", return_value=120000),
            patch("app.services.sound_scanner.get_file_size", return_value=1024),
        ):
            with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as f:
                temp_path = Path(f.name)

            try:
                results = self._new_results()

                await scanner_service._sync_audio_file(
                    temp_path,
                    "SDB",
                    None,  # existing_sound_by_hash (different hash)
                    existing_sound,  # existing_sound_by_filename
                    "new_hash",
                    results,
                )

                assert results["updated"] == 1
                assert results["added"] == 0
                assert results["skipped"] == 0
                assert len(results["files"]) == 1
                assert results["files"][0]["status"] == "updated"
                assert results["files"][0]["reason"] == "file was modified"

                # Verify sound_repo.update was called with correct data
                # (second positional argument is the update data).
                update_data = scanner_service.sound_repo.update.call_args[0][1]
                assert update_data["duration"] == 120000
                assert update_data["size"] == 1024
                assert update_data["hash"] == "new_hash"
                # Name is derived from the temp filename; the extension must
                # have been stripped.
                assert update_data["name"].endswith("mp3") is False
            finally:
                temp_path.unlink()

    @pytest.mark.asyncio
    async def test_sync_audio_file_custom_type(
        self,
        scanner_service: SoundScannerService,
    ) -> None:
        """Test syncing a new file with a non-default sound type."""
        created_sound = Sound(
            id=1,
            type="CUSTOM",
            name="Test Sound",
            filename="test.mp3",
            duration=60000,  # 60 seconds = 60000 ms
            size=2048,
            hash="custom_hash",
        )
        scanner_service.sound_repo.create = AsyncMock(return_value=created_sound)

        # Mock file operations
        with (
            patch(
                "app.services.sound_scanner.get_file_hash",
                return_value="custom_hash",
            ),
            patch("app.services.sound_scanner.get_audio_duration", return_value=60000),
            patch("app.services.sound_scanner.get_file_size", return_value=2048),
        ):
            with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
                temp_path = Path(f.name)

            try:
                results = self._new_results()

                await scanner_service._sync_audio_file(
                    temp_path,
                    "CUSTOM",
                    None,  # existing_sound_by_hash
                    None,  # existing_sound_by_filename
                    "custom_hash",
                    results,
                )

                assert results["added"] == 1
                assert results["skipped"] == 0
                assert len(results["files"]) == 1
                assert results["files"][0]["status"] == "added"

                # Verify sound_repo.create was called with correct data for custom type
                create_data = scanner_service.sound_repo.create.call_args[0][0]
                assert create_data["type"] == "CUSTOM"
                assert create_data["filename"] == temp_path.name
                assert create_data["duration"] == 60000  # Duration in ms
                assert create_data["size"] == 2048
                assert create_data["hash"] == "custom_hash"
                # All sounds are set to not deletable
                assert create_data["is_deletable"] is False
            finally:
                temp_path.unlink()