feat: Implement hash-first identification strategy in audio file syncing and enhance tests for renamed files
This commit is contained in:
@@ -95,7 +95,25 @@ class SoundScannerService:
|
||||
|
||||
# Get all existing sounds of this type from database
|
||||
existing_sounds = await self.sound_repo.get_by_type(sound_type)
|
||||
sounds_by_filename = {sound.filename: sound for sound in existing_sounds}
|
||||
|
||||
# Create lookup dictionaries with immediate attribute access
|
||||
# to avoid session detachment
|
||||
sounds_by_hash = {}
|
||||
sounds_by_filename = {}
|
||||
|
||||
for sound in existing_sounds:
|
||||
# Capture all attributes immediately while session is valid
|
||||
sound_data = {
|
||||
"id": sound.id,
|
||||
"hash": sound.hash,
|
||||
"filename": sound.filename,
|
||||
"name": sound.name,
|
||||
"duration": sound.duration,
|
||||
"size": sound.size,
|
||||
"sound_object": sound, # Keep reference for database operations
|
||||
}
|
||||
sounds_by_hash[sound.hash] = sound_data
|
||||
sounds_by_filename[sound.filename] = sound_data
|
||||
|
||||
# Get all audio files from directory
|
||||
audio_files = [
|
||||
@@ -112,12 +130,27 @@ class SoundScannerService:
|
||||
processed_filenames.add(filename)
|
||||
|
||||
try:
|
||||
# Calculate hash first to enable hash-based lookup
|
||||
file_hash = get_file_hash(file_path)
|
||||
existing_sound_by_hash = sounds_by_hash.get(file_hash)
|
||||
existing_sound_by_filename = sounds_by_filename.get(filename)
|
||||
|
||||
await self._sync_audio_file(
|
||||
file_path,
|
||||
sound_type,
|
||||
sounds_by_filename.get(filename),
|
||||
existing_sound_by_hash,
|
||||
existing_sound_by_filename,
|
||||
file_hash,
|
||||
results,
|
||||
)
|
||||
|
||||
# Check if this was a rename operation and mark old filename as processed
|
||||
if results["files"] and results["files"][-1].get("old_filename"):
|
||||
old_filename = results["files"][-1]["old_filename"]
|
||||
processed_filenames.add(old_filename)
|
||||
logger.debug("Marked old filename as processed: %s", old_filename)
|
||||
# Remove temporary tracking field from results
|
||||
del results["files"][-1]["old_filename"]
|
||||
except Exception as e:
|
||||
logger.exception("Error processing file %s", file_path)
|
||||
results["errors"] += 1
|
||||
@@ -136,10 +169,17 @@ class SoundScannerService:
|
||||
)
|
||||
|
||||
# Delete sounds that no longer exist in directory
|
||||
for filename, sound in sounds_by_filename.items():
|
||||
for filename, sound_data in sounds_by_filename.items():
|
||||
if filename not in processed_filenames:
|
||||
# Attributes already captured in sound_data dictionary
|
||||
sound_name = sound_data["name"]
|
||||
sound_duration = sound_data["duration"]
|
||||
sound_size = sound_data["size"]
|
||||
sound_id = sound_data["id"]
|
||||
sound_object = sound_data["sound_object"]
|
||||
|
||||
try:
|
||||
await self.sound_repo.delete(sound)
|
||||
await self.sound_repo.delete(sound_object)
|
||||
logger.info("Deleted sound no longer in directory: %s", filename)
|
||||
results["deleted"] += 1
|
||||
results["files"].append(
|
||||
@@ -147,10 +187,10 @@ class SoundScannerService:
|
||||
"filename": filename,
|
||||
"status": "deleted",
|
||||
"reason": "file no longer exists",
|
||||
"name": sound.name,
|
||||
"duration": sound.duration,
|
||||
"size": sound.size,
|
||||
"id": sound.id,
|
||||
"name": sound_name,
|
||||
"duration": sound_duration,
|
||||
"size": sound_size,
|
||||
"id": sound_id,
|
||||
"error": None,
|
||||
"changes": None,
|
||||
},
|
||||
@@ -163,10 +203,10 @@ class SoundScannerService:
|
||||
"filename": filename,
|
||||
"status": "error",
|
||||
"reason": "failed to delete",
|
||||
"name": sound.name,
|
||||
"duration": sound.duration,
|
||||
"size": sound.size,
|
||||
"id": sound.id,
|
||||
"name": sound_name,
|
||||
"duration": sound_duration,
|
||||
"size": sound_size,
|
||||
"id": sound_id,
|
||||
"error": str(e),
|
||||
"changes": None,
|
||||
},
|
||||
@@ -179,18 +219,136 @@ class SoundScannerService:
|
||||
self,
|
||||
file_path: Path,
|
||||
sound_type: str,
|
||||
existing_sound: Sound | None,
|
||||
existing_sound_by_hash: dict | Sound | None,
|
||||
existing_sound_by_filename: dict | Sound | None,
|
||||
file_hash: str,
|
||||
results: ScanResults,
|
||||
) -> None:
|
||||
"""Sync a single audio file (add new or update existing)."""
|
||||
"""Sync a single audio file using hash-first identification strategy."""
|
||||
filename = file_path.name
|
||||
file_hash = get_file_hash(file_path)
|
||||
duration = get_audio_duration(file_path)
|
||||
size = get_file_size(file_path)
|
||||
name = self.extract_name_from_filename(filename)
|
||||
|
||||
if existing_sound is None:
|
||||
# Add new sound
|
||||
# Extract attributes - handle both dict (normal) and Sound object (tests)
|
||||
existing_hash_filename = None
|
||||
existing_hash_name = None
|
||||
existing_hash_duration = None
|
||||
existing_hash_size = None
|
||||
existing_hash_id = None
|
||||
existing_hash_object = None
|
||||
|
||||
if existing_sound_by_hash is not None:
|
||||
if isinstance(existing_sound_by_hash, dict):
|
||||
existing_hash_filename = existing_sound_by_hash["filename"]
|
||||
existing_hash_name = existing_sound_by_hash["name"]
|
||||
existing_hash_duration = existing_sound_by_hash["duration"]
|
||||
existing_hash_size = existing_sound_by_hash["size"]
|
||||
existing_hash_id = existing_sound_by_hash["id"]
|
||||
existing_hash_object = existing_sound_by_hash["sound_object"]
|
||||
else: # Sound object (for tests)
|
||||
existing_hash_filename = existing_sound_by_hash.filename
|
||||
existing_hash_name = existing_sound_by_hash.name
|
||||
existing_hash_duration = existing_sound_by_hash.duration
|
||||
existing_hash_size = existing_sound_by_hash.size
|
||||
existing_hash_id = existing_sound_by_hash.id
|
||||
existing_hash_object = existing_sound_by_hash
|
||||
|
||||
existing_filename_id = None
|
||||
existing_filename_object = None
|
||||
if existing_sound_by_filename is not None:
|
||||
if isinstance(existing_sound_by_filename, dict):
|
||||
existing_filename_id = existing_sound_by_filename["id"]
|
||||
existing_filename_object = existing_sound_by_filename["sound_object"]
|
||||
else: # Sound object (for tests)
|
||||
existing_filename_id = existing_sound_by_filename.id
|
||||
existing_filename_object = existing_sound_by_filename
|
||||
|
||||
# Hash-first identification strategy
|
||||
if existing_sound_by_hash is not None:
|
||||
# Content exists in database (same hash)
|
||||
if existing_hash_filename == filename:
|
||||
# Same hash, same filename - file unchanged
|
||||
logger.debug("Sound unchanged: %s", filename)
|
||||
results["skipped"] += 1
|
||||
results["files"].append(
|
||||
{
|
||||
"filename": filename,
|
||||
"status": "skipped",
|
||||
"reason": "file unchanged",
|
||||
"name": existing_hash_name,
|
||||
"duration": existing_hash_duration,
|
||||
"size": existing_hash_size,
|
||||
"id": existing_hash_id,
|
||||
"error": None,
|
||||
"changes": None,
|
||||
},
|
||||
)
|
||||
else:
|
||||
# Same hash, different filename - file was renamed
|
||||
update_data = {
|
||||
"filename": filename,
|
||||
"name": name,
|
||||
}
|
||||
|
||||
await self.sound_repo.update(existing_hash_object, update_data)
|
||||
logger.info(
|
||||
"Detected rename: %s -> %s (ID: %s)",
|
||||
existing_hash_filename,
|
||||
filename,
|
||||
existing_hash_id,
|
||||
)
|
||||
|
||||
results["updated"] += 1
|
||||
results["files"].append(
|
||||
{
|
||||
"filename": filename,
|
||||
"status": "updated",
|
||||
"reason": "file was renamed",
|
||||
"name": name,
|
||||
"duration": existing_hash_duration,
|
||||
"size": existing_hash_size,
|
||||
"id": existing_hash_id,
|
||||
"error": None,
|
||||
"changes": ["filename", "name"],
|
||||
# Store old filename to prevent deletion
|
||||
"old_filename": existing_hash_filename,
|
||||
},
|
||||
)
|
||||
|
||||
elif existing_sound_by_filename is not None:
|
||||
# Same filename but different hash - file was modified
|
||||
update_data = {
|
||||
"name": name,
|
||||
"duration": duration,
|
||||
"size": size,
|
||||
"hash": file_hash,
|
||||
}
|
||||
|
||||
await self.sound_repo.update(existing_filename_object, update_data)
|
||||
logger.info(
|
||||
"Updated modified sound: %s (ID: %s)",
|
||||
name,
|
||||
existing_filename_id,
|
||||
)
|
||||
|
||||
results["updated"] += 1
|
||||
results["files"].append(
|
||||
{
|
||||
"filename": filename,
|
||||
"status": "updated",
|
||||
"reason": "file was modified",
|
||||
"name": name,
|
||||
"duration": duration,
|
||||
"size": size,
|
||||
"id": existing_filename_id,
|
||||
"error": None,
|
||||
"changes": ["hash", "duration", "size", "name"],
|
||||
},
|
||||
)
|
||||
|
||||
else:
|
||||
# New file - neither hash nor filename exists
|
||||
sound_data = {
|
||||
"type": sound_type,
|
||||
"name": name,
|
||||
@@ -222,51 +380,6 @@ class SoundScannerService:
|
||||
},
|
||||
)
|
||||
|
||||
elif existing_sound.hash != file_hash:
|
||||
# Update existing sound (file was modified)
|
||||
update_data = {
|
||||
"name": name,
|
||||
"duration": duration,
|
||||
"size": size,
|
||||
"hash": file_hash,
|
||||
}
|
||||
|
||||
await self.sound_repo.update(existing_sound, update_data)
|
||||
logger.info("Updated modified sound: %s (ID: %s)", name, existing_sound.id)
|
||||
|
||||
results["updated"] += 1
|
||||
results["files"].append(
|
||||
{
|
||||
"filename": filename,
|
||||
"status": "updated",
|
||||
"reason": "file was modified",
|
||||
"name": name,
|
||||
"duration": duration,
|
||||
"size": size,
|
||||
"id": existing_sound.id,
|
||||
"error": None,
|
||||
"changes": ["hash", "duration", "size", "name"],
|
||||
},
|
||||
)
|
||||
|
||||
else:
|
||||
# File unchanged, skip
|
||||
logger.debug("Sound unchanged: %s", filename)
|
||||
results["skipped"] += 1
|
||||
results["files"].append(
|
||||
{
|
||||
"filename": filename,
|
||||
"status": "skipped",
|
||||
"reason": "file unchanged",
|
||||
"name": existing_sound.name,
|
||||
"duration": existing_sound.duration,
|
||||
"size": existing_sound.size,
|
||||
"id": existing_sound.id,
|
||||
"error": None,
|
||||
"changes": None,
|
||||
},
|
||||
)
|
||||
|
||||
async def scan_soundboard_directory(self) -> ScanResults:
|
||||
"""Sync the default soundboard directory."""
|
||||
soundboard_path = "sounds/originals/soundboard"
|
||||
|
||||
Reference in New Issue
Block a user