feat: Replace pydub with ffmpeg for audio duration and metadata extraction in sound services

2025-07-19 09:40:31 +02:00
parent 4cfc2ec0a2
commit b1f9667edd
4 changed files with 32 additions and 39 deletions
--- a/app/services/sound_normalizer_service.py
+++ b/app/services/sound_normalizer_service.py
@@ -7,7 +7,6 @@ import re
 from pathlib import Path

 import ffmpeg
-from pydub import AudioSegment

 from app.database import db
 from app.models.sound import Sound
@@ -632,9 +631,17 @@ class SoundNormalizerService:
            # Calculate file hash
            file_hash = SoundNormalizerService._calculate_file_hash(file_path)

-            # Get duration using pydub
-            audio = AudioSegment.from_wav(file_path)
-            duration = len(audio)  # Duration in milliseconds
+            # Get duration using ffmpeg
+            probe = ffmpeg.probe(file_path)
+            audio_stream = next(
+                (s for s in probe['streams'] if s['codec_type'] == 'audio'),
+                None
+            )
+            
+            if audio_stream and 'duration' in audio_stream:
+                duration = int(float(audio_stream['duration']) * 1000)  # Convert to milliseconds
+            else:
+                duration = 0

            return {
                "duration": duration,
--- a/app/services/sound_scanner_service.py
+++ b/app/services/sound_scanner_service.py
@@ -4,8 +4,7 @@ import hashlib
 import logging
 from pathlib import Path

-from pydub import AudioSegment
-from pydub.utils import mediainfo
+import ffmpeg

 from app.database import db
 from app.models.sound import Sound
@@ -281,32 +280,31 @@ class SoundScannerService:

    @staticmethod
    def _extract_audio_metadata(file_path: str) -> dict:
-        """Extract metadata from audio file using pydub and mediainfo."""
+        """Extract metadata from audio file using ffmpeg-python."""
        try:
            # Get file size
            file_size = Path(file_path).stat().st_size

-            # Load audio file with pydub for basic info
-            audio = AudioSegment.from_file(file_path)
+            # Use ffmpeg to probe audio metadata
+            probe = ffmpeg.probe(file_path)
+            audio_stream = next(
+                (s for s in probe['streams'] if s['codec_type'] == 'audio'),
+                None
+            )
+            
+            if not audio_stream:
+                raise ValueError("No audio stream found in file")

-            # Extract basic metadata from AudioSegment
-            duration = len(audio)
-            channels = audio.channels
-            sample_rate = audio.frame_rate
-
-            # Use mediainfo for more accurate bitrate information
-            bitrate = None
-            try:
-                info = mediainfo(file_path)
-                if info and "bit_rate" in info:
-                    bitrate = int(info["bit_rate"])
-                elif info and "bitrate" in info:
-                    bitrate = int(info["bitrate"])
-            except (ValueError, KeyError, TypeError):
-                # Fallback to calculated bitrate if mediainfo fails
-                if duration > 0:
-                    file_size_bits = file_size * 8
-                    bitrate = int(file_size_bits / duration / 1000)
+            # Extract metadata from ffmpeg probe
+            duration = int(float(audio_stream.get('duration', 0)) * 1000)  # Convert to milliseconds
+            channels = int(audio_stream.get('channels', 0))
+            sample_rate = int(audio_stream.get('sample_rate', 0))
+            bitrate = int(audio_stream.get('bit_rate', 0)) if audio_stream.get('bit_rate') else None
+            
+            # Fallback: estimate bitrate in bits per second from file size and duration when the probe gives no usable bit_rate
+            if not bitrate and duration > 0:
+                file_size_bits = file_size * 8
+                bitrate = int(file_size_bits / (duration / 1000))

            return {
                "duration": duration,