feat: Implement background extraction processor with concurrency control

- Added `ExtractionProcessor` class to handle extraction queue processing in the background.
- Implemented methods for starting, stopping, and queuing extractions with concurrency limits.
- Integrated logging for monitoring the processor's status and actions.
- Created tests for the extraction processor to ensure functionality and error handling.

test: Add unit tests for extraction API endpoints

- Created tests for successful extraction creation, authentication checks, and processor status retrieval.
- Ensured proper responses for authenticated and unauthenticated requests.

test: Implement unit tests for extraction repository

- Added tests for creating, retrieving, and updating extractions in the repository.
- Mocked database interactions to validate repository behavior without actual database access.

test: Add comprehensive tests for extraction service

- Developed tests for extraction creation, service detection, and sound record creation.
- Included tests for handling duplicate extractions and invalid URLs.

test: Add unit tests for extraction background processor

- Created tests for the `ExtractionProcessor` class to validate its behavior under various conditions.
- Ensured proper handling of extraction queuing, processing, and completion callbacks.

fix: Update OAuth service tests to use AsyncMock

- Modified OAuth provider tests to use `AsyncMock` for mocking asynchronous HTTP requests.
This commit is contained in:
JSC
2025-07-29 01:06:29 +02:00
parent c993230f98
commit 9b5f83eef0
11 changed files with 1860 additions and 4 deletions

View File

@@ -0,0 +1,408 @@
"""Tests for extraction service."""
import tempfile
from pathlib import Path
from unittest.mock import AsyncMock, Mock, patch
import pytest
from sqlmodel.ext.asyncio.session import AsyncSession
from app.models.extraction import Extraction
from app.models.sound import Sound
from app.services.extraction import ExtractionService
class TestExtractionService:
"""Test extraction service."""
@pytest.fixture
def mock_session(self):
"""Create a mock session."""
return Mock(spec=AsyncSession)
@pytest.fixture
def extraction_service(self, mock_session):
"""Create an extraction service with mock session."""
with patch("app.services.extraction.Path.mkdir"):
return ExtractionService(mock_session)
def test_init(self, extraction_service):
"""Test service initialization."""
assert extraction_service.session is not None
assert extraction_service.extraction_repo is not None
assert extraction_service.sound_repo is not None
def test_sanitize_filename(self, extraction_service):
"""Test filename sanitization."""
test_cases = [
("Hello World", "Hello World"),
("Test<>Video", "Test__Video"),
("Bad/File\\Name", "Bad_File_Name"),
(" Spaces ", "Spaces"),
(
"Very long filename that exceeds the maximum length limit and should be truncated to 100 characters maximum",
"Very long filename that exceeds the maximum length limit and should be truncated to 100 characters m",
),
("", "untitled"),
]
for input_name, expected in test_cases:
result = extraction_service._sanitize_filename(input_name)
assert result == expected
@patch("app.services.extraction.yt_dlp.YoutubeDL")
def test_detect_service_info_youtube(self, mock_ydl_class, extraction_service):
"""Test service detection for YouTube."""
mock_ydl = Mock()
mock_ydl_class.return_value.__enter__.return_value = mock_ydl
mock_ydl.extract_info.return_value = {
"extractor": "youtube",
"id": "test123",
"title": "Test Video",
"duration": 240,
"uploader": "Test Channel",
}
result = extraction_service._detect_service_info(
"https://www.youtube.com/watch?v=test123"
)
assert result is not None
assert result["service"] == "youtube"
assert result["service_id"] == "test123"
assert result["title"] == "Test Video"
assert result["duration"] == 240
@patch("app.services.extraction.yt_dlp.YoutubeDL")
def test_detect_service_info_failure(self, mock_ydl_class, extraction_service):
"""Test service detection failure."""
mock_ydl = Mock()
mock_ydl_class.return_value.__enter__.return_value = mock_ydl
mock_ydl.extract_info.side_effect = Exception("Network error")
result = extraction_service._detect_service_info("https://invalid.url")
assert result is None
@pytest.mark.asyncio
async def test_create_extraction_success(self, extraction_service):
"""Test successful extraction creation."""
url = "https://www.youtube.com/watch?v=test123"
user_id = 1
# Mock service detection
service_info = {
"service": "youtube",
"service_id": "test123",
"title": "Test Video",
}
with patch.object(
extraction_service, "_detect_service_info", return_value=service_info
):
# Mock repository calls
extraction_service.extraction_repo.get_by_service_and_id = AsyncMock(
return_value=None
)
mock_extraction = Extraction(
id=1,
url=url,
user_id=user_id,
service="youtube",
service_id="test123",
title="Test Video",
status="pending",
)
extraction_service.extraction_repo.create = AsyncMock(
return_value=mock_extraction
)
result = await extraction_service.create_extraction(url, user_id)
assert result["id"] == 1
assert result["service"] == "youtube"
assert result["service_id"] == "test123"
assert result["title"] == "Test Video"
assert result["status"] == "pending"
@pytest.mark.asyncio
async def test_create_extraction_duplicate(self, extraction_service):
"""Test extraction creation with duplicate service/service_id."""
url = "https://www.youtube.com/watch?v=test123"
user_id = 1
# Mock service detection
service_info = {
"service": "youtube",
"service_id": "test123",
"title": "Test Video",
}
existing_extraction = Extraction(
id=1,
url=url,
user_id=2, # Different user
service="youtube",
service_id="test123",
status="completed",
)
with patch.object(
extraction_service, "_detect_service_info", return_value=service_info
):
extraction_service.extraction_repo.get_by_service_and_id = AsyncMock(
return_value=existing_extraction
)
with pytest.raises(ValueError, match="Extraction already exists"):
await extraction_service.create_extraction(url, user_id)
@pytest.mark.asyncio
async def test_create_extraction_invalid_url(self, extraction_service):
"""Test extraction creation with invalid URL."""
url = "https://invalid.url"
user_id = 1
with patch.object(
extraction_service, "_detect_service_info", return_value=None
):
with pytest.raises(
ValueError, match="Unable to detect service information"
):
await extraction_service.create_extraction(url, user_id)
def test_ensure_unique_filename(self, extraction_service):
"""Test unique filename generation."""
with tempfile.TemporaryDirectory() as temp_dir:
temp_path = Path(temp_dir)
# Create original file
original_file = temp_path / "test.mp3"
original_file.touch()
# Test unique filename generation
result = extraction_service._ensure_unique_filename(original_file)
expected = temp_path / "test_1.mp3"
assert result == expected
# Create the first duplicate and test again
expected.touch()
result = extraction_service._ensure_unique_filename(original_file)
expected_2 = temp_path / "test_2.mp3"
assert result == expected_2
@pytest.mark.asyncio
async def test_create_sound_record(self, extraction_service):
"""Test sound record creation."""
# Create temporary audio file
with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as f:
audio_path = Path(f.name)
f.write(b"fake audio data")
try:
extraction = Extraction(
id=1,
service="youtube",
service_id="test123",
title="Test Video",
url="https://www.youtube.com/watch?v=test123",
user_id=1,
status="processing",
)
mock_sound = Sound(
id=1,
type="EXT",
name="Test Video",
filename=audio_path.name,
duration=240000,
size=1024,
hash="test_hash",
is_deletable=True,
is_music=True,
is_normalized=False,
play_count=0,
)
with (
patch(
"app.services.extraction.get_audio_duration", return_value=240000
),
patch("app.services.extraction.get_file_size", return_value=1024),
patch(
"app.services.extraction.get_file_hash", return_value="test_hash"
),
):
extraction_service.sound_repo.create = AsyncMock(
return_value=mock_sound
)
result = await extraction_service._create_sound_record(
audio_path,
extraction.title,
extraction.service,
extraction.service_id,
)
assert result.type == "EXT"
assert result.name == "Test Video"
assert result.is_deletable is True
assert result.is_music is True
assert result.is_normalized is False
finally:
audio_path.unlink()
@pytest.mark.asyncio
async def test_normalize_sound_success(self, extraction_service):
"""Test sound normalization."""
sound = Sound(
id=1,
type="EXT",
name="Test Sound",
filename="test.mp3",
duration=240000,
size=1024,
hash="test_hash",
is_normalized=False,
)
mock_normalizer = Mock()
mock_normalizer.normalize_sound = AsyncMock(
return_value={"status": "normalized"}
)
with patch(
"app.services.extraction.SoundNormalizerService",
return_value=mock_normalizer,
):
# Should not raise exception
await extraction_service._normalize_sound(sound)
mock_normalizer.normalize_sound.assert_called_once_with(sound)
@pytest.mark.asyncio
async def test_normalize_sound_failure(self, extraction_service):
"""Test sound normalization failure."""
sound = Sound(
id=1,
type="EXT",
name="Test Sound",
filename="test.mp3",
duration=240000,
size=1024,
hash="test_hash",
is_normalized=False,
)
mock_normalizer = Mock()
mock_normalizer.normalize_sound = AsyncMock(
return_value={"status": "error", "error": "Test error"}
)
with patch(
"app.services.extraction.SoundNormalizerService",
return_value=mock_normalizer,
):
# Should not raise exception even on failure
await extraction_service._normalize_sound(sound)
mock_normalizer.normalize_sound.assert_called_once_with(sound)
@pytest.mark.asyncio
async def test_get_extraction_by_id(self, extraction_service):
"""Test getting extraction by ID."""
extraction = Extraction(
id=1,
service="youtube",
service_id="test123",
url="https://www.youtube.com/watch?v=test123",
user_id=1,
title="Test Video",
status="completed",
sound_id=42,
)
extraction_service.extraction_repo.get_by_id = AsyncMock(
return_value=extraction
)
result = await extraction_service.get_extraction_by_id(1)
assert result is not None
assert result["id"] == 1
assert result["service"] == "youtube"
assert result["service_id"] == "test123"
assert result["title"] == "Test Video"
assert result["status"] == "completed"
assert result["sound_id"] == 42
@pytest.mark.asyncio
async def test_get_extraction_by_id_not_found(self, extraction_service):
"""Test getting extraction by ID when not found."""
extraction_service.extraction_repo.get_by_id = AsyncMock(return_value=None)
result = await extraction_service.get_extraction_by_id(999)
assert result is None
@pytest.mark.asyncio
async def test_get_user_extractions(self, extraction_service):
"""Test getting user extractions."""
extractions = [
Extraction(
id=1,
service="youtube",
service_id="test123",
url="https://www.youtube.com/watch?v=test123",
user_id=1,
title="Test Video 1",
status="completed",
sound_id=42,
),
Extraction(
id=2,
service="youtube",
service_id="test456",
url="https://www.youtube.com/watch?v=test456",
user_id=1,
title="Test Video 2",
status="pending",
),
]
extraction_service.extraction_repo.get_by_user = AsyncMock(
return_value=extractions
)
result = await extraction_service.get_user_extractions(1)
assert len(result) == 2
assert result[0]["id"] == 1
assert result[0]["title"] == "Test Video 1"
assert result[1]["id"] == 2
assert result[1]["title"] == "Test Video 2"
@pytest.mark.asyncio
async def test_get_pending_extractions(self, extraction_service):
"""Test getting pending extractions."""
pending_extractions = [
Extraction(
id=1,
service="youtube",
service_id="test123",
url="https://www.youtube.com/watch?v=test123",
user_id=1,
title="Pending Video",
status="pending",
),
]
extraction_service.extraction_repo.get_pending_extractions = AsyncMock(
return_value=pending_extractions
)
result = await extraction_service.get_pending_extractions()
assert len(result) == 1
assert result[0]["id"] == 1
assert result[0]["status"] == "pending"

View File

@@ -0,0 +1,298 @@
"""Tests for extraction background processor."""
import asyncio
from unittest.mock import AsyncMock, Mock, patch
import pytest
from app.services.extraction_processor import ExtractionProcessor
class TestExtractionProcessor:
"""Test extraction background processor."""
@pytest.fixture
def processor(self):
"""Create an extraction processor instance."""
# Use a custom processor instance to avoid affecting the global one
return ExtractionProcessor()
def test_init(self, processor):
"""Test processor initialization."""
assert processor.max_concurrent > 0
assert len(processor.running_extractions) == 0
assert processor.processing_lock is not None
assert processor.shutdown_event is not None
assert processor.processor_task is None
@pytest.mark.asyncio
async def test_start_and_stop(self, processor):
"""Test starting and stopping the processor."""
# Mock the _process_queue method to avoid actual processing
with patch.object(processor, "_process_queue", new_callable=AsyncMock) as mock_process:
# Start the processor
await processor.start()
assert processor.processor_task is not None
assert not processor.processor_task.done()
# Stop the processor
await processor.stop()
assert processor.processor_task.done()
@pytest.mark.asyncio
async def test_start_already_running(self, processor):
"""Test starting processor when already running."""
with patch.object(processor, "_process_queue", new_callable=AsyncMock):
# Start first time
await processor.start()
first_task = processor.processor_task
# Start second time (should not create new task)
await processor.start()
assert processor.processor_task is first_task
# Clean up
await processor.stop()
@pytest.mark.asyncio
async def test_queue_extraction(self, processor):
"""Test queuing an extraction."""
extraction_id = 123
await processor.queue_extraction(extraction_id)
# The extraction should not be in running_extractions yet
# (it gets added when actually started by the processor)
assert extraction_id not in processor.running_extractions
@pytest.mark.asyncio
async def test_queue_extraction_already_running(self, processor):
"""Test queuing an extraction that's already running."""
extraction_id = 123
processor.running_extractions.add(extraction_id)
await processor.queue_extraction(extraction_id)
# Should still be in running extractions
assert extraction_id in processor.running_extractions
def test_get_status(self, processor):
"""Test getting processor status."""
status = processor.get_status()
assert "running" in status
assert "max_concurrent" in status
assert "currently_processing" in status
assert "processing_ids" in status
assert "available_slots" in status
assert status["max_concurrent"] == processor.max_concurrent
assert status["currently_processing"] == 0
assert status["available_slots"] == processor.max_concurrent
def test_get_status_with_running_extractions(self, processor):
"""Test getting processor status with running extractions."""
processor.running_extractions.add(123)
processor.running_extractions.add(456)
status = processor.get_status()
assert status["currently_processing"] == 2
assert status["available_slots"] == processor.max_concurrent - 2
assert 123 in status["processing_ids"]
assert 456 in status["processing_ids"]
def test_on_extraction_completed(self, processor):
"""Test extraction completion callback."""
extraction_id = 123
processor.running_extractions.add(extraction_id)
# Create a mock completed task
mock_task = Mock()
mock_task.exception.return_value = None
processor._on_extraction_completed(extraction_id, mock_task)
# Should be removed from running extractions
assert extraction_id not in processor.running_extractions
def test_on_extraction_completed_with_exception(self, processor):
"""Test extraction completion callback with exception."""
extraction_id = 123
processor.running_extractions.add(extraction_id)
# Create a mock task with exception
mock_task = Mock()
mock_task.exception.return_value = Exception("Test error")
processor._on_extraction_completed(extraction_id, mock_task)
# Should still be removed from running extractions
assert extraction_id not in processor.running_extractions
@pytest.mark.asyncio
async def test_process_single_extraction_success(self, processor):
"""Test processing a single extraction successfully."""
extraction_id = 123
# Mock the extraction service
mock_service = Mock()
mock_service.process_extraction = AsyncMock(
return_value={"status": "completed", "id": extraction_id}
)
with (
patch(
"app.services.extraction_processor.AsyncSession"
) as mock_session_class,
patch(
"app.services.extraction_processor.ExtractionService",
return_value=mock_service,
),
):
mock_session = AsyncMock()
mock_session_class.return_value.__aenter__.return_value = mock_session
await processor._process_single_extraction(extraction_id)
mock_service.process_extraction.assert_called_once_with(extraction_id)
@pytest.mark.asyncio
async def test_process_single_extraction_failure(self, processor):
"""Test processing a single extraction with failure."""
extraction_id = 123
# Mock the extraction service to raise an exception
mock_service = Mock()
mock_service.process_extraction = AsyncMock(side_effect=Exception("Test error"))
with (
patch(
"app.services.extraction_processor.AsyncSession"
) as mock_session_class,
patch(
"app.services.extraction_processor.ExtractionService",
return_value=mock_service,
),
):
mock_session = AsyncMock()
mock_session_class.return_value.__aenter__.return_value = mock_session
# Should not raise exception (errors are logged)
await processor._process_single_extraction(extraction_id)
mock_service.process_extraction.assert_called_once_with(extraction_id)
@pytest.mark.asyncio
async def test_process_pending_extractions_no_slots(self, processor):
"""Test processing when no slots are available."""
# Fill all slots
for i in range(processor.max_concurrent):
processor.running_extractions.add(i)
# Mock extraction service
mock_service = Mock()
mock_service.get_pending_extractions = AsyncMock(
return_value=[{"id": 100, "status": "pending"}]
)
with (
patch(
"app.services.extraction_processor.AsyncSession"
) as mock_session_class,
patch(
"app.services.extraction_processor.ExtractionService",
return_value=mock_service,
),
):
mock_session = AsyncMock()
mock_session_class.return_value.__aenter__.return_value = mock_session
await processor._process_pending_extractions()
# Should not have started any new extractions
assert 100 not in processor.running_extractions
@pytest.mark.asyncio
async def test_process_pending_extractions_with_slots(self, processor):
"""Test processing when slots are available."""
# Mock extraction service
mock_service = Mock()
mock_service.get_pending_extractions = AsyncMock(
return_value=[
{"id": 100, "status": "pending"},
{"id": 101, "status": "pending"},
]
)
with (
patch(
"app.services.extraction_processor.AsyncSession"
) as mock_session_class,
patch.object(processor, "_process_single_extraction", new_callable=AsyncMock) as mock_process,
patch(
"app.services.extraction_processor.ExtractionService",
return_value=mock_service,
),
patch("asyncio.create_task") as mock_create_task,
):
mock_session = AsyncMock()
mock_session_class.return_value.__aenter__.return_value = mock_session
# Mock task creation
mock_task = Mock()
mock_create_task.return_value = mock_task
await processor._process_pending_extractions()
# Should have added extractions to running set
assert 100 in processor.running_extractions
assert 101 in processor.running_extractions
# Should have created tasks for both
assert mock_create_task.call_count == 2
@pytest.mark.asyncio
async def test_process_pending_extractions_respect_limit(self, processor):
"""Test that processing respects concurrency limit."""
# Set max concurrent to 1 for this test
processor.max_concurrent = 1
# Mock extraction service with multiple pending extractions
mock_service = Mock()
mock_service.get_pending_extractions = AsyncMock(
return_value=[
{"id": 100, "status": "pending"},
{"id": 101, "status": "pending"},
{"id": 102, "status": "pending"},
]
)
with (
patch(
"app.services.extraction_processor.AsyncSession"
) as mock_session_class,
patch.object(processor, "_process_single_extraction", new_callable=AsyncMock) as mock_process,
patch(
"app.services.extraction_processor.ExtractionService",
return_value=mock_service,
),
patch("asyncio.create_task") as mock_create_task,
):
mock_session = AsyncMock()
mock_session_class.return_value.__aenter__.return_value = mock_session
# Mock task creation
mock_task = Mock()
mock_create_task.return_value = mock_task
await processor._process_pending_extractions()
# Should only have started one extraction (due to limit)
assert len(processor.running_extractions) == 1
assert mock_create_task.call_count == 1

View File

@@ -1,7 +1,7 @@
"""Tests for OAuth service."""
from typing import Any
from unittest.mock import Mock, patch
from unittest.mock import AsyncMock, Mock, patch
import pytest
@@ -117,7 +117,7 @@ class TestGoogleOAuthProvider:
"picture": "https://example.com/avatar.jpg",
}
with patch("httpx.AsyncClient.get") as mock_get:
with patch("httpx.AsyncClient.get", new_callable=AsyncMock) as mock_get:
mock_response = Mock()
mock_response.status_code = 200
mock_response.json.return_value = mock_response_data
@@ -162,7 +162,7 @@ class TestGitHubOAuthProvider:
{"email": "secondary@example.com", "primary": False, "verified": True},
]
with patch("httpx.AsyncClient.get") as mock_get:
with patch("httpx.AsyncClient.get", new_callable=AsyncMock) as mock_get:
# Mock user profile response
mock_user_response = Mock()
mock_user_response.status_code = 200
@@ -174,7 +174,7 @@ class TestGitHubOAuthProvider:
mock_emails_response.json.return_value = mock_emails_data
# Return different responses based on URL
def side_effect(url, **kwargs):
async def side_effect(url, **kwargs):
if "user/emails" in str(url):
return mock_emails_response
return mock_user_response