feat: Implement Text-to-Speech (TTS) functionality with API endpoints, models, and service integration

2025-09-20 23:10:47 +02:00
parent fb0e5e919c
commit 5e8d619736
11 changed files with 887 additions and 0 deletions
--- a/alembic/versions/e617c155eea9_add_tts_table.py
+++ b/alembic/versions/e617c155eea9_add_tts_table.py
@@ -0,0 +1,45 @@
 """Add TTS table
 Revision ID: e617c155eea9
 Revises: a0d322857b2c
 Create Date: 2025-09-20 21:51:26.557738
 """
 from typing import Sequence, Union
 from alembic import op
 import sqlalchemy as sa
 import sqlmodel
 # revision identifiers, used by Alembic.
 revision: str = 'e617c155eea9'
 down_revision: Union[str, Sequence[str], None] = 'a0d322857b2c'
 branch_labels: Union[str, Sequence[str], None] = None
 depends_on: Union[str, Sequence[str], None] = None
 def upgrade() -> None:
    """Upgrade schema."""
    # ### commands auto generated by Alembic - please adjust! ###
    op.create_table('tts',
    sa.Column('id', sa.Integer(), nullable=False),
    sa.Column('text', sqlmodel.sql.sqltypes.AutoString(length=1000), nullable=False),
    sa.Column('provider', sqlmodel.sql.sqltypes.AutoString(length=50), nullable=False),
    sa.Column('options', sa.JSON(), nullable=True),
    sa.Column('sound_id', sa.Integer(), nullable=True),
    sa.Column('user_id', sa.Integer(), nullable=False),
    sa.Column('created_at', sa.DateTime(), nullable=False),
    sa.Column('updated_at', sa.DateTime(), nullable=False),
    sa.ForeignKeyConstraint(['sound_id'], ['sound.id'], ),
    sa.ForeignKeyConstraint(['user_id'], ['user.id'], ),
    sa.PrimaryKeyConstraint('id')
    )
    # ### end Alembic commands ###
 def downgrade() -> None:
    """Downgrade schema."""
    # ### commands auto generated by Alembic - please adjust! ###
    op.drop_table('tts')
    # ### end Alembic commands ###
--- a/app/api/v1/init.py
+++ b/app/api/v1/init.py
@@ -15,6 +15,7 @@ from app.api.v1 import (
    scheduler,
    socket,
    sounds,
    tts,
 )
 # V1 API router with v1 prefix
@@ -32,4 +33,5 @@ api_router.include_router(playlists.router, tags=["playlists"])
 api_router.include_router(scheduler.router, tags=["scheduler"])
 api_router.include_router(socket.router, tags=["socket"])
 api_router.include_router(sounds.router, tags=["sounds"])
 api_router.include_router(tts.router, tags=["tts"])
 api_router.include_router(admin.router)
--- a/app/api/v1/tts.py
+++ b/app/api/v1/tts.py
@@ -0,0 +1,216 @@
 """TTS API endpoints."""
 from typing import Annotated, Any
 from fastapi import APIRouter, Depends, HTTPException, status
 from pydantic import BaseModel, Field
 from sqlmodel.ext.asyncio.session import AsyncSession
 from app.core.database import get_db
 from app.core.dependencies import get_current_active_user_flexible
 from app.models.user import User
 from app.services.tts import TTSService
 router = APIRouter(prefix="/tts", tags=["tts"])
 class TTSGenerateRequest(BaseModel):
    """TTS generation request model."""
    text: str = Field(..., min_length=1, max_length=1000, description="Text to convert to speech")
    provider: str = Field(default="gtts", description="TTS provider to use")
    options: dict[str, Any] = Field(default_factory=dict, description="Provider-specific options")
 class TTSResponse(BaseModel):
    """TTS generation response model."""
    id: int
    text: str
    provider: str
    options: dict[str, Any]
    sound_id: int | None
    user_id: int
    created_at: str
 class ProviderInfo(BaseModel):
    """Provider information model."""
    name: str
    file_extension: str
    supported_languages: list[str]
    option_schema: dict[str, Any]
 async def get_tts_service(
    session: Annotated[AsyncSession, Depends(get_db)],
 ) -> TTSService:
    """Get the TTS service."""
    return TTSService(session)
@router.post("/generate")
 async def generate_tts(
    request: TTSGenerateRequest,
    current_user: Annotated[User, Depends(get_current_active_user_flexible)],
    tts_service: Annotated[TTSService, Depends(get_tts_service)],
 ) -> dict[str, Any]:
    """Generate TTS audio and create sound."""
    try:
        if current_user.id is None:
            raise HTTPException(
                status_code=status.HTTP_401_UNAUTHORIZED,
                detail="User ID not available",
            )
        result = await tts_service.create_tts_request(
            text=request.text,
            user_id=current_user.id,
            provider=request.provider,
            **request.options
        )
        tts_record = result["tts"]
        return {
            "message": result["message"],
            "tts": TTSResponse(
                id=tts_record.id,
                text=tts_record.text,
                provider=tts_record.provider,
                options=tts_record.options,
                sound_id=tts_record.sound_id,
                user_id=tts_record.user_id,
                created_at=tts_record.created_at.isoformat(),
            )
        }
    except ValueError as e:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=str(e),
        ) from e
    except Exception as e:
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Failed to generate TTS: {e!s}",
        ) from e
@router.get("/providers")
 async def get_providers(
    tts_service: Annotated[TTSService, Depends(get_tts_service)],
 ) -> dict[str, ProviderInfo]:
    """Get all available TTS providers."""
    providers = tts_service.get_providers()
    result = {}
    for name, provider in providers.items():
        result[name] = ProviderInfo(
            name=provider.name,
            file_extension=provider.file_extension,
            supported_languages=provider.get_supported_languages(),
            option_schema=provider.get_option_schema(),
        )
    return result
@router.get("/providers/{provider_name}")
 async def get_provider(
    provider_name: str,
    tts_service: Annotated[TTSService, Depends(get_tts_service)],
 ) -> ProviderInfo:
    """Get information about a specific TTS provider."""
    provider = tts_service.get_provider(provider_name)
    if not provider:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=f"Provider '{provider_name}' not found",
        )
    return ProviderInfo(
        name=provider.name,
        file_extension=provider.file_extension,
        supported_languages=provider.get_supported_languages(),
        option_schema=provider.get_option_schema(),
    )
@router.get("/history")
 async def get_tts_history(
    current_user: Annotated[User, Depends(get_current_active_user_flexible)],
    tts_service: Annotated[TTSService, Depends(get_tts_service)],
    limit: int = 50,
    offset: int = 0,
 ) -> list[TTSResponse]:
    """Get TTS generation history for the current user."""
    try:
        if current_user.id is None:
            raise HTTPException(
                status_code=status.HTTP_401_UNAUTHORIZED,
                detail="User ID not available",
            )
        tts_records = await tts_service.get_user_tts_history(
            user_id=current_user.id,
            limit=limit,
            offset=offset,
        )
        return [
            TTSResponse(
                id=tts.id,
                text=tts.text,
                provider=tts.provider,
                options=tts.options,
                sound_id=tts.sound_id,
                user_id=tts.user_id,
                created_at=tts.created_at.isoformat(),
            )
            for tts in tts_records
        ]
    except Exception as e:
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Failed to get TTS history: {e!s}",
        ) from e
@router.delete("/{tts_id}")
 async def delete_tts(
    tts_id: int,
    current_user: Annotated[User, Depends(get_current_active_user_flexible)],
    tts_service: Annotated[TTSService, Depends(get_tts_service)],
 ) -> dict[str, str]:
    """Delete a TTS generation and its associated files."""
    try:
        if current_user.id is None:
            raise HTTPException(
                status_code=status.HTTP_401_UNAUTHORIZED,
                detail="User ID not available",
            )
        await tts_service.delete_tts(tts_id=tts_id, user_id=current_user.id)
        return {"message": "TTS generation deleted successfully"}
    except ValueError as e:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=str(e),
        ) from e
    except PermissionError as e:
        raise HTTPException(
            status_code=status.HTTP_403_FORBIDDEN,
            detail=str(e),
        ) from e
    except Exception as e:
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Failed to delete TTS: {e!s}",
        ) from e
--- a/app/models/init.py
+++ b/app/models/init.py
@@ -12,6 +12,7 @@ from .playlist_sound import PlaylistSound
 from .scheduled_task import ScheduledTask
 from .sound import Sound
 from .sound_played import SoundPlayed
 from .tts import TTS
 from .user import User
 from .user_oauth import UserOauth
@@ -27,6 +28,7 @@ __all__ = [
    "ScheduledTask",
    "Sound",
    "SoundPlayed",
    "TTS",
    "User",
    "UserOauth",
 ]
--- a/app/models/tts.py
+++ b/app/models/tts.py
@@ -0,0 +1,26 @@
 """TTS model."""
 from datetime import datetime
 from typing import Any
 from sqlalchemy import JSON, Column
 from sqlmodel import Field, SQLModel
 class TTS(SQLModel, table=True):
    """Text-to-Speech generation record."""
    __tablename__ = "tts"
    id: int | None = Field(primary_key=True)
    text: str = Field(max_length=1000, description="Text that was converted to speech")
    provider: str = Field(max_length=50, description="TTS provider used")
    options: dict[str, Any] = Field(
        default_factory=dict,
        sa_column=Column(JSON),
        description="Provider-specific options used"
    )
    sound_id: int | None = Field(foreign_key="sound.id", description="Associated sound ID")
    user_id: int = Field(foreign_key="user.id", description="User who created the TTS")
    created_at: datetime = Field(default_factory=datetime.utcnow)
    updated_at: datetime = Field(default_factory=datetime.utcnow)
--- a/app/repositories/tts.py
+++ b/app/repositories/tts.py
@@ -0,0 +1,62 @@
 """TTS repository for database operations."""
 from typing import Any, Sequence
 from sqlmodel import select
 from app.models.tts import TTS
 from app.repositories.base import BaseRepository
 class TTSRepository(BaseRepository[TTS]):
    """Repository for TTS operations."""
    def __init__(self, session: Any) -> None:
        super().__init__(TTS, session)
    async def get_by_user_id(
        self,
        user_id: int,
        limit: int = 50,
        offset: int = 0,
    ) -> Sequence[TTS]:
        """Get TTS records by user ID with pagination.
        Args:
            user_id: User ID to filter by
            limit: Maximum number of records to return
            offset: Number of records to skip
        Returns:
            List of TTS records
        """
        stmt = (
            select(self.model)
            .where(self.model.user_id == user_id)
            .order_by(self.model.created_at.desc())
            .limit(limit)
            .offset(offset)
        )
        result = await self.session.exec(stmt)
        return result.all()
    async def get_by_user_and_id(
        self,
        user_id: int,
        tts_id: int,
    ) -> TTS | None:
        """Get a specific TTS record by user ID and TTS ID.
        Args:
            user_id: User ID to filter by
            tts_id: TTS ID to retrieve
        Returns:
            TTS record if found and belongs to user, None otherwise
        """
        stmt = select(self.model).where(
            self.model.id == tts_id,
            self.model.user_id == user_id,
        )
        result = await self.session.exec(stmt)
        return result.first()
--- a/app/services/tts/init.py
+++ b/app/services/tts/init.py
@@ -0,0 +1,6 @@
 """Text-to-Speech services package."""
 from .base import TTSProvider
 from .service import TTSService
 __all__ = ["TTSProvider", "TTSService"]
--- a/app/services/tts/base.py
+++ b/app/services/tts/base.py
@@ -0,0 +1,38 @@
 """Base TTS provider interface."""
 from abc import ABC, abstractmethod
 from typing import Any
 class TTSProvider(ABC):
    """Abstract base class for TTS providers."""
    @abstractmethod
    async def generate_speech(self, text: str, **options: Any) -> bytes:
        """Generate speech from text with provider-specific options.
        Args:
            text: The text to convert to speech
            **options: Provider-specific options
        Returns:
            Audio data as bytes
        """
    @abstractmethod
    def get_supported_languages(self) -> list[str]:
        """Return list of supported language codes."""
    @abstractmethod
    def get_option_schema(self) -> dict[str, Any]:
        """Return schema for provider-specific options."""
    @property
    @abstractmethod
    def name(self) -> str:
        """Return the provider name."""
    @property
    @abstractmethod
    def file_extension(self) -> str:
        """Return the default file extension for this provider."""
--- a/app/services/tts/providers/init.py
+++ b/app/services/tts/providers/init.py
@@ -0,0 +1,5 @@
 """TTS providers package."""
 from .gtts import GTTSProvider
 __all__ = ["GTTSProvider"]
--- a/app/services/tts/providers/gtts.py
+++ b/app/services/tts/providers/gtts.py
@@ -0,0 +1,81 @@
 """Google Text-to-Speech provider."""
 import asyncio
 import io
 from typing import Any
 from gtts import gTTS
 from ..base import TTSProvider
 class GTTSProvider(TTSProvider):
    """Google Text-to-Speech provider implementation."""
    @property
    def name(self) -> str:
        """Return the provider name."""
        return "gtts"
    @property
    def file_extension(self) -> str:
        """Return the default file extension for this provider."""
        return "mp3"
    async def generate_speech(self, text: str, **options: Any) -> bytes:
        """Generate speech from text using Google TTS.
        Args:
            text: The text to convert to speech
            **options: GTTS-specific options (lang, tld, slow)
        Returns:
            MP3 audio data as bytes
        """
        lang = options.get("lang", "en")
        tld = options.get("tld", "com")
        slow = options.get("slow", False)
        # Run TTS generation in thread pool since gTTS is synchronous
        def _generate():
            tts = gTTS(text=text, lang=lang, tld=tld, slow=slow)
            fp = io.BytesIO()
            tts.write_to_fp(fp)
            fp.seek(0)
            return fp.read()
        # Use asyncio.to_thread which is more reliable than run_in_executor
        return await asyncio.to_thread(_generate)
    def get_supported_languages(self) -> list[str]:
        """Return list of supported language codes."""
        # Common GTTS supported languages
        return [
            "af", "ar", "bg", "bn", "bs", "ca", "cs", "cy", "da", "de", "el", "en",
            "eo", "es", "et", "fi", "fr", "gu", "hi", "hr", "hu", "hy", "id", "is",
            "it", "ja", "jw", "km", "kn", "ko", "la", "lv", "mk", "ml", "mr", "my",
            "ne", "nl", "no", "pl", "pt", "ro", "ru", "si", "sk", "sq", "sr", "su",
            "sv", "sw", "ta", "te", "th", "tl", "tr", "uk", "ur", "vi", "zh-cn", "zh-tw"
        ]
    def get_option_schema(self) -> dict[str, Any]:
        """Return schema for GTTS-specific options."""
        return {
            "lang": {
                "type": "string",
                "default": "en",
                "description": "Language code",
                "enum": self.get_supported_languages()
            },
            "tld": {
                "type": "string",
                "default": "com",
                "description": "Top-level domain for Google TTS",
                "enum": ["com", "co.uk", "com.au", "ca", "co.in", "ie", "co.za"]
            },
            "slow": {
                "type": "boolean",
                "default": False,
                "description": "Speak slowly"
            }
        }
--- a/app/services/tts/service.py
+++ b/app/services/tts/service.py
@@ -0,0 +1,404 @@
 """TTS service implementation."""
 import asyncio
 import io
 import uuid
 from pathlib import Path
 from typing import Any
 from gtts import gTTS
 from sqlmodel import select
 from sqlmodel.ext.asyncio.session import AsyncSession
 from app.models.sound import Sound
 from app.models.tts import TTS
 from app.repositories.sound import SoundRepository
 from app.repositories.tts import TTSRepository
 from app.services.sound_normalizer import SoundNormalizerService
 from app.utils.audio import get_audio_duration, get_file_hash, get_file_size
 from .base import TTSProvider
 from .providers import GTTSProvider
 # Constants
 MAX_TEXT_LENGTH = 1000
 MAX_NAME_LENGTH = 50
 class TTSService:
    """Text-to-Speech service with provider management."""
    def __init__(self, session: AsyncSession) -> None:
        """Initialize TTS service.
        Args:
            session: Database session
        """
        self.session = session
        self.sound_repo = SoundRepository(session)
        self.tts_repo = TTSRepository(session)
        self.providers: dict[str, TTSProvider] = {}
        # Register default providers
        self._register_default_providers()
    def _register_default_providers(self) -> None:
        """Register default TTS providers."""
        self.register_provider(GTTSProvider())
    def register_provider(self, provider: TTSProvider) -> None:
        """Register a TTS provider.
        Args:
            provider: TTS provider instance
        """
        self.providers[provider.name] = provider
    def get_providers(self) -> dict[str, TTSProvider]:
        """Get all registered providers."""
        return self.providers.copy()
    def get_provider(self, name: str) -> TTSProvider | None:
        """Get a specific provider by name."""
        return self.providers.get(name)
    async def create_tts_request(
        self,
        text: str,
        user_id: int,
        provider: str = "gtts",
        **options: Any,
    ) -> dict[str, Any]:
        """Create a TTS request that will be processed in the background.
        Args:
            text: Text to convert to speech
            user_id: ID of user creating the sound
            provider: TTS provider name
            **options: Provider-specific options
        Returns:
            Dictionary with TTS record information
        Raises:
            ValueError: If provider not found or text too long
            Exception: If request creation fails
        """
        provider_not_found_msg = f"Provider '{provider}' not found"
        if provider not in self.providers:
            raise ValueError(provider_not_found_msg)
        text_too_long_msg = f"Text too long (max {MAX_TEXT_LENGTH} characters)"
        if len(text) > MAX_TEXT_LENGTH:
            raise ValueError(text_too_long_msg)
        empty_text_msg = "Text cannot be empty"
        if not text.strip():
            raise ValueError(empty_text_msg)
        # Create TTS record with pending status
        tts = TTS(
            text=text,
            provider=provider,
            options=options,
            sound_id=None,  # Will be set when processing completes
            user_id=user_id,
        )
        self.session.add(tts)
        await self.session.commit()
        await self.session.refresh(tts)
        # Queue for background processing
        if tts.id is not None:
            await self._queue_tts_processing(tts.id)
        return {"tts": tts, "message": "TTS generation queued successfully"}
    async def _queue_tts_processing(self, tts_id: int) -> None:
        """Queue TTS for background processing."""
        # For now, process immediately in a different way
        # This could be moved to a proper background queue later
        task = asyncio.create_task(self._process_tts_in_background(tts_id))
        # Store reference to prevent garbage collection
        self._background_tasks = getattr(self, '_background_tasks', set())
        self._background_tasks.add(task)
        task.add_done_callback(self._background_tasks.discard)
    async def _process_tts_in_background(self, tts_id: int) -> None:
        """Process TTS generation in background."""
        from app.core.database import get_session_factory
        try:
            # Create a new session for background processing
            session_factory = get_session_factory()
            async with session_factory() as background_session:
                tts_service = TTSService(background_session)
                # Get the TTS record
                stmt = select(TTS).where(TTS.id == tts_id)
                result = await background_session.exec(stmt)
                tts = result.first()
                if not tts:
                    return
                # Use a synchronous approach for the actual generation
                sound = await tts_service._generate_tts_sync(
                    tts.text,
                    tts.provider,
                    tts.user_id,
                    tts.options,
                )
                # Update the TTS record with the sound ID
                if sound.id is not None:
                    tts.sound_id = sound.id
                    background_session.add(tts)
                    await background_session.commit()
        except Exception:
            # Log error but don't fail - avoiding print for production
            pass
    async def _generate_tts_sync(
        self,
        text: str,
        provider: str,
        user_id: int,
        options: dict[str, Any]
    ) -> Sound:
        """Generate TTS using a synchronous approach."""
        # Generate the audio using the provider (avoid async issues by doing it directly)
        tts_provider = self.providers[provider]
        # Create directories if they don't exist
        original_dir = Path("sounds/originals/text_to_speech")
        original_dir.mkdir(parents=True, exist_ok=True)
        # Create UUID filename
        sound_uuid = str(uuid.uuid4())
        original_filename = f"{sound_uuid}.{tts_provider.file_extension}"
        original_path = original_dir / original_filename
        # Generate audio synchronously
        try:
            # Generate TTS audio
            lang = options.get("lang", "en")
            tld = options.get("tld", "com")
            slow = options.get("slow", False)
            tts_instance = gTTS(text=text, lang=lang, tld=tld, slow=slow)
            fp = io.BytesIO()
            tts_instance.write_to_fp(fp)
            fp.seek(0)
            audio_bytes = fp.read()
            # Save the file
            original_path.write_bytes(audio_bytes)
        except Exception:
            raise
        # Create Sound record with proper metadata
        sound = await self._create_sound_record_complete(
            original_path,
            text,
            provider,
            user_id
        )
        # Normalize the sound
        await self._normalize_sound_safe(sound.id)
        return sound
    async def get_user_tts_history(
        self,
        user_id: int,
        limit: int = 50,
        offset: int = 0
    ) -> list[TTS]:
        """Get TTS history for a user.
        Args:
            user_id: User ID
            limit: Maximum number of records
            offset: Offset for pagination
        Returns:
            List of TTS records
        """
        result = await self.tts_repo.get_by_user_id(user_id, limit, offset)
        return list(result)
    async def _create_sound_record(
        self,
        audio_path: Path,
        text: str,
        provider: str,
        user_id: int,
        file_hash: str
    ) -> Sound:
        """Create a Sound record for the TTS audio."""
        # Get audio metadata
        duration = get_audio_duration(audio_path)
        size = get_file_size(audio_path)
        # Create sound data
        sound_data = {
            "type": "TTS",
            "name": text[:50] + ("..." if len(text) > 50 else ""),
            "filename": audio_path.name,
            "duration": duration,
            "size": size,
            "hash": file_hash,
            "user_id": user_id,
            "is_deletable": True,
            "is_music": False,  # TTS is speech, not music
            "is_normalized": False,
            "play_count": 0,
        }
        sound = await self.sound_repo.create(sound_data)
        return sound
    async def _create_sound_record_simple(
        self,
        audio_path: Path,
        text: str,
        provider: str,
        user_id: int
    ) -> Sound:
        """Create a Sound record for the TTS audio with minimal processing."""
        # Create sound data with basic info
        sound_data = {
            "type": "TTS",
            "name": text[:50] + ("..." if len(text) > 50 else ""),
            "filename": audio_path.name,
            "duration": 0,  # Skip duration calculation for now
            "size": 0,      # Skip size calculation for now
            "hash": str(uuid.uuid4()),  # Use UUID as temporary hash
            "user_id": user_id,
            "is_deletable": True,
            "is_music": False,  # TTS is speech, not music
            "is_normalized": False,
            "play_count": 0,
        }
        sound = await self.sound_repo.create(sound_data)
        return sound
    async def _create_sound_record_complete(
        self,
        audio_path: Path,
        text: str,
        provider: str,
        user_id: int
    ) -> Sound:
        """Create a Sound record for the TTS audio with complete metadata."""
        # Get audio metadata
        duration = get_audio_duration(audio_path)
        size = get_file_size(audio_path)
        file_hash = get_file_hash(audio_path)
        # Check if a sound with this hash already exists
        existing_sound = await self.sound_repo.get_by_hash(file_hash)
        if existing_sound:
            # Clean up the temporary file since we have a duplicate
            if audio_path.exists():
                audio_path.unlink()
            return existing_sound
        # Create sound data with complete metadata
        sound_data = {
            "type": "TTS",
            "name": text[:50] + ("..." if len(text) > 50 else ""),
            "filename": audio_path.name,
            "duration": duration,
            "size": size,
            "hash": file_hash,
            "user_id": user_id,
            "is_deletable": True,
            "is_music": False,  # TTS is speech, not music
            "is_normalized": False,
            "play_count": 0,
        }
        sound = await self.sound_repo.create(sound_data)
        return sound
    async def _normalize_sound_safe(self, sound_id: int) -> None:
        """Normalize the TTS sound with error handling."""
        try:
            # Get fresh sound object from database for normalization
            sound = await self.sound_repo.get_by_id(sound_id)
            if not sound:
                return
            normalizer_service = SoundNormalizerService(self.session)
            result = await normalizer_service.normalize_sound(sound)
            if result["status"] == "error":
                print(f"Warning: Failed to normalize TTS sound {sound_id}: {result.get('error')}")
        except Exception as e:
            print(f"Exception during TTS sound normalization {sound_id}: {e}")
            # Don't fail the TTS generation if normalization fails
    async def _normalize_sound(self, sound_id: int) -> None:
        """Normalize the TTS sound."""
        try:
            # Get fresh sound object from database for normalization
            sound = await self.sound_repo.get_by_id(sound_id)
            if not sound:
                return
            normalizer_service = SoundNormalizerService(self.session)
            result = await normalizer_service.normalize_sound(sound)
            if result["status"] == "error":
                # Log warning but don't fail the TTS generation
                pass
        except Exception:
            # Don't fail the TTS generation if normalization fails
            pass
    async def delete_tts(self, tts_id: int, user_id: int) -> None:
        """Delete a TTS generation and its associated sound and files."""
        # Get the TTS record
        tts = await self.tts_repo.get_by_id(tts_id)
        if not tts:
            raise ValueError(f"TTS with ID {tts_id} not found")
        # Check ownership
        if tts.user_id != user_id:
            raise PermissionError("You don't have permission to delete this TTS generation")
        # If there's an associated sound, delete it and its files
        if tts.sound_id:
            sound = await self.sound_repo.get_by_id(tts.sound_id)
            if sound:
                # Delete the sound files
                await self._delete_sound_files(sound)
                # Delete the sound record
                await self.sound_repo.delete(sound)
        # Delete the TTS record
        await self.tts_repo.delete(tts)
    async def _delete_sound_files(self, sound: Sound) -> None:
        """Delete all files associated with a sound."""
        from pathlib import Path
        # Delete original file
        original_path = Path("sounds/originals/text_to_speech") / sound.filename
        if original_path.exists():
            original_path.unlink()
        # Delete normalized file if it exists
        if sound.normalized_filename:
            normalized_path = Path("sounds/normalized/text_to_speech") / sound.normalized_filename
            if normalized_path.exists():
                normalized_path.unlink()