# sdb2-backend/app/services/tts/providers/gtts.py

"""Google Text-to-Speech provider."""

import asyncio
import io
from typing import Any

from gtts import gTTS

from ..base import TTSProvider


class GTTSProvider(TTSProvider):
    """TTS provider that synthesizes speech through the gTTS library."""

    # Commonly used gTTS language codes advertised by this provider.
    _LANGUAGE_CODES = (
        "af", "ar", "bg", "bn", "bs", "ca", "cs", "cy", "da", "de", "el", "en",
        "eo", "es", "et", "fi", "fr", "gu", "hi", "hr", "hu", "hy", "id", "is",
        "it", "ja", "jw", "km", "kn", "ko", "la", "lv", "mk", "ml", "mr", "my",
        "ne", "nl", "no", "pl", "pt", "ro", "ru", "si", "sk", "sq", "sr", "su",
        "sv", "sw", "ta", "te", "th", "tl", "tr", "uk", "ur", "vi", "zh-cn", "zh-tw",
    )

    # Top-level domains offered as choices for the "tld" option.
    _TLD_CHOICES = ("com", "co.uk", "com.au", "ca", "co.in", "ie", "co.za")

    @property
    def name(self) -> str:
        """Identifier of this provider: ``"gtts"``."""
        return "gtts"

    @property
    def file_extension(self) -> str:
        """Extension (without a leading dot) used for generated audio files."""
        return "mp3"

    @staticmethod
    def _render_mp3(text: str, lang: str, tld: str, slow: bool) -> bytes:
        """Blocking helper: run gTTS and return everything it wrote.

        Args:
            text: The text to convert to speech
            lang: Language code handed to gTTS
            tld: Top-level domain handed to gTTS
            slow: Whether gTTS should speak slowly

        Returns:
            The bytes gTTS wrote into an in-memory buffer
        """
        engine = gTTS(text=text, lang=lang, tld=tld, slow=slow)
        buffer = io.BytesIO()
        engine.write_to_fp(buffer)
        # getvalue() yields the full buffer contents regardless of position,
        # so no rewind is needed before reading it back.
        return buffer.getvalue()

    async def generate_speech(self, text: str, **options: Any) -> bytes:
        """Convert text to speech with Google TTS.

        Args:
            text: The text to convert to speech
            **options: GTTS-specific options; ``lang`` (default ``"en"``),
                ``tld`` (default ``"com"``) and ``slow`` (default ``False``)
                are read, any other keys are ignored

        Returns:
            MP3 audio data as bytes
        """
        # Caller-supplied values win over the defaults on the left.
        settings = {"lang": "en", "tld": "com", "slow": False} | options

        # gTTS is synchronous, so the synthesis runs in a worker thread
        # instead of blocking the event loop.
        return await asyncio.to_thread(
            self._render_mp3,
            text,
            settings["lang"],
            settings["tld"],
            settings["slow"],
        )

    def get_supported_languages(self) -> list[str]:
        """Return the supported language codes as a new list on every call."""
        return list(self._LANGUAGE_CODES)

    def get_option_schema(self) -> dict[str, Any]:
        """Describe the GTTS-specific options accepted by ``generate_speech``.

        Returns:
            A mapping from option name (``lang``, ``tld``, ``slow``) to a dict
            holding its type, default, description and, for the string
            options, the list of allowed values
        """
        lang_spec = {
            "type": "string",
            "default": "en",
            "description": "Language code",
            "enum": self.get_supported_languages(),
        }
        tld_spec = {
            "type": "string",
            "default": "com",
            "description": "Top-level domain for Google TTS",
            "enum": list(self._TLD_CHOICES),
        }
        slow_spec = {
            "type": "boolean",
            "default": False,
            "description": "Speak slowly",
        }
        return {"lang": lang_spec, "tld": tld_spec, "slow": slow_spec}