"""Google Text-to-Speech provider."""

import asyncio
import io
from typing import Any

from gtts import gTTS

from ..base import TTSProvider


class GTTSProvider(TTSProvider):
    """TTS provider backed by the gTTS (Google Text-to-Speech) library."""

    @property
    def name(self) -> str:
        """Identifier of this provider."""
        return "gtts"

    @property
    def file_extension(self) -> str:
        """Default file extension for audio produced by this provider."""
        return "mp3"

    async def generate_speech(self, text: str, **options: Any) -> bytes:
        """Convert text to speech with Google TTS.

        Args:
            text: The text to turn into audio.
            **options: gTTS-specific settings. Recognised keys are
                ``lang`` (default ``"en"``), ``tld`` (default ``"com"``)
                and ``slow`` (default ``False``); they are forwarded to
                the ``gTTS`` constructor as given.

        Returns:
            MP3 audio data as bytes.
        """
        language = options.get("lang", "en")
        domain = options.get("tld", "com")
        speak_slowly = options.get("slow", False)

        def _synthesize() -> bytes:
            # gTTS only offers a synchronous API, so this helper is run
            # in a worker thread rather than on the event loop.
            speech = gTTS(text=text, lang=language, tld=domain, slow=speak_slowly)
            buffer = io.BytesIO()
            speech.write_to_fp(buffer)
            # getvalue() returns the whole buffer regardless of the
            # current stream position.
            return buffer.getvalue()

        return await asyncio.to_thread(_synthesize)

    def get_supported_languages(self) -> list[str]:
        """Return the language codes this provider advertises.

        The codes are a fixed, hand-maintained selection of common gTTS
        languages; a new list is returned on every call.
        """
        codes = (
            "af", "ar", "bg", "bn", "bs", "ca", "cs", "cy", "da", "de",
            "el", "en", "eo", "es", "et", "fi", "fr", "gu", "hi", "hr",
            "hu", "hy", "id", "is", "it", "ja", "jw", "km", "kn", "ko",
            "la", "lv", "mk", "ml", "mr", "my", "ne", "nl", "no", "pl",
            "pt", "ro", "ru", "si", "sk", "sq", "sr", "su", "sv", "sw",
            "ta", "te", "th", "tl", "tr", "uk", "ur", "vi", "zh-cn", "zh-tw",
        )
        return list(codes)

    def get_option_schema(self) -> dict[str, Any]:
        """Describe the options accepted by ``generate_speech``.

        Returns:
            A mapping from option name (``lang``, ``tld``, ``slow``) to a
            dict giving its type, default, description and, where
            applicable, the allowed values.
        """
        lang_option = {
            "type": "string",
            "default": "en",
            "description": "Language code",
            "enum": self.get_supported_languages(),
        }
        tld_option = {
            "type": "string",
            "default": "com",
            "description": "Top-level domain for Google TTS",
            "enum": ["com", "co.uk", "com.au", "ca", "co.in", "ie", "co.za"],
        }
        slow_option = {
            "type": "boolean",
            "default": False,
            "description": "Speak slowly",
        }
        return {
            "lang": lang_option,
            "tld": tld_option,
            "slow": slow_option,
        }