# Source: sdb2-backend/app/services/tts/providers/gtts.py (Python; listed as 82 lines, 2.7 KiB)

"""Google Text-to-Speech provider."""
import asyncio
import io
from typing import Any
from gtts import gTTS
from ..base import TTSProvider
class GTTSProvider(TTSProvider):
    """Google Text-to-Speech provider implementation.

    Wraps the synchronous ``gTTS`` client and exposes it through the async
    ``TTSProvider`` interface, returning the generated audio as MP3 bytes.
    """

    @property
    def name(self) -> str:
        """Return the provider name."""
        return "gtts"

    @property
    def file_extension(self) -> str:
        """Return the default file extension for this provider."""
        return "mp3"

    async def generate_speech(self, text: str, **options: Any) -> bytes:
        """Generate speech from text using Google TTS.

        Args:
            text: The text to convert to speech.
            **options: GTTS-specific options. The keys read here are
                ``lang`` (default ``"en"``), ``tld`` (default ``"com"``)
                and ``slow`` (default ``False``); other keys are ignored.

        Returns:
            MP3 audio data as bytes.
        """
        lang = options.get("lang", "en")
        tld = options.get("tld", "com")
        slow = options.get("slow", False)

        def _generate() -> bytes:
            """Synthesize the audio into an in-memory buffer and return it."""
            tts = gTTS(text=text, lang=lang, tld=tld, slow=slow)
            buffer = io.BytesIO()
            tts.write_to_fp(buffer)
            # getvalue() returns the whole buffer regardless of the current
            # stream position, so no seek(0) is needed before reading.
            return buffer.getvalue()

        # gTTS is synchronous, so run it in a worker thread to keep the
        # event loop free while the audio is being generated.
        return await asyncio.to_thread(_generate)

    def get_supported_languages(self) -> list[str]:
        """Return list of supported language codes.

        A new list is built on every call, so callers may mutate the result
        without affecting later calls.
        """
        # Language codes accepted for the ``lang`` option, including
        # regional variants.
        return [
            "af", "ar", "bg", "bn", "bs", "ca", "cs", "cy", "da", "de", "el",
            "en", "en-au", "en-ca", "en-gb", "en-ie", "en-in", "en-ng", "en-nz",
            "en-ph", "en-za", "en-tz", "en-uk", "en-us",
            "eo", "es", "es-es", "es-mx", "es-us", "et", "eu", "fa", "fi",
            "fr", "fr-ca", "fr-fr", "ga", "gu", "he", "hi", "hr", "hu", "hy",
            "id", "is", "it", "ja", "jw", "ka", "kk", "km", "kn", "ko", "la",
            "lv", "mk", "ml", "mr", "ms", "mt", "my", "ne", "nl", "no", "pa",
            "pl", "pt", "pt-br", "pt-pt", "ro", "ru", "si", "sk", "sl", "sq",
            "sr", "su", "sv", "sw", "ta", "te", "th", "tl", "tr", "uk", "ur",
            "vi", "yo", "zh", "zh-cn", "zh-tw", "zu",
        ]

    def get_option_schema(self) -> dict[str, Any]:
        """Return schema for GTTS-specific options.

        Each key is an option name accepted by ``generate_speech``; the
        value describes its type, default and meaning. The defaults listed
        here mirror the fallbacks used in ``generate_speech``.
        """
        return {
            "lang": {
                "type": "string",
                "default": "en",
                "description": "Language code",
                "enum": self.get_supported_languages(),
            },
            "slow": {
                "type": "boolean",
                "default": False,
                "description": "Speak slowly",
            },
            # ``tld`` is read by generate_speech, so it is advertised here
            # too; it is appended last to keep the existing key order.
            "tld": {
                "type": "string",
                "default": "com",
                "description": "Top-level domain of the Google host to use",
            },
        }