81 lines
2.6 KiB
Python
81 lines
2.6 KiB
Python
"""Google Text-to-Speech provider."""
|
|
|
|
import asyncio
|
|
import io
|
|
from typing import Any
|
|
|
|
from gtts import gTTS
|
|
|
|
from ..base import TTSProvider
|
|
|
|
|
|
class GTTSProvider(TTSProvider):
    """TTS provider backed by the ``gTTS`` (Google Text-to-Speech) library."""

    @property
    def name(self) -> str:
        """Identifier of this provider (``"gtts"``)."""
        return "gtts"

    @property
    def file_extension(self) -> str:
        """Default file extension (``"mp3"``) for audio from this provider."""
        return "mp3"

    async def generate_speech(self, text: str, **options: Any) -> bytes:
        """Synthesize ``text`` with gTTS and return the resulting audio.

        The gTTS call is synchronous, so it is executed through
        ``asyncio.to_thread`` and awaited from here.

        Args:
            text: The text to convert to speech.
            **options: gTTS-specific settings. The keys read are ``lang``
                (default ``"en"``), ``tld`` (default ``"com"``) and ``slow``
                (default ``False``); other keys are not consulted.

        Returns:
            MP3 audio data as bytes.
        """
        language = options.get("lang", "en")
        domain = options.get("tld", "com")
        speak_slowly = options.get("slow", False)

        def _synthesize() -> bytes:
            # Render into an in-memory buffer and hand back its full contents.
            engine = gTTS(text=text, lang=language, tld=domain, slow=speak_slowly)
            buffer = io.BytesIO()
            engine.write_to_fp(buffer)
            return buffer.getvalue()

        return await asyncio.to_thread(_synthesize)

    def get_supported_languages(self) -> list[str]:
        """Return the language codes this provider advertises.

        The list is a fixed set of common gTTS codes; a new list object is
        built on every call.
        """
        codes = [
            "af", "ar", "bg", "bn", "bs", "ca", "cs", "cy", "da", "de",
            "el", "en", "eo", "es", "et", "fi", "fr", "gu", "hi", "hr",
            "hu", "hy", "id", "is", "it", "ja", "jw", "km", "kn", "ko",
            "la", "lv", "mk", "ml", "mr", "my", "ne", "nl", "no", "pl",
            "pt", "ro", "ru", "si", "sk", "sq", "sr", "su", "sv", "sw",
            "ta", "te", "th", "tl", "tr", "uk", "ur", "vi", "zh-cn", "zh-tw",
        ]
        return codes

    def get_option_schema(self) -> dict[str, Any]:
        """Describe the options accepted by ``generate_speech``.

        Returns:
            A mapping from option name (``lang``, ``tld``, ``slow``) to a
            dict holding its ``type``, ``default``, ``description`` and,
            for the string options, the allowed ``enum`` values.
        """
        lang_spec = {
            "type": "string",
            "default": "en",
            "description": "Language code",
            "enum": self.get_supported_languages(),
        }
        tld_spec = {
            "type": "string",
            "default": "com",
            "description": "Top-level domain for Google TTS",
            "enum": ["com", "co.uk", "com.au", "ca", "co.in", "ie", "co.za"],
        }
        slow_spec = {
            "type": "boolean",
            "default": False,
            "description": "Speak slowly",
        }
        return {"lang": lang_spec, "tld": tld_spec, "slow": slow_spec}