feat: Implement Text-to-Speech (TTS) functionality with API endpoints, models, and service integration

This commit is contained in:
JSC
2025-09-20 23:10:47 +02:00
parent fb0e5e919c
commit 5e8d619736
11 changed files with 887 additions and 0 deletions

View File

@@ -0,0 +1,5 @@
"""TTS providers package."""
# Re-export the provider class so it can be imported from this package directly.
from .gtts import GTTSProvider

# Names exported by a star-import of this package.
__all__ = ["GTTSProvider"]

View File

@@ -0,0 +1,81 @@
"""Google Text-to-Speech provider."""
import asyncio
import io
from typing import Any
from gtts import gTTS
from ..base import TTSProvider
class GTTSProvider(TTSProvider):
    """TTS provider backed by the ``gtts`` (Google Text-to-Speech) library."""

    @property
    def name(self) -> str:
        """Identifier of this provider."""
        return "gtts"

    @property
    def file_extension(self) -> str:
        """Default file extension of the audio this provider produces."""
        return "mp3"

    async def generate_speech(self, text: str, **options: Any) -> bytes:
        """Convert text to speech with gTTS and return the audio bytes.

        Args:
            text: The text to convert to speech.
            **options: Optional gTTS settings. Recognised keys are ``lang``
                (default ``"en"``), ``tld`` (default ``"com"``) and ``slow``
                (default ``False``); any other key is ignored.

        Returns:
            The audio data written by gTTS (MP3), as bytes.
        """
        language = options.get("lang", "en")
        domain = options.get("tld", "com")
        speak_slowly = options.get("slow", False)

        def _synthesize() -> bytes:
            # Render into an in-memory buffer instead of a file on disk.
            engine = gTTS(text=text, lang=language, tld=domain, slow=speak_slowly)
            with io.BytesIO() as buffer:
                engine.write_to_fp(buffer)
                return buffer.getvalue()

        # gTTS is synchronous, so run it in a worker thread to keep the
        # event loop free while the audio is being generated.
        return await asyncio.to_thread(_synthesize)

    def get_supported_languages(self) -> list[str]:
        """Return the language codes this provider advertises.

        A fresh list is built on every call, so callers may mutate the result.
        """
        # Hard-coded selection of common gTTS language codes, grouped by
        # first letter for readability.
        return [
            "af", "ar",
            "bg", "bn", "bs",
            "ca", "cs", "cy",
            "da", "de",
            "el", "en", "eo", "es", "et",
            "fi", "fr",
            "gu",
            "hi", "hr", "hu", "hy",
            "id", "is", "it",
            "ja", "jw",
            "km", "kn", "ko",
            "la", "lv",
            "mk", "ml", "mr", "my",
            "ne", "nl", "no",
            "pl", "pt",
            "ro", "ru",
            "si", "sk", "sq", "sr", "su", "sv", "sw",
            "ta", "te", "th", "tl", "tr",
            "uk", "ur",
            "vi",
            "zh-cn", "zh-tw",
        ]

    def get_option_schema(self) -> dict[str, Any]:
        """Describe the options accepted by :meth:`generate_speech`.

        Returns:
            A mapping from option name (``lang``, ``tld``, ``slow``) to a
            dict holding its ``type``, ``default``, ``description`` and,
            where the choices are restricted, an ``enum`` of allowed values.
        """
        lang_option = {
            "type": "string",
            "default": "en",
            "description": "Language code",
            "enum": self.get_supported_languages(),
        }
        tld_option = {
            "type": "string",
            "default": "com",
            "description": "Top-level domain for Google TTS",
            "enum": ["com", "co.uk", "com.au", "ca", "co.in", "ie", "co.za"],
        }
        slow_option = {
            "type": "boolean",
            "default": False,
            "description": "Speak slowly",
        }
        return {"lang": lang_option, "tld": tld_option, "slow": slow_option}