feat: Implement Text-to-Speech (TTS) functionality with API endpoints, models, and service integration
This commit is contained in:
81
app/services/tts/providers/gtts.py
Normal file
81
app/services/tts/providers/gtts.py
Normal file
@@ -0,0 +1,81 @@
|
||||
"""Google Text-to-Speech provider."""
|
||||
|
||||
import asyncio
|
||||
import io
|
||||
from typing import Any
|
||||
|
||||
from gtts import gTTS
|
||||
|
||||
from ..base import TTSProvider
|
||||
|
||||
|
||||
class GTTSProvider(TTSProvider):
    """TTS provider backed by the ``gTTS`` (Google Text-to-Speech) library."""

    @property
    def name(self) -> str:
        """Identifier under which this provider is known."""
        return "gtts"

    @property
    def file_extension(self) -> str:
        """Extension (without a dot) of the audio this provider produces."""
        return "mp3"

    async def generate_speech(self, text: str, **options: Any) -> bytes:
        """Synthesize ``text`` into MP3 audio via gTTS.

        Args:
            text: The text to convert to speech.
            **options: Optional gTTS settings. Recognized keys are
                ``lang`` (default ``"en"``), ``tld`` (default ``"com"``)
                and ``slow`` (default ``False``); other keys are ignored.

        Returns:
            The generated MP3 audio as raw bytes.
        """
        language = options.get("lang", "en")
        domain = options.get("tld", "com")
        speak_slowly = options.get("slow", False)

        def _synthesize() -> bytes:
            # gTTS is synchronous, so both constructing the object and
            # writing the audio happen inside the worker thread.
            engine = gTTS(text=text, lang=language, tld=domain, slow=speak_slowly)
            with io.BytesIO() as audio:
                engine.write_to_fp(audio)
                return audio.getvalue()

        # Hand the blocking work to a thread so the event loop stays free.
        audio_bytes = await asyncio.to_thread(_synthesize)
        return audio_bytes

    def get_supported_languages(self) -> list[str]:
        """Return the language codes this provider advertises."""
        # Hard-coded set of commonly used gTTS language codes.
        codes = [
            "af", "ar", "bg", "bn", "bs", "ca", "cs", "cy",
            "da", "de", "el", "en", "eo", "es", "et", "fi",
            "fr", "gu", "hi", "hr", "hu", "hy", "id", "is",
            "it", "ja", "jw", "km", "kn", "ko", "la", "lv",
            "mk", "ml", "mr", "my", "ne", "nl", "no", "pl",
            "pt", "ro", "ru", "si", "sk", "sq", "sr", "su",
            "sv", "sw", "ta", "te", "th", "tl", "tr", "uk",
            "ur", "vi", "zh-cn", "zh-tw",
        ]
        return codes

    def get_option_schema(self) -> dict[str, Any]:
        """Describe the options accepted by :meth:`generate_speech`.

        Returns:
            A mapping from option name (``lang``, ``tld``, ``slow``) to a
            dict giving its type, default, description and, where
            applicable, the allowed values.
        """
        lang_option: dict[str, Any] = {
            "type": "string",
            "default": "en",
            "description": "Language code",
            "enum": self.get_supported_languages(),
        }
        tld_option: dict[str, Any] = {
            "type": "string",
            "default": "com",
            "description": "Top-level domain for Google TTS",
            "enum": ["com", "co.uk", "com.au", "ca", "co.in", "ie", "co.za"],
        }
        slow_option: dict[str, Any] = {
            "type": "boolean",
            "default": False,
            "description": "Speak slowly",
        }
        return {
            "lang": lang_option,
            "tld": tld_option,
            "slow": slow_option,
        }
|
||||
Reference in New Issue
Block a user