malek-messaoudii
Refactor audio processing to utilize free models and enhance logging; update TTS and STT services for improved functionality
95cb26e
| import logging | |
| import io | |
| import wave | |
| import numpy as np | |
| logger = logging.getLogger(__name__) | |
| # Try to import gTTS, but provide fallback if not available | |
| try: | |
| from gtts import gTTS | |
| GTTS_AVAILABLE = True | |
| except ImportError: | |
| GTTS_AVAILABLE = False | |
| logger.warning("gTTS not available. Using fallback audio generation.") | |
async def generate_tts(text: str) -> bytes:
    """
    Convert text to speech, preferring the free gTTS (Google Text-to-Speech) service.

    The blocking gTTS network request is executed in the event loop's default
    executor so that other coroutines keep running while audio is fetched.

    Args:
        text: Text to convert to speech

    Returns:
        Audio bytes. The container depends on which path produced them:
        MP3 when gTTS is available and succeeds, otherwise whatever
        generate_fallback_audio() / generate_silent_audio() return
        (WAV data, or empty bytes as a last resort).

    Raises:
        Nothing for ordinary failures: any Exception raised during synthesis
        is logged and replaced by the output of generate_silent_audio().
    """
    # Local import keeps this block self-contained; the module's import
    # section does not bring asyncio into scope.
    import asyncio

    try:
        logger.info(f"Generating speech for: '{text}'")
        # Use gTTS if available
        if GTTS_AVAILABLE:
            # gTTS performs synchronous HTTP requests; calling it directly here
            # would stall the event loop until the download finishes.
            loop = asyncio.get_running_loop()
            audio_bytes = await loop.run_in_executor(
                None, _synthesize_with_gtts, text
            )
        else:
            # Fallback to simple tone generation (local and CPU-light, so it
            # is fine to run inline)
            audio_bytes = generate_fallback_audio(text)
        logger.info(f"β TTS successful: {len(audio_bytes)} bytes generated")
        return audio_bytes
    except Exception as e:
        logger.error(f"β TTS failed: {str(e)}")
        # Ultimate fallback
        return generate_silent_audio()


def _synthesize_with_gtts(text: str) -> bytes:
    """Render *text* to MP3 bytes with gTTS; blocking, so run it off the event loop."""
    audio_buffer = io.BytesIO()
    tts = gTTS(text=text, lang='en', slow=False)
    tts.write_to_fp(audio_buffer)
    return audio_buffer.getvalue()
def generate_fallback_audio(text: str) -> bytes:
    """
    Build a short sine-tone WAV clip to stand in for real speech.

    Both the clip length and the pitch are derived from len(text):
    the length is 0.1 s per character clamped to the range 1.0-3.0 s, and
    the pitch is 440 Hz plus 5 Hz per character (capped at +200 Hz).

    Args:
        text: The text the tone stands in for; only its length is used.

    Returns:
        Mono, 16-bit PCM WAV bytes sampled at 22050 Hz. If anything in the
        generation raises, the error is logged and the result of
        generate_silent_audio() is returned instead.
    """
    try:
        char_count = len(text)
        rate_hz = 22050

        # Clip length: 0.1 s per character, kept between 1 and 3 seconds.
        seconds = char_count * 0.1
        if seconds > 3.0:
            seconds = 3.0
        if seconds < 1.0:
            seconds = 1.0

        # Pitch: A4 (440 Hz) shifted upwards with the text length.
        pitch_hz = 440 + min(200, char_count * 5)

        # Sample instants, excluding the end point.
        frame_count = int(rate_hz * seconds)
        timeline = np.linspace(0, seconds, num=frame_count, endpoint=False)

        # Sine wave at 30% of full scale, then scaled to 16-bit PCM.
        angular = 2 * np.pi * pitch_hz
        wave_form = 0.3 * np.sin(angular * timeline)
        pcm = (wave_form * 32767).astype(np.int16)

        # Serialise as an in-memory WAV file.
        wav_bytes = io.BytesIO()
        with wave.open(wav_bytes, 'wb') as writer:
            writer.setframerate(rate_hz)
            writer.setsampwidth(2)   # 16-bit samples
            writer.setnchannels(1)   # mono
            writer.writeframes(pcm.tobytes())
        return wav_bytes.getvalue()
    except Exception as e:
        logger.error(f"Fallback audio generation failed: {str(e)}")
        return generate_silent_audio()
def generate_silent_audio() -> bytes:
    """
    Produce one second of silence as a WAV clip.

    Returns:
        Mono, 16-bit PCM WAV bytes sampled at 22050 Hz containing only zero
        samples. If building the clip raises, the error is logged and empty
        bytes are returned.
    """
    try:
        rate_hz = 22050
        seconds = 1.0

        # All-zero 16-bit samples for the whole clip.
        frame_count = int(rate_hz * seconds)
        silence = np.zeros(frame_count, dtype=np.int16)

        # Serialise as an in-memory WAV file.
        wav_bytes = io.BytesIO()
        with wave.open(wav_bytes, 'wb') as writer:
            writer.setframerate(rate_hz)
            writer.setsampwidth(2)   # 16-bit samples
            writer.setnchannels(1)   # mono
            writer.writeframes(silence.tobytes())
        return wav_bytes.getvalue()
    except Exception as e:
        logger.error(f"Silent audio generation failed: {str(e)}")
        # Nothing simpler left to try: hand back an empty payload.
        return b""