malek-messaoudii
commited on
Commit
·
2da4544
1
Parent(s):
220b6c2
Add groq apis
Browse files- config.py +39 -44
- main.py +47 -167
- models/audio.py +0 -30
- models/stt.py +4 -0
- models/tts.py +6 -0
- routes/__init__.py +1 -1
- routes/audio.py +0 -86
- routes/stt_routes.py +20 -0
- routes/tts_routes.py +15 -0
- services/chatbot_service.py +0 -114
- services/gemini_client.py +0 -9
- services/stt_service.py +11 -92
- services/tts_service.py +14 -47
config.py
CHANGED
|
@@ -5,78 +5,73 @@ from pathlib import Path
|
|
| 5 |
from dotenv import load_dotenv
|
| 6 |
import logging
|
| 7 |
|
| 8 |
-
# Configure logging
|
| 9 |
logger = logging.getLogger(__name__)
|
| 10 |
|
| 11 |
-
# Load
|
| 12 |
load_dotenv()
|
| 13 |
|
| 14 |
-
#
|
| 15 |
API_DIR = Path(__file__).parent
|
| 16 |
PROJECT_ROOT = API_DIR.parent
|
| 17 |
|
| 18 |
-
# ============ HUGGING FACE ============
|
| 19 |
HUGGINGFACE_API_KEY = os.getenv("HUGGINGFACE_API_KEY", "")
|
| 20 |
HUGGINGFACE_STANCE_MODEL_ID = os.getenv("HUGGINGFACE_STANCE_MODEL_ID")
|
| 21 |
HUGGINGFACE_LABEL_MODEL_ID = os.getenv("HUGGINGFACE_LABEL_MODEL_ID")
|
| 22 |
|
| 23 |
-
# ============
|
| 24 |
-
|
| 25 |
-
API_DESCRIPTION = "Complete NLP system with stance detection, KPA, and voice chatbot using free models"
|
| 26 |
-
API_VERSION = "1.0.0"
|
| 27 |
|
| 28 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 29 |
HOST = os.getenv("HOST", "0.0.0.0")
|
| 30 |
-
PORT = int(os.getenv("PORT",
|
| 31 |
RELOAD = os.getenv("RELOAD", "false").lower() == "true"
|
| 32 |
|
| 33 |
-
# ============ CORS
|
| 34 |
CORS_ORIGINS = ["*"]
|
| 35 |
CORS_CREDENTIALS = True
|
| 36 |
CORS_METHODS = ["*"]
|
| 37 |
CORS_HEADERS = ["*"]
|
| 38 |
|
| 39 |
-
# ============ FREE VOICE MODELS ============
|
| 40 |
-
# Speech-to-Text
|
| 41 |
-
STT_MODEL_ID = "openai/whisper-base"
|
| 42 |
-
STT_DEVICE = "cpu" # Change to "cuda" if GPU available
|
| 43 |
-
|
| 44 |
-
# Text-to-Speech
|
| 45 |
-
TTS_ENGINE = "gtts" # Google Text-to-Speech (free)
|
| 46 |
-
TTS_LANGUAGE = "en"
|
| 47 |
-
|
| 48 |
-
# Chatbot
|
| 49 |
-
CHATBOT_MODEL_ID = "microsoft/DialoGPT-medium"
|
| 50 |
-
CHATBOT_DEVICE = "cpu" # Change to "cuda" if GPU available
|
| 51 |
-
|
| 52 |
# ============ AUDIO SETTINGS ============
|
| 53 |
-
ALLOWED_AUDIO_TYPES = {
|
| 54 |
-
"audio/wav",
|
| 55 |
-
"audio/x-wav",
|
| 56 |
-
"audio/mpeg",
|
| 57 |
-
"audio/mp3",
|
| 58 |
-
"audio/mp4",
|
| 59 |
-
"audio/m4a"
|
| 60 |
-
}
|
| 61 |
-
|
| 62 |
-
MAX_TEXT_LENGTH = 500
|
| 63 |
-
MIN_TEXT_LENGTH = 1
|
| 64 |
MAX_AUDIO_SIZE = 10 * 1024 * 1024 # 10MB
|
| 65 |
AUDIO_SAMPLE_RATE = 16000
|
| 66 |
AUDIO_DURATION_LIMIT = 120 # seconds
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 67 |
|
| 68 |
-
# ============ MODEL
|
| 69 |
PRELOAD_MODELS_ON_STARTUP = True
|
| 70 |
LOAD_STANCE_MODEL = True
|
| 71 |
LOAD_KPA_MODEL = True
|
| 72 |
-
LOAD_STT_MODEL =
|
| 73 |
-
LOAD_CHATBOT_MODEL =
|
| 74 |
-
LOAD_TTS_MODEL = False
|
| 75 |
|
| 76 |
logger.info("="*60)
|
| 77 |
logger.info("✓ Configuration loaded successfully")
|
| 78 |
-
logger.info("
|
| 79 |
-
logger.info(f"
|
| 80 |
-
logger.info(f"
|
| 81 |
-
logger.info(f"
|
| 82 |
-
logger.info("
|
|
|
|
|
|
| 5 |
from dotenv import load_dotenv
|
| 6 |
import logging
|
| 7 |
|
|
|
|
| 8 |
logger = logging.getLogger(__name__)
|
| 9 |
|
| 10 |
+
# Load .env variables
|
| 11 |
load_dotenv()
|
| 12 |
|
| 13 |
+
# ============ DIRECTORIES ============
|
| 14 |
API_DIR = Path(__file__).parent
|
| 15 |
PROJECT_ROOT = API_DIR.parent
|
| 16 |
|
| 17 |
+
# ============ HUGGING FACE MODELS ============
|
| 18 |
HUGGINGFACE_API_KEY = os.getenv("HUGGINGFACE_API_KEY", "")
|
| 19 |
HUGGINGFACE_STANCE_MODEL_ID = os.getenv("HUGGINGFACE_STANCE_MODEL_ID")
|
| 20 |
HUGGINGFACE_LABEL_MODEL_ID = os.getenv("HUGGINGFACE_LABEL_MODEL_ID")
|
| 21 |
|
| 22 |
+
# ============ GROQ MODELS ============
|
| 23 |
+
GROQ_API_KEY = os.getenv("GROQ_API_KEY", "")
|
|
|
|
|
|
|
| 24 |
|
| 25 |
+
# **Speech-to-Text**
|
| 26 |
+
GROQ_STT_MODEL = "whisper-large-v3-turbo"
|
| 27 |
+
|
| 28 |
+
# **Text-to-Speech**
|
| 29 |
+
GROQ_TTS_MODEL = "playai-tts"
|
| 30 |
+
GROQ_TTS_VOICE = "Aaliyah-PlayAI"
|
| 31 |
+
GROQ_TTS_FORMAT = "wav"
|
| 32 |
+
|
| 33 |
+
# **Chat Model**
|
| 34 |
+
GROQ_CHAT_MODEL = "llama3-70b-8192"
|
| 35 |
+
|
| 36 |
+
# ============ API META ============
|
| 37 |
+
API_TITLE = "NLP Debater - Voice Chatbot"
|
| 38 |
+
API_DESCRIPTION = "NLP stance detection, KPA, and Groq STT/TTS chatbot"
|
| 39 |
+
API_VERSION = "2.0.0"
|
| 40 |
+
|
| 41 |
+
# ============ SERVER ============
|
| 42 |
HOST = os.getenv("HOST", "0.0.0.0")
|
| 43 |
+
PORT = int(os.getenv("PORT", 7860))
|
| 44 |
RELOAD = os.getenv("RELOAD", "false").lower() == "true"
|
| 45 |
|
| 46 |
+
# ============ CORS ============
|
| 47 |
CORS_ORIGINS = ["*"]
|
| 48 |
CORS_CREDENTIALS = True
|
| 49 |
CORS_METHODS = ["*"]
|
| 50 |
CORS_HEADERS = ["*"]
|
| 51 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 52 |
# ============ AUDIO SETTINGS ============
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 53 |
MAX_AUDIO_SIZE = 10 * 1024 * 1024 # 10MB
|
| 54 |
AUDIO_SAMPLE_RATE = 16000
|
| 55 |
AUDIO_DURATION_LIMIT = 120 # seconds
|
| 56 |
+
ALLOWED_AUDIO_TYPES = {
|
| 57 |
+
"audio/wav", "audio/x-wav",
|
| 58 |
+
"audio/mpeg", "audio/mp3",
|
| 59 |
+
"audio/mp4", "audio/m4a"
|
| 60 |
+
}
|
| 61 |
|
| 62 |
+
# ============ MODEL PRELOADING ============
|
| 63 |
PRELOAD_MODELS_ON_STARTUP = True
|
| 64 |
LOAD_STANCE_MODEL = True
|
| 65 |
LOAD_KPA_MODEL = True
|
| 66 |
+
LOAD_STT_MODEL = False # Groq STT = no preload
|
| 67 |
+
LOAD_CHATBOT_MODEL = False # Groq Chat = no preload
|
| 68 |
+
LOAD_TTS_MODEL = False # Groq TTS = no preload
|
| 69 |
|
| 70 |
logger.info("="*60)
|
| 71 |
logger.info("✓ Configuration loaded successfully")
|
| 72 |
+
logger.info(f" HF Stance Model : {HUGGINGFACE_STANCE_MODEL_ID}")
|
| 73 |
+
logger.info(f" HF Label Model : {HUGGINGFACE_LABEL_MODEL_ID}")
|
| 74 |
+
logger.info(f" GROQ STT Model : {GROQ_STT_MODEL}")
|
| 75 |
+
logger.info(f" GROQ TTS Model : {GROQ_TTS_MODEL}")
|
| 76 |
+
logger.info(f" GROQ Chat Model : {GROQ_CHAT_MODEL}")
|
| 77 |
+
logger.info("="*60)
|
main.py
CHANGED
|
@@ -1,52 +1,17 @@
|
|
| 1 |
import sys
|
| 2 |
from pathlib import Path
|
| 3 |
-
import os
|
| 4 |
-
import subprocess
|
| 5 |
import logging
|
| 6 |
from contextlib import asynccontextmanager
|
| 7 |
|
| 8 |
-
# Configure logging first
|
| 9 |
logging.basicConfig(
|
| 10 |
level=logging.INFO,
|
| 11 |
-
format=
|
| 12 |
)
|
| 13 |
logger = logging.getLogger(__name__)
|
| 14 |
|
| 15 |
-
# Add
|
| 16 |
app_dir = Path(__file__).parent
|
| 17 |
-
|
| 18 |
-
sys.path.insert(0, str(app_dir))
|
| 19 |
-
|
| 20 |
-
def install_ffmpeg():
|
| 21 |
-
"""Install ffmpeg on system (required for audio processing)"""
|
| 22 |
-
try:
|
| 23 |
-
result = subprocess.run(["which", "ffmpeg"], capture_output=True, text=True)
|
| 24 |
-
if result.returncode == 0:
|
| 25 |
-
version_result = subprocess.run(["ffmpeg", "-version"], capture_output=True, text=True)
|
| 26 |
-
if version_result.returncode == 0:
|
| 27 |
-
version = version_result.stdout.split()[2]
|
| 28 |
-
logger.info(f"✓ ffmpeg already installed: {version}")
|
| 29 |
-
return True
|
| 30 |
-
|
| 31 |
-
logger.info("Installing ffmpeg...")
|
| 32 |
-
subprocess.run(["apt-get", "update"], check=True, capture_output=True)
|
| 33 |
-
subprocess.run(["apt-get", "install", "-y", "ffmpeg"], check=True, capture_output=True)
|
| 34 |
-
|
| 35 |
-
verify = subprocess.run(["ffmpeg", "-version"], capture_output=True, text=True)
|
| 36 |
-
if verify.returncode == 0:
|
| 37 |
-
version = verify.stdout.split()[2]
|
| 38 |
-
logger.info(f"✓ ffmpeg installed successfully: {version}")
|
| 39 |
-
return True
|
| 40 |
-
return False
|
| 41 |
-
except Exception as e:
|
| 42 |
-
logger.warning(f"⚠️ ffmpeg installation warning: {e}")
|
| 43 |
-
return False
|
| 44 |
-
|
| 45 |
-
# Install system dependencies first
|
| 46 |
-
logger.info("="*60)
|
| 47 |
-
logger.info("Checking system dependencies...")
|
| 48 |
-
logger.info("="*60)
|
| 49 |
-
install_ffmpeg()
|
| 50 |
|
| 51 |
from fastapi import FastAPI
|
| 52 |
from fastapi.middleware.cors import CORSMiddleware
|
|
@@ -56,82 +21,49 @@ from config import (
|
|
| 56 |
API_TITLE, API_DESCRIPTION, API_VERSION,
|
| 57 |
HUGGINGFACE_API_KEY, HUGGINGFACE_STANCE_MODEL_ID, HUGGINGFACE_LABEL_MODEL_ID,
|
| 58 |
HOST, PORT, RELOAD,
|
| 59 |
-
CORS_ORIGINS,
|
| 60 |
-
PRELOAD_MODELS_ON_STARTUP, LOAD_STANCE_MODEL, LOAD_KPA_MODEL
|
| 61 |
-
LOAD_STT_MODEL, LOAD_CHATBOT_MODEL, STT_MODEL_ID, CHATBOT_MODEL_ID
|
| 62 |
)
|
| 63 |
|
| 64 |
@asynccontextmanager
|
| 65 |
async def lifespan(app: FastAPI):
|
| 66 |
-
"""Load models on startup and cleanup on shutdown"""
|
| 67 |
logger.info("="*60)
|
| 68 |
-
logger.info("🚀
|
| 69 |
logger.info("="*60)
|
| 70 |
-
|
| 71 |
if PRELOAD_MODELS_ON_STARTUP:
|
| 72 |
-
|
|
|
|
| 73 |
if LOAD_STANCE_MODEL:
|
| 74 |
try:
|
| 75 |
-
logger.info(f"Loading Stance Model: {HUGGINGFACE_STANCE_MODEL_ID}")
|
| 76 |
from services.stance_model_manager import load_model as load_stance
|
| 77 |
load_stance(HUGGINGFACE_STANCE_MODEL_ID, HUGGINGFACE_API_KEY)
|
| 78 |
-
logger.info("✓ Stance model loaded
|
| 79 |
except Exception as e:
|
| 80 |
-
logger.error(f"✗
|
| 81 |
-
|
| 82 |
-
# Load KPA
|
| 83 |
if LOAD_KPA_MODEL:
|
| 84 |
try:
|
| 85 |
-
logger.info(f"Loading KPA Model: {HUGGINGFACE_LABEL_MODEL_ID}")
|
| 86 |
from services.label_model_manager import load_model as load_kpa
|
| 87 |
load_kpa(HUGGINGFACE_LABEL_MODEL_ID, HUGGINGFACE_API_KEY)
|
| 88 |
-
logger.info("✓ KPA model loaded
|
| 89 |
-
except Exception as e:
|
| 90 |
-
logger.error(f"✗ KPA model loading failed: {str(e)}")
|
| 91 |
-
|
| 92 |
-
# Load STT Model (Speech-to-Text)
|
| 93 |
-
if LOAD_STT_MODEL:
|
| 94 |
-
try:
|
| 95 |
-
logger.info(f"Loading STT Model: {STT_MODEL_ID}")
|
| 96 |
-
from services.stt_service import STTService
|
| 97 |
-
stt_service = STTService()
|
| 98 |
-
await stt_service.initialize()
|
| 99 |
-
logger.info("✓ STT model loaded successfully")
|
| 100 |
except Exception as e:
|
| 101 |
-
logger.error(f"✗
|
| 102 |
-
|
| 103 |
-
# Load Chatbot Model
|
| 104 |
-
if LOAD_CHATBOT_MODEL:
|
| 105 |
-
try:
|
| 106 |
-
logger.info(f"Loading Chatbot Model: {CHATBOT_MODEL_ID}")
|
| 107 |
-
from services.chatbot_service import ChatbotService
|
| 108 |
-
chatbot_service = ChatbotService()
|
| 109 |
-
await chatbot_service.initialize()
|
| 110 |
-
logger.info("✓ Chatbot model loaded successfully")
|
| 111 |
-
except Exception as e:
|
| 112 |
-
logger.error(f"✗ Chatbot model loading failed: {str(e)}")
|
| 113 |
-
|
| 114 |
-
logger.info("="*60)
|
| 115 |
-
logger.info("✓ API startup complete - Ready to serve requests")
|
| 116 |
-
logger.info(f"📚 API Docs: http://{HOST}:{PORT}/docs")
|
| 117 |
-
logger.info("="*60)
|
| 118 |
-
|
| 119 |
-
yield # Application runs here
|
| 120 |
-
|
| 121 |
-
# Shutdown
|
| 122 |
-
logger.info("Shutting down API...")
|
| 123 |
|
| 124 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 125 |
app = FastAPI(
|
| 126 |
title=API_TITLE,
|
| 127 |
description=API_DESCRIPTION,
|
| 128 |
version=API_VERSION,
|
| 129 |
-
|
| 130 |
-
redoc_url="/redoc",
|
| 131 |
-
lifespan=lifespan,
|
| 132 |
)
|
| 133 |
|
| 134 |
-
#
|
| 135 |
app.add_middleware(
|
| 136 |
CORSMiddleware,
|
| 137 |
allow_origins=CORS_ORIGINS,
|
|
@@ -140,98 +72,46 @@ app.add_middleware(
|
|
| 140 |
allow_headers=CORS_HEADERS,
|
| 141 |
)
|
| 142 |
|
| 143 |
-
#
|
|
|
|
|
|
|
| 144 |
try:
|
| 145 |
-
from routes.
|
| 146 |
-
app.include_router(
|
| 147 |
-
logger.info("✓
|
| 148 |
except Exception as e:
|
| 149 |
-
logger.warning(f"
|
| 150 |
|
|
|
|
| 151 |
try:
|
| 152 |
-
from routes.
|
| 153 |
-
app.include_router(
|
| 154 |
-
logger.info("✓
|
| 155 |
except Exception as e:
|
| 156 |
-
logger.warning(f"
|
| 157 |
|
|
|
|
| 158 |
try:
|
| 159 |
from routes import api_router
|
| 160 |
app.include_router(api_router)
|
| 161 |
-
logger.info("✓ API routes
|
| 162 |
except Exception as e:
|
| 163 |
-
logger.warning(f"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 164 |
|
| 165 |
-
# Health check endpoints
|
| 166 |
@app.get("/")
|
| 167 |
async def root():
|
| 168 |
-
"""Root endpoint"""
|
| 169 |
return {
|
| 170 |
-
"message": "NLP Debater API with Voice
|
| 171 |
-
"status": "healthy",
|
| 172 |
-
"version": API_VERSION,
|
| 173 |
"docs": "/docs",
|
| 174 |
-
"
|
| 175 |
-
|
| 176 |
-
"audio_processing": "/docs#/Audio%20Processing",
|
| 177 |
-
"health": "/health",
|
| 178 |
-
"models-status": "/models-status"
|
| 179 |
-
}
|
| 180 |
-
}
|
| 181 |
-
|
| 182 |
-
@app.get("/health")
|
| 183 |
-
async def health_check():
|
| 184 |
-
"""Simple health check"""
|
| 185 |
-
return {"status": "healthy", "message": "API is running"}
|
| 186 |
-
|
| 187 |
-
@app.get("/models-status")
|
| 188 |
-
async def models_status():
|
| 189 |
-
"""Check which models are loaded"""
|
| 190 |
-
status = {
|
| 191 |
-
"stt_model": "unknown",
|
| 192 |
-
"tts_engine": "gtts (free)",
|
| 193 |
-
"chatbot_model": "unknown",
|
| 194 |
-
"stance_model": "unknown",
|
| 195 |
-
"kpa_model": "unknown"
|
| 196 |
}
|
| 197 |
-
|
| 198 |
-
try:
|
| 199 |
-
from services.stt_service import STTService
|
| 200 |
-
stt_service = STTService()
|
| 201 |
-
status["stt_model"] = "loaded" if hasattr(stt_service, 'initialized') and stt_service.initialized else "not loaded"
|
| 202 |
-
except:
|
| 203 |
-
status["stt_model"] = "error"
|
| 204 |
-
|
| 205 |
-
try:
|
| 206 |
-
from services.chatbot_service import ChatbotService
|
| 207 |
-
chatbot_service = ChatbotService()
|
| 208 |
-
status["chatbot_model"] = "loaded" if hasattr(chatbot_service, 'initialized') and chatbot_service.initialized else "not loaded"
|
| 209 |
-
except:
|
| 210 |
-
status["chatbot_model"] = "error"
|
| 211 |
-
|
| 212 |
-
return status
|
| 213 |
-
|
| 214 |
-
@app.get("/check-ffmpeg")
|
| 215 |
-
async def check_ffmpeg():
|
| 216 |
-
"""Check if ffmpeg is installed"""
|
| 217 |
-
try:
|
| 218 |
-
result = subprocess.run(["ffmpeg", "-version"], capture_output=True, text=True)
|
| 219 |
-
if result.returncode == 0:
|
| 220 |
-
version = result.stdout.split('\n')[0]
|
| 221 |
-
return {"status": "available", "version": version}
|
| 222 |
-
else:
|
| 223 |
-
return {"status": "error", "error": result.stderr}
|
| 224 |
-
except FileNotFoundError:
|
| 225 |
-
return {"status": "not found", "error": "ffmpeg is not installed"}
|
| 226 |
|
| 227 |
if __name__ == "__main__":
|
| 228 |
-
|
| 229 |
-
logger.info(f"📚 Documentation: http://{HOST}:{PORT}/docs")
|
| 230 |
-
|
| 231 |
-
uvicorn.run(
|
| 232 |
-
"main:app",
|
| 233 |
-
host=HOST,
|
| 234 |
-
port=PORT,
|
| 235 |
-
reload=RELOAD,
|
| 236 |
-
log_level="info"
|
| 237 |
-
)
|
|
|
|
| 1 |
import sys
|
| 2 |
from pathlib import Path
|
|
|
|
|
|
|
| 3 |
import logging
|
| 4 |
from contextlib import asynccontextmanager
|
| 5 |
|
|
|
|
| 6 |
logging.basicConfig(
|
| 7 |
level=logging.INFO,
|
| 8 |
+
format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
|
| 9 |
)
|
| 10 |
logger = logging.getLogger(__name__)
|
| 11 |
|
| 12 |
+
# Add app directory
|
| 13 |
app_dir = Path(__file__).parent
|
| 14 |
+
sys.path.insert(0, str(app_dir))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
|
| 16 |
from fastapi import FastAPI
|
| 17 |
from fastapi.middleware.cors import CORSMiddleware
|
|
|
|
| 21 |
API_TITLE, API_DESCRIPTION, API_VERSION,
|
| 22 |
HUGGINGFACE_API_KEY, HUGGINGFACE_STANCE_MODEL_ID, HUGGINGFACE_LABEL_MODEL_ID,
|
| 23 |
HOST, PORT, RELOAD,
|
| 24 |
+
CORS_ORIGINS, CORS_METHODS, CORS_HEADERS, CORS_CREDENTIALS,
|
| 25 |
+
PRELOAD_MODELS_ON_STARTUP, LOAD_STANCE_MODEL, LOAD_KPA_MODEL
|
|
|
|
| 26 |
)
|
| 27 |
|
| 28 |
@asynccontextmanager
|
| 29 |
async def lifespan(app: FastAPI):
|
|
|
|
| 30 |
logger.info("="*60)
|
| 31 |
+
logger.info("🚀 API STARTUP - Loading HuggingFace models...")
|
| 32 |
logger.info("="*60)
|
| 33 |
+
|
| 34 |
if PRELOAD_MODELS_ON_STARTUP:
|
| 35 |
+
|
| 36 |
+
# Load stance model
|
| 37 |
if LOAD_STANCE_MODEL:
|
| 38 |
try:
|
|
|
|
| 39 |
from services.stance_model_manager import load_model as load_stance
|
| 40 |
load_stance(HUGGINGFACE_STANCE_MODEL_ID, HUGGINGFACE_API_KEY)
|
| 41 |
+
logger.info("✓ Stance model loaded")
|
| 42 |
except Exception as e:
|
| 43 |
+
logger.error(f"✗ Failed loading stance model: {e}")
|
| 44 |
+
|
| 45 |
+
# Load KPA model
|
| 46 |
if LOAD_KPA_MODEL:
|
| 47 |
try:
|
|
|
|
| 48 |
from services.label_model_manager import load_model as load_kpa
|
| 49 |
load_kpa(HUGGINGFACE_LABEL_MODEL_ID, HUGGINGFACE_API_KEY)
|
| 50 |
+
logger.info("✓ KPA model loaded")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 51 |
except Exception as e:
|
| 52 |
+
logger.error(f"✗ Failed loading KPA model: {e}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 53 |
|
| 54 |
+
logger.info("✓ Startup complete. API ready.")
|
| 55 |
+
yield
|
| 56 |
+
logger.info("🛑 Shutting down...")
|
| 57 |
+
|
| 58 |
+
# ------------- FASTAPI APP -------------
|
| 59 |
app = FastAPI(
|
| 60 |
title=API_TITLE,
|
| 61 |
description=API_DESCRIPTION,
|
| 62 |
version=API_VERSION,
|
| 63 |
+
lifespan=lifespan
|
|
|
|
|
|
|
| 64 |
)
|
| 65 |
|
| 66 |
+
# ------------- CORS -------------
|
| 67 |
app.add_middleware(
|
| 68 |
CORSMiddleware,
|
| 69 |
allow_origins=CORS_ORIGINS,
|
|
|
|
| 72 |
allow_headers=CORS_HEADERS,
|
| 73 |
)
|
| 74 |
|
| 75 |
+
# ============ ROUTES ============
|
| 76 |
+
|
| 77 |
+
# STT route (Groq Whisper)
|
| 78 |
try:
|
| 79 |
+
from routes.stt_routes import router as stt_router
|
| 80 |
+
app.include_router(stt_router, prefix="/api/v1/stt", tags=["Speech To Text"])
|
| 81 |
+
logger.info("✓ STT route loaded (Groq Whisper)")
|
| 82 |
except Exception as e:
|
| 83 |
+
logger.warning(f"⚠ Failed loading STT route: {e}")
|
| 84 |
|
| 85 |
+
# TTS route (Groq PlayAI TTS)
|
| 86 |
try:
|
| 87 |
+
from routes.tts_routes import router as tts_router
|
| 88 |
+
app.include_router(tts_router, prefix="/api/v1/tts", tags=["Text To Speech"])
|
| 89 |
+
logger.info("✓ TTS route loaded (Groq PlayAI TTS)")
|
| 90 |
except Exception as e:
|
| 91 |
+
logger.warning(f"⚠ Failed loading TTS route: {e}")
|
| 92 |
|
| 93 |
+
# Main NLP system routes
|
| 94 |
try:
|
| 95 |
from routes import api_router
|
| 96 |
app.include_router(api_router)
|
| 97 |
+
logger.info("✓ Main API routes loaded")
|
| 98 |
except Exception as e:
|
| 99 |
+
logger.warning(f"⚠ Failed loading main API routes: {e}")
|
| 100 |
+
|
| 101 |
+
# ------------------ BASIC ROUTES ------------------
|
| 102 |
+
|
| 103 |
+
@app.get("/health")
|
| 104 |
+
async def health():
|
| 105 |
+
return {"status": "healthy", "service": "NLP Debater + Groq Voice"}
|
| 106 |
|
|
|
|
| 107 |
@app.get("/")
|
| 108 |
async def root():
|
|
|
|
| 109 |
return {
|
| 110 |
+
"message": "NLP Debater API with Groq Voice Support",
|
|
|
|
|
|
|
| 111 |
"docs": "/docs",
|
| 112 |
+
"voice_stt": "/api/v1/stt",
|
| 113 |
+
"voice_tts": "/api/v1/tts"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 114 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 115 |
|
| 116 |
if __name__ == "__main__":
|
| 117 |
+
uvicorn.run("main:app", host=HOST, port=PORT, reload=RELOAD)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
models/audio.py
DELETED
|
@@ -1,30 +0,0 @@
|
|
| 1 |
-
from pydantic import BaseModel, Field
|
| 2 |
-
from typing import Optional, List, Dict, Any
|
| 3 |
-
from enum import Enum
|
| 4 |
-
from datetime import datetime
|
| 5 |
-
|
| 6 |
-
class MessageType(str, Enum):
|
| 7 |
-
TEXT = "text"
|
| 8 |
-
AUDIO = "audio"
|
| 9 |
-
|
| 10 |
-
class UserMessage(BaseModel):
|
| 11 |
-
message_id: str = Field(..., description="Unique message ID")
|
| 12 |
-
content: str = Field(..., description="Text content or audio base64")
|
| 13 |
-
message_type: MessageType = Field(..., description="Message type")
|
| 14 |
-
session_id: str = Field(..., description="User session ID")
|
| 15 |
-
timestamp: datetime = Field(default_factory=datetime.now)
|
| 16 |
-
|
| 17 |
-
class ChatbotResponse(BaseModel):
|
| 18 |
-
response_id: str = Field(..., description="Unique response ID")
|
| 19 |
-
text_response: str = Field(..., description="Chatbot text response")
|
| 20 |
-
audio_response: Optional[str] = Field(None, description="Audio response in base64")
|
| 21 |
-
audio_url: Optional[str] = Field(None, description="Generated audio URL")
|
| 22 |
-
session_id: str = Field(..., description="User session ID")
|
| 23 |
-
timestamp: datetime = Field(default_factory=datetime.now)
|
| 24 |
-
|
| 25 |
-
class ChatSession(BaseModel):
|
| 26 |
-
session_id: str = Field(..., description="Session ID")
|
| 27 |
-
user_id: Optional[str] = Field(None, description="User ID")
|
| 28 |
-
created_at: datetime = Field(default_factory=datetime.now)
|
| 29 |
-
last_activity: datetime = Field(default_factory=datetime.now)
|
| 30 |
-
conversation_history: List[Dict[str, Any]] = Field(default_factory=list)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
models/stt.py
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from pydantic import BaseModel
|
| 2 |
+
|
| 3 |
+
class STTResponse(BaseModel):
|
| 4 |
+
text: str
|
models/tts.py
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from pydantic import BaseModel
|
| 2 |
+
|
| 3 |
+
class TTSRequest(BaseModel):
|
| 4 |
+
text: str
|
| 5 |
+
voice: str = "Aaliyah-PlayAI"
|
| 6 |
+
format: str = "wav"
|
routes/__init__.py
CHANGED
|
@@ -2,7 +2,7 @@
|
|
| 2 |
|
| 3 |
from fastapi import APIRouter
|
| 4 |
from . import root, health, stance, label
|
| 5 |
-
from routes.
|
| 6 |
# Create main router
|
| 7 |
api_router = APIRouter()
|
| 8 |
|
|
|
|
| 2 |
|
| 3 |
from fastapi import APIRouter
|
| 4 |
from . import root, health, stance, label
|
| 5 |
+
from routes.tts_routes import router as audio_router
|
| 6 |
# Create main router
|
| 7 |
api_router = APIRouter()
|
| 8 |
|
routes/audio.py
DELETED
|
@@ -1,86 +0,0 @@
|
|
| 1 |
-
from fastapi import APIRouter, HTTPException, UploadFile, File, Form
|
| 2 |
-
from fastapi.responses import JSONResponse
|
| 3 |
-
import uuid
|
| 4 |
-
import base64
|
| 5 |
-
from models.audio import UserMessage, ChatbotResponse, MessageType
|
| 6 |
-
from services.chatbot_service import ChatbotService
|
| 7 |
-
|
| 8 |
-
router = APIRouter()
|
| 9 |
-
chatbot_service = ChatbotService()
|
| 10 |
-
|
| 11 |
-
@router.post("/chat/message", response_model=ChatbotResponse)
|
| 12 |
-
async def send_chat_message(
|
| 13 |
-
session_id: str = Form(...),
|
| 14 |
-
message_type: str = Form(...),
|
| 15 |
-
message: str = Form(None),
|
| 16 |
-
audio_file: UploadFile = File(None)
|
| 17 |
-
):
|
| 18 |
-
try:
|
| 19 |
-
# Validate input
|
| 20 |
-
if not message and not audio_file:
|
| 21 |
-
raise HTTPException(status_code=400, detail="Either message or audio file must be provided")
|
| 22 |
-
|
| 23 |
-
if message_type == "audio" and not audio_file:
|
| 24 |
-
raise HTTPException(status_code=400, detail="Audio file required for audio messages")
|
| 25 |
-
|
| 26 |
-
# Process audio file if provided
|
| 27 |
-
content = ""
|
| 28 |
-
if audio_file:
|
| 29 |
-
audio_data = await audio_file.read()
|
| 30 |
-
content = base64.b64encode(audio_data).decode('utf-8')
|
| 31 |
-
else:
|
| 32 |
-
content = message
|
| 33 |
-
|
| 34 |
-
# Create user message
|
| 35 |
-
user_message = UserMessage(
|
| 36 |
-
message_id=str(uuid.uuid4()),
|
| 37 |
-
content=content,
|
| 38 |
-
message_type=MessageType(message_type),
|
| 39 |
-
session_id=session_id
|
| 40 |
-
)
|
| 41 |
-
|
| 42 |
-
# Process through chatbot service
|
| 43 |
-
response = await chatbot_service.process_user_message(user_message)
|
| 44 |
-
|
| 45 |
-
return response
|
| 46 |
-
|
| 47 |
-
except Exception as e:
|
| 48 |
-
raise HTTPException(status_code=500, detail=f"Error processing message: {str(e)}")
|
| 49 |
-
|
| 50 |
-
@router.post("/chat/audio")
|
| 51 |
-
async def send_audio_message(
|
| 52 |
-
session_id: str = Form(...),
|
| 53 |
-
audio_file: UploadFile = File(...)
|
| 54 |
-
):
|
| 55 |
-
"""Endpoint specifically for audio messages"""
|
| 56 |
-
try:
|
| 57 |
-
audio_data = await audio_file.read()
|
| 58 |
-
audio_base64 = base64.b64encode(audio_data).decode('utf-8')
|
| 59 |
-
|
| 60 |
-
user_message = UserMessage(
|
| 61 |
-
message_id=str(uuid.uuid4()),
|
| 62 |
-
content=audio_base64,
|
| 63 |
-
message_type=MessageType.AUDIO,
|
| 64 |
-
session_id=session_id
|
| 65 |
-
)
|
| 66 |
-
|
| 67 |
-
response = await chatbot_service.process_user_message(user_message)
|
| 68 |
-
return response
|
| 69 |
-
|
| 70 |
-
except Exception as e:
|
| 71 |
-
raise HTTPException(status_code=500, detail=f"Error processing audio: {str(e)}")
|
| 72 |
-
|
| 73 |
-
@router.get("/session/{session_id}/history")
|
| 74 |
-
async def get_session_history(session_id: str):
|
| 75 |
-
"""Get conversation history for a session"""
|
| 76 |
-
history = chatbot_service.get_session_history(session_id)
|
| 77 |
-
if not history:
|
| 78 |
-
raise HTTPException(status_code=404, detail="Session not found")
|
| 79 |
-
return history
|
| 80 |
-
|
| 81 |
-
@router.post("/session/new")
|
| 82 |
-
async def create_new_session():
|
| 83 |
-
"""Create a new chat session"""
|
| 84 |
-
session_id = str(uuid.uuid4())
|
| 85 |
-
chatbot_service._get_or_create_session(session_id)
|
| 86 |
-
return {"session_id": session_id, "message": "New session created"}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
routes/stt_routes.py
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from fastapi import APIRouter, UploadFile, File
|
| 2 |
+
from services.stt_service import speech_to_text
|
| 3 |
+
from models.stt import STTResponse
|
| 4 |
+
import os
|
| 5 |
+
import uuid
|
| 6 |
+
|
| 7 |
+
router = APIRouter(prefix="/stt", tags=["Speech To Text"])
|
| 8 |
+
|
| 9 |
+
@router.post("/", response_model=STTResponse)
|
| 10 |
+
async def convert_stt(file: UploadFile = File(...)):
|
| 11 |
+
temp_name = f"audio/temp/{uuid.uuid4()}_{file.filename}"
|
| 12 |
+
|
| 13 |
+
with open(temp_name, "wb") as f:
|
| 14 |
+
f.write(await file.read())
|
| 15 |
+
|
| 16 |
+
text = speech_to_text(temp_name)
|
| 17 |
+
|
| 18 |
+
os.remove(temp_name)
|
| 19 |
+
|
| 20 |
+
return STTResponse(text=text)
|
routes/tts_routes.py
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from fastapi import APIRouter
|
| 2 |
+
from fastapi.responses import FileResponse
|
| 3 |
+
from models.tts import TTSRequest
|
| 4 |
+
from services.tts_service import text_to_speech
|
| 5 |
+
|
| 6 |
+
router = APIRouter(prefix="/tts", tags=["Text To Speech"])
|
| 7 |
+
|
| 8 |
+
@router.post("/")
|
| 9 |
+
async def generate_tts(request: TTSRequest):
|
| 10 |
+
output_path = text_to_speech(
|
| 11 |
+
text=request.text,
|
| 12 |
+
voice=request.voice,
|
| 13 |
+
fmt=request.format
|
| 14 |
+
)
|
| 15 |
+
return FileResponse(output_path, filename=output_path.name)
|
services/chatbot_service.py
DELETED
|
@@ -1,114 +0,0 @@
|
|
| 1 |
-
import base64
|
| 2 |
-
import uuid
|
| 3 |
-
from typing import Optional, Dict, Any
|
| 4 |
-
from datetime import datetime
|
| 5 |
-
from models.audio import ChatbotResponse, UserMessage
|
| 6 |
-
|
| 7 |
-
class ChatbotService:
|
| 8 |
-
def __init__(self):
|
| 9 |
-
self.sessions: Dict[str, Dict[str, Any]] = {}
|
| 10 |
-
self.initialized = False
|
| 11 |
-
self._initialize_services()
|
| 12 |
-
|
| 13 |
-
def _initialize_services(self):
|
| 14 |
-
"""Initialize services"""
|
| 15 |
-
try:
|
| 16 |
-
from services.tts_service import SimpleTTSService
|
| 17 |
-
self.tts_service = SimpleTTSService()
|
| 18 |
-
print("✓ TTS service initialized")
|
| 19 |
-
except ImportError as e:
|
| 20 |
-
print(f"⚠️ TTS service not available: {e}")
|
| 21 |
-
self.tts_service = None
|
| 22 |
-
|
| 23 |
-
try:
|
| 24 |
-
from services.stt_service import STTService
|
| 25 |
-
self.stt_service = STTService()
|
| 26 |
-
print("✓ STT service initialized")
|
| 27 |
-
except ImportError as e:
|
| 28 |
-
print(f"⚠️ STT service not available: {e}")
|
| 29 |
-
self.stt_service = None
|
| 30 |
-
|
| 31 |
-
async def initialize(self):
|
| 32 |
-
"""Initialize the chatbot service"""
|
| 33 |
-
if self.stt_service:
|
| 34 |
-
await self.stt_service.initialize()
|
| 35 |
-
self.initialized = True
|
| 36 |
-
print("✓ Chatbot Service fully initialized")
|
| 37 |
-
|
| 38 |
-
async def process_user_message(self, user_message: UserMessage) -> ChatbotResponse:
|
| 39 |
-
# Update session
|
| 40 |
-
session = self._get_or_create_session(user_message.session_id)
|
| 41 |
-
|
| 42 |
-
# Process message based on type
|
| 43 |
-
if user_message.message_type == "audio" and self.stt_service:
|
| 44 |
-
text_input = await self.stt_service.transcribe_audio_base64(user_message.content)
|
| 45 |
-
elif user_message.message_type == "audio":
|
| 46 |
-
text_input = "[Voice message received]"
|
| 47 |
-
else:
|
| 48 |
-
text_input = user_message.content
|
| 49 |
-
|
| 50 |
-
# Add to conversation history
|
| 51 |
-
session["conversation_history"].append({
|
| 52 |
-
"role": "user",
|
| 53 |
-
"content": text_input,
|
| 54 |
-
"timestamp": user_message.timestamp
|
| 55 |
-
})
|
| 56 |
-
|
| 57 |
-
# Generate chatbot response
|
| 58 |
-
chatbot_text = await self._generate_chatbot_response(text_input, session)
|
| 59 |
-
|
| 60 |
-
# TTS: Convert response to audio
|
| 61 |
-
audio_base64 = None
|
| 62 |
-
if self.tts_service:
|
| 63 |
-
audio_base64 = await self.tts_service.text_to_speech_base64(chatbot_text)
|
| 64 |
-
|
| 65 |
-
# Create response
|
| 66 |
-
response = ChatbotResponse(
|
| 67 |
-
response_id=str(uuid.uuid4()),
|
| 68 |
-
text_response=chatbot_text,
|
| 69 |
-
audio_response=audio_base64,
|
| 70 |
-
session_id=user_message.session_id
|
| 71 |
-
)
|
| 72 |
-
|
| 73 |
-
# Add response to history
|
| 74 |
-
session["conversation_history"].append({
|
| 75 |
-
"role": "assistant",
|
| 76 |
-
"content": chatbot_text,
|
| 77 |
-
"audio_response": audio_base64,
|
| 78 |
-
"timestamp": response.timestamp
|
| 79 |
-
})
|
| 80 |
-
|
| 81 |
-
return response
|
| 82 |
-
|
| 83 |
-
async def _generate_chatbot_response(self, user_input: str, session: Dict[str, Any]) -> str:
|
| 84 |
-
"""Chatbot response generation logic"""
|
| 85 |
-
user_input_lower = user_input.lower()
|
| 86 |
-
|
| 87 |
-
if any(greet in user_input_lower for greet in ["hello", "hi", "hey"]):
|
| 88 |
-
return "Hello! I'm your voice assistant. How can I help you today?"
|
| 89 |
-
|
| 90 |
-
if "time" in user_input_lower:
|
| 91 |
-
return f"The current time is {datetime.now().strftime('%H:%M')}"
|
| 92 |
-
|
| 93 |
-
if "help" in user_input_lower:
|
| 94 |
-
return "I can process both text and voice messages. Try sending me a voice note!"
|
| 95 |
-
|
| 96 |
-
if "name" in user_input_lower:
|
| 97 |
-
return "I'm your AI voice assistant. I'm here to help with your questions!"
|
| 98 |
-
|
| 99 |
-
if "voice" in user_input_lower or "audio" in user_input_lower:
|
| 100 |
-
return "Yes! I support voice messages. You can speak to me and I'll respond with audio too!"
|
| 101 |
-
|
| 102 |
-
return f"I understand you said: '{user_input}'. How can I assist you further?"
|
| 103 |
-
|
| 104 |
-
def _get_or_create_session(self, session_id: str) -> Dict[str, Any]:
|
| 105 |
-
if session_id not in self.sessions:
|
| 106 |
-
self.sessions[session_id] = {
|
| 107 |
-
"conversation_history": [],
|
| 108 |
-
"created_at": datetime.now(),
|
| 109 |
-
"last_activity": datetime.now()
|
| 110 |
-
}
|
| 111 |
-
return self.sessions[session_id]
|
| 112 |
-
|
| 113 |
-
def get_session_history(self, session_id: str) -> Optional[Dict[str, Any]]:
    """Return the stored session dict for *session_id*, or None if unknown."""
    try:
        return self.sessions[session_id]
    except KeyError:
        return None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
services/gemini_client.py
DELETED
|
@@ -1,9 +0,0 @@
|
|
| 1 |
-
# This file is no longer needed since we're using free models
import logging

logger = logging.getLogger(__name__)


def get_gemini_client():
    """Deprecated entry point kept so stale imports fail loudly.

    Raises:
        RuntimeError: always — the Gemini client has been replaced by free
            models. (Was a bare ``Exception``; ``RuntimeError`` is a subclass,
            so callers catching ``Exception`` are unaffected.)
    """
    logger.warning("Gemini client is deprecated - using free models instead")
    raise RuntimeError("Gemini API is no longer used. Free models are being used instead.")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
services/stt_service.py
CHANGED
|
@@ -1,94 +1,13 @@
|
|
| 1 |
-
import base64
|
| 2 |
-
import io
|
| 3 |
-
import tempfile
|
| 4 |
import os
|
| 5 |
-
import
|
| 6 |
-
import
|
| 7 |
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
async def transcribe_audio_base64(self, audio_base64: str, language: str = "en-US") -> str:
    """Placeholder transcription: acknowledge the audio instead of decoding speech.

    Decodes the base64 payload, summarizes it via ``_get_audio_info`` and
    returns a canned message; a real STT backend (Whisper, Vosk, ...) would
    plug in here. Never raises — failures yield an apology string.
    """
    try:
        raw_audio = base64.b64decode(audio_base64)
        description = await self._get_audio_info(raw_audio)
        return f"[Audio received: {description}. STT service needs model configuration.]"
    except Exception as err:
        print(f"Transcription error: {err}")
        return "Sorry, I couldn't process the audio message."
|
| 33 |
-
|
| 34 |
-
async def _get_audio_info(self, audio_data: bytes) -> str:
|
| 35 |
-
"""Get basic information about the audio file"""
|
| 36 |
-
try:
|
| 37 |
-
with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
|
| 38 |
-
temp_path = temp_file.name
|
| 39 |
-
temp_file.write(audio_data)
|
| 40 |
-
|
| 41 |
-
try:
|
| 42 |
-
with wave.open(temp_path, 'rb') as wav_file:
|
| 43 |
-
frames = wav_file.getnframes()
|
| 44 |
-
rate = wav_file.getframerate()
|
| 45 |
-
duration = frames / float(rate)
|
| 46 |
-
return f"Duration: {duration:.2f}s, Sample Rate: {rate}Hz"
|
| 47 |
-
except:
|
| 48 |
-
return f"Size: {len(audio_data)} bytes"
|
| 49 |
-
|
| 50 |
-
finally:
|
| 51 |
-
if os.path.exists(temp_path):
|
| 52 |
-
os.unlink(temp_path)
|
| 53 |
-
|
| 54 |
-
# Alternative STT service using Whisper if available
class WhisperSTTService:
    """Optional Whisper-backed STT that degrades gracefully when whisper is absent."""

    def __init__(self):
        self.model = None         # whisper model instance once loaded
        self.initialized = False  # True only after a successful load

    async def initialize(self):
        """Load the Whisper "medium" model; leave the service disabled on failure."""
        try:
            import whisper
            self.model = whisper.load_model("medium")
            self.initialized = True
            print("✓ Whisper STT Service initialized")
        except ImportError:
            print("⚠️ Whisper not available. Install with: pip install openai-whisper")
            self.initialized = False
        except Exception as e:
            print(f"⚠️ Whisper initialization failed: {e}")
            self.initialized = False

    async def transcribe_audio_base64(self, audio_base64: str, language: str = "en") -> str:
        """Transcribe base64-encoded audio with Whisper; returns apology strings on failure.

        Args:
            audio_base64: Base64 payload of an audio file Whisper can read.
            language: Whisper language hint (e.g. "en").
        """
        if not self.initialized:
            return "STT service not available. Please install Whisper."

        temp_path = None
        try:
            audio_data = base64.b64decode(audio_base64)

            # Whisper works on file paths, so spill the bytes to a temp file.
            with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
                temp_path = temp_file.name
                temp_file.write(audio_data)

            result = self.model.transcribe(temp_path, language=language)
            return result["text"]
        except Exception as e:
            print(f"Whisper transcription error: {e}")
            return "Sorry, I couldn't transcribe the audio."
        finally:
            # Fix: the original leaked the temp file whenever transcription raised.
            if temp_path and os.path.exists(temp_path):
                os.unlink(temp_path)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import os
|
| 2 |
+
from groq import Groq
|
| 3 |
+
from config import client
|
| 4 |
|
| 5 |
+
def speech_to_text(file_path: str, model: str = "whisper-large-v3-turbo") -> str:
    """Transcribe the audio file at *file_path* with Groq's Whisper endpoint.

    Args:
        file_path: Path to a local audio file (wav/mp3/...).
        model: Groq transcription model id (generalized from the hard-coded
            default; existing callers are unaffected).

    Returns:
        The transcribed text from the verbose JSON response.
    """
    with open(file_path, "rb") as f:
        transcription = client.audio.transcriptions.create(
            # Groq expects a (filename, bytes) tuple for the upload.
            file=(file_path, f.read()),
            model=model,
            temperature=0,
            response_format="verbose_json",
        )
    return transcription.text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
services/tts_service.py
CHANGED
|
@@ -1,48 +1,15 @@
|
|
| 1 |
-
import
|
| 2 |
-
import
|
| 3 |
-
import os
|
| 4 |
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
except ImportError:
|
| 18 |
-
print("⚠️ SpeechRecognition not available. STT will return placeholder text.")
|
| 19 |
-
self.initialized = False
|
| 20 |
-
|
| 21 |
-
async def transcribe_audio_base64(self, audio_base64: str, language: str = "en-US") -> str:
    """Transcribe base64-encoded audio via the free Google recognizer.

    Returns a placeholder string when the SpeechRecognition backend is not
    initialized; otherwise returns either the transcription or a short
    human-readable error message — this method never raises.
    """
    # Graceful degradation: without the optional dependency, only acknowledge receipt.
    if not self.initialized or not self.recognizer:
        return "[Audio received - install SpeechRecognition for transcription]"

    try:
        import speech_recognition as sr
        import io

        # Decode audio
        audio_data = base64.b64decode(audio_base64)

        # Use SpeechRecognition
        # NOTE(review): sr.AudioFile handles WAV/AIFF/FLAC — confirm callers never
        # send compressed formats such as webm/ogg.
        audio_file = sr.AudioFile(io.BytesIO(audio_data))

        with audio_file as source:
            # Adjust for ambient noise
            self.recognizer.adjust_for_ambient_noise(source)
            audio = self.recognizer.record(source)

        # Online call to Google's recognizer — requires network access.
        return self.recognizer.recognize_google(audio, language=language)

    except sr.UnknownValueError:
        # NOTE(review): if `import speech_recognition` above ever fails, `sr` is
        # unbound and evaluating these handlers would raise NameError; the
        # `initialized` guard is assumed to prevent that path — verify.
        return "Could not understand audio"
    except sr.RequestError as e:
        return f"Error with speech recognition service: {e}"
    except Exception as e:
        return f"Error processing audio: {str(e)}"
|
|
|
|
| 1 |
+
from pathlib import Path
|
| 2 |
+
from config import client
|
|
|
|
| 3 |
|
| 4 |
+
def text_to_speech(text: str, voice: str = "Aaliyah-PlayAI", fmt: str = "wav"):
    """Synthesize *text* to an audio file with Groq's PlayAI TTS.

    Args:
        text: The text to speak.
        voice: PlayAI voice name.
        fmt: Output format/extension (e.g. "wav", "mp3").

    Returns:
        Path to the written audio file (audio/temp/output.<fmt>).

    Note: the output path is fixed, so concurrent calls overwrite each other.
    """
    speech_path = Path("audio/temp") / f"output.{fmt}"
    # Fix: stream_to_file fails if the target directory does not exist yet
    # (e.g. on a fresh checkout), so create it first.
    speech_path.parent.mkdir(parents=True, exist_ok=True)

    response = client.audio.speech.create(
        model="playai-tts",
        voice=voice,
        response_format=fmt,
        input=text,
    )

    response.stream_to_file(speech_path)
    return speech_path
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|