malek-messaoudii
commited on
Commit
·
2da4544
1
Parent(s):
220b6c2
Add groq apis
Browse files- config.py +39 -44
- main.py +47 -167
- models/audio.py +0 -30
- models/stt.py +4 -0
- models/tts.py +6 -0
- routes/__init__.py +1 -1
- routes/audio.py +0 -86
- routes/stt_routes.py +20 -0
- routes/tts_routes.py +15 -0
- services/chatbot_service.py +0 -114
- services/gemini_client.py +0 -9
- services/stt_service.py +11 -92
- services/tts_service.py +14 -47
config.py
CHANGED
|
@@ -5,78 +5,73 @@ from pathlib import Path
|
|
| 5 |
from dotenv import load_dotenv
|
| 6 |
import logging
|
| 7 |
|
| 8 |
-
# Configure logging
|
| 9 |
logger = logging.getLogger(__name__)
|
| 10 |
|
| 11 |
-
# Load
|
| 12 |
load_dotenv()
|
| 13 |
|
| 14 |
-
#
|
| 15 |
API_DIR = Path(__file__).parent
|
| 16 |
PROJECT_ROOT = API_DIR.parent
|
| 17 |
|
| 18 |
-
# ============ HUGGING FACE ============
|
| 19 |
HUGGINGFACE_API_KEY = os.getenv("HUGGINGFACE_API_KEY", "")
|
| 20 |
HUGGINGFACE_STANCE_MODEL_ID = os.getenv("HUGGINGFACE_STANCE_MODEL_ID")
|
| 21 |
HUGGINGFACE_LABEL_MODEL_ID = os.getenv("HUGGINGFACE_LABEL_MODEL_ID")
|
| 22 |
|
| 23 |
-
# ============
|
| 24 |
-
|
| 25 |
-
API_DESCRIPTION = "Complete NLP system with stance detection, KPA, and voice chatbot using free models"
|
| 26 |
-
API_VERSION = "1.0.0"
|
| 27 |
|
| 28 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 29 |
HOST = os.getenv("HOST", "0.0.0.0")
|
| 30 |
-
PORT = int(os.getenv("PORT",
|
| 31 |
RELOAD = os.getenv("RELOAD", "false").lower() == "true"
|
| 32 |
|
| 33 |
-
# ============ CORS
|
| 34 |
CORS_ORIGINS = ["*"]
|
| 35 |
CORS_CREDENTIALS = True
|
| 36 |
CORS_METHODS = ["*"]
|
| 37 |
CORS_HEADERS = ["*"]
|
| 38 |
|
| 39 |
-
# ============ FREE VOICE MODELS ============
|
| 40 |
-
# Speech-to-Text
|
| 41 |
-
STT_MODEL_ID = "openai/whisper-base"
|
| 42 |
-
STT_DEVICE = "cpu" # Change to "cuda" if GPU available
|
| 43 |
-
|
| 44 |
-
# Text-to-Speech
|
| 45 |
-
TTS_ENGINE = "gtts" # Google Text-to-Speech (free)
|
| 46 |
-
TTS_LANGUAGE = "en"
|
| 47 |
-
|
| 48 |
-
# Chatbot
|
| 49 |
-
CHATBOT_MODEL_ID = "microsoft/DialoGPT-medium"
|
| 50 |
-
CHATBOT_DEVICE = "cpu" # Change to "cuda" if GPU available
|
| 51 |
-
|
| 52 |
# ============ AUDIO SETTINGS ============
|
| 53 |
-
ALLOWED_AUDIO_TYPES = {
|
| 54 |
-
"audio/wav",
|
| 55 |
-
"audio/x-wav",
|
| 56 |
-
"audio/mpeg",
|
| 57 |
-
"audio/mp3",
|
| 58 |
-
"audio/mp4",
|
| 59 |
-
"audio/m4a"
|
| 60 |
-
}
|
| 61 |
-
|
| 62 |
-
MAX_TEXT_LENGTH = 500
|
| 63 |
-
MIN_TEXT_LENGTH = 1
|
| 64 |
MAX_AUDIO_SIZE = 10 * 1024 * 1024 # 10MB
|
| 65 |
AUDIO_SAMPLE_RATE = 16000
|
| 66 |
AUDIO_DURATION_LIMIT = 120 # seconds
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 67 |
|
| 68 |
-
# ============ MODEL
|
| 69 |
PRELOAD_MODELS_ON_STARTUP = True
|
| 70 |
LOAD_STANCE_MODEL = True
|
| 71 |
LOAD_KPA_MODEL = True
|
| 72 |
-
LOAD_STT_MODEL =
|
| 73 |
-
LOAD_CHATBOT_MODEL =
|
| 74 |
-
LOAD_TTS_MODEL = False
|
| 75 |
|
| 76 |
logger.info("="*60)
|
| 77 |
logger.info("✓ Configuration loaded successfully")
|
| 78 |
-
logger.info("
|
| 79 |
-
logger.info(f"
|
| 80 |
-
logger.info(f"
|
| 81 |
-
logger.info(f"
|
| 82 |
-
logger.info("
|
|
|
|
|
|
| 5 |
from dotenv import load_dotenv
|
| 6 |
import logging
|
| 7 |
|
|
|
|
| 8 |
logger = logging.getLogger(__name__)
|
| 9 |
|
| 10 |
+
# Load .env variables
|
| 11 |
load_dotenv()
|
| 12 |
|
| 13 |
+
# ============ DIRECTORIES ============
|
| 14 |
API_DIR = Path(__file__).parent
|
| 15 |
PROJECT_ROOT = API_DIR.parent
|
| 16 |
|
| 17 |
+
# ============ HUGGING FACE MODELS ============
|
| 18 |
HUGGINGFACE_API_KEY = os.getenv("HUGGINGFACE_API_KEY", "")
|
| 19 |
HUGGINGFACE_STANCE_MODEL_ID = os.getenv("HUGGINGFACE_STANCE_MODEL_ID")
|
| 20 |
HUGGINGFACE_LABEL_MODEL_ID = os.getenv("HUGGINGFACE_LABEL_MODEL_ID")
|
| 21 |
|
| 22 |
+
# ============ GROQ MODELS ============
|
| 23 |
+
GROQ_API_KEY = os.getenv("GROQ_API_KEY", "")
|
|
|
|
|
|
|
| 24 |
|
| 25 |
+
# **Speech-to-Text**
|
| 26 |
+
GROQ_STT_MODEL = "whisper-large-v3-turbo"
|
| 27 |
+
|
| 28 |
+
# **Text-to-Speech**
|
| 29 |
+
GROQ_TTS_MODEL = "playai-tts"
|
| 30 |
+
GROQ_TTS_VOICE = "Aaliyah-PlayAI"
|
| 31 |
+
GROQ_TTS_FORMAT = "wav"
|
| 32 |
+
|
| 33 |
+
# **Chat Model**
|
| 34 |
+
GROQ_CHAT_MODEL = "llama3-70b-8192"
|
| 35 |
+
|
| 36 |
+
# ============ API META ============
|
| 37 |
+
API_TITLE = "NLP Debater - Voice Chatbot"
|
| 38 |
+
API_DESCRIPTION = "NLP stance detection, KPA, and Groq STT/TTS chatbot"
|
| 39 |
+
API_VERSION = "2.0.0"
|
| 40 |
+
|
| 41 |
+
# ============ SERVER ============
|
| 42 |
HOST = os.getenv("HOST", "0.0.0.0")
|
| 43 |
+
PORT = int(os.getenv("PORT", 7860))
|
| 44 |
RELOAD = os.getenv("RELOAD", "false").lower() == "true"
|
| 45 |
|
| 46 |
+
# ============ CORS ============
|
| 47 |
CORS_ORIGINS = ["*"]
|
| 48 |
CORS_CREDENTIALS = True
|
| 49 |
CORS_METHODS = ["*"]
|
| 50 |
CORS_HEADERS = ["*"]
|
| 51 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 52 |
# ============ AUDIO SETTINGS ============
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 53 |
MAX_AUDIO_SIZE = 10 * 1024 * 1024 # 10MB
|
| 54 |
AUDIO_SAMPLE_RATE = 16000
|
| 55 |
AUDIO_DURATION_LIMIT = 120 # seconds
|
| 56 |
+
ALLOWED_AUDIO_TYPES = {
|
| 57 |
+
"audio/wav", "audio/x-wav",
|
| 58 |
+
"audio/mpeg", "audio/mp3",
|
| 59 |
+
"audio/mp4", "audio/m4a"
|
| 60 |
+
}
|
| 61 |
|
| 62 |
+
# ============ MODEL PRELOADING ============
|
| 63 |
PRELOAD_MODELS_ON_STARTUP = True
|
| 64 |
LOAD_STANCE_MODEL = True
|
| 65 |
LOAD_KPA_MODEL = True
|
| 66 |
+
LOAD_STT_MODEL = False # Groq STT = no preload
|
| 67 |
+
LOAD_CHATBOT_MODEL = False # Groq Chat = no preload
|
| 68 |
+
LOAD_TTS_MODEL = False # Groq TTS = no preload
|
| 69 |
|
| 70 |
logger.info("="*60)
|
| 71 |
logger.info("✓ Configuration loaded successfully")
|
| 72 |
+
logger.info(f" HF Stance Model : {HUGGINGFACE_STANCE_MODEL_ID}")
|
| 73 |
+
logger.info(f" HF Label Model : {HUGGINGFACE_LABEL_MODEL_ID}")
|
| 74 |
+
logger.info(f" GROQ STT Model : {GROQ_STT_MODEL}")
|
| 75 |
+
logger.info(f" GROQ TTS Model : {GROQ_TTS_MODEL}")
|
| 76 |
+
logger.info(f" GROQ Chat Model : {GROQ_CHAT_MODEL}")
|
| 77 |
+
logger.info("="*60)
|
main.py
CHANGED
|
@@ -1,52 +1,17 @@
|
|
| 1 |
import sys
|
| 2 |
from pathlib import Path
|
| 3 |
-
import os
|
| 4 |
-
import subprocess
|
| 5 |
import logging
|
| 6 |
from contextlib import asynccontextmanager
|
| 7 |
|
| 8 |
-
# Configure logging first
|
| 9 |
logging.basicConfig(
|
| 10 |
level=logging.INFO,
|
| 11 |
-
format=
|
| 12 |
)
|
| 13 |
logger = logging.getLogger(__name__)
|
| 14 |
|
| 15 |
-
# Add
|
| 16 |
app_dir = Path(__file__).parent
|
| 17 |
-
|
| 18 |
-
sys.path.insert(0, str(app_dir))
|
| 19 |
-
|
| 20 |
-
def install_ffmpeg():
|
| 21 |
-
"""Install ffmpeg on system (required for audio processing)"""
|
| 22 |
-
try:
|
| 23 |
-
result = subprocess.run(["which", "ffmpeg"], capture_output=True, text=True)
|
| 24 |
-
if result.returncode == 0:
|
| 25 |
-
version_result = subprocess.run(["ffmpeg", "-version"], capture_output=True, text=True)
|
| 26 |
-
if version_result.returncode == 0:
|
| 27 |
-
version = version_result.stdout.split()[2]
|
| 28 |
-
logger.info(f"✓ ffmpeg already installed: {version}")
|
| 29 |
-
return True
|
| 30 |
-
|
| 31 |
-
logger.info("Installing ffmpeg...")
|
| 32 |
-
subprocess.run(["apt-get", "update"], check=True, capture_output=True)
|
| 33 |
-
subprocess.run(["apt-get", "install", "-y", "ffmpeg"], check=True, capture_output=True)
|
| 34 |
-
|
| 35 |
-
verify = subprocess.run(["ffmpeg", "-version"], capture_output=True, text=True)
|
| 36 |
-
if verify.returncode == 0:
|
| 37 |
-
version = verify.stdout.split()[2]
|
| 38 |
-
logger.info(f"✓ ffmpeg installed successfully: {version}")
|
| 39 |
-
return True
|
| 40 |
-
return False
|
| 41 |
-
except Exception as e:
|
| 42 |
-
logger.warning(f"⚠️ ffmpeg installation warning: {e}")
|
| 43 |
-
return False
|
| 44 |
-
|
| 45 |
-
# Install system dependencies first
|
| 46 |
-
logger.info("="*60)
|
| 47 |
-
logger.info("Checking system dependencies...")
|
| 48 |
-
logger.info("="*60)
|
| 49 |
-
install_ffmpeg()
|
| 50 |
|
| 51 |
from fastapi import FastAPI
|
| 52 |
from fastapi.middleware.cors import CORSMiddleware
|
|
@@ -56,82 +21,49 @@ from config import (
|
|
| 56 |
API_TITLE, API_DESCRIPTION, API_VERSION,
|
| 57 |
HUGGINGFACE_API_KEY, HUGGINGFACE_STANCE_MODEL_ID, HUGGINGFACE_LABEL_MODEL_ID,
|
| 58 |
HOST, PORT, RELOAD,
|
| 59 |
-
CORS_ORIGINS,
|
| 60 |
-
PRELOAD_MODELS_ON_STARTUP, LOAD_STANCE_MODEL, LOAD_KPA_MODEL
|
| 61 |
-
LOAD_STT_MODEL, LOAD_CHATBOT_MODEL, STT_MODEL_ID, CHATBOT_MODEL_ID
|
| 62 |
)
|
| 63 |
|
| 64 |
@asynccontextmanager
|
| 65 |
async def lifespan(app: FastAPI):
|
| 66 |
-
"""Load models on startup and cleanup on shutdown"""
|
| 67 |
logger.info("="*60)
|
| 68 |
-
logger.info("🚀
|
| 69 |
logger.info("="*60)
|
| 70 |
-
|
| 71 |
if PRELOAD_MODELS_ON_STARTUP:
|
| 72 |
-
|
|
|
|
| 73 |
if LOAD_STANCE_MODEL:
|
| 74 |
try:
|
| 75 |
-
logger.info(f"Loading Stance Model: {HUGGINGFACE_STANCE_MODEL_ID}")
|
| 76 |
from services.stance_model_manager import load_model as load_stance
|
| 77 |
load_stance(HUGGINGFACE_STANCE_MODEL_ID, HUGGINGFACE_API_KEY)
|
| 78 |
-
logger.info("✓ Stance model loaded
|
| 79 |
except Exception as e:
|
| 80 |
-
logger.error(f"✗
|
| 81 |
-
|
| 82 |
-
# Load KPA
|
| 83 |
if LOAD_KPA_MODEL:
|
| 84 |
try:
|
| 85 |
-
logger.info(f"Loading KPA Model: {HUGGINGFACE_LABEL_MODEL_ID}")
|
| 86 |
from services.label_model_manager import load_model as load_kpa
|
| 87 |
load_kpa(HUGGINGFACE_LABEL_MODEL_ID, HUGGINGFACE_API_KEY)
|
| 88 |
-
logger.info("✓ KPA model loaded
|
| 89 |
-
except Exception as e:
|
| 90 |
-
logger.error(f"✗ KPA model loading failed: {str(e)}")
|
| 91 |
-
|
| 92 |
-
# Load STT Model (Speech-to-Text)
|
| 93 |
-
if LOAD_STT_MODEL:
|
| 94 |
-
try:
|
| 95 |
-
logger.info(f"Loading STT Model: {STT_MODEL_ID}")
|
| 96 |
-
from services.stt_service import STTService
|
| 97 |
-
stt_service = STTService()
|
| 98 |
-
await stt_service.initialize()
|
| 99 |
-
logger.info("✓ STT model loaded successfully")
|
| 100 |
except Exception as e:
|
| 101 |
-
logger.error(f"✗
|
| 102 |
-
|
| 103 |
-
# Load Chatbot Model
|
| 104 |
-
if LOAD_CHATBOT_MODEL:
|
| 105 |
-
try:
|
| 106 |
-
logger.info(f"Loading Chatbot Model: {CHATBOT_MODEL_ID}")
|
| 107 |
-
from services.chatbot_service import ChatbotService
|
| 108 |
-
chatbot_service = ChatbotService()
|
| 109 |
-
await chatbot_service.initialize()
|
| 110 |
-
logger.info("✓ Chatbot model loaded successfully")
|
| 111 |
-
except Exception as e:
|
| 112 |
-
logger.error(f"✗ Chatbot model loading failed: {str(e)}")
|
| 113 |
-
|
| 114 |
-
logger.info("="*60)
|
| 115 |
-
logger.info("✓ API startup complete - Ready to serve requests")
|
| 116 |
-
logger.info(f"📚 API Docs: http://{HOST}:{PORT}/docs")
|
| 117 |
-
logger.info("="*60)
|
| 118 |
-
|
| 119 |
-
yield # Application runs here
|
| 120 |
-
|
| 121 |
-
# Shutdown
|
| 122 |
-
logger.info("Shutting down API...")
|
| 123 |
|
| 124 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 125 |
app = FastAPI(
|
| 126 |
title=API_TITLE,
|
| 127 |
description=API_DESCRIPTION,
|
| 128 |
version=API_VERSION,
|
| 129 |
-
|
| 130 |
-
redoc_url="/redoc",
|
| 131 |
-
lifespan=lifespan,
|
| 132 |
)
|
| 133 |
|
| 134 |
-
#
|
| 135 |
app.add_middleware(
|
| 136 |
CORSMiddleware,
|
| 137 |
allow_origins=CORS_ORIGINS,
|
|
@@ -140,98 +72,46 @@ app.add_middleware(
|
|
| 140 |
allow_headers=CORS_HEADERS,
|
| 141 |
)
|
| 142 |
|
| 143 |
-
#
|
|
|
|
|
|
|
| 144 |
try:
|
| 145 |
-
from routes.
|
| 146 |
-
app.include_router(
|
| 147 |
-
logger.info("✓
|
| 148 |
except Exception as e:
|
| 149 |
-
logger.warning(f"
|
| 150 |
|
|
|
|
| 151 |
try:
|
| 152 |
-
from routes.
|
| 153 |
-
app.include_router(
|
| 154 |
-
logger.info("✓
|
| 155 |
except Exception as e:
|
| 156 |
-
logger.warning(f"
|
| 157 |
|
|
|
|
| 158 |
try:
|
| 159 |
from routes import api_router
|
| 160 |
app.include_router(api_router)
|
| 161 |
-
logger.info("✓ API routes
|
| 162 |
except Exception as e:
|
| 163 |
-
logger.warning(f"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 164 |
|
| 165 |
-
# Health check endpoints
|
| 166 |
@app.get("/")
|
| 167 |
async def root():
|
| 168 |
-
"""Root endpoint"""
|
| 169 |
return {
|
| 170 |
-
"message": "NLP Debater API with Voice
|
| 171 |
-
"status": "healthy",
|
| 172 |
-
"version": API_VERSION,
|
| 173 |
"docs": "/docs",
|
| 174 |
-
"
|
| 175 |
-
|
| 176 |
-
"audio_processing": "/docs#/Audio%20Processing",
|
| 177 |
-
"health": "/health",
|
| 178 |
-
"models-status": "/models-status"
|
| 179 |
-
}
|
| 180 |
-
}
|
| 181 |
-
|
| 182 |
-
@app.get("/health")
|
| 183 |
-
async def health_check():
|
| 184 |
-
"""Simple health check"""
|
| 185 |
-
return {"status": "healthy", "message": "API is running"}
|
| 186 |
-
|
| 187 |
-
@app.get("/models-status")
|
| 188 |
-
async def models_status():
|
| 189 |
-
"""Check which models are loaded"""
|
| 190 |
-
status = {
|
| 191 |
-
"stt_model": "unknown",
|
| 192 |
-
"tts_engine": "gtts (free)",
|
| 193 |
-
"chatbot_model": "unknown",
|
| 194 |
-
"stance_model": "unknown",
|
| 195 |
-
"kpa_model": "unknown"
|
| 196 |
}
|
| 197 |
-
|
| 198 |
-
try:
|
| 199 |
-
from services.stt_service import STTService
|
| 200 |
-
stt_service = STTService()
|
| 201 |
-
status["stt_model"] = "loaded" if hasattr(stt_service, 'initialized') and stt_service.initialized else "not loaded"
|
| 202 |
-
except:
|
| 203 |
-
status["stt_model"] = "error"
|
| 204 |
-
|
| 205 |
-
try:
|
| 206 |
-
from services.chatbot_service import ChatbotService
|
| 207 |
-
chatbot_service = ChatbotService()
|
| 208 |
-
status["chatbot_model"] = "loaded" if hasattr(chatbot_service, 'initialized') and chatbot_service.initialized else "not loaded"
|
| 209 |
-
except:
|
| 210 |
-
status["chatbot_model"] = "error"
|
| 211 |
-
|
| 212 |
-
return status
|
| 213 |
-
|
| 214 |
-
@app.get("/check-ffmpeg")
|
| 215 |
-
async def check_ffmpeg():
|
| 216 |
-
"""Check if ffmpeg is installed"""
|
| 217 |
-
try:
|
| 218 |
-
result = subprocess.run(["ffmpeg", "-version"], capture_output=True, text=True)
|
| 219 |
-
if result.returncode == 0:
|
| 220 |
-
version = result.stdout.split('\n')[0]
|
| 221 |
-
return {"status": "available", "version": version}
|
| 222 |
-
else:
|
| 223 |
-
return {"status": "error", "error": result.stderr}
|
| 224 |
-
except FileNotFoundError:
|
| 225 |
-
return {"status": "not found", "error": "ffmpeg is not installed"}
|
| 226 |
|
| 227 |
if __name__ == "__main__":
|
| 228 |
-
|
| 229 |
-
logger.info(f"📚 Documentation: http://{HOST}:{PORT}/docs")
|
| 230 |
-
|
| 231 |
-
uvicorn.run(
|
| 232 |
-
"main:app",
|
| 233 |
-
host=HOST,
|
| 234 |
-
port=PORT,
|
| 235 |
-
reload=RELOAD,
|
| 236 |
-
log_level="info"
|
| 237 |
-
)
|
|
|
|
| 1 |
import sys
|
| 2 |
from pathlib import Path
|
|
|
|
|
|
|
| 3 |
import logging
|
| 4 |
from contextlib import asynccontextmanager
|
| 5 |
|
|
|
|
| 6 |
logging.basicConfig(
|
| 7 |
level=logging.INFO,
|
| 8 |
+
format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
|
| 9 |
)
|
| 10 |
logger = logging.getLogger(__name__)
|
| 11 |
|
| 12 |
+
# Add app directory
|
| 13 |
app_dir = Path(__file__).parent
|
| 14 |
+
sys.path.insert(0, str(app_dir))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
|
| 16 |
from fastapi import FastAPI
|
| 17 |
from fastapi.middleware.cors import CORSMiddleware
|
|
|
|
| 21 |
API_TITLE, API_DESCRIPTION, API_VERSION,
|
| 22 |
HUGGINGFACE_API_KEY, HUGGINGFACE_STANCE_MODEL_ID, HUGGINGFACE_LABEL_MODEL_ID,
|
| 23 |
HOST, PORT, RELOAD,
|
| 24 |
+
CORS_ORIGINS, CORS_METHODS, CORS_HEADERS, CORS_CREDENTIALS,
|
| 25 |
+
PRELOAD_MODELS_ON_STARTUP, LOAD_STANCE_MODEL, LOAD_KPA_MODEL
|
|
|
|
| 26 |
)
|
| 27 |
|
| 28 |
@asynccontextmanager
|
| 29 |
async def lifespan(app: FastAPI):
|
|
|
|
| 30 |
logger.info("="*60)
|
| 31 |
+
logger.info("🚀 API STARTUP - Loading HuggingFace models...")
|
| 32 |
logger.info("="*60)
|
| 33 |
+
|
| 34 |
if PRELOAD_MODELS_ON_STARTUP:
|
| 35 |
+
|
| 36 |
+
# Load stance model
|
| 37 |
if LOAD_STANCE_MODEL:
|
| 38 |
try:
|
|
|
|
| 39 |
from services.stance_model_manager import load_model as load_stance
|
| 40 |
load_stance(HUGGINGFACE_STANCE_MODEL_ID, HUGGINGFACE_API_KEY)
|
| 41 |
+
logger.info("✓ Stance model loaded")
|
| 42 |
except Exception as e:
|
| 43 |
+
logger.error(f"✗ Failed loading stance model: {e}")
|
| 44 |
+
|
| 45 |
+
# Load KPA model
|
| 46 |
if LOAD_KPA_MODEL:
|
| 47 |
try:
|
|
|
|
| 48 |
from services.label_model_manager import load_model as load_kpa
|
| 49 |
load_kpa(HUGGINGFACE_LABEL_MODEL_ID, HUGGINGFACE_API_KEY)
|
| 50 |
+
logger.info("✓ KPA model loaded")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 51 |
except Exception as e:
|
| 52 |
+
logger.error(f"✗ Failed loading KPA model: {e}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 53 |
|
| 54 |
+
logger.info("✓ Startup complete. API ready.")
|
| 55 |
+
yield
|
| 56 |
+
logger.info("🛑 Shutting down...")
|
| 57 |
+
|
| 58 |
+
# ------------- FASTAPI APP -------------
|
| 59 |
app = FastAPI(
|
| 60 |
title=API_TITLE,
|
| 61 |
description=API_DESCRIPTION,
|
| 62 |
version=API_VERSION,
|
| 63 |
+
lifespan=lifespan
|
|
|
|
|
|
|
| 64 |
)
|
| 65 |
|
| 66 |
+
# ------------- CORS -------------
|
| 67 |
app.add_middleware(
|
| 68 |
CORSMiddleware,
|
| 69 |
allow_origins=CORS_ORIGINS,
|
|
|
|
| 72 |
allow_headers=CORS_HEADERS,
|
| 73 |
)
|
| 74 |
|
| 75 |
+
# ============ ROUTES ============
|
| 76 |
+
|
| 77 |
+
# STT route (Groq Whisper)
|
| 78 |
try:
|
| 79 |
+
from routes.stt_routes import router as stt_router
|
| 80 |
+
app.include_router(stt_router, prefix="/api/v1/stt", tags=["Speech To Text"])
|
| 81 |
+
logger.info("✓ STT route loaded (Groq Whisper)")
|
| 82 |
except Exception as e:
|
| 83 |
+
logger.warning(f"⚠ Failed loading STT route: {e}")
|
| 84 |
|
| 85 |
+
# TTS route (Groq PlayAI TTS)
|
| 86 |
try:
|
| 87 |
+
from routes.tts_routes import router as tts_router
|
| 88 |
+
app.include_router(tts_router, prefix="/api/v1/tts", tags=["Text To Speech"])
|
| 89 |
+
logger.info("✓ TTS route loaded (Groq PlayAI TTS)")
|
| 90 |
except Exception as e:
|
| 91 |
+
logger.warning(f"⚠ Failed loading TTS route: {e}")
|
| 92 |
|
| 93 |
+
# Main NLP system routes
|
| 94 |
try:
|
| 95 |
from routes import api_router
|
| 96 |
app.include_router(api_router)
|
| 97 |
+
logger.info("✓ Main API routes loaded")
|
| 98 |
except Exception as e:
|
| 99 |
+
logger.warning(f"⚠ Failed loading main API routes: {e}")
|
| 100 |
+
|
| 101 |
+
# ------------------ BASIC ROUTES ------------------
|
| 102 |
+
|
| 103 |
+
@app.get("/health")
|
| 104 |
+
async def health():
|
| 105 |
+
return {"status": "healthy", "service": "NLP Debater + Groq Voice"}
|
| 106 |
|
|
|
|
| 107 |
@app.get("/")
|
| 108 |
async def root():
|
|
|
|
| 109 |
return {
|
| 110 |
+
"message": "NLP Debater API with Groq Voice Support",
|
|
|
|
|
|
|
| 111 |
"docs": "/docs",
|
| 112 |
+
"voice_stt": "/api/v1/stt",
|
| 113 |
+
"voice_tts": "/api/v1/tts"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 114 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 115 |
|
| 116 |
if __name__ == "__main__":
|
| 117 |
+
uvicorn.run("main:app", host=HOST, port=PORT, reload=RELOAD)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
models/audio.py
DELETED
|
@@ -1,30 +0,0 @@
|
|
| 1 |
-
from pydantic import BaseModel, Field
|
| 2 |
-
from typing import Optional, List, Dict, Any
|
| 3 |
-
from enum import Enum
|
| 4 |
-
from datetime import datetime
|
| 5 |
-
|
| 6 |
-
class MessageType(str, Enum):
|
| 7 |
-
TEXT = "text"
|
| 8 |
-
AUDIO = "audio"
|
| 9 |
-
|
| 10 |
-
class UserMessage(BaseModel):
|
| 11 |
-
message_id: str = Field(..., description="Unique message ID")
|
| 12 |
-
content: str = Field(..., description="Text content or audio base64")
|
| 13 |
-
message_type: MessageType = Field(..., description="Message type")
|
| 14 |
-
session_id: str = Field(..., description="User session ID")
|
| 15 |
-
timestamp: datetime = Field(default_factory=datetime.now)
|
| 16 |
-
|
| 17 |
-
class ChatbotResponse(BaseModel):
|
| 18 |
-
response_id: str = Field(..., description="Unique response ID")
|
| 19 |
-
text_response: str = Field(..., description="Chatbot text response")
|
| 20 |
-
audio_response: Optional[str] = Field(None, description="Audio response in base64")
|
| 21 |
-
audio_url: Optional[str] = Field(None, description="Generated audio URL")
|
| 22 |
-
session_id: str = Field(..., description="User session ID")
|
| 23 |
-
timestamp: datetime = Field(default_factory=datetime.now)
|
| 24 |
-
|
| 25 |
-
class ChatSession(BaseModel):
|
| 26 |
-
session_id: str = Field(..., description="Session ID")
|
| 27 |
-
user_id: Optional[str] = Field(None, description="User ID")
|
| 28 |
-
created_at: datetime = Field(default_factory=datetime.now)
|
| 29 |
-
last_activity: datetime = Field(default_factory=datetime.now)
|
| 30 |
-
conversation_history: List[Dict[str, Any]] = Field(default_factory=list)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
models/stt.py
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from pydantic import BaseModel
|
| 2 |
+
|
| 3 |
+
class STTResponse(BaseModel):
|
| 4 |
+
text: str
|
models/tts.py
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from pydantic import BaseModel
|
| 2 |
+
|
| 3 |
+
class TTSRequest(BaseModel):
|
| 4 |
+
text: str
|
| 5 |
+
voice: str = "Aaliyah-PlayAI"
|
| 6 |
+
format: str = "wav"
|
routes/__init__.py
CHANGED
|
@@ -2,7 +2,7 @@
|
|
| 2 |
|
| 3 |
from fastapi import APIRouter
|
| 4 |
from . import root, health, stance, label
|
| 5 |
-
from routes.
|
| 6 |
# Create main router
|
| 7 |
api_router = APIRouter()
|
| 8 |
|
|
|
|
| 2 |
|
| 3 |
from fastapi import APIRouter
|
| 4 |
from . import root, health, stance, label
|
| 5 |
+
from routes.tts_routes import router as audio_router
|
| 6 |
# Create main router
|
| 7 |
api_router = APIRouter()
|
| 8 |
|
routes/audio.py
DELETED
|
@@ -1,86 +0,0 @@
|
|
| 1 |
-
from fastapi import APIRouter, HTTPException, UploadFile, File, Form
|
| 2 |
-
from fastapi.responses import JSONResponse
|
| 3 |
-
import uuid
|
| 4 |
-
import base64
|
| 5 |
-
from models.audio import UserMessage, ChatbotResponse, MessageType
|
| 6 |
-
from services.chatbot_service import ChatbotService
|
| 7 |
-
|
| 8 |
-
router = APIRouter()
|
| 9 |
-
chatbot_service = ChatbotService()
|
| 10 |
-
|
| 11 |
-
@router.post("/chat/message", response_model=ChatbotResponse)
|
| 12 |
-
async def send_chat_message(
|
| 13 |
-
session_id: str = Form(...),
|
| 14 |
-
message_type: str = Form(...),
|
| 15 |
-
message: str = Form(None),
|
| 16 |
-
audio_file: UploadFile = File(None)
|
| 17 |
-
):
|
| 18 |
-
try:
|
| 19 |
-
# Validate input
|
| 20 |
-
if not message and not audio_file:
|
| 21 |
-
raise HTTPException(status_code=400, detail="Either message or audio file must be provided")
|
| 22 |
-
|
| 23 |
-
if message_type == "audio" and not audio_file:
|
| 24 |
-
raise HTTPException(status_code=400, detail="Audio file required for audio messages")
|
| 25 |
-
|
| 26 |
-
# Process audio file if provided
|
| 27 |
-
content = ""
|
| 28 |
-
if audio_file:
|
| 29 |
-
audio_data = await audio_file.read()
|
| 30 |
-
content = base64.b64encode(audio_data).decode('utf-8')
|
| 31 |
-
else:
|
| 32 |
-
content = message
|
| 33 |
-
|
| 34 |
-
# Create user message
|
| 35 |
-
user_message = UserMessage(
|
| 36 |
-
message_id=str(uuid.uuid4()),
|
| 37 |
-
content=content,
|
| 38 |
-
message_type=MessageType(message_type),
|
| 39 |
-
session_id=session_id
|
| 40 |
-
)
|
| 41 |
-
|
| 42 |
-
# Process through chatbot service
|
| 43 |
-
response = await chatbot_service.process_user_message(user_message)
|
| 44 |
-
|
| 45 |
-
return response
|
| 46 |
-
|
| 47 |
-
except Exception as e:
|
| 48 |
-
raise HTTPException(status_code=500, detail=f"Error processing message: {str(e)}")
|
| 49 |
-
|
| 50 |
-
@router.post("/chat/audio")
|
| 51 |
-
async def send_audio_message(
|
| 52 |
-
session_id: str = Form(...),
|
| 53 |
-
audio_file: UploadFile = File(...)
|
| 54 |
-
):
|
| 55 |
-
"""Endpoint specifically for audio messages"""
|
| 56 |
-
try:
|
| 57 |
-
audio_data = await audio_file.read()
|
| 58 |
-
audio_base64 = base64.b64encode(audio_data).decode('utf-8')
|
| 59 |
-
|
| 60 |
-
user_message = UserMessage(
|
| 61 |
-
message_id=str(uuid.uuid4()),
|
| 62 |
-
content=audio_base64,
|
| 63 |
-
message_type=MessageType.AUDIO,
|
| 64 |
-
session_id=session_id
|
| 65 |
-
)
|
| 66 |
-
|
| 67 |
-
response = await chatbot_service.process_user_message(user_message)
|
| 68 |
-
return response
|
| 69 |
-
|
| 70 |
-
except Exception as e:
|
| 71 |
-
raise HTTPException(status_code=500, detail=f"Error processing audio: {str(e)}")
|
| 72 |
-
|
| 73 |
-
@router.get("/session/{session_id}/history")
|
| 74 |
-
async def get_session_history(session_id: str):
|
| 75 |
-
"""Get conversation history for a session"""
|
| 76 |
-
history = chatbot_service.get_session_history(session_id)
|
| 77 |
-
if not history:
|
| 78 |
-
raise HTTPException(status_code=404, detail="Session not found")
|
| 79 |
-
return history
|
| 80 |
-
|
| 81 |
-
@router.post("/session/new")
|
| 82 |
-
async def create_new_session():
|
| 83 |
-
"""Create a new chat session"""
|
| 84 |
-
session_id = str(uuid.uuid4())
|
| 85 |
-
chatbot_service._get_or_create_session(session_id)
|
| 86 |
-
return {"session_id": session_id, "message": "New session created"}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
routes/stt_routes.py
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from fastapi import APIRouter, UploadFile, File
|
| 2 |
+
from services.stt_service import speech_to_text
|
| 3 |
+
from models.stt import STTResponse
|
| 4 |
+
import os
|
| 5 |
+
import uuid
|
| 6 |
+
|
| 7 |
+
router = APIRouter(prefix="/stt", tags=["Speech To Text"])
|
| 8 |
+
|
| 9 |
+
@router.post("/", response_model=STTResponse)
|
| 10 |
+
async def convert_stt(file: UploadFile = File(...)):
|
| 11 |
+
temp_name = f"audio/temp/{uuid.uuid4()}_{file.filename}"
|
| 12 |
+
|
| 13 |
+
with open(temp_name, "wb") as f:
|
| 14 |
+
f.write(await file.read())
|
| 15 |
+
|
| 16 |
+
text = speech_to_text(temp_name)
|
| 17 |
+
|
| 18 |
+
os.remove(temp_name)
|
| 19 |
+
|
| 20 |
+
return STTResponse(text=text)
|
routes/tts_routes.py
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from fastapi import APIRouter
|
| 2 |
+
from fastapi.responses import FileResponse
|
| 3 |
+
from models.tts import TTSRequest
|
| 4 |
+
from services.tts_service import text_to_speech
|
| 5 |
+
|
| 6 |
+
router = APIRouter(prefix="/tts", tags=["Text To Speech"])
|
| 7 |
+
|
| 8 |
+
@router.post("/")
|
| 9 |
+
async def generate_tts(request: TTSRequest):
|
| 10 |
+
output_path = text_to_speech(
|
| 11 |
+
text=request.text,
|
| 12 |
+
voice=request.voice,
|
| 13 |
+
fmt=request.format
|
| 14 |
+
)
|
| 15 |
+
return FileResponse(output_path, filename=output_path.name)
|
services/chatbot_service.py
DELETED
|
@@ -1,114 +0,0 @@
|
|
| 1 |
-
import base64
|
| 2 |
-
import uuid
|
| 3 |
-
from typing import Optional, Dict, Any
|
| 4 |
-
from datetime import datetime
|
| 5 |
-
from models.audio import ChatbotResponse, UserMessage
|
| 6 |
-
|
| 7 |
-
class ChatbotService:
|
| 8 |
-
def __init__(self):
|
| 9 |
-
self.sessions: Dict[str, Dict[str, Any]] = {}
|
| 10 |
-
self.initialized = False
|
| 11 |
-
self._initialize_services()
|
| 12 |
-
|
| 13 |
-
def _initialize_services(self):
|
| 14 |
-
"""Initialize services"""
|
| 15 |
-
try:
|
| 16 |
-
from services.tts_service import SimpleTTSService
|
| 17 |
-
self.tts_service = SimpleTTSService()
|
| 18 |
-
print("✓ TTS service initialized")
|
| 19 |
-
except ImportError as e:
|
| 20 |
-
print(f"⚠️ TTS service not available: {e}")
|
| 21 |
-
self.tts_service = None
|
| 22 |
-
|
| 23 |
-
try:
|
| 24 |
-
from services.stt_service import STTService
|
| 25 |
-
self.stt_service = STTService()
|
| 26 |
-
print("✓ STT service initialized")
|
| 27 |
-
except ImportError as e:
|
| 28 |
-
print(f"⚠️ STT service not available: {e}")
|
| 29 |
-
self.stt_service = None
|
| 30 |
-
|
| 31 |
-
async def initialize(self):
|
| 32 |
-
"""Initialize the chatbot service"""
|
| 33 |
-
if self.stt_service:
|
| 34 |
-
await self.stt_service.initialize()
|
| 35 |
-
self.initialized = True
|
| 36 |
-
print("✓ Chatbot Service fully initialized")
|
| 37 |
-
|
| 38 |
-
async def process_user_message(self, user_message: UserMessage) -> ChatbotResponse:
|
| 39 |
-
# Update session
|
| 40 |
-
session = self._get_or_create_session(user_message.session_id)
|
| 41 |
-
|
| 42 |
-
# Process message based on type
|
| 43 |
-
if user_message.message_type == "audio" and self.stt_service:
|
| 44 |
-
text_input = await self.stt_service.transcribe_audio_base64(user_message.content)
|
| 45 |
-
elif user_message.message_type == "audio":
|
| 46 |
-
text_input = "[Voice message received]"
|
| 47 |
-
else:
|
| 48 |
-
text_input = user_message.content
|
| 49 |
-
|
| 50 |
-
# Add to conversation history
|
| 51 |
-
session["conversation_history"].append({
|
| 52 |
-
"role": "user",
|
| 53 |
-
"content": text_input,
|
| 54 |
-
"timestamp": user_message.timestamp
|
| 55 |
-
})
|
| 56 |
-
|
| 57 |
-
# Generate chatbot response
|
| 58 |
-
chatbot_text = await self._generate_chatbot_response(text_input, session)
|
| 59 |
-
|
| 60 |
-
# TTS: Convert response to audio
|
| 61 |
-
audio_base64 = None
|
| 62 |
-
if self.tts_service:
|
| 63 |
-
audio_base64 = await self.tts_service.text_to_speech_base64(chatbot_text)
|
| 64 |
-
|
| 65 |
-
# Create response
|
| 66 |
-
response = ChatbotResponse(
|
| 67 |
-
response_id=str(uuid.uuid4()),
|
| 68 |
-
text_response=chatbot_text,
|
| 69 |
-
audio_response=audio_base64,
|
| 70 |
-
session_id=user_message.session_id
|
| 71 |
-
)
|
| 72 |
-
|
| 73 |
-
# Add response to history
|
| 74 |
-
session["conversation_history"].append({
|
| 75 |
-
"role": "assistant",
|
| 76 |
-
"content": chatbot_text,
|
| 77 |
-
"audio_response": audio_base64,
|
| 78 |
-
"timestamp": response.timestamp
|
| 79 |
-
})
|
| 80 |
-
|
| 81 |
-
return response
|
| 82 |
-
|
| 83 |
-
async def _generate_chatbot_response(self, user_input: str, session: Dict[str, Any]) -> str:
|
| 84 |
-
"""Chatbot response generation logic"""
|
| 85 |
-
user_input_lower = user_input.lower()
|
| 86 |
-
|
| 87 |
-
if any(greet in user_input_lower for greet in ["hello", "hi", "hey"]):
|
| 88 |
-
return "Hello! I'm your voice assistant. How can I help you today?"
|
| 89 |
-
|
| 90 |
-
if "time" in user_input_lower:
|
| 91 |
-
return f"The current time is {datetime.now().strftime('%H:%M')}"
|
| 92 |
-
|
| 93 |
-
if "help" in user_input_lower:
|
| 94 |
-
return "I can process both text and voice messages. Try sending me a voice note!"
|
| 95 |
-
|
| 96 |
-
if "name" in user_input_lower:
|
| 97 |
-
return "I'm your AI voice assistant. I'm here to help with your questions!"
|
| 98 |
-
|
| 99 |
-
if "voice" in user_input_lower or "audio" in user_input_lower:
|
| 100 |
-
return "Yes! I support voice messages. You can speak to me and I'll respond with audio too!"
|
| 101 |
-
|
| 102 |
-
return f"I understand you said: '{user_input}'. How can I assist you further?"
|
| 103 |
-
|
| 104 |
-
def _get_or_create_session(self, session_id: str) -> Dict[str, Any]:
|
| 105 |
-
if session_id not in self.sessions:
|
| 106 |
-
self.sessions[session_id] = {
|
| 107 |
-
"conversation_history": [],
|
| 108 |
-
"created_at": datetime.now(),
|
| 109 |
-
"last_activity": datetime.now()
|
| 110 |
-
}
|
| 111 |
-
return self.sessions[session_id]
|
| 112 |
-
|
| 113 |
-
def get_session_history(self, session_id: str) -> Optional[Dict[str, Any]]:
    """Return the stored session dict for *session_id*, or None if unknown."""
    try:
        return self.sessions[session_id]
    except KeyError:
        return None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
services/gemini_client.py
DELETED
|
@@ -1,9 +0,0 @@
|
|
| 1 |
-
# This file is no longer needed since we're using free models
import logging

logger = logging.getLogger(__name__)


def get_gemini_client():
    """Deprecated entry point kept so stale imports fail loudly.

    Raises:
        RuntimeError: always — the Gemini client has been replaced by free
            models. (Was a bare ``Exception``; ``RuntimeError`` is a subclass,
            so callers catching ``Exception`` are unaffected.)
    """
    logger.warning("Gemini client is deprecated - using free models instead")
    raise RuntimeError("Gemini API is no longer used. Free models are being used instead.")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
services/stt_service.py
CHANGED
|
@@ -1,94 +1,13 @@
|
|
| 1 |
-
import base64
|
| 2 |
-
import io
|
| 3 |
-
import tempfile
|
| 4 |
import os
|
| 5 |
-
import
|
| 6 |
-
import
|
| 7 |
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
async def transcribe_audio_base64(self, audio_base64: str, language: str = "en-US") -> str:
    """Placeholder transcription: acknowledge the audio instead of decoding speech.

    Decodes the base64 payload, summarizes it via ``_get_audio_info`` and
    returns a canned message; a real STT backend (Whisper, Vosk, ...) would
    plug in here. Never raises — failures yield an apology string.
    """
    try:
        raw_audio = base64.b64decode(audio_base64)
        description = await self._get_audio_info(raw_audio)
        return f"[Audio received: {description}. STT service needs model configuration.]"
    except Exception as err:
        print(f"Transcription error: {err}")
        return "Sorry, I couldn't process the audio message."
|
| 33 |
-
|
| 34 |
-
async def _get_audio_info(self, audio_data: bytes) -> str:
|
| 35 |
-
"""Get basic information about the audio file"""
|
| 36 |
-
try:
|
| 37 |
-
with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
|
| 38 |
-
temp_path = temp_file.name
|
| 39 |
-
temp_file.write(audio_data)
|
| 40 |
-
|
| 41 |
-
try:
|
| 42 |
-
with wave.open(temp_path, 'rb') as wav_file:
|
| 43 |
-
frames = wav_file.getnframes()
|
| 44 |
-
rate = wav_file.getframerate()
|
| 45 |
-
duration = frames / float(rate)
|
| 46 |
-
return f"Duration: {duration:.2f}s, Sample Rate: {rate}Hz"
|
| 47 |
-
except:
|
| 48 |
-
return f"Size: {len(audio_data)} bytes"
|
| 49 |
-
|
| 50 |
-
finally:
|
| 51 |
-
if os.path.exists(temp_path):
|
| 52 |
-
os.unlink(temp_path)
|
| 53 |
-
|
| 54 |
-
# Alternative STT service using Whisper if available
class WhisperSTTService:
    """Optional Whisper-backed STT that degrades gracefully when whisper is absent."""

    def __init__(self):
        self.model = None         # whisper model instance once loaded
        self.initialized = False  # True only after a successful load

    async def initialize(self):
        """Load the Whisper "medium" model; leave the service disabled on failure."""
        try:
            import whisper
            self.model = whisper.load_model("medium")
            self.initialized = True
            print("✓ Whisper STT Service initialized")
        except ImportError:
            print("⚠️ Whisper not available. Install with: pip install openai-whisper")
            self.initialized = False
        except Exception as e:
            print(f"⚠️ Whisper initialization failed: {e}")
            self.initialized = False

    async def transcribe_audio_base64(self, audio_base64: str, language: str = "en") -> str:
        """Transcribe base64-encoded audio with Whisper; returns apology strings on failure.

        Args:
            audio_base64: Base64 payload of an audio file Whisper can read.
            language: Whisper language hint (e.g. "en").
        """
        if not self.initialized:
            return "STT service not available. Please install Whisper."

        temp_path = None
        try:
            audio_data = base64.b64decode(audio_base64)

            # Whisper works on file paths, so spill the bytes to a temp file.
            with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
                temp_path = temp_file.name
                temp_file.write(audio_data)

            result = self.model.transcribe(temp_path, language=language)
            return result["text"]
        except Exception as e:
            print(f"Whisper transcription error: {e}")
            return "Sorry, I couldn't transcribe the audio."
        finally:
            # Fix: the original leaked the temp file whenever transcription raised.
            if temp_path and os.path.exists(temp_path):
                os.unlink(temp_path)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import os
|
| 2 |
+
from groq import Groq
|
| 3 |
+
from config import client
|
| 4 |
|
| 5 |
+
def speech_to_text(file_path: str, model: str = "whisper-large-v3-turbo") -> str:
    """Transcribe the audio file at *file_path* with Groq's Whisper endpoint.

    Args:
        file_path: Path to a local audio file (wav/mp3/...).
        model: Groq transcription model id (generalized from the hard-coded
            default; existing callers are unaffected).

    Returns:
        The transcribed text from the verbose JSON response.
    """
    with open(file_path, "rb") as f:
        transcription = client.audio.transcriptions.create(
            # Groq expects a (filename, bytes) tuple for the upload.
            file=(file_path, f.read()),
            model=model,
            temperature=0,
            response_format="verbose_json",
        )
    return transcription.text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
services/tts_service.py
CHANGED
|
@@ -1,48 +1,15 @@
|
|
| 1 |
-
import
|
| 2 |
-
import
|
| 3 |
-
import os
|
| 4 |
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
except ImportError:
|
| 18 |
-
print("⚠️ SpeechRecognition not available. STT will return placeholder text.")
|
| 19 |
-
self.initialized = False
|
| 20 |
-
|
| 21 |
-
async def transcribe_audio_base64(self, audio_base64: str, language: str = "en-US") -> str:
    """Transcribe base64-encoded audio via the free Google recognizer.

    Returns a placeholder string when the SpeechRecognition backend is not
    initialized; otherwise returns either the transcription or a short
    human-readable error message — this method never raises.
    """
    # Graceful degradation: without the optional dependency, only acknowledge receipt.
    if not self.initialized or not self.recognizer:
        return "[Audio received - install SpeechRecognition for transcription]"

    try:
        import speech_recognition as sr
        import io

        # Decode audio
        audio_data = base64.b64decode(audio_base64)

        # Use SpeechRecognition
        # NOTE(review): sr.AudioFile handles WAV/AIFF/FLAC — confirm callers never
        # send compressed formats such as webm/ogg.
        audio_file = sr.AudioFile(io.BytesIO(audio_data))

        with audio_file as source:
            # Adjust for ambient noise
            self.recognizer.adjust_for_ambient_noise(source)
            audio = self.recognizer.record(source)

        # Online call to Google's recognizer — requires network access.
        return self.recognizer.recognize_google(audio, language=language)

    except sr.UnknownValueError:
        # NOTE(review): if `import speech_recognition` above ever fails, `sr` is
        # unbound and evaluating these handlers would raise NameError; the
        # `initialized` guard is assumed to prevent that path — verify.
        return "Could not understand audio"
    except sr.RequestError as e:
        return f"Error with speech recognition service: {e}"
    except Exception as e:
        return f"Error processing audio: {str(e)}"
|
|
|
|
| 1 |
+
from pathlib import Path
|
| 2 |
+
from config import client
|
|
|
|
| 3 |
|
| 4 |
+
def text_to_speech(text: str, voice: str = "Aaliyah-PlayAI", fmt: str = "wav"):
    """Synthesize *text* to an audio file with Groq's PlayAI TTS.

    Args:
        text: The text to speak.
        voice: PlayAI voice name.
        fmt: Output format/extension (e.g. "wav", "mp3").

    Returns:
        Path to the written audio file (audio/temp/output.<fmt>).

    Note: the output path is fixed, so concurrent calls overwrite each other.
    """
    speech_path = Path("audio/temp") / f"output.{fmt}"
    # Fix: stream_to_file fails if the target directory does not exist yet
    # (e.g. on a fresh checkout), so create it first.
    speech_path.parent.mkdir(parents=True, exist_ok=True)

    response = client.audio.speech.create(
        model="playai-tts",
        voice=voice,
        response_format=fmt,
        input=text,
    )

    response.stream_to_file(speech_path)
    return speech_path
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|