malek-messaoudii committed
Commit 2da4544 · Parent: 220b6c2

Add groq apis
config.py CHANGED
@@ -5,78 +5,73 @@ from pathlib import Path
 from dotenv import load_dotenv
 import logging
 
-# Configure logging
 logger = logging.getLogger(__name__)
 
-# Load environment variables from .env file
+# Load .env variables
 load_dotenv()
 
-# Get project root directory
+# ============ DIRECTORIES ============
 API_DIR = Path(__file__).parent
 PROJECT_ROOT = API_DIR.parent
 
-# ============ HUGGING FACE ============
+# ============ HUGGING FACE MODELS ============
 HUGGINGFACE_API_KEY = os.getenv("HUGGINGFACE_API_KEY", "")
 HUGGINGFACE_STANCE_MODEL_ID = os.getenv("HUGGINGFACE_STANCE_MODEL_ID")
 HUGGINGFACE_LABEL_MODEL_ID = os.getenv("HUGGINGFACE_LABEL_MODEL_ID")
 
-# ============ API CONFIGURATION ============
-API_TITLE = "NLP Debater - Voice Chatbot API"
-API_DESCRIPTION = "Complete NLP system with stance detection, KPA, and voice chatbot using free models"
-API_VERSION = "1.0.0"
-
-# ============ SERVER CONFIGURATION ============
+# ============ GROQ MODELS ============
+GROQ_API_KEY = os.getenv("GROQ_API_KEY", "")
+
+# **Speech-to-Text**
+GROQ_STT_MODEL = "whisper-large-v3-turbo"
+
+# **Text-to-Speech**
+GROQ_TTS_MODEL = "playai-tts"
+GROQ_TTS_VOICE = "Aaliyah-PlayAI"
+GROQ_TTS_FORMAT = "wav"
+
+# **Chat Model**
+GROQ_CHAT_MODEL = "llama3-70b-8192"
+
+# ============ API META ============
+API_TITLE = "NLP Debater - Voice Chatbot"
+API_DESCRIPTION = "NLP stance detection, KPA, and Groq STT/TTS chatbot"
+API_VERSION = "2.0.0"
+
+# ============ SERVER ============
 HOST = os.getenv("HOST", "0.0.0.0")
-PORT = int(os.getenv("PORT", "7860"))
+PORT = int(os.getenv("PORT", 7860))
 RELOAD = os.getenv("RELOAD", "false").lower() == "true"
 
-# ============ CORS CONFIGURATION ============
+# ============ CORS ============
 CORS_ORIGINS = ["*"]
 CORS_CREDENTIALS = True
 CORS_METHODS = ["*"]
 CORS_HEADERS = ["*"]
 
-# ============ FREE VOICE MODELS ============
-# Speech-to-Text
-STT_MODEL_ID = "openai/whisper-base"
-STT_DEVICE = "cpu"  # Change to "cuda" if GPU available
-
-# Text-to-Speech
-TTS_ENGINE = "gtts"  # Google Text-to-Speech (free)
-TTS_LANGUAGE = "en"
-
-# Chatbot
-CHATBOT_MODEL_ID = "microsoft/DialoGPT-medium"
-CHATBOT_DEVICE = "cpu"  # Change to "cuda" if GPU available
-
 # ============ AUDIO SETTINGS ============
-ALLOWED_AUDIO_TYPES = {
-    "audio/wav",
-    "audio/x-wav",
-    "audio/mpeg",
-    "audio/mp3",
-    "audio/mp4",
-    "audio/m4a"
-}
-
-MAX_TEXT_LENGTH = 500
-MIN_TEXT_LENGTH = 1
 MAX_AUDIO_SIZE = 10 * 1024 * 1024  # 10MB
 AUDIO_SAMPLE_RATE = 16000
 AUDIO_DURATION_LIMIT = 120  # seconds
+ALLOWED_AUDIO_TYPES = {
+    "audio/wav", "audio/x-wav",
+    "audio/mpeg", "audio/mp3",
+    "audio/mp4", "audio/m4a"
+}
 
-# ============ MODEL LOADING ============
+# ============ MODEL PRELOADING ============
 PRELOAD_MODELS_ON_STARTUP = True
 LOAD_STANCE_MODEL = True
 LOAD_KPA_MODEL = True
-LOAD_STT_MODEL = True
-LOAD_CHATBOT_MODEL = True
-LOAD_TTS_MODEL = False  # gTTS doesn't need preloading
+LOAD_STT_MODEL = False      # Groq STT = no preload
+LOAD_CHATBOT_MODEL = False  # Groq Chat = no preload
+LOAD_TTS_MODEL = False      # Groq TTS = no preload
 
 logger.info("="*60)
 logger.info("✓ Configuration loaded successfully")
-logger.info("  Using FREE models for all services")
-logger.info(f"  - STT: {STT_MODEL_ID}")
-logger.info(f"  - TTS: {TTS_ENGINE}")
-logger.info(f"  - Chatbot: {CHATBOT_MODEL_ID}")
-logger.info("="*60)
+logger.info(f"  HF Stance Model : {HUGGINGFACE_STANCE_MODEL_ID}")
+logger.info(f"  HF Label Model  : {HUGGINGFACE_LABEL_MODEL_ID}")
+logger.info(f"  GROQ STT Model  : {GROQ_STT_MODEL}")
+logger.info(f"  GROQ TTS Model  : {GROQ_TTS_MODEL}")
+logger.info(f"  GROQ Chat Model : {GROQ_CHAT_MODEL}")
+logger.info("="*60)
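Note: the rewritten services/stt_service.py and services/tts_service.py below both do `from config import client`, but no `client` is defined anywhere in this diff. A minimal sketch (not part of this commit) of what config.py would additionally need, using the official groq SDK:

# Minimal sketch (assumption): the shared Groq client that
# services/stt_service.py and services/tts_service.py import.
from groq import Groq

client = Groq(api_key=GROQ_API_KEY)  # GROQ_API_KEY is defined above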
main.py CHANGED
@@ -1,52 +1,17 @@
 import sys
 from pathlib import Path
-import os
-import subprocess
 import logging
 from contextlib import asynccontextmanager
 
-# Configure logging first
 logging.basicConfig(
     level=logging.INFO,
-    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
+    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
 )
 logger = logging.getLogger(__name__)
 
-# Add the app directory to Python path
+# Add app directory
 app_dir = Path(__file__).parent
-if str(app_dir) not in sys.path:
-    sys.path.insert(0, str(app_dir))
-
-def install_ffmpeg():
-    """Install ffmpeg on system (required for audio processing)"""
-    try:
-        result = subprocess.run(["which", "ffmpeg"], capture_output=True, text=True)
-        if result.returncode == 0:
-            version_result = subprocess.run(["ffmpeg", "-version"], capture_output=True, text=True)
-            if version_result.returncode == 0:
-                version = version_result.stdout.split()[2]
-                logger.info(f"✓ ffmpeg already installed: {version}")
-                return True
-
-        logger.info("Installing ffmpeg...")
-        subprocess.run(["apt-get", "update"], check=True, capture_output=True)
-        subprocess.run(["apt-get", "install", "-y", "ffmpeg"], check=True, capture_output=True)
-
-        verify = subprocess.run(["ffmpeg", "-version"], capture_output=True, text=True)
-        if verify.returncode == 0:
-            version = verify.stdout.split()[2]
-            logger.info(f"✓ ffmpeg installed successfully: {version}")
-            return True
-        return False
-    except Exception as e:
-        logger.warning(f"⚠️ ffmpeg installation warning: {e}")
-        return False
-
-# Install system dependencies first
-logger.info("="*60)
-logger.info("Checking system dependencies...")
-logger.info("="*60)
-install_ffmpeg()
+sys.path.insert(0, str(app_dir))
 
 from fastapi import FastAPI
 from fastapi.middleware.cors import CORSMiddleware
@@ -56,82 +21,49 @@ from config import (
     API_TITLE, API_DESCRIPTION, API_VERSION,
     HUGGINGFACE_API_KEY, HUGGINGFACE_STANCE_MODEL_ID, HUGGINGFACE_LABEL_MODEL_ID,
     HOST, PORT, RELOAD,
-    CORS_ORIGINS, CORS_CREDENTIALS, CORS_METHODS, CORS_HEADERS,
-    PRELOAD_MODELS_ON_STARTUP, LOAD_STANCE_MODEL, LOAD_KPA_MODEL,
-    LOAD_STT_MODEL, LOAD_CHATBOT_MODEL, STT_MODEL_ID, CHATBOT_MODEL_ID
+    CORS_ORIGINS, CORS_METHODS, CORS_HEADERS, CORS_CREDENTIALS,
+    PRELOAD_MODELS_ON_STARTUP, LOAD_STANCE_MODEL, LOAD_KPA_MODEL
 )
 
 @asynccontextmanager
 async def lifespan(app: FastAPI):
-    """Load models on startup and cleanup on shutdown"""
     logger.info("="*60)
-    logger.info("🚀 STARTING API - Loading Models...")
+    logger.info("🚀 API STARTUP - Loading HuggingFace models...")
     logger.info("="*60)
 
     if PRELOAD_MODELS_ON_STARTUP:
-        # Load Stance Detection Model
+
+        # Load stance model
         if LOAD_STANCE_MODEL:
             try:
-                logger.info(f"Loading Stance Model: {HUGGINGFACE_STANCE_MODEL_ID}")
                 from services.stance_model_manager import load_model as load_stance
                 load_stance(HUGGINGFACE_STANCE_MODEL_ID, HUGGINGFACE_API_KEY)
-                logger.info("✓ Stance model loaded successfully")
+                logger.info("✓ Stance model loaded")
             except Exception as e:
-                logger.error(f"✗ Stance model loading failed: {str(e)}")
-
-        # Load KPA/Label Model
+                logger.error(f"✗ Failed loading stance model: {e}")
+
+        # Load KPA model
         if LOAD_KPA_MODEL:
             try:
-                logger.info(f"Loading KPA Model: {HUGGINGFACE_LABEL_MODEL_ID}")
                 from services.label_model_manager import load_model as load_kpa
                 load_kpa(HUGGINGFACE_LABEL_MODEL_ID, HUGGINGFACE_API_KEY)
-                logger.info("✓ KPA model loaded successfully")
-            except Exception as e:
-                logger.error(f"✗ KPA model loading failed: {str(e)}")
-
-        # Load STT Model (Speech-to-Text)
-        if LOAD_STT_MODEL:
-            try:
-                logger.info(f"Loading STT Model: {STT_MODEL_ID}")
-                from services.stt_service import STTService
-                stt_service = STTService()
-                await stt_service.initialize()
-                logger.info("✓ STT model loaded successfully")
+                logger.info("✓ KPA model loaded")
             except Exception as e:
-                logger.error(f"✗ STT model loading failed: {str(e)}")
-
-        # Load Chatbot Model
-        if LOAD_CHATBOT_MODEL:
-            try:
-                logger.info(f"Loading Chatbot Model: {CHATBOT_MODEL_ID}")
-                from services.chatbot_service import ChatbotService
-                chatbot_service = ChatbotService()
-                await chatbot_service.initialize()
-                logger.info("✓ Chatbot model loaded successfully")
-            except Exception as e:
-                logger.error(f"✗ Chatbot model loading failed: {str(e)}")
-
-    logger.info("="*60)
-    logger.info("✓ API startup complete - Ready to serve requests")
-    logger.info(f"📚 API Docs: http://{HOST}:{PORT}/docs")
-    logger.info("="*60)
-
-    yield  # Application runs here
-
-    # Shutdown
-    logger.info("Shutting down API...")
+                logger.error(f"✗ Failed loading KPA model: {e}")
 
-# Create FastAPI application
+    logger.info("✓ Startup complete. API ready.")
+    yield
+    logger.info("🛑 Shutting down...")
+
+# ------------- FASTAPI APP -------------
 app = FastAPI(
     title=API_TITLE,
     description=API_DESCRIPTION,
     version=API_VERSION,
-    docs_url="/docs",
-    redoc_url="/redoc",
-    lifespan=lifespan,
+    lifespan=lifespan
 )
 
-# Add CORS middleware
+# ------------- CORS -------------
 app.add_middleware(
     CORSMiddleware,
     allow_origins=CORS_ORIGINS,
@@ -140,98 +72,46 @@ app.add_middleware(
     allow_headers=CORS_HEADERS,
 )
 
-# Include routers
+# ============ ROUTES ============
+
+# STT route (Groq Whisper)
 try:
-    from routes.audio import router as chatbot_router
-    app.include_router(chatbot_router, prefix="/api/v1", tags=["Voice Chatbot"])
-    logger.info("✓ Chatbot routes registered")
+    from routes.stt_routes import router as stt_router
+    app.include_router(stt_router, prefix="/api/v1/stt", tags=["Speech To Text"])
+    logger.info("✓ STT route loaded (Groq Whisper)")
 except Exception as e:
-    logger.warning(f"⚠️ Chatbot routes failed to load: {e}")
+    logger.warning(f"Failed loading STT route: {e}")
 
+# TTS route (Groq PlayAI TTS)
 try:
-    from routes.audio import router as audio_router
-    app.include_router(audio_router, prefix="/audio", tags=["Audio Processing"])
-    logger.info("✓ Audio routes registered")
+    from routes.tts_routes import router as tts_router
+    app.include_router(tts_router, prefix="/api/v1/tts", tags=["Text To Speech"])
+    logger.info("✓ TTS route loaded (Groq PlayAI TTS)")
 except Exception as e:
-    logger.warning(f"⚠️ Audio routes failed to load: {e}")
+    logger.warning(f"Failed loading TTS route: {e}")
 
+# Main NLP system routes
 try:
     from routes import api_router
     app.include_router(api_router)
-    logger.info("✓ API routes registered")
+    logger.info("✓ Main API routes loaded")
 except Exception as e:
-    logger.warning(f"⚠️ API routes failed to load: {e}")
+    logger.warning(f"Failed loading main API routes: {e}")
+
+# ------------------ BASIC ROUTES ------------------
+
+@app.get("/health")
+async def health():
+    return {"status": "healthy", "service": "NLP Debater + Groq Voice"}
 
-# Health check endpoints
 @app.get("/")
 async def root():
-    """Root endpoint"""
     return {
-        "message": "NLP Debater API with Voice Chatbot",
-        "status": "healthy",
-        "version": API_VERSION,
+        "message": "NLP Debater API with Groq Voice Support",
         "docs": "/docs",
-        "endpoints": {
-            "voice_chatbot": "/api/v1/chat/message",
-            "audio_processing": "/docs#/Audio%20Processing",
-            "health": "/health",
-            "models-status": "/models-status"
-        }
+        "voice_stt": "/api/v1/stt",
+        "voice_tts": "/api/v1/tts"
     }
-
-@app.get("/health")
-async def health_check():
-    """Simple health check"""
-    return {"status": "healthy", "message": "API is running"}
-
-@app.get("/models-status")
-async def models_status():
-    """Check which models are loaded"""
-    status = {
-        "stt_model": "unknown",
-        "tts_engine": "gtts (free)",
-        "chatbot_model": "unknown",
-        "stance_model": "unknown",
-        "kpa_model": "unknown"
-    }
-
-    try:
-        from services.stt_service import STTService
-        stt_service = STTService()
-        status["stt_model"] = "loaded" if hasattr(stt_service, 'initialized') and stt_service.initialized else "not loaded"
-    except:
-        status["stt_model"] = "error"
-
-    try:
-        from services.chatbot_service import ChatbotService
-        chatbot_service = ChatbotService()
-        status["chatbot_model"] = "loaded" if hasattr(chatbot_service, 'initialized') and chatbot_service.initialized else "not loaded"
-    except:
-        status["chatbot_model"] = "error"
-
-    return status
-
-@app.get("/check-ffmpeg")
-async def check_ffmpeg():
-    """Check if ffmpeg is installed"""
-    try:
-        result = subprocess.run(["ffmpeg", "-version"], capture_output=True, text=True)
-        if result.returncode == 0:
-            version = result.stdout.split('\n')[0]
-            return {"status": "available", "version": version}
-        else:
-            return {"status": "error", "error": result.stderr}
-    except FileNotFoundError:
-        return {"status": "not found", "error": "ffmpeg is not installed"}
 
 if __name__ == "__main__":
-    logger.info(f"🚀 Starting server on {HOST}:{PORT}")
-    logger.info(f"📚 Documentation: http://{HOST}:{PORT}/docs")
-
-    uvicorn.run(
-        "main:app",
-        host=HOST,
-        port=PORT,
-        reload=RELOAD,
-        log_level="info"
-    )
+    uvicorn.run("main:app", host=HOST, port=PORT, reload=RELOAD)
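Note: FastAPI concatenates router-level and include-level prefixes, and stt_routes.py/tts_routes.py below already declare their own prefixes ("/stt", "/tts"). With the include_router calls above, the effective paths are therefore /api/v1/stt/stt/ and /api/v1/tts/tts/, not the /api/v1/stt and /api/v1/tts the root endpoint advertises. A hypothetical smoke test against a local server, using the stacked paths as committed:

# Hypothetical smoke test for the new voice endpoints (paths reflect the
# doubled prefixes as committed; drop one level if that gets fixed).
import requests

BASE = "http://localhost:7860"

# Speech-to-text: multipart upload, returns {"text": "..."}
with open("sample.wav", "rb") as f:  # hypothetical local file
    r = requests.post(f"{BASE}/api/v1/stt/stt/",
                      files={"file": ("sample.wav", f, "audio/wav")})
print(r.json()["text"])

# Text-to-speech: JSON body matching models/tts.py TTSRequest, returns audio bytes
r = requests.post(f"{BASE}/api/v1/tts/tts/",
                  json={"text": "Hello from Groq", "voice": "Aaliyah-PlayAI",
                        "format": "wav"})
with open("reply.wav", "wb") as out:
    out.write(r.content)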
models/audio.py DELETED
@@ -1,30 +0,0 @@
-from pydantic import BaseModel, Field
-from typing import Optional, List, Dict, Any
-from enum import Enum
-from datetime import datetime
-
-class MessageType(str, Enum):
-    TEXT = "text"
-    AUDIO = "audio"
-
-class UserMessage(BaseModel):
-    message_id: str = Field(..., description="Unique message ID")
-    content: str = Field(..., description="Text content or audio base64")
-    message_type: MessageType = Field(..., description="Message type")
-    session_id: str = Field(..., description="User session ID")
-    timestamp: datetime = Field(default_factory=datetime.now)
-
-class ChatbotResponse(BaseModel):
-    response_id: str = Field(..., description="Unique response ID")
-    text_response: str = Field(..., description="Chatbot text response")
-    audio_response: Optional[str] = Field(None, description="Audio response in base64")
-    audio_url: Optional[str] = Field(None, description="Generated audio URL")
-    session_id: str = Field(..., description="User session ID")
-    timestamp: datetime = Field(default_factory=datetime.now)
-
-class ChatSession(BaseModel):
-    session_id: str = Field(..., description="Session ID")
-    user_id: Optional[str] = Field(None, description="User ID")
-    created_at: datetime = Field(default_factory=datetime.now)
-    last_activity: datetime = Field(default_factory=datetime.now)
-    conversation_history: List[Dict[str, Any]] = Field(default_factory=list)
models/stt.py ADDED
@@ -0,0 +1,4 @@
+from pydantic import BaseModel
+
+class STTResponse(BaseModel):
+    text: str
models/tts.py ADDED
@@ -0,0 +1,6 @@
+from pydantic import BaseModel
+
+class TTSRequest(BaseModel):
+    text: str
+    voice: str = "Aaliyah-PlayAI"
+    format: str = "wav"
routes/__init__.py CHANGED
@@ -2,7 +2,7 @@
 
 from fastapi import APIRouter
 from . import root, health, stance, label
-from routes.audio import router as audio_router
+from routes.tts_routes import router as audio_router
 # Create main router
 api_router = APIRouter()
 
routes/audio.py DELETED
@@ -1,86 +0,0 @@
-from fastapi import APIRouter, HTTPException, UploadFile, File, Form
-from fastapi.responses import JSONResponse
-import uuid
-import base64
-from models.audio import UserMessage, ChatbotResponse, MessageType
-from services.chatbot_service import ChatbotService
-
-router = APIRouter()
-chatbot_service = ChatbotService()
-
-@router.post("/chat/message", response_model=ChatbotResponse)
-async def send_chat_message(
-    session_id: str = Form(...),
-    message_type: str = Form(...),
-    message: str = Form(None),
-    audio_file: UploadFile = File(None)
-):
-    try:
-        # Validate input
-        if not message and not audio_file:
-            raise HTTPException(status_code=400, detail="Either message or audio file must be provided")
-
-        if message_type == "audio" and not audio_file:
-            raise HTTPException(status_code=400, detail="Audio file required for audio messages")
-
-        # Process audio file if provided
-        content = ""
-        if audio_file:
-            audio_data = await audio_file.read()
-            content = base64.b64encode(audio_data).decode('utf-8')
-        else:
-            content = message
-
-        # Create user message
-        user_message = UserMessage(
-            message_id=str(uuid.uuid4()),
-            content=content,
-            message_type=MessageType(message_type),
-            session_id=session_id
-        )
-
-        # Process through chatbot service
-        response = await chatbot_service.process_user_message(user_message)
-
-        return response
-
-    except Exception as e:
-        raise HTTPException(status_code=500, detail=f"Error processing message: {str(e)}")
-
-@router.post("/chat/audio")
-async def send_audio_message(
-    session_id: str = Form(...),
-    audio_file: UploadFile = File(...)
-):
-    """Endpoint specifically for audio messages"""
-    try:
-        audio_data = await audio_file.read()
-        audio_base64 = base64.b64encode(audio_data).decode('utf-8')
-
-        user_message = UserMessage(
-            message_id=str(uuid.uuid4()),
-            content=audio_base64,
-            message_type=MessageType.AUDIO,
-            session_id=session_id
-        )
-
-        response = await chatbot_service.process_user_message(user_message)
-        return response
-
-    except Exception as e:
-        raise HTTPException(status_code=500, detail=f"Error processing audio: {str(e)}")
-
-@router.get("/session/{session_id}/history")
-async def get_session_history(session_id: str):
-    """Get conversation history for a session"""
-    history = chatbot_service.get_session_history(session_id)
-    if not history:
-        raise HTTPException(status_code=404, detail="Session not found")
-    return history
-
-@router.post("/session/new")
-async def create_new_session():
-    """Create a new chat session"""
-    session_id = str(uuid.uuid4())
-    chatbot_service._get_or_create_session(session_id)
-    return {"session_id": session_id, "message": "New session created"}
routes/stt_routes.py ADDED
@@ -0,0 +1,20 @@
+from fastapi import APIRouter, UploadFile, File
+from services.stt_service import speech_to_text
+from models.stt import STTResponse
+import os
+import uuid
+
+router = APIRouter(prefix="/stt", tags=["Speech To Text"])
+
+@router.post("/", response_model=STTResponse)
+async def convert_stt(file: UploadFile = File(...)):
+    temp_name = f"audio/temp/{uuid.uuid4()}_{file.filename}"
+
+    with open(temp_name, "wb") as f:
+        f.write(await file.read())
+
+    text = speech_to_text(temp_name)
+
+    os.remove(temp_name)
+
+    return STTResponse(text=text)
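Note: as committed, this route assumes an audio/temp/ directory already exists and leaks the uploaded file if speech_to_text raises. A hedged alternative sketch using the standard tempfile module avoids both issues:

# Alternative sketch (not part of this commit): temp handling via tempfile.
import os
import tempfile

from fastapi import APIRouter, UploadFile, File
from models.stt import STTResponse
from services.stt_service import speech_to_text

router = APIRouter(prefix="/stt", tags=["Speech To Text"])

@router.post("/", response_model=STTResponse)
async def convert_stt(file: UploadFile = File(...)):
    # Keep the original extension so the transcription backend can sniff the format
    suffix = os.path.splitext(file.filename or "")[1]
    with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as tmp:
        tmp.write(await file.read())
        temp_name = tmp.name
    try:
        text = speech_to_text(temp_name)
    finally:
        os.remove(temp_name)  # clean up even when transcription raises
    return STTResponse(text=text)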
routes/tts_routes.py ADDED
@@ -0,0 +1,15 @@
+from fastapi import APIRouter
+from fastapi.responses import FileResponse
+from models.tts import TTSRequest
+from services.tts_service import text_to_speech
+
+router = APIRouter(prefix="/tts", tags=["Text To Speech"])
+
+@router.post("/")
+async def generate_tts(request: TTSRequest):
+    output_path = text_to_speech(
+        text=request.text,
+        voice=request.voice,
+        fmt=request.format
+    )
+    return FileResponse(output_path, filename=output_path.name)
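Note: every synthesized file this route returns stays on disk under audio/temp/. One option, sketched here under the same imports the route already uses, is to delete the file after the response has been sent via Starlette's BackgroundTask:

# Hedged variant (not part of this commit): clean up the generated file.
import os

from fastapi import APIRouter
from fastapi.responses import FileResponse
from starlette.background import BackgroundTask

from models.tts import TTSRequest
from services.tts_service import text_to_speech

router = APIRouter(prefix="/tts", tags=["Text To Speech"])

@router.post("/")
async def generate_tts(request: TTSRequest):
    output_path = text_to_speech(text=request.text, voice=request.voice, fmt=request.format)
    # Delete the file once the response body has been streamed out
    return FileResponse(
        output_path,
        filename=output_path.name,
        background=BackgroundTask(os.remove, output_path),
    )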
services/chatbot_service.py DELETED
@@ -1,114 +0,0 @@
-import base64
-import uuid
-from typing import Optional, Dict, Any
-from datetime import datetime
-from models.audio import ChatbotResponse, UserMessage
-
-class ChatbotService:
-    def __init__(self):
-        self.sessions: Dict[str, Dict[str, Any]] = {}
-        self.initialized = False
-        self._initialize_services()
-
-    def _initialize_services(self):
-        """Initialize services"""
-        try:
-            from services.tts_service import SimpleTTSService
-            self.tts_service = SimpleTTSService()
-            print("✓ TTS service initialized")
-        except ImportError as e:
-            print(f"⚠️ TTS service not available: {e}")
-            self.tts_service = None
-
-        try:
-            from services.stt_service import STTService
-            self.stt_service = STTService()
-            print("✓ STT service initialized")
-        except ImportError as e:
-            print(f"⚠️ STT service not available: {e}")
-            self.stt_service = None
-
-    async def initialize(self):
-        """Initialize the chatbot service"""
-        if self.stt_service:
-            await self.stt_service.initialize()
-        self.initialized = True
-        print("✓ Chatbot Service fully initialized")
-
-    async def process_user_message(self, user_message: UserMessage) -> ChatbotResponse:
-        # Update session
-        session = self._get_or_create_session(user_message.session_id)
-
-        # Process message based on type
-        if user_message.message_type == "audio" and self.stt_service:
-            text_input = await self.stt_service.transcribe_audio_base64(user_message.content)
-        elif user_message.message_type == "audio":
-            text_input = "[Voice message received]"
-        else:
-            text_input = user_message.content
-
-        # Add to conversation history
-        session["conversation_history"].append({
-            "role": "user",
-            "content": text_input,
-            "timestamp": user_message.timestamp
-        })
-
-        # Generate chatbot response
-        chatbot_text = await self._generate_chatbot_response(text_input, session)
-
-        # TTS: Convert response to audio
-        audio_base64 = None
-        if self.tts_service:
-            audio_base64 = await self.tts_service.text_to_speech_base64(chatbot_text)
-
-        # Create response
-        response = ChatbotResponse(
-            response_id=str(uuid.uuid4()),
-            text_response=chatbot_text,
-            audio_response=audio_base64,
-            session_id=user_message.session_id
-        )
-
-        # Add response to history
-        session["conversation_history"].append({
-            "role": "assistant",
-            "content": chatbot_text,
-            "audio_response": audio_base64,
-            "timestamp": response.timestamp
-        })
-
-        return response
-
-    async def _generate_chatbot_response(self, user_input: str, session: Dict[str, Any]) -> str:
-        """Chatbot response generation logic"""
-        user_input_lower = user_input.lower()
-
-        if any(greet in user_input_lower for greet in ["hello", "hi", "hey"]):
-            return "Hello! I'm your voice assistant. How can I help you today?"
-
-        if "time" in user_input_lower:
-            return f"The current time is {datetime.now().strftime('%H:%M')}"
-
-        if "help" in user_input_lower:
-            return "I can process both text and voice messages. Try sending me a voice note!"
-
-        if "name" in user_input_lower:
-            return "I'm your AI voice assistant. I'm here to help with your questions!"
-
-        if "voice" in user_input_lower or "audio" in user_input_lower:
-            return "Yes! I support voice messages. You can speak to me and I'll respond with audio too!"
-
-        return f"I understand you said: '{user_input}'. How can I assist you further?"
-
-    def _get_or_create_session(self, session_id: str) -> Dict[str, Any]:
-        if session_id not in self.sessions:
-            self.sessions[session_id] = {
-                "conversation_history": [],
-                "created_at": datetime.now(),
-                "last_activity": datetime.now()
-            }
-        return self.sessions[session_id]
-
-    def get_session_history(self, session_id: str) -> Optional[Dict[str, Any]]:
-        return self.sessions.get(session_id)
services/gemini_client.py DELETED
@@ -1,9 +0,0 @@
-# This file is no longer needed since we're using free models
-import logging
-
-logger = logging.getLogger(__name__)
-
-def get_gemini_client():
-    """Gemini client is no longer used"""
-    logger.warning("Gemini client is deprecated - using free models instead")
-    raise Exception("Gemini API is no longer used. Free models are being used instead.")
services/stt_service.py CHANGED
@@ -1,94 +1,13 @@
-import base64
-import io
-import tempfile
 import os
-import wave
-import audioop
+from groq import Groq
+from config import client
 
-class STTService:
-    def __init__(self):
-        self.initialized = False
-
-    async def initialize(self):
-        """Initialize STT service"""
-        # For now, we'll use a simple approach without external dependencies
-        self.initialized = True
-        print("✓ STT Service initialized (basic mode)")
-
-    async def transcribe_audio_base64(self, audio_base64: str, language: str = "en-US") -> str:
-        """Transcribe base64 audio to text - SIMPLIFIED VERSION"""
-        try:
-            # Decode audio
-            audio_data = base64.b64decode(audio_base64)
-
-            # For now, return a placeholder since we don't have STT models configured
-            # In a real implementation, you would use Whisper, Vosk, or other STT models here
-
-            audio_info = await self._get_audio_info(audio_data)
-            return f"[Audio received: {audio_info}. STT service needs model configuration.]"
-
-        except Exception as e:
-            print(f"Transcription error: {e}")
-            return "Sorry, I couldn't process the audio message."
-
-    async def _get_audio_info(self, audio_data: bytes) -> str:
-        """Get basic information about the audio file"""
-        try:
-            with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
-                temp_path = temp_file.name
-                temp_file.write(audio_data)
-
-            try:
-                with wave.open(temp_path, 'rb') as wav_file:
-                    frames = wav_file.getnframes()
-                    rate = wav_file.getframerate()
-                    duration = frames / float(rate)
-                    return f"Duration: {duration:.2f}s, Sample Rate: {rate}Hz"
-            except:
-                return f"Size: {len(audio_data)} bytes"
-
-        finally:
-            if os.path.exists(temp_path):
-                os.unlink(temp_path)
-
-# Alternative STT service using Whisper if available
-class WhisperSTTService:
-    def __init__(self):
-        self.model = None
-        self.initialized = False
-
-    async def initialize(self):
-        """Initialize Whisper STT service"""
-        try:
-            import whisper
-            self.model = whisper.load_model("medium")
-            self.initialized = True
-            print("✓ Whisper STT Service initialized")
-        except ImportError:
-            print("⚠️ Whisper not available. Install with: pip install openai-whisper")
-            self.initialized = False
-        except Exception as e:
-            print(f"⚠️ Whisper initialization failed: {e}")
-            self.initialized = False
-
-    async def transcribe_audio_base64(self, audio_base64: str, language: str = "en") -> str:
-        """Transcribe using Whisper"""
-        if not self.initialized:
-            return "STT service not available. Please install Whisper."
-
-        try:
-            audio_data = base64.b64decode(audio_base64)
-
-            with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
-                temp_path = temp_file.name
-                temp_file.write(audio_data)
-
-            result = self.model.transcribe(temp_path, language=language)
-            transcription = result["text"]
-
-            os.unlink(temp_path)
-            return transcription
-
-        except Exception as e:
-            print(f"Whisper transcription error: {e}")
-            return "Sorry, I couldn't transcribe the audio."
+def speech_to_text(file_path: str):
+    with open(file_path, "rb") as f:
+        transcription = client.audio.transcriptions.create(
+            file=(file_path, f.read()),
+            model="whisper-large-v3-turbo",
+            temperature=0,
+            response_format="verbose_json"
+        )
+    return transcription.text
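Note: in the rewritten module, `import os` and `from groq import Groq` are unused; only the shared `client` from config is needed. Also, with response_format="verbose_json" the transcription object carries more than .text on OpenAI-compatible Whisper APIs (timestamped segments, detected language, duration). A hedged sketch of surfacing those fields, attribute names assumed and guarded:

# Hedged sketch (assumes config.py defines the shared Groq `client`).
from config import client

def speech_to_text_detailed(file_path: str) -> dict:
    with open(file_path, "rb") as f:
        t = client.audio.transcriptions.create(
            file=(file_path, f.read()),
            model="whisper-large-v3-turbo",
            temperature=0,
            response_format="verbose_json",
        )
    # Field availability depends on the SDK version, hence the getattr guards
    return {
        "text": t.text,
        "language": getattr(t, "language", None),
        "duration": getattr(t, "duration", None),
    }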
services/tts_service.py CHANGED
@@ -1,48 +1,15 @@
-import base64
-import tempfile
-import os
+from pathlib import Path
+from config import client
 
-class STTService:
-    def __init__(self):
-        self.initialized = False
-        self.recognizer = None
-
-    async def initialize(self):
-        """Initialize STT service"""
-        try:
-            import speech_recognition as sr
-            self.recognizer = sr.Recognizer()
-            self.initialized = True
-            print("✓ STT Service initialized (SpeechRecognition)")
-        except ImportError:
-            print("⚠️ SpeechRecognition not available. STT will return placeholder text.")
-            self.initialized = False
-
-    async def transcribe_audio_base64(self, audio_base64: str, language: str = "en-US") -> str:
-        """Transcribe base64 audio to text"""
-        if not self.initialized or not self.recognizer:
-            return "[Audio received - install SpeechRecognition for transcription]"
-
-        try:
-            import speech_recognition as sr
-            import io
-
-            # Decode audio
-            audio_data = base64.b64decode(audio_base64)
-
-            # Use SpeechRecognition
-            audio_file = sr.AudioFile(io.BytesIO(audio_data))
-
-            with audio_file as source:
-                # Adjust for ambient noise
-                self.recognizer.adjust_for_ambient_noise(source)
-                audio = self.recognizer.record(source)
-
-            return self.recognizer.recognize_google(audio, language=language)
-
-        except sr.UnknownValueError:
-            return "Could not understand audio"
-        except sr.RequestError as e:
-            return f"Error with speech recognition service: {e}"
-        except Exception as e:
-            return f"Error processing audio: {str(e)}"
+def text_to_speech(text: str, voice: str = "Aaliyah-PlayAI", fmt: str = "wav"):
+    speech_path = Path("audio/temp/output." + fmt)
+
+    response = client.audio.speech.create(
+        model="playai-tts",
+        voice=voice,
+        response_format=fmt,
+        input=text,
+    )
+
+    response.stream_to_file(speech_path)
+    return speech_path
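Note: the fixed audio/temp/output.<fmt> path means two concurrent TTS requests overwrite each other's output, and the directory is assumed to exist. A hedged sketch with per-request filenames, mirroring the uuid naming stt_routes.py already uses for uploads:

# Sketch (not part of this commit): unique output paths per request.
import uuid
from pathlib import Path
from config import client  # assumes config.py defines the shared Groq client

def text_to_speech(text: str, voice: str = "Aaliyah-PlayAI", fmt: str = "wav") -> Path:
    out_dir = Path("audio/temp")
    out_dir.mkdir(parents=True, exist_ok=True)  # the committed code assumes this exists
    speech_path = out_dir / f"{uuid.uuid4()}.{fmt}"

    response = client.audio.speech.create(
        model="playai-tts",
        voice=voice,
        response_format=fmt,
        input=text,
    )
    response.stream_to_file(speech_path)
    return speech_path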