malek-messaoudii committed · Commit 3b2b211 · 1 Parent(s): e8aa76b

Update requirements and refactor STT and chatbot services for improved model loading and error handling

Files changed:
- requirements.txt (+2 -1)
- services/chatbot_service.py (+11 -47)
- services/stt_service.py (+58 -32)
requirements.txt
CHANGED

@@ -10,4 +10,5 @@ huggingface_hub>=0.19.0
 python-multipart
 google-genai>=0.4.0
 gtts==2.5.1
-requests==2.31.0
+requests==2.31.0
+ffmpeg-python==0.2.0
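Note that ffmpeg-python is only a Python binding around the system ffmpeg binary, so ffmpeg itself still has to be installed on the server (the refactored STT service below checks for it). As a minimal sketch of what the new dependency can do — not code from this commit; the helper name and sample paths are illustrative — it could normalize arbitrary uploads to 16 kHz mono WAV before transcription:

# Illustrative sketch, not part of this commit: ffmpeg-python shells out to
# the system ffmpeg binary, which must be installed separately.
import ffmpeg  # PyPI package name: ffmpeg-python

def normalize_to_wav(src_path: str, dst_path: str) -> str:
    """Hypothetical helper: convert any audio file to 16 kHz mono WAV."""
    ffmpeg.input(src_path).output(dst_path, ar=16000, ac=1).run(
        overwrite_output=True, quiet=True
    )
    return dst_path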
services/chatbot_service.py
CHANGED

@@ -1,25 +1,21 @@
-
+# services/chatbot_service.py (CONFIRMED WORKING VERSION)
+from transformers import pipeline
 import logging

 logger = logging.getLogger(__name__)

-# Global chatbot
+# Global chatbot pipeline
 chatbot_pipeline = None
-chat_history = {}

 def load_chatbot_model():
-    """Load
+    """Load the free chatbot model"""
     global chatbot_pipeline
     try:
-        logger.info("Loading
-
-        # Use a more reliable model
-        model_name = "microsoft/DialoGPT-small"  # More reliable than medium
+        logger.info("Loading DialoGPT chatbot model...")

         chatbot_pipeline = pipeline(
             "text-generation",
-            model=model_name,
-            tokenizer=model_name,
+            model="microsoft/DialoGPT-small",
             device="cpu"
         )
         logger.info("✅ Chatbot model loaded successfully")
@@ -44,15 +40,14 @@ async def get_chatbot_response(user_text: str, user_id: str = "default") -> str:
         # Prepare prompt
         prompt = f"User: {user_text}\nAssistant:"

-        # Generate response
+        # Generate response
         response = chatbot_pipeline(
             prompt,
-            max_new_tokens=100,
+            max_new_tokens=100,
             do_sample=True,
             temperature=0.7,
             top_p=0.9,
-            pad_token_id=chatbot_pipeline.tokenizer.eos_token_id,
-            repetition_penalty=1.1
+            pad_token_id=chatbot_pipeline.tokenizer.eos_token_id
         )

         # Extract the response
@@ -65,8 +60,6 @@ async def get_chatbot_response(user_text: str, user_id: str = "default") -> str:
         bot_response = generated_text.replace(prompt, "").strip()

         # Clean up the response
-        bot_response = clean_response(bot_response)
-
         if not bot_response:
             bot_response = get_fallback_response(user_text)

@@ -77,41 +70,12 @@ async def get_chatbot_response(user_text: str, user_id: str = "default") -> str:
         logger.error(f"❌ Chatbot response failed: {str(e)}")
         return get_fallback_response(user_text)

-
-def clean_response(response: str) -> str:
-    """Clean and format the chatbot response"""
-    if not response:
-        return ""
-
-    # Remove extra spaces
-    response = ' '.join(response.split())
-
-    # Remove any incomplete sentences at the end
-    if len(response) > 1:
-        # Ensure it ends with proper punctuation
-        if not response.endswith(('.', '!', '?')):
-            # Find the last sentence end
-            last_period = response.rfind('.')
-            last_exclamation = response.rfind('!')
-            last_question = response.rfind('?')
-            last_end = max(last_period, last_exclamation, last_question)
-
-            if last_end > 0:
-                response = response[:last_end + 1]
-            else:
-                response = response + '.'
-
-    return response.strip()
-
-
 def get_fallback_response(user_text: str) -> str:
-    """Provide
+    """Provide fallback responses"""
     fallback_responses = [
         f"I understand you said: '{user_text}'. How can I help you with that?",
         f"That's interesting! Regarding '{user_text}', what would you like to know?",
-        f"Thanks for your message about '{user_text}'. How can I assist you further?",
-        f"I heard you mention '{user_text}'. Could you tell me more about what you need?",
-        f"Regarding '{user_text}', I'd be happy to help. What specific information are you looking for?"
+        f"Thanks for your message about '{user_text}'. How can I assist you further?"
     ]

     import random
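Besides dropping the unused chat_history dict and the clean_response helper, the new pipeline call also removes repetition_penalty=1.1 and the separate tokenizer argument. For context, here is a minimal standalone sketch of the generation pattern the refactored service relies on (model name and sampling parameters are taken from the diff; the example prompt is illustrative, not from the commit):

# Standalone sketch of the pattern used above (not part of the commit).
from transformers import pipeline

chatbot = pipeline("text-generation", model="microsoft/DialoGPT-small", device="cpu")

prompt = "User: Hello there!\nAssistant:"  # illustrative prompt
out = chatbot(
    prompt,
    max_new_tokens=100,
    do_sample=True,
    temperature=0.7,
    top_p=0.9,
    pad_token_id=chatbot.tokenizer.eos_token_id,
)
# The pipeline returns a list of dicts; strip the prompt to keep only the reply.
reply = out[0]["generated_text"].replace(prompt, "").strip()
print(reply)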
services/stt_service.py
CHANGED

@@ -1,29 +1,68 @@
-import
+import torch
+from transformers import pipeline
 import logging
 import tempfile
 import os
+import subprocess

 logger = logging.getLogger(__name__)

+# Global STT pipeline
+stt_pipeline = None
+
+def load_stt_model():
+    """Load the free Whisper model for speech-to-text"""
+    global stt_pipeline
+    try:
+        # Check if ffmpeg is available
+        if not check_ffmpeg():
+            logger.warning("ffmpeg not found. STT may not work properly.")
+
+        logger.info("Loading Whisper-medium STT model...")
+        stt_pipeline = pipeline(
+            "automatic-speech-recognition",
+            model="openai/whisper-medium",
+            device="cpu"
+        )
+        logger.info("✅ Whisper-medium STT model loaded successfully")
+    except Exception as e:
+        logger.error(f"❌ Failed to load Whisper-medium model: {str(e)}")
+        stt_pipeline = None
+
+def check_ffmpeg():
+    """Check if ffmpeg is available"""
+    try:
+        subprocess.run(["ffmpeg", "-version"], capture_output=True, check=True)
+        return True
+    except (subprocess.CalledProcessError, FileNotFoundError):
+        return False
+
 async def speech_to_text(audio_bytes: bytes, filename: str) -> str:
     """
-    Convert audio bytes to text using
-    No ffmpeg required!
+    Convert audio bytes to text using free Whisper model.
     """
+    global stt_pipeline
+
     try:
-
+        if stt_pipeline is None:
+            load_stt_model()
+            if stt_pipeline is None:
+                raise Exception("STT model failed to load")

-        #
-
+        # Check ffmpeg again before processing
+        if not check_ffmpeg():
+            return "Error: ffmpeg is required for audio processing but is not installed. Please install ffmpeg on the server."

-
-        headers = {}
+        logger.info(f"Converting audio to text using Whisper-medium")

-        #
-
+        # Save audio bytes to temporary file
+        with tempfile.NamedTemporaryFile(delete=False, suffix='.wav') as temp_audio:
+            temp_audio.write(audio_bytes)
+            temp_audio_path = temp_audio.name

-
-
+        try:
+            # Transcribe using Whisper
+            result = stt_pipeline(temp_audio_path)
             transcribed_text = result.get("text", "").strip()

             if not transcribed_text:
@@ -32,26 +71,13 @@ async def speech_to_text(audio_bytes: bytes, filename: str) -> str:
             logger.info(f"✅ STT successful: '{transcribed_text}'")
             return transcribed_text

-
-        #
-
-
-        return await fallback_stt(audio_bytes, filename)
+        finally:
+            # Clean up temporary file
+            if os.path.exists(temp_audio_path):
+                os.unlink(temp_audio_path)

     except Exception as e:
         logger.error(f"❌ STT failed: {str(e)}")
-
-
-
-async def fallback_stt(audio_bytes: bytes, filename: str) -> str:
-    """Fallback STT using a simpler approach"""
-    try:
-        # Simple fallback that doesn't require ffmpeg
-        file_size = len(audio_bytes)
-        file_type = filename.split('.')[-1] if '.' in filename else 'unknown'
-
-        return f"Audio file '{filename}' ({file_type}, {file_size} bytes) received successfully. For full STT, please ensure ffmpeg is installed or use the Hugging Face API directly."
-
-    except Exception as e:
-        logger.error(f"Fallback STT also failed: {str(e)}")
-        return "Audio processing failed. Please try a different audio format or install ffmpeg."
+        if "ffmpeg" in str(e).lower():
+            return "Audio processing failed: ffmpeg is required but not installed. Please install ffmpeg on the server."
+        raise Exception(f"Speech-to-text conversion failed: {str(e)}")
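A minimal sketch of how the refactored service could be exercised end to end (the audio file name and the asyncio.run wiring are illustrative, and importing services as a package is assumed; load_stt_model and speech_to_text come from the diff above):

# Illustrative caller, not part of the commit.
import asyncio

from services.stt_service import load_stt_model, speech_to_text

async def main() -> None:
    load_stt_model()  # warm up the Whisper pipeline once at startup
    with open("sample.wav", "rb") as f:  # hypothetical test file
        audio_bytes = f.read()
    print(await speech_to_text(audio_bytes, "sample.wav"))

asyncio.run(main())

Loading whisper-medium on CPU is slow and memory-hungry, so keeping the pipeline in a module-level global, as the diff does, avoids reloading it on every request.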