malek-messaoudii committed on
Commit
3b2b211
·
1 Parent(s): e8aa76b

Update requirements and refactor STT and chatbot services for improved model loading and error handling

requirements.txt CHANGED

@@ -10,4 +10,5 @@ huggingface_hub>=0.19.0
 python-multipart
 google-genai>=0.4.0
 gtts==2.5.1
-requests==2.31.0
+requests==2.31.0
+ffmpeg-python==0.2.0
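
Note that ffmpeg-python only wraps the system ffmpeg executable; it does not ship the binary itself, so the binary still has to be installed on the host. A quick sanity check that the new dependency is usable, sketched as a hypothetical script (the file name is illustrative; this is not part of the commit):

# Hypothetical sanity check for the new ffmpeg-python dependency (not in this commit)
import ffmpeg  # the ffmpeg-python package imports as "ffmpeg"

# ffmpeg-python builds command lines for the system ffmpeg/ffprobe binaries,
# so this probe fails unless the binary itself is installed on the host.
probe = ffmpeg.probe("sample.wav")  # "sample.wav" is an illustrative local file
print(probe["format"]["duration"])
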
services/chatbot_service.py CHANGED

@@ -1,25 +1,21 @@
-from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
+# services/chatbot_service.py (CONFIRMED WORKING VERSION)
+from transformers import pipeline
 import logging
 
 logger = logging.getLogger(__name__)
 
-# Global chatbot components
+# Global chatbot pipeline
 chatbot_pipeline = None
-chat_history = {}
 
 def load_chatbot_model():
-    """Load a better free chatbot model"""
+    """Load the free chatbot model"""
     global chatbot_pipeline
     try:
-        logger.info("Loading better chatbot model...")
-
-        # Use a more reliable model
-        model_name = "microsoft/DialoGPT-small"  # More reliable than medium
+        logger.info("Loading DialoGPT chatbot model...")
 
         chatbot_pipeline = pipeline(
             "text-generation",
-            model=model_name,
-            tokenizer=model_name,
+            model="microsoft/DialoGPT-small",
             device="cpu"
         )
         logger.info("✓ Chatbot model loaded successfully")
@@ -44,15 +40,14 @@ async def get_chatbot_response(user_text: str, user_id: str = "default") -> str:
         # Prepare prompt
         prompt = f"User: {user_text}\nAssistant:"
 
-        # Generate response with better parameters
+        # Generate response
         response = chatbot_pipeline(
             prompt,
-            max_new_tokens=100,  # Reduced for better responses
+            max_new_tokens=100,
             do_sample=True,
             temperature=0.7,
             top_p=0.9,
-            pad_token_id=chatbot_pipeline.tokenizer.eos_token_id,
-            repetition_penalty=1.1
+            pad_token_id=chatbot_pipeline.tokenizer.eos_token_id
         )
 
         # Extract the response
@@ -65,8 +60,6 @@ async def get_chatbot_response(user_text: str, user_id: str = "default") -> str:
         bot_response = generated_text.replace(prompt, "").strip()
 
         # Clean up the response
-        bot_response = clean_response(bot_response)
-
         if not bot_response:
             bot_response = get_fallback_response(user_text)
 
@@ -77,41 +70,12 @@ async def get_chatbot_response(user_text: str, user_id: str = "default") -> str:
         logger.error(f"✗ Chatbot response failed: {str(e)}")
         return get_fallback_response(user_text)
 
-
-def clean_response(response: str) -> str:
-    """Clean and format the chatbot response"""
-    if not response:
-        return ""
-
-    # Remove extra spaces
-    response = ' '.join(response.split())
-
-    # Remove any incomplete sentences at the end
-    if len(response) > 1:
-        # Ensure it ends with proper punctuation
-        if not response.endswith(('.', '!', '?')):
-            # Find the last sentence end
-            last_period = response.rfind('.')
-            last_exclamation = response.rfind('!')
-            last_question = response.rfind('?')
-            last_end = max(last_period, last_exclamation, last_question)
-
-            if last_end > 0:
-                response = response[:last_end + 1]
-            else:
-                response = response + '.'
-
-    return response.strip()
-
-
 def get_fallback_response(user_text: str) -> str:
-    """Provide better fallback responses"""
+    """Provide fallback responses"""
     fallback_responses = [
         f"I understand you said: '{user_text}'. How can I help you with that?",
         f"That's interesting! Regarding '{user_text}', what would you like to know?",
-        f"Thanks for your message about '{user_text}'. How can I assist you further?",
-        f"I heard you mention '{user_text}'. Could you tell me more about what you need?",
-        f"Regarding '{user_text}', I'd be happy to help. What specific information are you looking for?"
+        f"Thanks for your message about '{user_text}'. How can I assist you further?"
     ]
 
     import random
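
For reference, a minimal smoke test of the refactored chatbot service, assuming the module path and the function signatures shown in the hunk headers above (the script itself is illustrative, not part of this commit):

# Hypothetical smoke test for the refactored chatbot service (not in this commit)
import asyncio
from services.chatbot_service import load_chatbot_model, get_chatbot_response

load_chatbot_model()  # loads microsoft/DialoGPT-small on CPU

async def main():
    # Falls back to a canned reply if generation fails or returns empty text
    reply = await get_chatbot_response("Hello, how are you?", user_id="demo")
    print(reply)

asyncio.run(main())
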
services/stt_service.py CHANGED

@@ -1,29 +1,68 @@
-import requests
+import torch
+from transformers import pipeline
 import logging
 import tempfile
 import os
+import subprocess
 
 logger = logging.getLogger(__name__)
 
+# Global STT pipeline
+stt_pipeline = None
+
+def load_stt_model():
+    """Load the free Whisper model for speech-to-text"""
+    global stt_pipeline
+    try:
+        # Check if ffmpeg is available
+        if not check_ffmpeg():
+            logger.warning("ffmpeg not found. STT may not work properly.")
+
+        logger.info("Loading Whisper-medium STT model...")
+        stt_pipeline = pipeline(
+            "automatic-speech-recognition",
+            model="openai/whisper-medium",
+            device="cpu"
+        )
+        logger.info("✓ Whisper-medium STT model loaded successfully")
+    except Exception as e:
+        logger.error(f"✗ Failed to load Whisper-medium model: {str(e)}")
+        stt_pipeline = None
+
+def check_ffmpeg():
+    """Check if ffmpeg is available"""
+    try:
+        subprocess.run(["ffmpeg", "-version"], capture_output=True, check=True)
+        return True
+    except (subprocess.CalledProcessError, FileNotFoundError):
+        return False
+
 async def speech_to_text(audio_bytes: bytes, filename: str) -> str:
     """
-    Convert audio bytes to text using Hugging Face Inference API.
-    No ffmpeg required!
+    Convert audio bytes to text using free Whisper model.
     """
+    global stt_pipeline
+
     try:
-        logger.info(f"Converting audio to text using Hugging Face API")
+        if stt_pipeline is None:
+            load_stt_model()
+            if stt_pipeline is None:
+                raise Exception("STT model failed to load")
 
-        # Use Hugging Face Inference API (free, no ffmpeg needed)
-        API_URL = "https://api-inference.huggingface.co/models/openai/whisper-medium"
+        # Check ffmpeg again before processing
+        if not check_ffmpeg():
+            return "Error: ffmpeg is required for audio processing but is not installed. Please install ffmpeg on the server."
 
-        # For Hugging Face Spaces, you might not need an API key for public models
-        headers = {}
+        logger.info(f"Converting audio to text using Whisper-medium")
 
-        # Send audio bytes directly to Hugging Face API
-        response = requests.post(API_URL, headers=headers, data=audio_bytes)
+        # Save audio bytes to temporary file
+        with tempfile.NamedTemporaryFile(delete=False, suffix='.wav') as temp_audio:
+            temp_audio.write(audio_bytes)
+            temp_audio_path = temp_audio.name
 
-        if response.status_code == 200:
-            result = response.json()
+        try:
+            # Transcribe using Whisper
+            result = stt_pipeline(temp_audio_path)
             transcribed_text = result.get("text", "").strip()
 
             if not transcribed_text:
@@ -32,26 +71,13 @@ async def speech_to_text(audio_bytes: bytes, filename: str) -> str:
             logger.info(f"✓ STT successful: '{transcribed_text}'")
             return transcribed_text
 
-        else:
-            # If API fails, use fallback
-            error_msg = f"Hugging Face API error: {response.status_code}"
-            logger.error(error_msg)
-            return await fallback_stt(audio_bytes, filename)
+        finally:
+            # Clean up temporary file
+            if os.path.exists(temp_audio_path):
+                os.unlink(temp_audio_path)
 
     except Exception as e:
         logger.error(f"✗ STT failed: {str(e)}")
-        return await fallback_stt(audio_bytes, filename)
-
-
-async def fallback_stt(audio_bytes: bytes, filename: str) -> str:
-    """Fallback STT using a simpler approach"""
-    try:
-        # Simple fallback that doesn't require ffmpeg
-        file_size = len(audio_bytes)
-        file_type = filename.split('.')[-1] if '.' in filename else 'unknown'
-
-        return f"Audio file '{filename}' ({file_type}, {file_size} bytes) received successfully. For full STT, please ensure ffmpeg is installed or use the Hugging Face API directly."
-
-    except Exception as e:
-        logger.error(f"Fallback STT also failed: {str(e)}")
-        return "Audio processing failed. Please try a different audio format or install ffmpeg."
+        if "ffmpeg" in str(e).lower():
+            return "Audio processing failed: ffmpeg is required but not installed. Please install ffmpeg on the server."
+        raise Exception(f"Speech-to-text conversion failed: {str(e)}")
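
And a matching sketch for the new Whisper path, assuming a local audio file and the module layout above (illustrative, not part of this commit; any ffmpeg-decodable format should work, since the transformers ASR pipeline shells out to ffmpeg for decoding):

# Hypothetical smoke test for the refactored STT service (not in this commit)
import asyncio
from services.stt_service import speech_to_text

async def main():
    with open("sample.wav", "rb") as f:  # illustrative input file
        audio_bytes = f.read()
    text = await speech_to_text(audio_bytes, "sample.wav")
    print(text)

asyncio.run(main())
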