malek-messaoudii committed on
Commit
200de02
·
1 Parent(s): bd8a3b8

feat: Implement audio transcription and speech generation endpoints with file upload support and improved response handling

Browse files
Files changed (1) hide show
  1. routes/mcp_routes.py +95 -31
routes/mcp_routes.py CHANGED
@@ -1,10 +1,14 @@
1
  """Routes pour exposer MCP via FastAPI pour Swagger UI"""
2
 
3
- from fastapi import APIRouter, HTTPException
 
4
  from typing import Dict, Any, Optional
5
  from pydantic import BaseModel, Field
6
  import logging
7
  import json
 
 
 
8
 
9
  from services.mcp_service import mcp_server
10
  from models.mcp_models import (
@@ -52,16 +56,6 @@ class MatchKeypointRequest(BaseModel):
52
  }
53
  }
54
 
55
- class TranscribeAudioRequest(BaseModel):
56
- """Request pour transcrire un audio"""
57
- audio_path: str = Field(..., description="Chemin vers le fichier audio")
58
-
59
- class Config:
60
- json_schema_extra = {
61
- "example": {
62
- "audio_path": "/path/to/audio.wav"
63
- }
64
- }
65
 
66
  class GenerateSpeechRequest(BaseModel):
67
  """Request pour générer de la parole"""
@@ -302,10 +296,10 @@ async def mcp_detect_stance(request: DetectStanceRequest):
302
  "argument": request.argument
303
  })
304
 
305
- # Extraire les données du résultat MCP
306
  parsed_result = None
307
  if isinstance(result, dict):
308
- # Si le résultat contient une clé "result" avec une liste de ContentBlock
309
  if "result" in result and isinstance(result["result"], list) and len(result["result"]) > 0:
310
  content_block = result["result"][0]
311
  if hasattr(content_block, 'text') and content_block.text:
@@ -313,7 +307,11 @@ async def mcp_detect_stance(request: DetectStanceRequest):
313
  parsed_result = json.loads(content_block.text)
314
  except json.JSONDecodeError:
315
  raise HTTPException(status_code=500, detail="Invalid JSON response from MCP tool")
 
 
 
316
  else:
 
317
  parsed_result = result
318
  elif isinstance(result, (list, tuple)) and len(result) > 0:
319
  if hasattr(result[0], 'text') and result[0].text:
@@ -321,11 +319,13 @@ async def mcp_detect_stance(request: DetectStanceRequest):
321
  parsed_result = json.loads(result[0].text)
322
  except json.JSONDecodeError:
323
  raise HTTPException(status_code=500, detail="Invalid JSON response from MCP tool")
 
 
324
  else:
325
  parsed_result = result
326
 
327
- if not parsed_result:
328
- raise HTTPException(status_code=500, detail="Empty response from MCP tool")
329
 
330
  # Construire la réponse structurée
331
  response = DetectStanceResponse(
@@ -340,6 +340,9 @@ async def mcp_detect_stance(request: DetectStanceRequest):
340
 
341
  except HTTPException:
342
  raise
 
 
 
343
  except Exception as e:
344
  logger.error(f"Error in detect_stance: {e}")
345
  raise HTTPException(status_code=500, detail=f"Error executing tool detect_stance: {e}")
@@ -353,7 +356,7 @@ async def mcp_match_keypoint(request: MatchKeypointRequest):
353
  "key_point": request.key_point
354
  })
355
 
356
- # Extraire les données du résultat MCP
357
  parsed_result = None
358
  if isinstance(result, dict):
359
  if "result" in result and isinstance(result["result"], list) and len(result["result"]) > 0:
@@ -363,6 +366,8 @@ async def mcp_match_keypoint(request: MatchKeypointRequest):
363
  parsed_result = json.loads(content_block.text)
364
  except json.JSONDecodeError:
365
  raise HTTPException(status_code=500, detail="Invalid JSON response from MCP tool")
 
 
366
  else:
367
  parsed_result = result
368
  elif isinstance(result, (list, tuple)) and len(result) > 0:
@@ -371,11 +376,13 @@ async def mcp_match_keypoint(request: MatchKeypointRequest):
371
  parsed_result = json.loads(result[0].text)
372
  except json.JSONDecodeError:
373
  raise HTTPException(status_code=500, detail="Invalid JSON response from MCP tool")
 
 
374
  else:
375
  parsed_result = result
376
 
377
- if not parsed_result:
378
- raise HTTPException(status_code=500, detail="Empty response from MCP tool")
379
 
380
  # Construire la réponse structurée
381
  response = MatchKeypointResponse(
@@ -390,16 +397,35 @@ async def mcp_match_keypoint(request: MatchKeypointRequest):
390
 
391
  except HTTPException:
392
  raise
 
 
 
393
  except Exception as e:
394
  logger.error(f"Error in match_keypoint_argument: {e}")
395
  raise HTTPException(status_code=500, detail=f"Error executing tool match_keypoint_argument: {e}")
396
 
397
  @router.post("/tools/transcribe-audio", response_model=TranscribeAudioResponse, summary="Transcrire un audio en texte")
398
- async def mcp_transcribe_audio(request: TranscribeAudioRequest):
399
- """Convertit un fichier audio en texte"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
400
  try:
 
401
  result = await mcp_server.call_tool("transcribe_audio", {
402
- "audio_path": request.audio_path
403
  })
404
 
405
  # Extraire le texte du résultat MCP
@@ -436,10 +462,14 @@ async def mcp_transcribe_audio(request: TranscribeAudioRequest):
436
  except Exception as e:
437
  logger.error(f"Error in transcribe_audio: {e}")
438
  raise HTTPException(status_code=500, detail=f"Error executing tool transcribe_audio: {e}")
 
 
 
 
439
 
440
- @router.post("/tools/generate-speech", response_model=GenerateSpeechResponse, summary="Générer de la parole à partir de texte")
441
  async def mcp_generate_speech(request: GenerateSpeechRequest):
442
- """Convertit du texte en fichier audio"""
443
  try:
444
  result = await mcp_server.call_tool("generate_speech", {
445
  "text": request.text,
@@ -466,12 +496,35 @@ async def mcp_generate_speech(request: GenerateSpeechRequest):
466
  else:
467
  audio_path = str(result)
468
 
 
 
 
 
 
 
 
 
 
469
  if not audio_path:
470
  raise HTTPException(status_code=500, detail="Empty audio path from MCP tool")
471
 
472
- response = GenerateSpeechResponse(audio_path=audio_path)
 
 
 
 
 
 
 
473
  logger.info(f"Speech generated: {audio_path}")
474
- return response
 
 
 
 
 
 
 
475
 
476
  except HTTPException:
477
  raise
@@ -488,7 +541,7 @@ async def mcp_generate_argument(request: GenerateRequest):
488
  "position": request.position
489
  })
490
 
491
- # Extraire les données du résultat MCP
492
  parsed_result = None
493
  if isinstance(result, dict):
494
  if "result" in result and isinstance(result["result"], list) and len(result["result"]) > 0:
@@ -497,7 +550,10 @@ async def mcp_generate_argument(request: GenerateRequest):
497
  try:
498
  parsed_result = json.loads(content_block.text)
499
  except json.JSONDecodeError:
500
- raise HTTPException(status_code=500, detail="Invalid JSON response from MCP tool")
 
 
 
501
  else:
502
  parsed_result = result
503
  elif isinstance(result, (list, tuple)) and len(result) > 0:
@@ -505,18 +561,26 @@ async def mcp_generate_argument(request: GenerateRequest):
505
  try:
506
  parsed_result = json.loads(result[0].text)
507
  except json.JSONDecodeError:
508
- raise HTTPException(status_code=500, detail="Invalid JSON response from MCP tool")
 
 
509
  else:
510
  parsed_result = result
511
 
512
- if not parsed_result:
513
- raise HTTPException(status_code=500, detail="Empty response from MCP tool")
 
 
 
 
 
 
514
 
515
  # Construire la réponse structurée
516
  response = GenerateResponse(
517
  topic=parsed_result.get("topic", request.topic),
518
  position=parsed_result.get("position", request.position),
519
- argument=parsed_result.get("argument", ""),
520
  timestamp=datetime.now().isoformat()
521
  )
522
 
 
1
  """Routes pour exposer MCP via FastAPI pour Swagger UI"""
2
 
3
+ from fastapi import APIRouter, HTTPException, UploadFile, File
4
+ from fastapi.responses import FileResponse
5
  from typing import Dict, Any, Optional
6
  from pydantic import BaseModel, Field
7
  import logging
8
  import json
9
+ import tempfile
10
+ import os
11
+ from pathlib import Path
12
 
13
  from services.mcp_service import mcp_server
14
  from models.mcp_models import (
 
56
  }
57
  }
58
 
 
 
 
 
 
 
 
 
 
 
59
 
60
  class GenerateSpeechRequest(BaseModel):
61
  """Request pour générer de la parole"""
 
296
  "argument": request.argument
297
  })
298
 
299
+ # FastMCP avec json_response=True retourne directement le dict
300
  parsed_result = None
301
  if isinstance(result, dict):
302
+ # Vérifier si c'est un dict avec "result" contenant des ContentBlocks
303
  if "result" in result and isinstance(result["result"], list) and len(result["result"]) > 0:
304
  content_block = result["result"][0]
305
  if hasattr(content_block, 'text') and content_block.text:
 
307
  parsed_result = json.loads(content_block.text)
308
  except json.JSONDecodeError:
309
  raise HTTPException(status_code=500, detail="Invalid JSON response from MCP tool")
310
+ else:
311
+ # Si pas de text, utiliser le dict directement
312
+ parsed_result = result
313
  else:
314
+ # Dict direct retourné par le tool
315
  parsed_result = result
316
  elif isinstance(result, (list, tuple)) and len(result) > 0:
317
  if hasattr(result[0], 'text') and result[0].text:
 
319
  parsed_result = json.loads(result[0].text)
320
  except json.JSONDecodeError:
321
  raise HTTPException(status_code=500, detail="Invalid JSON response from MCP tool")
322
+ else:
323
+ parsed_result = result[0] if isinstance(result[0], dict) else result
324
  else:
325
  parsed_result = result
326
 
327
+ if not parsed_result or not isinstance(parsed_result, dict):
328
+ raise HTTPException(status_code=500, detail="Invalid response format from MCP tool")
329
 
330
  # Construire la réponse structurée
331
  response = DetectStanceResponse(
 
340
 
341
  except HTTPException:
342
  raise
343
+ except KeyError as e:
344
+ logger.error(f"Missing key in detect_stance response: {e}")
345
+ raise HTTPException(status_code=500, detail=f"Invalid response format: missing {e}")
346
  except Exception as e:
347
  logger.error(f"Error in detect_stance: {e}")
348
  raise HTTPException(status_code=500, detail=f"Error executing tool detect_stance: {e}")
 
356
  "key_point": request.key_point
357
  })
358
 
359
+ # FastMCP avec json_response=True retourne directement le dict
360
  parsed_result = None
361
  if isinstance(result, dict):
362
  if "result" in result and isinstance(result["result"], list) and len(result["result"]) > 0:
 
366
  parsed_result = json.loads(content_block.text)
367
  except json.JSONDecodeError:
368
  raise HTTPException(status_code=500, detail="Invalid JSON response from MCP tool")
369
+ else:
370
+ parsed_result = result
371
  else:
372
  parsed_result = result
373
  elif isinstance(result, (list, tuple)) and len(result) > 0:
 
376
  parsed_result = json.loads(result[0].text)
377
  except json.JSONDecodeError:
378
  raise HTTPException(status_code=500, detail="Invalid JSON response from MCP tool")
379
+ else:
380
+ parsed_result = result[0] if isinstance(result[0], dict) else result
381
  else:
382
  parsed_result = result
383
 
384
+ if not parsed_result or not isinstance(parsed_result, dict):
385
+ raise HTTPException(status_code=500, detail="Invalid response format from MCP tool")
386
 
387
  # Construire la réponse structurée
388
  response = MatchKeypointResponse(
 
397
 
398
  except HTTPException:
399
  raise
400
+ except KeyError as e:
401
+ logger.error(f"Missing key in match_keypoint response: {e}")
402
+ raise HTTPException(status_code=500, detail=f"Invalid response format: missing {e}")
403
  except Exception as e:
404
  logger.error(f"Error in match_keypoint_argument: {e}")
405
  raise HTTPException(status_code=500, detail=f"Error executing tool match_keypoint_argument: {e}")
406
 
407
  @router.post("/tools/transcribe-audio", response_model=TranscribeAudioResponse, summary="Transcrire un audio en texte")
408
+ async def mcp_transcribe_audio(file: UploadFile = File(...)):
409
+ """Convertit un fichier audio en texte (upload de fichier)"""
410
+ # Vérifier le type de fichier
411
+ if not file.content_type or not file.content_type.startswith('audio/'):
412
+ raise HTTPException(status_code=400, detail="File must be an audio file")
413
+
414
+ # Créer un fichier temporaire
415
+ with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_file:
416
+ temp_path = temp_file.name
417
+ content = await file.read()
418
+
419
+ if len(content) == 0:
420
+ os.unlink(temp_path)
421
+ raise HTTPException(status_code=400, detail="Audio file is empty")
422
+
423
+ temp_file.write(content)
424
+
425
  try:
426
+ # Appeler le service MCP avec le chemin temporaire
427
  result = await mcp_server.call_tool("transcribe_audio", {
428
+ "audio_path": temp_path
429
  })
430
 
431
  # Extraire le texte du résultat MCP
 
462
  except Exception as e:
463
  logger.error(f"Error in transcribe_audio: {e}")
464
  raise HTTPException(status_code=500, detail=f"Error executing tool transcribe_audio: {e}")
465
+ finally:
466
+ # Nettoyer le fichier temporaire
467
+ if os.path.exists(temp_path):
468
+ os.unlink(temp_path)
469
 
470
+ @router.post("/tools/generate-speech", summary="Générer de la parole à partir de texte")
471
  async def mcp_generate_speech(request: GenerateSpeechRequest):
472
+ """Convertit du texte en fichier audio (téléchargeable)"""
473
  try:
474
  result = await mcp_server.call_tool("generate_speech", {
475
  "text": request.text,
 
496
  else:
497
  audio_path = str(result)
498
 
499
+ # Nettoyer le chemin si c'est une représentation string d'objet
500
+ if audio_path and isinstance(audio_path, str):
501
+ # Si c'est une représentation d'objet TextContent, extraire le chemin
502
+ if "text='" in audio_path and ".wav" in audio_path:
503
+ import re
504
+ match = re.search(r"text='([^']+)'", audio_path)
505
+ if match:
506
+ audio_path = match.group(1)
507
+
508
  if not audio_path:
509
  raise HTTPException(status_code=500, detail="Empty audio path from MCP tool")
510
 
511
+ # Vérifier que le fichier existe
512
+ if not Path(audio_path).exists():
513
+ raise HTTPException(status_code=500, detail=f"Audio file not found: {audio_path}")
514
+
515
+ # Déterminer le type MIME
516
+ media_type = "audio/wav" if request.format == "wav" else "audio/mpeg"
517
+
518
+ # Retourner le fichier pour téléchargement
519
  logger.info(f"Speech generated: {audio_path}")
520
+ return FileResponse(
521
+ path=audio_path,
522
+ filename=f"speech.{request.format}",
523
+ media_type=media_type,
524
+ headers={
525
+ "Content-Disposition": f"attachment; filename=speech.{request.format}"
526
+ }
527
+ )
528
 
529
  except HTTPException:
530
  raise
 
541
  "position": request.position
542
  })
543
 
544
+ # FastMCP avec json_response=True retourne directement le dict
545
  parsed_result = None
546
  if isinstance(result, dict):
547
  if "result" in result and isinstance(result["result"], list) and len(result["result"]) > 0:
 
550
  try:
551
  parsed_result = json.loads(content_block.text)
552
  except json.JSONDecodeError:
553
+ # Si ce n'est pas du JSON, c'est peut-être juste le texte
554
+ parsed_result = {"argument": content_block.text}
555
+ else:
556
+ parsed_result = result
557
  else:
558
  parsed_result = result
559
  elif isinstance(result, (list, tuple)) and len(result) > 0:
 
561
  try:
562
  parsed_result = json.loads(result[0].text)
563
  except json.JSONDecodeError:
564
+ parsed_result = {"argument": result[0].text}
565
+ else:
566
+ parsed_result = result[0] if isinstance(result[0], dict) else result
567
  else:
568
  parsed_result = result
569
 
570
+ if not parsed_result or not isinstance(parsed_result, dict):
571
+ raise HTTPException(status_code=500, detail="Invalid response format from MCP tool")
572
+
573
+ # Extraire l'argument (peut être dans "argument" ou directement dans le dict)
574
+ argument_text = parsed_result.get("argument", "")
575
+ if not argument_text:
576
+ # Essayer de trouver le texte ailleurs dans la réponse
577
+ argument_text = str(parsed_result)
578
 
579
  # Construire la réponse structurée
580
  response = GenerateResponse(
581
  topic=parsed_result.get("topic", request.topic),
582
  position=parsed_result.get("position", request.position),
583
+ argument=argument_text,
584
  timestamp=datetime.now().isoformat()
585
  )
586