Yassine Mhirsi committed on
Commit
430b54f
·
1 Parent(s): 6e8d513

feat: Add analysis endpoints and service for processing arguments, extracting topics, and predicting stance

Browse files
models/__init__.py CHANGED
@@ -44,6 +44,15 @@ from .user import (
44
  UserGetRequest,
45
  )
46
 
 
 
 
 
 
 
 
 
 
47
  # Import MCP-related schemas
48
  from .mcp_models import (
49
  ToolCallRequest,
@@ -85,6 +94,12 @@ __all__ = [
85
  "UserResponse",
86
  "UserUpdateNameRequest",
87
  "UserGetRequest",
 
 
 
 
 
 
88
  # MCP schemas
89
  "ToolCallRequest",
90
  "ToolCallResponse",
 
44
  UserGetRequest,
45
  )
46
 
47
+ # Import analysis-related schemas
48
+ from .analysis import (
49
+ AnalysisRequest,
50
+ AnalysisResponse,
51
+ AnalysisResult,
52
+ GetAnalysisRequest,
53
+ GetAnalysisResponse,
54
+ )
55
+
56
  # Import MCP-related schemas
57
  from .mcp_models import (
58
  ToolCallRequest,
 
94
  "UserResponse",
95
  "UserUpdateNameRequest",
96
  "UserGetRequest",
97
+ # Analysis schemas
98
+ "AnalysisRequest",
99
+ "AnalysisResponse",
100
+ "AnalysisResult",
101
+ "GetAnalysisRequest",
102
+ "GetAnalysisResponse",
103
  # MCP schemas
104
  "ToolCallRequest",
105
  "ToolCallResponse",
models/analysis.py ADDED
@@ -0,0 +1,100 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Pydantic models for analysis endpoints"""
2
+
3
+ from pydantic import BaseModel, Field, ConfigDict
4
+ from typing import List, Optional
5
+
6
+
7
class AnalysisRequest(BaseModel):
    """Request model for analysis with arguments"""

    # Batch of argument texts to analyze; validated to 1..100 entries.
    arguments: List[str] = Field(
        ...,
        min_length=1,
        max_length=100,
        description="List of argument texts to analyze (max 100)",
    )

    model_config = ConfigDict(
        json_schema_extra={
            "example": {
                "arguments": [
                    "Social media companies must NOT be allowed to track people across websites.",
                    "I don't think universal basic income is a good idea — it'll disincentivize work.",
                    "We must invest in renewable energy to combat climate change."
                ]
            }
        }
    )
25
+
26
+
27
class AnalysisResult(BaseModel):
    """Model for a single analysis result"""

    # Identity
    id: str = Field(..., description="Analysis result UUID")
    user_id: str = Field(..., description="User UUID")
    # Analyzed content
    argument: str = Field(..., description="The argument text")
    topic: str = Field(..., description="Extracted topic")
    # Stance prediction; probabilities are bounded to [0, 1].
    predicted_stance: str = Field(..., description="PRO or CON")
    confidence: float = Field(..., ge=0.0, le=1.0, description="Confidence score")
    probability_con: float = Field(..., ge=0.0, le=1.0, description="Probability of CON")
    probability_pro: float = Field(..., ge=0.0, le=1.0, description="Probability of PRO")
    # Timestamps as ISO-8601 strings (as stored in the database).
    created_at: str = Field(..., description="Creation timestamp")
    updated_at: str = Field(..., description="Last update timestamp")

    model_config = ConfigDict(
        json_schema_extra={
            "example": {
                "id": "123e4567-e89b-12d3-a456-426614174000",
                "user_id": "123e4567-e89b-12d3-a456-426614174000",
                "argument": "Social media companies must NOT be allowed to track people across websites.",
                "topic": "social media tracking and cross-website user privacy",
                "predicted_stance": "CON",
                "confidence": 0.9234,
                "probability_con": 0.9234,
                "probability_pro": 0.0766,
                "created_at": "2024-01-01T12:00:00Z",
                "updated_at": "2024-01-01T12:00:00Z"
            }
        }
    )
56
+
57
+
58
class AnalysisResponse(BaseModel):
    """Response model for analysis endpoint"""

    results: List[AnalysisResult] = Field(..., description="List of analysis results")
    total_processed: int = Field(..., description="Number of arguments processed")
    timestamp: str = Field(..., description="Analysis timestamp")

    model_config = ConfigDict(
        json_schema_extra={
            "example": {
                "results": [
                    {
                        "id": "123e4567-e89b-12d3-a456-426614174000",
                        "user_id": "123e4567-e89b-12d3-a456-426614174000",
                        "argument": "Social media companies must NOT be allowed to track people across websites.",
                        "topic": "social media tracking and cross-website user privacy",
                        "predicted_stance": "CON",
                        "confidence": 0.9234,
                        "probability_con": 0.9234,
                        "probability_pro": 0.0766,
                        "created_at": "2024-01-01T12:00:00Z",
                        "updated_at": "2024-01-01T12:00:00Z"
                    }
                ],
                "total_processed": 1,
                "timestamp": "2024-01-01T12:00:00Z"
            }
        }
    )
86
+
87
+
88
class GetAnalysisRequest(BaseModel):
    """Request model for getting user's analysis results"""

    # Pagination window: both fields are optional and defaulted.
    limit: Optional[int] = Field(
        100, ge=1, le=1000, description="Maximum number of results"
    )
    offset: Optional[int] = Field(
        0, ge=0, description="Number of results to skip"
    )
92
+
93
+
94
class GetAnalysisResponse(BaseModel):
    """Response model for getting analysis results"""

    # Page of results plus the pagination parameters that produced it,
    # echoed back so clients can compute the next offset.
    results: List[AnalysisResult] = Field(..., description="List of analysis results")
    total: int = Field(..., description="Total number of results")
    limit: int = Field(..., description="Limit used")
    offset: int = Field(..., description="Offset used")
100
+
routes/__init__.py CHANGED
@@ -1,7 +1,7 @@
1
  """API route handlers"""
2
 
3
  from fastapi import APIRouter
4
- from . import root, health, stance, label, generate, topic, user
5
  from routes.tts_routes import router as audio_router
6
  # Create main router
7
  api_router = APIRouter()
@@ -14,6 +14,7 @@ api_router.include_router(label.router, prefix="/label")
14
  api_router.include_router(generate.router, prefix="/generate")
15
  api_router.include_router(topic.router, prefix="/topic")
16
  api_router.include_router(user.router, prefix="/user")
 
17
  api_router.include_router(audio_router)
18
 
19
  __all__ = ["api_router"]
 
1
  """API route handlers"""
2
 
3
  from fastapi import APIRouter
4
+ from . import root, health, stance, label, generate, topic, user, analysis
5
  from routes.tts_routes import router as audio_router
6
  # Create main router
7
  api_router = APIRouter()
 
14
  api_router.include_router(generate.router, prefix="/generate")
15
  api_router.include_router(topic.router, prefix="/topic")
16
  api_router.include_router(user.router, prefix="/user")
17
+ api_router.include_router(analysis.router, prefix="")
18
  api_router.include_router(audio_router)
19
 
20
  __all__ = ["api_router"]
routes/analysis.py ADDED
@@ -0,0 +1,189 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Analysis endpoints for processing arguments, extracting topics, and predicting stance"""
2
+
3
import csv
import io
import logging
from datetime import datetime
from typing import List, Optional

from fastapi import APIRouter, HTTPException, Header, UploadFile, File
9
+
10
+ from services.analysis_service import analysis_service
11
+ from models.analysis import (
12
+ AnalysisRequest,
13
+ AnalysisResponse,
14
+ AnalysisResult,
15
+ GetAnalysisRequest,
16
+ GetAnalysisResponse,
17
+ )
18
+
19
+ router = APIRouter()
20
+ logger = logging.getLogger(__name__)
21
+
22
+
23
def parse_csv_file(file_content: bytes) -> list[str]:
    """
    Parse CSV file content and extract argument strings.

    The first row is treated as a header when any of its columns is named
    ``argument``, ``text``, or ``content`` (case-insensitive); arguments are
    then read from that column. Otherwise every row is read from the first
    column. Empty cells are skipped.

    Note: annotated with the builtin ``list[str]`` because this module only
    imports ``Optional`` from ``typing`` — the original ``List[str]``
    annotation raised ``NameError`` at import time.

    Args:
        file_content: CSV file content as bytes (assumed UTF-8 encoded).

    Returns:
        List of non-empty argument strings.

    Raises:
        ValueError: If the content cannot be decoded or parsed as CSV.
    """
    try:
        # Decode bytes to string (UTF-8 only; other encodings raise ValueError below).
        content = file_content.decode('utf-8')

        # Parse CSV into rows up front so we can inspect the header.
        rows = list(csv.reader(io.StringIO(content)))
        if not rows:
            return []

        # Heuristic header detection: any recognized column name means row 0 is a header.
        header = rows[0]
        known_names = ('argument', 'text', 'content')
        start_idx = 1 if any(col.lower() in known_names for col in header) else 0

        # Locate the argument column; default to the first column.
        arg_col_idx = 0
        if start_idx == 1:
            for idx, col in enumerate(header):
                if col.lower() in known_names:
                    arg_col_idx = idx
                    break

        # Extract non-empty, stripped arguments from the chosen column.
        arguments = []
        for row in rows[start_idx:]:
            if row and len(row) > arg_col_idx:
                arg = row[arg_col_idx].strip()
                if arg:  # Only add non-empty arguments
                    arguments.append(arg)

        return arguments

    except Exception as e:
        logger.error(f"Error parsing CSV file: {str(e)}")
        # Chain the original exception so the decode/parse cause stays visible.
        raise ValueError(f"Failed to parse CSV file: {str(e)}") from e
70
+
71
+
72
@router.post("/analyse", response_model=AnalysisResponse, tags=["Analysis"])
async def analyse_arguments(
    request: Optional[AnalysisRequest] = None,
    file: Optional[UploadFile] = File(None),
    x_user_id: Optional[str] = Header(None, alias="X-User-ID")
):
    """
    Analyze a batch of arguments: extract a topic and predict stance for each.

    Input comes from exactly one of two sources:
    - a JSON body with an `arguments` array, or
    - an uploaded CSV file containing the arguments.

    - **X-User-ID**: User UUID (required in header)
    - **arguments**: List of argument texts (JSON path)
    - **file**: CSV file with arguments (upload path)

    Returns analysis results with extracted topics and stance predictions.

    NOTE(review): accepting an optional JSON body and an optional multipart
    File on the same endpoint is fragile in FastAPI — confirm clients can
    actually hit both paths.
    """
    # The user id header is mandatory; reject early with a 400.
    if not x_user_id:
        raise HTTPException(status_code=400, detail="X-User-ID header is required")

    try:
        arguments = []

        # CSV upload takes precedence over a JSON body when both are supplied.
        if file is not None:
            file_content = await file.read()
            arguments = parse_csv_file(file_content)

            if not arguments:
                raise HTTPException(
                    status_code=400,
                    detail="CSV file is empty or contains no valid arguments"
                )

            logger.info(f"Parsed {len(arguments)} arguments from CSV file")

        elif request is not None and request.arguments:
            arguments = request.arguments
            logger.info(f"Received {len(arguments)} arguments from JSON body")

        else:
            raise HTTPException(
                status_code=400,
                detail="Either 'arguments' in JSON body or CSV 'file' must be provided"
            )

        # Run topic extraction + stance prediction (persists to the database).
        results = analysis_service.analyze_arguments(
            user_id=x_user_id,
            arguments=arguments
        )

        # Re-validate each stored row through the response schema.
        analysis_results = [AnalysisResult(**row) for row in results]

        logger.info(f"Analysis completed: {len(analysis_results)} results")

        return AnalysisResponse(
            results=analysis_results,
            total_processed=len(analysis_results),
            timestamp=datetime.now().isoformat()
        )

    except HTTPException:
        # Already shaped for the client — pass through unchanged.
        raise
    except ValueError as e:
        # Validation problems (e.g. unparseable CSV) map to a 400.
        logger.error(f"Validation error: {str(e)}")
        raise HTTPException(status_code=400, detail=str(e))
    except Exception as e:
        logger.error(f"Analysis error: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Analysis failed: {str(e)}")
147
+
148
+
149
@router.get("/analyse", response_model=GetAnalysisResponse, tags=["Analysis"])
async def get_analysis_results(
    limit: int = 100,
    offset: int = 0,
    x_user_id: Optional[str] = Header(None, alias="X-User-ID")
):
    """
    Return a page of the calling user's stored analysis results.

    - **X-User-ID**: User UUID (required in header)
    - **limit**: Maximum number of results to return (default: 100, max: 1000)
    - **offset**: Number of results to skip (default: 0)

    Returns paginated analysis results.
    """
    # Reject requests without a user id before touching the service layer.
    if not x_user_id:
        raise HTTPException(status_code=400, detail="X-User-ID header is required")

    try:
        rows = analysis_service.get_user_analysis_results(
            user_id=x_user_id,
            limit=limit,
            offset=offset
        )

        # Validate each stored row through the response schema and echo the
        # pagination parameters back to the client.
        return GetAnalysisResponse(
            results=[AnalysisResult(**row) for row in rows],
            total=len(rows),
            limit=limit,
            offset=offset
        )

    except Exception as e:
        logger.error(f"Error getting analysis results: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Failed to get analysis results: {str(e)}")
189
+
services/__init__.py CHANGED
@@ -10,6 +10,7 @@ from .tts_service import text_to_speech
10
  from .topic_service import TopicService, topic_service
11
  from .database_service import DatabaseService, database_service
12
  from .user_service import UserService, user_service
 
13
 
14
  __all__ = [
15
  "StanceModelManager",
@@ -24,6 +25,8 @@ __all__ = [
24
  "database_service",
25
  "UserService",
26
  "user_service",
 
 
27
 
28
  # NEW exports
29
  "speech_to_text",
 
10
  from .topic_service import TopicService, topic_service
11
  from .database_service import DatabaseService, database_service
12
  from .user_service import UserService, user_service
13
+ from .analysis_service import AnalysisService, analysis_service
14
 
15
  __all__ = [
16
  "StanceModelManager",
 
25
  "database_service",
26
  "UserService",
27
  "user_service",
28
+ "AnalysisService",
29
+ "analysis_service",
30
 
31
  # NEW exports
32
  "speech_to_text",
services/analysis_service.py ADDED
@@ -0,0 +1,165 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Service for analysis operations: processing arguments, extracting topics, and predicting stance"""
2
+
3
+ import logging
4
+ from typing import List, Dict, Optional
5
+ from datetime import datetime
6
+
7
+ from services.database_service import database_service
8
+ from services.topic_service import topic_service
9
+ from services.stance_model_manager import stance_model_manager
10
+
11
+ logger = logging.getLogger(__name__)
12
+
13
+
14
class AnalysisService:
    """Service for analyzing arguments with topic extraction and stance prediction.

    Pipeline: extract one topic per argument, predict PRO/CON stance for each
    (argument, topic) pair, then persist all rows to the ``analysis_results``
    table through the shared Supabase client.
    """

    def __init__(self):
        # Supabase table that stores one row per analyzed argument.
        self.table_name = "analysis_results"

    def _get_client(self):
        """Return the shared Supabase client from the database service."""
        return database_service.get_client()

    def analyze_arguments(
        self,
        user_id: str,
        arguments: List[str]
    ) -> List[Dict]:
        """
        Analyze arguments: extract topics, predict stance, and save to database.

        Args:
            user_id: User UUID
            arguments: List of argument texts

        Returns:
            List of analysis results with argument, topic, and stance prediction

        Raises:
            ValueError: If the arguments list is empty.
            RuntimeError: If topic extraction, stance prediction, or
                persistence fails.
        """
        try:
            if not arguments:
                raise ValueError("Arguments list cannot be empty")

            logger.info(f"Starting analysis for {len(arguments)} arguments for user {user_id}")

            # Step 1: Extract topics for all arguments
            logger.info("Step 1: Extracting topics...")
            topics = topic_service.batch_extract_topics(arguments)

            # The topic list must align 1:1 with the argument list for zip below.
            if len(topics) != len(arguments):
                raise RuntimeError(f"Topic extraction returned {len(topics)} topics but expected {len(arguments)}")

            # Step 2: Predict stance for each argument-topic pair
            logger.info("Step 2: Predicting stance for argument-topic pairs...")
            stance_results = []
            for arg, topic in zip(arguments, topics):
                if topic is None:
                    # Per-item extraction failure: skip the item rather than abort the batch.
                    logger.warning(f"Skipping argument with null topic: {arg[:50]}...")
                    continue

                stance_result = stance_model_manager.predict(topic, arg)
                stance_results.append({
                    "argument": arg,
                    "topic": topic,
                    "predicted_stance": stance_result["predicted_stance"],
                    "confidence": stance_result["confidence"],
                    "probability_con": stance_result["probability_con"],
                    "probability_pro": stance_result["probability_pro"],
                })

            # Step 3: Save all results to database
            logger.info(f"Step 3: Saving {len(stance_results)} results to database...")
            saved_results = self._save_analysis_results(user_id, stance_results)

            logger.info(f"Analysis completed: {len(saved_results)} results saved")
            return saved_results

        except ValueError:
            # Propagate validation errors unchanged so callers (e.g. the API
            # layer) can map them to a 400 instead of a generic 500.
            raise
        except Exception as e:
            logger.error(f"Error in analyze_arguments: {str(e)}")
            # Chain the original exception so the root cause stays in the traceback.
            raise RuntimeError(f"Analysis failed: {str(e)}") from e

    def _save_analysis_results(
        self,
        user_id: str,
        results: List[Dict]
    ) -> List[Dict]:
        """
        Save analysis results to database in one batch insert.

        Args:
            user_id: User UUID
            results: List of analysis result dictionaries

        Returns:
            List of saved results with database IDs

        Raises:
            RuntimeError: If the insert fails or returns no data.
        """
        try:
            client = self._get_client()

            # One timestamp for the whole batch so created_at == updated_at
            # and every row of a single call agrees.
            # NOTE(review): utcnow() produces a naive ISO string (no tz suffix),
            # matching existing rows; utcnow() is deprecated since Python 3.12 —
            # consider datetime.now(timezone.utc) once stored-format impact is confirmed.
            now = datetime.utcnow().isoformat()

            insert_data = [
                {
                    "user_id": user_id,
                    "argument": result["argument"],
                    "topic": result["topic"],
                    "predicted_stance": result["predicted_stance"],
                    "confidence": result["confidence"],
                    "probability_con": result["probability_con"],
                    "probability_pro": result["probability_pro"],
                    "created_at": now,
                    "updated_at": now,
                }
                for result in results
            ]

            # Batch insert
            response = client.table(self.table_name).insert(insert_data).execute()

            if not response.data:
                raise RuntimeError("Failed to save analysis results: no data returned")

            logger.info(f"Successfully saved {len(response.data)} analysis results")
            return response.data

        except RuntimeError:
            # Already our own error shape; avoid double-wrapping the message.
            raise
        except Exception as e:
            logger.error(f"Error saving analysis results: {str(e)}")
            raise RuntimeError(f"Failed to save analysis results: {str(e)}") from e

    def get_user_analysis_results(
        self,
        user_id: str,
        limit: Optional[int] = 100,
        offset: Optional[int] = 0
    ) -> List[Dict]:
        """
        Get analysis results for a user, newest first.

        Args:
            user_id: User UUID
            limit: Maximum number of results to return
            offset: Number of results to skip

        Returns:
            List of analysis results (empty list when the user has none)

        Raises:
            RuntimeError: If the database query fails.
        """
        try:
            client = self._get_client()

            query = (
                client.table(self.table_name)
                .select("*")
                .eq("user_id", user_id)
                .order("created_at", desc=True)
                .limit(limit)
                .offset(offset)
            )

            result = query.execute()

            return result.data or []

        except Exception as e:
            logger.error(f"Error getting user analysis results: {str(e)}")
            raise RuntimeError(f"Failed to get analysis results: {str(e)}") from e


# Initialize singleton instance
analysis_service = AnalysisService()
165
+