"""
Sentiment Analysis Module

Analyzes emotional tone and sentiment evolution in messages.
"""

import functools
import logging
import re
from datetime import datetime
from typing import Any, Dict, Iterable, List, Tuple

try:
    from textblob import TextBlob
except ImportError:
    # TextBlob only contributes the 30% fine-tuning term of the score, so the
    # analyzer stays usable without it (polarity is then treated as neutral).
    TextBlob = None
    logging.getLogger(__name__).warning(
        "textblob is not installed; sentiment scores will rely on keyword "
        "matching only"
    )


@functools.lru_cache(maxsize=2048)
def _keyword_pattern(keyword: str) -> "re.Pattern[str]":
    """Return a compiled whole-word pattern for *keyword*.

    The keyword (a single word or a multi-word phrase) must not be embedded
    in a longer word, so 'happy' does not match 'unhappy' and 'fin' does not
    match 'fine'. A trailing 's', 'es', 'd' or 'ed' is tolerated so simple
    inflections such as 'problems' or 'loved' still match.
    """
    return re.compile(rf"(?<!\w){re.escape(keyword)}(?:e?s|e?d)?(?!\w)")


class SentimentAnalyzer:
    """Analyzes sentiment evolution across messages.

    Scores are on a 0-100 scale: 0 is very negative, 50 is neutral and 100 is
    very positive. ``positive_words`` and ``negative_words`` are plain sets of
    lower-case keywords/phrases and may be extended after construction.
    """

    # Scoring parameters.
    _NEUTRAL_SCORE = 50.0
    _KEYWORD_STEP = 10          # points added/removed per matched keyword
    _KEYWORD_WEIGHT = 0.7
    _TEXTBLOB_WEIGHT = 0.3

    # Analysis thresholds (all in score points).
    _TREND_THRESHOLD = 10
    _TURNING_POINT_THRESHOLD = 20
    _HIGH_SEVERITY_THRESHOLD = 30
    _CRITICAL_SEVERITY_THRESHOLD = 40

    # Maximum number of characters of a message kept in the timeline.
    _PREVIEW_LENGTH = 100

    # Lower bounds of each sentiment state, checked from highest to lowest.
    _STATE_THRESHOLDS = (
        (80, "EXTREMELY_POSITIVE"),
        (60, "POSITIVE"),
        (40, "NEUTRAL"),
        (20, "NEGATIVE"),
    )

    def __init__(self):
        """Initialize the analyzer with English and Spanish keyword lists."""
        self.positive_words = {
            # English
            'love', 'excellent', 'amazing', 'fantastic', 'wonderful', 'great',
            'good', 'perfect', 'best', 'awesome', 'brilliant', 'outstanding',
            'superb', 'trust', 'confident', 'happy', 'thrilled', 'delighted',
            'impressed', 'satisfied',
            # Spanish
            'encanta', 'excelente', 'perfecto', 'increible', 'genial',
            'bueno', 'maravilloso', 'fantastico', 'sobresaliente',
            'impresionado', 'satisfecho', 'adoro', 'me encanta', 'fantástico',
            'fabuloso', 'me gusta', 'bien', 'obra',
        }

        self.negative_words = {
            # English
            'hate', 'terrible', 'awful', 'horrible', 'bad', 'poor', 'worst',
            'disappointed', 'frustrated', 'angry', 'annoyed', 'upset',
            'problem', 'issue', 'bug', 'slow', 'expensive', 'difficult',
            'fail', 'cancel', 'doubt', 'concern', 'worried', 'unsure',
            'alternative', 'competitor',
            # Spanish
            'odio', 'malo', 'peor', 'problema', 'bugs', 'caro', 'lento',
            'difícil', 'fracaso', 'cancelar', 'competencia', 'competidor',
            'preocupacion', 'inquietud', 'alternativa', 'dudoso', 'cambiar',
            'adios', 'adiós', 'otros developers', 'más barato', 'renunciar',
            'renuncia', 'renuncie', 'partir', 'irme', 'me voy', 'dejar',
            'abandonar', 'salir', 'terminar', 'fin', 'otro trabajo',
            'mejor oferta', 'buscar', 'explorar', 'mejores', 'mejores roles',
        }

    def analyze_evolution(self, messages: List[Dict[str, Any]]) -> Dict[str, Any]:
        """
        Analyze how sentiment evolves across messages.

        Args:
            messages: List of {'timestamp': str, 'text': str, 'sender': str}.
                Plain strings are accepted too, and any other object is
                analyzed through its ``str()`` form. A missing or ``None``
                'text' is treated as an empty message; a missing or ``None``
                'timestamp' is replaced by 'Message N' (1-based position).

        Returns:
            Dictionary with sentiment evolution analysis: 'timeline',
            'current_sentiment', 'initial_sentiment', 'trend',
            'turning_points', 'overall_change', 'interpretation' and
            'message_count'.
        """
        if not messages:
            return self._empty_analysis()

        timeline = []
        sentiments = []
        for position, msg in enumerate(messages, start=1):
            text, timestamp = self._extract_message(msg, position)
            score = self._calculate_sentiment(text)
            sentiments.append(score)

            if len(text) > self._PREVIEW_LENGTH:
                preview = text[:self._PREVIEW_LENGTH] + '...'
            else:
                preview = text

            timeline.append({
                'timestamp': timestamp,
                'text': preview,
                'sentiment_score': round(score, 2),
                'sentiment_state': self._sentiment_state(score),
                'message_index': position,
            })

        # `messages` is non-empty here, so `sentiments` has at least one entry.
        trend = self._calculate_trend(sentiments)
        turning_points = self._find_turning_points(sentiments, timeline)
        interpretation = self._generate_interpretation(
            sentiments, trend, turning_points
        )

        return {
            'timeline': timeline,
            'current_sentiment': round(sentiments[-1], 2),
            'initial_sentiment': round(sentiments[0], 2),
            'trend': trend,
            'turning_points': turning_points,
            'overall_change': round(sentiments[-1] - sentiments[0], 2),
            'interpretation': interpretation,
            'message_count': len(messages),
        }

    @staticmethod
    def _extract_message(msg: Any, position: int) -> Tuple[str, Any]:
        """Return ``(text, timestamp)`` for one message of any supported shape."""
        placeholder = f'Message {position}'
        if isinstance(msg, dict):
            text = msg.get('text')
            if text is None:
                text = ''
            timestamp = msg.get('timestamp')
            if timestamp is None:
                timestamp = placeholder
        else:
            text = msg
            timestamp = placeholder

        # Downstream code slices and lower-cases the text, so it must be a str.
        if not isinstance(text, str):
            text = str(text)
        return text, timestamp

    @staticmethod
    def _count_keywords(text_lower: str, keywords: Iterable[str]) -> int:
        """Count how many of *keywords* occur in *text_lower* as whole words.

        Each keyword counts at most once, however often it appears.
        """
        return sum(
            1 for keyword in keywords
            if _keyword_pattern(keyword).search(text_lower)
        )

    def _calculate_sentiment(self, text: str) -> float:
        """
        Calculate sentiment score from 0-100.

        0 = very negative, 50 = neutral, 100 = very positive.
        Uses keyword matching primarily, TextBlob for fine-tuning. When
        TextBlob is not installed its polarity is taken as neutral, which
        keeps scores on the same scale.

        Args:
            text: Message text; empty text scores as neutral.

        Returns:
            Score clamped to the range 0-100.
        """
        if not text:
            return self._NEUTRAL_SCORE

        text_lower = text.lower()

        # Primary signal: number of distinct positive vs. negative keywords.
        positive_count = self._count_keywords(text_lower, self.positive_words)
        negative_count = self._count_keywords(text_lower, self.negative_words)
        keyword_score = 50 + self._KEYWORD_STEP * (positive_count - negative_count)

        # Secondary signal: TextBlob polarity (-1 to 1) mapped onto 0-100.
        if TextBlob is not None:
            polarity = TextBlob(text).sentiment.polarity
        else:
            polarity = 0.0
        textblob_score = (polarity + 1) * 50

        # Keywords are weighted higher: they are more reliable for detecting
        # sentiment in conversations.
        final_score = (
            (keyword_score * self._KEYWORD_WEIGHT)
            + (textblob_score * self._TEXTBLOB_WEIGHT)
        )
        return min(100.0, max(0.0, final_score))

    def _sentiment_state(self, score: float) -> str:
        """Classify a 0-100 score into one of five sentiment states."""
        for lower_bound, state in self._STATE_THRESHOLDS:
            if score >= lower_bound:
                return state
        return "EXTREMELY_NEGATIVE"

    def _calculate_trend(self, sentiments: List[float]) -> str:
        """Determine the overall trend.

        Compares the mean score of the first half of the conversation with
        the mean of the second half (the middle message of an odd-length
        list belongs to the second half).

        Returns:
            "IMPROVING" or "DECLINING" when the means differ by more than 10
            points, "STABLE" otherwise, or "insufficient_data" for fewer than
            two scores.
        """
        if len(sentiments) < 2:
            return "insufficient_data"

        midpoint = len(sentiments) // 2  # >= 1, so neither half is empty
        earlier = sentiments[:midpoint]
        later = sentiments[midpoint:]
        diff = sum(later) / len(later) - sum(earlier) / len(earlier)

        if diff > self._TREND_THRESHOLD:
            return "IMPROVING"
        if diff < -self._TREND_THRESHOLD:
            return "DECLINING"
        return "STABLE"

    def _find_turning_points(self, sentiments: List[float], timeline: List[Dict]) -> List[Dict]:
        """Find significant sentiment changes between consecutive messages.

        A change of more than 20 points is a turning point; its severity is
        'CRITICAL' above 40 points, 'HIGH' above 30 and 'MEDIUM' otherwise.

        Args:
            sentiments: Scores in message order.
            timeline: Timeline entries aligned with *sentiments*; only the
                'timestamp' key is read.

        Returns:
            One dict per turning point, in message order. 'index' is the
            0-based position of the message the sentiment changed *to*.
        """
        turning_points = []
        for i in range(1, len(sentiments)):
            previous, current = sentiments[i - 1], sentiments[i]
            change = abs(current - previous)
            if change <= self._TURNING_POINT_THRESHOLD:
                continue

            if change > self._CRITICAL_SEVERITY_THRESHOLD:
                severity = 'CRITICAL'
            elif change > self._HIGH_SEVERITY_THRESHOLD:
                severity = 'HIGH'
            else:
                severity = 'MEDIUM'

            turning_points.append({
                'index': i,
                'timestamp': timeline[i]['timestamp'],
                'from_state': self._sentiment_state(previous),
                'to_state': self._sentiment_state(current),
                'change_magnitude': round(change, 2),
                'severity': severity,
            })
        return turning_points

    def _generate_interpretation(self, sentiments: List[float], trend: str,
                                 turning_points: List[Dict]) -> str:
        """Generate a human-readable interpretation.

        Args:
            sentiments: Scores in message order.
            trend: Trend label; anything other than "DECLINING" or
                "IMPROVING" is reported as stable.
            turning_points: Turning points; only 'severity' is read.

        Returns:
            A summary sentence, followed by a warning when critical shifts
            exist and by a risk/status note based on the latest score.
        """
        if not sentiments:
            return "No messages to analyze."

        current = sentiments[-1]
        initial = sentiments[0]

        if trend == "DECLINING":
            base = f"Sentiment is DECLINING overall (from {initial:.0f} to {current:.0f})"
        elif trend == "IMPROVING":
            base = f"Sentiment is IMPROVING overall (from {initial:.0f} to {current:.0f})"
        else:
            base = f"Sentiment is STABLE (around {current:.0f})"

        critical_count = sum(
            1 for point in (turning_points or [])
            if point['severity'] == 'CRITICAL'
        )
        if critical_count:
            base += f". WARNING: {critical_count} critical sentiment shift(s) detected."

        if current < 30:
            assessment = " RISK LEVEL: CRITICAL - Immediate intervention recommended."
        elif current < 50:
            assessment = " RISK LEVEL: HIGH - Attention needed soon."
        elif current > 70:
            assessment = " Status: POSITIVE - No immediate action needed."
        else:
            assessment = ""

        if assessment:
            # Close the summary sentence so the assessment does not run on
            # from it when no warning was appended.
            if not base.endswith('.'):
                base += '.'
            base += assessment

        return base

    def _empty_analysis(self) -> Dict[str, Any]:
        """Return the analysis structure used when no messages are given."""
        return {
            'timeline': [],
            'current_sentiment': 0,
            'initial_sentiment': 0,
            'trend': 'unknown',
            'turning_points': [],
            'overall_change': 0,
            'interpretation': 'No data provided',
            'message_count': 0,
        }