petter2025 commited on
Commit
7cfde2b
Β·
verified Β·
1 Parent(s): a0bb7d7

Update hf_demo.py

Browse files
Files changed (1) hide show
  1. hf_demo.py +311 -325
hf_demo.py CHANGED
@@ -7,6 +7,8 @@ import os
7
  # πŸ”₯ CRITICAL: Force Gradio to use port 7860 for Hugging Face Spaces
8
  os.environ['GRADIO_SERVER_PORT'] = '7860'
9
  os.environ['GRADIO_SERVER_NAME'] = '0.0.0.0'
 
 
10
 
11
  import json
12
  import uuid
@@ -23,38 +25,50 @@ from dataclasses import dataclass, asdict
23
  from enum import Enum
24
 
25
  import gradio as gr
26
- from fastapi import FastAPI, HTTPException, Depends
 
 
 
27
  from fastapi.middleware.cors import CORSMiddleware
28
  from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
29
- from pydantic import BaseModel, Field, field_validator # Changed from validator
 
30
  from gradio import mount_gradio_app
31
 
32
- # ============== CONFIGURATION ==============
33
- class Settings:
34
- """Centralized configuration - easy to modify"""
35
 
36
  # Hugging Face settings
37
- HF_SPACE_ID = os.environ.get('SPACE_ID', 'local')
38
- HF_TOKEN = os.environ.get('HF_TOKEN', '')
39
 
40
  # Persistence - HF persistent storage
41
- DATA_DIR = '/data' if os.path.exists('/data') else './data'
42
- os.makedirs(DATA_DIR, exist_ok=True)
43
 
44
  # Lead generation
45
- LEAD_EMAIL = "petter2025us@outlook.com"
46
- CALENDLY_URL = "https://calendly.com/petter2025us/arf-demo"
47
 
48
  # Webhook for lead alerts (set in HF secrets)
49
- SLACK_WEBHOOK = os.environ.get('SLACK_WEBHOOK', '')
50
- SENDGRID_API_KEY = os.environ.get('SENDGRID_API_KEY', '')
51
 
52
  # Security
53
- API_KEY = os.environ.get('ARF_API_KEY', str(uuid.uuid4()))
54
 
55
  # ARF defaults
56
- DEFAULT_CONFIDENCE_THRESHOLD = 0.9
57
- DEFAULT_MAX_RISK = "MEDIUM"
 
 
 
 
 
 
 
 
 
58
 
59
  settings = Settings()
60
 
@@ -98,54 +112,58 @@ class BayesianRiskEngine:
98
 
99
  def __init__(self):
100
  # Beta-Binomial conjugate prior
101
- # Prior represents belief about risk before seeing evidence
102
- self.prior_alpha = 2.0 # Pseudocounts for "safe" outcomes
103
- self.prior_beta = 5.0 # Pseudocounts for "risky" outcomes
104
 
105
- # Action type priors (learned from industry data)
106
  self.action_priors = {
107
- 'database': {'alpha': 1.5, 'beta': 8.0}, # DB ops are risky
108
- 'network': {'alpha': 3.0, 'beta': 4.0}, # Network ops medium risk
109
- 'compute': {'alpha': 4.0, 'beta': 3.0}, # Compute ops safer
110
- 'security': {'alpha': 2.0, 'beta': 6.0}, # Security ops risky
111
  'default': {'alpha': 2.0, 'beta': 5.0}
112
  }
113
 
114
- # Load historical evidence from persistent storage
115
  self.evidence_db = f"{settings.DATA_DIR}/evidence.db"
116
  self._init_db()
117
 
118
  def _init_db(self):
119
  """Initialize SQLite DB for evidence storage"""
120
- with self._get_db() as conn:
121
- conn.execute('''
122
- CREATE TABLE IF NOT EXISTS evidence (
123
- id TEXT PRIMARY KEY,
124
- action_type TEXT,
125
- action_hash TEXT,
126
- success INTEGER,
127
- total INTEGER,
128
- timestamp TEXT,
129
- metadata TEXT
130
- )
131
- ''')
132
- conn.execute('''
133
- CREATE INDEX IF NOT EXISTS idx_action_hash
134
- ON evidence(action_hash)
135
- ''')
 
 
 
 
136
 
137
  @contextmanager
138
  def _get_db(self):
139
- conn = sqlite3.connect(self.evidence_db)
140
  try:
 
141
  yield conn
 
 
 
142
  finally:
143
- conn.close()
 
144
 
145
  def classify_action(self, action_text: str) -> str:
146
- """Classify action type for appropriate prior"""
147
  action_lower = action_text.lower()
148
-
149
  if any(word in action_lower for word in ['database', 'db', 'sql', 'table', 'drop', 'delete']):
150
  return 'database'
151
  elif any(word in action_lower for word in ['network', 'firewall', 'load balancer']):
@@ -158,27 +176,26 @@ class BayesianRiskEngine:
158
  return 'default'
159
 
160
  def get_prior(self, action_type: str) -> Tuple[float, float]:
161
- """Get prior parameters for action type"""
162
  prior = self.action_priors.get(action_type, self.action_priors['default'])
163
  return prior['alpha'], prior['beta']
164
 
165
  def get_evidence(self, action_hash: str) -> Tuple[int, int]:
166
- """Get historical evidence for similar actions"""
167
- with self._get_db() as conn:
168
- cursor = conn.execute(
169
- 'SELECT SUM(success), SUM(total) FROM evidence WHERE action_hash = ?',
170
- (action_hash[:50],)
171
- )
172
- row = cursor.fetchone()
173
- return (row[0] or 0, row[1] or 0) if row else (0, 0)
 
 
 
174
 
175
  def calculate_posterior(self,
176
  action_text: str,
177
  context: Dict[str, Any]) -> Dict[str, Any]:
178
- """
179
- True Bayesian posterior calculation
180
- P(risk | action, context) ∝ P(action, context | risk) * P(risk)
181
- """
182
  # 1. Classify action for appropriate prior
183
  action_type = self.classify_action(action_text)
184
  alpha0, beta0 = self.get_prior(action_type)
@@ -201,8 +218,7 @@ class BayesianRiskEngine:
201
  risk_score = posterior_mean * context_multiplier
202
  risk_score = min(0.99, max(0.01, risk_score))
203
 
204
- # 7. 95% credible interval (Beta distribution quantiles)
205
- # Using approximation for computational efficiency
206
  variance = (alpha_n * beta_n) / ((alpha_n + beta_n)**2 * (alpha_n + beta_n + 1))
207
  std_dev = variance ** 0.5
208
  ci_lower = max(0.01, posterior_mean - 1.96 * std_dev)
@@ -234,60 +250,46 @@ class BayesianRiskEngine:
234
  }
235
 
236
  def _context_likelihood(self, context: Dict) -> float:
237
- """Calculate likelihood multiplier from context"""
238
  multiplier = 1.0
239
-
240
- # Environment
241
  if context.get('environment') == 'production':
242
  multiplier *= 1.5
243
  elif context.get('environment') == 'staging':
244
  multiplier *= 0.8
245
-
246
- # Time
247
  hour = datetime.now().hour
248
- if hour < 6 or hour > 22: # Off-hours
249
  multiplier *= 1.3
250
-
251
- # User seniority
252
  if context.get('user_role') == 'junior':
253
  multiplier *= 1.4
254
  elif context.get('user_role') == 'senior':
255
  multiplier *= 0.9
256
-
257
- # Backup status
258
  if not context.get('backup_available', True):
259
  multiplier *= 1.6
260
-
261
  return multiplier
262
 
263
  def record_outcome(self, action_text: str, success: bool):
264
- """Record actual outcome for future Bayesian updates"""
265
  action_hash = hashlib.sha256(action_text.encode()).hexdigest()
266
  action_type = self.classify_action(action_text)
267
-
268
- with self._get_db() as conn:
269
- conn.execute('''
270
- INSERT INTO evidence (id, action_type, action_hash, success, total, timestamp)
271
- VALUES (?, ?, ?, ?, ?, ?)
272
- ''', (
273
- str(uuid.uuid4()),
274
- action_type,
275
- action_hash[:50],
276
- 1 if success else 0,
277
- 1,
278
- datetime.utcnow().isoformat()
279
- ))
280
- conn.commit()
281
-
282
- logger.info(f"Recorded outcome for {action_type}: success={success}")
 
283
 
284
  # ============== POLICY ENGINE ==============
285
  class PolicyEngine:
286
- """
287
- Deterministic OSS policies - advisory only
288
- Matches ARF OSS healing_policies.py
289
- """
290
-
291
  def __init__(self):
292
  self.config = {
293
  "confidence_threshold": settings.DEFAULT_CONFIDENCE_THRESHOLD,
@@ -316,10 +318,7 @@ class PolicyEngine:
316
  action: str,
317
  risk: Dict[str, Any],
318
  confidence: float) -> Dict[str, Any]:
319
- """
320
- Evaluate action against policies
321
- Returns gate results and final decision
322
- """
323
  gates = []
324
 
325
  # Gate 1: Confidence threshold
@@ -391,7 +390,6 @@ class PolicyEngine:
391
  # Overall decision
392
  all_passed = all(g["passed"] for g in gates)
393
 
394
- # Determine required level
395
  if not all_passed:
396
  required_level = ExecutionLevel.OPERATOR_REVIEW
397
  elif risk["level"] == RiskLevel.LOW:
@@ -410,7 +408,6 @@ class PolicyEngine:
410
  }
411
 
412
  def update_config(self, key: str, value: Any):
413
- """Live policy updates"""
414
  if key in self.config:
415
  self.config[key] = value
416
  logger.info(f"Policy updated: {key} = {value}")
@@ -419,83 +416,76 @@ class PolicyEngine:
419
 
420
  # ============== RAG MEMORY WITH PERSISTENCE ==============
421
  class RAGMemory:
422
- """
423
- Persistent RAG memory using SQLite + vector embeddings
424
- Survives HF Space restarts
425
- """
426
-
427
  def __init__(self):
428
  self.db_path = f"{settings.DATA_DIR}/memory.db"
429
  self._init_db()
430
  self.embedding_cache = {}
431
 
432
  def _init_db(self):
433
- """Initialize memory tables"""
434
- with self._get_db() as conn:
435
- # Incidents table
436
- conn.execute('''
437
- CREATE TABLE IF NOT EXISTS incidents (
438
- id TEXT PRIMARY KEY,
439
- action TEXT,
440
- action_hash TEXT,
441
- risk_score REAL,
442
- risk_level TEXT,
443
- confidence REAL,
444
- allowed BOOLEAN,
445
- gates TEXT,
446
- timestamp TEXT,
447
- embedding TEXT
448
- )
449
- ''')
450
-
451
- # Enterprise signals table
452
- conn.execute('''
453
- CREATE TABLE IF NOT EXISTS signals (
454
- id TEXT PRIMARY KEY,
455
- signal_type TEXT,
456
- action TEXT,
457
- risk_score REAL,
458
- metadata TEXT,
459
- timestamp TEXT,
460
- contacted BOOLEAN DEFAULT 0
461
- )
462
- ''')
463
-
464
- # Create indexes
465
- conn.execute('CREATE INDEX IF NOT EXISTS idx_action_hash ON incidents(action_hash)')
466
- conn.execute('CREATE INDEX IF NOT EXISTS idx_signal_type ON signals(signal_type)')
467
- conn.execute('CREATE INDEX IF NOT EXISTS idx_signal_contacted ON signals(contacted)')
468
 
469
  @contextmanager
470
  def _get_db(self):
471
- conn = sqlite3.connect(self.db_path)
472
- conn.row_factory = sqlite3.Row
473
  try:
 
 
474
  yield conn
 
 
 
475
  finally:
476
- conn.close()
 
477
 
478
  def _simple_embedding(self, text: str) -> List[float]:
479
- """Simple bag-of-words embedding for demo"""
480
- # Cache embeddings
481
  if text in self.embedding_cache:
482
  return self.embedding_cache[text]
483
 
484
- # Simple character trigram embedding
485
  words = text.lower().split()
486
  trigrams = set()
487
  for word in words:
488
  for i in range(len(word) - 2):
489
  trigrams.add(word[i:i+3])
490
 
491
- # Convert to fixed-size vector (simplified)
492
- # In production, use sentence-transformers
493
  vector = [hash(t) % 1000 / 1000.0 for t in sorted(trigrams)[:100]]
494
- # Pad to fixed length
495
  while len(vector) < 100:
496
  vector.append(0.0)
497
  vector = vector[:100]
498
-
499
  self.embedding_cache[text] = vector
500
  return vector
501
 
@@ -506,77 +496,67 @@ class RAGMemory:
506
  confidence: float,
507
  allowed: bool,
508
  gates: List[Dict]):
509
- """Store incident in persistent memory"""
510
  action_hash = hashlib.sha256(action.encode()).hexdigest()[:50]
511
  embedding = json.dumps(self._simple_embedding(action))
512
-
513
- with self._get_db() as conn:
514
- conn.execute('''
515
- INSERT INTO incidents
516
- (id, action, action_hash, risk_score, risk_level, confidence, allowed, gates, timestamp, embedding)
517
- VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
518
- ''', (
519
- str(uuid.uuid4()),
520
- action[:500],
521
- action_hash,
522
- risk_score,
523
- risk_level.value,
524
- confidence,
525
- 1 if allowed else 0,
526
- json.dumps(gates),
527
- datetime.utcnow().isoformat(),
528
- embedding
529
- ))
530
- conn.commit()
 
 
531
 
532
  def find_similar(self, action: str, limit: int = 5) -> List[Dict]:
533
- """Find similar incidents using cosine similarity"""
534
  query_embedding = self._simple_embedding(action)
535
-
536
- with self._get_db() as conn:
537
- # Get all recent incidents
538
- cursor = conn.execute('''
539
- SELECT * FROM incidents
540
- ORDER BY timestamp DESC
541
- LIMIT 100
542
- ''')
543
-
544
- incidents = []
545
- for row in cursor.fetchall():
546
- stored_embedding = json.loads(row['embedding'])
547
-
548
- # Cosine similarity
549
- dot = sum(q * s for q, s in zip(query_embedding, stored_embedding))
550
- norm_q = sum(q*q for q in query_embedding) ** 0.5
551
- norm_s = sum(s*s for s in stored_embedding) ** 0.5
552
-
553
- if norm_q > 0 and norm_s > 0:
554
- similarity = dot / (norm_q * norm_s)
555
- else:
556
- similarity = 0
557
-
558
- incidents.append({
559
- 'id': row['id'],
560
- 'action': row['action'],
561
- 'risk_score': row['risk_score'],
562
- 'risk_level': row['risk_level'],
563
- 'confidence': row['confidence'],
564
- 'allowed': bool(row['allowed']),
565
- 'timestamp': row['timestamp'],
566
- 'similarity': similarity
567
- })
568
-
569
- # Sort by similarity and return top k
570
- incidents.sort(key=lambda x: x['similarity'], reverse=True)
571
- return incidents[:limit]
572
 
573
  def track_enterprise_signal(self,
574
  signal_type: LeadSignal,
575
  action: str,
576
  risk_score: float,
577
  metadata: Dict = None):
578
- """Track enterprise interest signals with persistence"""
579
-
580
  signal = {
581
  'id': str(uuid.uuid4()),
582
  'signal_type': signal_type.value,
@@ -586,35 +566,32 @@ class RAGMemory:
586
  'timestamp': datetime.utcnow().isoformat(),
587
  'contacted': 0
588
  }
589
-
590
- with self._get_db() as conn:
591
- conn.execute('''
592
- INSERT INTO signals
593
- (id, signal_type, action, risk_score, metadata, timestamp, contacted)
594
- VALUES (?, ?, ?, ?, ?, ?, ?)
595
- ''', (
596
- signal['id'],
597
- signal['signal_type'],
598
- signal['action'],
599
- signal['risk_score'],
600
- signal['metadata'],
601
- signal['timestamp'],
602
- signal['contacted']
603
- ))
604
- conn.commit()
 
 
 
605
 
606
  logger.info(f"πŸ”” Enterprise signal: {signal_type.value} - {action[:50]}...")
607
-
608
- # Trigger immediate notification for high-value signals
609
  if signal_type in [LeadSignal.HIGH_RISK_BLOCKED, LeadSignal.NOVEL_ACTION]:
610
  self._notify_sales_team(signal)
611
-
612
  return signal
613
 
614
  def _notify_sales_team(self, signal: Dict):
615
- """Real-time notification to sales team"""
616
-
617
- # Slack webhook
618
  if settings.SLACK_WEBHOOK:
619
  try:
620
  requests.post(settings.SLACK_WEBHOOK, json={
@@ -624,49 +601,52 @@ class RAGMemory:
624
  f"Risk Score: {signal['risk_score']:.2f}\n"
625
  f"Time: {signal['timestamp']}\n"
626
  f"Contact: {settings.LEAD_EMAIL}"
627
- })
628
- except:
629
- pass
630
-
631
- # Email via SendGrid (if configured)
632
- if settings.SENDGRID_API_KEY:
633
- # Send email logic here
634
- pass
635
 
636
  def get_uncontacted_signals(self) -> List[Dict]:
637
- """Get signals that haven't been followed up"""
638
- with self._get_db() as conn:
639
- cursor = conn.execute('''
640
- SELECT * FROM signals
641
- WHERE contacted = 0
642
- ORDER BY timestamp DESC
643
- ''')
644
-
645
- signals = []
646
- for row in cursor.fetchall():
647
- signals.append({
648
- 'id': row['id'],
649
- 'signal_type': row['signal_type'],
650
- 'action': row['action'],
651
- 'risk_score': row['risk_score'],
652
- 'metadata': json.loads(row['metadata']),
653
- 'timestamp': row['timestamp']
654
- })
655
- return signals
 
 
656
 
657
  def mark_contacted(self, signal_id: str):
658
- """Mark signal as contacted"""
659
- with self._get_db() as conn:
660
- conn.execute('UPDATE signals SET contacted = 1 WHERE id = ?', (signal_id,))
661
- conn.commit()
 
 
662
 
663
  # ============== AUTHENTICATION ==============
664
  security = HTTPBearer()
665
 
666
- def verify_api_key(credentials: HTTPAuthorizationCredentials = Depends(security)):
667
- """Simple API key authentication for enterprise endpoints"""
668
  if credentials.credentials != settings.API_KEY:
669
- raise HTTPException(status_code=403, detail="Invalid API key")
 
 
 
670
  return credentials.credentials
671
 
672
  # ============== PYDANTIC MODELS ==============
@@ -680,7 +660,6 @@ class ActionRequest(BaseModel):
680
  user_role: str = "devops"
681
  session_id: Optional[str] = None
682
 
683
- # FIXED: Using Pydantic V2 field_validator instead of deprecated validator
684
  @field_validator('proposedAction')
685
  @classmethod
686
  def validate_action(cls, v: str) -> str:
@@ -742,10 +721,22 @@ risk_engine = BayesianRiskEngine()
742
  policy_engine = PolicyEngine()
743
  memory = RAGMemory()
744
 
745
- # ============== API ENDPOINTS ==============
746
- @app.get("/api/v1/config")
 
 
 
 
 
 
 
 
 
 
 
 
747
  async def get_config():
748
- """Get current ARF configuration"""
749
  return {
750
  "confidenceThreshold": policy_engine.config["confidence_threshold"],
751
  "maxAutonomousRisk": policy_engine.config["max_autonomous_risk"],
@@ -754,20 +745,19 @@ async def get_config():
754
  "edition": "OSS"
755
  }
756
 
757
- @app.post("/api/v1/config")
758
  async def update_config(config: ConfigUpdateRequest):
759
- """Update ARF configuration (live)"""
760
  if config.confidenceThreshold:
761
  policy_engine.update_config("confidence_threshold", config.confidenceThreshold)
762
  if config.maxAutonomousRisk:
763
  policy_engine.update_config("max_autonomous_risk", config.maxAutonomousRisk.value)
764
  return await get_config()
765
 
766
- @app.post("/api/v1/evaluate", response_model=EvaluationResponse)
767
  async def evaluate_action(request: ActionRequest):
768
  """
769
- Real ARF OSS evaluation pipeline
770
- Used by Replit UI frontend
771
  """
772
  try:
773
  # Build context
@@ -860,75 +850,66 @@ async def evaluate_action(request: ActionRequest):
860
 
861
  except Exception as e:
862
  logger.error(f"Evaluation failed: {e}", exc_info=True)
863
- raise HTTPException(status_code=500, detail=str(e))
 
 
 
864
 
865
  @app.get("/api/v1/enterprise/signals", dependencies=[Depends(verify_api_key)])
866
  async def get_enterprise_signals(contacted: bool = False):
867
  """
868
  Get enterprise lead signals (protected endpoint)
869
- Requires API key from HF secrets
870
  """
871
- if contacted:
872
- signals = memory.get_uncontacted_signals()
873
- else:
874
- # Get all signals from last 30 days
875
- with memory._get_db() as conn:
876
- cursor = conn.execute('''
877
- SELECT * FROM signals
878
- WHERE datetime(timestamp) > datetime('now', '-30 days')
879
- ORDER BY timestamp DESC
880
- ''')
881
- signals = []
882
- for row in cursor.fetchall():
883
- signals.append({
884
- 'id': row['id'],
885
- 'signal_type': row['signal_type'],
886
- 'action': row['action'],
887
- 'risk_score': row['risk_score'],
888
- 'metadata': json.loads(row['metadata']),
889
- 'timestamp': row['timestamp'],
890
- 'contacted': bool(row['contacted'])
891
- })
892
-
893
- return {"signals": signals, "count": len(signals)}
 
 
894
 
895
- @app.post("/api/v1/enterprise/signals/{signal_id}/contact")
896
  async def mark_signal_contacted(signal_id: str):
897
- """Mark a lead signal as contacted"""
898
  memory.mark_contacted(signal_id)
899
  return {"status": "success", "message": "Signal marked as contacted"}
900
 
901
- @app.get("/api/v1/memory/similar")
902
  async def get_similar_actions(action: str, limit: int = 5):
903
- """Find similar historical actions"""
904
  similar = memory.find_similar(action, limit=limit)
905
  return {"similar": similar, "count": len(similar)}
906
 
907
- @app.post("/api/v1/feedback")
908
  async def record_outcome(action: str, success: bool):
909
  """
910
- Record actual outcome for Bayesian updating
911
- This is how ARF learns
912
  """
913
  risk_engine.record_outcome(action, success)
914
  return {"status": "success", "message": "Outcome recorded"}
915
 
916
- @app.get("/health")
917
- async def health_check():
918
- """Health check endpoint"""
919
- return {
920
- "status": "healthy",
921
- "version": "3.3.9",
922
- "edition": "OSS",
923
- "memory_entries": len(memory.get_uncontacted_signals()),
924
- "timestamp": datetime.utcnow().isoformat()
925
- }
926
-
927
  # ============== GRADIO LEAD GENERATION UI ==============
928
  def create_lead_gen_ui():
929
- """Professional lead generation interface"""
930
-
931
- # FIXED: Moved theme and css to launch() method
932
  with gr.Blocks(title="ARF OSS - Enterprise Reliability Intelligence") as ui:
933
 
934
  # Header
@@ -991,7 +972,7 @@ def create_lead_gen_ui():
991
  </div>
992
  """)
993
 
994
- # Live Demo Stats - FIXED: Removed 'every' parameter for Gradio 4.x
995
  demo_stats = gr.JSON(
996
  label="πŸ“Š Live Demo Statistics",
997
  value={
@@ -1055,9 +1036,14 @@ app = mount_gradio_app(app, gradio_ui, path="/")
1055
  if __name__ == "__main__":
1056
  import uvicorn
1057
 
1058
- # βœ… Use PORT environment variable (defaults to 7860 for HF Spaces)
1059
  port = int(os.environ.get('PORT', 7860))
1060
 
 
 
 
 
 
 
1061
  logger.info("="*60)
1062
  logger.info("πŸš€ ARF OSS v3.3.9 Starting")
1063
  logger.info(f"πŸ“Š Data directory: {settings.DATA_DIR}")
@@ -1066,10 +1052,10 @@ if __name__ == "__main__":
1066
  logger.info(f"🌐 Serving at: http://0.0.0.0:{port}")
1067
  logger.info("="*60)
1068
 
1069
- # βœ… Run on the correct port
1070
  uvicorn.run(
1071
- app,
1072
  host="0.0.0.0",
1073
  port=port,
1074
- log_level="info"
 
1075
  )
 
7
  # πŸ”₯ CRITICAL: Force Gradio to use port 7860 for Hugging Face Spaces
8
  os.environ['GRADIO_SERVER_PORT'] = '7860'
9
  os.environ['GRADIO_SERVER_NAME'] = '0.0.0.0'
10
+ # πŸ”₯ Prevent Gradio from auto-launching its own server
11
+ os.environ['GRADIO_ANALYTICS_ENABLED'] = 'False'
12
 
13
  import json
14
  import uuid
 
25
  from enum import Enum
26
 
27
  import gradio as gr
28
+ # πŸ”₯ Close any existing Gradio instances immediately after import
29
+ gr.close_all()
30
+
31
+ from fastapi import FastAPI, HTTPException, Depends, status
32
  from fastapi.middleware.cors import CORSMiddleware
33
  from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
34
+ from pydantic import BaseModel, Field, field_validator
35
+ from pydantic_settings import BaseSettings # <-- NEW: Pydantic settings
36
  from gradio import mount_gradio_app
37
 
38
+ # ============== CONFIGURATION (Pydantic) ==============
39
+ class Settings(BaseSettings):
40
+ """Centralized configuration using Pydantic Settings"""
41
 
42
  # Hugging Face settings
43
+ HF_SPACE_ID: str = Field(default='local', env='SPACE_ID')
44
+ HF_TOKEN: str = Field(default='', env='HF_TOKEN')
45
 
46
  # Persistence - HF persistent storage
47
+ DATA_DIR: str = Field(default='/data' if os.path.exists('/data') else './data')
 
48
 
49
  # Lead generation
50
+ LEAD_EMAIL: str = "petter2025us@outlook.com"
51
+ CALENDLY_URL: str = "https://calendly.com/petter2025us/arf-demo"
52
 
53
  # Webhook for lead alerts (set in HF secrets)
54
+ SLACK_WEBHOOK: str = Field(default='', env='SLACK_WEBHOOK')
55
+ SENDGRID_API_KEY: str = Field(default='', env='SENDGRID_API_KEY')
56
 
57
  # Security
58
+ API_KEY: str = Field(default_factory=lambda: str(uuid.uuid4()), env='ARF_API_KEY')
59
 
60
  # ARF defaults
61
+ DEFAULT_CONFIDENCE_THRESHOLD: float = 0.9
62
+ DEFAULT_MAX_RISK: str = "MEDIUM"
63
+
64
+ class Config:
65
+ env_file = '.env' # optionally load from .env file
66
+ extra = 'ignore' # ignore extra env vars
67
+
68
+ def __init__(self, **kwargs):
69
+ super().__init__(**kwargs)
70
+ # Ensure data directory exists
71
+ os.makedirs(self.DATA_DIR, exist_ok=True)
72
 
73
  settings = Settings()
74
 
 
112
 
113
  def __init__(self):
114
  # Beta-Binomial conjugate prior
115
+ self.prior_alpha = 2.0
116
+ self.prior_beta = 5.0
 
117
 
 
118
  self.action_priors = {
119
+ 'database': {'alpha': 1.5, 'beta': 8.0},
120
+ 'network': {'alpha': 3.0, 'beta': 4.0},
121
+ 'compute': {'alpha': 4.0, 'beta': 3.0},
122
+ 'security': {'alpha': 2.0, 'beta': 6.0},
123
  'default': {'alpha': 2.0, 'beta': 5.0}
124
  }
125
 
 
126
  self.evidence_db = f"{settings.DATA_DIR}/evidence.db"
127
  self._init_db()
128
 
129
  def _init_db(self):
130
  """Initialize SQLite DB for evidence storage"""
131
+ try:
132
+ with self._get_db() as conn:
133
+ conn.execute('''
134
+ CREATE TABLE IF NOT EXISTS evidence (
135
+ id TEXT PRIMARY KEY,
136
+ action_type TEXT,
137
+ action_hash TEXT,
138
+ success INTEGER,
139
+ total INTEGER,
140
+ timestamp TEXT,
141
+ metadata TEXT
142
+ )
143
+ ''')
144
+ conn.execute('''
145
+ CREATE INDEX IF NOT EXISTS idx_action_hash
146
+ ON evidence(action_hash)
147
+ ''')
148
+ except sqlite3.Error as e:
149
+ logger.error(f"Failed to initialize evidence database: {e}")
150
+ raise RuntimeError("Could not initialize evidence storage") from e
151
 
152
  @contextmanager
153
  def _get_db(self):
154
+ conn = None
155
  try:
156
+ conn = sqlite3.connect(self.evidence_db)
157
  yield conn
158
+ except sqlite3.Error as e:
159
+ logger.error(f"Database error: {e}")
160
+ raise
161
  finally:
162
+ if conn:
163
+ conn.close()
164
 
165
  def classify_action(self, action_text: str) -> str:
 
166
  action_lower = action_text.lower()
 
167
  if any(word in action_lower for word in ['database', 'db', 'sql', 'table', 'drop', 'delete']):
168
  return 'database'
169
  elif any(word in action_lower for word in ['network', 'firewall', 'load balancer']):
 
176
  return 'default'
177
 
178
  def get_prior(self, action_type: str) -> Tuple[float, float]:
 
179
  prior = self.action_priors.get(action_type, self.action_priors['default'])
180
  return prior['alpha'], prior['beta']
181
 
182
  def get_evidence(self, action_hash: str) -> Tuple[int, int]:
183
+ try:
184
+ with self._get_db() as conn:
185
+ cursor = conn.execute(
186
+ 'SELECT SUM(success), SUM(total) FROM evidence WHERE action_hash = ?',
187
+ (action_hash[:50],)
188
+ )
189
+ row = cursor.fetchone()
190
+ return (row[0] or 0, row[1] or 0) if row else (0, 0)
191
+ except sqlite3.Error as e:
192
+ logger.error(f"Failed to retrieve evidence: {e}")
193
+ return (0, 0) # fallback to no evidence
194
 
195
  def calculate_posterior(self,
196
  action_text: str,
197
  context: Dict[str, Any]) -> Dict[str, Any]:
198
+ # ... (same as before, no changes needed) ...
 
 
 
199
  # 1. Classify action for appropriate prior
200
  action_type = self.classify_action(action_text)
201
  alpha0, beta0 = self.get_prior(action_type)
 
218
  risk_score = posterior_mean * context_multiplier
219
  risk_score = min(0.99, max(0.01, risk_score))
220
 
221
+ # 7. 95% credible interval (approximation)
 
222
  variance = (alpha_n * beta_n) / ((alpha_n + beta_n)**2 * (alpha_n + beta_n + 1))
223
  std_dev = variance ** 0.5
224
  ci_lower = max(0.01, posterior_mean - 1.96 * std_dev)
 
250
  }
251
 
252
  def _context_likelihood(self, context: Dict) -> float:
 
253
  multiplier = 1.0
 
 
254
  if context.get('environment') == 'production':
255
  multiplier *= 1.5
256
  elif context.get('environment') == 'staging':
257
  multiplier *= 0.8
 
 
258
  hour = datetime.now().hour
259
+ if hour < 6 or hour > 22:
260
  multiplier *= 1.3
 
 
261
  if context.get('user_role') == 'junior':
262
  multiplier *= 1.4
263
  elif context.get('user_role') == 'senior':
264
  multiplier *= 0.9
 
 
265
  if not context.get('backup_available', True):
266
  multiplier *= 1.6
 
267
  return multiplier
268
 
269
  def record_outcome(self, action_text: str, success: bool):
 
270
  action_hash = hashlib.sha256(action_text.encode()).hexdigest()
271
  action_type = self.classify_action(action_text)
272
+ try:
273
+ with self._get_db() as conn:
274
+ conn.execute('''
275
+ INSERT INTO evidence (id, action_type, action_hash, success, total, timestamp)
276
+ VALUES (?, ?, ?, ?, ?, ?)
277
+ ''', (
278
+ str(uuid.uuid4()),
279
+ action_type,
280
+ action_hash[:50],
281
+ 1 if success else 0,
282
+ 1,
283
+ datetime.utcnow().isoformat()
284
+ ))
285
+ conn.commit()
286
+ logger.info(f"Recorded outcome for {action_type}: success={success}")
287
+ except sqlite3.Error as e:
288
+ logger.error(f"Failed to record outcome: {e}")
289
 
290
  # ============== POLICY ENGINE ==============
291
  class PolicyEngine:
292
+ # ... (unchanged) ...
 
 
 
 
293
  def __init__(self):
294
  self.config = {
295
  "confidence_threshold": settings.DEFAULT_CONFIDENCE_THRESHOLD,
 
318
  action: str,
319
  risk: Dict[str, Any],
320
  confidence: float) -> Dict[str, Any]:
321
+ # ... unchanged ...
 
 
 
322
  gates = []
323
 
324
  # Gate 1: Confidence threshold
 
390
  # Overall decision
391
  all_passed = all(g["passed"] for g in gates)
392
 
 
393
  if not all_passed:
394
  required_level = ExecutionLevel.OPERATOR_REVIEW
395
  elif risk["level"] == RiskLevel.LOW:
 
408
  }
409
 
410
  def update_config(self, key: str, value: Any):
 
411
  if key in self.config:
412
  self.config[key] = value
413
  logger.info(f"Policy updated: {key} = {value}")
 
416
 
417
  # ============== RAG MEMORY WITH PERSISTENCE ==============
418
  class RAGMemory:
419
+ # ... (unchanged except error handling) ...
 
 
 
 
420
  def __init__(self):
421
  self.db_path = f"{settings.DATA_DIR}/memory.db"
422
  self._init_db()
423
  self.embedding_cache = {}
424
 
425
  def _init_db(self):
426
+ try:
427
+ with self._get_db() as conn:
428
+ conn.execute('''
429
+ CREATE TABLE IF NOT EXISTS incidents (
430
+ id TEXT PRIMARY KEY,
431
+ action TEXT,
432
+ action_hash TEXT,
433
+ risk_score REAL,
434
+ risk_level TEXT,
435
+ confidence REAL,
436
+ allowed BOOLEAN,
437
+ gates TEXT,
438
+ timestamp TEXT,
439
+ embedding TEXT
440
+ )
441
+ ''')
442
+ conn.execute('''
443
+ CREATE TABLE IF NOT EXISTS signals (
444
+ id TEXT PRIMARY KEY,
445
+ signal_type TEXT,
446
+ action TEXT,
447
+ risk_score REAL,
448
+ metadata TEXT,
449
+ timestamp TEXT,
450
+ contacted BOOLEAN DEFAULT 0
451
+ )
452
+ ''')
453
+ conn.execute('CREATE INDEX IF NOT EXISTS idx_action_hash ON incidents(action_hash)')
454
+ conn.execute('CREATE INDEX IF NOT EXISTS idx_signal_type ON signals(signal_type)')
455
+ conn.execute('CREATE INDEX IF NOT EXISTS idx_signal_contacted ON signals(contacted)')
456
+ except sqlite3.Error as e:
457
+ logger.error(f"Failed to initialize memory database: {e}")
458
+ raise RuntimeError("Could not initialize memory storage") from e
 
 
459
 
460
  @contextmanager
461
  def _get_db(self):
462
+ conn = None
 
463
  try:
464
+ conn = sqlite3.connect(self.db_path)
465
+ conn.row_factory = sqlite3.Row
466
  yield conn
467
+ except sqlite3.Error as e:
468
+ logger.error(f"Database error in memory: {e}")
469
+ raise
470
  finally:
471
+ if conn:
472
+ conn.close()
473
 
474
  def _simple_embedding(self, text: str) -> List[float]:
475
+ # ... unchanged ...
 
476
  if text in self.embedding_cache:
477
  return self.embedding_cache[text]
478
 
 
479
  words = text.lower().split()
480
  trigrams = set()
481
  for word in words:
482
  for i in range(len(word) - 2):
483
  trigrams.add(word[i:i+3])
484
 
 
 
485
  vector = [hash(t) % 1000 / 1000.0 for t in sorted(trigrams)[:100]]
 
486
  while len(vector) < 100:
487
  vector.append(0.0)
488
  vector = vector[:100]
 
489
  self.embedding_cache[text] = vector
490
  return vector
491
 
 
496
  confidence: float,
497
  allowed: bool,
498
  gates: List[Dict]):
 
499
  action_hash = hashlib.sha256(action.encode()).hexdigest()[:50]
500
  embedding = json.dumps(self._simple_embedding(action))
501
+ try:
502
+ with self._get_db() as conn:
503
+ conn.execute('''
504
+ INSERT INTO incidents
505
+ (id, action, action_hash, risk_score, risk_level, confidence, allowed, gates, timestamp, embedding)
506
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
507
+ ''', (
508
+ str(uuid.uuid4()),
509
+ action[:500],
510
+ action_hash,
511
+ risk_score,
512
+ risk_level.value,
513
+ confidence,
514
+ 1 if allowed else 0,
515
+ json.dumps(gates),
516
+ datetime.utcnow().isoformat(),
517
+ embedding
518
+ ))
519
+ conn.commit()
520
+ except sqlite3.Error as e:
521
+ logger.error(f"Failed to store incident: {e}")
522
 
523
  def find_similar(self, action: str, limit: int = 5) -> List[Dict]:
 
524
  query_embedding = self._simple_embedding(action)
525
+ try:
526
+ with self._get_db() as conn:
527
+ cursor = conn.execute('''
528
+ SELECT * FROM incidents
529
+ ORDER BY timestamp DESC
530
+ LIMIT 100
531
+ ''')
532
+ incidents = []
533
+ for row in cursor.fetchall():
534
+ stored_embedding = json.loads(row['embedding'])
535
+ dot = sum(q * s for q, s in zip(query_embedding, stored_embedding))
536
+ norm_q = sum(q*q for q in query_embedding) ** 0.5
537
+ norm_s = sum(s*s for s in stored_embedding) ** 0.5
538
+ similarity = dot / (norm_q * norm_s) if (norm_q > 0 and norm_s > 0) else 0
539
+ incidents.append({
540
+ 'id': row['id'],
541
+ 'action': row['action'],
542
+ 'risk_score': row['risk_score'],
543
+ 'risk_level': row['risk_level'],
544
+ 'confidence': row['confidence'],
545
+ 'allowed': bool(row['allowed']),
546
+ 'timestamp': row['timestamp'],
547
+ 'similarity': similarity
548
+ })
549
+ incidents.sort(key=lambda x: x['similarity'], reverse=True)
550
+ return incidents[:limit]
551
+ except sqlite3.Error as e:
552
+ logger.error(f"Failed to find similar incidents: {e}")
553
+ return []
 
 
 
 
 
 
 
 
554
 
555
  def track_enterprise_signal(self,
556
  signal_type: LeadSignal,
557
  action: str,
558
  risk_score: float,
559
  metadata: Dict = None):
 
 
560
  signal = {
561
  'id': str(uuid.uuid4()),
562
  'signal_type': signal_type.value,
 
566
  'timestamp': datetime.utcnow().isoformat(),
567
  'contacted': 0
568
  }
569
+ try:
570
+ with self._get_db() as conn:
571
+ conn.execute('''
572
+ INSERT INTO signals
573
+ (id, signal_type, action, risk_score, metadata, timestamp, contacted)
574
+ VALUES (?, ?, ?, ?, ?, ?, ?)
575
+ ''', (
576
+ signal['id'],
577
+ signal['signal_type'],
578
+ signal['action'],
579
+ signal['risk_score'],
580
+ signal['metadata'],
581
+ signal['timestamp'],
582
+ signal['contacted']
583
+ ))
584
+ conn.commit()
585
+ except sqlite3.Error as e:
586
+ logger.error(f"Failed to track signal: {e}")
587
+ return None
588
 
589
  logger.info(f"πŸ”” Enterprise signal: {signal_type.value} - {action[:50]}...")
 
 
590
  if signal_type in [LeadSignal.HIGH_RISK_BLOCKED, LeadSignal.NOVEL_ACTION]:
591
  self._notify_sales_team(signal)
 
592
  return signal
593
 
594
  def _notify_sales_team(self, signal: Dict):
 
 
 
595
  if settings.SLACK_WEBHOOK:
596
  try:
597
  requests.post(settings.SLACK_WEBHOOK, json={
 
601
  f"Risk Score: {signal['risk_score']:.2f}\n"
602
  f"Time: {signal['timestamp']}\n"
603
  f"Contact: {settings.LEAD_EMAIL}"
604
+ }, timeout=5)
605
+ except requests.RequestException as e:
606
+ logger.error(f"Slack notification failed: {e}")
607
+ # Email via SendGrid (if configured) could be added similarly
 
 
 
 
608
 
609
  def get_uncontacted_signals(self) -> List[Dict]:
610
+ try:
611
+ with self._get_db() as conn:
612
+ cursor = conn.execute('''
613
+ SELECT * FROM signals
614
+ WHERE contacted = 0
615
+ ORDER BY timestamp DESC
616
+ ''')
617
+ signals = []
618
+ for row in cursor.fetchall():
619
+ signals.append({
620
+ 'id': row['id'],
621
+ 'signal_type': row['signal_type'],
622
+ 'action': row['action'],
623
+ 'risk_score': row['risk_score'],
624
+ 'metadata': json.loads(row['metadata']),
625
+ 'timestamp': row['timestamp']
626
+ })
627
+ return signals
628
+ except sqlite3.Error as e:
629
+ logger.error(f"Failed to get uncontacted signals: {e}")
630
+ return []
631
 
632
  def mark_contacted(self, signal_id: str):
633
+ try:
634
+ with self._get_db() as conn:
635
+ conn.execute('UPDATE signals SET contacted = 1 WHERE id = ?', (signal_id,))
636
+ conn.commit()
637
+ except sqlite3.Error as e:
638
+ logger.error(f"Failed to mark signal as contacted: {e}")
639
 
640
# ============== AUTHENTICATION ==============
security = HTTPBearer()

async def verify_api_key(credentials: HTTPAuthorizationCredentials = Depends(security)):
    """Verify the bearer token for protected endpoints.

    Compares the presented credential against ``settings.API_KEY`` and
    raises HTTP 403 on mismatch; returns the credential string on success.
    """
    if credentials.credentials != settings.API_KEY:
        # 403 as a literal: the visible fastapi import line only brings in
        # FastAPI/HTTPException/Depends, so ``status.HTTP_403_FORBIDDEN``
        # risks a NameError. The numeric value is identical.
        raise HTTPException(
            status_code=403,
            detail="Invalid API key"
        )
    return credentials.credentials
651
 
652
  # ============== PYDANTIC MODELS ==============
 
660
  user_role: str = "devops"
661
  session_id: Optional[str] = None
662
 
 
663
  @field_validator('proposedAction')
664
  @classmethod
665
  def validate_action(cls, v: str) -> str:
 
721
  policy_engine = PolicyEngine()
722
  memory = RAGMemory()
723
 
724
# ============== API ENDPOINTS (with authentication) ==============

@app.get("/health")
async def health_check():
    """Public liveness probe — the only endpoint served without auth."""
    # NOTE(review): "memory_entries" is derived from *uncontacted lead
    # signals*, not stored incidents — confirm this is the intended metric.
    return {
        "status": "healthy",
        "version": "3.3.9",
        "edition": "OSS",
        "memory_entries": len(memory.get_uncontacted_signals()),
        "timestamp": datetime.utcnow().isoformat(),
    }
736
+
737
+ @app.get("/api/v1/config", dependencies=[Depends(verify_api_key)])
738
  async def get_config():
739
+ """Get current ARF configuration (protected)"""
740
  return {
741
  "confidenceThreshold": policy_engine.config["confidence_threshold"],
742
  "maxAutonomousRisk": policy_engine.config["max_autonomous_risk"],
 
745
  "edition": "OSS"
746
  }
747
 
748
@app.post("/api/v1/config", dependencies=[Depends(verify_api_key)])
async def update_config(config: ConfigUpdateRequest):
    """Update ARF configuration (protected).

    Only fields present in the request body are applied; each goes through
    the policy engine so validation/persistence stays in one place.
    Returns the resulting configuration.
    """
    # ``is not None`` rather than truthiness: a falsy-but-present value
    # (e.g. a threshold of 0) must still be applied, not silently dropped.
    if config.confidenceThreshold is not None:
        policy_engine.update_config("confidence_threshold", config.confidenceThreshold)
    if config.maxAutonomousRisk is not None:
        policy_engine.update_config("max_autonomous_risk", config.maxAutonomousRisk.value)
    return await get_config()
756
 
757
+ @app.post("/api/v1/evaluate", dependencies=[Depends(verify_api_key)], response_model=EvaluationResponse)
758
  async def evaluate_action(request: ActionRequest):
759
  """
760
+ Real ARF OSS evaluation pipeline (protected)
 
761
  """
762
  try:
763
  # Build context
 
850
 
851
  except Exception as e:
852
  logger.error(f"Evaluation failed: {e}", exc_info=True)
853
+ raise HTTPException(
854
+ status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
855
+ detail="Internal server error during evaluation"
856
+ )
857
 
858
@app.get("/api/v1/enterprise/signals", dependencies=[Depends(verify_api_key)])
async def get_enterprise_signals(contacted: bool = False):
    """
    Get enterprise lead signals (protected endpoint).

    contacted=False (default): only signals not yet contacted.
    contacted=True: every signal from the last 30 days, contacted or not.
    """
    try:
        if not contacted:
            # Fix: this branch previously ran when ``contacted`` was True,
            # returning exactly the UNcontacted rows — inverted semantics.
            signals = memory.get_uncontacted_signals()
        else:
            with memory._get_db() as conn:
                cursor = conn.execute('''
                    SELECT * FROM signals
                    WHERE datetime(timestamp) > datetime('now', '-30 days')
                    ORDER BY timestamp DESC
                ''')
                signals = [
                    {
                        'id': row['id'],
                        'signal_type': row['signal_type'],
                        'action': row['action'],
                        'risk_score': row['risk_score'],
                        'metadata': json.loads(row['metadata']),
                        'timestamp': row['timestamp'],
                        'contacted': bool(row['contacted']),
                    }
                    for row in cursor.fetchall()
                ]
        return {"signals": signals, "count": len(signals)}
    except Exception as e:
        logger.error(f"Failed to retrieve signals: {e}")
        raise HTTPException(status_code=500, detail="Could not retrieve signals")
888
 
889
@app.post("/api/v1/enterprise/signals/{signal_id}/contact", dependencies=[Depends(verify_api_key)])
async def mark_signal_contacted(signal_id: str):
    """Mark a lead signal as contacted (protected)."""
    # Delegates to the memory layer; errors there are logged, not raised.
    memory.mark_contacted(signal_id)
    return {"status": "success", "message": "Signal marked as contacted"}
894
 
895
@app.get("/api/v1/memory/similar", dependencies=[Depends(verify_api_key)])
async def get_similar_actions(action: str, limit: int = 5):
    """Find similar historical actions (protected)."""
    matches = memory.find_similar(action, limit=limit)
    return {"similar": matches, "count": len(matches)}
900
 
901
@app.post("/api/v1/feedback", dependencies=[Depends(verify_api_key)])
async def record_outcome(action: str, success: bool):
    """
    Record actual outcome for Bayesian updating (protected).
    """
    # Feeds the observed result back into the risk engine's posterior.
    risk_engine.record_outcome(action, success)
    return {"status": "success", "message": "Outcome recorded"}
908
 
 
 
 
 
 
 
 
 
 
 
 
909
  # ============== GRADIO LEAD GENERATION UI ==============
910
  def create_lead_gen_ui():
911
+ """Professional lead generation interface (no auth needed for UI)"""
912
+ # ... (unchanged) ...
 
913
  with gr.Blocks(title="ARF OSS - Enterprise Reliability Intelligence") as ui:
914
 
915
  # Header
 
972
  </div>
973
  """)
974
 
975
+ # Live Demo Stats
976
  demo_stats = gr.JSON(
977
  label="πŸ“Š Live Demo Statistics",
978
  value={
 
1036
  if __name__ == "__main__":
1037
  import uvicorn
1038
 
 
1039
  port = int(os.environ.get('PORT', 7860))
1040
 
1041
+ # πŸ”₯ Ensure any lingering Gradio servers are closed before starting
1042
+ try:
1043
+ gr.close_all()
1044
+ except:
1045
+ pass
1046
+
1047
  logger.info("="*60)
1048
  logger.info("πŸš€ ARF OSS v3.3.9 Starting")
1049
  logger.info(f"πŸ“Š Data directory: {settings.DATA_DIR}")
 
1052
  logger.info(f"🌐 Serving at: http://0.0.0.0:{port}")
1053
  logger.info("="*60)
1054
 
 
1055
  uvicorn.run(
1056
+ "hf_demo:app",
1057
  host="0.0.0.0",
1058
  port=port,
1059
+ log_level="info",
1060
+ reload=False
1061
  )