Spaces:

A-R-F
/

Agentic-Reliability-Framework-API

Running

App Files Files Community

petter2025 committed 14 days ago

Commit

f6fe53e

verified ·

1 Parent(s): b635d04

Update hf_demo.py

Browse files

Files changed (1) hide show

hf_demo.py +122 -92

hf_demo.py CHANGED Viewed

@@ -1,5 +1,6 @@
 # hf_demo.py – ARF v4 dashboard for Hugging Face Spaces
 import logging
 from datetime import datetime, timezone
 from fastapi import FastAPI, HTTPException
@@ -16,7 +17,7 @@ logger = logging.getLogger(__name__)
 app = FastAPI(title="ARF v4 API with Memory")
-# Enable CORS for your frontend
 app.add_middleware(
     CORSMiddleware,
     allow_origins=["https://arf-frontend-sandy.vercel.app"],
@@ -27,15 +28,23 @@ app.add_middleware(
 # ---------------------------------------------------------------------------
 # Initialize ARF components
 # ---------------------------------------------------------------------------
 risk_engine = RiskEngine()
-# Create FAISS index and memory (using default dimension from constants)
 faiss_index = create_faiss_index(dim=MemoryConstants.VECTOR_DIM)
 memory = RAGGraphMemory(faiss_index)
 # ---------------------------------------------------------------------------
 # API Endpoints
 # ---------------------------------------------------------------------------
 @app.get("/")
 async def root():
     return {
@@ -49,107 +58,105 @@ async def root():
 async def health():
     return {"status": "ok", "version": "4.0.0"}
-@app.get("/api/v1/get_risk")
-async def get_risk():
-    """
-    Compute a safe risk snapshot using the supported RiskEngine.calculate_risk()
-    API. This avoids calling the removed get_current_risk() method.
-    """
-    try:
-        score = _calculate_demo_risk()
-        return {
-            "system_risk": score["risk"],
-            "status": "critical" if score["risk"] > 0.8 else "normal",
-            "details": score,
-        }
-    except Exception as e:
-        raise HTTPException(status_code=500, detail=str(e))
-@app.post("/api/v1/incident")
-async def store_incident(event_data: dict, analysis: dict):
-    try:
-        incident_id = memory.store_incident(event_data, analysis)
-        return {"incident_id": incident_id}
-    except Exception as e:
-        raise HTTPException(status_code=500, detail=str(e))
-@app.get("/api/v1/memory/similar")
-async def find_similar_incidents(action: str, k: int = 5):
-    class DummyEvent:
-        def __init__(self, action: str):
-            self.component = "user_action"
-            self.latency_p99 = 0.0
-            self.error_rate = 0.0
-            self.throughput = 0
-            self.cpu_util = 0.0
-            self.memory_util = 0.0
-            self.timestamp = datetime.now()
-            self.severity = "low"
-            self.action = action
-    event = DummyEvent(action)
-    analysis = {"action": action}
-    similar = memory.find_similar(event, analysis, k=k)
-    results = []
-    for node in similar:
-        results.append(
-            {
-                "incident_id": node.incident_id,
-                "component": node.component,
-                "severity": node.severity,
-                "timestamp": node.timestamp,
-                "metrics": node.metrics,
-                "agent_analysis": node.agent_analysis,
-                "similarity_score": node.metadata.get("similarity_score", 0.0),
-            }
-        )
-    return {"similar": results, "count": len(results)}
-@app.get("/api/v1/memory/stats")
-async def memory_stats():
-    return memory.get_graph_stats()
 # ---------------------------------------------------------------------------
-# Gradio dashboard
 # ---------------------------------------------------------------------------
 class _DemoIntent:
-    """
-    Minimal intent object for demo-only risk snapshots.
-    RiskEngine.categorize_intent() will fall back to DEFAULT for this object.
-    """
     environment = "dev"
     deployment_target = "dev"
     service_name = "demo"
 def _calculate_demo_risk():
-    """
-    Use the supported RiskEngine.calculate_risk() API.
-    Avoids the removed get_current_risk() method.
-    """
     intent = _DemoIntent()
     risk_value, explanation, contributions = risk_engine.calculate_risk(
         intent=intent,
         cost_estimate=None,
         policy_violations=[],
     )
     return {
         "risk": float(risk_value),
         "status": "critical" if risk_value > 0.8 else "normal",
         "explanation": explanation,
         "contributions": contributions,
     }
 def get_risk_snapshot():
     try:
         snapshot = _calculate_demo_risk()
         snapshot["timestamp"] = datetime.now(timezone.utc).isoformat()
         return snapshot
     except Exception as e:
         logger.exception("Failed to compute risk snapshot")
         return {
             "status": "error",
             "error": str(e),
@@ -157,45 +164,55 @@ def get_risk_snapshot():
         }
 def get_health_snapshot():
-    try:
-        return {
-            "status": "ok",
-            "version": "4.0.0",
-            "service": "ARF OSS API",
-            "timestamp": datetime.now(timezone.utc).isoformat(),
-        }
-    except Exception as e:
-        return {
-            "status": "error",
-            "error": str(e),
-            "timestamp": datetime.now(timezone.utc).isoformat(),
-        }
 def get_memory_snapshot():
     try:
         if memory.has_historical_data():
             stats = memory.get_graph_stats()
             return {
                 "status": "ok",
                 "memory_stats": stats,
                 "timestamp": datetime.now(timezone.utc).isoformat(),
             }
         return {
             "status": "empty",
             "memory_stats": "No historical memory yet.",
             "timestamp": datetime.now(timezone.utc).isoformat(),
         }
     except Exception as e:
         logger.exception("Failed to compute memory snapshot")
         return {
             "status": "error",
             "error": str(e),
             "timestamp": datetime.now(timezone.utc).isoformat(),
         }
 with gr.Blocks(title="ARF v4 Demo") as demo:
     gr.Markdown("# Agentic Reliability Framework v4")
-    gr.Markdown("### Status dashboard")
     with gr.Row():
         health_output = gr.JSON(label="Health")
@@ -205,20 +222,33 @@ with gr.Blocks(title="ARF v4 Demo") as demo:
         memory_output = gr.JSON(label="Memory Stats")
     with gr.Row():
-        refresh_btn = gr.Button("Refresh Risk")
         health_btn = gr.Button("Refresh Health")
         memory_btn = gr.Button("Refresh Memory")
-    refresh_btn.click(fn=get_risk_snapshot, outputs=risk_output)
     health_btn.click(fn=get_health_snapshot, outputs=health_output)
     memory_btn.click(fn=get_memory_snapshot, outputs=memory_output)
-    # Load initial state after startup, not during import.
     demo.load(fn=get_health_snapshot, outputs=health_output)
     demo.load(fn=get_risk_snapshot, outputs=risk_output)
     demo.load(fn=get_memory_snapshot, outputs=memory_output)
-# ============== MAIN ENTRY POINT ==============
 if __name__ == "__main__":
-    # Launch Gradio directly to keep the Space alive and avoid the startup crash.
     demo.launch(server_name="0.0.0.0")

 # hf_demo.py – ARF v4 dashboard for Hugging Face Spaces
 import logging
+import uuid
 from datetime import datetime, timezone
 from fastapi import FastAPI, HTTPException
 app = FastAPI(title="ARF v4 API with Memory")
+# Enable CORS
 app.add_middleware(
     CORSMiddleware,
     allow_origins=["https://arf-frontend-sandy.vercel.app"],
 # ---------------------------------------------------------------------------
 # Initialize ARF components
 # ---------------------------------------------------------------------------
 risk_engine = RiskEngine()
 faiss_index = create_faiss_index(dim=MemoryConstants.VECTOR_DIM)
 memory = RAGGraphMemory(faiss_index)
+# ---------------------------------------------------------------------------
+# Decision Memory (NEW)
+# ---------------------------------------------------------------------------
+decision_history = []
+risk_history = []
 # ---------------------------------------------------------------------------
 # API Endpoints
 # ---------------------------------------------------------------------------
 @app.get("/")
 async def root():
     return {
 async def health():
     return {"status": "ok", "version": "4.0.0"}
 # ---------------------------------------------------------------------------
+# Demo Intent Object
 # ---------------------------------------------------------------------------
 class _DemoIntent:
     environment = "dev"
     deployment_target = "dev"
     service_name = "demo"
+# ---------------------------------------------------------------------------
+# Risk Calculation
+# ---------------------------------------------------------------------------
 def _calculate_demo_risk():
     # Scores a fresh _DemoIntent with risk_engine.calculate_risk(), maps the
     # score to a decision, appends the result to the module-level histories,
     # and returns everything as a plain dict.
     intent = _DemoIntent()
     risk_value, explanation, contributions = risk_engine.calculate_risk(
         intent=intent,
         cost_estimate=None,
         policy_violations=[],
     )
     # Decision bands: > 0.8 -> "deny", > 0.2 -> "escalate", else "approve".
+    decision = "approve"
+    if risk_value > 0.8:
+        decision = "deny"
+    elif risk_value > 0.2:
+        decision = "escalate"
     # Random UUID so the UI can refer to this specific evaluation.
+    decision_id = str(uuid.uuid4())
+    decision_record = {
+        "id": decision_id,
+        "timestamp": datetime.now(timezone.utc).isoformat(),
+        "risk": float(risk_value),
+        "decision": decision
+    }
     # Side effect: every call appends one entry to each history list; nothing
     # in this function trims them.
+    decision_history.append(decision_record)
+    risk_history.append(
+        {
+            "timestamp": decision_record["timestamp"],
+            "risk": float(risk_value)
+        }
+    )
     # "status" uses the same 0.8 cut-off as the "deny" band above.
     return {
         "risk": float(risk_value),
         "status": "critical" if risk_value > 0.8 else "normal",
         "explanation": explanation,
         "contributions": contributions,
+        "decision_id": decision_id,
+        "decision": decision
     }
+# ---------------------------------------------------------------------------
+# Outcome Feedback
+# ---------------------------------------------------------------------------
+def record_outcome(success: bool):
+    if not decision_history:
+        return {"error": "no decisions yet"}
+    last_decision = decision_history[-1]
+    intent = _DemoIntent()
+    try:
+        risk_engine.update_outcome(intent, success)
+    except Exception as e:
+        logger.exception("Outcome update failed")
+    last_decision["outcome"] = "success" if success else "failure"
+    return {
+        "decision_id": last_decision["id"],
+        "outcome": last_decision["outcome"],
+        "timestamp": datetime.now(timezone.utc).isoformat()
+    }
+# ---------------------------------------------------------------------------
+# Snapshot functions
+# ---------------------------------------------------------------------------
 def get_risk_snapshot():
     # Dashboard wrapper around _calculate_demo_risk(): stamps the result with
     # a UTC ISO-8601 "timestamp" on success, and on any exception logs the
     # traceback and returns an error payload instead of raising.
     try:
         snapshot = _calculate_demo_risk()
         snapshot["timestamp"] = datetime.now(timezone.utc).isoformat()
         return snapshot
     except Exception as e:
         logger.exception("Failed to compute risk snapshot")
         # NOTE(review): this diff view may elide a line inside this dict
         # (a hunk boundary falls here) - confirm the full set of keys
         # against the actual file.
         return {
             "status": "error",
             "error": str(e),
         }
 def get_health_snapshot():
+    return {
+        "status": "ok",
+        "version": "4.0.0",
+        "service": "ARF OSS API",
+        "timestamp": datetime.now(timezone.utc).isoformat(),
+    }
 def get_memory_snapshot():
     """Report graph-memory statistics for the dashboard."""
     # "status" is "ok" (with stats), "empty" (no historical data yet) or
     # "error" (with the exception text); every variant carries a UTC
     # ISO-8601 "timestamp" taken when that payload is built.
     def _utc_stamp():
         return datetime.now(timezone.utc).isoformat()
     try:
         if not memory.has_historical_data():
             return {
                 "status": "empty",
                 "memory_stats": "No historical memory yet.",
                 "timestamp": _utc_stamp(),
             }
         graph_stats = memory.get_graph_stats()
         return {
             "status": "ok",
             "memory_stats": graph_stats,
             "timestamp": _utc_stamp(),
         }
     except Exception as exc:
         logger.exception("Failed to compute memory snapshot")
         return {
             "status": "error",
             "error": str(exc),
             "timestamp": _utc_stamp(),
         }
+def get_decision_history():
+    return decision_history[-10:]
+# ---------------------------------------------------------------------------
+# Gradio Dashboard
+# ---------------------------------------------------------------------------
 with gr.Blocks(title="ARF v4 Demo") as demo:
     # Gradio dashboard: JSON panels, the buttons that refresh them, and the
     # handlers that fill them on page load.
     gr.Markdown("# Agentic Reliability Framework v4")
+    gr.Markdown("### Probabilistic Infrastructure Governance")
     with gr.Row():
         health_output = gr.JSON(label="Health")
         memory_output = gr.JSON(label="Memory Stats")
     with gr.Row():
+        decision_output = gr.JSON(label="Recent Decisions")
+    with gr.Row():
+        refresh_btn = gr.Button("Evaluate Intent")
+        success_btn = gr.Button("Action Succeeded")
+        fail_btn = gr.Button("Action Failed")
     # NOTE(review): risk_output is not defined in the lines shown here;
     # presumably it is created in a part of the file this diff view elides -
     # confirm against the actual file.
+    refresh_btn.click(fn=get_risk_snapshot, outputs=risk_output)
     # The outcome buttons write record_outcome()'s dict into the same panel
     # that "Show Decision History" fills with a list of records.
+    success_btn.click(fn=lambda: record_outcome(True), outputs=decision_output)
+    fail_btn.click(fn=lambda: record_outcome(False), outputs=decision_output)
+    with gr.Row():
         health_btn = gr.Button("Refresh Health")
         memory_btn = gr.Button("Refresh Memory")
+        history_btn = gr.Button("Show Decision History")
     health_btn.click(fn=get_health_snapshot, outputs=health_output)
     memory_btn.click(fn=get_memory_snapshot, outputs=memory_output)
+    history_btn.click(fn=get_decision_history, outputs=decision_output)
     # Fill the health, risk and memory panels on page load. No load handler
     # targets decision_output in the lines shown, so that panel waits for a
     # button click.
     demo.load(fn=get_health_snapshot, outputs=health_output)
     demo.load(fn=get_risk_snapshot, outputs=risk_output)
     demo.load(fn=get_memory_snapshot, outputs=memory_output)
+# ---------------------------------------------------------------------------
+# Entry point
+# ---------------------------------------------------------------------------
 if __name__ == "__main__":
     # Run as a script: launch the Gradio UI, listening on all network
     # interfaces (server_name="0.0.0.0"). Only `demo` is launched here.
     demo.launch(server_name="0.0.0.0")