petter2025 committed on
Commit
f6fe53e
·
verified ·
1 Parent(s): b635d04

Update hf_demo.py

Browse files
Files changed (1) hide show
  1. hf_demo.py +122 -92
hf_demo.py CHANGED
@@ -1,5 +1,6 @@
1
  # hf_demo.py – ARF v4 dashboard for Hugging Face Spaces
2
  import logging
 
3
  from datetime import datetime, timezone
4
 
5
  from fastapi import FastAPI, HTTPException
@@ -16,7 +17,7 @@ logger = logging.getLogger(__name__)
16
 
17
  app = FastAPI(title="ARF v4 API with Memory")
18
 
19
- # Enable CORS for your frontend
20
  app.add_middleware(
21
  CORSMiddleware,
22
  allow_origins=["https://arf-frontend-sandy.vercel.app"],
@@ -27,15 +28,23 @@ app.add_middleware(
27
  # ---------------------------------------------------------------------------
28
  # Initialize ARF components
29
  # ---------------------------------------------------------------------------
 
30
  risk_engine = RiskEngine()
31
 
32
- # Create FAISS index and memory (using default dimension from constants)
33
  faiss_index = create_faiss_index(dim=MemoryConstants.VECTOR_DIM)
34
  memory = RAGGraphMemory(faiss_index)
35
 
 
 
 
 
 
 
 
36
  # ---------------------------------------------------------------------------
37
  # API Endpoints
38
  # ---------------------------------------------------------------------------
 
39
  @app.get("/")
40
  async def root():
41
  return {
@@ -49,107 +58,105 @@ async def root():
49
  async def health():
50
  return {"status": "ok", "version": "4.0.0"}
51
 
52
- @app.get("/api/v1/get_risk")
53
- async def get_risk():
54
- """
55
- Compute a safe risk snapshot using the supported RiskEngine.calculate_risk()
56
- API. This avoids calling the removed get_current_risk() method.
57
- """
58
- try:
59
- score = _calculate_demo_risk()
60
- return {
61
- "system_risk": score["risk"],
62
- "status": "critical" if score["risk"] > 0.8 else "normal",
63
- "details": score,
64
- }
65
- except Exception as e:
66
- raise HTTPException(status_code=500, detail=str(e))
67
-
68
- @app.post("/api/v1/incident")
69
- async def store_incident(event_data: dict, analysis: dict):
70
- try:
71
- incident_id = memory.store_incident(event_data, analysis)
72
- return {"incident_id": incident_id}
73
- except Exception as e:
74
- raise HTTPException(status_code=500, detail=str(e))
75
-
76
- @app.get("/api/v1/memory/similar")
77
- async def find_similar_incidents(action: str, k: int = 5):
78
- class DummyEvent:
79
- def __init__(self, action: str):
80
- self.component = "user_action"
81
- self.latency_p99 = 0.0
82
- self.error_rate = 0.0
83
- self.throughput = 0
84
- self.cpu_util = 0.0
85
- self.memory_util = 0.0
86
- self.timestamp = datetime.now()
87
- self.severity = "low"
88
- self.action = action
89
-
90
- event = DummyEvent(action)
91
- analysis = {"action": action}
92
- similar = memory.find_similar(event, analysis, k=k)
93
-
94
- results = []
95
- for node in similar:
96
- results.append(
97
- {
98
- "incident_id": node.incident_id,
99
- "component": node.component,
100
- "severity": node.severity,
101
- "timestamp": node.timestamp,
102
- "metrics": node.metrics,
103
- "agent_analysis": node.agent_analysis,
104
- "similarity_score": node.metadata.get("similarity_score", 0.0),
105
- }
106
- )
107
-
108
- return {"similar": results, "count": len(results)}
109
-
110
- @app.get("/api/v1/memory/stats")
111
- async def memory_stats():
112
- return memory.get_graph_stats()
113
-
114
  # ---------------------------------------------------------------------------
115
- # Gradio dashboard
116
  # ---------------------------------------------------------------------------
117
 
118
  class _DemoIntent:
119
- """
120
- Minimal intent object for demo-only risk snapshots.
121
- RiskEngine.categorize_intent() will fall back to DEFAULT for this object.
122
- """
123
  environment = "dev"
124
  deployment_target = "dev"
125
  service_name = "demo"
126
 
 
 
 
 
127
  def _calculate_demo_risk():
128
- """
129
- Use the supported RiskEngine.calculate_risk() API.
130
- Avoids the removed get_current_risk() method.
131
- """
132
  intent = _DemoIntent()
 
133
  risk_value, explanation, contributions = risk_engine.calculate_risk(
134
  intent=intent,
135
  cost_estimate=None,
136
  policy_violations=[],
137
  )
138
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
139
  return {
140
  "risk": float(risk_value),
141
  "status": "critical" if risk_value > 0.8 else "normal",
142
  "explanation": explanation,
143
  "contributions": contributions,
 
 
144
  }
145
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
146
  def get_risk_snapshot():
 
147
  try:
148
  snapshot = _calculate_demo_risk()
 
149
  snapshot["timestamp"] = datetime.now(timezone.utc).isoformat()
 
150
  return snapshot
 
151
  except Exception as e:
 
152
  logger.exception("Failed to compute risk snapshot")
 
153
  return {
154
  "status": "error",
155
  "error": str(e),
@@ -157,45 +164,55 @@ def get_risk_snapshot():
157
  }
158
 
159
  def get_health_snapshot():
160
- try:
161
- return {
162
- "status": "ok",
163
- "version": "4.0.0",
164
- "service": "ARF OSS API",
165
- "timestamp": datetime.now(timezone.utc).isoformat(),
166
- }
167
- except Exception as e:
168
- return {
169
- "status": "error",
170
- "error": str(e),
171
- "timestamp": datetime.now(timezone.utc).isoformat(),
172
- }
173
 
174
  def get_memory_snapshot():
 
175
  try:
 
176
  if memory.has_historical_data():
 
177
  stats = memory.get_graph_stats()
 
178
  return {
179
  "status": "ok",
180
  "memory_stats": stats,
181
  "timestamp": datetime.now(timezone.utc).isoformat(),
182
  }
 
183
  return {
184
  "status": "empty",
185
  "memory_stats": "No historical memory yet.",
186
  "timestamp": datetime.now(timezone.utc).isoformat(),
187
  }
 
188
  except Exception as e:
 
189
  logger.exception("Failed to compute memory snapshot")
 
190
  return {
191
  "status": "error",
192
  "error": str(e),
193
  "timestamp": datetime.now(timezone.utc).isoformat(),
194
  }
195
 
 
 
 
 
 
 
 
196
  with gr.Blocks(title="ARF v4 Demo") as demo:
 
197
  gr.Markdown("# Agentic Reliability Framework v4")
198
- gr.Markdown("### Status dashboard")
199
 
200
  with gr.Row():
201
  health_output = gr.JSON(label="Health")
@@ -205,20 +222,33 @@ with gr.Blocks(title="ARF v4 Demo") as demo:
205
  memory_output = gr.JSON(label="Memory Stats")
206
 
207
  with gr.Row():
208
- refresh_btn = gr.Button("Refresh Risk")
 
 
 
 
 
 
 
 
 
 
 
209
  health_btn = gr.Button("Refresh Health")
210
  memory_btn = gr.Button("Refresh Memory")
 
211
 
212
- refresh_btn.click(fn=get_risk_snapshot, outputs=risk_output)
213
  health_btn.click(fn=get_health_snapshot, outputs=health_output)
214
  memory_btn.click(fn=get_memory_snapshot, outputs=memory_output)
 
215
 
216
- # Load initial state after startup, not during import.
217
  demo.load(fn=get_health_snapshot, outputs=health_output)
218
  demo.load(fn=get_risk_snapshot, outputs=risk_output)
219
  demo.load(fn=get_memory_snapshot, outputs=memory_output)
220
 
221
- # ============== MAIN ENTRY POINT ==============
 
 
 
222
  if __name__ == "__main__":
223
- # Launch Gradio directly to keep the Space alive and avoid the startup crash.
224
  demo.launch(server_name="0.0.0.0")
 
1
  # hf_demo.py – ARF v4 dashboard for Hugging Face Spaces
2
  import logging
3
+ import uuid
4
  from datetime import datetime, timezone
5
 
6
  from fastapi import FastAPI, HTTPException
 
17
 
18
  app = FastAPI(title="ARF v4 API with Memory")
19
 
20
+ # Enable CORS
21
  app.add_middleware(
22
  CORSMiddleware,
23
  allow_origins=["https://arf-frontend-sandy.vercel.app"],
 
28
  # ---------------------------------------------------------------------------
29
  # Initialize ARF components
30
  # ---------------------------------------------------------------------------
31
+
32
  risk_engine = RiskEngine()
33
 
 
34
  faiss_index = create_faiss_index(dim=MemoryConstants.VECTOR_DIM)
35
  memory = RAGGraphMemory(faiss_index)
36
 
37
+ # ---------------------------------------------------------------------------
38
+ # Decision Memory (NEW)
39
+ # ---------------------------------------------------------------------------
40
+
41
+ decision_history = []
42
+ risk_history = []
43
+
44
  # ---------------------------------------------------------------------------
45
  # API Endpoints
46
  # ---------------------------------------------------------------------------
47
+
48
  @app.get("/")
49
  async def root():
50
  return {
 
58
  async def health():
59
  return {"status": "ok", "version": "4.0.0"}
60
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
  # ---------------------------------------------------------------------------
62
+ # Demo Intent Object
63
  # ---------------------------------------------------------------------------
64
 
65
  class _DemoIntent:
 
 
 
 
66
  environment = "dev"
67
  deployment_target = "dev"
68
  service_name = "demo"
69
 
70
+ # ---------------------------------------------------------------------------
71
+ # Risk Calculation
72
+ # ---------------------------------------------------------------------------
73
+
74
  def _calculate_demo_risk():
75
+
 
 
 
76
  intent = _DemoIntent()
77
+
78
  risk_value, explanation, contributions = risk_engine.calculate_risk(
79
  intent=intent,
80
  cost_estimate=None,
81
  policy_violations=[],
82
  )
83
 
84
+ decision = "approve"
85
+ if risk_value > 0.8:
86
+ decision = "deny"
87
+ elif risk_value > 0.2:
88
+ decision = "escalate"
89
+
90
+ decision_id = str(uuid.uuid4())
91
+
92
+ decision_record = {
93
+ "id": decision_id,
94
+ "timestamp": datetime.now(timezone.utc).isoformat(),
95
+ "risk": float(risk_value),
96
+ "decision": decision
97
+ }
98
+
99
+ decision_history.append(decision_record)
100
+
101
+ risk_history.append(
102
+ {
103
+ "timestamp": decision_record["timestamp"],
104
+ "risk": float(risk_value)
105
+ }
106
+ )
107
+
108
  return {
109
  "risk": float(risk_value),
110
  "status": "critical" if risk_value > 0.8 else "normal",
111
  "explanation": explanation,
112
  "contributions": contributions,
113
+ "decision_id": decision_id,
114
+ "decision": decision
115
  }
116
 
117
+ # ---------------------------------------------------------------------------
118
+ # Outcome Feedback
119
+ # ---------------------------------------------------------------------------
120
+
121
+ def record_outcome(success: bool):
122
+
123
+ if not decision_history:
124
+ return {"error": "no decisions yet"}
125
+
126
+ last_decision = decision_history[-1]
127
+
128
+ intent = _DemoIntent()
129
+
130
+ try:
131
+ risk_engine.update_outcome(intent, success)
132
+ except Exception as e:
133
+ logger.exception("Outcome update failed")
134
+
135
+ last_decision["outcome"] = "success" if success else "failure"
136
+
137
+ return {
138
+ "decision_id": last_decision["id"],
139
+ "outcome": last_decision["outcome"],
140
+ "timestamp": datetime.now(timezone.utc).isoformat()
141
+ }
142
+
143
+ # ---------------------------------------------------------------------------
144
+ # Snapshot functions
145
+ # ---------------------------------------------------------------------------
146
+
147
  def get_risk_snapshot():
148
+
149
  try:
150
  snapshot = _calculate_demo_risk()
151
+
152
  snapshot["timestamp"] = datetime.now(timezone.utc).isoformat()
153
+
154
  return snapshot
155
+
156
  except Exception as e:
157
+
158
  logger.exception("Failed to compute risk snapshot")
159
+
160
  return {
161
  "status": "error",
162
  "error": str(e),
 
164
  }
165
 
166
  def get_health_snapshot():
167
+
168
+ return {
169
+ "status": "ok",
170
+ "version": "4.0.0",
171
+ "service": "ARF OSS API",
172
+ "timestamp": datetime.now(timezone.utc).isoformat(),
173
+ }
 
 
 
 
 
 
174
 
175
  def get_memory_snapshot():
176
+
177
  try:
178
+
179
  if memory.has_historical_data():
180
+
181
  stats = memory.get_graph_stats()
182
+
183
  return {
184
  "status": "ok",
185
  "memory_stats": stats,
186
  "timestamp": datetime.now(timezone.utc).isoformat(),
187
  }
188
+
189
  return {
190
  "status": "empty",
191
  "memory_stats": "No historical memory yet.",
192
  "timestamp": datetime.now(timezone.utc).isoformat(),
193
  }
194
+
195
  except Exception as e:
196
+
197
  logger.exception("Failed to compute memory snapshot")
198
+
199
  return {
200
  "status": "error",
201
  "error": str(e),
202
  "timestamp": datetime.now(timezone.utc).isoformat(),
203
  }
204
 
205
+ def get_decision_history():
206
+ return decision_history[-10:]
207
+
208
+ # ---------------------------------------------------------------------------
209
+ # Gradio Dashboard
210
+ # ---------------------------------------------------------------------------
211
+
212
  with gr.Blocks(title="ARF v4 Demo") as demo:
213
+
214
  gr.Markdown("# Agentic Reliability Framework v4")
215
+ gr.Markdown("### Probabilistic Infrastructure Governance")
216
 
217
  with gr.Row():
218
  health_output = gr.JSON(label="Health")
 
222
  memory_output = gr.JSON(label="Memory Stats")
223
 
224
  with gr.Row():
225
+ decision_output = gr.JSON(label="Recent Decisions")
226
+
227
+ with gr.Row():
228
+ refresh_btn = gr.Button("Evaluate Intent")
229
+ success_btn = gr.Button("Action Succeeded")
230
+ fail_btn = gr.Button("Action Failed")
231
+
232
+ refresh_btn.click(fn=get_risk_snapshot, outputs=risk_output)
233
+ success_btn.click(fn=lambda: record_outcome(True), outputs=decision_output)
234
+ fail_btn.click(fn=lambda: record_outcome(False), outputs=decision_output)
235
+
236
+ with gr.Row():
237
  health_btn = gr.Button("Refresh Health")
238
  memory_btn = gr.Button("Refresh Memory")
239
+ history_btn = gr.Button("Show Decision History")
240
 
 
241
  health_btn.click(fn=get_health_snapshot, outputs=health_output)
242
  memory_btn.click(fn=get_memory_snapshot, outputs=memory_output)
243
+ history_btn.click(fn=get_decision_history, outputs=decision_output)
244
 
 
245
  demo.load(fn=get_health_snapshot, outputs=health_output)
246
  demo.load(fn=get_risk_snapshot, outputs=risk_output)
247
  demo.load(fn=get_memory_snapshot, outputs=memory_output)
248
 
249
+ # ---------------------------------------------------------------------------
250
+ # Entry point
251
+ # ---------------------------------------------------------------------------
252
+
253
  if __name__ == "__main__":
 
254
  demo.launch(server_name="0.0.0.0")