petter2025 committed on
Commit
b635d04
·
verified ·
1 Parent(s): 54e37af

Update hf_demo.py

Browse files
Files changed (1) hide show
  1. hf_demo.py +133 -37
hf_demo.py CHANGED
@@ -1,15 +1,19 @@
1
- # hf_demo.py – ARF v4 API with Memory & Health Check
 
 
 
2
  from fastapi import FastAPI, HTTPException
3
  from fastapi.middleware.cors import CORSMiddleware
4
  import gradio as gr
5
- import numpy as np
6
- from datetime import datetime
7
 
8
  # ARF v4 imports
9
  from agentic_reliability_framework.core.governance.risk_engine import RiskEngine
10
  from agentic_reliability_framework.runtime.memory import create_faiss_index, RAGGraphMemory
11
  from agentic_reliability_framework.runtime.memory.constants import MemoryConstants
12
 
 
 
 
13
  app = FastAPI(title="ARF v4 API with Memory")
14
 
15
  # Enable CORS for your frontend
@@ -17,6 +21,7 @@ app.add_middleware(
17
  CORSMiddleware,
18
  allow_origins=["https://arf-frontend-sandy.vercel.app"],
19
  allow_methods=["*"],
 
20
  )
21
 
22
  # ---------------------------------------------------------------------------
@@ -37,7 +42,7 @@ async def root():
37
  "service": "ARF OSS API",
38
  "version": "4.0.0",
39
  "status": "operational",
40
- "memory_stats": memory.get_graph_stats() if memory.has_historical_data() else "empty"
41
  }
42
 
43
  @app.get("/health")
@@ -46,11 +51,19 @@ async def health():
46
 
47
  @app.get("/api/v1/get_risk")
48
  async def get_risk():
49
- risk_score = risk_engine.get_current_risk()
50
- return {
51
- "system_risk": risk_score.mean,
52
- "status": "critical" if risk_score.mean > 0.8 else "normal"
53
- }
 
 
 
 
 
 
 
 
54
 
55
  @app.post("/api/v1/incident")
56
  async def store_incident(event_data: dict, analysis: dict):
@@ -63,7 +76,7 @@ async def store_incident(event_data: dict, analysis: dict):
63
  @app.get("/api/v1/memory/similar")
64
  async def find_similar_incidents(action: str, k: int = 5):
65
  class DummyEvent:
66
- def __init__(self, action):
67
  self.component = "user_action"
68
  self.latency_p99 = 0.0
69
  self.error_rate = 0.0
@@ -72,21 +85,26 @@ async def find_similar_incidents(action: str, k: int = 5):
72
  self.memory_util = 0.0
73
  self.timestamp = datetime.now()
74
  self.severity = "low"
 
75
 
76
  event = DummyEvent(action)
77
  analysis = {"action": action}
78
  similar = memory.find_similar(event, analysis, k=k)
 
79
  results = []
80
  for node in similar:
81
- results.append({
82
- "incident_id": node.incident_id,
83
- "component": node.component,
84
- "severity": node.severity,
85
- "timestamp": node.timestamp,
86
- "metrics": node.metrics,
87
- "agent_analysis": node.agent_analysis,
88
- "similarity_score": node.metadata.get("similarity_score", 0.0)
89
- })
 
 
 
90
  return {"similar": results, "count": len(results)}
91
 
92
  @app.get("/api/v1/memory/stats")
@@ -94,35 +112,113 @@ async def memory_stats():
94
  return memory.get_graph_stats()
95
 
96
  # ---------------------------------------------------------------------------
97
- # Optional Gradio interface
98
  # ---------------------------------------------------------------------------
99
- def current_risk_text():
100
- return f"ARF v4 - Current risk: {risk_engine.get_current_risk().mean:.2f}"
101
 
102
- def health_check():
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
103
  return {
104
- "status": "running",
105
- "version": "ARF v4 Demo",
106
- "timestamp": datetime.utcnow().isoformat()
 
107
  }
108
 
109
- with gr.Blocks() as demo:
110
- gr.Markdown("## ARF v4 Demo & Health Check")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
111
 
112
  with gr.Row():
113
- # Risk output
114
- gr.Markdown("### Current Risk")
115
- gr.Textbox(value=current_risk_text(), label="Risk Output", interactive=False)
116
 
117
  with gr.Row():
118
- # Health check
119
- gr.Markdown("### Health Check")
120
- health_output = gr.JSON()
121
- gr.Button("Check Health").click(fn=health_check, outputs=health_output)
 
 
 
 
 
 
122
 
123
- # Mount Gradio on FastAPI
124
- app = gr.mount_gradio_app(app, demo, path="/")
 
 
125
 
126
  # ============== MAIN ENTRY POINT ==============
127
  if __name__ == "__main__":
 
128
  demo.launch(server_name="0.0.0.0")
 
1
+ # hf_demo.py – ARF v4 dashboard for Hugging Face Spaces
2
+ import logging
3
+ from datetime import datetime, timezone
4
+
5
  from fastapi import FastAPI, HTTPException
6
  from fastapi.middleware.cors import CORSMiddleware
7
  import gradio as gr
 
 
8
 
9
  # ARF v4 imports
10
  from agentic_reliability_framework.core.governance.risk_engine import RiskEngine
11
  from agentic_reliability_framework.runtime.memory import create_faiss_index, RAGGraphMemory
12
  from agentic_reliability_framework.runtime.memory.constants import MemoryConstants
13
 
14
+ logging.basicConfig(level=logging.INFO)
15
+ logger = logging.getLogger(__name__)
16
+
17
  app = FastAPI(title="ARF v4 API with Memory")
18
 
19
  # Enable CORS for your frontend
 
21
  CORSMiddleware,
22
  allow_origins=["https://arf-frontend-sandy.vercel.app"],
23
  allow_methods=["*"],
24
+ allow_headers=["*"],
25
  )
26
 
27
  # ---------------------------------------------------------------------------
 
42
  "service": "ARF OSS API",
43
  "version": "4.0.0",
44
  "status": "operational",
45
+ "memory_stats": memory.get_graph_stats() if memory.has_historical_data() else "empty",
46
  }
47
 
48
  @app.get("/health")
 
51
 
@app.get("/api/v1/get_risk")
async def get_risk():
    """Return a demo risk snapshot as JSON.

    The score is produced by ``_calculate_demo_risk()``, which goes through the
    supported ``RiskEngine.calculate_risk()`` API instead of the removed
    ``get_current_risk()`` method.

    Returns:
        dict: ``system_risk`` (the numeric score), ``status`` (``"critical"``
        when the score is above 0.8, otherwise ``"normal"``) and ``details``
        (the full snapshot dict).

    Raises:
        HTTPException: status 500 carrying the error text when the snapshot
            cannot be computed.
    """
    try:
        score = _calculate_demo_risk()
        return {
            "system_risk": score["risk"],
            "status": "critical" if score["risk"] > 0.8 else "normal",
            "details": score,
        }
    except Exception as e:
        # Keep the traceback in the server log before turning the failure into
        # an HTTP error; get_risk_snapshot() logs the same failure the same way.
        logger.exception("Failed to compute risk for /api/v1/get_risk")
        # Chain the cause so the original exception stays attached.
        raise HTTPException(status_code=500, detail=str(e)) from e
67
 
68
  @app.post("/api/v1/incident")
69
  async def store_incident(event_data: dict, analysis: dict):
 
76
  @app.get("/api/v1/memory/similar")
77
  async def find_similar_incidents(action: str, k: int = 5):
78
  class DummyEvent:
79
+ def __init__(self, action: str):
80
  self.component = "user_action"
81
  self.latency_p99 = 0.0
82
  self.error_rate = 0.0
 
85
  self.memory_util = 0.0
86
  self.timestamp = datetime.now()
87
  self.severity = "low"
88
+ self.action = action
89
 
90
  event = DummyEvent(action)
91
  analysis = {"action": action}
92
  similar = memory.find_similar(event, analysis, k=k)
93
+
94
  results = []
95
  for node in similar:
96
+ results.append(
97
+ {
98
+ "incident_id": node.incident_id,
99
+ "component": node.component,
100
+ "severity": node.severity,
101
+ "timestamp": node.timestamp,
102
+ "metrics": node.metrics,
103
+ "agent_analysis": node.agent_analysis,
104
+ "similarity_score": node.metadata.get("similarity_score", 0.0),
105
+ }
106
+ )
107
+
108
  return {"similar": results, "count": len(results)}
109
 
110
  @app.get("/api/v1/memory/stats")
 
112
  return memory.get_graph_stats()
113
 
114
  # ---------------------------------------------------------------------------
115
+ # Gradio dashboard
116
  # ---------------------------------------------------------------------------
 
 
117
 
118
+ class _DemoIntent:
119
+ """
120
+ Minimal intent object for demo-only risk snapshots.
121
+ RiskEngine.categorize_intent() will fall back to DEFAULT for this object.
122
+ """
123
+ environment = "dev"
124
+ deployment_target = "dev"
125
+ service_name = "demo"
126
+
def _calculate_demo_risk():
    """Build a demo risk snapshot through RiskEngine.calculate_risk().

    Calls the module-level ``risk_engine`` with a ``_DemoIntent`` instance, no
    cost estimate and an empty policy-violation list. This deliberately avoids
    the removed get_current_risk() method.

    Returns:
        dict: ``risk`` (the score converted to ``float``), ``status``
        (``"critical"`` when the score exceeds 0.8, else ``"normal"``), plus
        ``explanation`` and ``contributions`` passed through from the engine.
    """
    engine_result = risk_engine.calculate_risk(
        intent=_DemoIntent(),
        cost_estimate=None,
        policy_violations=[],
    )
    # The engine result is unpacked as (score, explanation, contributions).
    score, why, parts = engine_result

    risk_as_float = float(score)
    if score > 0.8:
        label = "critical"
    else:
        label = "normal"

    return {
        "risk": risk_as_float,
        "status": label,
        "explanation": why,
        "contributions": parts,
    }
145
 
def get_risk_snapshot():
    """Gradio callback: the demo risk snapshot stamped with the current UTC time.

    Returns:
        dict: the result of ``_calculate_demo_risk()`` with a ``timestamp`` key
        added. If anything raises, the traceback is logged and a dict with
        ``status`` set to ``"error"``, the error text and a timestamp is
        returned instead, so the exception never propagates to the caller.
    """
    try:
        result = _calculate_demo_risk()
        result["timestamp"] = datetime.now(timezone.utc).isoformat()
    except Exception as exc:
        logger.exception("Failed to compute risk snapshot")
        failure = {
            "status": "error",
            "error": str(exc),
            "timestamp": datetime.now(timezone.utc).isoformat(),
        }
        return failure
    else:
        return result
158
+
def get_health_snapshot():
    """Return the static health payload stamped with the current UTC time.

    The previous try/except wrapper was dropped: the body only builds a dict
    literal and formats the current time, and the handler repeated that same
    time call, so the error branch could never produce a different outcome.

    Returns:
        dict: ``status`` (``"ok"``), ``version`` (``"4.0.0"``), ``service``
        (``"ARF OSS API"``) and ``timestamp`` (timezone-aware ISO 8601 string
        in UTC).
    """
    return {
        "status": "ok",
        "version": "4.0.0",
        "service": "ARF OSS API",
        "timestamp": datetime.now(timezone.utc).isoformat(),
    }
173
+
def get_memory_snapshot():
    """Gradio callback: report the state of the module-level ``memory`` store.

    Returns:
        dict: one of three shapes, each carrying a UTC ``timestamp``:
        ``status`` ``"empty"`` with a placeholder message when
        ``memory.has_historical_data()`` is falsy; ``status`` ``"ok"`` with
        ``memory.get_graph_stats()`` under ``memory_stats`` otherwise; or
        ``status`` ``"error"`` with the error text when either call raises
        (the traceback is logged first).
    """
    try:
        # Guard clause: nothing stored yet, so skip the stats call entirely.
        if not memory.has_historical_data():
            return {
                "status": "empty",
                "memory_stats": "No historical memory yet.",
                "timestamp": datetime.now(timezone.utc).isoformat(),
            }

        graph_stats = memory.get_graph_stats()
        return {
            "status": "ok",
            "memory_stats": graph_stats,
            "timestamp": datetime.now(timezone.utc).isoformat(),
        }
    except Exception as exc:
        logger.exception("Failed to compute memory snapshot")
        return {
            "status": "error",
            "error": str(exc),
            "timestamp": datetime.now(timezone.utc).isoformat(),
        }
195
+
# Status dashboard: two rows of JSON panels above one row of refresh buttons.
with gr.Blocks(title="ARF v4 Demo") as demo:
    gr.Markdown("# Agentic Reliability Framework v4")
    gr.Markdown("### Status dashboard")

    # Row 1: health and risk panels.
    with gr.Row():
        health_output = gr.JSON(label="Health")
        risk_output = gr.JSON(label="Current Risk")

    # Row 2: memory panel.
    with gr.Row():
        memory_output = gr.JSON(label="Memory Stats")

    # Row 3: one refresh button per panel.
    with gr.Row():
        refresh_btn = gr.Button("Refresh Risk")
        health_btn = gr.Button("Refresh Health")
        memory_btn = gr.Button("Refresh Memory")

    # Each button re-runs its snapshot function and writes into its panel.
    refresh_btn.click(fn=get_risk_snapshot, outputs=risk_output)
    health_btn.click(fn=get_health_snapshot, outputs=health_output)
    memory_btn.click(fn=get_memory_snapshot, outputs=memory_output)

    # Load initial state after startup, not during import.
    demo.load(fn=get_health_snapshot, outputs=health_output)
    demo.load(fn=get_risk_snapshot, outputs=risk_output)
    demo.load(fn=get_memory_snapshot, outputs=memory_output)
220
 
# ============== MAIN ENTRY POINT ==============
if __name__ == "__main__":
    # Launch Gradio directly to keep the Space alive and avoid the startup crash.
    # NOTE(review): only the Gradio `demo` is launched here; nothing visible in
    # this section mounts or serves the FastAPI `app`, so its routes may not be
    # reachable when the file is run this way. Confirm that is intended.
    demo.launch(server_name="0.0.0.0")