# app.py – ARF v4 API (FastAPI served at the root) with the Gradio frontend mounted under /api
import logging
import uuid
from datetime import datetime, timezone
from typing import Dict, Optional, List
from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from fastapi.openapi.docs import get_swagger_ui_html, get_redoc_html
from fastapi.responses import RedirectResponse
from pydantic import BaseModel
import gradio as gr
# ARF v4 imports
from agentic_reliability_framework.core.governance.risk_engine import RiskEngine
from agentic_reliability_framework.runtime.memory import create_faiss_index, RAGGraphMemory
from agentic_reliability_framework.runtime.memory.constants import MemoryConstants
# Additional imports for policy and cost
from agentic_reliability_framework.core.governance.policy_engine import PolicyEngine
from agentic_reliability_framework.core.governance.cost_estimator import CostEstimator
from agentic_reliability_framework.core.governance.intents import (
DeployConfigurationIntent,
Environment,
)
from agentic_reliability_framework.core.governance.healing_intent import (
HealingIntent,
RecommendedAction,
IntentStatus,
IntentSource,
)
# Configure logging once at import time; `logger` is this module's logger.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# ========================= FASTAPI APP =========================
fastapi_app = FastAPI(title="ARF v4 API")

# CORS: cross-origin browser access is granted to the hosted frontend origin
# only; every HTTP method and request header is allowed from that origin.
fastapi_app.add_middleware(
    CORSMiddleware,
    allow_origins=["https://arf-frontend-sandy.vercel.app"],
    allow_methods=["*"],
    allow_headers=["*"],
)
# ========================= ARF COMPONENTS =========================
# Risk engine shared by the HTTP endpoints and the Gradio callbacks.
risk_engine = RiskEngine()

# Memory store built on a FAISS index of MemoryConstants.VECTOR_DIM dimensions.
faiss_index = create_faiss_index(dim=MemoryConstants.VECTOR_DIM)
memory = RAGGraphMemory(faiss_index)

# Governance helpers used by /v1/incidents/evaluate.
policy_engine = PolicyEngine()  # default construction; no policies are loaded here
cost_estimator = CostEstimator()  # default estimator

# In-memory decision log for demo purposes (read by /v1/history, updated by
# /v1/feedback and the Gradio callbacks).
# NOTE(review): entries are only ever appended, so this list grows for the
# lifetime of the process and is lost on restart.
decision_history: List[Dict] = []
# ========================= PYDANTIC MODELS =========================
class EvaluateRequest(BaseModel):
    """Request body accepted by POST /v1/incidents/evaluate."""

    # Name of the affected service; forwarded as the intent's service_name.
    service_name: str
    # Kind of event; recorded in the intent's provenance.
    event_type: str
    # Reported severity; recorded in the intent's provenance.
    severity: str
    # Numeric metrics; forwarded as the intent's configuration. Optional.
    metrics: Dict[str, float] = {}
class EvaluateResponse(BaseModel):
    """Risk-evaluation response schema.

    NOTE(review): no endpoint visible in this file uses this model as its
    response type; /v1/incidents/evaluate returns a plain dict instead.
    """

    risk_score: float
    base_risk: float
    # Optional; defaults to None when not supplied.
    memory_risk: Optional[float] = None
    weight: float
    # Untyped list; defaults to empty.
    similar_events: list = []
    confidence: float
# ========================= HELPER: Demo Intent =========================
class _DemoIntent:
    """Fixed stand-in intent handed to the risk engine by the demo paths.

    Carries only class-level attributes; the demo endpoints and the Gradio
    callbacks instantiate it without arguments.
    """

    # Both the environment and the deployment target are the dev tier.
    environment = deployment_target = "dev"
    service_name = "demo"
# ========================= API ENDPOINTS =========================
@fastapi_app.get("/")
async def root():
    """Serve the landing payload that points callers at the API docs."""
    welcome = {"message": "ARF v4 API. See /docs for documentation."}
    return welcome
@fastapi_app.get("/health")
async def health():
    """Liveness probe: report a fixed status and version string."""
    payload = {"status": "ok", "version": "4.2.0"}
    return payload
@fastapi_app.get("/v1/get_risk")
async def get_risk():
    """Score the built-in demo intent, log the decision, and return it.

    The risk engine is called with a `_DemoIntent`, no cost estimate and no
    policy violations. The resulting score is mapped to a decision:

    * above 0.8        -> "deny"
    * above 0.2        -> "escalate"
    * otherwise        -> "approve"

    A record of the decision is appended to `decision_history` so that
    /v1/history can list it and /v1/feedback can attach an outcome later.

    Returns:
        dict with keys ``system_risk``, ``status`` ("critical" above 0.8,
        otherwise "normal"), ``explanation``, ``contributions``,
        ``decision_id``, ``decision`` and ``timestamp``.
    """
    risk_value, explanation, contributions = risk_engine.calculate_risk(
        intent=_DemoIntent(),
        cost_estimate=None,
        policy_violations=[],
    )
    risk = float(risk_value)

    if risk > 0.8:
        decision = "deny"
    elif risk > 0.2:
        decision = "escalate"
    else:
        decision = "approve"

    decision_id = str(uuid.uuid4())
    # Take the timestamp once so the stored record and the response agree;
    # two separate now() calls would give the same decision two timestamps.
    timestamp = datetime.now(timezone.utc).isoformat()

    decision_history.append({
        "decision_id": decision_id,
        "timestamp": timestamp,
        "risk_score": risk,
        "outcome": None,  # filled in when feedback is given
    })

    return {
        "system_risk": risk,
        "status": "critical" if risk > 0.8 else "normal",
        "explanation": explanation,
        "contributions": contributions,
        "decision_id": decision_id,
        "decision": decision,
        "timestamp": timestamp,
    }
@fastapi_app.get("/v1/history")
async def get_history():
    """List the most recent decisions (at most ten), oldest first."""
    recent = decision_history[-10:]
    return recent
def _recommend_action(risk_score):
    """Map a risk score to a RecommendedAction (approve / deny / escalate)."""
    # NOTE(review): a score of exactly 0.2 escalates here, whereas the demo
    # paths (get_risk / get_risk_snapshot) approve at exactly 0.2. Confirm
    # which boundary is intended before unifying them.
    if risk_score < 0.2:
        return RecommendedAction.APPROVE
    if risk_score > 0.8:
        return RecommendedAction.DENY
    return RecommendedAction.ESCALATE


def _weighted_risk_factors(contributions, risk_score):
    """Build the per-component risk factors (weight * component estimate)."""
    weights = contributions.get("weights", {})
    # (key in weights, key of the component estimate, label in the output)
    components = (
        ("conjugate", "conjugate_mean", "conjugate"),
        ("hyper", "hyper_mean", "hyperprior"),
        ("hmc", "hmc_prediction", "hmc"),
    )
    factors = {}
    for weight_key, estimate_key, label in components:
        weight = weights.get(weight_key, 0.0)
        if weight > 0:
            # Fall back to the overall score when the component estimate
            # is missing from the contributions.
            factors[label] = weight * contributions.get(estimate_key, risk_score)
    if not factors:
        # No weighted component present: attribute the whole score to
        # the conjugate component.
        factors["conjugate"] = risk_score
    return factors


@fastapi_app.post("/v1/incidents/evaluate")
async def evaluate_incident(request: EvaluateRequest):
    """Evaluate an incident through the governance components.

    The incident is mapped onto a DeployConfigurationIntent (single
    instance, DEV target, metrics as configuration), then run through the
    policy engine, the cost estimator and the risk engine. The result is
    packaged as an advisory-only HealingIntent and returned as a dict.

    Args:
        request: The incident description (service, event type, severity
            and optional metrics).

    Returns:
        The dict produced by ``HealingIntent.to_dict(include_oss_context=True)``,
        with ``epistemic_uncertainty``, ``confidence_interval`` and
        ``risk_contributions`` filled in when the HealingIntent did not
        supply them.

    Raises:
        HTTPException: status 500 with the error text as detail when any
            step fails; an HTTPException raised inside is passed through
            unchanged.
    """
    try:
        # Map the incident to a DeployConfigurationIntent (example mapping).
        intent = DeployConfigurationIntent(
            service_name=request.service_name,
            change_scope="single_instance",  # default
            deployment_target=Environment.DEV,  # assume dev for now
            configuration=request.metrics,
            requester="system",
            provenance={
                "source": "incident_evaluation",
                "event_type": request.event_type,
                "severity": request.severity,
            },
        )

        # 1. Evaluate policies (a falsy result is normalised to an empty list).
        policy_violations = policy_engine.evaluate_policies(intent) or []

        # 2. Estimate cost.
        cost_projection = cost_estimator.estimate_monthly_cost(intent)

        # 3. Compute the risk score.
        risk_score, explanation, contributions = risk_engine.calculate_risk(
            intent=intent,
            cost_estimate=cost_projection,
            policy_violations=policy_violations,
        )

        # 4. Similar incidents from memory.
        # TODO: placeholder - retrieval needs an embedding of the incident,
        # so the list is always returned empty for now.
        similar_incidents = []
        if memory and memory.has_historical_data():
            pass

        # 5. Recommended action and 6. weighted risk factors.
        action = _recommend_action(risk_score)
        risk_factors = _weighted_risk_factors(contributions, risk_score)

        # 7. Build the HealingIntent (advisory only, never executable here).
        healing_intent = HealingIntent(
            action=action.value,
            component=intent.service_name,
            parameters={},
            justification=explanation,
            confidence=0.9,  # placeholder value
            incident_id="",  # not used in this context
            detected_at=datetime.now(timezone.utc).timestamp(),
            risk_score=risk_score,
            risk_factors=risk_factors,
            cost_projection=cost_projection,
            recommended_action=action,
            similar_incidents=similar_incidents,
            policy_violations=policy_violations,
            status=IntentStatus.OSS_ADVISORY_ONLY,
            source=IntentSource.INFRASTRUCTURE_ANALYSIS,
            requires_enterprise=True,
            execution_allowed=False,
        )

        response_dict = healing_intent.to_dict(include_oss_context=True)

        # Fields the frontend expects; only added when not already present.
        response_dict.setdefault("epistemic_uncertainty", 0.05)
        response_dict.setdefault(
            "confidence_interval",
            # Simple +/- 0.05 band, clamped to [0, 1].
            [max(0.0, risk_score - 0.05), min(1.0, risk_score + 0.05)],
        )
        if "risk_contributions" not in response_dict:
            # NOTE(review): "hyper_mean" is not excluded here although the
            # sibling keys "conjugate_mean" and "hmc_prediction" are -
            # confirm whether that is intentional.
            excluded = ("weights", "conjugate_mean", "hmc_prediction")
            response_dict["risk_contributions"] = [
                {"factor": key, "contribution": value}
                for key, value in contributions.items()
                if key not in excluded
            ]
        return response_dict
    except HTTPException:
        # Already an HTTP error: do not rewrap it as a 500.
        raise
    except Exception as e:
        logger.exception("Error in evaluate_incident")
        # Chain the cause so the original traceback is preserved.
        raise HTTPException(status_code=500, detail=str(e)) from e
@fastapi_app.post("/v1/feedback")
async def record_outcome(decision_id: str, success: bool):
    """Attach a success/failure outcome to a previously logged decision.

    Args:
        decision_id: Identifier of a decision stored in `decision_history`.
        success: True when the action succeeded, False when it failed.

    Returns:
        ``{"status": "ok", "decision_id": ..., "outcome": ...}`` when the
        decision exists, otherwise ``{"error": "decision not found"}``.
    """
    # First record whose id matches, or None when there is no such record.
    record = next(
        (entry for entry in decision_history if entry["decision_id"] == decision_id),
        None,
    )
    if record is None:
        return {"error": "decision not found"}

    record["outcome"] = "success" if success else "failure"

    # Feeding the outcome back to the risk engine is best-effort: a failure
    # is logged and the recorded outcome is still reported to the caller.
    intent = _DemoIntent()
    try:
        risk_engine.update_outcome(intent, success)
    except Exception:
        logger.exception("Outcome update failed")

    return {"status": "ok", "decision_id": decision_id, "outcome": record["outcome"]}
# ========================= NEW MEMORY STATS ENDPOINT =========================
@fastapi_app.get("/v1/memory/stats")
async def get_memory_stats():
    """Return the current memory graph statistics.

    Returns:
        Whatever ``memory.get_graph_stats()`` returns, or
        ``{"error": "Memory not initialized"}`` when no memory object exists.
    """
    # Compare against None explicitly: a truthiness test would report an
    # initialised memory object as "not initialized" if its class defines
    # __len__/__bool__ and it is currently empty.
    if memory is not None:
        return memory.get_graph_stats()
    return {"error": "Memory not initialized"}
# ========================= GRADIO UI =========================
def get_risk_snapshot():
    """Gradio callback: score the demo intent and log the decision.

    Mirrors the /v1/get_risk endpoint (same thresholds: above 0.8 "deny",
    above 0.2 "escalate", otherwise "approve") and additionally reports the
    weighted per-component risk factors.

    Returns:
        dict with keys ``risk``, ``status``, ``explanation``,
        ``risk_factors``, ``decision_id``, ``decision`` and ``timestamp``;
        or ``{"error": <message>}`` when anything fails, so the UI shows the
        error instead of crashing.
    """
    try:
        risk_value, explanation, contributions = risk_engine.calculate_risk(
            intent=_DemoIntent(),
            cost_estimate=None,
            policy_violations=[],
        )
        risk = float(risk_value)

        if risk > 0.8:
            decision = "deny"
        elif risk > 0.2:
            decision = "escalate"
        else:
            decision = "approve"

        decision_id = str(uuid.uuid4())
        # Take the timestamp once so the stored record and the returned
        # snapshot carry the same value.
        timestamp = datetime.now(timezone.utc).isoformat()

        decision_history.append({
            "decision_id": decision_id,
            "timestamp": timestamp,
            "risk_score": risk,
            "outcome": None,
        })

        # Weighted risk factors for the UI: weight * component estimate,
        # falling back to the overall score when an estimate is missing.
        weights = contributions.get("weights", {})
        risk_factors = {}
        for weight_key, estimate_key, label in (
            ("conjugate", "conjugate_mean", "conjugate"),
            ("hyper", "hyper_mean", "hyperprior"),
            ("hmc", "hmc_prediction", "hmc"),
        ):
            weight = weights.get(weight_key, 0.0)
            if weight > 0:
                risk_factors[label] = weight * contributions.get(estimate_key, risk_value)
        if not risk_factors:
            # No weighted component: attribute everything to "conjugate".
            risk_factors["conjugate"] = risk_value

        return {
            "risk": risk,
            "status": "critical" if risk > 0.8 else "normal",
            "explanation": explanation,
            "risk_factors": risk_factors,
            "decision_id": decision_id,
            "decision": decision,
            "timestamp": timestamp,
        }
    except Exception as e:
        logger.exception("Failed to compute risk snapshot")
        return {"error": str(e)}
def get_health_snapshot():
    """Gradio callback: fixed health payload stamped with the current UTC time."""
    snapshot = {"status": "ok", "version": "4.2.0", "service": "ARF OSS API"}
    snapshot["timestamp"] = datetime.now(timezone.utc).isoformat()
    return snapshot
def get_memory_snapshot():
    """Gradio callback: report memory graph stats, or an "empty" notice.

    Returns:
        dict with ``status`` ("ok" or "empty"), ``memory_stats`` (the graph
        stats, or a fixed message when there is no historical data) and a
        UTC ``timestamp``.
    """
    if not memory.has_historical_data():
        return {
            "status": "empty",
            "memory_stats": "No historical memory yet.",
            "timestamp": datetime.now(timezone.utc).isoformat(),
        }
    return {
        "status": "ok",
        "memory_stats": memory.get_graph_stats(),
        "timestamp": datetime.now(timezone.utc).isoformat(),
    }
def record_outcome_ui(success: bool):
    """Gradio callback: attach an outcome to the most recent decision.

    Args:
        success: True when the action succeeded, False when it failed.

    Returns:
        ``{"error": "no decisions yet"}`` when nothing has been logged,
        otherwise a dict with the decision's ``decision_id``, the recorded
        ``outcome`` and a UTC ``timestamp``.
    """
    if len(decision_history) == 0:
        return {"error": "no decisions yet"}

    latest = decision_history[-1]
    if success:
        latest["outcome"] = "success"
    else:
        latest["outcome"] = "failure"

    # Best-effort feedback to the risk engine: failures are logged only.
    intent = _DemoIntent()
    try:
        risk_engine.update_outcome(intent, success)
    except Exception:
        logger.exception("Outcome update failed")

    return {
        "decision_id": latest["decision_id"],
        "outcome": latest["outcome"],
        "timestamp": datetime.now(timezone.utc).isoformat(),
    }
# Statement order inside the Blocks context is kept as-is: it determines both
# the page layout and the order in which the click handlers are registered.
with gr.Blocks(title="ARF v4.2.0 Demo", theme=gr.themes.Soft()) as demo:
    # ---- Header ----
    gr.Markdown("# Agentic Reliability Framework v4.2.0")
    gr.Markdown(
        "### Probabilistic Infrastructure Governance – "
        "[📚 API Docs](/api/docs) | "
        "[📦 GitHub](https://github.com/arf-foundation/agentic-reliability-framework) | "
        "[📅 Book a Call](https://calendly.com/petter2025us/30min)"
    )
    gr.Markdown("---")

    # ---- Output panels ----
    with gr.Row():
        health_output = gr.JSON(label="Health")
        risk_output = gr.JSON(label="Current Risk")
    with gr.Row():
        memory_output = gr.JSON(label="Memory Stats")
    with gr.Row():
        decision_output = gr.JSON(label="Recent Decisions")

    # ---- Evaluate the demo intent and report how the action went ----
    with gr.Row():
        refresh_btn = gr.Button("Evaluate Intent")
        success_btn = gr.Button("Action Succeeded")
        fail_btn = gr.Button("Action Failed")
    refresh_btn.click(fn=get_risk_snapshot, outputs=risk_output)
    success_btn.click(fn=lambda: record_outcome_ui(True), outputs=decision_output)
    fail_btn.click(fn=lambda: record_outcome_ui(False), outputs=decision_output)

    # ---- Refresh the read-only panels ----
    with gr.Row():
        health_btn = gr.Button("Refresh Health")
        memory_btn = gr.Button("Refresh Memory")
        history_btn = gr.Button("Show Decision History")
    health_btn.click(fn=get_health_snapshot, outputs=health_output)
    memory_btn.click(fn=get_memory_snapshot, outputs=memory_output)
    # Shows the same "last ten decisions" slice that /v1/history returns.
    history_btn.click(fn=lambda: decision_history[-10:], outputs=decision_output)
# ========================= Mount Gradio and Add Documentation Routes =========================
# The /api/docs redirect must be registered BEFORE Gradio is mounted at /api.
# Routes are matched in registration order and a mount at "/api" captures
# every path below it, so a redirect added after the mount is never reached
# and the "API Docs" link in the Gradio header (/api/docs) would not work.
@fastapi_app.get("/api/docs", include_in_schema=False)
async def redirect_docs():
    """Redirect the Gradio-side docs link to the Swagger UI at /docs."""
    return RedirectResponse(url="/docs")


# Mount the Gradio UI under /api; the returned object is the FastAPI app that
# the ASGI server should run.
app = gr.mount_gradio_app(fastapi_app, demo, path="/api")


# NOTE(review): FastAPI registers its own /docs, /redoc and /openapi.json
# when the app is constructed with default settings, and those earlier
# routes take precedence. The three handlers below therefore only take
# effect if the app is created with docs_url=None, redoc_url=None and
# openapi_url=None - confirm which behaviour is wanted.
@app.get("/docs", include_in_schema=False)
async def swagger_ui():
    """Serve the Swagger UI page for the API schema."""
    return get_swagger_ui_html(
        openapi_url="/openapi.json",
        title="ARF API Docs",
    )


@app.get("/redoc", include_in_schema=False)
async def redoc_ui():
    """Serve the ReDoc page for the API schema."""
    return get_redoc_html(
        openapi_url="/openapi.json",
        title="ARF API ReDoc",
    )


@app.get("/openapi.json", include_in_schema=False)
async def openapi():
    """Return the OpenAPI schema generated by the FastAPI app."""
    return fastapi_app.openapi()