# circle-ci-viz / app.py
# ArthurZ's picture
# ArthurZ HF Staff
# Update app.py
# 3601d7a verified
# raw
# history blame
# 7.82 kB
import json
import os
from functools import lru_cache
from typing import List, Tuple
import gradio as gr
from datasets import Dataset, load_dataset
# Identifier handed to `load_dataset`; the CIRCLECI_RESULTS_DATASET_ID
# environment variable takes precedence over the built-in default.
DATASET_ID = os.getenv(
    "CIRCLECI_RESULTS_DATASET_ID",
    "transformers-community/circleci-test-results",
)

# Upper bound on the number of records a single filter pass returns.
MAX_ROWS = 200
@lru_cache(maxsize=1)
def _load_dataset_cached() -> Dataset:
    """Load the ``train`` split of ``DATASET_ID`` and memoize the result.

    Exceptions propagate to the caller. ``lru_cache`` stores nothing for a
    call that raises, so a failed load is attempted again on the next call.
    """
    return load_dataset(DATASET_ID, split="train")


def _load_dataset() -> Dataset | None:
    """Lazy-load and cache the dataset.

    Only successful loads are cached. Previously the ``None`` produced by a
    failed load was memoized as well, so one transient error kept every later
    call returning ``None`` until the cache was cleared by hand.

    Returns:
        The loaded dataset, or ``None`` when loading fails (the error is
        printed rather than raised so callers can report it themselves).
    """
    try:
        return _load_dataset_cached()
    except Exception as error:  # deliberate best-effort boundary
        print(f"Failed to load dataset {DATASET_ID}: {error}")
        return None


# Preserve the ``cache_clear`` hook that callers use to force a reload.
_load_dataset.cache_clear = _load_dataset_cached.cache_clear
def _record_timestamp(record: dict) -> str:
    """Best-effort extraction of a sortable timestamp from the payload.

    Parses the JSON text stored under ``record["payload_json"]`` and returns
    the ``collected_at`` entry of its ``metadata`` object.

    Args:
        record: Mapping that may carry a ``payload_json`` entry.

    Returns:
        The ``collected_at`` string, or ``""`` when the payload is missing,
        is not parseable JSON, is not a JSON object, has no ``metadata``
        object, or holds a non-string ``collected_at``. The result is always
        a ``str`` so that values can be compared with each other when used
        as a sort key.
    """
    payload = record.get("payload_json")
    if not payload:
        return ""
    try:
        payload_dict = json.loads(payload)
    except (TypeError, ValueError, RecursionError):
        # TypeError: payload is not str/bytes; ValueError: malformed JSON
        # (JSONDecodeError subclasses it); RecursionError: absurdly deep nesting.
        return ""
    # Valid JSON need not be an object (e.g. a list or a number).
    if not isinstance(payload_dict, dict):
        return ""
    metadata = payload_dict.get("metadata")
    if not isinstance(metadata, dict):
        return ""
    collected_at = metadata.get("collected_at")
    # Mixing str and non-str keys would make list.sort() raise TypeError.
    return collected_at if isinstance(collected_at, str) else ""
def _filter_records(
    dataset: Dataset,
    repo: str,
    pr: str,
    sha: str,
    only_failures: bool,
    sort_desc: bool,
) -> List[dict]:
    """Return the rows of ``dataset`` that satisfy every active filter.

    Args:
        dataset: Iterable of row dicts to scan.
        repo: Case-insensitive substring to look for in ``repository``;
            empty or ``None`` disables the filter.
        pr: Exact value to match against ``pr_number``; empty or ``None``
            disables the filter.
        sha: Case-insensitive substring to look for in ``commit_sha``;
            empty or ``None`` disables the filter.
        only_failures: When true, keep only rows whose ``failure_count`` is
            greater than zero.
        sort_desc: When true, sort by timestamp descending, else ascending.

    Returns:
        At most ``MAX_ROWS`` matching rows, ordered by ``_record_timestamp``.
    """

    def _as_text(value: object) -> str:
        # None/empty become "", anything else is compared as text, so a
        # missing filter value or an integer-typed column cannot raise.
        return str(value or "")

    repo = _as_text(repo).strip().lower()
    pr = _as_text(pr).strip()
    sha = _as_text(sha).strip().lower()

    def _matches(example: dict) -> bool:
        if repo and repo not in _as_text(example.get("repository")).lower():
            return False
        if pr and pr != _as_text(example.get("pr_number")):
            return False
        if sha and sha not in _as_text(example.get("commit_sha")).lower():
            return False
        if only_failures and (example.get("failure_count") or 0) <= 0:
            return False
        return True

    items = [ex for ex in dataset if _matches(ex)]
    # Sort before truncating so the cut keeps the first MAX_ROWS in the
    # requested order rather than an arbitrary subset.
    items.sort(key=_record_timestamp, reverse=sort_desc)
    return items[:MAX_ROWS]
def query(
    repo: str,
    pr: str,
    sha: str,
    only_failures: bool,
    sort_order: str,
) -> Tuple[List[List[str]], str, str]:
    """
    Main query function used by the UI.

    Args:
        repo: Repository filter text (may be empty).
        pr: PR number filter text (may be empty).
        sha: Commit SHA filter text (may be empty).
        only_failures: Keep only records with a positive failure count.
        sort_order: ``"Newest first"`` sorts descending by timestamp; any
            other value sorts ascending.

    Returns:
        - Table rows
        - JSON details for the first record in the chosen sort order
          (the latest entry when sorting newest first)
        - Status message
    """
    dataset = _load_dataset()
    if dataset is None:
        error_json = json.dumps(
            {"error": f"Dataset {DATASET_ID} not available"}, indent=2
        )
        return [], error_json, f"❌ Dataset `{DATASET_ID}` is not available."

    sort_desc = sort_order == "Newest first"
    records = _filter_records(dataset, repo, pr, sha, only_failures, sort_desc)

    table_rows: List[List[str]] = [
        [
            record.get("repository", ""),
            record.get("branch", ""),
            record.get("pr_number", ""),
            (record.get("commit_sha") or "")[:12],
            record.get("workflow_id", ""),
            str(record.get("failure_count", 0)),
        ]
        for record in records
    ]

    # Default details
    details = json.dumps({}, indent=2)
    if records:
        payload = records[0].get("payload_json", "{}")
        try:
            details = json.dumps(json.loads(payload), indent=2)
        except (TypeError, ValueError):
            # ValueError covers json.JSONDecodeError; TypeError covers a
            # payload stored as None or another non-text value.
            details = json.dumps({"error": "Unable to parse payload"}, indent=2)

    # Build a human-friendly status message
    repo_text = (repo or "").strip()
    pr_text = (pr or "").strip()
    sha_text = (sha or "").strip()
    filters = []
    if repo_text:
        filters.append(f"repo = `{repo_text}`")
    if pr_text:
        filters.append(f"PR = `{pr_text}`")
    if sha_text:
        filters.append(f"SHA ~ `{sha_text}`")
    if only_failures:
        filters.append("only failing workflows")
    filter_text = ", ".join(filters) if filters else "no filters"

    match_count = len(records)
    dataset_size = len(dataset)
    if match_count == 0:
        status_msg = (
            f"🔍 No matching records found using {filter_text}. "
            f"(Dataset size: {dataset_size} rows)"
        )
    else:
        # Truncation keeps the head of the sorted list, so which end of the
        # timeline is shown depends on the sort direction.
        window = "most recent" if sort_desc else "oldest"
        status_msg = (
            f"✅ Found {match_count} matching record(s) using {filter_text}. "
            f"Showing up to {MAX_ROWS} {window} entries "
            f"(dataset size: {dataset_size} rows, sorted: {sort_order.lower()})."
        )
    return table_rows, details, status_msg
def refresh_dataset() -> str:
    """Clear the cache and reload the dataset.

    Returns:
        A Markdown status line: the row count on success, or a failure
        notice when the reload returns ``None``.
    """
    _load_dataset.cache_clear()
    dataset = _load_dataset()
    if dataset is None:
        return f"❌ Failed to refresh dataset `{DATASET_ID}`."
    # The emoji was stored as mis-decoded UTF-8 ("πŸ”„"); restored here.
    return f"🔄 Dataset refreshed with **{len(dataset)}** rows from `{DATASET_ID}`."
# UI definition: a single "Search" tab with filters on the left and results
# on the right. Event wiring happens inside the Blocks context.
with gr.Blocks(
    title="CircleCI Test Collection Helper",
    theme=gr.themes.Soft(primary_hue="indigo", secondary_hue="blue"),
) as demo:
    # The intro names the dataset actually in use: DATASET_ID can be
    # overridden through the environment, so it must not be hard-coded here.
    gr.Markdown(
        "\n"
        "# 🧪 CircleCI Test Collection Helper\n"
        "Explore CircleCI test aggregation records stored in the\n"
        f"`{DATASET_ID}` dataset.\n"
        "Use the filters below to quickly inspect runs for a specific repository, PR, or commit SHA.\n",
    )

    with gr.Tab("Search"):
        with gr.Row():
            # Left column: filter inputs, action buttons, status, examples.
            with gr.Column():
                gr.Markdown("### Filters")
                repo_box = gr.Textbox(
                    label="Repository",
                    placeholder="e.g. huggingface/transformers",
                    value="huggingface/transformers",
                )
                pr_box = gr.Textbox(
                    label="PR number",
                    placeholder="e.g. 30232",
                )
                sha_box = gr.Textbox(
                    label="Commit SHA (prefix accepted)",
                    placeholder="e.g. 1234abcd",
                )
                with gr.Row():
                    failures_cb = gr.Checkbox(
                        label="Only show failing workflows",
                        value=True,
                    )
                    sort_dd = gr.Dropdown(
                        label="Sort order",
                        choices=["Newest first", "Oldest first"],
                        value="Newest first",
                    )
                with gr.Row():
                    search_btn = gr.Button("🔍 Search", variant="primary")
                    refresh_btn = gr.Button("♻️ Refresh dataset cache")
                status = gr.Markdown("")
                gr.Examples(
                    label="Quick examples",
                    examples=[
                        ["huggingface/transformers", "30232", "", True, "Newest first"],
                        ["huggingface/transformers", "", "", True, "Newest first"],
                        ["huggingface/transformers", "", "abcd", False, "Newest first"],
                    ],
                    inputs=[repo_box, pr_box, sha_box, failures_cb, sort_dd],
                )

            # Right column: result table and the payload of the first row.
            with gr.Column():
                gr.Markdown("### Matching runs")
                table = gr.Dataframe(
                    headers=[
                        "Repository",
                        "Branch",
                        "PR",
                        "Commit",
                        "Workflow ID",
                        "Failures",
                    ],
                    wrap=True,
                    interactive=False,
                    height=300,
                )
                gr.Markdown("### Latest entry details")
                json_view = gr.Code(
                    label="Payload (latest matching record)",
                    language="json",
                    lines=22,
                )

    # Wire up interactions
    search_btn.click(
        query,
        inputs=[repo_box, pr_box, sha_box, failures_cb, sort_dd],
        outputs=[table, json_view, status],
        show_progress="full",
    )
    refresh_btn.click(
        refresh_dataset,
        outputs=status,
        show_progress="minimal",
    )
if __name__ == "__main__":
    # Script entry point: enable the event queue (max_size=20), then start
    # the app.
    queued_app = demo.queue(max_size=20)
    queued_app.launch()