"""Gradio app for exploring CircleCI test aggregation records stored in a Hugging Face dataset."""

import json
import os
from functools import lru_cache
from typing import List, Tuple

import gradio as gr
from datasets import Dataset, load_dataset

# The dataset repo can be overridden via an environment variable (useful for staging copies).
DATASET_ID = os.environ.get(
    "CIRCLECI_RESULTS_DATASET_ID", "transformers-community/circleci-test-results"
)
# Hard cap on the number of rows rendered in the results table.
MAX_ROWS = 200


@lru_cache(maxsize=1)
def _load_dataset() -> Dataset | None:
    """Lazy-load and cache the dataset."""
    try:
        return load_dataset(DATASET_ID, split="train")
    except Exception as error:
        print(f"Failed to load dataset {DATASET_ID}: {error}")
        return None
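
# maxsize=1 makes the loaded dataset a process-wide singleton; refresh_dataset()
# below calls _load_dataset.cache_clear() to force a reload.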


def _record_timestamp(record: dict) -> str:
    """Best-effort extraction of a sortable timestamp from the payload."""
    payload = record.get("payload_json")
    if not payload:
        return ""
    try:
        payload_dict = json.loads(payload)
    except Exception:
        return ""
    metadata = payload_dict.get("metadata") or {}
    return metadata.get("collected_at") or ""
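
# Note: sorting assumes "collected_at" is an ISO-8601 timestamp, so plain string
# comparison yields chronological order; records without one sort as "".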


def _filter_records(
    dataset: Dataset,
    repo: str,
    pr: str,
    sha: str,
    only_failures: bool,
    sort_desc: bool,
) -> List[dict]:
    """Filter dataset rows and return at most MAX_ROWS of them, sorted by timestamp."""
    repo = repo.strip().lower()
    pr = pr.strip()
    sha = sha.strip().lower()

    def _matches(example: dict) -> bool:
        # Repository and commit SHA are case-insensitive substring matches;
        # the PR number must match exactly.
        if repo and repo not in (example.get("repository") or "").lower():
            return False
        if pr and pr != (example.get("pr_number") or ""):
            return False
        if sha and sha not in (example.get("commit_sha") or "").lower():
            return False
        if only_failures and (example.get("failure_count") or 0) <= 0:
            return False
        return True

    items = [ex for ex in dataset if _matches(ex)]
    items.sort(key=_record_timestamp, reverse=sort_desc)
    return items[:MAX_ROWS]
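
# Example (a sketch; `ds` is a loaded Dataset): keep failing runs whose commit
# SHA contains "abcd", in any repository, oldest first:
#   _filter_records(ds, repo="", pr="", sha="abcd", only_failures=True, sort_desc=False)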


def query(
    repo: str,
    pr: str,
    sha: str,
    only_failures: bool,
    sort_order: str,
) -> Tuple[List[List[str]], str, str]:
    """
    Main query function used by the UI.

    Returns:
    - Table rows
    - Pretty-printed JSON payload of the first record under the current sort order
    - Status message
    """
    dataset = _load_dataset()
    if dataset is None:
        error_json = json.dumps(
            {"error": f"Dataset {DATASET_ID} not available"}, indent=2
        )
        return [], error_json, f"❌ Dataset `{DATASET_ID}` is not available."

    sort_desc = sort_order == "Newest first"
    records = _filter_records(dataset, repo, pr, sha, only_failures, sort_desc)

    table_rows: List[List[str]] = []
    for record in records:
        table_rows.append(
            [
                record.get("repository", ""),
                record.get("branch", ""),
                record.get("pr_number", ""),
                (record.get("commit_sha") or "")[:12],  # short SHA for display
                record.get("workflow_id", ""),
                str(record.get("failure_count", 0)),
            ]
        )

    # Show the payload of the first record under the current sort order
    # (the newest one when sorting "Newest first").
    details = json.dumps({}, indent=2)
    if records:
        payload = records[0].get("payload_json", "{}")
        try:
            details = json.dumps(json.loads(payload), indent=2)
        except json.JSONDecodeError:
            details = json.dumps({"error": "Unable to parse payload"}, indent=2)

    filters = []
    if repo.strip():
        filters.append(f"repo = `{repo.strip()}`")
    if pr.strip():
        filters.append(f"PR = `{pr.strip()}`")
    if sha.strip():
        filters.append(f"SHA ~ `{sha.strip()}`")
    if only_failures:
        filters.append("only failing workflows")

    filter_text = ", ".join(filters) if filters else "no filters"
    match_count = len(records)
    dataset_size = len(dataset)

    if match_count == 0:
        status_msg = (
            f"🔍 No matching records found using {filter_text}. "
            f"(Dataset size: {dataset_size} rows)"
        )
    else:
        status_msg = (
            f"✅ Found {match_count} matching record(s) using {filter_text}. "
            f"Showing up to {MAX_ROWS} entries "
            f"(dataset size: {dataset_size} rows, sorted: {sort_order.lower()})."
        )

    return table_rows, details, status_msg


def refresh_dataset() -> str:
    """Clear the cache and reload the dataset."""
    _load_dataset.cache_clear()
    dataset = _load_dataset()
    if dataset is None:
        return f"❌ Failed to refresh dataset `{DATASET_ID}`."
    return f"🔄 Dataset refreshed with **{len(dataset)}** rows from `{DATASET_ID}`."


with gr.Blocks(
    title="CircleCI Test Collection Helper",
    theme=gr.themes.Soft(primary_hue="indigo", secondary_hue="blue"),
) as demo:
    gr.Markdown(
        f"""
        # 🧪 CircleCI Test Collection Helper

        Explore CircleCI test aggregation records stored in the
        `{DATASET_ID}` dataset.

        Use the filters below to quickly inspect runs for a specific repository, PR, or commit SHA.
        """,
    )

    with gr.Tab("Search"):
        with gr.Row():
            with gr.Column():
                gr.Markdown("### Filters")

                repo_box = gr.Textbox(
                    label="Repository",
                    placeholder="e.g. huggingface/transformers",
                    value="huggingface/transformers",
                )
                pr_box = gr.Textbox(
                    label="PR number",
                    placeholder="e.g. 30232",
                )
                sha_box = gr.Textbox(
                    label="Commit SHA (prefix accepted)",
                    placeholder="e.g. 1234abcd",
                )

                with gr.Row():
                    failures_cb = gr.Checkbox(
                        label="Only show failing workflows",
                        value=True,
                    )
                    sort_dd = gr.Dropdown(
                        label="Sort order",
                        choices=["Newest first", "Oldest first"],
                        value="Newest first",
                    )

                with gr.Row():
                    search_btn = gr.Button("🔍 Search", variant="primary")
                    refresh_btn = gr.Button("♻️ Refresh dataset cache")

                status = gr.Markdown("")

                gr.Examples(
                    label="Quick examples",
                    examples=[
                        ["huggingface/transformers", "30232", "", True, "Newest first"],
                        ["huggingface/transformers", "", "", True, "Newest first"],
                        ["huggingface/transformers", "", "abcd", False, "Newest first"],
                    ],
                    inputs=[repo_box, pr_box, sha_box, failures_cb, sort_dd],
                )

            with gr.Column():
                gr.Markdown("### Matching runs")
                table = gr.Dataframe(
                    headers=[
                        "Repository",
                        "Branch",
                        "PR",
                        "Commit",
                        "Workflow ID",
                        "Failures",
                    ],
                    wrap=True,
                    interactive=False,
                    height=300,
                )

                gr.Markdown("### Latest entry details")
                json_view = gr.Code(
                    label="Payload (latest matching record)",
                    language="json",
                    lines=22,
                )

    search_btn.click(
        query,
        inputs=[repo_box, pr_box, sha_box, failures_cb, sort_dd],
        outputs=[table, json_view, status],
        show_progress="full",
    )

    refresh_btn.click(
        refresh_dataset,
        outputs=status,
        show_progress="minimal",
    )
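

# To expose the app beyond localhost, launch(share=True) or
# launch(server_name="0.0.0.0") could be used instead (standard Gradio
# options; not enabled here).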
if __name__ == "__main__":
    demo.queue(max_size=20).launch()