ArthurZ (HF Staff) committed
Commit 3601d7a · verified · 1 Parent(s): 8eea0da

Update app.py

Files changed (1)
  1. app.py +172 -43
app.py CHANGED
@@ -14,6 +14,7 @@ MAX_ROWS = 200
 
 @lru_cache(maxsize=1)
 def _load_dataset() -> Dataset | None:
+    """Lazy-load and cache the dataset."""
     try:
         return load_dataset(DATASET_ID, split="train")
     except Exception as error:
@@ -21,7 +22,27 @@ def _load_dataset() -> Dataset | None:
         return None
 
 
-def _filter_records(dataset: Dataset, repo: str, pr: str, sha: str) -> List[dict]:
+def _record_timestamp(record: dict) -> str:
+    """Best-effort extraction of a sortable timestamp from the payload."""
+    payload = record.get("payload_json")
+    if not payload:
+        return ""
+    try:
+        payload_dict = json.loads(payload)
+    except Exception:
+        return ""
+    metadata = payload_dict.get("metadata") or {}
+    return metadata.get("collected_at") or ""
+
+
+def _filter_records(
+    dataset: Dataset,
+    repo: str,
+    pr: str,
+    sha: str,
+    only_failures: bool,
+    sort_desc: bool,
+) -> List[dict]:
     repo = repo.strip().lower()
     pr = pr.strip()
     sha = sha.strip().lower()
@@ -33,34 +54,54 @@ def _filter_records(dataset: Dataset, repo: str, pr: str, sha: str) -> List[dict
             return False
         if sha and sha not in (example.get("commit_sha") or "").lower():
             return False
+        if only_failures and (example.get("failure_count") or 0) <= 0:
+            return False
         return True
 
     items = [ex for ex in dataset if _matches(ex)]
-    items.sort(key=lambda ex: ex.get("collected_at") or "", reverse=True)
+    items.sort(key=_record_timestamp, reverse=sort_desc)
     return items[:MAX_ROWS]
 
 
-def query(repo: str, pr: str, sha: str) -> Tuple[List[List[str]], str]:
+def query(
+    repo: str,
+    pr: str,
+    sha: str,
+    only_failures: bool,
+    sort_order: str,
+) -> Tuple[List[List[str]], str, str]:
+    """
+    Main query function used by the UI.
+
+    Returns:
+    - Table rows
+    - JSON details for latest entry
+    - Status message
+    """
     dataset = _load_dataset()
     if dataset is None:
-        return [], json.dumps({"error": "Dataset not available"}, indent=2)
+        error_json = json.dumps(
+            {"error": f"Dataset {DATASET_ID} not available"}, indent=2
+        )
+        return [], error_json, f"❌ Dataset `{DATASET_ID}` is not available."
 
-    records = _filter_records(dataset, repo, pr, sha)
-    table_rows = []
+    sort_desc = sort_order == "Newest first"
+    records = _filter_records(dataset, repo, pr, sha, only_failures, sort_desc)
+
+    table_rows: List[List[str]] = []
     for record in records:
         table_rows.append(
             [
-                record.get("collected_at", ""),
                 record.get("repository", ""),
+                record.get("branch", ""),
                 record.get("pr_number", ""),
-                record.get("commit_sha", "")[:12],
+                (record.get("commit_sha") or "")[:12],
                 record.get("workflow_id", ""),
                 str(record.get("failure_count", 0)),
-                str(record.get("job_count", 0)),
-                str(record.get("test_count", 0)),
             ]
         )
 
+    # Default details
    details = json.dumps({}, indent=2)
     if records:
         payload = records[0].get("payload_json", "{}")
@@ -69,54 +110,142 @@ def query(repo: str, pr: str, sha: str) -> Tuple[List[List[str]], str]:
         except json.JSONDecodeError:
             details = json.dumps({"error": "Unable to parse payload"}, indent=2)
 
-    return table_rows, details
+    # Build a human-friendly status message
+    filters = []
+    if repo.strip():
+        filters.append(f"repo = `{repo.strip()}`")
+    if pr.strip():
+        filters.append(f"PR = `{pr.strip()}`")
+    if sha.strip():
+        filters.append(f"SHA ~ `{sha.strip()}`")
+    if only_failures:
+        filters.append("only failing workflows")
+
+    filter_text = ", ".join(filters) if filters else "no filters"
+    match_count = len(records)
+    dataset_size = len(dataset)
+
+    if match_count == 0:
+        status_msg = (
+            f"🔍 No matching records found using {filter_text}. "
+            f"(Dataset size: {dataset_size} rows)"
+        )
+    else:
+        status_msg = (
+            f"✅ Found {match_count} matching record(s) using {filter_text}. "
+            f"Showing up to {MAX_ROWS} most recent entries "
+            f"(dataset size: {dataset_size} rows, sorted: {sort_order.lower()})."
+        )
+
+    return table_rows, details, status_msg
 
 
 def refresh_dataset() -> str:
+    """Clear the cache and reload the dataset."""
     _load_dataset.cache_clear()
     dataset = _load_dataset()
     if dataset is None:
-        return "Failed to refresh dataset."
-    return f"Dataset refreshed with {len(dataset)} rows."
+        return f"❌ Failed to refresh dataset `{DATASET_ID}`."
+    return f"🔄 Dataset refreshed with **{len(dataset)}** rows from `{DATASET_ID}`."
 
 
-with gr.Blocks() as demo:
+with gr.Blocks(
+    title="CircleCI Test Collection Helper",
+    theme=gr.themes.Soft(primary_hue="indigo", secondary_hue="blue"),
+) as demo:
     gr.Markdown(
         """
-        # CircleCI Test Collection Helper
+        # 🧪 CircleCI Test Collection Helper
 
-        Use the filters below to inspect CircleCI test aggregation records for the Transformers repository (or any
-        repository that uploads data to the `transformers-community/circleci-test-results` dataset).
-        """
+        Explore CircleCI test aggregation records stored in the
+        `transformers-community/circleci-test-results` dataset.
+
+        Use the filters below to quickly inspect runs for a specific repository, PR, or commit SHA.
+        """,
     )
 
-    with gr.Row():
-        repo_box = gr.Textbox(label="Repository", placeholder="huggingface/transformers")
-        pr_box = gr.Textbox(label="PR number")
-        sha_box = gr.Textbox(label="Commit SHA (prefix accepted)")
-
-    with gr.Row():
-        search_btn = gr.Button("Search")
-        refresh_btn = gr.Button("Refresh dataset cache")
-
-    table = gr.Dataframe(
-        headers=[
-            "Collected at",
-            "Repository",
-            "PR",
-            "Commit",
-            "Workflow ID",
-            "Failures",
-            "Jobs",
-            "Tests",
-        ],
-        wrap=True,
+    with gr.Tab("Search"):
+        with gr.Row():
+            with gr.Column():
+                gr.Markdown("### Filters")
+
+                repo_box = gr.Textbox(
+                    label="Repository",
+                    placeholder="e.g. huggingface/transformers",
+                    value="huggingface/transformers",
+                )
+                pr_box = gr.Textbox(
+                    label="PR number",
+                    placeholder="e.g. 30232",
+                )
+                sha_box = gr.Textbox(
+                    label="Commit SHA (prefix accepted)",
+                    placeholder="e.g. 1234abcd",
+                )
+
+                with gr.Row():
+                    failures_cb = gr.Checkbox(
+                        label="Only show failing workflows",
+                        value=True,
+                    )
+                    sort_dd = gr.Dropdown(
+                        label="Sort order",
+                        choices=["Newest first", "Oldest first"],
+                        value="Newest first",
+                    )
+
+                with gr.Row():
+                    search_btn = gr.Button("🔍 Search", variant="primary")
+                    refresh_btn = gr.Button("♻️ Refresh dataset cache")
+
+                status = gr.Markdown("")
+
+                gr.Examples(
+                    label="Quick examples",
+                    examples=[
+                        ["huggingface/transformers", "30232", "", True, "Newest first"],
+                        ["huggingface/transformers", "", "", True, "Newest first"],
+                        ["huggingface/transformers", "", "abcd", False, "Newest first"],
+                    ],
+                    inputs=[repo_box, pr_box, sha_box, failures_cb, sort_dd],
+                )
+
+            with gr.Column():
+                gr.Markdown("### Matching runs")
+                table = gr.Dataframe(
+                    headers=[
+                        "Repository",
+                        "Branch",
+                        "PR",
+                        "Commit",
+                        "Workflow ID",
+                        "Failures",
+                    ],
+                    wrap=True,
+                    interactive=False,
+                    height=300,
+                )
+
+                gr.Markdown("### Latest entry details")
+                json_view = gr.Code(
+                    label="Payload (latest matching record)",
+                    language="json",
+                    lines=22,
+                )
+
+    # Wire up interactions
+    search_btn.click(
+        query,
+        inputs=[repo_box, pr_box, sha_box, failures_cb, sort_dd],
+        outputs=[table, json_view, status],
+        show_progress="full",
     )
-    json_view = gr.Code(label="Latest entry details", language="json")
-    status = gr.Markdown("")
 
-    search_btn.click(query, inputs=[repo_box, pr_box, sha_box], outputs=[table, json_view])
-    refresh_btn.click(refresh_dataset, outputs=status)
+    refresh_btn.click(
+        refresh_dataset,
+        outputs=status,
+        show_progress="minimal",
+    )
 
 if __name__ == "__main__":
     demo.queue(max_size=20).launch()
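
A quick way to sanity-check the commit's new sort behaviour outside the UI is to exercise the timestamp key on its own. The sketch below is illustrative only and not part of the commit: it mirrors the committed _record_timestamp helper under the hypothetical name record_timestamp, and the sample records are made up, assuming collected_at is an ISO-8601 UTC string nested under payload_json -> metadata, as the diff indicates.

import json

def record_timestamp(record: dict) -> str:
    # Mirrors the committed _record_timestamp: pull metadata.collected_at
    # out of the payload_json string, returning "" on any failure.
    payload = record.get("payload_json")
    if not payload:
        return ""
    try:
        payload_dict = json.loads(payload)
    except Exception:
        return ""
    metadata = payload_dict.get("metadata") or {}
    return metadata.get("collected_at") or ""

# Hypothetical sample records, shaped like rows of the dataset.
records = [
    {"payload_json": json.dumps({"metadata": {"collected_at": "2024-05-01T10:00:00Z"}})},
    {"payload_json": json.dumps({"metadata": {"collected_at": "2024-05-02T09:30:00Z"}})},
    {"payload_json": "not json"},  # unparsable payload falls back to ""
]

# "Newest first" in the UI maps to reverse=True; same-format ISO-8601 UTC
# strings sort correctly as plain strings, and the "" fallback lands last.
records.sort(key=record_timestamp, reverse=True)
print([record_timestamp(r) for r in records])
# ['2024-05-02T09:30:00Z', '2024-05-01T10:00:00Z', '']

Since list.sort computes the key once per element, the extra json.loads cost stays linear in the number of filtered records rather than growing with the number of comparisons.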