# webhook-processing / query_example.py
# assafvayner's picture
# assafvayner HF Staff
# Add HuggingFace webhook processor Gradio app
# 41d63c9
"""
Example script to query webhook messages from the dataset.
This demonstrates how to load and analyze the batched parquet files.
"""
from datasets import load_dataset
import json
import pandas as pd
# Load the "train" split of the webhook messages dataset.
print("Loading webhook messages dataset...")
dataset = load_dataset("assafvayner/webhook-messages", split="train")
print(f"Total messages: {len(dataset)}")

print("\nFirst message:")
print("-" * 50)

# Convert to pandas for easier querying
df = dataset.to_pandas()

# Display first message. Guard against an empty dataset: df.iloc[0] raises
# IndexError when the frame has no rows.
if df.empty:
    print("(no messages in dataset)")
else:
    first_msg = df.iloc[0]
    print(f"Timestamp: {first_msg['timestamp']}")
    print(f"Event Type: {first_msg['event_type']}")
    print(f"Scope: {first_msg['scope']}")
    print("\nPayload:")
    # The payload column is decoded with json.loads, so it must hold JSON text.
    payload = json.loads(first_msg['payload'])
    print(json.dumps(payload, indent=2))

print("\n" + "=" * 50)
print("Summary Statistics:")
print("=" * 50)

# Event type distribution
print("\nEvent Types:")
print(df['event_type'].value_counts())

print("\nScope Distribution:")
print(df['scope'].value_counts())

# Time range
print("\nTime Range:")
print(f" First message: {df['timestamp'].min()}")
print(f" Last message: {df['timestamp'].max()}")

# Example: Filter for specific event type
print("\n" + "=" * 50)
print("Example Query: Find all 'repo' scope events")
print("=" * 50)
repo_events = df[df['scope'] == 'repo']
print(f"Found {len(repo_events)} events")

# Show sample payloads
if not repo_events.empty:
    print("\nSample payload:")
    sample_payload = json.loads(repo_events.iloc[0]['payload'])
    sample_text = json.dumps(sample_payload, indent=2)
    # Preview at most 500 characters; append the ellipsis only when the text
    # was actually cut, so short payloads are not shown as if truncated.
    preview_limit = 500
    if len(sample_text) > preview_limit:
        sample_text = sample_text[:preview_limit] + "..."
    print(sample_text)