Spaces:
Sleeping
Sleeping
File size: 1,558 Bytes
41d63c9 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 |
"""
Example script to query webhook messages from the dataset.
This demonstrates how to load and analyze the batched parquet files.
"""
from datasets import load_dataset
import json
import pandas as pd
# Hub repository that is loaded by this example.
DATASET_NAME = "assafvayner/webhook-messages"

# Width of the separator rules printed between report sections.
RULE_WIDTH = 50

# Maximum number of characters of a sample payload to print.
PREVIEW_LIMIT = 500


def truncate_text(text: str, limit: int = PREVIEW_LIMIT) -> str:
    """Return *text* cut to at most *limit* characters.

    The "..." marker is appended only when characters were actually dropped,
    so a short text is never shown as if it had been truncated.
    """
    if len(text) <= limit:
        return text
    return text[:limit] + "..."


def format_payload(raw_payload: str) -> str:
    """Parse a JSON-encoded payload string and re-serialize it with indent=2.

    Raises:
        json.JSONDecodeError: if *raw_payload* is not valid JSON.
    """
    return json.dumps(json.loads(raw_payload), indent=2)


def filter_by_scope(df: pd.DataFrame, scope: str) -> pd.DataFrame:
    """Return the rows of *df* whose 'scope' column equals *scope*."""
    return df[df['scope'] == scope]


def print_first_message(df: pd.DataFrame) -> None:
    """Print the timestamp, event type, scope and payload of the first row.

    The caller must ensure *df* is non-empty; ``iloc[0]`` raises IndexError
    on an empty frame.
    """
    print("\nFirst message:")
    print("-" * RULE_WIDTH)
    first_msg = df.iloc[0]
    print(f"Timestamp: {first_msg['timestamp']}")
    print(f"Event Type: {first_msg['event_type']}")
    print(f"Scope: {first_msg['scope']}")
    print("\nPayload:")
    print(format_payload(first_msg['payload']))


def print_summary(df: pd.DataFrame) -> None:
    """Print value counts for event type and scope, and the timestamp range."""
    print("\n" + "=" * RULE_WIDTH)
    print("Summary Statistics:")
    print("=" * RULE_WIDTH)

    # Event type distribution
    print("\nEvent Types:")
    print(df['event_type'].value_counts())
    print("\nScope Distribution:")
    print(df['scope'].value_counts())

    # Time range
    print("\nTime Range:")
    print(f" First message: {df['timestamp'].min()}")
    print(f" Last message: {df['timestamp'].max()}")


def print_scope_query(df: pd.DataFrame, scope: str = 'repo') -> None:
    """Print how many rows have the given scope, plus one sample payload."""
    print("\n" + "=" * RULE_WIDTH)
    print(f"Example Query: Find all '{scope}' scope events")
    print("=" * RULE_WIDTH)
    matches = filter_by_scope(df, scope)
    print(f"Found {len(matches)} events")

    # Show a sample payload only when the filter matched something.
    if not matches.empty:
        print("\nSample payload:")
        print(truncate_text(format_payload(matches.iloc[0]['payload'])))


def main() -> None:
    """Load the dataset and print the first message, a summary and a query."""
    print("Loading webhook messages dataset...")
    dataset = load_dataset(DATASET_NAME, split="train")
    print(f"Total messages: {len(dataset)}")

    # Convert to pandas for easier querying
    df = dataset.to_pandas()
    if df.empty:
        # Without this guard, df.iloc[0] below would raise IndexError.
        print("Dataset is empty; nothing to display.")
        return

    print_first_message(df)
    print_summary(df)
    print_scope_query(df, 'repo')


if __name__ == "__main__":
    main()
|