Spaces:
Sleeping
Sleeping
"""
Example script to query webhook messages from the dataset.

This demonstrates how to load and analyze the batched parquet files.

Steps performed, in order:
  1. Load the "train" split of ``assafvayner/webhook-messages``.
  2. Convert it to a pandas DataFrame.
  3. Print the first message (timestamp, event type, scope, payload).
  4. Print value counts for ``event_type`` and ``scope`` and the
     min/max of ``timestamp``.
  5. Print how many rows have ``scope == 'repo'`` and a shortened
     preview of the first such payload.
"""
import json

import pandas as pd  # dataset.to_pandas() below returns a pandas DataFrame
from datasets import load_dataset

# Upper bound on how many characters of a pretty-printed payload are shown
# in the example query, so a large payload does not flood the terminal.
PREVIEW_CHARS = 500


def _preview(text, limit=PREVIEW_CHARS):
    """Return *text* cut to *limit* characters.

    The "..." marker is appended only when something was actually cut off,
    so a short payload is not displayed as if it had been truncated.
    """
    if len(text) <= limit:
        return text
    return text[:limit] + "..."


# Load the dataset
print("Loading webhook messages dataset...")
dataset = load_dataset("assafvayner/webhook-messages", split="train")
print(f"Total messages: {len(dataset)}")

# Convert to pandas for easier querying
df = dataset.to_pandas()

# Display first message
print("\nFirst message:")
print("-" * 50)
if df.empty:
    # df.iloc[0] raises IndexError on an empty frame, so report it instead.
    print("(dataset is empty - no message to display)")
else:
    first_msg = df.iloc[0]
    print(f"Timestamp: {first_msg['timestamp']}")
    print(f"Event Type: {first_msg['event_type']}")
    print(f"Scope: {first_msg['scope']}")
    print("\nPayload:")
    # The payload column is decoded with json.loads, i.e. it is expected to
    # hold JSON text; it is re-serialized only to get indented output.
    payload = json.loads(first_msg['payload'])
    print(json.dumps(payload, indent=2))

print("\n" + "=" * 50)
print("Summary Statistics:")
print("=" * 50)

# Event type distribution
print("\nEvent Types:")
print(df['event_type'].value_counts())

# Scope distribution
print("\nScope Distribution:")
print(df['scope'].value_counts())

# Time range
print("\nTime Range:")
print(f" First message: {df['timestamp'].min()}")
print(f" Last message: {df['timestamp'].max()}")

# Example: filter for a specific scope
print("\n" + "=" * 50)
print("Example Query: Find all 'repo' scope events")
print("=" * 50)
repo_events = df[df['scope'] == 'repo']
print(f"Found {len(repo_events)} events")

# Show a sample payload, but only when the filter matched at least one row
if not repo_events.empty:
    print("\nSample payload:")
    sample_payload = json.loads(repo_events.iloc[0]['payload'])
    print(_preview(json.dumps(sample_payload, indent=2)))