Spaces:

assafvayner
/

webhook-processing

Sleeping

webhook-processing / query_example.py

Add HuggingFace webhook processor Gradio app

41d63c9 6 days ago

1.56 kB

	"""
	Example script to query webhook messages from the dataset.

	This demonstrates how to load and analyze the batched parquet files.
	"""
	from datasets import load_dataset
	import json
	import pandas as pd

	# Load the "train" split of the webhook messages dataset.
	print("Loading webhook messages dataset...")
	dataset = load_dataset("assafvayner/webhook-messages", split="train")

	print(f"Total messages: {len(dataset)}")
	print("\nFirst message:")
	print("-" * 50)

	# Convert to pandas for easier querying
	df = dataset.to_pandas()

	# Display the first message. An empty split is reported instead of letting
	# ``df.iloc[0]`` raise IndexError.
	if df.empty:
	    print("(no messages in dataset)")
	else:
	    first_msg = df.iloc[0]
	    print(f"Timestamp: {first_msg['timestamp']}")
	    print(f"Event Type: {first_msg['event_type']}")
	    print(f"Scope: {first_msg['scope']}")
	    print("\nPayload:")
	    # The 'payload' column is parsed as JSON text so it can be pretty-printed.
	    payload = json.loads(first_msg['payload'])
	    print(json.dumps(payload, indent=2))

	print("\n" + "=" * 50)
	print("Summary Statistics:")
	print("=" * 50)

	# Event type distribution
	print("\nEvent Types:")
	print(df['event_type'].value_counts())

	print("\nScope Distribution:")
	print(df['scope'].value_counts())

	# Time range (smallest and largest values of the 'timestamp' column)
	print("\nTime Range:")
	print(f" First message: {df['timestamp'].min()}")
	print(f" Last message: {df['timestamp'].max()}")

	# Example: filter rows by the value of the 'scope' column
	print("\n" + "=" * 50)
	print("Example Query: Find all 'repo' scope events")
	print("=" * 50)
	repo_events = df[df['scope'] == 'repo']
	print(f"Found {len(repo_events)} events")

	# Show a sample payload, limited to the first 500 characters of its
	# pretty-printed JSON.
	if not repo_events.empty:
	    print("\nSample payload:")
	    sample_payload = json.loads(repo_events.iloc[0]['payload'])
	    sample_text = json.dumps(sample_payload, indent=2)
	    # Append the ellipsis only when the text was actually cut short.
	    suffix = "..." if len(sample_text) > 500 else ""
	    print(sample_text[:500] + suffix)