Spaces:

furbola
/

chaskick

Sleeping

File size: 10,634 Bytes

3cddaf8

"""
Example script showing how to use the SAM2 Video Background Remover API.

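This script demonstrates various use cases:
1. Simple single object tracking
2. Multiple object tracking
3. Refined segmentation with background points
4. Multi-frame (temporal) annotations
5. Batch processing multiple videos
6. Highlight mode (keep the background)
7. Finding click coordinates from the first frame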
"""

from gradio_client import Client
import json
from pathlib import Path


def example_1_simple_tracking():
    """
    Example 1: Track a single object (e.g., person, ball, car)

    Sends one foreground click on the first frame of ./input_video.mp4 to
    the /segment_video_api endpoint with background removal enabled, then
    prints whatever the endpoint returns.
    """
    divider = "=" * 60
    print(divider)
    print("Example 1: Simple Single Object Tracking")
    print(divider)

    # Connect to the Space that hosts the API
    space = Client("furbola/chaskick")

    # A single click on the object's center in the first frame
    click = {
        "frame_idx": 0,          # first frame
        "object_id": 1,          # first object
        "points": [[320, 240]],  # x, y of the object center
        "labels": [1],           # 1 marks a foreground point
    }
    payload = json.dumps([click])

    # Run the segmentation; max_frames=None means no frame limit is requested
    output = space.predict(
        video_file="./input_video.mp4",
        annotations_json=payload,
        remove_background=True,
        max_frames=None,
        api_name="/segment_video_api",
    )

    print(f"✅ Output saved to: {output}")


def example_2_multi_object_tracking():
    """
    Example 2: Track multiple objects simultaneously
    Useful for: tracking player + ball, multiple people, etc.

    Every object gets its own object_id and one foreground click on
    frame 0; all annotations are sent in a single request.
    """
    divider = "=" * 60
    print("\n" + divider)
    print("Example 2: Multi-Object Tracking")
    print(divider)

    space = Client("furbola/chaskick")

    # (object_id, [x, y] click on frame 0)
    targets = [
        (1, [320, 240]),  # player
        (2, [500, 300]),  # ball
        (3, [150, 200]),  # another player
    ]
    request = [
        {
            "frame_idx": 0,
            "object_id": object_id,
            "points": [click],
            "labels": [1],
        }
        for object_id, click in targets
    ]

    output = space.predict(
        video_file="./soccer_match.mp4",
        annotations_json=json.dumps(request),
        remove_background=True,
        max_frames=300,  # cap at 300 frames for speed
        api_name="/segment_video_api",
    )

    print(f"✅ Tracked 3 objects! Output: {output}")


def example_3_refined_segmentation():
    """
    Example 3: Use both foreground AND background points for better accuracy
    Useful when: object is complex, background is similar color, etc.

    Each click carries a label: 1 for a point on the object, 0 for a point
    on the background that should be excluded.
    """
    divider = "=" * 60
    print("\n" + divider)
    print("Example 3: Refined Segmentation with Negative Points")
    print(divider)

    space = Client("furbola/chaskick")

    # ([x, y], label) pairs, all on frame 0 for the same object
    clicks = [
        ([320, 240], 1),  # ✅ on the person's body
        ([350, 250], 1),  # ✅ another point on the person
        ([280, 220], 1),  # ✅ third point for better coverage
        ([100, 100], 0),  # ❌ background to exclude
        ([600, 400], 0),  # ❌ another background point
    ]
    request = [
        {
            "frame_idx": 0,
            "object_id": 1,
            # points and labels are parallel lists, in click order
            "points": [xy for xy, _ in clicks],
            "labels": [label for _, label in clicks],
        }
    ]

    output = space.predict(
        video_file="./person_video.mp4",
        annotations_json=json.dumps(request),
        remove_background=True,
        max_frames=None,
        api_name="/segment_video_api",
    )

    print(f"✅ Refined segmentation complete: {output}")


def example_4_temporal_annotations():
    """
    Example 4: Add annotations on multiple frames
    Useful when: object changes appearance, camera cuts, occlusions

    The same object_id is annotated with one foreground click on each of
    several frames.
    """
    divider = "=" * 60
    print("\n" + divider)
    print("Example 4: Multi-Frame Annotations")
    print(divider)

    space = Client("furbola/chaskick")

    # (frame index, [x, y] click) for object 1
    keyframes = [
        (0, [320, 240]),    # starting position
        (50, [450, 300]),   # object might have moved or changed
        (100, [200, 180]),  # after a camera cut or scene change
    ]
    request = [
        {
            "frame_idx": frame,
            "object_id": 1,
            "points": [click],
            "labels": [1],
        }
        for frame, click in keyframes
    ]

    output = space.predict(
        video_file="./long_video.mp4",
        annotations_json=json.dumps(request),
        remove_background=True,
        max_frames=None,
        api_name="/segment_video_api",
    )

    print(f"✅ Multi-frame tracking complete: {output}")


def example_5_batch_processing():
    """
    Example 5: Process multiple videos in batch

    Each video is submitted with a single foreground click on frame 0.
    A failure on one video is recorded and reported, and the loop moves on
    to the next; a summary of every outcome is printed at the end.
    """
    divider = "=" * 60
    print("\n" + divider)
    print("Example 5: Batch Processing Multiple Videos")
    print(divider)

    space = Client("furbola/chaskick")

    # (video path, [x, y] click on the object in frame 0)
    queue = [
        ("./video1.mp4", [320, 240]),
        ("./video2.mp4", [400, 300]),
        ("./video3.mp4", [250, 200]),
    ]
    total = len(queue)

    # (status, input path, output) per attempt, in processing order
    outcomes = []

    for position, (source, click) in enumerate(queue, start=1):
        print(f"\nProcessing video {position}/{total}: {source}")

        request = [
            {
                "frame_idx": 0,
                "object_id": 1,
                "points": [click],
                "labels": [1],
            }
        ]

        try:
            output = space.predict(
                video_file=source,
                annotations_json=json.dumps(request),
                remove_background=True,
                max_frames=200,  # fewer frames keeps the batch fast
                api_name="/segment_video_api",
            )
            outcomes.append(("✅", source, output))
            print(f"  ✅ Success: {output}")
        except Exception as err:
            # Record the failure and carry on with the remaining videos.
            outcomes.append((f"❌ {err}", source, None))
            print(f"  ❌ Failed: {err}")

    print("\n" + divider)
    print("Batch Processing Summary:")
    print(divider)
    for status, source, output in outcomes:
        print(f"{status} {source} -> {output}")


def example_6_highlight_mode():
    """
    Example 6: Highlight objects instead of removing background
    Useful for: visualization, debugging, object detection demos

    Identical request to the simple tracking example except that
    remove_background is False.
    """
    divider = "=" * 60
    print("\n" + divider)
    print("Example 6: Highlight Mode (Keep Background)")
    print(divider)

    space = Client("furbola/chaskick")

    click = {
        "frame_idx": 0,
        "object_id": 1,
        "points": [[320, 240]],
        "labels": [1],
    }
    payload = json.dumps([click])

    output = space.predict(
        video_file="./input_video.mp4",
        annotations_json=payload,
        remove_background=False,  # keep the background, just highlight the object
        max_frames=None,
        api_name="/segment_video_api",
    )

    print(f"✅ Object highlighted: {output}")


def example_7_find_coordinates():
    """
    Example 7: Helper to find coordinates in a video

    Reads the first frame of ./input_video.mp4 with OpenCV, writes it to
    first_frame.jpg and prints the frame size so x,y coordinates can be
    looked up in the saved image.
    """
    divider = "=" * 60
    print("\n" + divider)
    print("Example 7: Find Coordinates Helper")
    print(divider)

    # Imported here so the other examples do not require OpenCV
    import cv2

    video_path = "./input_video.mp4"

    # Grab only the first frame, then release the capture straight away
    capture = cv2.VideoCapture(video_path)
    ok, first_frame = capture.read()
    capture.release()

    if not ok:
        print("❌ Could not read video")
        return

    cv2.imwrite("first_frame.jpg", first_frame)

    # shape is (rows, columns, ...), i.e. height comes before width
    height, width = first_frame.shape[:2]
    print("✅ Saved first frame to: first_frame.jpg")
    print(f"   Video size: {width}x{height} (width x height)")
    print("   Open this image and note the x,y coordinates of your object")
    print("   Then use those coordinates in your annotation!")


# ============================================================================
# UTILITY FUNCTIONS
# ============================================================================

def create_annotation(frame_idx, object_id, points, labels=None):
    """
    Build one annotation dictionary.

    Args:
        frame_idx: Frame number (0 = first frame)
        object_id: Unique object ID (1, 2, 3, ...)
        points: List of [x, y] coordinates, e.g., [[320, 240]]
        labels: List of labels (1=foreground, 0=background). When omitted
            (None), every point is labelled 1.

    Returns:
        Dictionary with the keys "frame_idx", "object_id", "points" and
        "labels", in that order.
    """
    annotation = {
        "frame_idx": frame_idx,
        "object_id": object_id,
        "points": points,
    }
    # Only None triggers the default; an explicitly passed list is kept as is.
    annotation["labels"] = [1] * len(points) if labels is None else labels
    return annotation


def load_annotations_from_file(json_file):
    """
    Load annotations from a JSON file.

    Args:
        json_file: Path of the JSON file to read.

    Returns:
        The decoded JSON content, exactly as json.load produces it.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # Decode as UTF-8 explicitly: without it open() falls back to the
    # platform locale encoding and non-ASCII text can be misread.
    with open(json_file, 'r', encoding='utf-8') as f:
        return json.load(f)


def save_annotations_to_file(annotations, json_file):
    """
    Save annotations to a JSON file.

    Args:
        annotations: JSON-serializable annotation data.
        json_file: Path of the file to write; an existing file is overwritten.

    Raises:
        OSError: If the file cannot be opened for writing.
        TypeError: If annotations contains a value json cannot serialize.
    """
    # Write as UTF-8 explicitly so the file does not depend on the
    # platform locale encoding.
    with open(json_file, 'w', encoding='utf-8') as f:
        json.dump(annotations, f, indent=2)


# ============================================================================
# MAIN
# ============================================================================

if __name__ == "__main__":
    # Banner shown every time the script is run directly
    banner = """
    ╔════════════════════════════════════════════════════════════╗
    ║  SAM2 Video Background Remover - API Examples              ║
    ║  Choose an example to run or uncomment in the code         ║
    ╚════════════════════════════════════════════════════════════╝
    """
    print(banner)

    # Every example is disabled by default; uncomment the ones to run:

    # example_1_simple_tracking()
    # example_2_multi_object_tracking()
    # example_3_refined_segmentation()
    # example_4_temporal_annotations()
    # example_5_batch_processing()
    # example_6_highlight_mode()
    # example_7_find_coordinates()

    # Closing messages are printed whether or not an example was enabled
    for closing_line in (
        "\n✅ Done! Check the output files.",
        "\n🎉 Your Space: https://huggingface.co/spaces/furbola/chaskick",
    ):
        print(closing_line)