|
|
""" |
|
|
Example script showing how to use the SAM2 Video Background Remover API. |
|
|
|
|
|
This script demonstrates various use cases: |
|
|
1. Simple single object tracking |
|
|
2. Multiple object tracking |
|
|
3. Refined segmentation with background points |
|
|
4. Batch processing multiple videos |
|
|
""" |
|
|
|
|
|
from gradio_client import Client |
|
|
import json |
|
|
from pathlib import Path |
|
|
|
|
|
|
|
|
def example_1_simple_tracking():
    """
    Example 1: Track a single object (e.g., person, ball, car).

    Sends one foreground click on frame 0 for a single object and asks
    the API to remove the background over the whole video.
    """
    print("=" * 60)
    print("Example 1: Simple Single Object Tracking")
    print("=" * 60)

    client = Client("furbola/chaskick")

    # One annotation: object 1, a single positive (foreground) point on frame 0.
    annotations = [
        {
            "frame_idx": 0,
            "object_id": 1,
            "points": [[320, 240]],
            "labels": [1]
        }
    ]

    result = client.predict(
        video_file="./input_video.mp4",
        annotations_json=json.dumps(annotations),
        remove_background=True,
        max_frames=None,  # process the full video
        api_name="/segment_video_api"
    )

    # NOTE(review): the original print was a mojibake-corrupted f-string
    # containing a literal newline (a syntax error); restored to one line.
    print(f"✅ Output saved to: {result}")
|
|
|
|
|
|
|
|
def example_2_multi_object_tracking():
    """
    Example 2: Track multiple objects simultaneously.

    Useful for: tracking player + ball, multiple people, etc.
    Each object gets its own object_id and its own seed point.
    """
    print("\n" + "=" * 60)
    print("Example 2: Multi-Object Tracking")
    print("=" * 60)

    client = Client("furbola/chaskick")

    # Three objects, each seeded with one foreground point on frame 0.
    annotations = [
        {
            "frame_idx": 0,
            "object_id": 1,
            "points": [[320, 240]],
            "labels": [1]
        },
        {
            "frame_idx": 0,
            "object_id": 2,
            "points": [[500, 300]],
            "labels": [1]
        },
        {
            "frame_idx": 0,
            "object_id": 3,
            "points": [[150, 200]],
            "labels": [1]
        }
    ]

    result = client.predict(
        video_file="./soccer_match.mp4",
        annotations_json=json.dumps(annotations),
        remove_background=True,
        max_frames=300,  # cap processing at the first 300 frames
        api_name="/segment_video_api"
    )

    # NOTE(review): restored a mojibake-corrupted multi-line f-string
    # (syntax error in the original).
    print(f"✅ Tracked 3 objects! Output: {result}")
|
|
|
|
|
|
|
|
def example_3_refined_segmentation():
    """
    Example 3: Use both foreground AND background points for better accuracy.

    Useful when: object is complex, background is similar color, etc.
    Label convention: 1 = foreground (include), 0 = background (exclude).
    """
    print("\n" + "=" * 60)
    print("Example 3: Refined Segmentation with Negative Points")
    print("=" * 60)

    client = Client("furbola/chaskick")

    annotations = [
        {
            "frame_idx": 0,
            "object_id": 1,
            # First three points lie on the object (label 1); the last two
            # mark background regions to exclude (label 0).
            "points": [
                [320, 240],
                [350, 250],
                [280, 220],
                [100, 100],
                [600, 400]
            ],
            "labels": [1, 1, 1, 0, 0]
        }
    ]

    result = client.predict(
        video_file="./person_video.mp4",
        annotations_json=json.dumps(annotations),
        remove_background=True,
        max_frames=None,  # process the full video
        api_name="/segment_video_api"
    )

    # NOTE(review): restored a mojibake-corrupted multi-line f-string
    # (syntax error in the original).
    print(f"✅ Refined segmentation complete: {result}")
|
|
|
|
|
|
|
|
def example_4_temporal_annotations():
    """
    Example 4: Add annotations on multiple frames.

    Useful when: object changes appearance, camera cuts, occlusions.
    The same object_id is re-seeded on frames 0, 50, and 100.
    """
    print("\n" + "=" * 60)
    print("Example 4: Multi-Frame Annotations")
    print("=" * 60)

    client = Client("furbola/chaskick")

    # One object (id 1) re-annotated at three points in time.
    annotations = [
        {
            "frame_idx": 0,
            "object_id": 1,
            "points": [[320, 240]],
            "labels": [1]
        },
        {
            "frame_idx": 50,
            "object_id": 1,
            "points": [[450, 300]],
            "labels": [1]
        },
        {
            "frame_idx": 100,
            "object_id": 1,
            "points": [[200, 180]],
            "labels": [1]
        }
    ]

    result = client.predict(
        video_file="./long_video.mp4",
        annotations_json=json.dumps(annotations),
        remove_background=True,
        max_frames=None,  # process the full video
        api_name="/segment_video_api"
    )

    # NOTE(review): restored a mojibake-corrupted multi-line f-string
    # (syntax error in the original).
    print(f"✅ Multi-frame tracking complete: {result}")
|
|
|
|
|
|
|
|
def example_5_batch_processing():
    """
    Example 5: Process multiple videos in batch.

    Each video gets a single foreground point on frame 0; failures are
    caught per-video so one bad file does not abort the whole batch.
    """
    print("\n" + "=" * 60)
    print("Example 5: Batch Processing Multiple Videos")
    print("=" * 60)

    client = Client("furbola/chaskick")

    # Each entry: the video path plus one [x, y] foreground point on frame 0.
    videos = [
        {"path": "./video1.mp4", "point": [320, 240]},
        {"path": "./video2.mp4", "point": [400, 300]},
        {"path": "./video3.mp4", "point": [250, 200]},
    ]

    results = []

    for i, video in enumerate(videos, 1):
        print(f"\nProcessing video {i}/{len(videos)}: {video['path']}")

        annotations = [{
            "frame_idx": 0,
            "object_id": 1,
            "points": [video['point']],
            "labels": [1]
        }]

        try:
            result = client.predict(
                video_file=video['path'],
                annotations_json=json.dumps(annotations),
                remove_background=True,
                max_frames=200,  # cap processing per video
                api_name="/segment_video_api"
            )
            # NOTE(review): status strings were mojibake-corrupted emoji, one
            # of which contained a literal newline (syntax error); restored.
            results.append({"input": video['path'], "output": result, "status": "✅"})
            print(f"  ✅ Success: {result}")
        except Exception as e:
            results.append({"input": video['path'], "output": None, "status": f"❌ {e}"})
            print(f"  ❌ Failed: {e}")

    print("\n" + "=" * 60)
    print("Batch Processing Summary:")
    print("=" * 60)
    for r in results:
        print(f"{r['status']} {r['input']} -> {r['output']}")
|
|
|
|
|
|
|
|
def example_6_highlight_mode():
    """
    Example 6: Highlight objects instead of removing background.

    Useful for: visualization, debugging, object detection demos.
    remove_background=False keeps the original background.
    """
    print("\n" + "=" * 60)
    print("Example 6: Highlight Mode (Keep Background)")
    print("=" * 60)

    client = Client("furbola/chaskick")

    # Single object, single foreground point on frame 0.
    annotations = [{
        "frame_idx": 0,
        "object_id": 1,
        "points": [[320, 240]],
        "labels": [1]
    }]

    result = client.predict(
        video_file="./input_video.mp4",
        annotations_json=json.dumps(annotations),
        remove_background=False,  # highlight the object, keep the scene
        max_frames=None,
        api_name="/segment_video_api"
    )

    # NOTE(review): restored a mojibake-corrupted multi-line f-string
    # (syntax error in the original).
    print(f"✅ Object highlighted: {result}")
|
|
|
|
|
|
|
|
def example_7_find_coordinates():
    """
    Example 7: Helper to find coordinates in a video.

    Saves the first frame to disk so you can open it and read off the
    x,y coordinates of the object you want to annotate.
    """
    print("\n" + "=" * 60)
    print("Example 7: Find Coordinates Helper")
    print("=" * 60)

    import cv2  # local import: only this helper needs OpenCV

    video_path = "./input_video.mp4"

    cap = cv2.VideoCapture(video_path)
    ret, frame = cap.read()
    cap.release()

    if ret:
        cv2.imwrite("first_frame.jpg", frame)
        # NOTE(review): first print was a mojibake-corrupted multi-line
        # f-string (syntax error); restored. Also dropped the f-prefix on
        # strings with no placeholders.
        print("✅ Saved first frame to: first_frame.jpg")
        # frame.shape is (height, width, channels) for a decoded frame
        print(f"   Video size: {frame.shape[1]}x{frame.shape[0]} (width x height)")
        print("   Open this image and note the x,y coordinates of your object")
        print("   Then use those coordinates in your annotation!")
    else:
        print("❌ Could not read video")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def create_annotation(frame_idx, object_id, points, labels=None):
    """
    Build a single annotation dictionary for the segmentation API.

    Args:
        frame_idx: Frame number (0 = first frame)
        object_id: Unique object ID (1, 2, 3, ...)
        points: List of [x, y] coordinates, e.g., [[320, 240]]
        labels: List of labels (1=foreground, 0=background). Defaults to all 1s.

    Returns:
        Dictionary with annotation
    """
    return {
        "frame_idx": frame_idx,
        "object_id": object_id,
        "points": points,
        "labels": [1] * len(points) if labels is None else labels,
    }
|
|
|
|
|
|
|
|
def load_annotations_from_file(json_file):
    """Read annotations back from a JSON file and return them."""
    with open(json_file, 'r') as handle:
        return json.load(handle)
|
|
|
|
|
|
|
|
def save_annotations_to_file(annotations, json_file):
    """Write annotations out to a JSON file (pretty-printed)."""
    with open(json_file, 'w') as handle:
        json.dump(annotations, handle, indent=2)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # NOTE(review): the original banner used box-drawing characters that
    # were mojibake-corrupted; restored as plain ASCII. The final print
    # was a broken multi-line f-string (syntax error); restored.
    print("""
==============================================================
   SAM2 Video Background Remover - API Examples
   Choose an example to run or uncomment in the code
==============================================================
""")

    # Uncomment the examples you want to run:
    # example_1_simple_tracking()
    # example_2_multi_object_tracking()
    # example_3_refined_segmentation()
    # example_4_temporal_annotations()
    # example_5_batch_processing()
    # example_6_highlight_mode()
    # example_7_find_coordinates()

    print("\n✅ Done! Check the output files.")
    print("\nYour Space: https://huggingface.co/spaces/furbola/chaskick")
|
|
|
|
|
|