Spaces:

akhaliq
/

sam-3d-body

Runtime error

File size: 10,235 Bytes

2c2a833

"""
SAM 3D Body Gradio App - ZeroGPU Compatible
This app handles all dependencies and provides a user-friendly interface for 3D body estimation.
Optimized for Hugging Face Spaces with ZeroGPU support.
"""

import os
import sys
import subprocess
import importlib.util

def check_and_install_package(package_name, import_name=None, pip_name=None):
    """Ensure a module is importable, installing it with pip when it is not.

    Args:
        package_name: Name shown in the log messages; also the default for
            ``import_name`` and ``pip_name``.
        import_name: Module name probed with ``importlib.util.find_spec``.
        pip_name: Requirement(s) handed to ``pip install``. Either a string,
            where several requirements may be separated by whitespace
            (e.g. ``"torch torchvision torchaudio"``), or a list/tuple of
            requirement strings. Individual requirements inside a string
            must therefore not contain spaces.

    Returns:
        True once the module was found or the pip install succeeded.

    Raises:
        subprocess.CalledProcessError: If ``pip install`` exits non-zero.
    """
    if import_name is None:
        import_name = package_name
    if pip_name is None:
        pip_name = package_name

    try:
        spec = importlib.util.find_spec(import_name)
    except ModuleNotFoundError:
        # Raised for dotted names whose parent package is absent; that is
        # just another way of saying "not installed".
        spec = None

    if spec is None:
        # pip expects every requirement as its own argv entry; passing
        # "a b c" as one argument is rejected as an invalid requirement.
        if isinstance(pip_name, str):
            requirements = pip_name.split()
        else:
            requirements = list(pip_name)

        print(f"Installing {package_name}...")
        subprocess.check_call(
            [sys.executable, "-m", "pip", "install", *requirements, "-q"]
        )
        print(f"✓ {package_name} installed successfully")
    return True

# Install core dependencies
print("Checking and installing dependencies...")
check_and_install_package("gradio")
check_and_install_package("spaces")  # ZeroGPU support
# Each PyTorch distribution is requested on its own: pip needs one argument
# per requirement, so a combined "torch torchvision torchaudio" string handed
# over as a single argument is not a valid requirement.
check_and_install_package("torch")
check_and_install_package("torchvision")
check_and_install_package("torchaudio")
check_and_install_package("pytorch_lightning", "pytorch_lightning")
# The import name (cv2 / PIL) differs from the pip distribution name.
check_and_install_package("cv2", "cv2", "opencv-python")
check_and_install_package("numpy")
check_and_install_package("PIL", "PIL", "Pillow")
check_and_install_package("huggingface_hub")

# Install additional dependencies
additional_deps = [
    "pyrender", "yacs", "scikit-image", "einops", "timm", "dill",
    "pandas", "rich", "hydra-core", "pyrootutils", "webdataset",
    "networkx==3.2.1", "roma", "joblib", "seaborn", "loguru",
    "pycocotools", "fvcore"
]

# Distributions whose importable module is not simply the pip name with
# dashes turned into underscores. Without these the import probe never
# matches and pip is invoked again on every start.
_import_name_overrides = {
    "scikit-image": "skimage",
    "hydra-core": "hydra",
}

for dep in additional_deps:
    # Strip a pinned version ("networkx==3.2.1" -> "networkx") before
    # deriving the module name to probe.
    dist_name = dep.split("==")[0]
    pkg_name = _import_name_overrides.get(dist_name, dist_name.replace("-", "_"))
    try:
        check_and_install_package(pkg_name, pip_name=dep)
    except Exception as exc:
        # Optional extras are best effort: report the failure and carry on,
        # but let KeyboardInterrupt / SystemExit propagate.
        print(f"⚠️ Could not install {dep}: {exc}")

print("Core dependencies installed!")

import gradio as gr
import cv2
import numpy as np
from PIL import Image
import torch
import spaces  # ZeroGPU decorator
from huggingface_hub import hf_hub_download, login
import warnings
warnings.filterwarnings('ignore')

class SAM3DBodyEstimator:
    """Wrapper class for SAM 3D Body estimation with ZeroGPU support.

    The model is not loaded on construction; call ``setup`` first.
    ``process_image`` refuses to run until ``initialized`` is True.
    """

    def __init__(self, hf_repo_id="facebook/sam-3d-body-dinov3"):
        """Remember which Hugging Face repo to load; no model is loaded here."""
        self.hf_repo_id = hf_repo_id  # repo id handed to setup_sam_3d_body()
        self.model = None             # populated by setup()
        self.faces = None             # copied from model.faces by setup()
        self.initialized = False      # flipped to True after a successful setup()

    def setup(self, hf_token=None):
        """Setup the SAM 3D Body model (CPU operations only).

        Args:
            hf_token: Optional Hugging Face token; when truthy it is passed
                to ``huggingface_hub.login`` before the model is loaded.

        Returns:
            A human-readable status string. Failures are reported through
            the returned string rather than raised.
        """
        try:
            if hf_token:
                # NOTE(review): login() appears to register the token for the
                # whole process/machine rather than per user — confirm this is
                # acceptable when several people share the Space.
                login(token=hf_token)
                print("✓ Logged in to Hugging Face")

            # Try to import the SAM 3D Body utilities
            try:
                from notebook.utils import setup_sam_3d_body
                # Initialize model on CPU first, will move to GPU during inference
                self.model = setup_sam_3d_body(hf_repo_id=self.hf_repo_id)
                self.faces = self.model.faces
                self.initialized = True
                return "✓ Model loaded successfully! Ready for GPU inference."
            except ImportError:
                return "⚠️ SAM 3D Body package not found. Please install manually or provide installation path."
            except Exception as e:
                return f"❌ Error loading model: {str(e)}\n\nPlease ensure you have access to the Hugging Face repo and are authenticated."

        except Exception as e:
            # Reached when the login step itself fails.
            return f"❌ Setup error: {str(e)}"

    @spaces.GPU(duration=120)  # ZeroGPU decorator with 120s timeout
    def process_image(self, image):
        """Process an image and return 3D body estimation (GPU accelerated).

        Args:
            image: Input picture. PIL images of any mode are accepted and
                converted to RGB; other array-likes are passed to
                ``np.array`` unchanged.

        Returns:
            ``(result, status)``: ``result`` is a PIL image with the rendered
            estimate, the untouched input when the visualisation helpers are
            missing, or None on failure. ``status`` is a status string.
        """
        if not self.initialized:
            return None, "❌ Model not initialized. Please setup first with your HF token."

        try:
            # Ensure model is on GPU
            if hasattr(self.model, 'to'):
                self.model.to('cuda')

            # Grayscale ("L"), palette ("P") and similar PIL modes do not
            # yield an H×W×3 array, which the RGB→BGR conversion below
            # rejects; normalise them to RGB first.
            if isinstance(image, Image.Image):
                rgb_source = image.convert("RGB")
            else:
                rgb_source = image

            # BGR copy for the renderer, RGB copy for the model.
            img_bgr = cv2.cvtColor(np.array(rgb_source), cv2.COLOR_RGB2BGR)
            img_rgb = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)

            # Process image (GPU operations happen here)
            outputs = self.model.process_one_image(img_rgb)

            # Visualize results
            try:
                from tools.vis_utils import visualize_sample_together
                rend_img = visualize_sample_together(img_bgr, outputs, self.faces)
                result_img = Image.fromarray(cv2.cvtColor(rend_img.astype(np.uint8), cv2.COLOR_BGR2RGB))

                # GPU is automatically released after this function completes
                return result_img, "✓ Processing completed successfully!"
            except ImportError:
                # Fallback visualization if vis_utils not available
                return image, "⚠️ Visualization utilities not found. Model processed but cannot render 3D output."

        except Exception as e:
            return None, f"❌ Processing error: {str(e)}"
        finally:
            # Clean up GPU memory
            if torch.cuda.is_available():
                torch.cuda.empty_cache()

# Initialize estimator
# Module-level instance shared by the Gradio callbacks below. Constructing it
# only stores the default repo id; the model is loaded later via setup().
estimator = SAM3DBodyEstimator()

def setup_model(hf_token, model_choice):
    """Point the shared estimator at the chosen checkpoint and load it.

    Args:
        hf_token: Hugging Face token entered by the user. Surrounding
            whitespace is stripped; an empty value means "do not log in".
        model_choice: Label of the selected model variant.

    Returns:
        The status string produced by ``estimator.setup``, or an error
        message when ``model_choice`` is not one of the known labels.
    """
    repo_ids = {
        "DINOv3 (Recommended)": "facebook/sam-3d-body-dinov3",
        "ViT-H": "facebook/sam-3d-body-vith"
    }

    repo_id = repo_ids.get(model_choice)
    if repo_id is None:
        # Report the problem in the status box instead of raising KeyError.
        return f"❌ Unknown model selection: {model_choice!r}"

    # Tokens pasted from a browser often carry stray spaces or newlines.
    token = hf_token.strip() if isinstance(hf_token, str) else hf_token

    estimator.hf_repo_id = repo_id
    return estimator.setup(token or None)

def process_uploaded_image(image):
    """Run the shared estimator on an uploaded image.

    Returns an ``(image, status)`` pair. When nothing was uploaded the image
    slot is None and the status asks the user to upload first; otherwise the
    pair comes straight from ``estimator.process_image``.
    """
    if image is not None:
        return estimator.process_image(image)
    return None, "❌ Please upload an image first."

# Create Gradio interface
# Layout: an intro Markdown block, then one row with two equal-width columns
# (left: model setup + image upload, right: rendered result + tips), then a
# footer Markdown block. The button callbacks are wired at the bottom.
with gr.Blocks(title="SAM 3D Body Estimator", theme=gr.themes.Soft()) as demo:
    gr.Markdown("""
    # 🎯 SAM 3D Body Estimator (ZeroGPU)
    
    Generate 3D body meshes from single images using Meta's SAM 3D Body model.
    **Powered by Hugging Face Spaces ZeroGPU** - Dynamic GPU allocation for efficient inference!
    
    ### 📋 Setup Instructions:
    1. Get access to the model on [Hugging Face](https://huggingface.co/facebook/sam-3d-body-dinov3)
    2. Create a [Hugging Face token](https://huggingface.co/settings/tokens) with read access
    3. Enter your token below and click "Initialize Model"
    4. Upload an image and click "Process Image"
    
    ⚠️ **Note**: You need approved access to the SAM 3D Body repos on Hugging Face.
    
    ### ⚡ ZeroGPU Features:
    - **Dynamic GPU Allocation**: H200 GPU allocated only during inference
    - **Free GPU Access**: Available to all users with daily quotas
    - **PRO Benefits**: PRO users get 7x more quota (25 min/day vs 3.5 min/day)
    """)
    
    with gr.Row():
        # Left column: token entry, model choice, image upload and status boxes.
        with gr.Column(scale=1):
            gr.Markdown("### 🔧 Model Setup")
            # type="password" masks the token in the browser.
            hf_token_input = gr.Textbox(
                label="Hugging Face Token",
                placeholder="hf_...",
                type="password",
                info="Your HF token with read access"
            )
            # The choice labels must match the keys of repo_ids in setup_model().
            model_choice = gr.Radio(
                choices=["DINOv3 (Recommended)", "ViT-H"],
                value="DINOv3 (Recommended)",
                label="Model Selection"
            )
            setup_btn = gr.Button("🚀 Initialize Model", variant="primary")
            setup_status = gr.Textbox(label="Setup Status", interactive=False)
            
            gr.Markdown("### 📸 Upload Image")
            # type="pil" hands the callback a PIL image.
            input_image = gr.Image(
                label="Input Image",
                type="pil",
                sources=["upload", "webcam"]
            )
            process_btn = gr.Button("▶️ Process Image (GPU)", variant="primary")
            process_status = gr.Textbox(label="Processing Status", interactive=False)
        
        # Right column: rendered output plus static usage tips.
        with gr.Column(scale=1):
            gr.Markdown("### 🎨 Results")
            output_image = gr.Image(label="3D Body Estimation", type="pil")
            
            gr.Markdown("""
            ### 💡 Tips:
            - Use clear, full-body images for best results
            - Ensure good lighting and minimal occlusion
            - Person should be facing the camera
            - High resolution images work better
            - Processing time: ~30-60 seconds per image
            
            ### 📊 GPU Usage:
            - **Duration**: Up to 120 seconds per inference
            - **VRAM**: 70GB H200 GPU available
            - **Queue**: Priority based on account tier
            """)
    
    # Static footer with background information and troubleshooting hints.
    gr.Markdown("""
    ---
    ### 📚 Additional Information
    
    **Model Details:**
    - Paper: [SAM 3D Body](https://arxiv.org/abs/your-paper-link)
    - GitHub: [facebook/sam-3d-body](https://github.com/facebookresearch/sam-3d-body)
    
    **ZeroGPU Daily Quotas:**
    - Unauthenticated: 2 minutes
    - Free account: 3.5 minutes
    - PRO account: 25 minutes (7x more!)
    - Enterprise: 45 minutes
    
    **System Requirements:**
    - Python 3.10.13+
    - PyTorch 2.1.0+
    - Gradio 4+
    - ZeroGPU Space (H200 GPU)
    
    **Troubleshooting:**
    - If model fails to load, ensure you have access to the HF repo
    - GPU allocation is dynamic - wait for your turn in queue
    - Check your daily quota if processing fails
    - Clear browser cache if interface doesn't load properly
    
    **About ZeroGPU:**
    This Space uses ZeroGPU, which dynamically allocates NVIDIA H200 GPUs only during inference.
    This maximizes efficiency and allows free GPU access for AI demos!
    """)
    
    # Event handlers
    # "Initialize Model" passes the token and model label to setup_model() and
    # shows the returned status string.
    setup_btn.click(
        fn=setup_model,
        inputs=[hf_token_input, model_choice],
        outputs=setup_status
    )
    
    # "Process Image" sends the uploaded image to process_uploaded_image(),
    # whose (image, status) pair fills the result image and the status box.
    process_btn.click(
        fn=process_uploaded_image,
        inputs=input_image,
        outputs=[output_image, process_status]
    )

# Launch the app
if __name__ == "__main__":
    # Startup banner: the title framed by two 60-character rules.
    rule = "=" * 60
    print("\n" + rule)
    print("🚀 Starting SAM 3D Body Gradio App (ZeroGPU)")
    print(rule + "\n")

    # Bind to every interface on port 7860, without a public share link,
    # and surface errors in the UI.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": False,
        "show_error": True,
    }
    demo.launch(**launch_options)