Update app.py

app.py CHANGED
@@ -1,5 +1,5 @@
 # =====================================================================
-# ForgeCaptions - Gradio app for single & batch image captioning
+# ForgeCaptions - Gradio app for single & batch image captioning (Spaces-only)
 # =====================================================================
 
 # ------------------------------
@@ -17,6 +17,7 @@ import gradio as gr
 from PIL import Image
 import torch
 from transformers import LlavaForConditionalGeneration, AutoProcessor
+import spaces  # Spaces-only
 
 # Optional: Liger kernel (ignored if missing)
 try:
@@ -25,14 +26,6 @@ except Exception:
     def apply_liger_kernel_to_llama(*args, **kwargs):
         pass
 
-# Try Spaces; we’ll use explicit @spaces.GPU() when available
-try:
-    import spaces
-    HAS_SPACES = True
-except Exception:
-    spaces = None
-    HAS_SPACES = False
-
 
 # ------------------------------
 # 1) Paths & small constants
@@ -80,8 +73,8 @@ _DTYPE = torch.float32
 
 def get_model():
     """
-    Create/reuse the model.
-
+    Create/reuse the model.
+    IMPORTANT: call ONLY inside @spaces.GPU() functions on Spaces (ZeroGPU stateless rule).
     """
     global _MODEL, _DEVICE, _DTYPE
     if _MODEL is None:
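The docstring added above states the ZeroGPU contract: model creation and any other CUDA work must happen inside a @spaces.GPU()-decorated function. A minimal sketch of that lazy-init pattern, with a stand-in loader instead of the app's real from_pretrained(...) call, and a hypothetical predict function:

import spaces
import torch

_MODEL = None  # module-level cache, reused across calls in the same process

def get_model():
    # Only call from inside @spaces.GPU() functions (ZeroGPU stateless rule).
    global _MODEL
    if _MODEL is None:
        _MODEL = torch.nn.Linear(4, 4).to("cuda")  # stand-in for the real loader
    return _MODEL

@spaces.GPU()            # ZeroGPU attaches a GPU only for this call's duration
@torch.no_grad()
def predict(x: torch.Tensor) -> torch.Tensor:
    model = get_model()  # safe: we are inside a GPU-decorated function
    return model(x.to("cuda"))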
@@ -94,7 +87,7 @@ def get_model():
             low_cpu_mem_usage=True,
             device_map=0,
         )
-        #
+        # Best-effort Liger on the LLM submodule
         try:
             lm = getattr(_MODEL, "language_model", None) or getattr(_MODEL, "model", None)
             if lm is not None:
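This hunk pairs with the optional-import stub kept earlier in the file: when liger_kernel is missing, apply_liger_kernel_to_llama is a do-nothing function, so the block can run unconditionally. A condensed sketch of the whole best-effort pattern; the instance-level model= call is an assumption about the Liger API, and _maybe_apply_liger is a hypothetical helper:

# Optional: Liger kernel (no-op stub if the package is missing)
try:
    from liger_kernel.transformers import apply_liger_kernel_to_llama
except Exception:
    def apply_liger_kernel_to_llama(*args, **kwargs):
        pass

def _maybe_apply_liger(model) -> None:
    # Best-effort: locate the Llama-style LLM submodule and patch it in place;
    # failures are swallowed because the kernel is purely an optimization.
    try:
        lm = getattr(model, "language_model", None) or getattr(model, "model", None)
        if lm is not None:
            apply_liger_kernel_to_llama(model=lm)  # assumed instance-level API
    except Exception:
        pass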
@@ -230,7 +223,6 @@ def load_settings() -> dict:
     except Exception:
         cfg = {}
 
-    # Defaults
     defaults = {
         "dataset_name": "forgecaptions",
         "temperature": 0.6,
@@ -254,7 +246,6 @@ def load_settings() -> dict:
     for k, v in defaults.items():
         cfg.setdefault(k, v)
 
-    # Normalize
     styles = cfg.get("styles") or []
     if not isinstance(styles, list):
         styles = [styles]
@@ -440,7 +431,7 @@ def final_instruction(style_list: List[str], extra_opts: List[str], name_value:
 
 
 # ------------------------------
-# 8) GPU caption functions
+# 8) GPU caption functions (Spaces-only)
 # ------------------------------
 def _build_inputs(im: Image.Image, instr: str, dtype) -> Dict[str, Any]:
     convo = [
@@ -453,59 +444,37 @@ def _build_inputs(im: Image.Image, instr: str, dtype) -> Dict[str, Any]:
     inputs["pixel_values"] = inputs["pixel_values"].to(dtype)
     return inputs
 
-… (start of the removed "if HAS_SPACES:" branch with the @spaces.GPU()-decorated wrappers; not recoverable from the source) …
-        max_tokens: int,
-        max_side: int,
-        time_budget_s: float | None = None,
-        progress: gr.Progress = gr.Progress(track_tqdm=True),
-    ) -> Tuple[List[dict], list, list, str, List[str], int, int]:
-        return run_batch_core(files, session_rows, instr_text, temp, top_p, max_tokens, max_side, time_budget_s, progress)
-else:
-    @torch.no_grad()
-    def run_batch(
-        files: List[Any],
-        session_rows: List[dict],
-        instr_text: str,
-        temp: float,
-        top_p: float,
-        max_tokens: int,
-        max_side: int,
-        time_budget_s: float | None = None,
-        progress: gr.Progress = gr.Progress(track_tqdm=True),
-    ) -> Tuple[List[dict], list, list, str, List[str], int, int]:
-        return run_batch_core(files, session_rows, instr_text, temp, top_p, max_tokens, max_side, time_budget_s, progress)
-
-# ---- shared core routines used by both CPU and GPU-decorated wrappers ----
+@spaces.GPU()
+@torch.no_grad()
+def caption_single(img: Image.Image, instr: str) -> str:
+    if img is None:
+        return "No image provided."
+    s = load_settings()
+    im = resize_for_model(img, int(s.get("max_side", 896)))
+    cap = caption_once_core(im, instr, s)
+    return cap
+
+@spaces.GPU()
+@torch.no_grad()
+def run_batch(
+    files: List[Any],
+    session_rows: List[dict],
+    instr_text: str,
+    temp: float,
+    top_p: float,
+    max_tokens: int,
+    max_side: int,
+    time_budget_s: float | None = None,
+    progress: gr.Progress = gr.Progress(track_tqdm=True),
+) -> Tuple[List[dict], list, list, str, List[str], int, int]:
+    return run_batch_core(files, session_rows, instr_text, temp, top_p, max_tokens, max_side, time_budget_s, progress)
+
+# Optional tiny probe to satisfy strict scanners (not called)
+@spaces.GPU()
+def _gpu_probe() -> str:
+    return "ok"
+
+# ---- shared core routines used by both GPU functions ----
 def caption_once_core(im: Image.Image, instr: str, settings: dict) -> str:
     cap = caption_once(
         im, instr,
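Since both entry points are now unconditionally decorated with @spaces.GPU(), Gradio event handlers can call them directly. A hedged sketch of that wiring; the component and variable names here are assumptions, not taken from this diff:

with gr.Blocks(title="ForgeCaptions") as demo:
    img_in = gr.Image(type="pil", label="Image")
    instr_in = gr.Textbox(label="Instruction")
    cap_out = gr.Textbox(label="Caption")
    run_btn = gr.Button("Caption")
    # ZeroGPU allocates a GPU for the duration of each decorated call.
    run_btn.click(fn=caption_single, inputs=[img_in, instr_in], outputs=cap_out)

demo.launch()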
@@ -862,9 +831,8 @@ with gr.Blocks(css=BASE_CSS, title="ForgeCaptions") as demo:
         gr.Markdown(
             "### 🔷 Shape Aliases\n"
             "Replace literal **shape tokens** in captions with a preferred **name**.\n\n"
-            "**
-            "-
-            "- Right column = replacement name, e.g. `family-emblem`\n"
+            "- Left column = a single token **or** comma/pipe-separated synonyms (e.g., `diamond, rhombus | lozenge`)\n"
+            "- Right column = replacement name (e.g., `family-emblem`)\n"
             "Matches are case-insensitive, catches simple plurals, and also matches `*-shaped` / `* shaped` variants."
         )
 
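The rewritten help text fully specifies the matching rules, which a short sketch makes concrete. This is an illustration of those rules only, not the app's actual implementation; both function names are hypothetical:

import re

def _alias_pattern(token: str) -> re.Pattern:
    # Case-insensitive; also catches simple plurals ("diamonds") and the
    # "*-shaped" / "* shaped" variants described in the help text.
    t = re.escape(token.strip())
    return re.compile(rf"\b{t}(?:e?s)?(?:[- ]shaped)?\b", re.IGNORECASE)

def apply_shape_aliases(caption: str, rows: list[tuple[str, str]]) -> str:
    # rows holds (left, right) table cells; the left cell may contain
    # comma/pipe-separated synonyms that all map to the same name.
    for left, name in rows:
        for tok in re.split(r"[,|]", left):
            if tok.strip():
                caption = _alias_pattern(tok).sub(name, caption)
    return caption

# apply_shape_aliases("A diamond-shaped crest",
#                     [("diamond, rhombus | lozenge", "family-emblem")])
# -> "A family-emblem crest"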