Spaces:

JS6969
/

ForgeCaptions

Sleeping

App Files Files Community

JS6969 committed on Sep 6

Commit

e69a823

verified ·

1 Parent(s): 0f30e70

Update app.py

Browse files

Files changed (1) hide show

app.py +74 -28

app.py CHANGED Viewed

@@ -10,7 +10,7 @@ os.environ.setdefault("HF_HUB_ENABLE_HF_TRANSFER", "1")
 os.environ.setdefault("HF_HOME", "/home/user/.cache/huggingface")
 os.makedirs(os.environ["HF_HOME"], exist_ok=True)
-import io, csv, time, json, base64, re, zipfile
 from typing import List, Tuple, Dict, Any
 import gradio as gr
@@ -25,12 +25,13 @@ except Exception:
     def apply_liger_kernel_to_llama(*args, **kwargs):
         pass
-# Hugging Face Spaces GPU decorator (no-op locally)
 try:
     import spaces
-    gpu = spaces.GPU()
 except Exception:
-    def gpu(f): return f  # fallback no-op
 # ------------------------------
@@ -79,7 +80,7 @@ _DTYPE = torch.float32
 def get_model():
     """
-    Create/reuse the model. IMPORTANT: call ONLY inside @gpu functions.
     Avoids CUDA init in main process (Stateless GPU rule).
     """
     global _MODEL, _DEVICE, _DTYPE
@@ -93,7 +94,7 @@ def get_model():
                 low_cpu_mem_usage=True,
                 device_map=0,
             )
-            # Try to enable Liger on the LLM submodule (best-effort, silent if missing)
             try:
                 lm = getattr(_MODEL, "language_model", None) or getattr(_MODEL, "model", None)
                 if lm is not None:
@@ -452,6 +453,70 @@ def _build_inputs(im: Image.Image, instr: str, dtype) -> Dict[str, Any]:
         inputs["pixel_values"] = inputs["pixel_values"].to(dtype)
     return inputs
 @torch.no_grad()
 def caption_once(im: Image.Image, instr: str, temp: float, top_p: float, max_tokens: int) -> str:
     model, device, dtype = get_model()
@@ -468,21 +533,7 @@ def caption_once(im: Image.Image, instr: str, temp: float, top_p: float, max_tok
     gen_ids = out[0, inputs["input_ids"].shape[1]:]
     return processor.tokenizer.decode(gen_ids, skip_special_tokens=True)
-@gpu
-@torch.no_grad()
-def caption_single(img: Image.Image, instr: str) -> str:
-    if img is None:
-        return "No image provided."
-    s = load_settings()
-    im = resize_for_model(img, int(s.get("max_side", 896)))
-    cap = caption_once(im, instr, s.get("temperature",0.6), s.get("top_p",0.9), s.get("max_tokens",256))
-    cap = apply_shape_aliases(cap)
-    cap = apply_prefix_suffix(cap, s.get("trigger",""), s.get("begin",""), s.get("end",""))
-    return cap
-@gpu
-@torch.no_grad()
-def run_batch(
     files: List[Any],
     session_rows: List[dict],
     instr_text: str,
@@ -490,14 +541,9 @@ def run_batch(
     top_p: float,
     max_tokens: int,
     max_side: int,
-    time_budget_s: float | None = None,
-    progress: gr.Progress = gr.Progress(track_tqdm=True),
 ) -> Tuple[List[dict], list, list, str, List[str], int, int]:
-    """
-    Returns:
-      session_rows, gallery_pairs, table_rows, status_text,
-      leftover_files, processed_in_this_call, total_in_this_call
-    """
     session_rows = session_rows or []
     files = [f for f in (files or []) if f and os.path.exists(f)]
     total = len(files)

 os.environ.setdefault("HF_HOME", "/home/user/.cache/huggingface")
 os.makedirs(os.environ["HF_HOME"], exist_ok=True)
+import csv, time, json, base64, re, zipfile
 from typing import List, Tuple, Dict, Any
 import gradio as gr
     def apply_liger_kernel_to_llama(*args, **kwargs):
         pass
+# Try Spaces; we’ll use explicit @spaces.GPU() when available
 try:
     import spaces
+    HAS_SPACES = True  # gates the @spaces.GPU()-decorated definitions of caption_single / run_batch
 except Exception:
+    spaces = None  # bind the name even when the import fails
+    HAS_SPACES = False  # any import failure (not only ImportError) selects the undecorated definitions
 # ------------------------------
 def get_model():
     """
+    Create/reuse the model. IMPORTANT: call ONLY inside GPU-decorated functions on Spaces.
     Avoids CUDA init in main process (Stateless GPU rule).
     """
     global _MODEL, _DEVICE, _DTYPE
                 low_cpu_mem_usage=True,
                 device_map=0,
             )
+            # Try to enable Liger on the LLM submodule (best-effort)
             try:
                 lm = getattr(_MODEL, "language_model", None) or getattr(_MODEL, "model", None)
                 if lm is not None:
         inputs["pixel_values"] = inputs["pixel_values"].to(dtype)
     return inputs
+# ---- caption_single: caption one image using the saved settings; both branches share one body, only the explicit @spaces.GPU() decorator differs ----
+if HAS_SPACES:  # `spaces` imported successfully: define the GPU-decorated variant
+    @spaces.GPU()
+    @torch.no_grad()  # gradient tracking is disabled for the whole call
+    def caption_single(img: Image.Image, instr: str) -> str:
+        if img is None:  # nothing to caption: return a message string instead of raising
+            return "No image provided."
+        s = load_settings()
+        im = resize_for_model(img, int(s.get("max_side", 896)))  # max_side falls back to 896 when the setting is absent
+        cap = caption_once_core(im, instr, s)  # defined further down the module; resolved at call time
+        return cap
+else:  # `spaces` unavailable: identical function without the GPU decorator
+    @torch.no_grad()  # gradient tracking is disabled for the whole call
+    def caption_single(img: Image.Image, instr: str) -> str:
+        if img is None:  # nothing to caption: return a message string instead of raising
+            return "No image provided."
+        s = load_settings()
+        im = resize_for_model(img, int(s.get("max_side", 896)))  # max_side falls back to 896 when the setting is absent
+        cap = caption_once_core(im, instr, s)  # defined further down the module; resolved at call time
+        return cap
+# ---- run_batch: thin entry points that forward every argument to run_batch_core; only the explicit @spaces.GPU() decorator differs ----
+if HAS_SPACES:  # `spaces` imported successfully: define the GPU-decorated variant
+    @spaces.GPU()
+    @torch.no_grad()  # gradient tracking is disabled for the whole call
+    def run_batch(
+        files: List[Any],
+        session_rows: List[dict],
+        instr_text: str,
+        temp: float,
+        top_p: float,
+        max_tokens: int,
+        max_side: int,
+        time_budget_s: float | None = None,  # optional here; run_batch_core takes it without a default
+        progress: gr.Progress = gr.Progress(track_tqdm=True),  # NOTE(review): presumably Gradio substitutes its own tracker for this default — confirm
+    ) -> Tuple[List[dict], list, list, str, List[str], int, int]:
+        return run_batch_core(files, session_rows, instr_text, temp, top_p, max_tokens, max_side, time_budget_s, progress)  # positional: order must match run_batch_core's parameters
+else:  # `spaces` unavailable: identical wrapper without the GPU decorator
+    @torch.no_grad()  # gradient tracking is disabled for the whole call
+    def run_batch(
+        files: List[Any],
+        session_rows: List[dict],
+        instr_text: str,
+        temp: float,
+        top_p: float,
+        max_tokens: int,
+        max_side: int,
+        time_budget_s: float | None = None,  # optional here; run_batch_core takes it without a default
+        progress: gr.Progress = gr.Progress(track_tqdm=True),  # NOTE(review): presumably Gradio substitutes its own tracker for this default — confirm
+    ) -> Tuple[List[dict], list, list, str, List[str], int, int]:
+        return run_batch_core(files, session_rows, instr_text, temp, top_p, max_tokens, max_side, time_budget_s, progress)  # positional: order must match run_batch_core's parameters
+# ---- shared core routines used by both CPU and GPU-decorated wrappers ----
+def caption_once_core(im: Image.Image, instr: str, settings: dict) -> str:  # generate one caption via caption_once, then post-process it
+    cap = caption_once(
+        im, instr,
+        settings.get("temperature", 0.6),  # fallback values below apply when the key is missing from settings
+        settings.get("top_p", 0.9),
+        settings.get("max_tokens", 256),
+    )
+    cap = apply_shape_aliases(cap)  # NOTE(review): presumably rewrites shape terms using configured aliases — confirm against the helper
+    cap = apply_prefix_suffix(cap, settings.get("trigger",""), settings.get("begin",""), settings.get("end",""))  # missing trigger/begin/end default to ""
+    return cap
 @torch.no_grad()
 def caption_once(im: Image.Image, instr: str, temp: float, top_p: float, max_tokens: int) -> str:
     model, device, dtype = get_model()
     gen_ids = out[0, inputs["input_ids"].shape[1]:]
     return processor.tokenizer.decode(gen_ids, skip_special_tokens=True)
+def run_batch_core(
     files: List[Any],
     session_rows: List[dict],
     instr_text: str,
     top_p: float,
     max_tokens: int,
     max_side: int,
+    time_budget_s: float | None,
+    progress: gr.Progress,
 ) -> Tuple[List[dict], list, list, str, List[str], int, int]:
     session_rows = session_rows or []
     files = [f for f in (files or []) if f and os.path.exists(f)]
     total = len(files)