Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -10,7 +10,7 @@ os.environ.setdefault("HF_HUB_ENABLE_HF_TRANSFER", "1")
|
|
| 10 |
os.environ.setdefault("HF_HOME", "/home/user/.cache/huggingface")
|
| 11 |
os.makedirs(os.environ["HF_HOME"], exist_ok=True)
|
| 12 |
|
| 13 |
-
import
|
| 14 |
from typing import List, Tuple, Dict, Any
|
| 15 |
|
| 16 |
import gradio as gr
|
|
@@ -25,12 +25,13 @@ except Exception:
|
|
| 25 |
def apply_liger_kernel_to_llama(*args, **kwargs):
|
| 26 |
pass
|
| 27 |
|
| 28 |
-
#
|
| 29 |
try:
|
| 30 |
import spaces
|
| 31 |
-
|
| 32 |
except Exception:
|
| 33 |
-
|
|
|
|
| 34 |
|
| 35 |
|
| 36 |
# ------------------------------
|
|
@@ -79,7 +80,7 @@ _DTYPE = torch.float32
|
|
| 79 |
|
| 80 |
def get_model():
|
| 81 |
"""
|
| 82 |
-
Create/reuse the model. IMPORTANT: call ONLY inside
|
| 83 |
Avoids CUDA init in main process (Stateless GPU rule).
|
| 84 |
"""
|
| 85 |
global _MODEL, _DEVICE, _DTYPE
|
|
@@ -93,7 +94,7 @@ def get_model():
|
|
| 93 |
low_cpu_mem_usage=True,
|
| 94 |
device_map=0,
|
| 95 |
)
|
| 96 |
-
# Try to enable Liger on the LLM submodule (best-effort
|
| 97 |
try:
|
| 98 |
lm = getattr(_MODEL, "language_model", None) or getattr(_MODEL, "model", None)
|
| 99 |
if lm is not None:
|
|
@@ -452,6 +453,70 @@ def _build_inputs(im: Image.Image, instr: str, dtype) -> Dict[str, Any]:
|
|
| 452 |
inputs["pixel_values"] = inputs["pixel_values"].to(dtype)
|
| 453 |
return inputs
|
| 454 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 455 |
@torch.no_grad()
|
| 456 |
def caption_once(im: Image.Image, instr: str, temp: float, top_p: float, max_tokens: int) -> str:
|
| 457 |
model, device, dtype = get_model()
|
|
@@ -468,21 +533,7 @@ def caption_once(im: Image.Image, instr: str, temp: float, top_p: float, max_tok
|
|
| 468 |
gen_ids = out[0, inputs["input_ids"].shape[1]:]
|
| 469 |
return processor.tokenizer.decode(gen_ids, skip_special_tokens=True)
|
| 470 |
|
| 471 |
-
|
| 472 |
-
@torch.no_grad()
|
| 473 |
-
def caption_single(img: Image.Image, instr: str) -> str:
|
| 474 |
-
if img is None:
|
| 475 |
-
return "No image provided."
|
| 476 |
-
s = load_settings()
|
| 477 |
-
im = resize_for_model(img, int(s.get("max_side", 896)))
|
| 478 |
-
cap = caption_once(im, instr, s.get("temperature",0.6), s.get("top_p",0.9), s.get("max_tokens",256))
|
| 479 |
-
cap = apply_shape_aliases(cap)
|
| 480 |
-
cap = apply_prefix_suffix(cap, s.get("trigger",""), s.get("begin",""), s.get("end",""))
|
| 481 |
-
return cap
|
| 482 |
-
|
| 483 |
-
@gpu
|
| 484 |
-
@torch.no_grad()
|
| 485 |
-
def run_batch(
|
| 486 |
files: List[Any],
|
| 487 |
session_rows: List[dict],
|
| 488 |
instr_text: str,
|
|
@@ -490,14 +541,9 @@ def run_batch(
|
|
| 490 |
top_p: float,
|
| 491 |
max_tokens: int,
|
| 492 |
max_side: int,
|
| 493 |
-
time_budget_s: float | None
|
| 494 |
-
progress: gr.Progress
|
| 495 |
) -> Tuple[List[dict], list, list, str, List[str], int, int]:
|
| 496 |
-
"""
|
| 497 |
-
Returns:
|
| 498 |
-
session_rows, gallery_pairs, table_rows, status_text,
|
| 499 |
-
leftover_files, processed_in_this_call, total_in_this_call
|
| 500 |
-
"""
|
| 501 |
session_rows = session_rows or []
|
| 502 |
files = [f for f in (files or []) if f and os.path.exists(f)]
|
| 503 |
total = len(files)
|
|
|
|
| 10 |
os.environ.setdefault("HF_HOME", "/home/user/.cache/huggingface")
|
| 11 |
os.makedirs(os.environ["HF_HOME"], exist_ok=True)
|
| 12 |
|
| 13 |
+
import csv, time, json, base64, re, zipfile
|
| 14 |
from typing import List, Tuple, Dict, Any
|
| 15 |
|
| 16 |
import gradio as gr
|
|
|
|
| 25 |
def apply_liger_kernel_to_llama(*args, **kwargs):
|
| 26 |
pass
|
| 27 |
|
| 28 |
+
# Try Spaces; we’ll use explicit @spaces.GPU() when available
|
| 29 |
try:
|
| 30 |
import spaces
|
| 31 |
+
HAS_SPACES = True
|
| 32 |
except Exception:
|
| 33 |
+
spaces = None
|
| 34 |
+
HAS_SPACES = False
|
| 35 |
|
| 36 |
|
| 37 |
# ------------------------------
|
|
|
|
| 80 |
|
| 81 |
def get_model():
|
| 82 |
"""
|
| 83 |
+
Create/reuse the model. IMPORTANT: call ONLY inside GPU-decorated functions on Spaces.
|
| 84 |
Avoids CUDA init in main process (Stateless GPU rule).
|
| 85 |
"""
|
| 86 |
global _MODEL, _DEVICE, _DTYPE
|
|
|
|
| 94 |
low_cpu_mem_usage=True,
|
| 95 |
device_map=0,
|
| 96 |
)
|
| 97 |
+
# Try to enable Liger on the LLM submodule (best-effort)
|
| 98 |
try:
|
| 99 |
lm = getattr(_MODEL, "language_model", None) or getattr(_MODEL, "model", None)
|
| 100 |
if lm is not None:
|
|
|
|
| 453 |
inputs["pixel_values"] = inputs["pixel_values"].to(dtype)
|
| 454 |
return inputs
|
| 455 |
|
| 456 |
+
# ---- caption_single (explicit @spaces.GPU() on Spaces) ----
|
| 457 |
+
if HAS_SPACES:
|
| 458 |
+
@spaces.GPU()
|
| 459 |
+
@torch.no_grad()
|
| 460 |
+
def caption_single(img: Image.Image, instr: str) -> str:
|
| 461 |
+
if img is None:
|
| 462 |
+
return "No image provided."
|
| 463 |
+
s = load_settings()
|
| 464 |
+
im = resize_for_model(img, int(s.get("max_side", 896)))
|
| 465 |
+
cap = caption_once_core(im, instr, s)
|
| 466 |
+
return cap
|
| 467 |
+
else:
|
| 468 |
+
@torch.no_grad()
|
| 469 |
+
def caption_single(img: Image.Image, instr: str) -> str:
|
| 470 |
+
if img is None:
|
| 471 |
+
return "No image provided."
|
| 472 |
+
s = load_settings()
|
| 473 |
+
im = resize_for_model(img, int(s.get("max_side", 896)))
|
| 474 |
+
cap = caption_once_core(im, instr, s)
|
| 475 |
+
return cap
|
| 476 |
+
|
| 477 |
+
# ---- run_batch (explicit @spaces.GPU() on Spaces) ----
|
| 478 |
+
if HAS_SPACES:
|
| 479 |
+
@spaces.GPU()
|
| 480 |
+
@torch.no_grad()
|
| 481 |
+
def run_batch(
|
| 482 |
+
files: List[Any],
|
| 483 |
+
session_rows: List[dict],
|
| 484 |
+
instr_text: str,
|
| 485 |
+
temp: float,
|
| 486 |
+
top_p: float,
|
| 487 |
+
max_tokens: int,
|
| 488 |
+
max_side: int,
|
| 489 |
+
time_budget_s: float | None = None,
|
| 490 |
+
progress: gr.Progress = gr.Progress(track_tqdm=True),
|
| 491 |
+
) -> Tuple[List[dict], list, list, str, List[str], int, int]:
|
| 492 |
+
return run_batch_core(files, session_rows, instr_text, temp, top_p, max_tokens, max_side, time_budget_s, progress)
|
| 493 |
+
else:
|
| 494 |
+
@torch.no_grad()
|
| 495 |
+
def run_batch(
|
| 496 |
+
files: List[Any],
|
| 497 |
+
session_rows: List[dict],
|
| 498 |
+
instr_text: str,
|
| 499 |
+
temp: float,
|
| 500 |
+
top_p: float,
|
| 501 |
+
max_tokens: int,
|
| 502 |
+
max_side: int,
|
| 503 |
+
time_budget_s: float | None = None,
|
| 504 |
+
progress: gr.Progress = gr.Progress(track_tqdm=True),
|
| 505 |
+
) -> Tuple[List[dict], list, list, str, List[str], int, int]:
|
| 506 |
+
return run_batch_core(files, session_rows, instr_text, temp, top_p, max_tokens, max_side, time_budget_s, progress)
|
| 507 |
+
|
| 508 |
+
# ---- shared core routines used by both CPU and GPU-decorated wrappers ----
|
| 509 |
+
def caption_once_core(im: Image.Image, instr: str, settings: dict) -> str:
|
| 510 |
+
cap = caption_once(
|
| 511 |
+
im, instr,
|
| 512 |
+
settings.get("temperature", 0.6),
|
| 513 |
+
settings.get("top_p", 0.9),
|
| 514 |
+
settings.get("max_tokens", 256),
|
| 515 |
+
)
|
| 516 |
+
cap = apply_shape_aliases(cap)
|
| 517 |
+
cap = apply_prefix_suffix(cap, settings.get("trigger",""), settings.get("begin",""), settings.get("end",""))
|
| 518 |
+
return cap
|
| 519 |
+
|
| 520 |
@torch.no_grad()
|
| 521 |
def caption_once(im: Image.Image, instr: str, temp: float, top_p: float, max_tokens: int) -> str:
|
| 522 |
model, device, dtype = get_model()
|
|
|
|
| 533 |
gen_ids = out[0, inputs["input_ids"].shape[1]:]
|
| 534 |
return processor.tokenizer.decode(gen_ids, skip_special_tokens=True)
|
| 535 |
|
| 536 |
+
def run_batch_core(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 537 |
files: List[Any],
|
| 538 |
session_rows: List[dict],
|
| 539 |
instr_text: str,
|
|
|
|
| 541 |
top_p: float,
|
| 542 |
max_tokens: int,
|
| 543 |
max_side: int,
|
| 544 |
+
time_budget_s: float | None,
|
| 545 |
+
progress: gr.Progress,
|
| 546 |
) -> Tuple[List[dict], list, list, str, List[str], int, int]:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 547 |
session_rows = session_rows or []
|
| 548 |
files = [f for f in (files or []) if f and os.path.exists(f)]
|
| 549 |
total = len(files)
|