JS6969 committed on
Commit e69a823 · verified · 1 Parent(s): 0f30e70

Update app.py

Files changed (1):
  app.py  +74 −28
app.py CHANGED
@@ -10,7 +10,7 @@ os.environ.setdefault("HF_HUB_ENABLE_HF_TRANSFER", "1")
  os.environ.setdefault("HF_HOME", "/home/user/.cache/huggingface")
  os.makedirs(os.environ["HF_HOME"], exist_ok=True)
  
- import io, csv, time, json, base64, re, zipfile
+ import csv, time, json, base64, re, zipfile
  from typing import List, Tuple, Dict, Any
  
  import gradio as gr
@@ -25,12 +25,13 @@ except Exception:
      def apply_liger_kernel_to_llama(*args, **kwargs):
          pass
  
- # Hugging Face Spaces GPU decorator (no-op locally)
+ # Try Spaces; we’ll use explicit @spaces.GPU() when available
  try:
      import spaces
-     gpu = spaces.GPU()
+     HAS_SPACES = True
  except Exception:
-     def gpu(f): return f  # fallback no-op
+     spaces = None
+     HAS_SPACES = False
  
  
  # ------------------------------
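The probe now records whether `spaces` is importable in a flag instead of pre-binding a `gpu` decorator at import time. A minimal sketch of how that same flag could feed a single conditional decorator; the `maybe_gpu` helper below is hypothetical (not in app.py) and would avoid duplicating function bodies across `if HAS_SPACES:` branches later in the file:

```python
# Hypothetical helper built on the HAS_SPACES flag introduced above.
# spaces.GPU() returns a decorator, so it can be applied conditionally.
try:
    import spaces
    HAS_SPACES = True
except Exception:
    spaces = None
    HAS_SPACES = False

def maybe_gpu(fn):
    """Wrap fn with @spaces.GPU() on a Space; return it unchanged elsewhere."""
    return spaces.GPU()(fn) if HAS_SPACES else fn
```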
@@ -79,7 +80,7 @@ _DTYPE = torch.float32
  
  def get_model():
      """
-     Create/reuse the model. IMPORTANT: call ONLY inside @gpu functions.
+     Create/reuse the model. IMPORTANT: call ONLY inside GPU-decorated functions on Spaces.
      Avoids CUDA init in main process (Stateless GPU rule).
      """
      global _MODEL, _DEVICE, _DTYPE
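The docstring's warning reflects the ZeroGPU Stateless GPU rule: CUDA may only be touched inside a `@spaces.GPU()`-decorated call, never in the main process at import time. A standalone illustration (a sketch assuming it runs on a ZeroGPU Space where `spaces` imports cleanly; not from app.py):

```python
import torch
import spaces

# torch.zeros(1).cuda() here, at module import, would fail on a ZeroGPU
# Space: the main process has no GPU attached.

@spaces.GPU()
def on_gpu():
    # Inside the decorated call a GPU is attached, so CUDA init is safe.
    return torch.zeros(1).cuda()
```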
@@ -93,7 +94,7 @@ def get_model():
          low_cpu_mem_usage=True,
          device_map=0,
      )
-     # Try to enable Liger on the LLM submodule (best-effort, silent if missing)
+     # Try to enable Liger on the LLM submodule (best-effort)
      try:
          lm = getattr(_MODEL, "language_model", None) or getattr(_MODEL, "model", None)
          if lm is not None:
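For context, this best-effort block guards the cases where the checkpoint is not LLaMA-based or Liger is missing; the diff shows only the guard, not the call. A sketch of the full pattern, assuming liger_kernel's `apply_liger_kernel_to_llama(model=...)` in-place patching API (the call form is an assumption; verify against your liger_kernel version):

```python
def enable_liger_best_effort(model) -> None:
    """Patch a LLaMA-style decoder submodule with Liger kernels if possible."""
    try:
        from liger_kernel.transformers import apply_liger_kernel_to_llama
        lm = getattr(model, "language_model", None) or getattr(model, "model", None)
        if lm is not None:
            # `model=` patches an already-instantiated module in place
            # (an assumption about the liger_kernel API).
            apply_liger_kernel_to_llama(model=lm)
    except Exception:
        pass  # best-effort: fall back to stock kernels silently
```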
@@ -452,6 +453,70 @@ def _build_inputs(im: Image.Image, instr: str, dtype) -> Dict[str, Any]:
      inputs["pixel_values"] = inputs["pixel_values"].to(dtype)
      return inputs
  
+ # ---- caption_single (explicit @spaces.GPU() on Spaces) ----
+ if HAS_SPACES:
+     @spaces.GPU()
+     @torch.no_grad()
+     def caption_single(img: Image.Image, instr: str) -> str:
+         if img is None:
+             return "No image provided."
+         s = load_settings()
+         im = resize_for_model(img, int(s.get("max_side", 896)))
+         cap = caption_once_core(im, instr, s)
+         return cap
+ else:
+     @torch.no_grad()
+     def caption_single(img: Image.Image, instr: str) -> str:
+         if img is None:
+             return "No image provided."
+         s = load_settings()
+         im = resize_for_model(img, int(s.get("max_side", 896)))
+         cap = caption_once_core(im, instr, s)
+         return cap
+ 
+ # ---- run_batch (explicit @spaces.GPU() on Spaces) ----
+ if HAS_SPACES:
+     @spaces.GPU()
+     @torch.no_grad()
+     def run_batch(
+         files: List[Any],
+         session_rows: List[dict],
+         instr_text: str,
+         temp: float,
+         top_p: float,
+         max_tokens: int,
+         max_side: int,
+         time_budget_s: float | None = None,
+         progress: gr.Progress = gr.Progress(track_tqdm=True),
+     ) -> Tuple[List[dict], list, list, str, List[str], int, int]:
+         return run_batch_core(files, session_rows, instr_text, temp, top_p, max_tokens, max_side, time_budget_s, progress)
+ else:
+     @torch.no_grad()
+     def run_batch(
+         files: List[Any],
+         session_rows: List[dict],
+         instr_text: str,
+         temp: float,
+         top_p: float,
+         max_tokens: int,
+         max_side: int,
+         time_budget_s: float | None = None,
+         progress: gr.Progress = gr.Progress(track_tqdm=True),
+     ) -> Tuple[List[dict], list, list, str, List[str], int, int]:
+         return run_batch_core(files, session_rows, instr_text, temp, top_p, max_tokens, max_side, time_budget_s, progress)
+ 
+ # ---- shared core routines used by both CPU and GPU-decorated wrappers ----
+ def caption_once_core(im: Image.Image, instr: str, settings: dict) -> str:
+     cap = caption_once(
+         im, instr,
+         settings.get("temperature", 0.6),
+         settings.get("top_p", 0.9),
+         settings.get("max_tokens", 256),
+     )
+     cap = apply_shape_aliases(cap)
+     cap = apply_prefix_suffix(cap, settings.get("trigger",""), settings.get("begin",""), settings.get("end",""))
+     return cap
+ 
  @torch.no_grad()
  def caption_once(im: Image.Image, instr: str, temp: float, top_p: float, max_tokens: int) -> str:
      model, device, dtype = get_model()
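Design-wise, the hunks that follow strip the old `@gpu`-decorated implementations: the decorated `caption_single`/`run_batch` above are now thin wrappers, and the real work lives in undecorated `*_core` functions that behave identically with or without Spaces. The shape of that split, reduced to a generic sketch (names illustrative, not from app.py):

```python
# Generic wrapper/core split: the wrapper owns the (optional) GPU decoration
# and the default arguments; the core holds the logic and stays directly
# callable, e.g. from tests, without any Spaces machinery.
def work_core(x: int, budget: float | None) -> int:
    return x * 2  # stand-in for the real batch loop

def work(x: int, budget: float | None = None) -> int:
    return work_core(x, budget)  # decorate `work`, never `work_core`
```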
@@ -468,21 +533,7 @@ def caption_once(im: Image.Image, instr: str, temp: float, top_p: float, max_tok
      gen_ids = out[0, inputs["input_ids"].shape[1]:]
      return processor.tokenizer.decode(gen_ids, skip_special_tokens=True)
  
- @gpu
- @torch.no_grad()
- def caption_single(img: Image.Image, instr: str) -> str:
-     if img is None:
-         return "No image provided."
-     s = load_settings()
-     im = resize_for_model(img, int(s.get("max_side", 896)))
-     cap = caption_once(im, instr, s.get("temperature",0.6), s.get("top_p",0.9), s.get("max_tokens",256))
-     cap = apply_shape_aliases(cap)
-     cap = apply_prefix_suffix(cap, s.get("trigger",""), s.get("begin",""), s.get("end",""))
-     return cap
- 
- @gpu
- @torch.no_grad()
- def run_batch(
+ def run_batch_core(
      files: List[Any],
      session_rows: List[dict],
      instr_text: str,
@@ -490,14 +541,9 @@ def run_batch(
      top_p: float,
      max_tokens: int,
      max_side: int,
-     time_budget_s: float | None = None,
-     progress: gr.Progress = gr.Progress(track_tqdm=True),
+     time_budget_s: float | None,
+     progress: gr.Progress,
  ) -> Tuple[List[dict], list, list, str, List[str], int, int]:
-     """
-     Returns:
-         session_rows, gallery_pairs, table_rows, status_text,
-         leftover_files, processed_in_this_call, total_in_this_call
-     """
      session_rows = session_rows or []
      files = [f for f in (files or []) if f and os.path.exists(f)]
      total = len(files)
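Note that `run_batch_core` now takes `time_budget_s` and `progress` as required parameters; the Gradio-facing defaults (`None`, `gr.Progress(track_tqdm=True)`) live only on the decorated `run_batch` wrappers. A hedged sketch of calling the core directly (argument values are made up, and `gr.Progress` methods generally need a live Gradio event context to report anything):

```python
# Illustrative direct call; file names and settings are assumptions.
rows, gallery, table, status, leftover, done, total = run_batch_core(
    files=["a.png", "b.png"],
    session_rows=[],
    instr_text="Describe the image.",
    temp=0.6,
    top_p=0.9,
    max_tokens=256,
    max_side=896,
    time_budget_s=None,                      # no time slicing
    progress=gr.Progress(track_tqdm=True),   # meaningful inside a Gradio event
)
```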
 