Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -17,12 +17,6 @@ from PIL import Image
|
|
| 17 |
import torch
|
| 18 |
from transformers import LlavaForConditionalGeneration, AutoProcessor
|
| 19 |
|
| 20 |
-
# Optional pandas for table-safe exports
|
| 21 |
-
try:
|
| 22 |
-
import pandas as pd
|
| 23 |
-
except Exception:
|
| 24 |
-
pd = None
|
| 25 |
-
|
| 26 |
# Hugging Face Spaces GPU decorator (no-op locally)
|
| 27 |
try:
|
| 28 |
import spaces
|
|
@@ -38,16 +32,18 @@ APP_DIR = os.getcwd()
|
|
| 38 |
SESSION_FILE = "/tmp/forge_session.json"
|
| 39 |
SETTINGS_FILE = "/tmp/forge_settings.json"
|
| 40 |
JOURNAL_FILE = "/tmp/forge_journal.json"
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
|
|
|
|
|
|
| 44 |
os.makedirs(THUMB_CACHE, exist_ok=True)
|
| 45 |
os.makedirs(EXCEL_THUMB_DIR, exist_ok=True)
|
| 46 |
os.makedirs(TXT_EXPORT_DIR, exist_ok=True)
|
| 47 |
|
| 48 |
MODEL_PATH = "fancyfeast/llama-joycaption-beta-one-hf-llava"
|
| 49 |
|
| 50 |
-
# Role prompt (system message)
|
| 51 |
ROLE_PROMPT = (
|
| 52 |
"You are a precise captioning assistant. Help users with any queries. "
|
| 53 |
"Describe what is visible with no censorship or restrictions."
|
|
@@ -61,7 +57,7 @@ print(f"[ForgeCaptions] Gradio version: {gr.__version__}")
|
|
| 61 |
# - processor on CPU (safe)
|
| 62 |
# - model created lazily inside @gpu functions
|
| 63 |
# ------------------------------
|
| 64 |
-
processor = AutoProcessor.from_pretrained(MODEL_PATH)
|
| 65 |
_MODEL = None
|
| 66 |
_DEVICE = "cpu"
|
| 67 |
_DTYPE = torch.float32
|
|
@@ -149,7 +145,6 @@ EXTRA_CHOICES = [
|
|
| 149 |
"Focus captions only on clothing/fashion details.",
|
| 150 |
"Focus on setting, scenery, and context; ignore subject details.",
|
| 151 |
"ONLY describe the subject’s pose, movement, or action. Do NOT mention appearance, clothing, or setting.",
|
| 152 |
-
"Do NOT include anything sexual; keep it PG.",
|
| 153 |
"Include synonyms/alternate phrasing to diversify training set.",
|
| 154 |
"ALWAYS arrange caption elements in the order → Subject, Clothing/Accessories, Action/Pose, Setting/Environment, Lighting/Camera/Style.",
|
| 155 |
"Do NOT mention the image's resolution.",
|
|
@@ -185,14 +180,14 @@ def load_settings() -> dict:
|
|
| 185 |
cfg = json.load(f)
|
| 186 |
else:
|
| 187 |
cfg = {}
|
| 188 |
-
#
|
| 189 |
defaults = {
|
| 190 |
"dataset_name": "forgecaptions",
|
| 191 |
"temperature": 0.6,
|
| 192 |
"top_p": 0.9,
|
| 193 |
"max_tokens": 256,
|
| 194 |
"max_side": 896,
|
| 195 |
-
"styles": ["Character training (long)"],
|
| 196 |
"extras": [],
|
| 197 |
"name": "",
|
| 198 |
"trigger": "",
|
|
@@ -201,11 +196,13 @@ def load_settings() -> dict:
|
|
| 201 |
"shape_aliases_enabled": True,
|
| 202 |
"shape_aliases": [],
|
| 203 |
"excel_thumb_px": 128,
|
|
|
|
|
|
|
| 204 |
"logo_px": 180,
|
|
|
|
| 205 |
}
|
| 206 |
for k, v in defaults.items():
|
| 207 |
cfg.setdefault(k, v)
|
| 208 |
-
# validate styles against allowed set
|
| 209 |
styles = cfg.get("styles") or []
|
| 210 |
cfg["styles"] = [s for s in (styles if isinstance(styles, list) else [styles]) if s in STYLE_OPTIONS] or ["Character training (long)"]
|
| 211 |
return cfg
|
|
@@ -222,8 +219,12 @@ def load_journal() -> dict:
|
|
| 222 |
|
| 223 |
|
| 224 |
# ------------------------------
|
| 225 |
-
# 5) Small utilities (thumbs, resize, prefix/suffix,
|
| 226 |
# ------------------------------
|
|
|
|
|
|
|
|
|
|
|
|
|
| 227 |
def ensure_thumb(path: str, max_side=256) -> str:
|
| 228 |
try:
|
| 229 |
im = Image.open(path).convert("RGB")
|
|
@@ -260,9 +261,6 @@ def apply_prefix_suffix(caption: str, trigger_word: str, begin_text: str, end_te
|
|
| 260 |
return " ".join([p for p in parts if p])
|
| 261 |
|
| 262 |
def logo_b64_img() -> str:
|
| 263 |
-
"""
|
| 264 |
-
Load a PNG logo if present (falls back gracefully).
|
| 265 |
-
"""
|
| 266 |
candidates = [
|
| 267 |
os.path.join(APP_DIR, "forgecaptions-logo.png"),
|
| 268 |
os.path.join(APP_DIR, "captionforge-logo.png"),
|
|
@@ -281,11 +279,6 @@ def logo_b64_img() -> str:
|
|
| 281 |
# 6) Shape Aliases (comma/pipe synonyms per row)
|
| 282 |
# ------------------------------
|
| 283 |
def _compile_shape_aliases_from_file():
|
| 284 |
-
"""
|
| 285 |
-
Build regex list from settings["shape_aliases"].
|
| 286 |
-
Left cell accepts comma OR pipe separated synonyms (multi-word OK).
|
| 287 |
-
Matches are case-insensitive, whole-word, and allow '-shaped' or ' shaped'.
|
| 288 |
-
"""
|
| 289 |
s = load_settings()
|
| 290 |
if not s.get("shape_aliases_enabled", True):
|
| 291 |
return []
|
|
@@ -298,7 +291,7 @@ def _compile_shape_aliases_from_file():
|
|
| 298 |
tokens = [t.strip() for t in re.split(r"[|,]", raw) if t.strip()]
|
| 299 |
if not tokens:
|
| 300 |
continue
|
| 301 |
-
tokens = sorted(set(tokens), key=lambda t: -len(t))
|
| 302 |
pat = r"\b(?:" + "|".join(re.escape(t) for t in tokens) + r")(?:[-\s]?shaped)?\b"
|
| 303 |
compiled.append((re.compile(pat, flags=re.I), name))
|
| 304 |
return compiled
|
|
@@ -431,7 +424,6 @@ def run_batch(
|
|
| 431 |
start = time.time()
|
| 432 |
leftover: List[str] = []
|
| 433 |
|
| 434 |
-
# Progress bar shows inside the GPU worker
|
| 435 |
for idx, path in enumerate(progress.tqdm(files, desc="Captioning")):
|
| 436 |
try:
|
| 437 |
im = Image.open(path).convert("RGB")
|
|
@@ -447,7 +439,6 @@ def run_batch(
|
|
| 447 |
session_rows.append({"filename": filename, "caption": cap, "path": path, "thumb_path": thumb})
|
| 448 |
processed += 1
|
| 449 |
|
| 450 |
-
# Time-slice to avoid Zero GPU timeouts
|
| 451 |
if time_budget_s and (time.time() - start) >= float(time_budget_s):
|
| 452 |
leftover = files[idx+1:]
|
| 453 |
break
|
|
@@ -466,7 +457,6 @@ def run_batch(
|
|
| 466 |
total,
|
| 467 |
)
|
| 468 |
|
| 469 |
-
# Ensure Spaces detects at least one GPU function at startup (without CUDA in main proc)
|
| 470 |
@gpu
|
| 471 |
@torch.no_grad()
|
| 472 |
def _gpu_startup_warm():
|
|
@@ -479,40 +469,13 @@ def _gpu_startup_warm():
|
|
| 479 |
|
| 480 |
|
| 481 |
# ------------------------------
|
| 482 |
-
# 9) Export helpers (CSV/XLSX/TXT)
|
| 483 |
# ------------------------------
|
| 484 |
-
def _as_table_list(value: Any) -> list:
|
| 485 |
-
"""Return a plain list-of-lists regardless of whether Gradio gave us a list or pandas DF."""
|
| 486 |
-
if value is None:
|
| 487 |
-
return []
|
| 488 |
-
if pd is not None:
|
| 489 |
-
if isinstance(value, pd.DataFrame):
|
| 490 |
-
return value.reset_index(drop=True).values.tolist()
|
| 491 |
-
if isinstance(value, pd.Series):
|
| 492 |
-
return [[x] for x in value.to_list()]
|
| 493 |
-
if isinstance(value, list):
|
| 494 |
-
return value
|
| 495 |
-
try:
|
| 496 |
-
return list(value)
|
| 497 |
-
except Exception:
|
| 498 |
-
return []
|
| 499 |
-
|
| 500 |
-
def _sanitize_basename(s: str) -> str:
|
| 501 |
-
s = (s or "").strip() or "forgecaptions"
|
| 502 |
-
return re.sub(r"[^A-Za-z0-9._-]+", "_", s)[:120]
|
| 503 |
-
|
| 504 |
-
def _ts() -> str:
|
| 505 |
-
return time.strftime("%Y%m%d_%H%M%S")
|
| 506 |
-
|
| 507 |
-
def _export_prefix() -> str:
|
| 508 |
-
base = _sanitize_basename(load_settings().get("dataset_name", "forgecaptions"))
|
| 509 |
-
return f"/tmp/{base}_{_ts()}"
|
| 510 |
-
|
| 511 |
def _rows_to_table(rows: List[dict]) -> list:
|
| 512 |
return [[r.get("filename",""), r.get("caption","")] for r in (rows or [])]
|
| 513 |
|
| 514 |
def _table_to_rows(table_value: Any, rows: List[dict]) -> List[dict]:
|
| 515 |
-
tbl =
|
| 516 |
new = []
|
| 517 |
for i, r in enumerate(rows or []):
|
| 518 |
r = dict(r)
|
|
@@ -522,9 +485,10 @@ def _table_to_rows(table_value: Any, rows: List[dict]) -> List[dict]:
|
|
| 522 |
new.append(r)
|
| 523 |
return new
|
| 524 |
|
| 525 |
-
def export_csv_from_table(table_value: Any) -> str:
|
| 526 |
-
data =
|
| 527 |
-
|
|
|
|
| 528 |
with open(out, "w", newline="", encoding="utf-8") as f:
|
| 529 |
w = csv.writer(f); w.writerow(["filename", "caption"]); w.writerows(data)
|
| 530 |
return out
|
|
@@ -546,7 +510,7 @@ def _resize_for_excel(path: str, px: int) -> str:
|
|
| 546 |
except Exception:
|
| 547 |
return path
|
| 548 |
|
| 549 |
-
def export_excel_with_thumbs(table_value: Any, session_rows: List[dict], thumb_px: int) -> str:
|
| 550 |
try:
|
| 551 |
from openpyxl import Workbook
|
| 552 |
from openpyxl.drawing.image import Image as XLImage
|
|
@@ -554,7 +518,7 @@ def export_excel_with_thumbs(table_value: Any, session_rows: List[dict], thumb_p
|
|
| 554 |
raise RuntimeError("Excel export requires 'openpyxl' in requirements.txt.") from e
|
| 555 |
|
| 556 |
caption_by_file = {}
|
| 557 |
-
for row in
|
| 558 |
if not row:
|
| 559 |
continue
|
| 560 |
fn = str(row[0]) if len(row) > 0 else ""
|
|
@@ -568,7 +532,6 @@ def export_excel_with_thumbs(table_value: Any, session_rows: List[dict], thumb_p
|
|
| 568 |
ws.column_dimensions["B"].width = 42
|
| 569 |
ws.column_dimensions["C"].width = 100
|
| 570 |
|
| 571 |
-
# Convert pixel target to approx. row points (Excel ≈ 0.75 * px)
|
| 572 |
row_h = int(int(thumb_px) * 0.75)
|
| 573 |
r_i = 2
|
| 574 |
for r in (session_rows or []):
|
|
@@ -586,64 +549,53 @@ def export_excel_with_thumbs(table_value: Any, session_rows: List[dict], thumb_p
|
|
| 586 |
pass
|
| 587 |
r_i += 1
|
| 588 |
|
| 589 |
-
|
|
|
|
| 590 |
wb.save(out)
|
| 591 |
return out
|
| 592 |
|
| 593 |
-
def
|
| 594 |
-
"""
|
| 595 |
-
|
| 596 |
-
|
| 597 |
-
|
|
|
|
|
|
|
| 598 |
try:
|
| 599 |
-
os.remove(os.path.join(TXT_EXPORT_DIR,
|
| 600 |
except Exception:
|
| 601 |
pass
|
| 602 |
|
| 603 |
-
used = {}
|
| 604 |
for row in data:
|
| 605 |
if not row:
|
| 606 |
continue
|
| 607 |
-
|
| 608 |
-
|
| 609 |
-
stem =
|
| 610 |
-
|
| 611 |
-
|
| 612 |
-
stem = f"{stem}_{
|
|
|
|
|
|
|
|
|
|
| 613 |
with open(os.path.join(TXT_EXPORT_DIR, f"{stem}.txt"), "w", encoding="utf-8") as f:
|
| 614 |
f.write(cap)
|
| 615 |
|
| 616 |
-
|
| 617 |
-
|
| 618 |
-
|
| 619 |
-
|
| 620 |
-
|
| 621 |
-
|
|
|
|
| 622 |
|
| 623 |
|
| 624 |
# ------------------------------
|
| 625 |
-
# 10) UI (
|
| 626 |
# ------------------------------
|
| 627 |
-
|
| 628 |
-
|
| 629 |
-
.gradio-container{max-width:100%!important}
|
| 630 |
-
.cf-hero{
|
| 631 |
-
display:flex; align-items:center; justify-content:center; gap:16px;
|
| 632 |
-
margin:4px 0 12px; text-align:center;
|
| 633 |
-
}
|
| 634 |
-
.cf-hero .cf-text{ text-align:center; }
|
| 635 |
-
.cf-logo{
|
| 636 |
-
height: auto; width:auto; object-fit:contain; display:block; flex:0 0 auto;
|
| 637 |
-
}
|
| 638 |
-
.cf-title{margin:0;font-size:3.25rem;line-height:1;letter-spacing:.2px}
|
| 639 |
-
.cf-sub{margin:6px 0 0;font-size:1.1rem;color:#cfd3da}
|
| 640 |
-
|
| 641 |
-
/* Results area */
|
| 642 |
-
.cf-scroll{max-height:70vh; overflow-y:auto; border:1px solid #e6e6e6; border-radius:10px; padding:8px}
|
| 643 |
-
#cfGal .grid > div { height: 96px; }
|
| 644 |
-
"""
|
| 645 |
-
|
| 646 |
-
def _render_header_html(logo_px: int) -> str:
|
| 647 |
return f"""
|
| 648 |
<div class="cf-hero">
|
| 649 |
{logo_b64_img()}
|
|
@@ -656,24 +608,72 @@ def _render_header_html(logo_px: int) -> str:
|
|
| 656 |
</div>
|
| 657 |
<hr>
|
| 658 |
<style>
|
| 659 |
-
.cf-logo {{ height:
|
| 660 |
</style>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 661 |
"""
|
| 662 |
|
| 663 |
with gr.Blocks(css=BASE_CSS, title="ForgeCaptions") as demo:
|
| 664 |
-
# Ensure Spaces sees a GPU function (without touching CUDA in main)
|
| 665 |
demo.load(_gpu_startup_warm, inputs=None, outputs=None)
|
| 666 |
|
| 667 |
-
#
|
| 668 |
settings = load_settings()
|
|
|
|
|
|
|
|
|
|
| 669 |
|
| 670 |
-
#
|
| 671 |
-
header_html = gr.HTML(_render_header_html(settings.get("logo_px", 180)))
|
| 672 |
-
|
| 673 |
-
# ---- Controls group (left/right columns)
|
| 674 |
with gr.Group():
|
| 675 |
with gr.Row():
|
| 676 |
-
# LEFT:
|
| 677 |
with gr.Column(scale=2):
|
| 678 |
with gr.Accordion("Caption style (choose one or combine)", open=True):
|
| 679 |
style_checks = gr.CheckboxGroup(
|
|
@@ -693,35 +693,36 @@ with gr.Blocks(css=BASE_CSS, title="ForgeCaptions") as demo:
|
|
| 693 |
add_start = gr.Textbox(label="Add text to start", value=settings.get("begin",""))
|
| 694 |
add_end = gr.Textbox(label="Add text to end", value=settings.get("end",""))
|
| 695 |
|
| 696 |
-
# RIGHT:
|
| 697 |
with gr.Column(scale=1):
|
| 698 |
with gr.Accordion("Model Instructions", open=False):
|
| 699 |
-
instruction_preview = gr.Textbox(
|
| 700 |
-
|
| 701 |
-
|
| 702 |
-
|
| 703 |
-
settings.get("extras", []),
|
| 704 |
-
settings.get("name", ""),
|
| 705 |
-
)
|
| 706 |
-
)
|
| 707 |
dataset_name = gr.Textbox(label="Dataset name (export title prefix)",
|
| 708 |
value=settings.get("dataset_name", "forgecaptions"))
|
| 709 |
max_side = gr.Slider(256, 1024, settings.get("max_side", 896), step=32, label="Max side (resize)")
|
| 710 |
excel_thumb_px = gr.Slider(64, 256, value=settings.get("excel_thumb_px", 128),
|
| 711 |
step=8, label="Excel thumbnail size (px)")
|
| 712 |
-
|
| 713 |
-
step=10, label="Logo height (px)")
|
| 714 |
-
# Chunking controls
|
| 715 |
chunk_mode = gr.Radio(
|
| 716 |
choices=["Auto", "Manual (all at once)", "Manual (step)"],
|
| 717 |
-
value="Manual (step)",
|
| 718 |
-
label="Batch mode"
|
| 719 |
)
|
| 720 |
chunk_size = gr.Slider(1, 50, value=10, step=1, label="Chunk size")
|
| 721 |
gpu_budget = gr.Slider(20, 110, value=55, step=5, label="Max seconds per GPU call")
|
| 722 |
|
| 723 |
-
|
| 724 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 725 |
instr = final_instruction(styles or ["Character training (long)"], extra or [], name_value)
|
| 726 |
cfg = load_settings()
|
| 727 |
cfg.update({
|
|
@@ -731,31 +732,37 @@ with gr.Blocks(css=BASE_CSS, title="ForgeCaptions") as demo:
|
|
| 731 |
"trigger": trigv, "begin": begv, "end": endv,
|
| 732 |
"excel_thumb_px": int(excel_px),
|
| 733 |
"max_side": int(ms),
|
| 734 |
-
"dataset_name": dsn or "forgecaptions",
|
| 735 |
})
|
| 736 |
save_settings(cfg)
|
| 737 |
return instr
|
| 738 |
|
| 739 |
-
for comp in [style_checks, extra_opts, name_input, trig, add_start, add_end, excel_thumb_px, max_side
|
| 740 |
-
comp.change(
|
| 741 |
-
|
| 742 |
-
|
| 743 |
-
outputs=[instruction_preview]
|
| 744 |
-
)
|
| 745 |
|
| 746 |
-
def
|
| 747 |
cfg = load_settings()
|
| 748 |
-
cfg["
|
| 749 |
save_settings(cfg)
|
| 750 |
-
return
|
|
|
|
|
|
|
| 751 |
|
| 752 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 753 |
|
| 754 |
-
|
| 755 |
-
|
| 756 |
-
|
| 757 |
|
| 758 |
-
# ---- Shape Aliases (
|
| 759 |
with gr.Accordion("Shape Aliases", open=False):
|
| 760 |
gr.Markdown(
|
| 761 |
"### 🔷 Shape Aliases\n"
|
|
@@ -806,11 +813,11 @@ with gr.Blocks(css=BASE_CSS, title="ForgeCaptions") as demo:
|
|
| 806 |
input_files = gr.File(label="Drop images", file_types=["image"], file_count="multiple", type="filepath")
|
| 807 |
run_button = gr.Button("Caption batch", variant="primary")
|
| 808 |
|
| 809 |
-
# ---- Results (
|
| 810 |
rows_state = gr.State(load_session())
|
| 811 |
autosave_md = gr.Markdown("Ready.")
|
| 812 |
-
progress_md = gr.Markdown("")
|
| 813 |
-
remaining_state = gr.State([])
|
| 814 |
|
| 815 |
with gr.Row():
|
| 816 |
with gr.Column(scale=1):
|
|
@@ -818,29 +825,27 @@ with gr.Blocks(css=BASE_CSS, title="ForgeCaptions") as demo:
|
|
| 818 |
label="Results (image + caption)",
|
| 819 |
show_label=True,
|
| 820 |
columns=3,
|
| 821 |
-
height=520,
|
| 822 |
elem_id="cfGal",
|
| 823 |
elem_classes=["cf-scroll"]
|
| 824 |
)
|
| 825 |
-
with gr.Column(scale=1):
|
| 826 |
table = gr.Dataframe(
|
| 827 |
label="Editable captions (whole session)",
|
| 828 |
value=_rows_to_table(load_session()),
|
| 829 |
headers=["filename", "caption"],
|
| 830 |
interactive=True,
|
| 831 |
wrap=True,
|
| 832 |
-
elem_id="cfTable"
|
| 833 |
-
elem_classes=["cf-scroll"]
|
| 834 |
)
|
| 835 |
|
| 836 |
-
# ---- Step panel
|
| 837 |
step_panel = gr.Group(visible=False)
|
| 838 |
with step_panel:
|
| 839 |
step_msg = gr.Markdown("")
|
| 840 |
step_next = gr.Button("Process next chunk")
|
| 841 |
step_finish = gr.Button("Finish")
|
| 842 |
|
| 843 |
-
# ---- Exports
|
| 844 |
with gr.Row():
|
| 845 |
with gr.Column():
|
| 846 |
export_csv_btn = gr.Button("Export CSV")
|
|
@@ -852,52 +857,47 @@ with gr.Blocks(css=BASE_CSS, title="ForgeCaptions") as demo:
|
|
| 852 |
export_txt_btn = gr.Button("Export captions as .txt (zip)")
|
| 853 |
txt_zip = gr.File(label="TXT zip", visible=False)
|
| 854 |
|
| 855 |
-
# ----
|
| 856 |
gr.HTML("""
|
| 857 |
<script>
|
| 858 |
(function () {
|
| 859 |
-
function
|
| 860 |
const host = document.querySelector("#cfGal");
|
| 861 |
if (!host) return null;
|
| 862 |
-
return host.querySelector(
|
| 863 |
}
|
| 864 |
-
function
|
| 865 |
-
|
| 866 |
-
if (!host) return null;
|
| 867 |
-
return host.querySelector(".wrap") ||
|
| 868 |
-
host.querySelector(".dataframe-wrap") ||
|
| 869 |
-
(host.querySelector("table") ? host.querySelector("table").parentElement : null) ||
|
| 870 |
-
host;
|
| 871 |
}
|
| 872 |
function syncScroll(a, b) {
|
| 873 |
if (!a || !b) return;
|
| 874 |
let lock = false;
|
| 875 |
-
const
|
| 876 |
-
const
|
| 877 |
-
a.addEventListener("scroll",
|
| 878 |
-
b.addEventListener("scroll",
|
| 879 |
}
|
| 880 |
let tries = 0;
|
| 881 |
-
const
|
| 882 |
tries++;
|
| 883 |
-
const gal =
|
| 884 |
-
const tab =
|
| 885 |
if (gal && tab) {
|
| 886 |
-
const H = Math.min(
|
| 887 |
gal.style.maxHeight = H + "px";
|
| 888 |
gal.style.overflowY = "auto";
|
| 889 |
tab.style.maxHeight = H + "px";
|
| 890 |
tab.style.overflowY = "auto";
|
| 891 |
syncScroll(gal, tab);
|
| 892 |
-
clearInterval(
|
| 893 |
}
|
| 894 |
-
if (tries >
|
| 895 |
-
},
|
| 896 |
})();
|
| 897 |
</script>
|
| 898 |
""")
|
| 899 |
|
| 900 |
-
# ----
|
| 901 |
def _split_chunks(files, csize: int):
|
| 902 |
files = files or []
|
| 903 |
c = max(1, int(csize))
|
|
@@ -910,8 +910,7 @@ with gr.Blocks(css=BASE_CSS, title="ForgeCaptions") as demo:
|
|
| 910 |
def _run_click(files, rows, instr, ms, mode, csize, budget_s):
|
| 911 |
t, p, m = _tpms()
|
| 912 |
files = files or []
|
| 913 |
-
|
| 914 |
-
# Manual step → process first chunk only
|
| 915 |
if mode == "Manual (step)" and files:
|
| 916 |
chunks = _split_chunks(files, int(csize))
|
| 917 |
batch = chunks[0]
|
|
@@ -925,7 +924,6 @@ with gr.Blocks(css=BASE_CSS, title="ForgeCaptions") as demo:
|
|
| 925 |
prog = f"Batch progress: {done}/{total} processed in this step • Remaining overall: {len(remaining)}"
|
| 926 |
return new_rows, gal, tbl, stamp, remaining, panel_vis, gr.update(value=msg), gr.update(value=prog)
|
| 927 |
|
| 928 |
-
# Auto / All-at-once (still obey time budget)
|
| 929 |
new_rows, gal, tbl, stamp, leftover, done, total = run_batch(
|
| 930 |
files, rows or [], instr, t, p, m, int(ms), float(budget_s)
|
| 931 |
)
|
|
@@ -945,7 +943,6 @@ with gr.Blocks(css=BASE_CSS, title="ForgeCaptions") as demo:
|
|
| 945 |
remain = remain or []
|
| 946 |
if not remain:
|
| 947 |
return rows, gr.update(value="No files remaining."), gr.update(visible=False), [], [], [], "Saved.", gr.update(value="")
|
| 948 |
-
|
| 949 |
batch = remain[:int(csize)]
|
| 950 |
leftover = remain[int(csize):]
|
| 951 |
new_rows, gal, tbl, stamp, leftover_from_batch, done, total = run_batch(
|
|
@@ -956,7 +953,7 @@ with gr.Blocks(css=BASE_CSS, title="ForgeCaptions") as demo:
|
|
| 956 |
msg = f"{len(leftover)} files remain. Process next chunk?" if leftover else "All done."
|
| 957 |
prog = f"Batch progress: {done}/{total} processed in this step • Remaining overall: {len(leftover)}"
|
| 958 |
return new_rows, msg, panel_vis, leftover, gal, tbl, stamp, gr.update(value=prog)
|
| 959 |
-
|
| 960 |
step_next.click(
|
| 961 |
_step_next,
|
| 962 |
inputs=[remaining_state, rows_state, instruction_preview, max_side, chunk_size, gpu_budget],
|
|
@@ -966,13 +963,9 @@ with gr.Blocks(css=BASE_CSS, title="ForgeCaptions") as demo:
|
|
| 966 |
def _step_finish():
|
| 967 |
return gr.update(visible=False), gr.update(value=""), []
|
| 968 |
|
| 969 |
-
step_finish.click(
|
| 970 |
-
_step_finish,
|
| 971 |
-
inputs=None,
|
| 972 |
-
outputs=[step_panel, step_msg, remaining_state]
|
| 973 |
-
)
|
| 974 |
|
| 975 |
-
# Table edits → persist + refresh gallery
|
| 976 |
def sync_table_to_session(table_value: Any, session_rows: List[dict]) -> Tuple[List[dict], list, str]:
|
| 977 |
session_rows = _table_to_rows(table_value, session_rows or [])
|
| 978 |
save_session(session_rows)
|
|
@@ -980,29 +973,25 @@ with gr.Blocks(css=BASE_CSS, title="ForgeCaptions") as demo:
|
|
| 980 |
for r in session_rows if (r.get("thumb_path") or r.get("path"))]
|
| 981 |
return session_rows, gallery_pairs, f"Saved • {time.strftime('%H:%M:%S')}"
|
| 982 |
|
| 983 |
-
table.change(
|
| 984 |
-
sync_table_to_session,
|
| 985 |
-
inputs=[table, rows_state],
|
| 986 |
-
outputs=[rows_state, gallery, autosave_md]
|
| 987 |
-
)
|
| 988 |
|
| 989 |
-
# Exports
|
| 990 |
export_csv_btn.click(
|
| 991 |
-
lambda tbl: (export_csv_from_table(tbl), gr.update(visible=True)),
|
| 992 |
-
inputs=[table], outputs=[csv_file, csv_file]
|
| 993 |
)
|
| 994 |
export_xlsx_btn.click(
|
| 995 |
-
lambda tbl, rows, px: (export_excel_with_thumbs(tbl, rows or [], int(px)), gr.update(visible=True)),
|
| 996 |
-
inputs=[table, rows_state, excel_thumb_px], outputs=[xlsx_file, xlsx_file]
|
| 997 |
)
|
| 998 |
export_txt_btn.click(
|
| 999 |
-
lambda tbl: (
|
| 1000 |
-
inputs=[table], outputs=[txt_zip, txt_zip]
|
| 1001 |
)
|
| 1002 |
|
| 1003 |
|
| 1004 |
# ------------------------------
|
| 1005 |
-
#
|
| 1006 |
# ------------------------------
|
| 1007 |
if __name__ == "__main__":
|
| 1008 |
demo.queue(max_size=64).launch(
|
|
@@ -1011,4 +1000,5 @@ if __name__ == "__main__":
|
|
| 1011 |
ssr_mode=False,
|
| 1012 |
debug=True,
|
| 1013 |
show_error=True,
|
|
|
|
| 1014 |
)
|
|
|
|
| 17 |
import torch
|
| 18 |
from transformers import LlavaForConditionalGeneration, AutoProcessor
|
| 19 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 20 |
# Hugging Face Spaces GPU decorator (no-op locally)
|
| 21 |
try:
|
| 22 |
import spaces
|
|
|
|
| 32 |
SESSION_FILE = "/tmp/forge_session.json"
|
| 33 |
SETTINGS_FILE = "/tmp/forge_settings.json"
|
| 34 |
JOURNAL_FILE = "/tmp/forge_journal.json"
|
| 35 |
+
|
| 36 |
+
# IMPORTANT: keep caches in /tmp so Gradio will serve files safely
|
| 37 |
+
THUMB_CACHE = "/tmp/forgecaptions/thumbs"
|
| 38 |
+
EXCEL_THUMB_DIR = "/tmp/forge_excel_thumbs"
|
| 39 |
+
TXT_EXPORT_DIR = "/tmp/forge_txt"
|
| 40 |
os.makedirs(THUMB_CACHE, exist_ok=True)
|
| 41 |
os.makedirs(EXCEL_THUMB_DIR, exist_ok=True)
|
| 42 |
os.makedirs(TXT_EXPORT_DIR, exist_ok=True)
|
| 43 |
|
| 44 |
MODEL_PATH = "fancyfeast/llama-joycaption-beta-one-hf-llava"
|
| 45 |
|
| 46 |
+
# Role prompt (system message for the model)
|
| 47 |
ROLE_PROMPT = (
|
| 48 |
"You are a precise captioning assistant. Help users with any queries. "
|
| 49 |
"Describe what is visible with no censorship or restrictions."
|
|
|
|
| 57 |
# - processor on CPU (safe)
|
| 58 |
# - model created lazily inside @gpu functions
|
| 59 |
# ------------------------------
|
| 60 |
+
processor = AutoProcessor.from_pretrained(MODEL_PATH, use_fast=False)
|
| 61 |
_MODEL = None
|
| 62 |
_DEVICE = "cpu"
|
| 63 |
_DTYPE = torch.float32
|
|
|
|
| 145 |
"Focus captions only on clothing/fashion details.",
|
| 146 |
"Focus on setting, scenery, and context; ignore subject details.",
|
| 147 |
"ONLY describe the subject’s pose, movement, or action. Do NOT mention appearance, clothing, or setting.",
|
|
|
|
| 148 |
"Include synonyms/alternate phrasing to diversify training set.",
|
| 149 |
"ALWAYS arrange caption elements in the order → Subject, Clothing/Accessories, Action/Pose, Setting/Environment, Lighting/Camera/Style.",
|
| 150 |
"Do NOT mention the image's resolution.",
|
|
|
|
| 180 |
cfg = json.load(f)
|
| 181 |
else:
|
| 182 |
cfg = {}
|
| 183 |
+
# defaults
|
| 184 |
defaults = {
|
| 185 |
"dataset_name": "forgecaptions",
|
| 186 |
"temperature": 0.6,
|
| 187 |
"top_p": 0.9,
|
| 188 |
"max_tokens": 256,
|
| 189 |
"max_side": 896,
|
| 190 |
+
"styles": ["Character training (long)"],
|
| 191 |
"extras": [],
|
| 192 |
"name": "",
|
| 193 |
"trigger": "",
|
|
|
|
| 196 |
"shape_aliases_enabled": True,
|
| 197 |
"shape_aliases": [],
|
| 198 |
"excel_thumb_px": 128,
|
| 199 |
+
# header controls
|
| 200 |
+
"logo_auto": True,
|
| 201 |
"logo_px": 180,
|
| 202 |
+
"logo_scale": 1.0,
|
| 203 |
}
|
| 204 |
for k, v in defaults.items():
|
| 205 |
cfg.setdefault(k, v)
|
|
|
|
| 206 |
styles = cfg.get("styles") or []
|
| 207 |
cfg["styles"] = [s for s in (styles if isinstance(styles, list) else [styles]) if s in STYLE_OPTIONS] or ["Character training (long)"]
|
| 208 |
return cfg
|
|
|
|
| 219 |
|
| 220 |
|
| 221 |
# ------------------------------
|
| 222 |
+
# 5) Small utilities (thumbs, resize, prefix/suffix, names)
|
| 223 |
# ------------------------------
|
| 224 |
+
def sanitize_basename(s: str) -> str:
    """Return a filesystem-friendly base name derived from *s*.

    Blank or missing input falls back to ``"forgecaptions"``. Every run of
    characters outside ``A-Z a-z 0-9 . _ -`` collapses to a single underscore,
    and the result is capped at 120 characters.
    """
    cleaned = (s or "").strip()
    if not cleaned:
        cleaned = "forgecaptions"
    safe = re.sub(r"[^A-Za-z0-9._-]+", "_", cleaned)
    return safe[:120]
|
| 227 |
+
|
| 228 |
def ensure_thumb(path: str, max_side=256) -> str:
|
| 229 |
try:
|
| 230 |
im = Image.open(path).convert("RGB")
|
|
|
|
| 261 |
return " ".join([p for p in parts if p])
|
| 262 |
|
| 263 |
def logo_b64_img() -> str:
|
|
|
|
|
|
|
|
|
|
| 264 |
candidates = [
|
| 265 |
os.path.join(APP_DIR, "forgecaptions-logo.png"),
|
| 266 |
os.path.join(APP_DIR, "captionforge-logo.png"),
|
|
|
|
| 279 |
# 6) Shape Aliases (comma/pipe synonyms per row)
|
| 280 |
# ------------------------------
|
| 281 |
def _compile_shape_aliases_from_file():
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 282 |
s = load_settings()
|
| 283 |
if not s.get("shape_aliases_enabled", True):
|
| 284 |
return []
|
|
|
|
| 291 |
tokens = [t.strip() for t in re.split(r"[|,]", raw) if t.strip()]
|
| 292 |
if not tokens:
|
| 293 |
continue
|
| 294 |
+
tokens = sorted(set(tokens), key=lambda t: -len(t))
|
| 295 |
pat = r"\b(?:" + "|".join(re.escape(t) for t in tokens) + r")(?:[-\s]?shaped)?\b"
|
| 296 |
compiled.append((re.compile(pat, flags=re.I), name))
|
| 297 |
return compiled
|
|
|
|
| 424 |
start = time.time()
|
| 425 |
leftover: List[str] = []
|
| 426 |
|
|
|
|
| 427 |
for idx, path in enumerate(progress.tqdm(files, desc="Captioning")):
|
| 428 |
try:
|
| 429 |
im = Image.open(path).convert("RGB")
|
|
|
|
| 439 |
session_rows.append({"filename": filename, "caption": cap, "path": path, "thumb_path": thumb})
|
| 440 |
processed += 1
|
| 441 |
|
|
|
|
| 442 |
if time_budget_s and (time.time() - start) >= float(time_budget_s):
|
| 443 |
leftover = files[idx+1:]
|
| 444 |
break
|
|
|
|
| 457 |
total,
|
| 458 |
)
|
| 459 |
|
|
|
|
| 460 |
@gpu
|
| 461 |
@torch.no_grad()
|
| 462 |
def _gpu_startup_warm():
|
|
|
|
| 469 |
|
| 470 |
|
| 471 |
# ------------------------------
|
| 472 |
+
# 9) Export helpers (CSV/XLSX/TXT ZIP)
|
| 473 |
# ------------------------------
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 474 |
def _rows_to_table(rows: List[dict]) -> list:
|
| 475 |
return [[r.get("filename",""), r.get("caption","")] for r in (rows or [])]
|
| 476 |
|
| 477 |
def _table_to_rows(table_value: Any, rows: List[dict]) -> List[dict]:
|
| 478 |
+
tbl = table_value or []
|
| 479 |
new = []
|
| 480 |
for i, r in enumerate(rows or []):
|
| 481 |
r = dict(r)
|
|
|
|
| 485 |
new.append(r)
|
| 486 |
return new
|
| 487 |
|
| 488 |
+
def export_csv_from_table(table_value: Any, dataset_name: str) -> str:
    """Write the caption table to a CSV file under /tmp and return its path.

    Args:
        table_value: Rows of ``[filename, caption]``. Accepts ``None``, a list
            of lists, or any object exposing ``.values.tolist()``.
        dataset_name: Sanitized and used as the output file-name prefix.

    Returns:
        Path of the CSV file that was written.
    """
    # NOTE(review): gr.Dataframe presumably hands callbacks a pandas DataFrame
    # unless the component is created with type="array" - confirm. A DataFrame
    # cannot be truth-tested (`df or []` raises ValueError), so normalize by
    # duck typing instead of relying on truthiness.
    if table_value is None:
        data = []
    elif hasattr(table_value, "values") and hasattr(table_value.values, "tolist"):
        data = table_value.values.tolist()
    else:
        data = list(table_value)

    name = sanitize_basename(dataset_name)
    out = f"/tmp/{name}_{int(time.time())}.csv"
    with open(out, "w", newline="", encoding="utf-8") as f:
        w = csv.writer(f)
        w.writerow(["filename", "caption"])
        w.writerows(data)
    return out
|
|
|
|
| 510 |
except Exception:
|
| 511 |
return path
|
| 512 |
|
| 513 |
+
def export_excel_with_thumbs(table_value: Any, session_rows: List[dict], thumb_px: int, dataset_name: str) -> str:
|
| 514 |
try:
|
| 515 |
from openpyxl import Workbook
|
| 516 |
from openpyxl.drawing.image import Image as XLImage
|
|
|
|
| 518 |
raise RuntimeError("Excel export requires 'openpyxl' in requirements.txt.") from e
|
| 519 |
|
| 520 |
caption_by_file = {}
|
| 521 |
+
for row in (table_value or []):
|
| 522 |
if not row:
|
| 523 |
continue
|
| 524 |
fn = str(row[0]) if len(row) > 0 else ""
|
|
|
|
| 532 |
ws.column_dimensions["B"].width = 42
|
| 533 |
ws.column_dimensions["C"].width = 100
|
| 534 |
|
|
|
|
| 535 |
row_h = int(int(thumb_px) * 0.75)
|
| 536 |
r_i = 2
|
| 537 |
for r in (session_rows or []):
|
|
|
|
| 549 |
pass
|
| 550 |
r_i += 1
|
| 551 |
|
| 552 |
+
name = sanitize_basename(dataset_name)
|
| 553 |
+
out = f"/tmp/{name}_{int(time.time())}.xlsx"
|
| 554 |
wb.save(out)
|
| 555 |
return out
|
| 556 |
|
| 557 |
+
def export_txt_zip(table_value: Any, dataset_name: str) -> str:
    """
    Create one .txt per caption, zip them.

    Args:
        table_value: Rows of ``[filename, caption]``. Accepts ``None``, a list
            of lists, or any object exposing ``.values.tolist()``.
        dataset_name: Sanitized and used as the zip file-name prefix.

    Returns:
        Path of the zip archive written under /tmp. Each member is named after
        the row's filename with its extension replaced by ``.txt``.
    """
    def _cell(value: Any) -> str:
        # None and NaN (the only value where v != v) count as empty; anything
        # else is stringified so numeric cells do not break .strip().
        if value is None or value != value:
            return ""
        return str(value).strip()

    # NOTE(review): gr.Dataframe presumably hands callbacks a pandas DataFrame
    # unless the component is created with type="array" - confirm. A DataFrame
    # cannot be truth-tested (`df or []` raises ValueError), so normalize by
    # duck typing instead of relying on truthiness.
    if table_value is None:
        data = []
    elif hasattr(table_value, "values") and hasattr(table_value.values, "tolist"):
        data = table_value.values.tolist()
    else:
        data = list(table_value)

    # wipe old (best effort: a file that cannot be removed is simply left behind)
    for fn in os.listdir(TXT_EXPORT_DIR):
        try:
            os.remove(os.path.join(TXT_EXPORT_DIR, fn))
        except OSError:
            pass

    used: Dict[str, int] = {}   # per-stem counter driving the "_N" suffix
    taken = set()               # every stem already written in this export
    written: List[str] = []     # file names produced by this call, in table order
    for row in data:
        if not row:
            continue
        orig = _cell(row[0]) or "item"
        stem = re.sub(r"\.[A-Za-z0-9]+$", "", orig)
        stem = sanitize_basename(stem or "item")
        # Keep bumping the suffix until the name is unused, so a generated
        # "a_1" can never overwrite a row whose own name is already "a_1".
        base = stem
        while stem in taken:
            used[base] = used.get(base, 0) + 1
            stem = f"{base}_{used[base]}"
        taken.add(stem)
        cap = _cell(row[1]) if len(row) > 1 else ""
        fname = f"{stem}.txt"
        with open(os.path.join(TXT_EXPORT_DIR, fname), "w", encoding="utf-8") as f:
            f.write(cap)
        written.append(fname)

    name = sanitize_basename(dataset_name)
    zpath = f"/tmp/{name}_{int(time.time())}_txt.zip"
    with zipfile.ZipFile(zpath, "w", zipfile.ZIP_DEFLATED) as z:
        # Archive only what this call wrote; listing the directory would also
        # pick up stale files whenever the wipe above failed to remove them.
        for fname in written:
            z.write(os.path.join(TXT_EXPORT_DIR, fname), arcname=fname)
    return zpath
|
| 592 |
|
| 593 |
|
| 594 |
# ------------------------------
|
| 595 |
+
# 10) UI header helper (logo auto-fit)
|
| 596 |
# ------------------------------
|
| 597 |
+
def _render_header_html(auto: bool, px: int, scale: float) -> str:
|
| 598 |
+
auto_js = "true" if auto else "false"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 599 |
return f"""
|
| 600 |
<div class="cf-hero">
|
| 601 |
{logo_b64_img()}
|
|
|
|
| 608 |
</div>
|
| 609 |
<hr>
|
| 610 |
<style>
|
| 611 |
+
.cf-logo {{ height: auto; width: auto; object-fit: contain; }}
|
| 612 |
</style>
|
| 613 |
+
<script>
|
| 614 |
+
(function() {{
|
| 615 |
+
const AUTO = {auto_js};
|
| 616 |
+
const PX = {int(px)};
|
| 617 |
+
const SCALE = {float(scale)};
|
| 618 |
+
function fit() {{
|
| 619 |
+
const logo = document.querySelector(".cf-logo");
|
| 620 |
+
const text = document.querySelector(".cf-text");
|
| 621 |
+
if (!logo || !text) return;
|
| 622 |
+
if (AUTO) {{
|
| 623 |
+
const h = text.getBoundingClientRect().height || 180;
|
| 624 |
+
const target = Math.max(80, Math.min(420, Math.round(h * SCALE)));
|
| 625 |
+
logo.style.height = target + "px";
|
| 626 |
+
}} else {{
|
| 627 |
+
logo.style.height = Math.max(80, Math.min(420, PX)) + "px";
|
| 628 |
+
}}
|
| 629 |
+
}}
|
| 630 |
+
const textNode = document.querySelector(".cf-text");
|
| 631 |
+
if (window.ResizeObserver && textNode) {{
|
| 632 |
+
const ro = new ResizeObserver(fit);
|
| 633 |
+
ro.observe(textNode);
|
| 634 |
+
}}
|
| 635 |
+
window.addEventListener("resize", fit, {{ passive: true }});
|
| 636 |
+
setTimeout(fit, 0);
|
| 637 |
+
}})();
|
| 638 |
+
</script>
|
| 639 |
+
"""
|
| 640 |
+
|
| 641 |
+
|
| 642 |
+
# ------------------------------
|
| 643 |
+
# 11) UI (Blocks)
|
| 644 |
+
# ------------------------------
|
| 645 |
+
BASE_CSS = """
|
| 646 |
+
:root{--galleryW:50%;--tableW:50%;}
|
| 647 |
+
.gradio-container{max-width:100%!important}
|
| 648 |
+
|
| 649 |
+
/* Header */
|
| 650 |
+
.cf-hero{display:flex; align-items:center; justify-content:center; gap:16px;
|
| 651 |
+
margin:4px 0 12px; text-align:center;}
|
| 652 |
+
.cf-hero .cf-text{text-align:center;}
|
| 653 |
+
.cf-title{margin:0;font-size:3.25rem;line-height:1;letter-spacing:.2px}
|
| 654 |
+
.cf-sub{margin:6px 0 0;font-size:1.1rem;color:#cfd3da}
|
| 655 |
+
|
| 656 |
+
/* Results area + robust scrollbars */
|
| 657 |
+
.cf-scroll{border:1px solid #e6e6e6; border-radius:10px; padding:8px}
|
| 658 |
+
#cfGal{max-height:520px; overflow-y:auto !important;}
|
| 659 |
+
#cfTableWrap{max-height:520px; overflow-y:auto !important;}
|
| 660 |
+
#cfGal [data-testid="gallery"]{height:auto !important;}
|
| 661 |
+
#cfGal .grid > div { height: 96px; }
|
| 662 |
"""
|
| 663 |
|
| 664 |
with gr.Blocks(css=BASE_CSS, title="ForgeCaptions") as demo:
|
|
|
|
| 665 |
demo.load(_gpu_startup_warm, inputs=None, outputs=None)
|
| 666 |
|
| 667 |
+
# ---- Header
|
| 668 |
settings = load_settings()
|
| 669 |
+
header_html = gr.HTML(_render_header_html(settings.get("logo_auto", True),
|
| 670 |
+
settings.get("logo_px", 180),
|
| 671 |
+
settings.get("logo_scale", 1.0)))
|
| 672 |
|
| 673 |
+
# ---- Controls group
|
|
|
|
|
|
|
|
|
|
| 674 |
with gr.Group():
|
| 675 |
with gr.Row():
|
| 676 |
+
# LEFT: styles / extras / name & prefix-suffix
|
| 677 |
with gr.Column(scale=2):
|
| 678 |
with gr.Accordion("Caption style (choose one or combine)", open=True):
|
| 679 |
style_checks = gr.CheckboxGroup(
|
|
|
|
| 693 |
add_start = gr.Textbox(label="Add text to start", value=settings.get("begin",""))
|
| 694 |
add_end = gr.Textbox(label="Add text to end", value=settings.get("end",""))
|
| 695 |
|
| 696 |
+
# RIGHT: instructions + dataset + general sliders + logo controls
|
| 697 |
with gr.Column(scale=1):
|
| 698 |
with gr.Accordion("Model Instructions", open=False):
|
| 699 |
+
instruction_preview = gr.Textbox(label=None, lines=12,
|
| 700 |
+
value=final_instruction(settings.get("styles", ["Character training (long)"]),
|
| 701 |
+
settings.get("extras", []),
|
| 702 |
+
settings.get("name","")))
|
|
|
|
|
|
|
|
|
|
|
|
|
| 703 |
dataset_name = gr.Textbox(label="Dataset name (export title prefix)",
|
| 704 |
value=settings.get("dataset_name", "forgecaptions"))
|
| 705 |
max_side = gr.Slider(256, 1024, settings.get("max_side", 896), step=32, label="Max side (resize)")
|
| 706 |
excel_thumb_px = gr.Slider(64, 256, value=settings.get("excel_thumb_px", 128),
|
| 707 |
step=8, label="Excel thumbnail size (px)")
|
| 708 |
+
# Chunking
|
|
|
|
|
|
|
| 709 |
chunk_mode = gr.Radio(
|
| 710 |
choices=["Auto", "Manual (all at once)", "Manual (step)"],
|
| 711 |
+
value="Manual (step)", label="Batch mode"
|
|
|
|
| 712 |
)
|
| 713 |
chunk_size = gr.Slider(1, 50, value=10, step=1, label="Chunk size")
|
| 714 |
gpu_budget = gr.Slider(20, 110, value=55, step=5, label="Max seconds per GPU call")
|
| 715 |
|
| 716 |
+
# Logo controls
|
| 717 |
+
logo_auto = gr.Checkbox(value=settings.get("logo_auto", True),
|
| 718 |
+
label="Auto-match logo height to text")
|
| 719 |
+
logo_px = gr.Slider(80, 420, value=settings.get("logo_px", 180),
|
| 720 |
+
step=4, label="Logo height (px, if Auto off)")
|
| 721 |
+
logo_scale = gr.Slider(0.6, 1.6, value=settings.get("logo_scale", 1.0),
|
| 722 |
+
step=0.05, label="Logo scale × (if Auto on)")
|
| 723 |
+
|
| 724 |
+
# Persist instruction + general settings
|
| 725 |
+
def _refresh_instruction(styles, extra, name_value, trigv, begv, endv, excel_px, ms):
|
| 726 |
instr = final_instruction(styles or ["Character training (long)"], extra or [], name_value)
|
| 727 |
cfg = load_settings()
|
| 728 |
cfg.update({
|
|
|
|
| 732 |
"trigger": trigv, "begin": begv, "end": endv,
|
| 733 |
"excel_thumb_px": int(excel_px),
|
| 734 |
"max_side": int(ms),
|
|
|
|
| 735 |
})
|
| 736 |
save_settings(cfg)
|
| 737 |
return instr
|
| 738 |
|
| 739 |
+
for comp in [style_checks, extra_opts, name_input, trig, add_start, add_end, excel_thumb_px, max_side]:
|
| 740 |
+
comp.change(_refresh_instruction,
|
| 741 |
+
inputs=[style_checks, extra_opts, name_input, trig, add_start, add_end, excel_thumb_px, max_side],
|
| 742 |
+
outputs=[instruction_preview])
|
|
|
|
|
|
|
| 743 |
|
| 744 |
+
def _save_dataset_name(name):
    """Store the sanitized dataset name in the persisted settings.

    Reads the current settings, replaces the ``dataset_name`` entry with
    ``sanitize_basename(name)``, writes the settings back, and returns an
    empty ``gr.update()``.
    """
    stored = load_settings()
    stored["dataset_name"] = sanitize_basename(name)
    save_settings(stored)
    no_change = gr.update()
    return no_change
|
| 749 |
+
|
| 750 |
+
dataset_name.change(_save_dataset_name, inputs=[dataset_name], outputs=[])
|
| 751 |
|
| 752 |
+
# Header controls live update
|
| 753 |
+
def _update_header(auto, px, scale):
    """Persist the logo controls and return freshly rendered header HTML.

    The three control values are coerced to ``bool``/``int``/``float``,
    merged into the saved settings under ``logo_auto``, ``logo_px`` and
    ``logo_scale``, and the header is re-rendered from the stored values.
    """
    stored = load_settings()
    stored.update({
        "logo_auto": bool(auto),
        "logo_px": int(px),
        "logo_scale": float(scale),
    })
    save_settings(stored)
    # Render from the settings dict itself so the header reflects what was saved.
    return _render_header_html(
        stored["logo_auto"],
        stored["logo_px"],
        stored["logo_scale"],
    )
|
| 760 |
|
| 761 |
+
# Any of the three logo controls re-renders the header; every handler receives
# the full (auto, px, scale) triple regardless of which control fired.
_logo_inputs = [logo_auto, logo_px, logo_scale]
for _logo_ctrl in (logo_px, logo_auto, logo_scale):
    _logo_ctrl.change(_update_header, inputs=_logo_inputs, outputs=[header_html])
|
| 764 |
|
| 765 |
+
# ---- Shape Aliases block (placed WITH other settings, before uploads)
|
| 766 |
with gr.Accordion("Shape Aliases", open=False):
|
| 767 |
gr.Markdown(
|
| 768 |
"### 🔷 Shape Aliases\n"
|
|
|
|
| 813 |
input_files = gr.File(label="Drop images", file_types=["image"], file_count="multiple", type="filepath")
|
| 814 |
run_button = gr.Button("Caption batch", variant="primary")
|
| 815 |
|
| 816 |
+
# ---- Results area (gallery left / table right)
|
| 817 |
rows_state = gr.State(load_session())
|
| 818 |
autosave_md = gr.Markdown("Ready.")
|
| 819 |
+
progress_md = gr.Markdown("")
|
| 820 |
+
remaining_state = gr.State([])
|
| 821 |
|
| 822 |
with gr.Row():
|
| 823 |
with gr.Column(scale=1):
|
|
|
|
| 825 |
label="Results (image + caption)",
|
| 826 |
show_label=True,
|
| 827 |
columns=3,
|
|
|
|
| 828 |
elem_id="cfGal",
|
| 829 |
elem_classes=["cf-scroll"]
|
| 830 |
)
|
| 831 |
+
with gr.Column(scale=1, elem_id="cfTableWrap", elem_classes=["cf-scroll"]):
|
| 832 |
table = gr.Dataframe(
|
| 833 |
label="Editable captions (whole session)",
|
| 834 |
value=_rows_to_table(load_session()),
|
| 835 |
headers=["filename", "caption"],
|
| 836 |
interactive=True,
|
| 837 |
wrap=True,
|
| 838 |
+
elem_id="cfTable"
|
|
|
|
| 839 |
)
|
| 840 |
|
| 841 |
+
# ---- Step panel
|
| 842 |
step_panel = gr.Group(visible=False)
|
| 843 |
with step_panel:
|
| 844 |
step_msg = gr.Markdown("")
|
| 845 |
step_next = gr.Button("Process next chunk")
|
| 846 |
step_finish = gr.Button("Finish")
|
| 847 |
|
| 848 |
+
# ---- Exports
|
| 849 |
with gr.Row():
|
| 850 |
with gr.Column():
|
| 851 |
export_csv_btn = gr.Button("Export CSV")
|
|
|
|
| 857 |
export_txt_btn = gr.Button("Export captions as .txt (zip)")
|
| 858 |
txt_zip = gr.File(label="TXT zip", visible=False)
|
| 859 |
|
| 860 |
+
# ---- Robust scroll sync (works with Gradio v5 Gallery)
# Injects a script that:
#   1. polls every 120 ms (giving up after ~30 tries) until both the gallery
#      host (#cfGal, preferring its [data-testid="gallery"] child) and the
#      table wrapper (#cfTableWrap) exist;
#   2. gives both the same max height, clamped to 360-520 px from the table
#      wrapper's clientHeight, with vertical scrolling;
#   3. mirrors scrollTop between the two elements, using a `lock` flag intended
#      to stop the mirrored assignment from echoing back.
# NOTE(review): browsers do not execute <script> elements inserted through
# innerHTML - confirm gr.HTML actually runs this; if not, it belongs in the
# Blocks `js=`/`head=` parameters instead.
gr.HTML("""
<script>
(function () {
function findGal() {
const host = document.querySelector("#cfGal");
if (!host) return null;
return host.querySelector('[data-testid="gallery"]') || host;
}
function findTbl() {
return document.querySelector("#cfTableWrap");
}
function syncScroll(a, b) {
if (!a || !b) return;
let lock = false;
const onA = () => { if (lock) return; lock = true; b.scrollTop = a.scrollTop; lock = false; };
const onB = () => { if (lock) return; lock = true; a.scrollTop = b.scrollTop; lock = false; };
a.addEventListener("scroll", onA, { passive: true });
b.addEventListener("scroll", onB, { passive: true });
}
let tries = 0;
const t = setInterval(() => {
tries++;
const gal = findGal();
const tab = findTbl();
if (gal && tab) {
const H = Math.min(520, Math.max(360, tab.clientHeight || 520));
gal.style.maxHeight = H + "px";
gal.style.overflowY = "auto";
tab.style.maxHeight = H + "px";
tab.style.overflowY = "auto";
syncScroll(gal, tab);
clearInterval(t);
}
if (tries > 30) clearInterval(t);
}, 120);
})();
</script>
""")
|
| 899 |
|
| 900 |
+
# ---- Chunking logic
|
| 901 |
def _split_chunks(files, csize: int):
|
| 902 |
files = files or []
|
| 903 |
c = max(1, int(csize))
|
|
|
|
| 910 |
def _run_click(files, rows, instr, ms, mode, csize, budget_s):
|
| 911 |
t, p, m = _tpms()
|
| 912 |
files = files or []
|
| 913 |
+
|
|
|
|
| 914 |
if mode == "Manual (step)" and files:
|
| 915 |
chunks = _split_chunks(files, int(csize))
|
| 916 |
batch = chunks[0]
|
|
|
|
| 924 |
prog = f"Batch progress: {done}/{total} processed in this step • Remaining overall: {len(remaining)}"
|
| 925 |
return new_rows, gal, tbl, stamp, remaining, panel_vis, gr.update(value=msg), gr.update(value=prog)
|
| 926 |
|
|
|
|
| 927 |
new_rows, gal, tbl, stamp, leftover, done, total = run_batch(
|
| 928 |
files, rows or [], instr, t, p, m, int(ms), float(budget_s)
|
| 929 |
)
|
|
|
|
| 943 |
remain = remain or []
|
| 944 |
if not remain:
|
| 945 |
return rows, gr.update(value="No files remaining."), gr.update(visible=False), [], [], [], "Saved.", gr.update(value="")
|
|
|
|
| 946 |
batch = remain[:int(csize)]
|
| 947 |
leftover = remain[int(csize):]
|
| 948 |
new_rows, gal, tbl, stamp, leftover_from_batch, done, total = run_batch(
|
|
|
|
| 953 |
msg = f"{len(leftover)} files remain. Process next chunk?" if leftover else "All done."
|
| 954 |
prog = f"Batch progress: {done}/{total} processed in this step • Remaining overall: {len(leftover)}"
|
| 955 |
return new_rows, msg, panel_vis, leftover, gal, tbl, stamp, gr.update(value=prog)
|
| 956 |
+
|
| 957 |
step_next.click(
|
| 958 |
_step_next,
|
| 959 |
inputs=[remaining_state, rows_state, instruction_preview, max_side, chunk_size, gpu_budget],
|
|
|
|
| 963 |
def _step_finish():
    """End step mode: hide the step panel, blank its message, drop the queue.

    Returns one value per wired output, in order: an update hiding the panel,
    an update clearing the message text, and an empty list for the
    remaining-files state.
    """
    hide_panel = gr.update(visible=False)
    clear_message = gr.update(value="")
    return hide_panel, clear_message, []
|
| 965 |
|
| 966 |
+
step_finish.click(_step_finish, inputs=None, outputs=[step_panel, step_msg, remaining_state])
|
|
|
|
|
|
|
|
|
|
|
|
|
| 967 |
|
| 968 |
+
# ---- Table edits → persist + refresh gallery
|
| 969 |
def sync_table_to_session(table_value: Any, session_rows: List[dict]) -> Tuple[List[dict], list, str]:
|
| 970 |
session_rows = _table_to_rows(table_value, session_rows or [])
|
| 971 |
save_session(session_rows)
|
|
|
|
| 973 |
for r in session_rows if (r.get("thumb_path") or r.get("path"))]
|
| 974 |
return session_rows, gallery_pairs, f"Saved • {time.strftime('%H:%M:%S')}"
|
| 975 |
|
| 976 |
+
table.change(sync_table_to_session, inputs=[table, rows_state], outputs=[rows_state, gallery, autosave_md])
|
|
|
|
|
|
|
|
|
|
|
|
|
| 977 |
|
| 978 |
+
# ---- Exports
|
| 979 |
# Each export handler builds its file and reveals the target gr.File in ONE
# update. Previously every handler listed the same component twice in
# `outputs` and returned (path, gr.update(visible=True)), which depended on
# two separate updates to one component both being applied, in order.
def _reveal_export(path):
    """Return a single update that sets the file value and makes it visible."""
    return gr.update(value=path, visible=True)

export_csv_btn.click(
    lambda tbl, ds: _reveal_export(export_csv_from_table(tbl, ds)),
    inputs=[table, dataset_name], outputs=[csv_file]
)
export_xlsx_btn.click(
    # `rows or []` guards against an empty/None session state.
    lambda tbl, rows, px, ds: _reveal_export(
        export_excel_with_thumbs(tbl, rows or [], int(px), ds)
    ),
    inputs=[table, rows_state, excel_thumb_px, dataset_name], outputs=[xlsx_file]
)
export_txt_btn.click(
    lambda tbl, ds: _reveal_export(export_txt_zip(tbl, ds)),
    inputs=[table, dataset_name], outputs=[txt_zip]
)
|
| 991 |
|
| 992 |
|
| 993 |
# ------------------------------
|
| 994 |
+
# 12) Launch (SSR disabled for stability on Spaces)
|
| 995 |
# ------------------------------
|
| 996 |
if __name__ == "__main__":
|
| 997 |
demo.queue(max_size=64).launch(
|
|
|
|
| 1000 |
ssr_mode=False,
|
| 1001 |
debug=True,
|
| 1002 |
show_error=True,
|
| 1003 |
+
allowed_paths=[THUMB_CACHE, EXCEL_THUMB_DIR, TXT_EXPORT_DIR], # serve /tmp caches safely
|
| 1004 |
)
|