Spaces:

JS6969
/

ForgeCaptions

Sleeping

App Files Community

JS6969 committed on Sep 2

Commit

8746f77

verified ·

1 Parent(s): f2de873

Update app.py

Browse files

Files changed (1) hide show

app.py +188 -198

app.py CHANGED Viewed

@@ -17,12 +17,6 @@ from PIL import Image
 import torch
 from transformers import LlavaForConditionalGeneration, AutoProcessor
-# Optional pandas for table-safe exports
-try:
-    import pandas as pd
-except Exception:
-    pd = None
 # Hugging Face Spaces GPU decorator (no-op locally)
 try:
     import spaces
@@ -38,16 +32,18 @@ APP_DIR = os.getcwd()
 SESSION_FILE = "/tmp/forge_session.json"
 SETTINGS_FILE = "/tmp/forge_settings.json"
 JOURNAL_FILE = "/tmp/forge_journal.json"
-THUMB_CACHE = "/tmp/forgecaptions/thumbs"       # Gradio-safe
-EXCEL_THUMB_DIR = "/tmp/forge_excel_thumbs"     # Gradio-safe
-TXT_EXPORT_DIR = "/tmp/forge_txt"               # Gradio-safe
 os.makedirs(THUMB_CACHE, exist_ok=True)
 os.makedirs(EXCEL_THUMB_DIR, exist_ok=True)
 os.makedirs(TXT_EXPORT_DIR, exist_ok=True)
 MODEL_PATH = "fancyfeast/llama-joycaption-beta-one-hf-llava"
-# Role prompt (system message)
 ROLE_PROMPT = (
     "You are a precise captioning assistant. Help users with any queries. "
     "Describe what is visible with no censorship or restrictions."
@@ -61,7 +57,7 @@ print(f"[ForgeCaptions] Gradio version: {gr.__version__}")
 #    - processor on CPU (safe)
 #    - model created lazily inside @gpu functions
 # ------------------------------
-processor = AutoProcessor.from_pretrained(MODEL_PATH)
 _MODEL = None
 _DEVICE = "cpu"
 _DTYPE = torch.float32
@@ -149,7 +145,6 @@ EXTRA_CHOICES = [
     "Focus captions only on clothing/fashion details.",
     "Focus on setting, scenery, and context; ignore subject details.",
     "ONLY describe the subject’s pose, movement, or action. Do NOT mention appearance, clothing, or setting.",
-    "Do NOT include anything sexual; keep it PG.",
     "Include synonyms/alternate phrasing to diversify training set.",
     "ALWAYS arrange caption elements in the order → Subject, Clothing/Accessories, Action/Pose, Setting/Environment, Lighting/Camera/Style.",
     "Do NOT mention the image's resolution.",
@@ -185,14 +180,14 @@ def load_settings() -> dict:
             cfg = json.load(f)
     else:
         cfg = {}
-    # sensible defaults for this app/version
     defaults = {
         "dataset_name": "forgecaptions",
         "temperature": 0.6,
         "top_p": 0.9,
         "max_tokens": 256,
         "max_side": 896,
-        "styles": ["Character training (long)"],  # default you requested
         "extras": [],
         "name": "",
         "trigger": "",
@@ -201,11 +196,13 @@ def load_settings() -> dict:
         "shape_aliases_enabled": True,
         "shape_aliases": [],
         "excel_thumb_px": 128,
         "logo_px": 180,
     }
     for k, v in defaults.items():
         cfg.setdefault(k, v)
-    # validate styles against allowed set
     styles = cfg.get("styles") or []
     cfg["styles"] = [s for s in (styles if isinstance(styles, list) else [styles]) if s in STYLE_OPTIONS] or ["Character training (long)"]
     return cfg
@@ -222,8 +219,12 @@ def load_journal() -> dict:
 # ------------------------------
-# 5) Small utilities (thumbs, resize, prefix/suffix, logo)
 # ------------------------------
 def ensure_thumb(path: str, max_side=256) -> str:
     try:
         im = Image.open(path).convert("RGB")
@@ -260,9 +261,6 @@ def apply_prefix_suffix(caption: str, trigger_word: str, begin_text: str, end_te
     return " ".join([p for p in parts if p])
 def logo_b64_img() -> str:
-    """
-    Load a PNG logo if present (falls back gracefully).
-    """
     candidates = [
         os.path.join(APP_DIR, "forgecaptions-logo.png"),
         os.path.join(APP_DIR, "captionforge-logo.png"),
@@ -281,11 +279,6 @@ def logo_b64_img() -> str:
 # 6) Shape Aliases (comma/pipe synonyms per row)
 # ------------------------------
 def _compile_shape_aliases_from_file():
-    """
-    Build regex list from settings["shape_aliases"].
-    Left cell accepts comma OR pipe separated synonyms (multi-word OK).
-    Matches are case-insensitive, whole-word, and allow '-shaped' or ' shaped'.
-    """
     s = load_settings()
     if not s.get("shape_aliases_enabled", True):
         return []
@@ -298,7 +291,7 @@ def _compile_shape_aliases_from_file():
         tokens = [t.strip() for t in re.split(r"[|,]", raw) if t.strip()]
         if not tokens:
             continue
-        tokens = sorted(set(tokens), key=lambda t: -len(t))  # longest first
         pat = r"\b(?:" + "|".join(re.escape(t) for t in tokens) + r")(?:[-\s]?shaped)?\b"
         compiled.append((re.compile(pat, flags=re.I), name))
     return compiled
@@ -431,7 +424,6 @@ def run_batch(
     start = time.time()
     leftover: List[str] = []
-    # Progress bar shows inside the GPU worker
     for idx, path in enumerate(progress.tqdm(files, desc="Captioning")):
         try:
             im = Image.open(path).convert("RGB")
@@ -447,7 +439,6 @@ def run_batch(
         session_rows.append({"filename": filename, "caption": cap, "path": path, "thumb_path": thumb})
         processed += 1
-        # Time-slice to avoid Zero GPU timeouts
         if time_budget_s and (time.time() - start) >= float(time_budget_s):
             leftover = files[idx+1:]
             break
@@ -466,7 +457,6 @@ def run_batch(
         total,
     )
-# Ensure Spaces detects at least one GPU function at startup (without CUDA in main proc)
 @gpu
 @torch.no_grad()
 def _gpu_startup_warm():
@@ -479,40 +469,13 @@ def _gpu_startup_warm():
 # ------------------------------
-# 9) Export helpers (CSV/XLSX/TXT)
 # ------------------------------
-def _as_table_list(value: Any) -> list:
-    """Return a plain list-of-lists regardless of whether Gradio gave us a list or pandas DF."""
-    if value is None:
-        return []
-    if pd is not None:
-        if isinstance(value, pd.DataFrame):
-            return value.reset_index(drop=True).values.tolist()
-        if isinstance(value, pd.Series):
-            return [[x] for x in value.to_list()]
-    if isinstance(value, list):
-        return value
-    try:
-        return list(value)
-    except Exception:
-        return []
-def _sanitize_basename(s: str) -> str:
-    s = (s or "").strip() or "forgecaptions"
-    return re.sub(r"[^A-Za-z0-9._-]+", "_", s)[:120]
-def _ts() -> str:
-    return time.strftime("%Y%m%d_%H%M%S")
-def _export_prefix() -> str:
-    base = _sanitize_basename(load_settings().get("dataset_name", "forgecaptions"))
-    return f"/tmp/{base}_{_ts()}"
 def _rows_to_table(rows: List[dict]) -> list:
     return [[r.get("filename",""), r.get("caption","")] for r in (rows or [])]
 def _table_to_rows(table_value: Any, rows: List[dict]) -> List[dict]:
-    tbl = _as_table_list(table_value)
     new = []
     for i, r in enumerate(rows or []):
         r = dict(r)
@@ -522,9 +485,10 @@ def _table_to_rows(table_value: Any, rows: List[dict]) -> List[dict]:
         new.append(r)
     return new
-def export_csv_from_table(table_value: Any) -> str:
-    data = _as_table_list(table_value)
-    out = _export_prefix() + ".csv"
     with open(out, "w", newline="", encoding="utf-8") as f:
         w = csv.writer(f); w.writerow(["filename", "caption"]); w.writerows(data)
     return out
@@ -546,7 +510,7 @@ def _resize_for_excel(path: str, px: int) -> str:
     except Exception:
         return path
-def export_excel_with_thumbs(table_value: Any, session_rows: List[dict], thumb_px: int) -> str:
     try:
         from openpyxl import Workbook
         from openpyxl.drawing.image import Image as XLImage
@@ -554,7 +518,7 @@ def export_excel_with_thumbs(table_value: Any, session_rows: List[dict], thumb_p
         raise RuntimeError("Excel export requires 'openpyxl' in requirements.txt.") from e
     caption_by_file = {}
-    for row in _as_table_list(table_value):
         if not row:
             continue
         fn = str(row[0]) if len(row) > 0 else ""
@@ -568,7 +532,6 @@ def export_excel_with_thumbs(table_value: Any, session_rows: List[dict], thumb_p
     ws.column_dimensions["B"].width = 42
     ws.column_dimensions["C"].width = 100
-    # Convert pixel target to approx. row points (Excel ≈ 0.75 * px)
     row_h = int(int(thumb_px) * 0.75)
     r_i = 2
     for r in (session_rows or []):
@@ -586,64 +549,53 @@ def export_excel_with_thumbs(table_value: Any, session_rows: List[dict], thumb_p
                 pass
         r_i += 1
-    out = _export_prefix() + ".xlsx"
     wb.save(out)
     return out
-def export_txt_zip_from_table(table_value: Any) -> str:
-    """Create one .txt per row (filename-based) and zip them."""
-    data = _as_table_list(table_value)
-    # Clear previous txt staging
-    for name in os.listdir(TXT_EXPORT_DIR):
         try:
-            os.remove(os.path.join(TXT_EXPORT_DIR, name))
         except Exception:
             pass
-    used = {}
     for row in data:
         if not row:
             continue
-        fn = str(row[0]) if len(row) > 0 and row[0] is not None else "image"
-        cap = str(row[1]) if len(row) > 1 and row[1] is not None else ""
-        stem = _sanitize_basename(re.sub(r"\.[A-Za-z0-9]+$", "", fn))
-        n = used.get(stem, 0); used[stem] = n + 1
-        if n > 0:
-            stem = f"{stem}_{n}"
         with open(os.path.join(TXT_EXPORT_DIR, f"{stem}.txt"), "w", encoding="utf-8") as f:
             f.write(cap)
-    zip_path = _export_prefix() + "_txt.zip"
-    with zipfile.ZipFile(zip_path, "w", zipfile.ZIP_DEFLATED) as z:
-        for name in os.listdir(TXT_EXPORT_DIR):
-            if name.endswith(".txt"):
-                z.write(os.path.join(TXT_EXPORT_DIR, name), arcname=name)
-    return zip_path
 # ------------------------------
-# 10) UI (Blocks)
 # ------------------------------
-BASE_CSS = """
-:root{--galleryW:50%;--tableW:50%;}
-.gradio-container{max-width:100%!important}
-.cf-hero{
-  display:flex; align-items:center; justify-content:center; gap:16px;
-  margin:4px 0 12px; text-align:center;
-}
-.cf-hero .cf-text{ text-align:center; }
-.cf-logo{
-  height: auto; width:auto; object-fit:contain; display:block; flex:0 0 auto;
-}
-.cf-title{margin:0;font-size:3.25rem;line-height:1;letter-spacing:.2px}
-.cf-sub{margin:6px 0 0;font-size:1.1rem;color:#cfd3da}
-/* Results area */
-.cf-scroll{max-height:70vh; overflow-y:auto; border:1px solid #e6e6e6; border-radius:10px; padding:8px}
-#cfGal .grid > div { height: 96px; }
-"""
-def _render_header_html(logo_px: int) -> str:
     return f"""
 <div class="cf-hero">
   {logo_b64_img()}
@@ -656,24 +608,72 @@ def _render_header_html(logo_px: int) -> str:
 </div>
 <hr>
 <style>
-  .cf-logo {{ height: {int(logo_px)}px; width: auto; object-fit: contain; }}
 </style>
 """
 with gr.Blocks(css=BASE_CSS, title="ForgeCaptions") as demo:
-    # Ensure Spaces sees a GPU function (without touching CUDA in main)
     demo.load(_gpu_startup_warm, inputs=None, outputs=None)
-    # Settings
     settings = load_settings()
-    # Header (live size)
-    header_html = gr.HTML(_render_header_html(settings.get("logo_px", 180)))
-    # ---- Controls group (left/right columns)
     with gr.Group():
         with gr.Row():
-            # LEFT: Style + Extra + Name/Prefix/Suffix
             with gr.Column(scale=2):
                 with gr.Accordion("Caption style (choose one or combine)", open=True):
                     style_checks = gr.CheckboxGroup(
@@ -693,35 +693,36 @@ with gr.Blocks(css=BASE_CSS, title="ForgeCaptions") as demo:
                     add_start  = gr.Textbox(label="Add text to start", value=settings.get("begin",""))
                     add_end    = gr.Textbox(label="Add text to end", value=settings.get("end",""))
-            # RIGHT: Instruction preview + dataset + sliders
             with gr.Column(scale=1):
                 with gr.Accordion("Model Instructions", open=False):
-                    instruction_preview = gr.Textbox(
-                        label=None, lines=12,
-                        value=final_instruction(
-                            settings.get("styles", ["Character training (long)"]),
-                            settings.get("extras", []),
-                            settings.get("name", ""),
-                        )
-                    )
                 dataset_name = gr.Textbox(label="Dataset name (export title prefix)",
                                           value=settings.get("dataset_name", "forgecaptions"))
                 max_side   = gr.Slider(256, 1024, settings.get("max_side", 896), step=32, label="Max side (resize)")
                 excel_thumb_px = gr.Slider(64, 256, value=settings.get("excel_thumb_px", 128),
                                            step=8, label="Excel thumbnail size (px)")
-                logo_px = gr.Slider(80, 400, value=settings.get("logo_px", 180),
-                                    step=10, label="Logo height (px)")
-                # Chunking controls
                 chunk_mode = gr.Radio(
                     choices=["Auto", "Manual (all at once)", "Manual (step)"],
-                    value="Manual (step)",
-                    label="Batch mode"
                 )
                 chunk_size = gr.Slider(1, 50, value=10, step=1, label="Chunk size")
                 gpu_budget = gr.Slider(20, 110, value=55, step=5, label="Max seconds per GPU call")
-    # Keep instruction text in sync + persist settings
-    def _refresh_instruction(styles, extra, name_value, trigv, begv, endv, excel_px, ms, dsn):
         instr = final_instruction(styles or ["Character training (long)"], extra or [], name_value)
         cfg = load_settings()
         cfg.update({
@@ -731,31 +732,37 @@ with gr.Blocks(css=BASE_CSS, title="ForgeCaptions") as demo:
             "trigger": trigv, "begin": begv, "end": endv,
             "excel_thumb_px": int(excel_px),
             "max_side": int(ms),
-            "dataset_name": dsn or "forgecaptions",
         })
         save_settings(cfg)
         return instr
-    for comp in [style_checks, extra_opts, name_input, trig, add_start, add_end, excel_thumb_px, max_side, dataset_name]:
-        comp.change(
-            _refresh_instruction,
-            inputs=[style_checks, extra_opts, name_input, trig, add_start, add_end, excel_thumb_px, max_side, dataset_name],
-            outputs=[instruction_preview]
-        )
-    def _set_logo_px(px):
         cfg = load_settings()
-        cfg["logo_px"] = int(px)
         save_settings(cfg)
-        return _render_header_html(int(px))
-    logo_px.change(_set_logo_px, inputs=[logo_px], outputs=[header_html])
-    # Initial instruction render on load (in case)
-    demo.load(lambda s,e,n: final_instruction(s or ["Character training (long)"], e or [], n),
-              inputs=[style_checks, extra_opts, name_input], outputs=[instruction_preview])
-    # ---- Shape Aliases (positioned with settings, BEFORE uploads)
     with gr.Accordion("Shape Aliases", open=False):
         gr.Markdown(
             "### 🔷 Shape Aliases\n"
@@ -806,11 +813,11 @@ with gr.Blocks(css=BASE_CSS, title="ForgeCaptions") as demo:
                 input_files = gr.File(label="Drop images", file_types=["image"], file_count="multiple", type="filepath")
             run_button = gr.Button("Caption batch", variant="primary")
-    # ---- Results (UNCHANGED POSITION): Gallery left, Table right
     rows_state  = gr.State(load_session())
     autosave_md = gr.Markdown("Ready.")
-    progress_md = gr.Markdown("")
-    remaining_state = gr.State([])  # for manual step mode
     with gr.Row():
         with gr.Column(scale=1):
@@ -818,29 +825,27 @@ with gr.Blocks(css=BASE_CSS, title="ForgeCaptions") as demo:
                 label="Results (image + caption)",
                 show_label=True,
                 columns=3,
-                height=520,
                 elem_id="cfGal",
                 elem_classes=["cf-scroll"]
             )
-        with gr.Column(scale=1):
             table = gr.Dataframe(
                 label="Editable captions (whole session)",
                 value=_rows_to_table(load_session()),
                 headers=["filename", "caption"],
                 interactive=True,
                 wrap=True,
-                elem_id="cfTable",
-                elem_classes=["cf-scroll"]
             )
-    # ---- Step panel (restored)
     step_panel = gr.Group(visible=False)
     with step_panel:
         step_msg    = gr.Markdown("")
         step_next   = gr.Button("Process next chunk")
         step_finish = gr.Button("Finish")
-    # ---- Exports (CSV / XLSX / TXT)
     with gr.Row():
         with gr.Column():
             export_csv_btn  = gr.Button("Export CSV")
@@ -852,52 +857,47 @@ with gr.Blocks(css=BASE_CSS, title="ForgeCaptions") as demo:
             export_txt_btn  = gr.Button("Export captions as .txt (zip)")
             txt_zip         = gr.File(label="TXT zip", visible=False)
-    # ---- Scroll sync (gallery ↔ table)
     gr.HTML("""
 <script>
 (function () {
-  function findGalleryScrollRoot() {
     const host = document.querySelector("#cfGal");
     if (!host) return null;
-    return host.querySelector(".grid") || host.querySelector("[data-testid='gallery']") || host;
   }
-  function findTableScrollRoot() {
-    const host = document.querySelector("#cfTable");
-    if (!host) return null;
-    return host.querySelector(".wrap") ||
-           host.querySelector(".dataframe-wrap") ||
-           (host.querySelector("table") ? host.querySelector("table").parentElement : null) ||
-           host;
   }
   function syncScroll(a, b) {
     if (!a || !b) return;
     let lock = false;
-    const onScrollA = () => { if (lock) return; lock = true; b.scrollTop = a.scrollTop; lock = false; };
-    const onScrollB = () => { if (lock) return; lock = true; a.scrollTop = b.scrollTop; lock = false; };
-    a.addEventListener("scroll", onScrollA, { passive: true });
-    b.addEventListener("scroll", onScrollB, { passive: true });
   }
   let tries = 0;
-  const timer = setInterval(() => {
     tries++;
-    const gal = findGalleryScrollRoot();
-    const tab = findTableScrollRoot();
     if (gal && tab) {
-      const H = Math.min(gal.clientHeight || 520, tab.clientHeight || 520);
       gal.style.maxHeight = H + "px";
       gal.style.overflowY = "auto";
       tab.style.maxHeight = H + "px";
       tab.style.overflowY = "auto";
       syncScroll(gal, tab);
-      clearInterval(timer);
     }
-    if (tries > 20) clearInterval(timer);
-  }, 100);
 })();
 </script>
 """)
-    # ---- Batch chunking logic
     def _split_chunks(files, csize: int):
         files = files or []
         c = max(1, int(csize))
@@ -910,8 +910,7 @@ with gr.Blocks(css=BASE_CSS, title="ForgeCaptions") as demo:
     def _run_click(files, rows, instr, ms, mode, csize, budget_s):
         t, p, m = _tpms()
         files = files or []
-        # Manual step → process first chunk only
         if mode == "Manual (step)" and files:
             chunks = _split_chunks(files, int(csize))
             batch = chunks[0]
@@ -925,7 +924,6 @@ with gr.Blocks(css=BASE_CSS, title="ForgeCaptions") as demo:
             prog = f"Batch progress: {done}/{total} processed in this step • Remaining overall: {len(remaining)}"
             return new_rows, gal, tbl, stamp, remaining, panel_vis, gr.update(value=msg), gr.update(value=prog)
-        # Auto / All-at-once (still obey time budget)
         new_rows, gal, tbl, stamp, leftover, done, total = run_batch(
             files, rows or [], instr, t, p, m, int(ms), float(budget_s)
         )
@@ -945,7 +943,6 @@ with gr.Blocks(css=BASE_CSS, title="ForgeCaptions") as demo:
         remain = remain or []
         if not remain:
             return rows, gr.update(value="No files remaining."), gr.update(visible=False), [], [], [], "Saved.", gr.update(value="")
         batch = remain[:int(csize)]
         leftover = remain[int(csize):]
         new_rows, gal, tbl, stamp, leftover_from_batch, done, total = run_batch(
@@ -956,7 +953,7 @@ with gr.Blocks(css=BASE_CSS, title="ForgeCaptions") as demo:
         msg = f"{len(leftover)} files remain. Process next chunk?" if leftover else "All done."
         prog = f"Batch progress: {done}/{total} processed in this step • Remaining overall: {len(leftover)}"
         return new_rows, msg, panel_vis, leftover, gal, tbl, stamp, gr.update(value=prog)
     step_next.click(
         _step_next,
         inputs=[remaining_state, rows_state, instruction_preview, max_side, chunk_size, gpu_budget],
@@ -966,13 +963,9 @@ with gr.Blocks(css=BASE_CSS, title="ForgeCaptions") as demo:
     def _step_finish():
         return gr.update(visible=False), gr.update(value=""), []
-    step_finish.click(
-        _step_finish,
-        inputs=None,
-        outputs=[step_panel, step_msg, remaining_state]
-    )
-    # Table edits → persist + refresh gallery
     def sync_table_to_session(table_value: Any, session_rows: List[dict]) -> Tuple[List[dict], list, str]:
         session_rows = _table_to_rows(table_value, session_rows or [])
         save_session(session_rows)
@@ -980,29 +973,25 @@ with gr.Blocks(css=BASE_CSS, title="ForgeCaptions") as demo:
                          for r in session_rows if (r.get("thumb_path") or r.get("path"))]
         return session_rows, gallery_pairs, f"Saved • {time.strftime('%H:%M:%S')}"
-    table.change(
-        sync_table_to_session,
-        inputs=[table, rows_state],
-        outputs=[rows_state, gallery, autosave_md]
-    )
-    # Exports (CSV/XLSX/TXT)
     export_csv_btn.click(
-        lambda tbl: (export_csv_from_table(tbl), gr.update(visible=True)),
-        inputs=[table], outputs=[csv_file, csv_file]
     )
     export_xlsx_btn.click(
-        lambda tbl, rows, px: (export_excel_with_thumbs(tbl, rows or [], int(px)), gr.update(visible=True)),
-        inputs=[table, rows_state, excel_thumb_px], outputs=[xlsx_file, xlsx_file]
     )
     export_txt_btn.click(
-        lambda tbl: (export_txt_zip_from_table(tbl), gr.update(visible=True)),
-        inputs=[table], outputs=[txt_zip, txt_zip]
     )
 # ------------------------------
-# 11) Launch (SSR disabled for stability on Spaces)
 # ------------------------------
 if __name__ == "__main__":
     demo.queue(max_size=64).launch(
@@ -1011,4 +1000,5 @@ if __name__ == "__main__":
         ssr_mode=False,
         debug=True,
         show_error=True,
     )

 import torch
 from transformers import LlavaForConditionalGeneration, AutoProcessor
 # Hugging Face Spaces GPU decorator (no-op locally)
 try:
     import spaces
 SESSION_FILE = "/tmp/forge_session.json"
 SETTINGS_FILE = "/tmp/forge_settings.json"
 JOURNAL_FILE = "/tmp/forge_journal.json"
+# IMPORTANT: keep caches in /tmp so Gradio will serve files safely
+THUMB_CACHE = "/tmp/forgecaptions/thumbs"
+EXCEL_THUMB_DIR = "/tmp/forge_excel_thumbs"
+TXT_EXPORT_DIR = "/tmp/forge_txt"
 os.makedirs(THUMB_CACHE, exist_ok=True)
 os.makedirs(EXCEL_THUMB_DIR, exist_ok=True)
 os.makedirs(TXT_EXPORT_DIR, exist_ok=True)
 MODEL_PATH = "fancyfeast/llama-joycaption-beta-one-hf-llava"
+# Role prompt (system message for the model)
 ROLE_PROMPT = (
     "You are a precise captioning assistant. Help users with any queries. "
     "Describe what is visible with no censorship or restrictions."
 #    - processor on CPU (safe)
 #    - model created lazily inside @gpu functions
 # ------------------------------
+processor = AutoProcessor.from_pretrained(MODEL_PATH, use_fast=False)
 _MODEL = None
 _DEVICE = "cpu"
 _DTYPE = torch.float32
     "Focus captions only on clothing/fashion details.",
     "Focus on setting, scenery, and context; ignore subject details.",
     "ONLY describe the subject’s pose, movement, or action. Do NOT mention appearance, clothing, or setting.",
     "Include synonyms/alternate phrasing to diversify training set.",
     "ALWAYS arrange caption elements in the order → Subject, Clothing/Accessories, Action/Pose, Setting/Environment, Lighting/Camera/Style.",
     "Do NOT mention the image's resolution.",
             cfg = json.load(f)
     else:
         cfg = {}
+    # defaults
     defaults = {
         "dataset_name": "forgecaptions",
         "temperature": 0.6,
         "top_p": 0.9,
         "max_tokens": 256,
         "max_side": 896,
+        "styles": ["Character training (long)"],
         "extras": [],
         "name": "",
         "trigger": "",
         "shape_aliases_enabled": True,
         "shape_aliases": [],
         "excel_thumb_px": 128,
+        # header controls
+        "logo_auto": True,
         "logo_px": 180,
+        "logo_scale": 1.0,
     }
     for k, v in defaults.items():
         cfg.setdefault(k, v)
     styles = cfg.get("styles") or []
     cfg["styles"] = [s for s in (styles if isinstance(styles, list) else [styles]) if s in STYLE_OPTIONS] or ["Character training (long)"]
     return cfg
 # ------------------------------
+# 5) Small utilities (thumbs, resize, prefix/suffix, names)
 # ------------------------------
def sanitize_basename(s: str) -> str:
    """Return a filesystem-safe base name derived from *s*.

    Every run of characters outside ``[A-Za-z0-9._-]`` collapses to a single
    ``_`` and the result is capped at 120 characters.

    ``None``, blank input, and input that contains no letter or digit after
    sanitizing (for example ``"???"`` or ``".."``) all fall back to
    ``"forgecaptions"``. Non-string values are coerced with ``str()`` so a
    numeric cell does not raise ``AttributeError``.
    """
    default = "forgecaptions"
    text = "" if s is None else str(s).strip()
    cleaned = re.sub(r"[^A-Za-z0-9._-]+", "_", text or default)[:120]
    # A name made only of separators ("_", "..", "-") identifies nothing;
    # use the default rather than emit it.
    return cleaned if cleaned.strip("._-") else default
 def ensure_thumb(path: str, max_side=256) -> str:
     try:
         im = Image.open(path).convert("RGB")
     return " ".join([p for p in parts if p])
 def logo_b64_img() -> str:
     candidates = [
         os.path.join(APP_DIR, "forgecaptions-logo.png"),
         os.path.join(APP_DIR, "captionforge-logo.png"),
 # 6) Shape Aliases (comma/pipe synonyms per row)
 # ------------------------------
 def _compile_shape_aliases_from_file():
     s = load_settings()
     if not s.get("shape_aliases_enabled", True):
         return []
         tokens = [t.strip() for t in re.split(r"[|,]", raw) if t.strip()]
         if not tokens:
             continue
+        tokens = sorted(set(tokens), key=lambda t: -len(t))
         pat = r"\b(?:" + "|".join(re.escape(t) for t in tokens) + r")(?:[-\s]?shaped)?\b"
         compiled.append((re.compile(pat, flags=re.I), name))
     return compiled
     start = time.time()
     leftover: List[str] = []
     for idx, path in enumerate(progress.tqdm(files, desc="Captioning")):
         try:
             im = Image.open(path).convert("RGB")
         session_rows.append({"filename": filename, "caption": cap, "path": path, "thumb_path": thumb})
         processed += 1
         if time_budget_s and (time.time() - start) >= float(time_budget_s):
             leftover = files[idx+1:]
             break
         total,
     )
 @gpu
 @torch.no_grad()
 def _gpu_startup_warm():
 # ------------------------------
+# 9) Export helpers (CSV/XLSX/TXT ZIP)
 # ------------------------------
 def _rows_to_table(rows: List[dict]) -> list:
     return [[r.get("filename",""), r.get("caption","")] for r in (rows or [])]
 def _table_to_rows(table_value: Any, rows: List[dict]) -> List[dict]:
+    tbl = table_value or []
     new = []
     for i, r in enumerate(rows or []):
         r = dict(r)
         new.append(r)
     return new
def export_csv_from_table(table_value: Any, dataset_name: str) -> str:
    """Write the caption table to a CSV file under ``/tmp`` and return its path.

    Args:
        table_value: Table rows, each ``[filename, caption]``. Accepts a list
            of lists, ``None``, or an object exposing ``.values.tolist()``
            (a pandas DataFrame), for which a plain truthiness test raises
            ``ValueError``.
        dataset_name: Used, after ``sanitize_basename``, as the file-name
            prefix.

    Returns:
        Path of the written file: ``/tmp/<name>_<unix-seconds>.csv``.
    """
    if table_value is None:
        data = []
    elif callable(getattr(getattr(table_value, "values", None), "tolist", None)):
        # DataFrame-like: convert to plain rows without importing pandas.
        data = table_value.values.tolist()
    else:
        data = list(table_value)

    name = sanitize_basename(dataset_name)
    out = f"/tmp/{name}_{int(time.time())}.csv"
    with open(out, "w", newline="", encoding="utf-8") as f:
        writer = csv.writer(f)
        writer.writerow(["filename", "caption"])
        writer.writerows(data)
    return out
     except Exception:
         return path
+def export_excel_with_thumbs(table_value: Any, session_rows: List[dict], thumb_px: int, dataset_name: str) -> str:
     try:
         from openpyxl import Workbook
         from openpyxl.drawing.image import Image as XLImage
         raise RuntimeError("Excel export requires 'openpyxl' in requirements.txt.") from e
     caption_by_file = {}
+    for row in (table_value or []):
         if not row:
             continue
         fn = str(row[0]) if len(row) > 0 else ""
     ws.column_dimensions["B"].width = 42
     ws.column_dimensions["C"].width = 100
     row_h = int(int(thumb_px) * 0.75)
     r_i = 2
     for r in (session_rows or []):
                 pass
         r_i += 1
+    name = sanitize_basename(dataset_name)
+    out = f"/tmp/{name}_{int(time.time())}.xlsx"
     wb.save(out)
     return out
def export_txt_zip(table_value: Any, dataset_name: str) -> str:
    """Write one ``.txt`` caption file per table row and zip them.

    Each row is ``[filename, caption]``. The text file is named after the
    filename with its trailing extension removed and passed through
    ``sanitize_basename``; rows whose name is empty become ``item``. When two
    rows map to the same name, later ones get ``_1``, ``_2``, ... appended,
    skipping any suffix that is already taken so no caption is overwritten.

    Args:
        table_value: List of rows, ``None``, or an object exposing
            ``.values.tolist()`` (a pandas DataFrame).
        dataset_name: Used, after ``sanitize_basename``, as the zip-name
            prefix.

    Returns:
        Path of the archive: ``/tmp/<name>_<unix-seconds>_txt.zip``.
    """
    if table_value is None:
        data = []
    elif callable(getattr(getattr(table_value, "values", None), "tolist", None)):
        # DataFrame-like: a bare truthiness test on it would raise ValueError.
        data = table_value.values.tolist()
    else:
        data = list(table_value)

    def _cell_text(cell: Any) -> str:
        # None and NaN (NaN != NaN) both mean "empty cell"; anything else is
        # coerced so numeric cells do not break .strip().
        if cell is None or cell != cell:
            return ""
        return str(cell).strip()

    # Clear files left in the staging directory by a previous export.
    for fn in os.listdir(TXT_EXPORT_DIR):
        try:
            os.remove(os.path.join(TXT_EXPORT_DIR, fn))
        except OSError:
            pass  # best effort; only files written below are zipped

    written = []   # file names in row order
    taken = set()  # stems already used in this export
    for row in data:
        if not row:
            continue
        orig = _cell_text(row[0]) if len(row) > 0 else ""
        base = sanitize_basename(re.sub(r"\.[A-Za-z0-9]+$", "", orig) or "item")
        stem, n = base, 0
        while stem in taken:
            n += 1
            stem = f"{base}_{n}"
        taken.add(stem)
        cap = _cell_text(row[1]) if len(row) > 1 else ""
        with open(os.path.join(TXT_EXPORT_DIR, f"{stem}.txt"), "w", encoding="utf-8") as f:
            f.write(cap)
        written.append(f"{stem}.txt")

    name = sanitize_basename(dataset_name)
    zpath = f"/tmp/{name}_{int(time.time())}_txt.zip"
    with zipfile.ZipFile(zpath, "w", zipfile.ZIP_DEFLATED) as z:
        # Zip exactly what this call wrote, not whatever is in the directory.
        for fn in written:
            z.write(os.path.join(TXT_EXPORT_DIR, fn), arcname=fn)
    return zpath
 # ------------------------------
+# 10) UI header helper (logo auto-fit)
 # ------------------------------
+def _render_header_html(auto: bool, px: int, scale: float) -> str:
+    auto_js = "true" if auto else "false"
     return f"""
 <div class="cf-hero">
   {logo_b64_img()}
 </div>
 <hr>
 <style>
+  .cf-logo {{ height: auto; width: auto; object-fit: contain; }}
 </style>
+<script>
+(function() {{
+  const AUTO = {auto_js};
+  const PX = {int(px)};
+  const SCALE = {float(scale)};
+  function fit() {{
+    const logo = document.querySelector(".cf-logo");
+    const text = document.querySelector(".cf-text");
+    if (!logo || !text) return;
+    if (AUTO) {{
+      const h = text.getBoundingClientRect().height || 180;
+      const target = Math.max(80, Math.min(420, Math.round(h * SCALE)));
+      logo.style.height = target + "px";
+    }} else {{
+      logo.style.height = Math.max(80, Math.min(420, PX)) + "px";
+    }}
+  }}
+  const textNode = document.querySelector(".cf-text");
+  if (window.ResizeObserver && textNode) {{
+    const ro = new ResizeObserver(fit);
+    ro.observe(textNode);
+  }}
+  window.addEventListener("resize", fit, {{ passive: true }});
+  setTimeout(fit, 0);
+}})();
+</script>
+"""
+# ------------------------------
+# 11) UI (Blocks)
+# ------------------------------
+BASE_CSS = """
+:root{--galleryW:50%;--tableW:50%;}
+.gradio-container{max-width:100%!important}
+/* Header */
+.cf-hero{display:flex; align-items:center; justify-content:center; gap:16px;
+  margin:4px 0 12px; text-align:center;}
+.cf-hero .cf-text{text-align:center;}
+.cf-title{margin:0;font-size:3.25rem;line-height:1;letter-spacing:.2px}
+.cf-sub{margin:6px 0 0;font-size:1.1rem;color:#cfd3da}
+/* Results area + robust scrollbars */
+.cf-scroll{border:1px solid #e6e6e6; border-radius:10px; padding:8px}
+#cfGal{max-height:520px; overflow-y:auto !important;}
+#cfTableWrap{max-height:520px; overflow-y:auto !important;}
+#cfGal [data-testid="gallery"]{height:auto !important;}
+#cfGal .grid > div { height: 96px; }
 """
 with gr.Blocks(css=BASE_CSS, title="ForgeCaptions") as demo:
     demo.load(_gpu_startup_warm, inputs=None, outputs=None)
+    # ---- Header
     settings = load_settings()
+    header_html = gr.HTML(_render_header_html(settings.get("logo_auto", True),
+                                              settings.get("logo_px", 180),
+                                              settings.get("logo_scale", 1.0)))
+    # ---- Controls group
     with gr.Group():
         with gr.Row():
+            # LEFT: styles / extras / name & prefix-suffix
             with gr.Column(scale=2):
                 with gr.Accordion("Caption style (choose one or combine)", open=True):
                     style_checks = gr.CheckboxGroup(
                     add_start  = gr.Textbox(label="Add text to start", value=settings.get("begin",""))
                     add_end    = gr.Textbox(label="Add text to end", value=settings.get("end",""))
+            # RIGHT: instructions + dataset + general sliders + logo controls
             with gr.Column(scale=1):
                 with gr.Accordion("Model Instructions", open=False):
+                    instruction_preview = gr.Textbox(label=None, lines=12,
+                        value=final_instruction(settings.get("styles", ["Character training (long)"]),
+                                                settings.get("extras", []),
+                                                settings.get("name","")))
                 dataset_name = gr.Textbox(label="Dataset name (export title prefix)",
                                           value=settings.get("dataset_name", "forgecaptions"))
                 max_side   = gr.Slider(256, 1024, settings.get("max_side", 896), step=32, label="Max side (resize)")
                 excel_thumb_px = gr.Slider(64, 256, value=settings.get("excel_thumb_px", 128),
                                            step=8, label="Excel thumbnail size (px)")
+                # Chunking
                 chunk_mode = gr.Radio(
                     choices=["Auto", "Manual (all at once)", "Manual (step)"],
+                    value="Manual (step)", label="Batch mode"
                 )
                 chunk_size = gr.Slider(1, 50, value=10, step=1, label="Chunk size")
                 gpu_budget = gr.Slider(20, 110, value=55, step=5, label="Max seconds per GPU call")
+                # Logo controls
+                logo_auto = gr.Checkbox(value=settings.get("logo_auto", True),
+                                        label="Auto-match logo height to text")
+                logo_px   = gr.Slider(80, 420, value=settings.get("logo_px", 180),
+                                      step=4, label="Logo height (px, if Auto off)")
+                logo_scale = gr.Slider(0.6, 1.6, value=settings.get("logo_scale", 1.0),
+                                       step=0.05, label="Logo scale × (if Auto on)")
+    # Persist instruction + general settings
+    def _refresh_instruction(styles, extra, name_value, trigv, begv, endv, excel_px, ms):
         # Rebuild the instruction text and persist the general settings.
         #
         # The instruction comes from final_instruction(); an empty style
         # selection falls back to ["Character training (long)"] and empty
         # extras to []. The returned string is what the change handlers
         # write into the instruction preview textbox.
         #
         # Settings written on every call: "trigger", "begin", "end",
         # "excel_thumb_px" (int) and "max_side" (int). The settings file is
         # reloaded first so keys owned by other handlers are kept.
         #
         # NOTE(review): styles / extra / name_value are not saved here even
         # though the preview's initial value reads "styles", "extras" and
         # "name" from settings -- confirm they are persisted elsewhere.
         instr = final_instruction(styles or ["Character training (long)"], extra or [], name_value)
         cfg = load_settings()
         cfg.update({
             "trigger": trigv, "begin": begv, "end": endv,
             "excel_thumb_px": int(excel_px),
             "max_side": int(ms),
         })
         save_settings(cfg)
         return instr
+    for comp in [style_checks, extra_opts, name_input, trig, add_start, add_end, excel_thumb_px, max_side]:
+        comp.change(_refresh_instruction,
+                    inputs=[style_checks, extra_opts, name_input, trig, add_start, add_end, excel_thumb_px, max_side],
+                    outputs=[instruction_preview])
+    def _save_dataset_name(name):
         # Persist the dataset name under the "dataset_name" settings key,
         # after passing it through sanitize_basename().
         #
         # NOTE(review): the handler is registered with outputs=[] but still
         # returns gr.update(); confirm the installed Gradio version ignores
         # the surplus return value without warning, or return nothing.
         cfg = load_settings()
+        cfg["dataset_name"] = sanitize_basename(name)
         save_settings(cfg)
+        return gr.update()
+    dataset_name.change(_save_dataset_name, inputs=[dataset_name], outputs=[])
+    # Header controls live update
+    def _update_header(auto, px, scale):
         # Persist the three logo controls and return freshly rendered header
         # HTML built by _render_header_html() from the saved values.
         #
         # Values are coerced before saving: auto -> bool, px -> int,
         # scale -> float. The same coerced values are passed to the renderer
         # so the stored settings and the displayed header agree.
+        cfg = load_settings()
+        cfg["logo_auto"] = bool(auto)
+        cfg["logo_px"] = int(px)
+        cfg["logo_scale"] = float(scale)
+        save_settings(cfg)
+        return _render_header_html(cfg["logo_auto"], cfg["logo_px"], cfg["logo_scale"])
+    logo_px.change(_update_header, inputs=[logo_auto, logo_px, logo_scale], outputs=[header_html])
+    logo_auto.change(_update_header, inputs=[logo_auto, logo_px, logo_scale], outputs=[header_html])
+    logo_scale.change(_update_header, inputs=[logo_auto, logo_px, logo_scale], outputs=[header_html])
+    # ---- Shape Aliases block (placed WITH other settings, before uploads)
     with gr.Accordion("Shape Aliases", open=False):
         gr.Markdown(
             "### 🔷 Shape Aliases\n"
                 input_files = gr.File(label="Drop images", file_types=["image"], file_count="multiple", type="filepath")
             run_button = gr.Button("Caption batch", variant="primary")
+    # ---- Results area (gallery left / table right)
     rows_state  = gr.State(load_session())
     autosave_md = gr.Markdown("Ready.")
+    progress_md = gr.Markdown("")
+    remaining_state = gr.State([])
     with gr.Row():
         with gr.Column(scale=1):
                 label="Results (image + caption)",
                 show_label=True,
                 columns=3,
                 elem_id="cfGal",
                 elem_classes=["cf-scroll"]
             )
+        with gr.Column(scale=1, elem_id="cfTableWrap", elem_classes=["cf-scroll"]):
             table = gr.Dataframe(
                 label="Editable captions (whole session)",
                 value=_rows_to_table(load_session()),
                 headers=["filename", "caption"],
                 interactive=True,
                 wrap=True,
+                elem_id="cfTable"
             )
+    # ---- Step panel
     step_panel = gr.Group(visible=False)
     with step_panel:
         step_msg    = gr.Markdown("")
         step_next   = gr.Button("Process next chunk")
         step_finish = gr.Button("Finish")
+    # ---- Exports
     with gr.Row():
         with gr.Column():
             export_csv_btn  = gr.Button("Export CSV")
             export_txt_btn  = gr.Button("Export captions as .txt (zip)")
             txt_zip         = gr.File(label="TXT zip", visible=False)
+    # ---- Robust scroll sync (works with Gradio v5 Gallery)
     # The embedded script polls every 120 ms (giving up after about 30
     # tries) until both #cfGal and #cfTableWrap exist. It then clamps both
     # to a shared max height between 360 and 520 px and mirrors scrollTop
     # in both directions.
     # NOTE(review): <script> tags injected through gr.HTML may not be
     # executed by the browser; confirm this runs, or move it to the
     # Blocks-level js/head hooks.
     # NOTE(review): scroll events are dispatched asynchronously, so the
     # `lock` flag is probably already cleared when the mirrored event
     # fires; the handlers still settle because both scrollTop values match.
     gr.HTML("""
 <script>
 (function () {
+  function findGal() {
     const host = document.querySelector("#cfGal");
     if (!host) return null;
+    return host.querySelector('[data-testid="gallery"]') || host;
   }
+  function findTbl() {
+    return document.querySelector("#cfTableWrap");
   }
   function syncScroll(a, b) {
     if (!a || !b) return;
     let lock = false;
+    const onA = () => { if (lock) return; lock = true; b.scrollTop = a.scrollTop; lock = false; };
+    const onB = () => { if (lock) return; lock = true; a.scrollTop = b.scrollTop; lock = false; };
+    a.addEventListener("scroll", onA, { passive: true });
+    b.addEventListener("scroll", onB, { passive: true });
   }
   let tries = 0;
+  const t = setInterval(() => {
     tries++;
+    const gal = findGal();
+    const tab = findTbl();
     if (gal && tab) {
+      const H = Math.min(520, Math.max(360, tab.clientHeight || 520));
       gal.style.maxHeight = H + "px";
       gal.style.overflowY = "auto";
       tab.style.maxHeight = H + "px";
       tab.style.overflowY = "auto";
       syncScroll(gal, tab);
+      clearInterval(t);
     }
+    if (tries > 30) clearInterval(t);
+  }, 120);
 })();
 </script>
 """)
+    # ---- Chunking logic
     def _split_chunks(files, csize: int):
         files = files or []
         c = max(1, int(csize))
     def _run_click(files, rows, instr, ms, mode, csize, budget_s):
         t, p, m = _tpms()
         files = files or []
         if mode == "Manual (step)" and files:
             chunks = _split_chunks(files, int(csize))
             batch = chunks[0]
             prog = f"Batch progress: {done}/{total} processed in this step • Remaining overall: {len(remaining)}"
             return new_rows, gal, tbl, stamp, remaining, panel_vis, gr.update(value=msg), gr.update(value=prog)
         new_rows, gal, tbl, stamp, leftover, done, total = run_batch(
             files, rows or [], instr, t, p, m, int(ms), float(budget_s)
         )
         remain = remain or []
         if not remain:
             return rows, gr.update(value="No files remaining."), gr.update(visible=False), [], [], [], "Saved.", gr.update(value="")
         batch = remain[:int(csize)]
         leftover = remain[int(csize):]
         new_rows, gal, tbl, stamp, leftover_from_batch, done, total = run_batch(
         msg = f"{len(leftover)} files remain. Process next chunk?" if leftover else "All done."
         prog = f"Batch progress: {done}/{total} processed in this step • Remaining overall: {len(leftover)}"
         return new_rows, msg, panel_vis, leftover, gal, tbl, stamp, gr.update(value=prog)
     step_next.click(
         _step_next,
         inputs=[remaining_state, rows_state, instruction_preview, max_side, chunk_size, gpu_budget],
     def _step_finish():
         # End step mode. The three return values map, in order, onto the
         # outputs wired to the Finish button: hide the step panel, blank the
         # step message, and reset the remaining-files state to an empty list.
         return gr.update(visible=False), gr.update(value=""), []
+    step_finish.click(_step_finish, inputs=None, outputs=[step_panel, step_msg, remaining_state])
+    # ---- Table edits → persist + refresh gallery
     def sync_table_to_session(table_value: Any, session_rows: List[dict]) -> Tuple[List[dict], list, str]:
         session_rows = _table_to_rows(table_value, session_rows or [])
         save_session(session_rows)
                          for r in session_rows if (r.get("thumb_path") or r.get("path"))]
         return session_rows, gallery_pairs, f"Saved • {time.strftime('%H:%M:%S')}"
+    table.change(sync_table_to_session, inputs=[table, rows_state], outputs=[rows_state, gallery, autosave_md])
+    # ---- Exports
     export_csv_btn.click(
+        lambda tbl, ds: (export_csv_from_table(tbl, ds), gr.update(visible=True)),
+        inputs=[table, dataset_name], outputs=[csv_file, csv_file]
     )
     export_xlsx_btn.click(
+        lambda tbl, rows, px, ds: (export_excel_with_thumbs(tbl, rows or [], int(px), ds), gr.update(visible=True)),
+        inputs=[table, rows_state, excel_thumb_px, dataset_name], outputs=[xlsx_file, xlsx_file]
     )
     export_txt_btn.click(
+        lambda tbl, ds: (export_txt_zip(tbl, ds), gr.update(visible=True)),
+        inputs=[table, dataset_name], outputs=[txt_zip, txt_zip]
     )
 # ------------------------------
+# 12) Launch (SSR disabled for stability on Spaces)
 # ------------------------------
 if __name__ == "__main__":
     # Script entry point: enable the request queue (max_size=64) and start
     # the app. SSR is switched off (see the section header above), and
     # debug/show_error are on so failures surface in the logs and the UI.
     # allowed_paths lets Gradio serve files from the thumbnail and export
     # directories.
     # NOTE(review): debug=True is left on for the deployed app -- confirm
     # that is intended outside local development.
     demo.queue(max_size=64).launch(
         ssr_mode=False,
         debug=True,
         show_error=True,
+        allowed_paths=[THUMB_CACHE, EXCEL_THUMB_DIR, TXT_EXPORT_DIR],  # serve /tmp caches safely
     )