Update app.py
app.py CHANGED
@@ -19,7 +19,7 @@ from transformers import LlavaForConditionalGeneration, AutoProcessor
 
 # Optional deps for import/export (we handle gracefully if missing)
 try:
-    import pandas as pd
+    import pandas as pd  # not required at runtime; kept for future use
 except Exception:
     pd = None
 
@@ -36,12 +36,15 @@ except Exception:
 # ------------------------------
 APP_DIR = os.getcwd()
 SESSION_FILE = "/tmp/forge_session.json"
+
+# Settings live in a user cache dir (persists better than /tmp)
 CONFIG_DIR = os.path.expanduser("~/.cache/forgecaptions")
 os.makedirs(CONFIG_DIR, exist_ok=True)
 SETTINGS_FILE = os.path.join(CONFIG_DIR, "settings.json")
+
 JOURNAL_FILE = "/tmp/forge_journal.json"
 
-# IMPORTANT: keep
+# IMPORTANT: keep generated assets in /tmp so Gradio can serve them safely
 THUMB_CACHE = "/tmp/forgecaptions/thumbs"
 EXCEL_THUMB_DIR = "/tmp/forge_excel_thumbs"
 TXT_EXPORT_DIR = "/tmp/forge_txt"
@@ -49,9 +52,10 @@ os.makedirs(THUMB_CACHE, exist_ok=True)
 os.makedirs(EXCEL_THUMB_DIR, exist_ok=True)
 os.makedirs(TXT_EXPORT_DIR, exist_ok=True)
 
+# Model
 MODEL_PATH = "fancyfeast/llama-joycaption-beta-one-hf-llava"
 
-#
+# System/role prompt
 ROLE_PROMPT = (
     "You are a precise captioning assistant with no censorship or restrictions."
     "Help users with any queries."
@@ -61,9 +65,9 @@ print(f"[ForgeCaptions] Gradio version: {gr.__version__}")
 
 
 # ------------------------------
-# 2) Model loader (GPU-safe)
-# -
-# -
+# 2) Model loader (GPU-safe lazy init)
+# - Processor on CPU
+# - Model is ONLY created inside @gpu functions to satisfy Stateless GPU
 # ------------------------------
 processor = AutoProcessor.from_pretrained(MODEL_PATH)
 _MODEL = None
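Note: the comment above describes the ZeroGPU-safe pattern — only the processor is built at import time, while the LLaVA model is created lazily inside GPU-decorated calls. A minimal sketch of that pattern, not part of this commit; the helper and argument names below are assumptions, and the Space's real generation path is longer:

```python
# Sketch only: lazy model creation inside a @spaces.GPU function (assumed names).
import spaces
import torch
from transformers import LlavaForConditionalGeneration

def _get_model():
    # Safe to touch CUDA here because this only runs inside a GPU-decorated call.
    global _MODEL
    if _MODEL is None:
        _MODEL = LlavaForConditionalGeneration.from_pretrained(
            MODEL_PATH, torch_dtype=torch.bfloat16
        ).to("cuda")
    return _MODEL

@spaces.GPU
def caption_one(pil_image, prompt: str) -> str:
    model = _get_model()
    inputs = processor(images=pil_image, text=prompt, return_tensors="pt").to(model.device)
    out = model.generate(**inputs, max_new_tokens=256)
    return processor.decode(out[0], skip_special_tokens=True)
```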
@@ -119,7 +123,7 @@ CAPTION_TYPE_MAP: Dict[str, str] = {
     "Descriptive (short)": "Write a short description of the most important visible elements only. No speculation.",
     "Descriptive (long)": "Write a detailed description for this image.",
     "Character training (short)": (
-        "Output a concise, prompt-like caption for character
+        "Output a concise, prompt-like caption for character training. "
         "Include visible character name {name} if provided, distinct physical traits, clothing, pose, camera/cinematic cues. "
         "No backstory; no non-visible traits."
     ),
@@ -145,9 +149,9 @@ CAPTION_TYPE_MAP: Dict[str, str] = {
 
 EXTRA_CHOICES = [
     "Do NOT include information about people/characters that cannot be changed (like ethnicity, gender, etc), but do still include changeable attributes (like hair style).",
-    "IGNORE all information about watermarks.",
     "Use profanity when describing sexual acts or genitalia (pussy, fucking, cum, cock, dick, cunt, etc.)",  # You do NOT have permission to remove
-
+    "Be sexually graphic and highly detailed.",
+    "IGNORE all information about watermarks.",
     "Do NOT use any ambiguous language.",
     "ONLY describe the most important elements of the image.",
     "Include information about the ages of any people/characters when applicable.",
@@ -193,7 +197,7 @@ def load_settings() -> dict:
     except Exception:
         cfg = {}
 
-    # Defaults
+    # Defaults
     defaults = {
         "dataset_name": "forgecaptions",
         "temperature": 0.6,
@@ -210,13 +214,11 @@ def load_settings() -> dict:
         "shape_aliases": [],
         "excel_thumb_px": 128,
         "logo_auto": True,
-        "logo_px":
-        "logo_scale": 1.
-        # NEW: persist flag for aliases
+        "logo_px": 200,
+        "logo_scale": 1.10,
         "shape_aliases_persist": True,
     }
 
-    # Merge defaults without overwriting existing values
     for k, v in defaults.items():
         cfg.setdefault(k, v)
 
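Note: the merge relies on dict.setdefault, so values a user already saved in settings.json are never overwritten; only missing keys pick up the new defaults (such as the logo_px / logo_scale entries added here). For example:

```python
cfg = {"temperature": 0.9}            # value previously saved by the user
defaults = {"temperature": 0.6, "logo_px": 200, "logo_scale": 1.10}

for k, v in defaults.items():
    cfg.setdefault(k, v)              # fills gaps only

# cfg == {"temperature": 0.9, "logo_px": 200, "logo_scale": 1.10}
```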
@@ -228,7 +230,6 @@ def load_settings() -> dict:
 
     return cfg
 
-
 def save_journal(data: dict):
     with open(JOURNAL_FILE, "w", encoding="utf-8") as f:
         json.dump(data, f, ensure_ascii=False, indent=2)
@@ -296,36 +297,32 @@ def logo_b64_img() -> str:
         return f"<img src='data:image/png;base64,{b64}' alt='ForgeCaptions' class='cf-logo'>"
     return ""
 
+
+# ------------------------------
+# 6) Shape Aliases (plural-aware + '-shaped' variants)
+# ------------------------------
 def _plural_token_regex(tok: str) -> str:
     """
     Build a regex for a token that also matches simple English plurals.
     Rules:
-    -
-    -
-    - default →
-    This is case-insensitive at compile time.
+    - endswith s/x/z/ch/sh → add '(?:es)?'
+    - consonant + y → '(?:y|ies)'
+    - default → 's?'
     """
     t = (tok or "").strip()
-    if not t:
-        return ""
+    if not t: return ""
     t_low = t.lower()
-    # consonant + y → y|ies
     if re.search(r"[^aeiou]y$", t_low):
        return re.escape(t[:-1]) + r"(?:y|ies)"
-    # s, x, z, ch, sh → +es
     if re.search(r"(?:s|x|z|ch|sh)$", t_low):
         return re.escape(t) + r"(?:es)?"
-    # default → +s
     return re.escape(t) + r"s?"
 
-# ------------------------------
-# 6) Shape Aliases (comma/pipe synonyms per row)
-# ------------------------------
 def _compile_shape_aliases_from_file():
     """
     Build regex list from settings["shape_aliases"].
     Left cell accepts comma OR pipe separated synonyms (multi-word OK).
-    Matches are case-insensitive,
+    Matches are case-insensitive, catches simple plurals, and allows '-shaped' or ' shaped'.
     """
     s = load_settings()
     if not s.get("shape_aliases_enabled", True):
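Note: to make the docstring rules concrete, this is what _plural_token_regex produces for a few tokens (illustrative snippet, assuming the function above is in scope):

```python
import re

print(_plural_token_regex("lady"))     # lad(?:y|ies)
print(_plural_token_regex("box"))      # box(?:es)?
print(_plural_token_regex("diamond"))  # diamonds?

assert re.fullmatch(_plural_token_regex("lady"), "ladies", flags=re.I)
assert re.fullmatch(_plural_token_regex("box"), "boxes", flags=re.I)
assert re.fullmatch(_plural_token_regex("diamond"), "diamonds", flags=re.I)
```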
@@ -339,17 +336,14 @@ def _compile_shape_aliases_from_file():
         tokens = [t.strip() for t in re.split(r"[|,]", raw) if t.strip()]
         if not tokens:
             continue
-        # Build plural-aware alternation for all synonyms
         alts = [_plural_token_regex(t) for t in tokens]
         alts = [a for a in alts if a]
         if not alts:
             continue
-        # word-boundaries + optional "-shaped"/" shaped"
         pat = r"\b(?:" + "|".join(alts) + r")(?:[-\s]?shaped)?\b"
         compiled.append((re.compile(pat, flags=re.I), name))
     return compiled
 
-
 _SHAPE_ALIASES = _compile_shape_aliases_from_file()
 def _refresh_shape_aliases_cache():
     global _SHAPE_ALIASES
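Note: _SHAPE_ALIASES ends up as a list of (compiled_pattern, replacement_name) pairs. The substitution step itself is outside this diff; a hypothetical helper shows how such pairs would be applied to a caption (the example row reuses the one from the help text added further down):

```python
def apply_shape_aliases(caption: str) -> str:
    # Replace every matched shape token (and its '-shaped' variant) with the chosen name.
    for pattern, name in _SHAPE_ALIASES:
        caption = pattern.sub(name, caption)
    return caption

# With a row "diamond, rhombus | lozenge" -> "starkey-emblem":
# apply_shape_aliases("two rhombuses and a diamond-shaped pendant")
#   -> "two starkey-emblem and a starkey-emblem pendant"
```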
@@ -394,14 +388,9 @@ def save_shape_alias_rows(enabled, df_rows, persist):
     # Recompile in-memory, regardless of persist
     global _SHAPE_ALIASES
     if bool(enabled):
-        # Build from the just-edited rows
-        tokens = []
-        compiled = []
-        # Emulate compiler using cleaned rows directly
         compiled = []
         for item in cleaned:
-            raw = item["shape"]
-            name = item["name"]
+            raw = item["shape"]; name = item["name"]
             toks = [t.strip() for t in re.split(r"[|,]", raw) if t.strip()]
             alts = [_plural_token_regex(t) for t in toks]
             alts = [a for a in alts if a]
@@ -414,11 +403,7 @@ def save_shape_alias_rows(enabled, df_rows, persist):
         _SHAPE_ALIASES = []
 
     normalized = [[it["shape"], it["name"]] for it in cleaned] + [["", ""]]
-    return (
-        status,
-        gr.update(value=normalized, row_count=(max(1, len(normalized)), "dynamic"))
-    )
-
+    return status, gr.update(value=normalized, row_count=(max(1, len(normalized)), "dynamic"))
 
 
 # ------------------------------
@@ -490,7 +475,7 @@ def run_batch(
     top_p: float,
     max_tokens: int,
     max_side: int,
-    time_budget_s: float | None = None,  # respects Zero-GPU window
+    time_budget_s: float | None = None,  # respects Zero-GPU window (None = unlimited)
     progress: gr.Progress = gr.Progress(track_tqdm=True),  # drives the progress bar
 ) -> Tuple[List[dict], list, list, str, List[str], int, int]:
     """
@@ -527,7 +512,7 @@ def run_batch(
         session_rows.append({"filename": filename, "caption": cap, "path": path, "thumb_path": thumb})
         processed += 1
 
-        if time_budget_s is not None and (time.time() - start) >= float(time_budget_s):
+        if (time_budget_s is not None) and ((time.time() - start) >= float(time_budget_s)):
             leftover = files[idx+1:]
             break
 
@@ -563,7 +548,7 @@ def _rows_to_table(rows: List[dict]) -> list:
     return [[r.get("filename",""), r.get("caption","")] for r in (rows or [])]
 
 def _table_to_rows(table_value: Any, rows: List[dict]) -> List[dict]:
-    #
+    # Expect list-of-lists (Dataframe type="array")
     tbl = table_value or []
     new = []
     for i, r in enumerate(rows or []):
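Note: the comment matters because `tbl = table_value or []` only works when the Dataframe hands back a plain list of lists; with the default pandas output, truth-testing a DataFrame raises. A small illustration of the difference (assumes pandas is installed):

```python
import pandas as pd

as_array = [["img_001.png", "a caption"]]   # what type="array" delivers
as_frame = pd.DataFrame(as_array)           # what the default Dataframe type delivers

tbl = as_array or []                        # fine: a non-empty list is truthy
try:
    tbl = as_frame or []                    # raises: truth value of a DataFrame is ambiguous
except ValueError as e:
    print(e)
```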
@@ -686,7 +671,6 @@ def import_captions_file(file_path: str, session_rows: List[dict]) -> Tuple[List
     - Otherwise append a new row (without image path/thumbnail)
     """
     if not file_path or not os.path.exists(file_path):
-        # No change
         table_rows = _rows_to_table(session_rows)
         gallery_pairs = [((r.get("thumb_path") or r.get("path")), r.get("caption",""))
                          for r in session_rows if (r.get("thumb_path") or r.get("path"))]
@@ -700,7 +684,6 @@ def import_captions_file(file_path: str, session_rows: List[dict]) -> Tuple[List
             with open(file_path, "r", encoding="utf-8") as f:
                 reader = csv.reader(f)
                 rows = list(reader)
-                # Skip header if present
                 if rows and len(rows[0]) >= 2 and str(rows[0][0]).lower().strip() == "filename":
                     rows = rows[1:]
                 for r in rows:
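Note: the CSV branch above expects two columns and tolerates an optional header row; the filenames below are only examples:

```csv
filename,caption
img_001.png,"a woman in a red coat, city street, night"
img_002.png,"close-up portrait, soft lighting"
```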
@@ -731,7 +714,6 @@ def import_captions_file(file_path: str, session_rows: List[dict]) -> Tuple[List
         else:
             return session_rows, _rows_to_table(session_rows), _rows_to_table(session_rows), f"Unsupported file type: {ext}"
     except Exception as e:
-        # Best-effort graceful fallback
         table_rows = _rows_to_table(session_rows)
         gallery_pairs = [((r.get("thumb_path") or r.get("path")), r.get("caption",""))
                          for r in session_rows if (r.get("thumb_path") or r.get("path"))]
@@ -757,7 +739,7 @@ def import_captions_file(file_path: str, session_rows: List[dict]) -> Tuple[List
 
 
 # ------------------------------
-# 10) UI header helper (logo auto-fit)
+# 10) UI header helper (logo auto-fit to match title/subtitle block)
 # ------------------------------
 def _render_header_html(auto: bool, px: int, scale: float) -> str:
     auto_js = "true" if auto else "false"
@@ -786,10 +768,10 @@ def _render_header_html(auto: bool, px: int, scale: float) -> str:
       if (!logo || !text) return;
       if (AUTO) {{
         const h = text.getBoundingClientRect().height || 200;
-        const target = Math.max(80, Math.min(
+        const target = Math.max(80, Math.min(520, Math.round(h * SCALE)));
         logo.style.height = target + "px";
       }} else {{
-        logo.style.height = Math.max(80, Math.min(
+        logo.style.height = Math.max(80, Math.min(520, PX)) + "px";
       }}
     }}
     const textNode = document.querySelector(".cf-text");
@@ -827,6 +809,7 @@ BASE_CSS = """
 """
 
 with gr.Blocks(css=BASE_CSS, title="ForgeCaptions") as demo:
+    # Ensure Spaces sees a GPU function (without touching CUDA in main)
     demo.load(_gpu_startup_warm, inputs=None, outputs=None)
 
     # ---- Header
@@ -882,7 +865,7 @@ with gr.Blocks(css=BASE_CSS, title="ForgeCaptions") as demo:
         # Logo controls
         logo_auto = gr.Checkbox(value=settings.get("logo_auto", True),
                                 label="Auto-match logo height to text")
-        logo_px = gr.Slider(80,
+        logo_px = gr.Slider(80, 520, value=settings.get("logo_px", 200),
                             step=4, label="Logo height (px, if Auto off)")
         logo_scale = gr.Slider(0.7, 1.6, value=settings.get("logo_scale", 1.10),
                                step=0.02, label="Logo scale × (if Auto on)")
@@ -912,7 +895,6 @@ with gr.Blocks(css=BASE_CSS, title="ForgeCaptions") as demo:
         cfg["dataset_name"] = sanitize_basename(name)
         save_settings(cfg)
         return gr.update()
-
     dataset_name.change(_save_dataset_name, inputs=[dataset_name], outputs=[])
 
     # Header controls live update
@@ -928,68 +910,66 @@ with gr.Blocks(css=BASE_CSS, title="ForgeCaptions") as demo:
     logo_auto.change(_update_header, inputs=[logo_auto, logo_px, logo_scale], outputs=[header_html])
     logo_scale.change(_update_header, inputs=[logo_auto, logo_px, logo_scale], outputs=[header_html])
 
-    # ---- Shape Aliases
-
-
-
-
-
-
-
-
-
-
-        "and `*-shaped` or `* shaped` variants."
-    )
-    init_rows, init_enabled = get_shape_alias_rows_ui_defaults()
-    enable_aliases = gr.Checkbox(label="Enable shape alias replacements", value=init_enabled)
-    persist_aliases = gr.Checkbox(
-        label="Save aliases across sessions",
-        value=load_settings().get("shape_aliases_persist", True)
-    )
-    alias_table = gr.Dataframe(
-        headers=["shape (token or synonyms)", "name to insert"],
-        value=init_rows,
-        row_count=(max(1, len(init_rows)), "dynamic"),
-        datatype=["str","str"],
-        type="array",
-        interactive=True
-    )
-
-    with gr.Row():
-        add_row_btn = gr.Button("+ Add row", variant="secondary")
-        clear_btn = gr.Button("Clear", variant="secondary")
-        save_btn = gr.Button("💾 Save", variant="primary")
-
-    # status line for saves
-    save_status = gr.Markdown("")
-
-    def _add_row(cur):
-        cur = (cur or []) + [["", ""]]
-        return gr.update(value=cur, row_count=(max(1, len(cur)), "dynamic"))
-
-    def _clear_rows():
-        return gr.update(value=[["", ""]], row_count=(1, "dynamic"))
-
-    add_row_btn.click(_add_row, inputs=[alias_table], outputs=[alias_table])
-    clear_btn.click(_clear_rows, outputs=[alias_table])
-
-    # Persist the "persist" toggle itself
-    def _save_alias_persist_flag(v):
-        cfg = load_settings()
-        cfg["shape_aliases_persist"] = bool(v)
-        save_settings(cfg)
-        return gr.update()
+    # ---- Shape Aliases (with plural matching + persist) ----
+    with gr.Accordion("Shape Aliases", open=False):
+        gr.Markdown(
+            "### 🔷 Shape Aliases\n"
+            "Replace literal **shape tokens** in captions with a preferred **name**.\n\n"
+            "**How to use:**\n"
+            "- Left column = a single token **or** comma/pipe-separated synonyms, e.g. `diamond, rhombus | lozenge`\n"
+            "- Right column = replacement name, e.g. `starkey-emblem`\n"
+            "Matches are case-insensitive, catches simple plurals (`box`→`boxes`, `lady`→`ladies`), "
+            "and also matches `*-shaped` or `* shaped` variants."
+        )
 
-
+        init_rows, init_enabled = get_shape_alias_rows_ui_defaults()
+        enable_aliases = gr.Checkbox(label="Enable shape alias replacements", value=init_enabled)
+        persist_aliases = gr.Checkbox(
+            label="Save aliases across sessions",
+            value=load_settings().get("shape_aliases_persist", True)
+        )
 
-
-
-
-
-
-
+        alias_table = gr.Dataframe(
+            headers=["shape (token or synonyms)", "name to insert"],
+            value=init_rows,
+            row_count=(max(1, len(init_rows)), "dynamic"),
+            datatype=["str","str"],
+            type="array",
+            interactive=True
+        )
 
+        with gr.Row():
+            add_row_btn = gr.Button("+ Add row", variant="secondary")
+            clear_btn = gr.Button("Clear", variant="secondary")
+            save_btn = gr.Button("💾 Save", variant="primary")
+
+        # status line for saves
+        save_status = gr.Markdown("")
+
+        # --- local handlers (must stay inside Blocks context) ---
+        def _add_row(cur):
+            cur = (cur or []) + [["", ""]]
+            return gr.update(value=cur, row_count=(max(1, len(cur)), "dynamic"))
+
+        def _clear_rows():
+            return gr.update(value=[["", ""]], row_count=(1, "dynamic"))
+
+        add_row_btn.click(_add_row, inputs=[alias_table], outputs=[alias_table])
+        clear_btn.click(_clear_rows, outputs=[alias_table])
+
+        def _save_alias_persist_flag(v):
+            cfg = load_settings()
+            cfg["shape_aliases_persist"] = bool(v)
+            save_settings(cfg)
+            return gr.update()
+        persist_aliases.change(_save_alias_persist_flag, inputs=[persist_aliases], outputs=[])
+
+        # Persist rows if persist_aliases checked; otherwise apply in-memory only
+        save_btn.click(
+            save_shape_alias_rows,
+            inputs=[enable_aliases, alias_table, persist_aliases],
+            outputs=[save_status, alias_table]
+        )
 
     # ---- Tabs: Single & Batch
     with gr.Tabs():
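Note: because the alias table uses type="array", the Save button hands save_shape_alias_rows plain rows rather than a DataFrame. Roughly (values reuse the example from the help text above):

```python
df_rows = [
    ["diamond, rhombus | lozenge", "starkey-emblem"],
    ["", ""],                      # blank trailing row left for editing
]

# save_shape_alias_rows(enabled=True, df_rows=df_rows, persist=True)
# returns (status_message, gr.update(value=..., row_count=...)), which feed
# the save_status Markdown and the alias_table itself.
```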
@@ -1009,7 +989,7 @@ with gr.Accordion("Shape Aliases", open=False):
             run_button = gr.Button("Caption batch", variant="primary")
 
         with gr.Accordion("Import captions from CSV/XLSX (merge by filename)", open=False):
-            import_file = gr.File(label="Choose .csv or .xlsx", file_types=["
+            import_file = gr.File(label="Choose .csv or .xlsx", file_types=["file"], type="filepath")
             import_btn = gr.Button("Import into current session")
 
         # ---- Results area (gallery left / table right)
@@ -1034,7 +1014,7 @@ with gr.Accordion("Shape Aliases", open=False):
             headers=["filename", "caption"],
             interactive=True,
             wrap=True,
-            type="array",  #
+            type="array",  # prevents pandas truth ambiguity
             elem_id="cfTable"
         )
 
@@ -1186,19 +1166,13 @@ with gr.Accordion("Shape Aliases", open=False):
             gallery_pairs = [((r.get("thumb_path") or r.get("path")), r.get("caption",""))
                              for r in session_rows if (r.get("thumb_path") or r.get("path"))]
             return session_rows, gallery_pairs, f"Saved • {time.strftime('%H:%M:%S')}"
-
         table.change(sync_table_to_session, inputs=[table, rows_state], outputs=[rows_state, gallery, autosave_md])
 
         # ---- Import hook
         def _do_import(fpath, rows):
             new_rows, gal, tbl, stamp = import_captions_file(fpath, rows or [])
             return new_rows, gal, tbl, stamp
-
-        import_btn.click(
-            _do_import,
-            inputs=[import_file, rows_state],
-            outputs=[rows_state, gallery, table, autosave_md]
-        )
+        import_btn.click(_do_import, inputs=[import_file, rows_state], outputs=[rows_state, gallery, table, autosave_md])
 
         # ---- Exports
         export_csv_btn.click(
@@ -1225,5 +1199,6 @@ if __name__ == "__main__":
         ssr_mode=False,
         debug=True,
         show_error=True,
-
+        # Allow Gradio to serve generated files from /tmp caches
+        allowed_paths=[THUMB_CACHE, EXCEL_THUMB_DIR, TXT_EXPORT_DIR],
     )