Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -5,7 +5,7 @@
|
|
| 5 |
# ------------------------------
|
| 6 |
# 0) Imports & environment
|
| 7 |
# ------------------------------
|
| 8 |
-
import os, io, csv, time, json, base64, re
|
| 9 |
from typing import List, Tuple, Dict, Any
|
| 10 |
|
| 11 |
# Persist model caches between restarts
|
|
@@ -17,6 +17,12 @@ from PIL import Image
|
|
| 17 |
import torch
|
| 18 |
from transformers import LlavaForConditionalGeneration, AutoProcessor
|
| 19 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 20 |
# Hugging Face Spaces GPU decorator (no-op locally)
|
| 21 |
try:
|
| 22 |
import spaces
|
|
@@ -32,16 +38,19 @@ APP_DIR = os.getcwd()
|
|
| 32 |
SESSION_FILE = "/tmp/forge_session.json"
|
| 33 |
SETTINGS_FILE = "/tmp/forge_settings.json"
|
| 34 |
JOURNAL_FILE = "/tmp/forge_journal.json"
|
| 35 |
-
THUMB_CACHE =
|
| 36 |
-
EXCEL_THUMB_DIR = "/tmp/forge_excel_thumbs"
|
|
|
|
| 37 |
os.makedirs(THUMB_CACHE, exist_ok=True)
|
| 38 |
os.makedirs(EXCEL_THUMB_DIR, exist_ok=True)
|
|
|
|
| 39 |
|
| 40 |
MODEL_PATH = "fancyfeast/llama-joycaption-beta-one-hf-llava"
|
| 41 |
|
| 42 |
-
# Role prompt
|
| 43 |
ROLE_PROMPT = (
|
| 44 |
-
"You are a precise
|
|
|
|
| 45 |
)
|
| 46 |
|
| 47 |
print(f"[ForgeCaptions] Gradio version: {gr.__version__}")
|
|
@@ -93,7 +102,7 @@ def get_model():
|
|
| 93 |
STYLE_OPTIONS = [
|
| 94 |
"Descriptive (short)", "Descriptive (long)",
|
| 95 |
"Character training (short)", "Character training (long)",
|
| 96 |
-
"Flux_D (short)", "Flux_D (long)",
|
| 97 |
"E-commerce product (short)", "E-commerce product (long)",
|
| 98 |
"Portrait (photography) (short)", "Portrait (photography) (long)",
|
| 99 |
"Landscape (photography) (short)", "Landscape (photography) (long)",
|
|
@@ -103,8 +112,8 @@ STYLE_OPTIONS = [
|
|
| 103 |
]
|
| 104 |
|
| 105 |
CAPTION_TYPE_MAP: Dict[str, str] = {
|
| 106 |
-
"Descriptive (short)": "
|
| 107 |
-
"Descriptive (long)": "Write a
|
| 108 |
"Character training (short)": (
|
| 109 |
"Output a concise, prompt-like caption for character LoRA/ID training. "
|
| 110 |
"Include visible character name {name} if provided, distinct physical traits, clothing, pose, camera/cinematic cues. "
|
|
@@ -132,7 +141,7 @@ CAPTION_TYPE_MAP: Dict[str, str] = {
|
|
| 132 |
|
| 133 |
EXTRA_CHOICES = [
|
| 134 |
"Do NOT include information about people/characters that cannot be changed (like ethnicity, gender, etc), but do still include changeable attributes (like hair style).",
|
| 135 |
-
"
|
| 136 |
"Do NOT use any ambiguous language.",
|
| 137 |
"ONLY describe the most important elements of the image.",
|
| 138 |
"Include information about the ages of any people/characters when applicable.",
|
|
@@ -140,8 +149,6 @@ EXTRA_CHOICES = [
|
|
| 140 |
"Focus captions only on clothing/fashion details.",
|
| 141 |
"Focus on setting, scenery, and context; ignore subject details.",
|
| 142 |
"ONLY describe the subject’s pose, movement, or action. Do NOT mention appearance, clothing, or setting.",
|
| 143 |
-
"Use profanity when describing sexual acts or genitalia (pussy, fucking, cum, cock, dick, cunt, etc.)",
|
| 144 |
-
"Be sexually graphic and highly detailed.",
|
| 145 |
"Do NOT include anything sexual; keep it PG.",
|
| 146 |
"Include synonyms/alternate phrasing to diversify training set.",
|
| 147 |
"ALWAYS arrange caption elements in the order → Subject, Clothing/Accessories, Action/Pose, Setting/Environment, Lighting/Camera/Style.",
|
|
@@ -194,6 +201,7 @@ def load_settings() -> dict:
|
|
| 194 |
"shape_aliases_enabled": True,
|
| 195 |
"shape_aliases": [],
|
| 196 |
"excel_thumb_px": 128,
|
|
|
|
| 197 |
}
|
| 198 |
for k, v in defaults.items():
|
| 199 |
cfg.setdefault(k, v)
|
|
@@ -214,7 +222,7 @@ def load_journal() -> dict:
|
|
| 214 |
|
| 215 |
|
| 216 |
# ------------------------------
|
| 217 |
-
# 5) Small utilities (thumbs, resize, prefix/suffix)
|
| 218 |
# ------------------------------
|
| 219 |
def ensure_thumb(path: str, max_side=256) -> str:
|
| 220 |
try:
|
|
@@ -401,8 +409,8 @@ def run_batch(
|
|
| 401 |
top_p: float,
|
| 402 |
max_tokens: int,
|
| 403 |
max_side: int,
|
| 404 |
-
time_budget_s: float = None,
|
| 405 |
-
progress = gr.Progress(track_tqdm=True),
|
| 406 |
) -> Tuple[List[dict], list, list, str, List[str], int, int]:
|
| 407 |
"""
|
| 408 |
Returns:
|
|
@@ -471,13 +479,40 @@ def _gpu_startup_warm():
|
|
| 471 |
|
| 472 |
|
| 473 |
# ------------------------------
|
| 474 |
-
# 9) Export helpers (CSV/XLSX)
|
| 475 |
# ------------------------------
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 476 |
def _rows_to_table(rows: List[dict]) -> list:
|
| 477 |
return [[r.get("filename",""), r.get("caption","")] for r in (rows or [])]
|
| 478 |
|
| 479 |
def _table_to_rows(table_value: Any, rows: List[dict]) -> List[dict]:
|
| 480 |
-
tbl = table_value
|
| 481 |
new = []
|
| 482 |
for i, r in enumerate(rows or []):
|
| 483 |
r = dict(r)
|
|
@@ -488,9 +523,8 @@ def _table_to_rows(table_value: Any, rows: List[dict]) -> List[dict]:
|
|
| 488 |
return new
|
| 489 |
|
| 490 |
def export_csv_from_table(table_value: Any) -> str:
|
| 491 |
-
data = table_value
|
| 492 |
-
|
| 493 |
-
out = f"/tmp/forgecaptions_{int(time.time())}.csv"
|
| 494 |
with open(out, "w", newline="", encoding="utf-8") as f:
|
| 495 |
w = csv.writer(f); w.writerow(["filename", "caption"]); w.writerows(data)
|
| 496 |
return out
|
|
@@ -513,8 +547,6 @@ def _resize_for_excel(path: str, px: int) -> str:
|
|
| 513 |
return path
|
| 514 |
|
| 515 |
def export_excel_with_thumbs(table_value: Any, session_rows: List[dict], thumb_px: int) -> str:
|
| 516 |
-
ds = load_settings().get("dataset_name", "forgecaptions")
|
| 517 |
-
out = f"/tmp/{ds}_{int(time.time())}.xlsx"
|
| 518 |
try:
|
| 519 |
from openpyxl import Workbook
|
| 520 |
from openpyxl.drawing.image import Image as XLImage
|
|
@@ -522,7 +554,7 @@ def export_excel_with_thumbs(table_value: Any, session_rows: List[dict], thumb_p
|
|
| 522 |
raise RuntimeError("Excel export requires 'openpyxl' in requirements.txt.") from e
|
| 523 |
|
| 524 |
caption_by_file = {}
|
| 525 |
-
for row in (table_value
|
| 526 |
if not row:
|
| 527 |
continue
|
| 528 |
fn = str(row[0]) if len(row) > 0 else ""
|
|
@@ -554,10 +586,40 @@ def export_excel_with_thumbs(table_value: Any, session_rows: List[dict], thumb_p
|
|
| 554 |
pass
|
| 555 |
r_i += 1
|
| 556 |
|
| 557 |
-
out =
|
| 558 |
wb.save(out)
|
| 559 |
return out
|
| 560 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 561 |
|
| 562 |
# ------------------------------
|
| 563 |
# 10) UI (Blocks)
|
|
@@ -571,9 +633,7 @@ BASE_CSS = """
|
|
| 571 |
}
|
| 572 |
.cf-hero .cf-text{ text-align:center; }
|
| 573 |
.cf-logo{
|
| 574 |
-
|
| 575 |
-
height: clamp(120px, calc(3.25rem + 3 * 1.1rem + 24px), 180px);
|
| 576 |
-
width:auto; object-fit:contain; display:block; flex:0 0 auto;
|
| 577 |
}
|
| 578 |
.cf-title{margin:0;font-size:3.25rem;line-height:1;letter-spacing:.2px}
|
| 579 |
.cf-sub{margin:6px 0 0;font-size:1.1rem;color:#cfd3da}
|
|
@@ -583,12 +643,8 @@ BASE_CSS = """
|
|
| 583 |
#cfGal .grid > div { height: 96px; }
|
| 584 |
"""
|
| 585 |
|
| 586 |
-
|
| 587 |
-
|
| 588 |
-
demo.load(_gpu_startup_warm, inputs=None, outputs=None)
|
| 589 |
-
|
| 590 |
-
# ---- Header (logo + title center). Script sets logo height to match text exactly.
|
| 591 |
-
gr.HTML(value=f"""
|
| 592 |
<div class="cf-hero">
|
| 593 |
{logo_b64_img()}
|
| 594 |
<div class="cf-text">
|
|
@@ -599,22 +655,25 @@ with gr.Blocks(css=BASE_CSS, title="ForgeCaptions") as demo:
|
|
| 599 |
</div>
|
| 600 |
</div>
|
| 601 |
<hr>
|
| 602 |
-
<
|
| 603 |
-
|
| 604 |
-
|
| 605 |
-
|
| 606 |
-
|
| 607 |
-
|
| 608 |
-
|
| 609 |
-
|
| 610 |
|
| 611 |
-
#
|
| 612 |
settings = load_settings()
|
| 613 |
|
|
|
|
|
|
|
|
|
|
| 614 |
# ---- Controls group (left/right columns)
|
| 615 |
with gr.Group():
|
| 616 |
with gr.Row():
|
| 617 |
-
# LEFT: Style + Extra + Name/Prefix/Suffix
|
| 618 |
with gr.Column(scale=2):
|
| 619 |
with gr.Accordion("Caption style (choose one or combine)", open=True):
|
| 620 |
style_checks = gr.CheckboxGroup(
|
|
@@ -638,32 +697,31 @@ setTimeout(() => {{
|
|
| 638 |
with gr.Column(scale=1):
|
| 639 |
with gr.Accordion("Model Instructions", open=False):
|
| 640 |
instruction_preview = gr.Textbox(
|
| 641 |
-
label=None,
|
| 642 |
-
lines=12,
|
| 643 |
value=final_instruction(
|
| 644 |
settings.get("styles", ["Character training (long)"]),
|
| 645 |
settings.get("extras", []),
|
| 646 |
settings.get("name", ""),
|
| 647 |
-
)
|
| 648 |
)
|
| 649 |
-
dataset_name = gr.Textbox(
|
| 650 |
-
|
| 651 |
-
|
| 652 |
-
)
|
| 653 |
-
|
| 654 |
-
|
| 655 |
-
|
| 656 |
-
# Chunking controls
|
| 657 |
chunk_mode = gr.Radio(
|
| 658 |
choices=["Auto", "Manual (all at once)", "Manual (step)"],
|
| 659 |
value="Manual (step)",
|
| 660 |
-
label="Batch mode"
|
| 661 |
)
|
| 662 |
chunk_size = gr.Slider(1, 50, value=10, step=1, label="Chunk size")
|
| 663 |
gpu_budget = gr.Slider(20, 110, value=55, step=5, label="Max seconds per GPU call")
|
| 664 |
|
| 665 |
-
#
|
| 666 |
-
def _refresh_instruction(styles, extra, name_value, trigv, begv, endv, excel_px, ms):
|
| 667 |
instr = final_instruction(styles or ["Character training (long)"], extra or [], name_value)
|
| 668 |
cfg = load_settings()
|
| 669 |
cfg.update({
|
|
@@ -673,27 +731,31 @@ setTimeout(() => {{
|
|
| 673 |
"trigger": trigv, "begin": begv, "end": endv,
|
| 674 |
"excel_thumb_px": int(excel_px),
|
| 675 |
"max_side": int(ms),
|
|
|
|
| 676 |
})
|
| 677 |
save_settings(cfg)
|
| 678 |
return instr
|
| 679 |
-
|
| 680 |
-
for comp in
|
| 681 |
-
style_checks, extra_opts, name_input, trig, add_start, add_end, excel_thumb_px, max_side
|
| 682 |
-
):
|
| 683 |
comp.change(
|
| 684 |
_refresh_instruction,
|
| 685 |
-
inputs=[style_checks, extra_opts, name_input, trig, add_start, add_end, excel_thumb_px, max_side],
|
| 686 |
-
outputs=[instruction_preview]
|
| 687 |
)
|
| 688 |
-
|
| 689 |
-
# Also set the initial instruction on load (optional but nice)
|
| 690 |
-
demo.load(
|
| 691 |
-
lambda s, e, n: final_instruction(s or ["Character training (long)"], e or [], n),
|
| 692 |
-
inputs=[style_checks, extra_opts, name_input],
|
| 693 |
-
outputs=[instruction_preview],
|
| 694 |
-
)
|
| 695 |
|
| 696 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 697 |
with gr.Accordion("Shape Aliases", open=False):
|
| 698 |
gr.Markdown(
|
| 699 |
"### 🔷 Shape Aliases\n"
|
|
@@ -726,7 +788,7 @@ setTimeout(() => {{
|
|
| 726 |
add_row_btn.click(_add_row, inputs=[alias_table], outputs=[alias_table])
|
| 727 |
clear_btn.click(_clear_rows, outputs=[alias_table])
|
| 728 |
save_btn.click(save_shape_alias_rows, inputs=[enable_aliases, alias_table], outputs=[save_status, alias_table])
|
| 729 |
-
|
| 730 |
# ---- Tabs: Single & Batch
|
| 731 |
with gr.Tabs():
|
| 732 |
with gr.Tab("Single"):
|
|
@@ -778,7 +840,7 @@ setTimeout(() => {{
|
|
| 778 |
step_next = gr.Button("Process next chunk")
|
| 779 |
step_finish = gr.Button("Finish")
|
| 780 |
|
| 781 |
-
# ---- Exports
|
| 782 |
with gr.Row():
|
| 783 |
with gr.Column():
|
| 784 |
export_csv_btn = gr.Button("Export CSV")
|
|
@@ -786,6 +848,9 @@ setTimeout(() => {{
|
|
| 786 |
with gr.Column():
|
| 787 |
export_xlsx_btn = gr.Button("Export Excel (.xlsx) with thumbnails")
|
| 788 |
xlsx_file = gr.File(label="Excel file", visible=False)
|
|
|
|
|
|
|
|
|
|
| 789 |
|
| 790 |
# ---- Scroll sync (gallery ↔ table)
|
| 791 |
gr.HTML("""
|
|
@@ -832,7 +897,7 @@ setTimeout(() => {{
|
|
| 832 |
</script>
|
| 833 |
""")
|
| 834 |
|
| 835 |
-
# ---- Batch chunking logic
|
| 836 |
def _split_chunks(files, csize: int):
|
| 837 |
files = files or []
|
| 838 |
c = max(1, int(csize))
|
|
@@ -907,7 +972,7 @@ setTimeout(() => {{
|
|
| 907 |
outputs=[step_panel, step_msg, remaining_state]
|
| 908 |
)
|
| 909 |
|
| 910 |
-
#
|
| 911 |
def sync_table_to_session(table_value: Any, session_rows: List[dict]) -> Tuple[List[dict], list, str]:
|
| 912 |
session_rows = _table_to_rows(table_value, session_rows or [])
|
| 913 |
save_session(session_rows)
|
|
@@ -921,7 +986,7 @@ setTimeout(() => {{
|
|
| 921 |
outputs=[rows_state, gallery, autosave_md]
|
| 922 |
)
|
| 923 |
|
| 924 |
-
#
|
| 925 |
export_csv_btn.click(
|
| 926 |
lambda tbl: (export_csv_from_table(tbl), gr.update(visible=True)),
|
| 927 |
inputs=[table], outputs=[csv_file, csv_file]
|
|
@@ -930,6 +995,10 @@ setTimeout(() => {{
|
|
| 930 |
lambda tbl, rows, px: (export_excel_with_thumbs(tbl, rows or [], int(px)), gr.update(visible=True)),
|
| 931 |
inputs=[table, rows_state, excel_thumb_px], outputs=[xlsx_file, xlsx_file]
|
| 932 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 933 |
|
| 934 |
|
| 935 |
# ------------------------------
|
|
|
|
| 5 |
# ------------------------------
|
| 6 |
# 0) Imports & environment
|
| 7 |
# ------------------------------
|
| 8 |
+
import os, io, csv, time, json, base64, re, zipfile
|
| 9 |
from typing import List, Tuple, Dict, Any
|
| 10 |
|
| 11 |
# Persist model caches between restarts
|
|
|
|
| 17 |
import torch
|
| 18 |
from transformers import LlavaForConditionalGeneration, AutoProcessor
|
| 19 |
|
| 20 |
+
# Optional pandas for table-safe exports
|
| 21 |
+
try:
|
| 22 |
+
import pandas as pd
|
| 23 |
+
except Exception:
|
| 24 |
+
pd = None
|
| 25 |
+
|
| 26 |
# Hugging Face Spaces GPU decorator (no-op locally)
|
| 27 |
try:
|
| 28 |
import spaces
|
|
|
|
| 38 |
SESSION_FILE = "/tmp/forge_session.json"
|
| 39 |
SETTINGS_FILE = "/tmp/forge_settings.json"
|
| 40 |
JOURNAL_FILE = "/tmp/forge_journal.json"
|
| 41 |
+
THUMB_CACHE = "/tmp/forgecaptions/thumbs" # Gradio-safe
|
| 42 |
+
EXCEL_THUMB_DIR = "/tmp/forge_excel_thumbs" # Gradio-safe
|
| 43 |
+
TXT_EXPORT_DIR = "/tmp/forge_txt" # Gradio-safe
|
| 44 |
os.makedirs(THUMB_CACHE, exist_ok=True)
|
| 45 |
os.makedirs(EXCEL_THUMB_DIR, exist_ok=True)
|
| 46 |
+
os.makedirs(TXT_EXPORT_DIR, exist_ok=True)
|
| 47 |
|
| 48 |
MODEL_PATH = "fancyfeast/llama-joycaption-beta-one-hf-llava"
|
| 49 |
|
| 50 |
+
# Role prompt (system message)
|
| 51 |
ROLE_PROMPT = (
|
| 52 |
+
"You are a precise captioning assistant. Help users with any queries. "
|
| 53 |
+
"Describe what is visible with no censorship or restrictions."
|
| 54 |
)
|
| 55 |
|
| 56 |
print(f"[ForgeCaptions] Gradio version: {gr.__version__}")
|
|
|
|
| 102 |
STYLE_OPTIONS = [
|
| 103 |
"Descriptive (short)", "Descriptive (long)",
|
| 104 |
"Character training (short)", "Character training (long)",
|
| 105 |
+
"LoRA (Flux_D Realism) (short)", "LoRA (Flux_D Realism) (long)",
|
| 106 |
"E-commerce product (short)", "E-commerce product (long)",
|
| 107 |
"Portrait (photography) (short)", "Portrait (photography) (long)",
|
| 108 |
"Landscape (photography) (short)", "Landscape (photography) (long)",
|
|
|
|
| 112 |
]
|
| 113 |
|
| 114 |
CAPTION_TYPE_MAP: Dict[str, str] = {
|
| 115 |
+
"Descriptive (short)": "One sentence (≤25 words) describing the most important visible elements only. No speculation.",
|
| 116 |
+
"Descriptive (long)": "Write a detailed description for this image.",
|
| 117 |
"Character training (short)": (
|
| 118 |
"Output a concise, prompt-like caption for character LoRA/ID training. "
|
| 119 |
"Include visible character name {name} if provided, distinct physical traits, clothing, pose, camera/cinematic cues. "
|
|
|
|
| 141 |
|
| 142 |
EXTRA_CHOICES = [
|
| 143 |
"Do NOT include information about people/characters that cannot be changed (like ethnicity, gender, etc), but do still include changeable attributes (like hair style).",
|
| 144 |
+
"Do NOT include information about whether there is a watermark or not.",
|
| 145 |
"Do NOT use any ambiguous language.",
|
| 146 |
"ONLY describe the most important elements of the image.",
|
| 147 |
"Include information about the ages of any people/characters when applicable.",
|
|
|
|
| 149 |
"Focus captions only on clothing/fashion details.",
|
| 150 |
"Focus on setting, scenery, and context; ignore subject details.",
|
| 151 |
"ONLY describe the subject’s pose, movement, or action. Do NOT mention appearance, clothing, or setting.",
|
|
|
|
|
|
|
| 152 |
"Do NOT include anything sexual; keep it PG.",
|
| 153 |
"Include synonyms/alternate phrasing to diversify training set.",
|
| 154 |
"ALWAYS arrange caption elements in the order → Subject, Clothing/Accessories, Action/Pose, Setting/Environment, Lighting/Camera/Style.",
|
|
|
|
| 201 |
"shape_aliases_enabled": True,
|
| 202 |
"shape_aliases": [],
|
| 203 |
"excel_thumb_px": 128,
|
| 204 |
+
"logo_px": 180,
|
| 205 |
}
|
| 206 |
for k, v in defaults.items():
|
| 207 |
cfg.setdefault(k, v)
|
|
|
|
| 222 |
|
| 223 |
|
| 224 |
# ------------------------------
|
| 225 |
+
# 5) Small utilities (thumbs, resize, prefix/suffix, logo)
|
| 226 |
# ------------------------------
|
| 227 |
def ensure_thumb(path: str, max_side=256) -> str:
|
| 228 |
try:
|
|
|
|
| 409 |
top_p: float,
|
| 410 |
max_tokens: int,
|
| 411 |
max_side: int,
|
| 412 |
+
time_budget_s: float | None = None, # respects Zero-GPU window
|
| 413 |
+
progress: gr.Progress = gr.Progress(track_tqdm=True), # drives the progress bar
|
| 414 |
) -> Tuple[List[dict], list, list, str, List[str], int, int]:
|
| 415 |
"""
|
| 416 |
Returns:
|
|
|
|
| 479 |
|
| 480 |
|
| 481 |
# ------------------------------
|
| 482 |
+
# 9) Export helpers (CSV/XLSX/TXT)
|
| 483 |
# ------------------------------
|
| 484 |
+
def _as_table_list(value: Any) -> list:
|
| 485 |
+
"""Return a plain list-of-lists regardless of whether Gradio gave us a list or pandas DF."""
|
| 486 |
+
if value is None:
|
| 487 |
+
return []
|
| 488 |
+
if pd is not None:
|
| 489 |
+
if isinstance(value, pd.DataFrame):
|
| 490 |
+
return value.reset_index(drop=True).values.tolist()
|
| 491 |
+
if isinstance(value, pd.Series):
|
| 492 |
+
return [[x] for x in value.to_list()]
|
| 493 |
+
if isinstance(value, list):
|
| 494 |
+
return value
|
| 495 |
+
try:
|
| 496 |
+
return list(value)
|
| 497 |
+
except Exception:
|
| 498 |
+
return []
|
| 499 |
+
|
| 500 |
+
def _sanitize_basename(s: str) -> str:
|
| 501 |
+
s = (s or "").strip() or "forgecaptions"
|
| 502 |
+
return re.sub(r"[^A-Za-z0-9._-]+", "_", s)[:120]
|
| 503 |
+
|
| 504 |
+
def _ts() -> str:
|
| 505 |
+
return time.strftime("%Y%m%d_%H%M%S")
|
| 506 |
+
|
| 507 |
+
def _export_prefix() -> str:
    """Build the extension-less output path shared by the export helpers.

    The path is ``/tmp/<sanitized dataset name>_<timestamp>``; the dataset name
    comes from the saved settings and defaults to ``"forgecaptions"``.
    """
    dataset = load_settings().get("dataset_name", "forgecaptions")
    safe_name = _sanitize_basename(dataset)
    stamp = _ts()
    return "/tmp/" + safe_name + "_" + stamp
|
| 510 |
+
|
| 511 |
def _rows_to_table(rows: List[dict]) -> list:
|
| 512 |
return [[r.get("filename",""), r.get("caption","")] for r in (rows or [])]
|
| 513 |
|
| 514 |
def _table_to_rows(table_value: Any, rows: List[dict]) -> List[dict]:
|
| 515 |
+
tbl = _as_table_list(table_value)
|
| 516 |
new = []
|
| 517 |
for i, r in enumerate(rows or []):
|
| 518 |
r = dict(r)
|
|
|
|
| 523 |
return new
|
| 524 |
|
| 525 |
def export_csv_from_table(table_value: Any) -> str:
    """Write the table rows to a CSV file and return the file's path.

    The file starts with a ``filename,caption`` header row and is saved at
    ``_export_prefix()`` + ``".csv"``, UTF-8 encoded.
    """
    rows = _as_table_list(table_value)
    csv_path = f"{_export_prefix()}.csv"
    # newline="" lets the csv module control line endings itself.
    with open(csv_path, "w", newline="", encoding="utf-8") as handle:
        writer = csv.writer(handle)
        writer.writerow(["filename", "caption"])
        writer.writerows(rows)
    return csv_path
|
|
|
|
| 547 |
return path
|
| 548 |
|
| 549 |
def export_excel_with_thumbs(table_value: Any, session_rows: List[dict], thumb_px: int) -> str:
|
|
|
|
|
|
|
| 550 |
try:
|
| 551 |
from openpyxl import Workbook
|
| 552 |
from openpyxl.drawing.image import Image as XLImage
|
|
|
|
| 554 |
raise RuntimeError("Excel export requires 'openpyxl' in requirements.txt.") from e
|
| 555 |
|
| 556 |
caption_by_file = {}
|
| 557 |
+
for row in _as_table_list(table_value):
|
| 558 |
if not row:
|
| 559 |
continue
|
| 560 |
fn = str(row[0]) if len(row) > 0 else ""
|
|
|
|
| 586 |
pass
|
| 587 |
r_i += 1
|
| 588 |
|
| 589 |
+
out = _export_prefix() + ".xlsx"
|
| 590 |
wb.save(out)
|
| 591 |
return out
|
| 592 |
|
| 593 |
+
def export_txt_zip_from_table(table_value: Any) -> str:
    """Create one .txt caption file per table row and bundle them into a zip.

    Each non-empty row contributes one archive member. The member is named
    after the row's filename (trailing extension stripped, then passed through
    ``_sanitize_basename``) and contains the caption text, UTF-8 encoded.
    Repeated names get ``_1``, ``_2``, ... suffixes, and a suffixed name is
    never allowed to clash with another row's own name.

    Args:
        table_value: Table payload accepted by ``_as_table_list``; column 0 is
            the filename and column 1 the caption.

    Returns:
        Path of the written archive: ``_export_prefix()`` + ``"_txt.zip"``.
    """
    data = _as_table_list(table_value)
    zip_path = _export_prefix() + "_txt.zip"

    next_suffix = {}  # stem -> next numeric suffix to try for that stem
    taken = set()     # member stems already written to this archive

    # Members are written straight into the archive, so no staging files can be
    # left over from (or shared with) another export.
    with zipfile.ZipFile(zip_path, "w", zipfile.ZIP_DEFLATED) as z:
        for row in data:
            if not row:
                continue
            fn = str(row[0]) if len(row) > 0 and row[0] is not None else "image"
            cap = str(row[1]) if len(row) > 1 and row[1] is not None else ""
            stem = _sanitize_basename(re.sub(r"\.[A-Za-z0-9]+$", "", fn))

            n = next_suffix.get(stem, 0)
            member = stem if n == 0 else f"{stem}_{n}"
            # A suffixed name may equal another row's real stem ("a" twice plus
            # "a_1"); keep counting until the name is unused.
            while member in taken:
                n += 1
                member = f"{stem}_{n}"
            next_suffix[stem] = n + 1
            taken.add(member)

            # writestr encodes str data as UTF-8.
            z.writestr(f"{member}.txt", cap)
    return zip_path
|
| 622 |
+
|
| 623 |
|
| 624 |
# ------------------------------
|
| 625 |
# 10) UI (Blocks)
|
|
|
|
| 633 |
}
|
| 634 |
.cf-hero .cf-text{ text-align:center; }
|
| 635 |
.cf-logo{
|
| 636 |
+
height: auto; width:auto; object-fit:contain; display:block; flex:0 0 auto;
|
|
|
|
|
|
|
| 637 |
}
|
| 638 |
.cf-title{margin:0;font-size:3.25rem;line-height:1;letter-spacing:.2px}
|
| 639 |
.cf-sub{margin:6px 0 0;font-size:1.1rem;color:#cfd3da}
|
|
|
|
| 643 |
#cfGal .grid > div { height: 96px; }
|
| 644 |
"""
|
| 645 |
|
| 646 |
+
def _render_header_html(logo_px: int) -> str:
|
| 647 |
+
return f"""
|
|
|
|
|
|
|
|
|
|
|
|
|
| 648 |
<div class="cf-hero">
|
| 649 |
{logo_b64_img()}
|
| 650 |
<div class="cf-text">
|
|
|
|
| 655 |
</div>
|
| 656 |
</div>
|
| 657 |
<hr>
|
| 658 |
+
<style>
|
| 659 |
+
.cf-logo {{ height: {int(logo_px)}px; width: auto; object-fit: contain; }}
|
| 660 |
+
</style>
|
| 661 |
+
"""
|
| 662 |
+
|
| 663 |
+
with gr.Blocks(css=BASE_CSS, title="ForgeCaptions") as demo:
|
| 664 |
+
# Ensure Spaces sees a GPU function (without touching CUDA in main)
|
| 665 |
+
demo.load(_gpu_startup_warm, inputs=None, outputs=None)
|
| 666 |
|
| 667 |
+
# Settings
|
| 668 |
settings = load_settings()
|
| 669 |
|
| 670 |
+
# Header (live size)
|
| 671 |
+
header_html = gr.HTML(_render_header_html(settings.get("logo_px", 180)))
|
| 672 |
+
|
| 673 |
# ---- Controls group (left/right columns)
|
| 674 |
with gr.Group():
|
| 675 |
with gr.Row():
|
| 676 |
+
# LEFT: Style + Extra + Name/Prefix/Suffix
|
| 677 |
with gr.Column(scale=2):
|
| 678 |
with gr.Accordion("Caption style (choose one or combine)", open=True):
|
| 679 |
style_checks = gr.CheckboxGroup(
|
|
|
|
| 697 |
with gr.Column(scale=1):
|
| 698 |
with gr.Accordion("Model Instructions", open=False):
|
| 699 |
instruction_preview = gr.Textbox(
|
| 700 |
+
label=None, lines=12,
|
|
|
|
| 701 |
value=final_instruction(
|
| 702 |
settings.get("styles", ["Character training (long)"]),
|
| 703 |
settings.get("extras", []),
|
| 704 |
settings.get("name", ""),
|
| 705 |
+
)
|
| 706 |
)
|
| 707 |
+
dataset_name = gr.Textbox(label="Dataset name (export title prefix)",
|
| 708 |
+
value=settings.get("dataset_name", "forgecaptions"))
|
| 709 |
+
max_side = gr.Slider(256, 1024, settings.get("max_side", 896), step=32, label="Max side (resize)")
|
| 710 |
+
excel_thumb_px = gr.Slider(64, 256, value=settings.get("excel_thumb_px", 128),
|
| 711 |
+
step=8, label="Excel thumbnail size (px)")
|
| 712 |
+
logo_px = gr.Slider(80, 400, value=settings.get("logo_px", 180),
|
| 713 |
+
step=10, label="Logo height (px)")
|
| 714 |
+
# Chunking controls
|
| 715 |
chunk_mode = gr.Radio(
|
| 716 |
choices=["Auto", "Manual (all at once)", "Manual (step)"],
|
| 717 |
value="Manual (step)",
|
| 718 |
+
label="Batch mode"
|
| 719 |
)
|
| 720 |
chunk_size = gr.Slider(1, 50, value=10, step=1, label="Chunk size")
|
| 721 |
gpu_budget = gr.Slider(20, 110, value=55, step=5, label="Max seconds per GPU call")
|
| 722 |
|
| 723 |
+
# Keep instruction text in sync + persist settings
|
| 724 |
+
def _refresh_instruction(styles, extra, name_value, trigv, begv, endv, excel_px, ms, dsn):
|
| 725 |
instr = final_instruction(styles or ["Character training (long)"], extra or [], name_value)
|
| 726 |
cfg = load_settings()
|
| 727 |
cfg.update({
|
|
|
|
| 731 |
"trigger": trigv, "begin": begv, "end": endv,
|
| 732 |
"excel_thumb_px": int(excel_px),
|
| 733 |
"max_side": int(ms),
|
| 734 |
+
"dataset_name": dsn or "forgecaptions",
|
| 735 |
})
|
| 736 |
save_settings(cfg)
|
| 737 |
return instr
|
| 738 |
+
|
| 739 |
+
for comp in [style_checks, extra_opts, name_input, trig, add_start, add_end, excel_thumb_px, max_side, dataset_name]:
|
|
|
|
|
|
|
| 740 |
comp.change(
|
| 741 |
_refresh_instruction,
|
| 742 |
+
inputs=[style_checks, extra_opts, name_input, trig, add_start, add_end, excel_thumb_px, max_side, dataset_name],
|
| 743 |
+
outputs=[instruction_preview]
|
| 744 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 745 |
|
| 746 |
+
def _set_logo_px(px):
|
| 747 |
+
cfg = load_settings()
|
| 748 |
+
cfg["logo_px"] = int(px)
|
| 749 |
+
save_settings(cfg)
|
| 750 |
+
return _render_header_html(int(px))
|
| 751 |
+
|
| 752 |
+
logo_px.change(_set_logo_px, inputs=[logo_px], outputs=[header_html])
|
| 753 |
+
|
| 754 |
+
# Initial instruction render on load (in case)
|
| 755 |
+
demo.load(lambda s,e,n: final_instruction(s or ["Character training (long)"], e or [], n),
|
| 756 |
+
inputs=[style_checks, extra_opts, name_input], outputs=[instruction_preview])
|
| 757 |
+
|
| 758 |
+
# ---- Shape Aliases (positioned with settings, BEFORE uploads)
|
| 759 |
with gr.Accordion("Shape Aliases", open=False):
|
| 760 |
gr.Markdown(
|
| 761 |
"### 🔷 Shape Aliases\n"
|
|
|
|
| 788 |
add_row_btn.click(_add_row, inputs=[alias_table], outputs=[alias_table])
|
| 789 |
clear_btn.click(_clear_rows, outputs=[alias_table])
|
| 790 |
save_btn.click(save_shape_alias_rows, inputs=[enable_aliases, alias_table], outputs=[save_status, alias_table])
|
| 791 |
+
|
| 792 |
# ---- Tabs: Single & Batch
|
| 793 |
with gr.Tabs():
|
| 794 |
with gr.Tab("Single"):
|
|
|
|
| 840 |
step_next = gr.Button("Process next chunk")
|
| 841 |
step_finish = gr.Button("Finish")
|
| 842 |
|
| 843 |
+
# ---- Exports (CSV / XLSX / TXT)
|
| 844 |
with gr.Row():
|
| 845 |
with gr.Column():
|
| 846 |
export_csv_btn = gr.Button("Export CSV")
|
|
|
|
| 848 |
with gr.Column():
|
| 849 |
export_xlsx_btn = gr.Button("Export Excel (.xlsx) with thumbnails")
|
| 850 |
xlsx_file = gr.File(label="Excel file", visible=False)
|
| 851 |
+
with gr.Column():
|
| 852 |
+
export_txt_btn = gr.Button("Export captions as .txt (zip)")
|
| 853 |
+
txt_zip = gr.File(label="TXT zip", visible=False)
|
| 854 |
|
| 855 |
# ---- Scroll sync (gallery ↔ table)
|
| 856 |
gr.HTML("""
|
|
|
|
| 897 |
</script>
|
| 898 |
""")
|
| 899 |
|
| 900 |
+
# ---- Batch chunking logic
|
| 901 |
def _split_chunks(files, csize: int):
|
| 902 |
files = files or []
|
| 903 |
c = max(1, int(csize))
|
|
|
|
| 972 |
outputs=[step_panel, step_msg, remaining_state]
|
| 973 |
)
|
| 974 |
|
| 975 |
+
# Table edits → persist + refresh gallery
|
| 976 |
def sync_table_to_session(table_value: Any, session_rows: List[dict]) -> Tuple[List[dict], list, str]:
|
| 977 |
session_rows = _table_to_rows(table_value, session_rows or [])
|
| 978 |
save_session(session_rows)
|
|
|
|
| 986 |
outputs=[rows_state, gallery, autosave_md]
|
| 987 |
)
|
| 988 |
|
| 989 |
+
# Exports (CSV/XLSX/TXT)
|
| 990 |
export_csv_btn.click(
|
| 991 |
lambda tbl: (export_csv_from_table(tbl), gr.update(visible=True)),
|
| 992 |
inputs=[table], outputs=[csv_file, csv_file]
|
|
|
|
| 995 |
lambda tbl, rows, px: (export_excel_with_thumbs(tbl, rows or [], int(px)), gr.update(visible=True)),
|
| 996 |
inputs=[table, rows_state, excel_thumb_px], outputs=[xlsx_file, xlsx_file]
|
| 997 |
)
|
| 998 |
+
export_txt_btn.click(
|
| 999 |
+
lambda tbl: (export_txt_zip_from_table(tbl), gr.update(visible=True)),
|
| 1000 |
+
inputs=[table], outputs=[txt_zip, txt_zip]
|
| 1001 |
+
)
|
| 1002 |
|
| 1003 |
|
| 1004 |
# ------------------------------
|