Spaces:

JS6969
/

ForgeCaptions

Sleeping

App Files Files Community

JS6969 committed on Sep 4

Commit

8313e74

verified ·

1 Parent(s): 1cf5cd2

Update app.py

Browse files

Files changed (1) hide show

app.py +74 -48

app.py CHANGED Viewed

@@ -116,49 +116,61 @@ def get_model():
 # 3) Instruction templates & options
 # ------------------------------
 STYLE_OPTIONS = [
-    "Descriptive (short)", "Descriptive (long)",
-    "Character training (short)", "Character training (long)",
-    "LoRA (Flux_D Realism) (short)", "LoRA (Flux_D Realism) (long)",
-    "E-commerce product (short)", "E-commerce product (long)",
-    "Portrait (photography) (short)", "Portrait (photography) (long)",
-    "Landscape (photography) (short)", "Landscape (photography) (long)",
-    "Art analysis (no artist names) (short)", "Art analysis (no artist names) (long)",
-    "Social caption (short)", "Social caption (long)",
     "Aesthetic tags (comma-sep)"
 ]
 CAPTION_TYPE_MAP: Dict[str, str] = {
-    "Descriptive (short)": "Write a short description of the most important visible elements only. No speculation.",
-    "Descriptive (long)": "Write a detailed description for this image.",
-    "Character training (short)": (
-        "Output a concise, prompt-like caption for character training. "
-        "Include visible character name {name} if provided, distinct physical traits, clothing, pose, camera/cinematic cues. "
-        "No backstory; no non-visible traits."
-    ),
-    "Character training (long)": (
         "Write a thorough, training-ready caption for a character dataset. "
-        "Use {name} if provided; describe only what is visible: physique, face/hair, clothing, accessories, actions, pose, "
-        "camera angle/focal cues, lighting, background context. 1–3 sentences; no backstory or meta."
     ),
-    "Flux_D (short)": "Output a short Flux.Dev prompt that is indistinguishable from a real Flux.Dev prompt.",
-    "Flux_D (long)": "Output a long Flux.Dev prompt that is indistinguishable from a real Flux.Dev prompt.",
     "Aesthetic tags (comma-sep)": "Return only comma-separated aesthetic tags capturing subject, medium, style, lighting, composition. No sentences.",
-    "E-commerce product (short)": "One sentence highlighting key attributes, material, color, use case. No fluff.",
-    "E-commerce product (long)": "Write a crisp product description highlighting key attributes, materials, color, usage, and distinguishing traits.",
-    "Portrait (photography) (short)": "One sentence portrait description: subject, pose/expression, camera angle, lighting, background.",
-    "Portrait (photography) (long)": "Describe a portrait: subject, age range, pose, facial expression, camera angle, focal length cues, lighting, background.",
-    "Landscape (photography) (short)": "One sentence landscape description: major elements, time of day, weather, vantage point, mood.",
-    "Landscape (photography) (long)": "Describe landscape elements, time of day, weather, vantage point, composition, and mood.",
-    "Art analysis (no artist names) (short)": "One sentence describing medium, style, composition, palette; do not mention artist/title.",
-    "Art analysis (no artist names) (long)": "Analyze the artwork's visible elements, medium, style, composition, palette. Do not mention artist names or titles.",
-    "Social caption (short)": "Write a short, catchy caption (max 25 words) describing the visible content. No hashtags.",
-    "Social caption (long)": "Write a slightly longer, engaging caption (≤50 words) describing the visible content. No hashtags."
 }
 EXTRA_CHOICES = [
     "Do NOT include information about people/characters that cannot be changed (like ethnicity, gender, etc), but do still include changeable attributes (like hair style).",
     "Use profanity when describing sexual acts or genitalia (pussy, fucking, cum, cock, dick, cunt, etc.)",   # You do NOT have permission to remove
-           "Be sexually graphic and highly detailed.",
     "IGNORE all information about watermarks.",
     "Do NOT use any ambiguous language.",
     "ONLY describe the most important elements of the image.",
@@ -212,7 +224,7 @@ def load_settings() -> dict:
         "top_p": 0.9,
         "max_tokens": 256,
         "max_side": 896,
-        "styles": ["Character training (long)"],
         "extras": [],
         "name": "",
         "trigger": "",
@@ -221,9 +233,9 @@ def load_settings() -> dict:
         "shape_aliases_enabled": True,
         "shape_aliases": [],
         "excel_thumb_px": 128,
-        "logo_auto": True,
-        "logo_px": 200,
-        "logo_scale": 1.10,
         "shape_aliases_persist": True,
     }
@@ -234,7 +246,7 @@ def load_settings() -> dict:
     styles = cfg.get("styles") or []
     if not isinstance(styles, list):
         styles = [styles]
-    cfg["styles"] = [s for s in styles if s in STYLE_OPTIONS] or ["Character training (long)"]
     return cfg
@@ -417,14 +429,18 @@ def save_shape_alias_rows(enabled, df_rows, persist):
 # ------------------------------
 # 7) Prompt builder (instruction text shown/used for model)
 # ------------------------------
-def final_instruction(style_list: List[str], extra_opts: List[str], name_value: str) -> str:
-    styles = style_list or ["Character training (long)"]
     parts = [CAPTION_TYPE_MAP.get(s, "") for s in styles]
     core = " ".join(p for p in parts if p).strip()
     if extra_opts:
         core += " " + " ".join(extra_opts)
     if NAME_OPTION in (extra_opts or []):
         core = core.replace("{name}", (name_value or "{NAME}").strip())
     return core
@@ -867,9 +883,14 @@ with gr.Blocks(css=BASE_CSS, title="ForgeCaptions") as demo:
                 with gr.Accordion("Caption style (choose one or combine)", open=True):
                     style_checks = gr.CheckboxGroup(
                         choices=STYLE_OPTIONS,
-                        value=settings.get("styles", ["Character training (long)"]),
                         label=None
                     )
                 with gr.Accordion("Extra options", open=False):
                     extra_opts = gr.CheckboxGroup(
                         choices=[NAME_OPTION] + EXTRA_CHOICES,
@@ -881,14 +902,18 @@ with gr.Blocks(css=BASE_CSS, title="ForgeCaptions") as demo:
                     trig       = gr.Textbox(label="Trigger word", value=settings.get("trigger",""))
                     add_start  = gr.Textbox(label="Add text to start", value=settings.get("begin",""))
                     add_end    = gr.Textbox(label="Add text to end", value=settings.get("end",""))
             # RIGHT: instructions + dataset + general sliders + logo controls
             with gr.Column(scale=1):
                 with gr.Accordion("Model Instructions", open=False):
                     instruction_preview = gr.Textbox(label=None, lines=12,
-                        value=final_instruction(settings.get("styles", ["Character training (long)"]),
                                                 settings.get("extras", []),
-                                                settings.get("name","")))
                 dataset_name = gr.Textbox(label="Dataset name (export title prefix)",
                                           value=settings.get("dataset_name", "forgecaptions"))
                 max_side   = gr.Slider(256, 1024, settings.get("max_side", 896), step=32, label="Max side (resize)")
@@ -905,23 +930,24 @@ with gr.Blocks(css=BASE_CSS, title="ForgeCaptions") as demo:
     # Persist instruction + general settings
-    def _refresh_instruction(styles, extra, name_value, trigv, begv, endv, excel_px, ms):
-        instr = final_instruction(styles or ["Character training (long)"], extra or [], name_value)
         cfg = load_settings()
         cfg.update({
-            "styles": styles or ["Character training (long)"],
             "extras": extra or [],
             "name": name_value,
             "trigger": trigv, "begin": begv, "end": endv,
             "excel_thumb_px": int(excel_px),
             "max_side": int(ms),
         })
         save_settings(cfg)
         return instr
-    for comp in [style_checks, extra_opts, name_input, trig, add_start, add_end, excel_thumb_px, max_side]:
         comp.change(_refresh_instruction,
-                    inputs=[style_checks, extra_opts, name_input, trig, add_start, add_end, excel_thumb_px, max_side],
                     outputs=[instruction_preview])
     def _save_dataset_name(name):
@@ -939,7 +965,7 @@ with gr.Blocks(css=BASE_CSS, title="ForgeCaptions") as demo:
             "Replace literal **shape tokens** in captions with a preferred **name**.\n\n"
             "**How to use:**\n"
             "- Left column = a single token **or** comma/pipe-separated synonyms, e.g. `diamond, rhombus | lozenge`\n"
-            "- Right column = replacement name, e.g. `starkey-emblem`\n"
             "Matches are case-insensitive, catches simple plurals (`box`→`boxes`, `lady`→`ladies`), "
             "and also matches `*-shaped` or `* shaped` variants."
         )
@@ -1011,7 +1037,7 @@ with gr.Blocks(css=BASE_CSS, title="ForgeCaptions") as demo:
             run_button = gr.Button("Caption batch", variant="primary")
             with gr.Accordion("Import captions from CSV/XLSX (merge by filename)", open=False):
-                import_file = gr.File(label="Choose .csv or .xlsx", file_types=["file"], type="filepath")
                 import_btn = gr.Button("Import into current session")
     # ---- Results area (gallery left / table right)

 # 3) Instruction templates & options
 # ------------------------------
 # Caption-style labels offered as the choices of the "Caption style" checkbox
 # group. Each label is also a key of CAPTION_TYPE_MAP. The list is opened
 # exactly once: a repeated `STYLE_OPTIONS = [` inside the brackets is a
 # SyntaxError and leaves the bracket unbalanced.
 STYLE_OPTIONS = [
     "Descriptive",
     "Character training",
     "Flux.1-Dev",
     "Stable Diffusion",
     "MidJourney",
     "E-commerce product",
     "Portrait (photography)",
     "Landscape (photography)",
     "Art analysis (no artist names)",
     "Social caption",
     "Aesthetic tags (comma-sep)",
 ]
 # Instruction text for each caption style, keyed by the style label.
 # The keys are the labels listed in STYLE_OPTIONS. final_instruction() looks
 # each selected style up with .get(s, ""), so a label that is missing here
 # simply contributes no text to the prompt.
 CAPTION_TYPE_MAP: Dict[str, str] = {
+    "Descriptive": "Write a detailed description for this image.",
+    "Character training": (
         "Write a thorough, training-ready caption for a character dataset. "
+        "Describe only what is visible: physique, face/hair, clothing, accessories, actions, pose, "
+        "camera angle/focal cues, lighting, background context."
     ),
     # Prompt-style outputs aimed at specific image generators.
+    "Flux.1-Dev": "Write a Flux.1-Dev style prompt that would reproduce this image faithfully.",
+    "Stable Diffusion": "Write a Stable Diffusion style prompt that would reproduce this image faithfully.",
+    "MidJourney": "Write a MidJourney style prompt that would reproduce this image faithfully.",
     # final_instruction() skips the length hint whenever this style is selected.
     "Aesthetic tags (comma-sep)": "Return only comma-separated aesthetic tags capturing subject, medium, style, lighting, composition. No sentences.",
+    "E-commerce product": "Write a crisp product description highlighting key attributes, materials, color, usage, and distinguishing traits.",
+    "Portrait (photography)": "Describe the subject, age range, pose, facial expression, camera angle, focal length cues, lighting, and background.",
+    "Landscape (photography)": "Describe major landscape elements, time of day, weather, vantage point, composition, and mood.",
+    "Art analysis (no artist names)": "Analyze visible medium, style, composition, and palette. Do not mention artist names or titles.",
+    "Social caption": "Write an engaging caption describing the visible content. No hashtags.",
+}
+LENGTH_CHOICES = ["any", "very short", "short", "medium-length", "long", "very long"] + [str(i) for i in range(20, 261, 10)]
+_LENGTH_HINTS = {
+    "very short":   "Keep to one very short sentence (≈10–15 words).",
+    "short":        "Keep to a short sentence (≈15–25 words).",
+    "medium-length":"Write 1–2 sentences (≈30–60 words).",
+    "long":         "Write a detailed caption (≈80–120 words).",
+    "very long":    "Write a very detailed caption (≈150–250 words).",
 }
+def _length_hint(choice: str) -> str:
+    if not choice or choice == "any":
+        return ""
+    if choice.isdigit():
+        return f"Limit the caption to at most {choice} words."
+    return _LENGTH_HINTS.get(choice, "")
 EXTRA_CHOICES = [
     "Do NOT include information about people/characters that cannot be changed (like ethnicity, gender, etc), but do still include changeable attributes (like hair style).",
     "Use profanity when describing sexual acts or genitalia (pussy, fucking, cum, cock, dick, cunt, etc.)",   # You do NOT have permission to remove
+    "Be sexually graphic and highly detailed.",
     "IGNORE all information about watermarks.",
     "Do NOT use any ambiguous language.",
     "ONLY describe the most important elements of the image.",
         "top_p": 0.9,
         "max_tokens": 256,
         "max_side": 896,
+        "styles": ["Character training"],
         "extras": [],
         "name": "",
         "trigger": "",
         "shape_aliases_enabled": True,
         "shape_aliases": [],
         "excel_thumb_px": 128,
+        "logo_auto": False,
+        "logo_px": 60,
+        "logo_scale": 1.0,
         "shape_aliases_persist": True,
     }
     styles = cfg.get("styles") or []
     if not isinstance(styles, list):
         styles = [styles]
+    cfg["styles"] = [s for s in styles if s in STYLE_OPTIONS] or ["Character training"]
     return cfg
 # ------------------------------
 # 7) Prompt builder (instruction text shown/used for model)
 # ------------------------------
def final_instruction(style_list: List[str], extra_opts: List[str], name_value: str, length_choice: str = "long") -> str:
    """Build the instruction text for the selected styles and options.

    Args:
        style_list: Selected style labels; falls back to ["Character training"]
            when empty or None. Labels without a CAPTION_TYPE_MAP entry add
            no text.
        extra_opts: Extra option sentences appended verbatim after the style
            text; may be empty or None.
        name_value: Name substituted for "{name}" when NAME_OPTION is among
            the extras. A blank or whitespace-only name leaves the literal
            placeholder "{NAME}".
        length_choice: Caption-length choice passed to _length_hint(); an
            empty value is treated as "any".

    Returns:
        The pieces joined by single spaces, with no leading or trailing
        whitespace.
    """
    styles = style_list or ["Character training"]
    extras = list(extra_opts or [])
    # Drop styles that resolve to no text so the join never yields double or
    # leading spaces (previously an unknown style plus extras began with " ").
    pieces = [text for text in (CAPTION_TYPE_MAP.get(s, "") for s in styles) if text]
    pieces.extend(extras)
    core = " ".join(pieces).strip()
    if NAME_OPTION in extras:
        # Strip first, then fall back: a whitespace-only name must not erase
        # the placeholder.
        shown_name = (name_value or "").strip() or "{NAME}"
        core = core.replace("{name}", shown_name)
    # If they're asking for comma-separated tags, ignore word-length guidance.
    if "Aesthetic tags (comma-sep)" not in styles:
        hint = _length_hint(length_choice or "any")
        if hint:
            core = f"{core} {hint}".strip()
    return core
                 with gr.Accordion("Caption style (choose one or combine)", open=True):
                     style_checks = gr.CheckboxGroup(
                         choices=STYLE_OPTIONS,
+                        value=settings.get("styles", ["Character training"]),
                         label=None
                     )
+                    caption_length = gr.Dropdown(
+                        choices=LENGTH_CHOICES,
+                        label="Caption Length",
+                        value=settings.get("caption_length", "long")
+                    )
                 with gr.Accordion("Extra options", open=False):
                     extra_opts = gr.CheckboxGroup(
                         choices=[NAME_OPTION] + EXTRA_CHOICES,
                     trig       = gr.Textbox(label="Trigger word", value=settings.get("trigger",""))
                     add_start  = gr.Textbox(label="Add text to start", value=settings.get("begin",""))
                     add_end    = gr.Textbox(label="Add text to end", value=settings.get("end",""))
             # RIGHT: instructions + dataset + general sliders + logo controls
             with gr.Column(scale=1):
                 with gr.Accordion("Model Instructions", open=False):
                     instruction_preview = gr.Textbox(label=None, lines=12,
+                        value=final_instruction(settings.get("styles", ["Character training"]),
                                                 settings.get("extras", []),
+                                                settings.get("name",""),
+                                                settings.get("caption_length", "long"),
+                                                ),
+                                            )
                 dataset_name = gr.Textbox(label="Dataset name (export title prefix)",
                                           value=settings.get("dataset_name", "forgecaptions"))
                 max_side   = gr.Slider(256, 1024, settings.get("max_side", 896), step=32, label="Max side (resize)")
     # Persist instruction + general settings
+    def _refresh_instruction(styles, extra, name_value, trigv, begv, endv, excel_px, ms, cap_len):
         # Change handler: rebuilds the instruction text and saves the current
         # control values to the settings store.
         #   styles, extra, name_value, cap_len -> passed to final_instruction()
         #   trigv, begv, endv -> saved under "trigger" / "begin" / "end"
         #   excel_px, ms      -> saved as ints under "excel_thumb_px" / "max_side"
         # Returns the instruction string.
+        instr = final_instruction(styles or ["Character training"], extra or [], name_value, cap_len)
         # Start from the stored settings so keys not listed below are kept.
         cfg = load_settings()
         cfg.update({
+            "styles": styles or ["Character training"],
             "extras": extra or [],
             "name": name_value,
             "trigger": trigv, "begin": begv, "end": endv,
             "excel_thumb_px": int(excel_px),
             "max_side": int(ms),
             # An empty length choice is stored as "any".
+            "caption_length": cap_len or "any",
         })
         save_settings(cfg)
         return instr
+    for comp in [style_checks, extra_opts, name_input, trig, add_start, add_end, excel_thumb_px, max_side, caption_length]:
         # Register the same handler on every control. The inputs list is
         # always the full set, in the order of _refresh_instruction's
         # parameters, and the returned text goes to instruction_preview.
         comp.change(_refresh_instruction,
+                    inputs=[style_checks, extra_opts, name_input, trig, add_start, add_end, excel_thumb_px, max_side, caption_length],
                     outputs=[instruction_preview])
     def _save_dataset_name(name):
             "Replace literal **shape tokens** in captions with a preferred **name**.\n\n"
             "**How to use:**\n"
             "- Left column = a single token **or** comma/pipe-separated synonyms, e.g. `diamond, rhombus | lozenge`\n"
+            "- Right column = replacement name, e.g. `family-emblem`\n"
             "Matches are case-insensitive, catches simple plurals (`box`→`boxes`, `lady`→`ladies`), "
             "and also matches `*-shaped` or `* shaped` variants."
         )
             run_button = gr.Button("Caption batch", variant="primary")
             with gr.Accordion("Import captions from CSV/XLSX (merge by filename)", open=False):
+                import_file = gr.File(label="Choose .csv or .xlsx", file_types=[".csv", ".xlsx"], type="filepath")
                 import_btn = gr.Button("Import into current session")
     # ---- Results area (gallery left / table right)