JS6969 committed on
Commit
8313e74
·
verified ·
1 Parent(s): 1cf5cd2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +74 -48
app.py CHANGED
@@ -116,49 +116,61 @@ def get_model():
116
  # 3) Instruction templates & options
117
  # ------------------------------
118
  STYLE_OPTIONS = [
119
- "Descriptive (short)", "Descriptive (long)",
120
- "Character training (short)", "Character training (long)",
121
- "LoRA (Flux_D Realism) (short)", "LoRA (Flux_D Realism) (long)",
122
- "E-commerce product (short)", "E-commerce product (long)",
123
- "Portrait (photography) (short)", "Portrait (photography) (long)",
124
- "Landscape (photography) (short)", "Landscape (photography) (long)",
125
- "Art analysis (no artist names) (short)", "Art analysis (no artist names) (long)",
126
- "Social caption (short)", "Social caption (long)",
 
 
 
127
  "Aesthetic tags (comma-sep)"
128
  ]
129
 
130
  CAPTION_TYPE_MAP: Dict[str, str] = {
131
- "Descriptive (short)": "Write a short description of the most important visible elements only. No speculation.",
132
- "Descriptive (long)": "Write a detailed description for this image.",
133
- "Character training (short)": (
134
- "Output a concise, prompt-like caption for character training. "
135
- "Include visible character name {name} if provided, distinct physical traits, clothing, pose, camera/cinematic cues. "
136
- "No backstory; no non-visible traits."
137
- ),
138
- "Character training (long)": (
139
  "Write a thorough, training-ready caption for a character dataset. "
140
- "Use {name} if provided; describe only what is visible: physique, face/hair, clothing, accessories, actions, pose, "
141
- "camera angle/focal cues, lighting, background context. 1–3 sentences; no backstory or meta."
142
  ),
143
- "Flux_D (short)": "Output a short Flux.Dev prompt that is indistinguishable from a real Flux.Dev prompt.",
144
- "Flux_D (long)": "Output a long Flux.Dev prompt that is indistinguishable from a real Flux.Dev prompt.",
 
145
  "Aesthetic tags (comma-sep)": "Return only comma-separated aesthetic tags capturing subject, medium, style, lighting, composition. No sentences.",
146
- "E-commerce product (short)": "One sentence highlighting key attributes, material, color, use case. No fluff.",
147
- "E-commerce product (long)": "Write a crisp product description highlighting key attributes, materials, color, usage, and distinguishing traits.",
148
- "Portrait (photography) (short)": "One sentence portrait description: subject, pose/expression, camera angle, lighting, background.",
149
- "Portrait (photography) (long)": "Describe a portrait: subject, age range, pose, facial expression, camera angle, focal length cues, lighting, background.",
150
- "Landscape (photography) (short)": "One sentence landscape description: major elements, time of day, weather, vantage point, mood.",
151
- "Landscape (photography) (long)": "Describe landscape elements, time of day, weather, vantage point, composition, and mood.",
152
- "Art analysis (no artist names) (short)": "One sentence describing medium, style, composition, palette; do not mention artist/title.",
153
- "Art analysis (no artist names) (long)": "Analyze the artwork's visible elements, medium, style, composition, palette. Do not mention artist names or titles.",
154
- "Social caption (short)": "Write a short, catchy caption (max 25 words) describing the visible content. No hashtags.",
155
- "Social caption (long)": "Write a slightly longer, engaging caption (≤50 words) describing the visible content. No hashtags."
 
 
 
 
 
 
156
  }
157
 
 
 
 
 
 
 
 
 
158
  EXTRA_CHOICES = [
159
  "Do NOT include information about people/characters that cannot be changed (like ethnicity, gender, etc), but do still include changeable attributes (like hair style).",
160
  "Use profanity when describing sexual acts or genitalia (pussy, fucking, cum, cock, dick, cunt, etc.)", # You do NOT have permission to remove
161
- "Be sexually graphic and highly detailed.",
162
  "IGNORE all information about watermarks.",
163
  "Do NOT use any ambiguous language.",
164
  "ONLY describe the most important elements of the image.",
@@ -212,7 +224,7 @@ def load_settings() -> dict:
212
  "top_p": 0.9,
213
  "max_tokens": 256,
214
  "max_side": 896,
215
- "styles": ["Character training (long)"],
216
  "extras": [],
217
  "name": "",
218
  "trigger": "",
@@ -221,9 +233,9 @@ def load_settings() -> dict:
221
  "shape_aliases_enabled": True,
222
  "shape_aliases": [],
223
  "excel_thumb_px": 128,
224
- "logo_auto": True,
225
- "logo_px": 200,
226
- "logo_scale": 1.10,
227
  "shape_aliases_persist": True,
228
  }
229
 
@@ -234,7 +246,7 @@ def load_settings() -> dict:
234
  styles = cfg.get("styles") or []
235
  if not isinstance(styles, list):
236
  styles = [styles]
237
- cfg["styles"] = [s for s in styles if s in STYLE_OPTIONS] or ["Character training (long)"]
238
 
239
  return cfg
240
 
@@ -417,14 +429,18 @@ def save_shape_alias_rows(enabled, df_rows, persist):
417
  # ------------------------------
418
  # 7) Prompt builder (instruction text shown/used for model)
419
  # ------------------------------
420
- def final_instruction(style_list: List[str], extra_opts: List[str], name_value: str) -> str:
421
- styles = style_list or ["Character training (long)"]
422
  parts = [CAPTION_TYPE_MAP.get(s, "") for s in styles]
423
  core = " ".join(p for p in parts if p).strip()
424
  if extra_opts:
425
  core += " " + " ".join(extra_opts)
426
  if NAME_OPTION in (extra_opts or []):
427
  core = core.replace("{name}", (name_value or "{NAME}").strip())
 
 
 
 
428
  return core
429
 
430
 
@@ -867,9 +883,14 @@ with gr.Blocks(css=BASE_CSS, title="ForgeCaptions") as demo:
867
  with gr.Accordion("Caption style (choose one or combine)", open=True):
868
  style_checks = gr.CheckboxGroup(
869
  choices=STYLE_OPTIONS,
870
- value=settings.get("styles", ["Character training (long)"]),
871
  label=None
872
  )
 
 
 
 
 
873
  with gr.Accordion("Extra options", open=False):
874
  extra_opts = gr.CheckboxGroup(
875
  choices=[NAME_OPTION] + EXTRA_CHOICES,
@@ -881,14 +902,18 @@ with gr.Blocks(css=BASE_CSS, title="ForgeCaptions") as demo:
881
  trig = gr.Textbox(label="Trigger word", value=settings.get("trigger",""))
882
  add_start = gr.Textbox(label="Add text to start", value=settings.get("begin",""))
883
  add_end = gr.Textbox(label="Add text to end", value=settings.get("end",""))
 
884
 
885
  # RIGHT: instructions + dataset + general sliders + logo controls
886
  with gr.Column(scale=1):
887
  with gr.Accordion("Model Instructions", open=False):
888
  instruction_preview = gr.Textbox(label=None, lines=12,
889
- value=final_instruction(settings.get("styles", ["Character training (long)"]),
890
  settings.get("extras", []),
891
- settings.get("name","")))
 
 
 
892
  dataset_name = gr.Textbox(label="Dataset name (export title prefix)",
893
  value=settings.get("dataset_name", "forgecaptions"))
894
  max_side = gr.Slider(256, 1024, settings.get("max_side", 896), step=32, label="Max side (resize)")
@@ -905,23 +930,24 @@ with gr.Blocks(css=BASE_CSS, title="ForgeCaptions") as demo:
905
 
906
 
907
  # Persist instruction + general settings
908
- def _refresh_instruction(styles, extra, name_value, trigv, begv, endv, excel_px, ms):
909
- instr = final_instruction(styles or ["Character training (long)"], extra or [], name_value)
910
  cfg = load_settings()
911
  cfg.update({
912
- "styles": styles or ["Character training (long)"],
913
  "extras": extra or [],
914
  "name": name_value,
915
  "trigger": trigv, "begin": begv, "end": endv,
916
  "excel_thumb_px": int(excel_px),
917
  "max_side": int(ms),
 
918
  })
919
  save_settings(cfg)
920
  return instr
921
 
922
- for comp in [style_checks, extra_opts, name_input, trig, add_start, add_end, excel_thumb_px, max_side]:
923
  comp.change(_refresh_instruction,
924
- inputs=[style_checks, extra_opts, name_input, trig, add_start, add_end, excel_thumb_px, max_side],
925
  outputs=[instruction_preview])
926
 
927
  def _save_dataset_name(name):
@@ -939,7 +965,7 @@ with gr.Blocks(css=BASE_CSS, title="ForgeCaptions") as demo:
939
  "Replace literal **shape tokens** in captions with a preferred **name**.\n\n"
940
  "**How to use:**\n"
941
  "- Left column = a single token **or** comma/pipe-separated synonyms, e.g. `diamond, rhombus | lozenge`\n"
942
- "- Right column = replacement name, e.g. `starkey-emblem`\n"
943
  "Matches are case-insensitive, catches simple plurals (`box`→`boxes`, `lady`→`ladies`), "
944
  "and also matches `*-shaped` or `* shaped` variants."
945
  )
@@ -1011,7 +1037,7 @@ with gr.Blocks(css=BASE_CSS, title="ForgeCaptions") as demo:
1011
  run_button = gr.Button("Caption batch", variant="primary")
1012
 
1013
  with gr.Accordion("Import captions from CSV/XLSX (merge by filename)", open=False):
1014
- import_file = gr.File(label="Choose .csv or .xlsx", file_types=["file"], type="filepath")
1015
  import_btn = gr.Button("Import into current session")
1016
 
1017
  # ---- Results area (gallery left / table right)
 
116
  # 3) Instruction templates & options
117
  # ------------------------------
118
  STYLE_OPTIONS = [
119
+ STYLE_OPTIONS = [
120
+ "Descriptive",
121
+ "Character training",
122
+ "Flux.1-Dev",
123
+ "Stable Diffusion",
124
+ "MidJourney",
125
+ "E-commerce product",
126
+ "Portrait (photography)",
127
+ "Landscape (photography)",
128
+ "Art analysis (no artist names)",
129
+ "Social caption",
130
  "Aesthetic tags (comma-sep)"
131
  ]
132
 
133
  CAPTION_TYPE_MAP: Dict[str, str] = {
134
+ "Descriptive": "Write a detailed description for this image.",
135
+ "Character training": (
 
 
 
 
 
 
136
  "Write a thorough, training-ready caption for a character dataset. "
137
+ "Describe only what is visible: physique, face/hair, clothing, accessories, actions, pose, "
138
+ "camera angle/focal cues, lighting, background context."
139
  ),
140
+ "Flux.1-Dev": "Write a Flux.1-Dev style prompt that would reproduce this image faithfully.",
141
+ "Stable Diffusion": "Write a Stable Diffusion style prompt that would reproduce this image faithfully.",
142
+ "MidJourney": "Write a MidJourney style prompt that would reproduce this image faithfully.",
143
  "Aesthetic tags (comma-sep)": "Return only comma-separated aesthetic tags capturing subject, medium, style, lighting, composition. No sentences.",
144
+ "E-commerce product": "Write a crisp product description highlighting key attributes, materials, color, usage, and distinguishing traits.",
145
+ "Portrait (photography)": "Describe the subject, age range, pose, facial expression, camera angle, focal length cues, lighting, and background.",
146
+ "Landscape (photography)": "Describe major landscape elements, time of day, weather, vantage point, composition, and mood.",
147
+ "Art analysis (no artist names)": "Analyze visible medium, style, composition, and palette. Do not mention artist names or titles.",
148
+ "Social caption": "Write an engaging caption describing the visible content. No hashtags.",
149
+ }
150
+
151
+
152
+ LENGTH_CHOICES = ["any", "very short", "short", "medium-length", "long", "very long"] + [str(i) for i in range(20, 261, 10)]
153
+
154
+ _LENGTH_HINTS = {
155
+ "very short": "Keep to one very short sentence (≈10–15 words).",
156
+ "short": "Keep to a short sentence (≈15–25 words).",
157
+ "medium-length":"Write 1–2 sentences (≈30–60 words).",
158
+ "long": "Write a detailed caption (≈80–120 words).",
159
+ "very long": "Write a very detailed caption (≈150–250 words).",
160
  }
161
 
162
+ def _length_hint(choice: str) -> str:
163
+ if not choice or choice == "any":
164
+ return ""
165
+ if choice.isdigit():
166
+ return f"Limit the caption to at most {choice} words."
167
+ return _LENGTH_HINTS.get(choice, "")
168
+
169
+
170
  EXTRA_CHOICES = [
171
  "Do NOT include information about people/characters that cannot be changed (like ethnicity, gender, etc), but do still include changeable attributes (like hair style).",
172
  "Use profanity when describing sexual acts or genitalia (pussy, fucking, cum, cock, dick, cunt, etc.)", # You do NOT have permission to remove
173
+ "Be sexually graphic and highly detailed.",
174
  "IGNORE all information about watermarks.",
175
  "Do NOT use any ambiguous language.",
176
  "ONLY describe the most important elements of the image.",
 
224
  "top_p": 0.9,
225
  "max_tokens": 256,
226
  "max_side": 896,
227
+ "styles": ["Character training"],
228
  "extras": [],
229
  "name": "",
230
  "trigger": "",
 
233
  "shape_aliases_enabled": True,
234
  "shape_aliases": [],
235
  "excel_thumb_px": 128,
236
+ "logo_auto": False,
237
+ "logo_px": 60,
238
+ "logo_scale": 1.0,
239
  "shape_aliases_persist": True,
240
  }
241
 
 
246
  styles = cfg.get("styles") or []
247
  if not isinstance(styles, list):
248
  styles = [styles]
249
+ cfg["styles"] = [s for s in styles if s in STYLE_OPTIONS] or ["Character training"]
250
 
251
  return cfg
252
 
 
429
  # ------------------------------
430
  # 7) Prompt builder (instruction text shown/used for model)
431
  # ------------------------------
432
+ def final_instruction(style_list: List[str], extra_opts: List[str], name_value: str, length_choice: str = "long") -> str:
433
+ styles = style_list or ["Character training"]
434
  parts = [CAPTION_TYPE_MAP.get(s, "") for s in styles]
435
  core = " ".join(p for p in parts if p).strip()
436
  if extra_opts:
437
  core += " " + " ".join(extra_opts)
438
  if NAME_OPTION in (extra_opts or []):
439
  core = core.replace("{name}", (name_value or "{NAME}").strip())
440
+ if "Aesthetic tags (comma-sep)" not in styles: # If they're asking for comma-separated tags, ignore word-length guidance.
441
+ lh = _length_hint(length_choice or "any")
442
+ if lh:
443
+ core += " " + lh
444
  return core
445
 
446
 
 
883
  with gr.Accordion("Caption style (choose one or combine)", open=True):
884
  style_checks = gr.CheckboxGroup(
885
  choices=STYLE_OPTIONS,
886
+ value=settings.get("styles", ["Character training"]),
887
  label=None
888
  )
889
+ caption_length = gr.Dropdown(
890
+ choices=LENGTH_CHOICES,
891
+ label="Caption Length",
892
+ value=settings.get("caption_length", "long")
893
+ )
894
  with gr.Accordion("Extra options", open=False):
895
  extra_opts = gr.CheckboxGroup(
896
  choices=[NAME_OPTION] + EXTRA_CHOICES,
 
902
  trig = gr.Textbox(label="Trigger word", value=settings.get("trigger",""))
903
  add_start = gr.Textbox(label="Add text to start", value=settings.get("begin",""))
904
  add_end = gr.Textbox(label="Add text to end", value=settings.get("end",""))
905
+
906
 
907
  # RIGHT: instructions + dataset + general sliders + logo controls
908
  with gr.Column(scale=1):
909
  with gr.Accordion("Model Instructions", open=False):
910
  instruction_preview = gr.Textbox(label=None, lines=12,
911
+ value=final_instruction(settings.get("styles", ["Character training"]),
912
  settings.get("extras", []),
913
+ settings.get("name",""),
914
+ settings.get("caption_length", "long"),
915
+ ),
916
+ )
917
  dataset_name = gr.Textbox(label="Dataset name (export title prefix)",
918
  value=settings.get("dataset_name", "forgecaptions"))
919
  max_side = gr.Slider(256, 1024, settings.get("max_side", 896), step=32, label="Max side (resize)")
 
930
 
931
 
932
  # Persist instruction + general settings
933
+ def _refresh_instruction(styles, extra, name_value, trigv, begv, endv, excel_px, ms, cap_len):
934
+ instr = final_instruction(styles or ["Character training"], extra or [], name_value, cap_len)
935
  cfg = load_settings()
936
  cfg.update({
937
+ "styles": styles or ["Character training"],
938
  "extras": extra or [],
939
  "name": name_value,
940
  "trigger": trigv, "begin": begv, "end": endv,
941
  "excel_thumb_px": int(excel_px),
942
  "max_side": int(ms),
943
+ "caption_length": cap_len or "any",
944
  })
945
  save_settings(cfg)
946
  return instr
947
 
948
+ for comp in [style_checks, extra_opts, name_input, trig, add_start, add_end, excel_thumb_px, max_side, caption_length]:
949
  comp.change(_refresh_instruction,
950
+ inputs=[style_checks, extra_opts, name_input, trig, add_start, add_end, excel_thumb_px, max_side, caption_length],
951
  outputs=[instruction_preview])
952
 
953
  def _save_dataset_name(name):
 
965
  "Replace literal **shape tokens** in captions with a preferred **name**.\n\n"
966
  "**How to use:**\n"
967
  "- Left column = a single token **or** comma/pipe-separated synonyms, e.g. `diamond, rhombus | lozenge`\n"
968
+ "- Right column = replacement name, e.g. `family-emblem`\n"
969
  "Matches are case-insensitive, catches simple plurals (`box`→`boxes`, `lady`→`ladies`), "
970
  "and also matches `*-shaped` or `* shaped` variants."
971
  )
 
1037
  run_button = gr.Button("Caption batch", variant="primary")
1038
 
1039
  with gr.Accordion("Import captions from CSV/XLSX (merge by filename)", open=False):
1040
+ import_file = gr.File(label="Choose .csv or .xlsx", file_types=[".csv", ".xlsx"], type="filepath")
1041
  import_btn = gr.Button("Import into current session")
1042
 
1043
  # ---- Results area (gallery left / table right)