Spaces:
Running
on
Zero
Running
on
Zero
Upload app.py with huggingface_hub
Browse files
app.py
CHANGED
|
@@ -730,37 +730,29 @@ def generate_prompt_with_glm(image_description: str, user_request: str, style: s
|
|
| 730 |
style_hint = f" Apply style: {style}." if style and style != "None" else ""
|
| 731 |
desc = image_description[:MAX_DESCRIPTION_LENGTH] if len(image_description) > MAX_DESCRIPTION_LENGTH else image_description
|
| 732 |
|
| 733 |
-
system_prompt = """
|
| 734 |
|
| 735 |
-
|
| 736 |
-
OUTPUT: A single image prompt describing the TRANSFORMED result (50-800 tokens).
|
| 737 |
|
| 738 |
-
|
| 739 |
-
- Combine the original image elements with the requested changes
|
| 740 |
-
- Write as if describing the final transformed image
|
| 741 |
-
- Use present tense ("features", "displays", "shows")
|
| 742 |
-
- Include: style, colors, lighting, textures, mood, composition
|
| 743 |
-
- Maximum 800 tokens
|
| 744 |
|
| 745 |
-
FORBIDDEN
|
| 746 |
-
-
|
| 747 |
-
-
|
| 748 |
-
-
|
| 749 |
-
-
|
| 750 |
-
-
|
|
|
|
| 751 |
|
| 752 |
-
|
|
|
|
|
|
|
|
|
|
| 753 |
|
| 754 |
-
|
| 755 |
-
"
|
| 756 |
-
"Here is the transformed prompt: A beautiful..."
|
| 757 |
-
"I'll combine the description with the changes..."
|
| 758 |
-
"The key elements to include are..."
|
| 759 |
|
| 760 |
-
|
| 761 |
-
"A kangaroo wearing a festive red Santa hat stands at a rustic wooden workbench, warm golden workshop lighting casting soft shadows, surrounded by vintage tools and spools of thread, cozy Christmas atmosphere, photorealistic detail, shallow depth of field"
|
| 762 |
-
|
| 763 |
-
NOW OUTPUT ONLY THE IMAGE PROMPT:"""
|
| 764 |
|
| 765 |
try:
|
| 766 |
response = client.chat.completions.create(
|
|
@@ -793,6 +785,37 @@ NOW OUTPUT ONLY THE IMAGE PROMPT:"""
|
|
| 793 |
# Remove quotes
|
| 794 |
if content.startswith('"') and content.endswith('"'):
|
| 795 |
content = content[1:-1]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 796 |
# Truncate if too long
|
| 797 |
max_words = 800
|
| 798 |
words = content.split()
|
|
|
|
| 730 |
style_hint = f" Apply style: {style}." if style and style != "None" else ""
|
| 731 |
desc = image_description[:MAX_DESCRIPTION_LENGTH] if len(image_description) > MAX_DESCRIPTION_LENGTH else image_description
|
| 732 |
|
| 733 |
+
system_prompt = """You are an image prompt generator. Output ONLY the final prompt - nothing else.
|
| 734 |
|
| 735 |
+
TASK: Combine the image description with the requested changes into ONE image prompt.
|
|
|
|
| 736 |
|
| 737 |
+
GLOBAL RULE: Maximum 800 tokens. Output ONLY the prompt text itself.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 738 |
|
| 739 |
+
ABSOLUTELY FORBIDDEN - NEVER OUTPUT THESE:
|
| 740 |
+
- Planning text: "Add textures:", "Include lighting:", "The key elements are:"
|
| 741 |
+
- Meta-commentary: "Here is", "I will", "Let me", "The prompt"
|
| 742 |
+
- Reasoning: "I should", "I need to", "First I'll"
|
| 743 |
+
- Labels: "Textures:", "Colors:", "Mood:", "Style:"
|
| 744 |
+
- Word counting or token counting
|
| 745 |
+
- ANY text that is not the actual image prompt
|
| 746 |
|
| 747 |
+
WRONG (reasoning leaked):
|
| 748 |
+
"Add textures and mood: The velvet of dresses has a rich..."
|
| 749 |
+
"Here is the transformed prompt: A beautiful sunset..."
|
| 750 |
+
"The key elements to include are: lighting, colors..."
|
| 751 |
|
| 752 |
+
CORRECT (pure prompt only):
|
| 753 |
+
"A majestic ballroom scene rendered in vibrant anime style, elegant dancers in flowing silk gowns with rich velvet textures, crystal chandeliers casting warm golden light, polished marble floor with soft reflections, romantic atmosphere with floating rose petals"
|
|
|
|
|
|
|
|
|
|
| 754 |
|
| 755 |
+
OUTPUT THE IMAGE PROMPT NOW - NOTHING ELSE:"""
|
|
|
|
|
|
|
|
|
|
| 756 |
|
| 757 |
try:
|
| 758 |
response = client.chat.completions.create(
|
|
|
|
| 785 |
# Remove quotes
|
| 786 |
if content.startswith('"') and content.endswith('"'):
|
| 787 |
content = content[1:-1]
|
| 788 |
+
|
| 789 |
+
# Filter out leaked reasoning patterns
|
| 790 |
+
reasoning_patterns = [
|
| 791 |
+
"Add textures", "Add lighting", "Add colors", "Add mood", "Add style",
|
| 792 |
+
"Include textures", "Include lighting", "Include colors",
|
| 793 |
+
"The key elements", "Key elements:", "Elements to include",
|
| 794 |
+
"Here is the", "Here's the", "The prompt is",
|
| 795 |
+
"I will", "I'll", "I should", "I need to", "Let me",
|
| 796 |
+
"Textures:", "Colors:", "Mood:", "Style:", "Lighting:",
|
| 797 |
+
"First,", "Second,", "Finally,", "Now,",
|
| 798 |
+
]
|
| 799 |
+
content_lower = content.lower()
|
| 800 |
+
for pattern in reasoning_patterns:
|
| 801 |
+
if content_lower.startswith(pattern.lower()):
|
| 802 |
+
# Find the actual prompt after the reasoning
|
| 803 |
+
for sep in [':', '. ', '- ']:
|
| 804 |
+
if sep in content[:100]:
|
| 805 |
+
idx = content.find(sep)
|
| 806 |
+
if idx > 0 and idx < 100:
|
| 807 |
+
content = content[idx+len(sep):].strip()
|
| 808 |
+
content_lower = content.lower()
|
| 809 |
+
break
|
| 810 |
+
|
| 811 |
+
# Remove any remaining label prefixes
|
| 812 |
+
while content and content[0].isupper() and ':' in content[:30]:
|
| 813 |
+
idx = content.find(':')
|
| 814 |
+
if idx > 0 and idx < 30:
|
| 815 |
+
content = content[idx+1:].strip()
|
| 816 |
+
else:
|
| 817 |
+
break
|
| 818 |
+
|
| 819 |
# Truncate if too long
|
| 820 |
max_words = 800
|
| 821 |
words = content.split()
|