Spaces:

lulavc
/

Z-Image-Turbo

Running on Zero

App Files Files

lulavc committed 2 days ago

Commit

8c9a6b5

verified ·

1 Parent(s): fbf53ee

Upload app.py with huggingface_hub

Browse files

Files changed (1) hide show

app.py +48 -25

app.py CHANGED Viewed

@@ -730,37 +730,29 @@ def generate_prompt_with_glm(image_description: str, user_request: str, style: s
     style_hint = f" Apply style: {style}." if style and style != "None" else ""
     desc = image_description[:MAX_DESCRIPTION_LENGTH] if len(image_description) > MAX_DESCRIPTION_LENGTH else image_description
-    system_prompt = """ROLE: Silent image prompt writer. You output ONLY the final prompt text.
-INPUT: Image description + user's desired changes.
-OUTPUT: A single image prompt describing the TRANSFORMED result (50-800 tokens).
-ABSOLUTE RULES - FOLLOW SILENTLY, NEVER MENTION:
-- Combine the original image elements with the requested changes
-- Write as if describing the final transformed image
-- Use present tense ("features", "displays", "shows")
-- Include: style, colors, lighting, textures, mood, composition
-- Maximum 800 tokens
-FORBIDDEN IN OUTPUT:
-- ANY discussion of rules, instructions, or word counts
-- ANY meta-commentary ("Here is", "I will", "The prompt", "Let me")
-- ANY reasoning, thinking, planning, or analysis text
-- ANY numbered lists or word counting
-- ANY explanation of your choices
-YOUR OUTPUT IS THE PROMPT ITSELF. NOTHING BEFORE. NOTHING AFTER.
-WRONG OUTPUT EXAMPLES:
-"Let's count the words: 1. A 2. kangaroo..."
-"Here is the transformed prompt: A beautiful..."
-"I'll combine the description with the changes..."
-"The key elements to include are..."
-CORRECT OUTPUT EXAMPLE:
-"A kangaroo wearing a festive red Santa hat stands at a rustic wooden workbench, warm golden workshop lighting casting soft shadows, surrounded by vintage tools and spools of thread, cozy Christmas atmosphere, photorealistic detail, shallow depth of field"
-NOW OUTPUT ONLY THE IMAGE PROMPT:"""
     try:
         response = client.chat.completions.create(
@@ -793,6 +785,37 @@ NOW OUTPUT ONLY THE IMAGE PROMPT:"""
             # Remove quotes
             if content.startswith('"') and content.endswith('"'):
                 content = content[1:-1]
             # Truncate if too long
             max_words = 800
             words = content.split()

     style_hint = f" Apply style: {style}." if style and style != "None" else ""
     desc = image_description[:MAX_DESCRIPTION_LENGTH] if len(image_description) > MAX_DESCRIPTION_LENGTH else image_description
+    system_prompt = """You are an image prompt generator. Output ONLY the final prompt - nothing else.
+TASK: Combine the image description with the requested changes into ONE image prompt.
+GLOBAL RULE: Maximum 800 tokens. Output ONLY the prompt text itself.
+ABSOLUTELY FORBIDDEN - NEVER OUTPUT THESE:
+- Planning text: "Add textures:", "Include lighting:", "The key elements are:"
+- Meta-commentary: "Here is", "I will", "Let me", "The prompt"
+- Reasoning: "I should", "I need to", "First I'll"
+- Labels: "Textures:", "Colors:", "Mood:", "Style:"
+- Word counting or token counting
+- ANY text that is not the actual image prompt
+WRONG (reasoning leaked):
+"Add textures and mood: The velvet of dresses has a rich..."
+"Here is the transformed prompt: A beautiful sunset..."
+"The key elements to include are: lighting, colors..."
+CORRECT (pure prompt only):
+"A majestic ballroom scene rendered in vibrant anime style, elegant dancers in flowing silk gowns with rich velvet textures, crystal chandeliers casting warm golden light, polished marble floor with soft reflections, romantic atmosphere with floating rose petals"
+OUTPUT THE IMAGE PROMPT NOW - NOTHING ELSE:"""
     try:
         response = client.chat.completions.create(
             # Remove quotes
             if content.startswith('"') and content.endswith('"'):
                 content = content[1:-1]
+            # Filter out leaked reasoning patterns
+            reasoning_patterns = [
+                "Add textures", "Add lighting", "Add colors", "Add mood", "Add style",
+                "Include textures", "Include lighting", "Include colors",
+                "The key elements", "Key elements:", "Elements to include",
+                "Here is the", "Here's the", "The prompt is",
+                "I will", "I'll", "I should", "I need to", "Let me",
+                "Textures:", "Colors:", "Mood:", "Style:", "Lighting:",
+                "First,", "Second,", "Finally,", "Now,",
+            ]
+            content_lower = content.lower()
+            for pattern in reasoning_patterns:
+                if content_lower.startswith(pattern.lower()):
+                    # Find the actual prompt after the reasoning
+                    for sep in [':', '. ', '- ']:
+                        if sep in content[:100]:
+                            idx = content.find(sep)
+                            if idx > 0 and idx < 100:
+                                content = content[idx+len(sep):].strip()
+                                content_lower = content.lower()
+                                break
+            # Remove any remaining label prefixes
+            while content and content[0].isupper() and ':' in content[:30]:
+                idx = content.find(':')
+                if idx > 0 and idx < 30:
+                    content = content[idx+1:].strip()
+                else:
+                    break
             # Truncate if too long
             max_words = 800
             words = content.split()