# Spaces: Running on Zero
| """Z-Image-Turbo v2.3 - Multilingual Support""" | |
| import os | |
| import logging | |
| import torch | |
| import spaces | |
| import gradio as gr | |
| import requests | |
| import io | |
| import base64 | |
| import tempfile | |
| import time | |
| from typing import Tuple, Optional, Dict | |
| from PIL import Image | |
| from diffusers import DiffusionPipeline, ZImageImg2ImgPipeline | |
| from openai import OpenAI | |
| # ============================================================================= | |
| # GENERATION TIMER CLASS | |
| # ============================================================================= | |
class GenerationTimer:
    """Wall-clock stopwatch for reporting how long an image generation took.

    ``start_time`` and ``end_time`` hold ``time.time()`` readings, or ``None``
    while unset.
    """

    def __init__(self):
        self.start_time: Optional[float] = None
        self.end_time: Optional[float] = None

    def start(self):
        """Start (or restart) the timer, discarding any previous stop time."""
        self.start_time = time.time()
        self.end_time = None

    def stop(self):
        """Record the current time as the end of the measured interval."""
        self.end_time = time.time()

    def elapsed(self) -> float:
        """Return the elapsed time in seconds.

        Returns 0.0 if the timer was never started. While no stop time has
        been recorded, the current time is used as the end point.
        """
        if self.start_time is None:
            return 0.0
        # Test against None rather than truthiness: 0.0 is a valid reading
        # and must not be mistaken for "still running".
        end = self.end_time if self.end_time is not None else time.time()
        return end - self.start_time

    def format(self) -> str:
        """Format the elapsed time as ``"12.3s"`` or ``"2m 5.5s"``."""
        # Round to whole tenths *before* splitting into minutes/seconds so
        # the displayed digit can never roll over to "60.0s" or "1m 60.0s".
        tenths = round(self.elapsed() * 10)
        if tenths < 600:
            return f"{tenths / 10:.1f}s"
        minutes, remainder = divmod(tenths, 600)
        return f"{minutes}m {remainder / 10:.1f}s"
def create_status_html(message: str, elapsed: str, is_generating: bool = True) -> str:
    """Render the status panel markup shown during and after generation.

    Args:
        message: Status text placed inside the panel (inserted verbatim).
        elapsed: Pre-formatted elapsed-time string (inserted verbatim).
        is_generating: True renders the in-progress variant with a spinner;
            False renders the completed variant with a check mark.

    Returns:
        An HTML fragment as a string.
    """
    # Guard clause for the finished state; the in-progress markup follows.
    if not is_generating:
        return f'''
<div class="generation-status complete">
<div class="status-content">
<span class="status-complete">✅ {message}</span>
<span class="status-timer-final">⏱️ {elapsed}</span>
</div>
</div>
'''
    return f'''
<div class="generation-status generating">
<div class="status-content">
<div class="generating-spinner-dual"></div>
<div class="status-text-container">
<span class="status-text">{message}</span>
<span class="status-timer">⏱️ {elapsed}</span>
</div>
</div>
</div>
'''
# Configure logging (replaces debug print statements).
# The root level is INFO, so logger.debug(...) calls in this module are not
# emitted unless the level is lowered.
logging.basicConfig(level=logging.INFO, format='[%(levelname)s] %(message)s')
# Module-level logger used by the functions in this file.
logger = logging.getLogger(__name__)
| # ============================================================================= | |
| # MULTILINGUAL SUPPORT | |
| # ============================================================================= | |
# Display names of the supported UI languages; each is also a key of
# TRANSLATIONS below.
LANGUAGES = ["English", "Español", "Português (BR)", "العربية", "हिंदी"]

# UI strings per language: language display name -> {string key -> text}.
# Every language table carries the same set of keys as "English".
TRANSLATIONS: Dict[str, Dict[str, str]] = {
    "English": {
        # Header
        "title": "Z Image Turbo + GLM-4.6V",
        "subtitle": "AI Image Generation & Transformation powered by DeepSeek Reasoning",
        "like_msg": "If you liked it, please ❤️ like it. Thank you!",
        # Tabs
        "tab_generate": "Generate",
        "tab_assistant": "AI Assistant",
        "tab_transform": "Transform",
        # Generate tab
        "prompt": "Prompt",
        "prompt_placeholder": "Describe your image in detail...",
        "polish_checkbox": "Prompt+ by deepseek-reasoner",
        "style": "Style",
        "aspect_ratio": "Aspect Ratio",
        "advanced_settings": "Advanced Settings",
        "steps": "Steps",
        "seed": "Seed",
        "random_seed": "Random Seed",
        "generate_btn": "Generate",
        "generated_image": "Generated Image",
        "enhanced_prompt": "Enhanced Prompt",
        "seed_used": "Seed Used",
        "share": "Share",
        # AI Assistant tab
        "ai_description": "**AI-Powered Prompt Generator** - Upload an image, analyze it with GLM-4.6V, then generate optimized prompts.",
        "upload_image": "Upload Image",
        "analyze_btn": "Analyze Image",
        "image_description": "Image Description",
        "changes_request": "What changes do you want?",
        "changes_placeholder": "e.g., 'watercolor style' or 'dramatic sunset lighting'",
        "target_style": "Target Style",
        "generate_prompt_btn": "Generate Prompt",
        "generated_prompt": "Generated Prompt",
        "send_to_transform": "Send to Transform Tab",
        "how_to_use": "How to Use",
        "how_to_use_content": """1. **Upload** an image and click "Analyze Image"
2. **Describe** the changes you want
3. **Generate** an optimized prompt
4. **Send** to Transform tab to apply changes""",
        # Transform tab
        "transform_description": "**Transform your image** - Upload and describe the transformation. Lower strength = subtle, higher = dramatic.",
        "transformation_prompt": "Transformation Prompt",
        "transform_placeholder": "e.g., 'oil painting style, vibrant colors'",
        "strength": "Strength",
        "transform_btn": "Transform",
        "transformed_image": "Transformed Image",
        "example_prompts": "Example Prompts",
        # Footer
        "models": "Models",
        "by": "by",
    },
    "Español": {
        "title": "Z Image Turbo + GLM-4.6V",
        "subtitle": "Generación y Transformación de Imágenes con IA impulsado por DeepSeek Reasoning",
        "like_msg": "Si te gustó, por favor dale me gusta. ¡Gracias!",
        "tab_generate": "Generar",
        "tab_assistant": "Asistente IA",
        "tab_transform": "Transformar",
        "prompt": "Prompt",
        "prompt_placeholder": "Describe tu imagen en detalle...",
        "polish_checkbox": "Prompt+ por deepseek-reasoner",
        "style": "Estilo",
        "aspect_ratio": "Relación de Aspecto",
        "advanced_settings": "Configuración Avanzada",
        "steps": "Pasos",
        "seed": "Semilla",
        "random_seed": "Semilla Aleatoria",
        "generate_btn": "Generar",
        "generated_image": "Imagen Generada",
        "enhanced_prompt": "Prompt Mejorado",
        "seed_used": "Semilla Usada",
        "share": "Compartir",
        "ai_description": "**Generador de Prompts con IA** - Sube una imagen, analízala con GLM-4.6V, y genera prompts optimizados.",
        "upload_image": "Subir Imagen",
        "analyze_btn": "Analizar Imagen",
        "image_description": "Descripción de la Imagen",
        "changes_request": "¿Qué cambios quieres?",
        "changes_placeholder": "ej., 'estilo acuarela' o 'iluminación de atardecer dramático'",
        "target_style": "Estilo Objetivo",
        "generate_prompt_btn": "Generar Prompt",
        "generated_prompt": "Prompt Generado",
        "send_to_transform": "Enviar a Transformar",
        "how_to_use": "Cómo Usar",
        "how_to_use_content": """1. **Sube** una imagen y haz clic en "Analizar Imagen"
2. **Describe** los cambios que quieres
3. **Genera** un prompt optimizado
4. **Envía** a la pestaña Transformar para aplicar cambios""",
        "transform_description": "**Transforma tu imagen** - Sube y describe la transformación. Menor fuerza = sutil, mayor = dramático.",
        "transformation_prompt": "Prompt de Transformación",
        "transform_placeholder": "ej., 'estilo pintura al óleo, colores vibrantes'",
        "strength": "Fuerza",
        "transform_btn": "Transformar",
        "transformed_image": "Imagen Transformada",
        "example_prompts": "Prompts de Ejemplo",
        "models": "Modelos",
        "by": "por",
    },
    "Português (BR)": {
        "title": "Z Image Turbo + GLM-4.6V",
        "subtitle": "Geração e Transformação de Imagens com IA alimentado por DeepSeek Reasoning",
        "like_msg": "Se você gostou, por favor curta. Obrigado!",
        "tab_generate": "Gerar",
        "tab_assistant": "Assistente IA",
        "tab_transform": "Transformar",
        "prompt": "Prompt",
        "prompt_placeholder": "Descreva sua imagem em detalhes...",
        "polish_checkbox": "Prompt+ por deepseek-reasoner",
        "style": "Estilo",
        "aspect_ratio": "Proporção",
        "advanced_settings": "Configurações Avançadas",
        "steps": "Passos",
        "seed": "Semente",
        "random_seed": "Semente Aleatória",
        "generate_btn": "Gerar",
        "generated_image": "Imagem Gerada",
        "enhanced_prompt": "Prompt Aprimorado",
        "seed_used": "Semente Usada",
        "share": "Compartilhar",
        "ai_description": "**Gerador de Prompts com IA** - Envie uma imagem, analise com GLM-4.6V, e gere prompts otimizados.",
        "upload_image": "Enviar Imagem",
        "analyze_btn": "Analisar Imagem",
        "image_description": "Descrição da Imagem",
        "changes_request": "Quais mudanças você quer?",
        "changes_placeholder": "ex., 'estilo aquarela' ou 'iluminação dramática de pôr do sol'",
        "target_style": "Estilo Alvo",
        "generate_prompt_btn": "Gerar Prompt",
        "generated_prompt": "Prompt Gerado",
        "send_to_transform": "Enviar para Transformar",
        "how_to_use": "Como Usar",
        "how_to_use_content": """1. **Envie** uma imagem e clique em "Analisar Imagem"
2. **Descreva** as mudanças que você quer
3. **Gere** um prompt otimizado
4. **Envie** para a aba Transformar para aplicar mudanças""",
        "transform_description": "**Transforme sua imagem** - Envie e descreva a transformação. Menor força = sutil, maior = dramático.",
        "transformation_prompt": "Prompt de Transformação",
        "transform_placeholder": "ex., 'estilo pintura a óleo, cores vibrantes'",
        "strength": "Força",
        "transform_btn": "Transformar",
        "transformed_image": "Imagem Transformada",
        "example_prompts": "Prompts de Exemplo",
        "models": "Modelos",
        "by": "por",
    },
    "العربية": {
        "title": "Z Image Turbo + GLM-4.6V",
        "subtitle": "توليد وتحويل الصور بالذكاء الاصطناعي مدعوم من DeepSeek Reasoning",
        "like_msg": "إذا أعجبك، يرجى الإعجاب. شكراً لك!",
        "tab_generate": "توليد",
        "tab_assistant": "مساعد الذكاء الاصطناعي",
        "tab_transform": "تحويل",
        "prompt": "الوصف",
        "prompt_placeholder": "صف صورتك بالتفصيل...",
        "polish_checkbox": "تحسين+ بواسطة deepseek-reasoner",
        "style": "النمط",
        "aspect_ratio": "نسبة العرض",
        "advanced_settings": "إعدادات متقدمة",
        "steps": "الخطوات",
        "seed": "البذرة",
        "random_seed": "بذرة عشوائية",
        "generate_btn": "توليد",
        "generated_image": "الصورة المولدة",
        "enhanced_prompt": "الوصف المحسن",
        "seed_used": "البذرة المستخدمة",
        "share": "مشاركة",
        "ai_description": "**مولد الأوصاف بالذكاء الاصطناعي** - ارفع صورة، حللها باستخدام GLM-4.6V، ثم أنشئ أوصافاً محسنة.",
        "upload_image": "رفع صورة",
        "analyze_btn": "تحليل الصورة",
        "image_description": "وصف الصورة",
        "changes_request": "ما التغييرات التي تريدها؟",
        "changes_placeholder": "مثال: 'نمط ألوان مائية' أو 'إضاءة غروب درامية'",
        "target_style": "النمط المستهدف",
        "generate_prompt_btn": "توليد الوصف",
        "generated_prompt": "الوصف المولد",
        "send_to_transform": "إرسال إلى التحويل",
        "how_to_use": "كيفية الاستخدام",
        "how_to_use_content": """1. **ارفع** صورة وانقر على "تحليل الصورة"
2. **صف** التغييرات التي تريدها
3. **أنشئ** وصفاً محسناً
4. **أرسل** إلى تبويب التحويل لتطبيق التغييرات""",
        "transform_description": "**حوّل صورتك** - ارفع وصف التحويل. قوة أقل = تغيير طفيف، قوة أكبر = تغيير جذري.",
        "transformation_prompt": "وصف التحويل",
        "transform_placeholder": "مثال: 'نمط لوحة زيتية، ألوان نابضة'",
        "strength": "القوة",
        "transform_btn": "تحويل",
        "transformed_image": "الصورة المحولة",
        "example_prompts": "أمثلة الأوصاف",
        "models": "النماذج",
        "by": "بواسطة",
    },
    "हिंदी": {
        "title": "Z Image Turbo + GLM-4.6V",
        "subtitle": "DeepSeek Reasoning द्वारा संचालित AI छवि निर्माण और रूपांतरण",
        "like_msg": "अगर आपको पसंद आया, तो कृपया लाइक करें। धन्यवाद!",
        "tab_generate": "बनाएं",
        "tab_assistant": "AI सहायक",
        "tab_transform": "रूपांतरित करें",
        "prompt": "प्रॉम्प्ट",
        "prompt_placeholder": "अपनी छवि का विस्तार से वर्णन करें...",
        # Localized like the other languages (was left in English); uses the
        # same word as this table's "by" entry.
        "polish_checkbox": "deepseek-reasoner द्वारा Prompt+",
        "style": "शैली",
        "aspect_ratio": "पक्षानुपात",
        "advanced_settings": "उन्नत सेटिंग्स",
        "steps": "चरण",
        "seed": "बीज",
        "random_seed": "यादृच्छिक बीज",
        "generate_btn": "बनाएं",
        "generated_image": "बनाई गई छवि",
        "enhanced_prompt": "उन्नत प्रॉम्प्ट",
        "seed_used": "प्रयुक्त बीज",
        "share": "साझा करें",
        "ai_description": "**AI-संचालित प्रॉम्प्ट जनरेटर** - एक छवि अपलोड करें, GLM-4.6V से विश्लेषण करें, फिर अनुकूलित प्रॉम्प्ट बनाएं।",
        "upload_image": "छवि अपलोड करें",
        "analyze_btn": "छवि विश्लेषण करें",
        "image_description": "छवि विवरण",
        "changes_request": "आप क्या बदलाव चाहते हैं?",
        "changes_placeholder": "उदा., 'वॉटरकलर शैली' या 'नाटकीय सूर्यास्त प्रकाश'",
        "target_style": "लक्ष्य शैली",
        "generate_prompt_btn": "प्रॉम्प्ट बनाएं",
        "generated_prompt": "बनाया गया प्रॉम्प्ट",
        "send_to_transform": "रूपांतरण टैब पर भेजें",
        "how_to_use": "कैसे उपयोग करें",
        "how_to_use_content": """1. **अपलोड** करें एक छवि और "छवि विश्लेषण करें" पर क्लिक करें
2. **वर्णन** करें जो बदलाव आप चाहते हैं
3. **बनाएं** एक अनुकूलित प्रॉम्प्ट
4. **भेजें** रूपांतरण टैब पर बदलाव लागू करने के लिए""",
        "transform_description": "**अपनी छवि रूपांतरित करें** - अपलोड करें और रूपांतरण का वर्णन करें। कम शक्ति = सूक्ष्म, अधिक = नाटकीय।",
        "transformation_prompt": "रूपांतरण प्रॉम्प्ट",
        "transform_placeholder": "उदा., 'तेल चित्रकला शैली, जीवंत रंग'",
        "strength": "शक्ति",
        "transform_btn": "रूपांतरित करें",
        "transformed_image": "रूपांतरित छवि",
        "example_prompts": "उदाहरण प्रॉम्प्ट",
        "models": "मॉडल",
        "by": "द्वारा",
    },
}
def get_text(lang: str, key: str) -> str:
    """Return the UI string for ``key`` in language ``lang``.

    Lookup order:
      1. the table for ``lang`` (unknown languages use the English table),
      2. the English table, so a key missing from one translation still
         shows readable text instead of the raw key,
      3. the key itself, when no table defines it.
    """
    english = TRANSLATIONS["English"]
    table = TRANSLATIONS.get(lang, english)
    if key in table:
        return table[key]
    return english.get(key, key)
def change_language(lang_name: str):
    """Build the component updates that relabel the UI for ``lang_name``.

    Unknown language names use the English table. Returns a flat list of 36
    ``gr.update(...)`` objects: 12 for the Generate tab, 10 for the AI
    Assistant tab and 14 for the Transform tab, in that order.
    NOTE(review): the order presumably has to match the outputs list wired
    to the language selector elsewhere in the file - confirm before
    reordering.
    """
    t = TRANSLATIONS.get(lang_name, TRANSLATIONS["English"])

    def labelled(key):
        # Component whose label is translated.
        return gr.update(label=t[key])

    def valued(key):
        # Component whose value (button caption / markdown body) is translated.
        return gr.update(value=t[key])

    def toggle(key):
        # Checkbox: translated label, kept interactive.
        return gr.update(label=t[key], interactive=True)

    def textbox(label_key, placeholder_key):
        # Text input: translated label and placeholder.
        return gr.update(label=t[label_key], placeholder=t[placeholder_key])

    generate_tab = [
        textbox("prompt", "prompt_placeholder"),
        toggle("polish_checkbox"),
        labelled("style"),
        labelled("aspect_ratio"),
        labelled("steps"),
        labelled("seed"),
        toggle("random_seed"),
        valued("generate_btn"),
        labelled("generated_image"),
        labelled("enhanced_prompt"),
        labelled("seed_used"),
        valued("share"),
    ]
    assistant_tab = [
        valued("ai_description"),
        labelled("upload_image"),
        valued("analyze_btn"),
        labelled("image_description"),
        textbox("changes_request", "changes_placeholder"),
        labelled("target_style"),
        valued("generate_prompt_btn"),
        labelled("generated_prompt"),
        valued("send_to_transform"),
        valued("how_to_use_content"),
    ]
    transform_tab = [
        valued("transform_description"),
        labelled("upload_image"),
        textbox("transformation_prompt", "transform_placeholder"),
        toggle("polish_checkbox"),
        labelled("style"),
        labelled("strength"),
        labelled("steps"),
        labelled("seed"),
        toggle("random_seed"),
        valued("transform_btn"),
        labelled("transformed_image"),
        labelled("enhanced_prompt"),
        labelled("seed_used"),
        valued("share"),
    ]
    return generate_tab + assistant_tab + transform_tab
| # ============================================================================= | |
# Constants (replaces magic numbers)
# NOTE(review): the three image constants are not used in this part of the
# file; presumably pixel bounds and a size multiple for images - confirm
# against their users.
MIN_IMAGE_DIM = 512
MAX_IMAGE_DIM = 2048
IMAGE_ALIGNMENT = 16
# Passed as timeout= / max_retries= to both OpenAI client constructors below
# (timeout presumably in seconds).
API_TIMEOUT = 90.0
API_MAX_RETRIES = 2
# generate_prompt_with_glm truncates the image description to this many
# characters before sending it on.
MAX_DESCRIPTION_LENGTH = 6000 # For GLM prompt generation - doubled for very detailed descriptions
# Backend settings will be applied when GPU is available (inside @spaces.GPU functions)
# Don't set them here to avoid CUDA initialization at module load time
# Singleton clients with timeout and retry
# Created on first use by get_deepseek_client() / get_glm_client(); they stay
# None while the corresponding API key is not configured.
_deepseek_client: Optional[OpenAI] = None
_glm_client: Optional[OpenAI] = None
def get_deepseek_client() -> Optional[OpenAI]:
    """Return the shared DeepSeek client, creating it on first use.

    The client is cached in the module-level ``_deepseek_client``. When the
    ``DEEPSEEK_API_KEY`` environment variable is unset or empty, a warning is
    logged and ``None`` is returned; nothing is cached, so a later call
    re-reads the environment.
    """
    global _deepseek_client
    # Fast path: reuse the client built by an earlier call.
    if _deepseek_client is not None:
        return _deepseek_client

    key = os.environ.get("DEEPSEEK_API_KEY")
    if not key:
        logger.warning("DEEPSEEK_API_KEY not configured")
        return None

    _deepseek_client = OpenAI(
        base_url="https://api.deepseek.com",
        api_key=key,
        timeout=API_TIMEOUT,
        max_retries=API_MAX_RETRIES,
    )
    return _deepseek_client
def _tidy_polished_prompt(raw: str, max_words: int = 800) -> str:
    """Normalize raw model output into a single-line prompt (may return "")."""
    text = raw.strip().replace("\n", " ")
    if "</think>" in text:
        # Keep only what follows the (last) reasoning block.
        text = text.split("</think>")[-1].strip()
    elif "<think>" in text:
        # Unterminated reasoning block: everything from the tag on is
        # reasoning, not prompt text.
        text = text.split("<think>")[0].strip()
    # Require two characters so a lone quote is not "unwrapped" twice.
    if len(text) >= 2 and text.startswith('"') and text.endswith('"'):
        text = text[1:-1].strip()
    words = text.split()
    if len(words) > max_words:
        text = " ".join(words[:max_words])
    return text


def polish_prompt(original_prompt: str, mode: str = "generate") -> str:
    """Expand short prompts into detailed, high-quality prompts using deepseek-reasoner.

    Args:
        original_prompt: The user's prompt text.
        mode: "transform" selects the transformation system prompt and
            default; any other value uses the generation variant.

    Returns:
        The enhanced single-line prompt, capped at 800 words. A built-in
        default prompt is returned for empty input. ``original_prompt`` is
        returned unchanged when the DeepSeek client is unavailable, the API
        call fails, or the response yields no usable text after cleanup.
    """
    logger.info(f"polish_prompt called: mode={mode}, prompt_len={len(original_prompt) if original_prompt else 0}")
    if not original_prompt or not original_prompt.strip():
        logger.info("polish_prompt: empty input, using default")
        if mode == "transform":
            return "high quality, enhanced details, professional finish"
        return "Ultra HD, 4K, cinematic composition, highly detailed"
    client = get_deepseek_client()
    if not client:
        logger.warning("polish_prompt: DeepSeek client not available, returning original")
        return original_prompt
    if mode == "transform":
        system_prompt = """ROLE: Silent image prompt writer. You output ONLY the final prompt text.
INPUT: User describes a transformation.
OUTPUT: A descriptive image prompt for the RESULT (50-800 tokens).
ABSOLUTE RULES - FOLLOW SILENTLY, NEVER MENTION:
- Write as if describing a finished photograph or artwork
- Use present tense ("features", "displays", "shows")
- Include: style, colors, lighting, textures, mood, composition
- Maximum 800 tokens
FORBIDDEN IN OUTPUT:
- ANY discussion of rules or instructions
- ANY meta-commentary ("Here is", "I will", "The prompt")
- ANY reasoning or thinking text
- ANY explanation of choices
YOUR OUTPUT IS THE PROMPT ITSELF. NOTHING BEFORE. NOTHING AFTER.
WRONG OUTPUT EXAMPLES:
"Rule says NO action verbs like transform, but descriptive verbs..."
"Here is the refined prompt: A beautiful..."
"I'll describe this as..."
CORRECT OUTPUT EXAMPLE:
"A serene mountain landscape bathed in golden hour light, soft mist rolling through pine valleys, impressionist oil painting style with visible brushstrokes, warm amber and soft blue color palette, ethereal atmosphere"
NOW OUTPUT ONLY THE IMAGE PROMPT:"""
    else:
        system_prompt = """ROLE: Silent image prompt writer. You output ONLY the final prompt text.
INPUT: User provides a concept or idea.
OUTPUT: A detailed, expressive image prompt (50-800 tokens).
ABSOLUTE RULES - FOLLOW SILENTLY, NEVER MENTION:
- Be descriptive: subject, lighting, atmosphere, style, composition, fine details
- Use vivid, specific, evocative language
- Include artistic style references when appropriate
- Add technical quality terms: resolution, rendering quality, detail level
- Maximum 800 tokens
FORBIDDEN IN OUTPUT:
- ANY discussion of rules or instructions
- ANY meta-commentary ("Here is", "I will", "The prompt")
- ANY reasoning or thinking text
- ANY explanation of choices
YOUR OUTPUT IS THE PROMPT ITSELF. NOTHING BEFORE. NOTHING AFTER.
WRONG OUTPUT EXAMPLES:
"Following the guidelines, I will create..."
"Here is the enhanced prompt: A beautiful..."
"Let me think about how to describe..."
CORRECT OUTPUT EXAMPLE:
"A majestic snow leopard perched on a rocky outcrop, piercing blue eyes gazing into the distance, soft morning light filtering through mountain mist, hyperrealistic digital art, intricate fur detail, cinematic composition, 8K resolution"
NOW OUTPUT ONLY THE IMAGE PROMPT:"""
    try:
        response = client.chat.completions.create(
            model="deepseek-reasoner",
            max_tokens=800,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": original_prompt}
            ],
        )
        msg = response.choices[0].message
        content = msg.content if msg.content else ""
        logger.info(f"polish_prompt API response: content_len={len(content)}, has_reasoning={hasattr(msg, 'reasoning_content') and bool(msg.reasoning_content)}")
        # If content is empty, fall back to the last paragraph of
        # reasoning_content as the best available answer.
        if not content and hasattr(msg, 'reasoning_content') and msg.reasoning_content:
            text = msg.reasoning_content.strip()
            paragraphs = [p.strip() for p in text.split('\n\n') if p.strip()]
            if paragraphs:
                content = paragraphs[-1]
                logger.info(f"polish_prompt: extracted from reasoning_content, len={len(content)}")
        if content:
            # 800-word cap applies to all modes (Global rule).
            cleaned = _tidy_polished_prompt(content, max_words=800)
            # Cleanup can consume everything (e.g. a reply that was only a
            # reasoning block); never hand back an empty prompt.
            if cleaned:
                logger.info(f"polish_prompt SUCCESS: enhanced from {len(original_prompt)} to {len(cleaned)} chars")
                return cleaned
        logger.warning("polish_prompt: no content extracted, returning original prompt")
        return original_prompt
    except Exception as e:
        # Deliberate best-effort boundary: prompt polishing must never block
        # generation, so any failure degrades to the user's own prompt.
        logger.error(f"polish_prompt FAILED: {type(e).__name__}: {str(e)}")
        return original_prompt
| # GLM-4V Vision AI functions (runs on CPU - API calls) | |
def get_glm_client() -> Optional[OpenAI]:
    """Get GLM API client (singleton with timeout).

    The client is created on first use and cached in the module-level
    ``_glm_client``. When the ``GLM_API_KEY`` environment variable is unset
    or empty, a warning is logged (matching ``get_deepseek_client``) and
    ``None`` is returned; nothing is cached, so a later call re-reads the
    environment.
    """
    global _glm_client
    if _glm_client is None:
        api_key = os.environ.get("GLM_API_KEY")
        if not api_key:
            logger.warning("GLM_API_KEY not configured")
            return None
        _glm_client = OpenAI(
            base_url="https://api.z.ai/api/paas/v4",
            api_key=api_key,
            timeout=API_TIMEOUT,
            max_retries=API_MAX_RETRIES,
        )
    return _glm_client
def encode_image_base64(image: Optional[Image.Image]) -> Optional[str]:
    """Encode a PIL image as a base64 JPEG string.

    Args:
        image: The image to encode, or ``None``.

    Returns:
        The base64 text of the JPEG bytes (quality 90), or ``None`` when
        ``image`` is ``None``.
    """
    if image is None:
        return None
    # JPEG cannot store alpha or palette images (RGBA, LA, P, ...); saving
    # one raises OSError, which made PNG uploads with transparency fail.
    # Convert anything other than plain RGB / greyscale first.
    if image.mode not in ("RGB", "L"):
        image = image.convert("RGB")
    # The context manager releases the buffer even if save() raises.
    with io.BytesIO() as buf:
        image.save(buf, format='JPEG', quality=90)  # JPEG is faster for API calls
        return base64.b64encode(buf.getvalue()).decode('utf-8')
def clean_glm_response(text: str) -> str:
    """Strip GLM box markers and surrounding whitespace from ``text``.

    Falsy input (``None`` or ``""``) yields an empty string.
    """
    if not text:
        return ""
    # Drop each special token wherever it occurs, then trim the edges.
    for marker in ('<|begin_of_box|>', '<|end_of_box|>'):
        text = text.replace(marker, '')
    return text.strip()
def is_thinking_text(text: str) -> bool:
    """Check if text looks like GLM thinking/reasoning rather than actual content.

    Returns True when ``text`` is empty or whitespace-only, starts with a
    planning header or meta-language opener, or contains a planning phrase
    anywhere. The comparison is case-insensitive and ignores surrounding
    whitespace.
    """
    if not text:
        return True
    text_lower = text.lower().strip()
    # Whitespace-only text carries no content either; treat it like "".
    if not text_lower:
        return True
    # Reject if starts with planning/markdown headers
    planning_starts = (
        '**plan', '## plan', '# plan', 'plan:',
        '**step', '## step', '# step',
        '**analysis', '**approach', '**strategy',
        'here is my', 'here\'s my',
    )
    # Reject if starts with clear meta-language
    thinking_starts = (
        'let me ', 'i need to', 'i should ', 'i will ', "i'll ",
        'got it', 'okay, ', 'okay ', 'alright, ', 'alright ',
        'the user ', 'the request ', 'based on ', 'following the ',
        'now i ', 'my prompt ', 'for this task', 'considering ',
        'understood', 'i understand', 'sure, ', 'sure ',
        '1. ', '1) ',  # Numbered lists = planning
    )
    # str.startswith accepts a tuple, so both opener lists are checked in
    # a single call.
    if text_lower.startswith(planning_starts + thinking_starts):
        return True
    # Check for planning phrases ANYWHERE in text (these are NEVER in good prompts)
    planning_phrases = (
        'i need to describe', 'i should ', 'i\'ll describe', 'i\'ll keep',
        'i will describe', 'i will keep', 'this includes',
        'the key change', 'key part of the scene', 'is a defining feature',
        'is crucial', 'is important', 'should remain', 'should be',
        '**main subject:**', '**weapon:**', '**setting:**', '**mood:**',
        '**colors', '**lighting', '**plan:**',
    )
    return any(phrase in text_lower for phrase in planning_phrases)
def analyze_image_with_glm(image: Optional[Image.Image]) -> str:
    """Describe ``image`` in detail using the GLM vision model.

    Always returns a string: the cleaned description on success, otherwise a
    user-facing message (no image, missing API key, description too short,
    or the text of any exception raised while calling the API).
    """
    if image is None:
        return "Please upload an image first."
    client = get_glm_client()
    if not client:
        return "GLM API key not configured. Please add GLM_API_KEY to space secrets."

    instructions = """Describe this image in EXTREME DETAIL.
GLOBAL RULE: Your output text CANNOT exceed 4000 TOKENS. This is a strict limit.
START directly with the main subject. NO preambles like "This image shows" or "I can see".
DESCRIBE EVERYTHING VISIBLE IN EXHAUSTIVE DETAIL:
- MAIN SUBJECT: Full description (shape, form, position, action, every visible feature)
- If people/animals: face, hair, expression, pose, clothing details, accessories, skin/fur texture
- If objects: material, condition, design, function, wear marks, reflections
- If landscape/architecture: structures, terrain, scale, perspective, distance
- COLORS: Specific hues ("deep cobalt blue", "warm amber gold", "dusty rose pink")
- LIGHTING: Source, direction, quality, shadows, highlights, reflections, time of day
- TEXTURES: Every material (silk, velvet, metal, wood, stone, glass, skin, fur, water)
- SETTING/ENVIRONMENT: Location type, surroundings, furniture, props, floor, walls, ceiling
- BACKGROUND: Everything behind main subject - other people, objects, architecture
- FOREGROUND: Elements in front or nearby
- ATMOSPHERE: Mood, emotion, weather, season, energy
- SMALL DETAILS: Patterns, embroidery, buttons, jewelry, stitching, imperfections
Describe EVERY person, object, and detail visible. Do NOT summarize - describe each element individually. Use all available space up to 4000 tokens maximum."""
    # Characters trimmed from both ends of a candidate description.
    quote_chars = '"\'""'
    # Preambles that trigger trimming up to the first early punctuation mark.
    preambles = ('here is', 'here\'s', 'the image shows', 'this image', 'i can see')

    try:
        encoded = encode_image_base64(image)
        image_part = {
            "type": "image_url",
            "image_url": {"url": f"data:image/jpeg;base64,{encoded}"}
        }
        text_part = {"type": "text", "text": instructions}
        response = client.chat.completions.create(
            model="glm-4.6v-flash",
            messages=[{"role": "user", "content": [image_part, text_part]}],
            max_tokens=4000,
        )
        msg = response.choices[0].message
        raw_content = msg.content or ""

        # Debug logging
        logger.debug(f"GLM Analyze: raw content length={len(raw_content)}")
        if raw_content:
            logger.debug(f"GLM Analyze preview: {raw_content[:200]}...")

        # Use the full content with minimal cleaning (no paragraph splitting).
        description = clean_glm_response(raw_content)

        # If the text opens with a known preamble, cut everything up to and
        # including the first '.', ',' or ':' found in the first 50 chars.
        if description.lower().startswith(preambles):
            cut = next(
                (pos for pos, ch in enumerate(description[:50]) if ch in '.,:'),
                None,
            )
            if cut is not None:
                description = description[cut + 1:].strip()

        description = description.strip(quote_chars)

        # Short content: fall back to the longest substantial paragraph of
        # reasoning_content, if that is longer than what we have.
        if len(description) < 100:
            reasoning_raw = getattr(msg, 'reasoning_content', None)
            if reasoning_raw:
                reasoning = clean_glm_response(reasoning_raw)
                stripped = (chunk.strip() for chunk in reasoning.split('\n\n'))
                candidates = [chunk for chunk in stripped if len(chunk) > 50]
                if candidates:
                    longest = max(candidates, key=len)
                    if len(longest) > len(description):
                        description = longest.strip(quote_chars)
                        logger.debug(f"GLM Analyze: using reasoning content ({len(description)} chars)")

        if len(description) >= 50:
            logger.info(f"GLM Analyze: success ({len(description)} chars)")
            return description

        error_details = f"content_len={len(raw_content)}"
        logger.warning(f"GLM Analyze: result too short ({error_details})")
        return f"Description too short ({error_details}). Please try again."
    except Exception as exc:
        logger.error(f"GLM Analyze exception: {type(exc).__name__}: {exc}")
        return f"Error analyzing image: {exc}"
| def generate_prompt_with_glm(image_description: str, user_request: str, style: str) -> str: | |
| """Generate transformation prompt using DeepSeek Reasoner based on GLM image analysis. | |
| GLM is used ONLY for image analysis. DeepSeek Reasoner generates the final prompt. | |
| """ | |
| if not image_description or image_description.startswith("Please") or image_description.startswith("Error") or image_description.startswith("GLM API") or image_description.startswith("Could not"): | |
| return "Please analyze the image first." | |
| has_style = style and style != "None" | |
| has_request = user_request and user_request.strip() | |
| # Allow style-only generation (no user request needed if style is selected) | |
| if not has_request and not has_style: | |
| return "Please describe what changes you want or select a style." | |
| client = get_deepseek_client() | |
| if not client: | |
| return "DeepSeek API key not configured. Please add DEEPSEEK_API_KEY to space secrets." | |
| desc = image_description[:MAX_DESCRIPTION_LENGTH] if len(image_description) > MAX_DESCRIPTION_LENGTH else image_description | |
| # Get the full style details from STYLE_SUFFIXES | |
| style_details = STYLE_SUFFIXES.get(style, "").lstrip(", ").strip() if has_style else "" | |
| # Build the user message based on what's provided | |
| if has_style and has_request: | |
| # Both style and custom request | |
| user_content = f"""ORIGINAL IMAGE DESCRIPTION: | |
| {desc} | |
| STYLE TO APPLY: {style} | |
| STYLE DETAILS (use these painting techniques): {style_details} | |
| ADDITIONAL CHANGES REQUESTED: {user_request} | |
| Generate a prompt that transforms the image into this painting style while incorporating the requested changes.""" | |
| elif has_style: | |
| # Style only - no custom request | |
| user_content = f"""ORIGINAL IMAGE DESCRIPTION: | |
| {desc} | |
| STYLE TO APPLY: {style} | |
| STYLE DETAILS (use these painting techniques): {style_details} | |
| Generate a prompt that transforms this image into a {style}. Describe the scene as it would appear painted in this style, incorporating all the painting techniques and visual characteristics listed above.""" | |
| else: | |
| # Custom request only - no style | |
| user_content = f"""ORIGINAL IMAGE DESCRIPTION: | |
| {desc} | |
| REQUESTED CHANGES: {user_request} | |
| Generate a prompt that describes the transformed image.""" | |
| system_prompt = """You are an image prompt generator specialized in painting style transformations. Output ONLY the final prompt - nothing else. | |
| TASK: Generate a detailed image prompt that describes how the original image would look after transformation. | |
| When a PAINTING STYLE is specified (Van Gogh, Picasso, etc.): | |
| - You MUST incorporate ALL the painting technique details provided | |
| - Describe the scene AS A PAINTING with visible brushstrokes, paint textures, canvas texture | |
| - Include the specific color palette, brushwork style, and artistic characteristics of that painter | |
| - The output should clearly be a PAINTING, not a photo | |
| GLOBAL RULE: Your output text CANNOT exceed 4000 TOKENS. This is a strict limit. Output ONLY the prompt text itself. | |
| ABSOLUTELY FORBIDDEN - NEVER OUTPUT THESE: | |
| - Planning text: "Add textures:", "Include lighting:", "The key elements are:" | |
| - Meta-commentary: "Here is", "I will", "Let me", "The prompt" | |
| - Reasoning: "I should", "I need to", "First I'll" | |
| - Labels: "Textures:", "Colors:", "Mood:", "Style:" | |
| - Word counting or token counting | |
| - ANY text that is not the actual image prompt | |
| CORRECT OUTPUT EXAMPLE (Van Gogh style): | |
| "A post-impressionist oil painting of a ballroom scene in the style of Vincent van Gogh, elegant dancers rendered with thick impasto brushstrokes, swirling dynamic patterns in the ceiling and walls, vibrant cadmium yellows and cobalt blues in the chandeliers creating luminous glowing halos, visible canvas texture beneath bold expressive paint layers, the figures painted with short choppy directional marks, emotional intensity through saturated complementary colors" | |
| OUTPUT THE IMAGE PROMPT NOW - NOTHING ELSE:""" | |
| try: | |
| response = client.chat.completions.create( | |
| model="deepseek-reasoner", | |
| max_tokens=4000, | |
| messages=[ | |
| {"role": "system", "content": system_prompt}, | |
| {"role": "user", "content": user_content} | |
| ], | |
| ) | |
| msg = response.choices[0].message | |
| content = msg.content if msg.content else "" | |
| logger.info(f"DeepSeek Prompt: content_len={len(content)}, has_reasoning={hasattr(msg, 'reasoning_content') and bool(msg.reasoning_content)}") | |
| # If content is empty, try to extract from reasoning_content | |
| if not content and hasattr(msg, 'reasoning_content') and msg.reasoning_content: | |
| text = msg.reasoning_content.strip() | |
| paragraphs = [p.strip() for p in text.split('\n\n') if p.strip()] | |
| if paragraphs: | |
| content = paragraphs[-1] | |
| logger.info(f"DeepSeek Prompt: extracted from reasoning ({len(content)} chars)") | |
| if content: | |
| content = content.strip().replace("\n", " ") | |
| # Remove any thinking tags | |
| if "<think>" in content: | |
| content = content.split("</think>")[-1].strip() | |
| # Remove quotes | |
| if content.startswith('"') and content.endswith('"'): | |
| content = content[1:-1] | |
| # Filter out leaked reasoning patterns | |
| reasoning_patterns = [ | |
| "Add textures", "Add lighting", "Add colors", "Add mood", "Add style", | |
| "Include textures", "Include lighting", "Include colors", | |
| "The key elements", "Key elements:", "Elements to include", | |
| "Here is the", "Here's the", "The prompt is", | |
| "I will", "I'll", "I should", "I need to", "Let me", | |
| "Textures:", "Colors:", "Mood:", "Style:", "Lighting:", | |
| "First,", "Second,", "Finally,", "Now,", | |
| ] | |
| content_lower = content.lower() | |
| for pattern in reasoning_patterns: | |
| if content_lower.startswith(pattern.lower()): | |
| # Find the actual prompt after the reasoning | |
| for sep in [':', '. ', '- ']: | |
| if sep in content[:100]: | |
| idx = content.find(sep) | |
| if idx > 0 and idx < 100: | |
| content = content[idx+len(sep):].strip() | |
| content_lower = content.lower() | |
| break | |
| # Remove any remaining label prefixes | |
| while content and content[0].isupper() and ':' in content[:30]: | |
| idx = content.find(':') | |
| if idx > 0 and idx < 30: | |
| content = content[idx+1:].strip() | |
| else: | |
| break | |
| # Truncate if too long (4000 tokens ~ 3000 words max) | |
| max_words = 3000 | |
| words = content.split() | |
| if len(words) > max_words: | |
| content = " ".join(words[:max_words]) | |
| logger.info(f"DeepSeek Prompt SUCCESS: {len(content)} chars") | |
| return content | |
| logger.warning("DeepSeek Prompt: no content extracted") | |
| return "Could not generate prompt. Please try again." | |
| except Exception as e: | |
| logger.error(f"DeepSeek Prompt exception: {type(e).__name__}: {str(e)}") | |
| return f"Error: {str(e)}" | |
| # ============================================================================= | |
| # ZEROGPU AOTI CONFIGURATION | |
| # ============================================================================= | |
| # Import the corrected AoTI compilation function | |
| from aoti import compile_transformer_aoti | |
# Inductor configuration tuned for diffusion transformers.
INDUCTOR_CONFIGS = {
    "conv_1x1_as_mm": True,
    "epilogue_fusion": False,
    "coordinate_descent_tuning": True,
    "coordinate_descent_check_all_directions": True,
    "max_autotune": True,
    # CUDA graphs stay off for ZeroGPU compatibility.
    "triton.cudagraphs": False,
    "shape_padding": True,
}

# Dynamic-shape bounds for Z-Image-Turbo (18 resolutions).
# Each latent side is the image side divided by 8 (VAE scale factor) and the
# sequence length is latent_h * latent_w.
MIN_SEQ_LEN = (1536 // 8) * (640 // 8)    # 1536x640  -> 192x80  -> 15,360
MAX_SEQ_LEN = (2048 // 8) * (2048 // 8)   # 2048x2048 -> 256x256 -> 65,536

# AoTI compilation is opt-in through the ENABLE_AOTI environment variable
# (the value "true", case-insensitive, enables it). It is disabled by default:
# the Z-Image-Turbo transformer uses positional args (x, t, cap_feats), which
# requires special handling in torch.export. Enable with ENABLE_AOTI=true once fixed.
ENABLE_AOTI = os.getenv("ENABLE_AOTI", "false").lower() == "true"
# Text-to-image pipeline, loaded in bfloat16. It is deliberately NOT moved to
# CUDA at import time: ZeroGPU requires GPU ops inside @spaces.GPU functions.
logger.info("Loading Z-Image-Turbo pipeline (CPU)...")
pipe_t2i = DiffusionPipeline.from_pretrained(
    "Tongyi-MAI/Z-Image-Turbo",
    torch_dtype=torch.bfloat16,
)

# Image-to-image pipeline built from the very same component objects, so the
# two pipelines share one set of components instead of loading them twice.
pipe_i2i = ZImageImg2ImgPipeline(
    **{
        component: getattr(pipe_t2i, component)
        for component in ("transformer", "vae", "text_encoder", "tokenizer", "scheduler")
    }
)
# Set to True once the pipelines have been moved to the GPU and optimized.
_gpu_initialized = False


def _ensure_gpu():
    """Move pipelines to GPU and apply optimizations (called inside @spaces.GPU functions).

    The work is done at most once per process: after a successful run the
    module-level ``_gpu_initialized`` flag short-circuits later calls. If CUDA
    is not available the function logs a warning and returns WITHOUT setting
    the flag, so the next call tries again.

    FlashAttention-3 and the VAE ``torch.compile`` wrapper are best-effort:
    a failure in either is logged as a warning and initialization continues.

    Raises:
        Exception: any other error during setup is logged and re-raised.
    """
    global _gpu_initialized
    if _gpu_initialized:
        return

    try:
        # Check if CUDA is actually available
        if not torch.cuda.is_available():
            logger.warning("[GPU] CUDA not available, waiting for ZeroGPU allocation...")
            return

        # Enable optimized backends
        torch.backends.cuda.enable_flash_sdp(True)
        torch.backends.cuda.enable_mem_efficient_sdp(True)
        torch.backends.cudnn.benchmark = True
        torch.backends.cuda.matmul.allow_tf32 = True
        torch.backends.cudnn.allow_tf32 = True
        # Names of the optimizations that really took effect; used for the
        # final log line so it never advertises a feature that fell back.
        enabled = ["TF32"]

        # Move to GPU (check if already on GPU)
        if not next(pipe_t2i.transformer.parameters()).is_cuda:
            pipe_t2i.to("cuda")

        # Enable FlashAttention-3 via kernels library (H100/H200 Hopper GPUs)
        try:
            pipe_t2i.transformer.set_attention_backend("_flash_3_hub")
            enabled.append("FA3")
            logger.info("[GPU] FlashAttention-3 enabled via kernels library")
        except Exception as e:
            logger.warning(f"[GPU] FA3 not available, using default SDPA attention: {e}")

        # Enable torch.compile for VAE decoder (only if not already compiled)
        if not hasattr(pipe_t2i.vae, '_compiled'):
            try:
                pipe_t2i.vae.decode = torch.compile(
                    pipe_t2i.vae.decode,
                    mode="reduce-overhead",
                )
                pipe_t2i.vae._compiled = True
                logger.info("[GPU] torch.compile enabled for VAE decoder")
            except Exception as e:
                logger.warning(f"[GPU] VAE torch.compile failed: {e}")
        if getattr(pipe_t2i.vae, '_compiled', False):
            enabled.append("VAE compile")

        _gpu_initialized = True
        logger.info(f"[GPU] Pipelines ready! ({' + '.join(enabled)})")
    except Exception as e:
        logger.error(f"[GPU] Initialization failed: {type(e).__name__}: {str(e)}")
        raise


logger.info("Pipelines loaded on CPU - will move to GPU on first generation")
# Maps each style name to the text appended to a prompt when that style is
# selected. Every non-empty suffix starts with ", " so it can be concatenated
# directly onto the end of a prompt; "None" appends nothing.
STYLE_SUFFIXES = {
    "None": "",
    "Photorealistic": ", photorealistic, ultra detailed, 8k, professional photography",
    "Cinematic": ", cinematic lighting, movie scene, dramatic atmosphere, film grain",
    "Anime": ", anime style, vibrant colors, cel shaded, studio ghibli inspired",
    "Digital Art": ", digital art, artstation trending, concept art, highly detailed",
    "Oil Painting": ", oil painting style, classical art, brush strokes visible",
    "Watercolor": ", watercolor painting, soft edges, artistic, delicate colors",
    "3D Render": ", 3D render, octane render, unreal engine 5, ray tracing",
    "Fantasy": ", fantasy art, magical, ethereal glow, mystical atmosphere",
    "Sci-Fi": ", science fiction, futuristic, advanced technology, neon accents",
    "Van Gogh Painting": ", THIS IS A PAINTING NOT A PHOTO, masterpiece oil painting on canvas in the style of Vincent van Gogh, post-impressionist painted artwork, thick heavy impasto oil paint texture with three-dimensional brushstroke relief, bold expressive painted brushstrokes in short choppy directional marks, swirling dynamic spiral patterns painted in sky and backgrounds, vibrant saturated complementary oil paint colors with cadmium yellows and cobalt blues and chrome greens, dark prussian blue painted outlines around forms inspired by Japanese ukiyo-e woodblock prints, emotional intensity through painted color symbolism, luminous glowing painted halos around light sources, cypress tree flame-like painted shapes, golden wheat field painted textures, starry night swirling cosmos painting aesthetic, cafe terrace warm lamplight painted glow, sunflower petal radiating painted strokes, visible canvas weave texture beneath oil paint layers, energetic rhythmic painted movement throughout composition, traditional painting medium, fine art painting",
    "Picasso Painting": ", THIS IS A PAINTING NOT A PHOTO, masterpiece oil painting on canvas in the style of Pablo Picasso, cubist painted artwork with bold black ink outlines, geometric painted fragmentation breaking forms into angular planes, multiple simultaneous painted viewpoints showing front and profile together, flat two-dimensional painted picture plane, analytic cubism monochromatic painted browns and grays with fragmented overlapping facets, synthetic cubism bright painted colors with collage-like flat shapes, African tribal mask influences with simplified angular painted facial features, blue period melancholic cobalt and prussian blue painted tones with elongated thin figures, rose period warm terracotta and pink ochre painted circus themes, Guernica stark black white and gray painted political intensity, bold confident painted brushwork with visible paint texture, stark contrasting painted light and shadow, simplified abstracted painted human forms with displaced features, traditional painting medium, fine art painting",
}

# Ordered list of style names. Derived from STYLE_SUFFIXES (dicts keep
# insertion order) so a style can never be listed without a suffix entry.
STYLES = list(STYLE_SUFFIXES)
# Aspect-ratio choices. Every label ends with "(WIDTHxHEIGHT)" in pixels.
RATIOS = [
    "1:1 Square (1024x1024)", "16:9 Landscape (1344x768)", "9:16 Portrait (768x1344)",
    "4:3 Standard (1152x896)", "3:4 Vertical (896x1152)", "21:9 Cinematic (1536x640)",
    "3:2 Photo (1216x832)", "2:3 Photo Portrait (832x1216)", "1:1 XL (1536x1536)",
    "16:9 XL (1920x1088)", "9:16 XL (1088x1920)", "4:3 XL (1536x1152)",
    "3:4 XL (1152x1536)", "1:1 MAX (2048x2048)", "16:9 MAX (2048x1152)",
    "9:16 MAX (1152x2048)", "4:3 MAX (2048x1536)", "3:4 MAX (1536x2048)",
]


def _parse_ratio_dims(label: str) -> Tuple[int, int]:
    """Return ``(width, height)`` parsed from the trailing "(WxH)" of *label*.

    Raises:
        ValueError: if the label has no well-formed "(WxH)" suffix.
    """
    width, height = label[label.rindex("(") + 1:label.rindex(")")].split("x")
    return int(width), int(height)


# Label -> (width, height). Built from the labels themselves so the pixel
# sizes are written down exactly once and cannot disagree with the label.
RATIO_DIMS = {label: _parse_ratio_dims(label) for label in RATIOS}
# Example rows for the text-to-image tab.
# NOTE(review): each row looks like [prompt, style, ratio, steps, seed, randomize];
# confirm the column order against the gr.Examples inputs where this list is used.
EXAMPLES_GENERATE = [
    ["Majestic phoenix rising from volcanic flames at midnight, ember particles swirling against a star-filled sky, wings of liquid gold and crimson fire", "Fantasy", "1:1 Square (1024x1024)", 9, 42, True],
    ["Underwater steampunk city with brass submarines and coral-covered clockwork towers, schools of glowing fish swimming through glass tunnels", "Digital Art", "9:16 Portrait (768x1344)", 9, 42, True],
    ["Street food vendor in a bustling night market, steam rising from sizzling woks, colorful paper lanterns illuminating weathered hands preparing dumplings", "Photorealistic", "4:3 Standard (1152x896)", 9, 42, True],
    ["Android geisha performing tea ceremony in a neon-lit zen garden, holographic cherry blossoms falling around chrome kimono", "Sci-Fi", "3:4 Vertical (896x1152)", 9, 42, True],
    ["Venetian masquerade ball at twilight, masked dancers in elaborate baroque costumes twirling beneath frescoed ceilings, candlelight reflecting off gilded mirrors and velvet drapes", "Oil Painting", "4:3 XL (1536x1152)", 9, 42, True],
    ["Colossal ancient tree growing through the ruins of a forgotten temple, roots wrapped around crumbling stone pillars, golden light filtering through the dense canopy as fireflies dance in the mist", "Cinematic", "16:9 XL (1920x1088)", 9, 42, True],
    ["Crystal ice palace floating above frozen tundra, aurora borealis casting ethereal green and purple ribbons across the polar sky, snow wolves howling on distant glaciers below", "Fantasy", "16:9 MAX (2048x1152)", 9, 42, True],
    ["Alchemist laboratory in a medieval tower, bubbling potions in glass vessels connected by copper tubes, scattered grimoires and astronomical instruments, moonlight streaming through a rose window casting prismatic shadows", "Digital Art", "1:1 MAX (2048x2048)", 9, 42, True],
]

# Example rows for the image-to-image tab.
# NOTE(review): each row looks like [prompt, style, strength, steps, seed, randomize];
# confirm the column order against the gr.Examples inputs where this list is used.
EXAMPLES_TRANSFORM = [
    ["Transform into ultra realistic photograph with sharp details and natural lighting", "Photorealistic", 0.7, 9, 42, True],
    ["Dramatic movie scene with cinematic lighting and film grain texture", "Cinematic", 0.65, 9, 42, True],
    ["Japanese anime style with vibrant colors and cel shading", "Anime", 0.75, 9, 42, True],
    ["Digital concept art style, trending on artstation", "Digital Art", 0.6, 9, 42, True],
    ["Classical oil painting with visible brush strokes and rich colors", "Oil Painting", 0.7, 9, 42, True],
    ["Soft watercolor painting with delicate washes and gentle edges", "Watercolor", 0.65, 9, 42, True],
    ["High quality 3D render with ray tracing and realistic materials", "3D Render", 0.7, 9, 42, True],
    ["Magical fantasy art with ethereal glow and mystical atmosphere", "Fantasy", 0.65, 9, 42, True],
    ["Futuristic sci-fi style with neon accents and advanced technology", "Sci-Fi", 0.7, 9, 42, True],
    ["Enhanced version with improved details and quality", "None", 0.4, 9, 42, True],
]
def upload_to_hf_cdn(image: Optional[Image.Image]) -> str:
    """Upload *image* as PNG to the HuggingFace uploads endpoint.

    Returns the stripped response body on HTTP 200. Every failure is reported
    as a human-readable message string rather than an exception:
    no image, non-200 status, timeout (30 s), or any other error (logged).
    The in-memory PNG buffer is always released.
    """
    if image is None:
        return "No image to share"

    with io.BytesIO() as png_buffer:
        try:
            image.save(png_buffer, format='PNG')
            reply = requests.post(
                "https://huggingface.co/uploads",
                headers={"Content-Type": "image/png"},
                data=png_buffer.getvalue(),
                timeout=30,
            )
            if reply.status_code != 200:
                return f"Upload failed: {reply.status_code}"
            return reply.text.strip()
        except requests.Timeout:
            return "Upload timed out. Please try again."
        except Exception as e:
            logger.error(f"upload_to_hf_cdn failed: {type(e).__name__}: {str(e)}")
            return "Upload error. Please try again."
def do_polish_prompt(prompt: str, style: str, do_polish: bool, mode: str = "generate") -> Tuple[str, str]:
    """Build the final prompt before generation (runs on CPU, before GPU allocation).

    Returns a ``(final_prompt, polished)`` pair: ``polished`` is the stripped
    prompt, passed through ``polish_prompt`` when *do_polish* is set, and
    ``final_prompt`` is ``polished`` with the style suffix appended.
    An empty or whitespace-only prompt yields ``("", "")``.
    """
    stripped = prompt.strip() if prompt else ""
    if not stripped:
        return "", ""

    polished = polish_prompt(stripped, mode=mode) if do_polish else stripped
    return polished + STYLE_SUFFIXES.get(style, ""), polished
def do_polish_transform_prompt(prompt: str, style: str, do_polish: bool) -> Tuple[str, str]:
    """Build the prompt for an image transformation (style-focused).

    Returns a ``(final_prompt, display_text)`` pair:

    * prompt given, polishing on  -> result of ``do_polish_prompt`` in "transform" mode
    * prompt given, polishing off -> stripped prompt + style suffix, empty display
    * no prompt, style selected   -> the style suffix itself (leading ", " removed)
      becomes the prompt; display is a "[<style> Style] ..." preview capped at
      100 characters of the style text
    * no prompt, no style         -> a generic enhancement prompt, empty display
    """
    suffix = STYLE_SUFFIXES.get(style, "")
    text = prompt.strip() if prompt else ""

    if text:
        if do_polish:
            return do_polish_prompt(prompt, style, True, mode="transform")
        return text + suffix, ""

    if style and style != "None" and suffix:
        # Drop the leading ", " so the suffix reads as a stand-alone prompt.
        style_guide = suffix.lstrip(", ").strip()
        preview = f"{style_guide[:100]}..." if len(style_guide) > 100 else style_guide
        label = f"[{style} Style] {preview}"
        logger.info(f"do_polish_transform_prompt: Style-only transform with {style}")
        return style_guide, label

    return "high quality image, enhanced details, professional quality", ""
| # ============================================================================= | |
| # UNIFIED WRAPPER FUNCTIONS (Fix for race condition with gr.State) | |
| # These combine polish + generate/transform into single atomic operations | |
| # ============================================================================= | |
def generate_with_polish(prompt: str, style: str, do_polish: bool, ratio: str, steps: int, seed: int, randomize: bool):
    """Unified generate with progress feedback using generator.

    Polishes the prompt, then runs ``generate`` with up to three attempts.
    Yields ``(image, status_html, seed)`` tuples: intermediate yields carry
    ``None`` as the image and a timer-stamped status so the user knows what is
    happening; the last yield carries either the image or a failure status.

    A ``RuntimeError`` whose message mentions "cuda", "gpu" or "driver" is
    treated as a ZeroGPU allocation failure and retried after a one-second
    pause; any other ``RuntimeError`` is re-raised. Because ``generate``
    reports its own failures by returning ``None`` instead of raising, a
    ``None`` image after all attempts is always reported as a failure.
    """
    logger.info(f"generate_with_polish: do_polish={do_polish}, style={style}, prompt_len={len(prompt) if prompt else 0}")

    # Start timer
    timer = GenerationTimer()
    timer.start()

    # Always yield initial status with animation
    if do_polish:
        yield None, create_status_html("Enhancing prompt with DeepSeek Reasoner", timer.format()), seed
    else:
        yield None, create_status_html("Preparing generation", timer.format()), seed

    full_prompt, polished_display = do_polish_prompt(prompt, style, do_polish, mode="generate")

    # Log whether enhancement was applied
    if do_polish and polished_display and polished_display != prompt:
        logger.info("generate_with_polish: Prompt+ applied successfully")
    elif do_polish:
        logger.warning("generate_with_polish: Prompt+ was enabled but enhancement unchanged")

    if not full_prompt.strip():
        yield None, create_status_html("Empty prompt - please enter a description", timer.format(), is_generating=False).replace("✅", "❌"), seed
        return

    # Show status before GPU generation
    yield None, create_status_html("Generating image", timer.format()), seed

    # GPU generation with automatic retry for ZeroGPU failures
    max_retries = 3
    image = None
    used_seed = seed
    last_error = None
    for attempt in range(max_retries):
        try:
            image, used_seed = generate(full_prompt, polished_display, ratio, steps, seed, randomize)
            if image is not None:
                break  # Success
        except RuntimeError as e:
            error_msg = str(e).lower()
            if not ("cuda" in error_msg or "gpu" in error_msg or "driver" in error_msg):
                raise  # Re-raise non-GPU errors
            last_error = e
            logger.warning(f"GPU allocation failed (attempt {attempt + 1}/{max_retries}): {e}")
            if attempt < max_retries - 1:
                time.sleep(1)  # Brief pause before retry
                yield None, create_status_html(f"GPU busy, retrying ({attempt + 2}/{max_retries})", timer.format()), seed

    timer.stop()

    if image is None:
        # Never present a missing image as a success: distinguish a GPU
        # allocation problem from a failure swallowed inside generate().
        if last_error:
            failure = f"GPU unavailable after {max_retries} attempts. Please try again."
        else:
            failure = "Generation failed. Please try again."
        yield None, create_status_html(failure, timer.format(), is_generating=False).replace("✅", "❌"), seed
        return

    # Show final result together with the prompt that was used
    final_display = polished_display if polished_display else full_prompt
    final_status = create_status_html(f"Generated in {timer.format()}", timer.format(), is_generating=False)
    yield image, final_status + f"\n\n{final_display}", used_seed
def transform_with_polish(input_image: Optional[Image.Image], prompt: str, style: str, do_polish: bool, strength: float, steps: int, seed: int, randomize: bool):
    """Unified transform with progress feedback using generator.

    Builds the transformation prompt, then runs ``transform`` with up to three
    attempts. Yields ``(image, status_html, seed)`` tuples: intermediate yields
    carry ``None`` as the image, a timer-stamped status and ``0`` as the seed;
    the last yield carries either the image with the seed actually used or a
    failure status.

    Style-only transformation: when a style is selected without a prompt, the
    style suffix contains all the information needed to guide the transformation.

    A ``RuntimeError`` whose message mentions "cuda", "gpu" or "driver" is
    treated as a ZeroGPU allocation failure and retried after a one-second
    pause; any other ``RuntimeError`` is re-raised. Because ``transform``
    reports its own failures by returning ``None`` instead of raising, a
    ``None`` image after all attempts is always reported as a failure.
    """
    has_prompt = prompt and prompt.strip()
    has_style = style and style != "None"
    logger.info(f"transform_with_polish: do_polish={do_polish}, style={style}, has_prompt={has_prompt}, has_style={has_style}")

    # Start timer
    timer = GenerationTimer()
    timer.start()

    if input_image is None:
        yield None, create_status_html("Please upload an image first", timer.format(), is_generating=False).replace("✅", "❌"), 0
        return

    # Show appropriate initial status based on transformation type
    if not has_prompt and has_style:
        # Style-only transformation
        yield None, create_status_html(f"Applying {style} style transformation", timer.format()), 0
    elif do_polish and has_prompt:
        yield None, create_status_html("Enhancing prompt with DeepSeek Reasoner", timer.format()), 0
    else:
        yield None, create_status_html("Preparing transformation", timer.format()), 0

    full_prompt, polished_display = do_polish_transform_prompt(prompt, style, do_polish)

    # Log whether enhancement was applied
    if do_polish and polished_display and polished_display != prompt:
        logger.info("transform_with_polish: Prompt+ applied successfully")
    elif do_polish:
        logger.warning("transform_with_polish: Prompt+ was enabled but enhancement unchanged")

    # Show status before GPU transform
    yield None, create_status_html("Transforming image", timer.format()), 0

    # GPU transform with automatic retry for ZeroGPU failures
    max_retries = 3
    image = None
    used_seed = 0
    last_error = None
    for attempt in range(max_retries):
        try:
            image, used_seed = transform(input_image, full_prompt, polished_display, strength, steps, seed, randomize)
            if image is not None:
                break  # Success
        except RuntimeError as e:
            error_msg = str(e).lower()
            if not ("cuda" in error_msg or "gpu" in error_msg or "driver" in error_msg):
                raise  # Re-raise non-GPU errors
            last_error = e
            logger.warning(f"GPU allocation failed (attempt {attempt + 1}/{max_retries}): {e}")
            if attempt < max_retries - 1:
                time.sleep(1)  # Brief pause before retry
                yield None, create_status_html(f"GPU busy, retrying ({attempt + 2}/{max_retries})", timer.format()), 0

    timer.stop()

    if image is None:
        # Never present a missing image as a success: distinguish a GPU
        # allocation problem from a failure swallowed inside transform().
        if last_error:
            failure = f"GPU unavailable after {max_retries} attempts. Please try again."
        else:
            failure = "Transformation failed. Please try again."
        yield None, create_status_html(failure, timer.format(), is_generating=False).replace("✅", "❌"), 0
        return

    # Show final result together with the prompt that was used
    final_display = polished_display if polished_display else full_prompt
    final_status = create_status_html(f"Transformed in {timer.format()}", timer.format(), is_generating=False)
    yield image, final_status + f"\n\n{final_display}", used_seed
def generate(full_prompt: str, polished_display: str, ratio: str, steps: int, seed: int, randomize: bool, progress=gr.Progress(track_tqdm=True)) -> Tuple[Optional[Image.Image], int]:
    """Generate image from text prompt.

    Args:
        full_prompt: Prompt passed to the pipeline; blank input produces no image.
        polished_display: Unused here; kept for the caller's signature.
        ratio: Key into ``RATIO_DIMS``; unknown keys fall back to 1024x1024.
        steps: Number of inference steps (converted with ``int``).
        seed: Seed for the CUDA generator, ignored when *randomize* is true.
        randomize: Draw a fresh random seed instead of using *seed*.
        progress: Gradio progress tracker (tracks tqdm).

    Returns:
        ``(image, seed)`` where *seed* is the seed actually used. ``image`` is
        ``None`` for a blank prompt or when generation fails; failures are
        logged rather than raised.
    """
    _ensure_gpu()

    if randomize:
        seed = torch.randint(0, 2**32 - 1, (1,)).item()
    seed = int(seed)

    if not full_prompt.strip():
        return None, seed

    try:
        w, h = RATIO_DIMS.get(ratio, (1024, 1024))
        generator = torch.Generator("cuda").manual_seed(seed)
        image = pipe_t2i(
            prompt=full_prompt,
            height=h,
            width=w,
            num_inference_steps=int(steps),
            guidance_scale=0.0,
            generator=generator,
        ).images[0]

        # Force PNG format for MCP server output.
        # The file name must be unique per call: a name built from the seed
        # alone is shared by concurrent requests using the same seed, and
        # Image.open() reads pixel data lazily, so one request could end up
        # reading a file another request is overwriting.
        fd, png_path = tempfile.mkstemp(prefix=f"z_gen_{seed}_", suffix=".png")
        os.close(fd)
        image.save(png_path, format="PNG")
        result = Image.open(png_path)
        result.load()  # read pixels now and release the file handle
        return result, seed
    except Exception as e:
        logger.error(f"Generation failed: {type(e).__name__}: {str(e)}")
        return None, seed
def transform(input_image: Optional[Image.Image], full_prompt: str, polished_display: str, strength: float, steps: int, seed: int, randomize: bool, progress=gr.Progress(track_tqdm=True)) -> Tuple[Optional[Image.Image], int]:
    """Transform image using prompt guidance.

    The input is converted to RGB, each side is rounded down to a multiple of
    ``IMAGE_ALIGNMENT`` and clamped to ``[MIN_IMAGE_DIM, MAX_IMAGE_DIM]``, and
    the image is resized to that size before being passed to the
    image-to-image pipeline.

    Args:
        input_image: Source image; ``None`` returns ``(None, 0)`` immediately.
        full_prompt: Prompt for the pipeline; a blank prompt is replaced by a
            generic enhancement prompt.
        polished_display: Unused here; kept for the caller's signature.
        strength: Transformation strength passed to the pipeline. The step
            count is scaled to ``steps / strength`` (at least 4) when
            strength is positive.
        steps: Requested number of inference steps.
        seed: Seed for the CUDA generator, ignored when *randomize* is true.
        randomize: Draw a fresh random seed instead of using *seed*.
        progress: Gradio progress tracker (tracks tqdm).

    Returns:
        ``(image, seed)`` where *seed* is the seed actually used. ``image`` is
        ``None`` when the transformation fails; failures are logged with a
        traceback rather than raised.
    """
    _ensure_gpu()

    if input_image is None:
        return None, 0

    if randomize:
        seed = torch.randint(0, 2**32 - 1, (1,)).item()
    seed = int(seed)

    if not full_prompt.strip():
        full_prompt = "high quality image, enhanced details"

    try:
        input_image = input_image.convert("RGB")
        w, h = input_image.size
        w = (w // IMAGE_ALIGNMENT) * IMAGE_ALIGNMENT
        h = (h // IMAGE_ALIGNMENT) * IMAGE_ALIGNMENT
        w = max(MIN_IMAGE_DIM, min(MAX_IMAGE_DIM, w))
        h = max(MIN_IMAGE_DIM, min(MAX_IMAGE_DIM, h))
        input_image = input_image.resize((w, h), Image.Resampling.BILINEAR)

        strength = float(strength)
        effective_steps = max(4, int(steps / strength)) if strength > 0 else int(steps)

        generator = torch.Generator("cuda").manual_seed(seed)
        image = pipe_i2i(
            prompt=full_prompt,
            image=input_image,
            strength=strength,
            num_inference_steps=effective_steps,
            guidance_scale=0.0,
            generator=generator,
        ).images[0]

        # Force PNG format for MCP server output.
        # The file name must be unique per call: a name built from the seed
        # alone is shared by concurrent requests using the same seed, and
        # Image.open() reads pixel data lazily, so one request could end up
        # reading a file another request is overwriting.
        fd, png_path = tempfile.mkstemp(prefix=f"z_trans_{seed}_", suffix=".png")
        os.close(fd)
        image.save(png_path, format="PNG")
        result = Image.open(png_path)
        result.load()  # read pixels now and release the file handle
        return result, seed
    except Exception as e:
        import traceback
        logger.error(f"Transform failed: {type(e).__name__}: {str(e)}")
        logger.error(traceback.format_exc())
        return None, seed
| # ============================================================================= | |
| # MCP-FRIENDLY WRAPPER FUNCTIONS | |
| # These functions expose all parameters directly for MCP server compatibility | |
| # ============================================================================= | |
def mcp_generate(prompt: str, style: str = "None", ratio: str = "1:1 Square (1024x1024)",
                 steps: int = 9, seed: int = 42, randomize: bool = True) -> Tuple[Optional[Image.Image], int]:
    """MCP-friendly image generation that takes every parameter directly.

    The prompt is stripped and the style suffix from ``STYLE_SUFFIXES`` is
    appended; no prompt polishing is performed here.

    Args:
        prompt: Text description; empty or whitespace-only produces no image.
        style: Key into ``STYLE_SUFFIXES``; unknown keys add no suffix.
        ratio: Key into ``RATIO_DIMS``; unknown keys fall back to 1024x1024.
        steps: Number of inference steps (converted with ``int``).
        seed: Seed for the CUDA generator, ignored when *randomize* is true.
        randomize: Draw a fresh random seed instead of using *seed*.

    Returns:
        ``(image, seed)`` where *seed* is the seed actually used. ``image`` is
        ``None`` for an empty prompt or when generation fails; failures are
        logged rather than raised.
    """
    _ensure_gpu()

    if randomize:
        seed = torch.randint(0, 2**32 - 1, (1,)).item()
    seed = int(seed)

    if not prompt or not prompt.strip():
        return None, seed

    # Apply style suffix
    full_prompt = prompt.strip() + STYLE_SUFFIXES.get(style, "")

    try:
        w, h = RATIO_DIMS.get(ratio, (1024, 1024))
        generator = torch.Generator("cuda").manual_seed(seed)
        image = pipe_t2i(
            prompt=full_prompt,
            height=h,
            width=w,
            num_inference_steps=int(steps),
            guidance_scale=0.0,
            generator=generator,
        ).images[0]

        # Force PNG format for MCP server output.
        # The file name must be unique per call: a name built from the seed
        # alone is shared by concurrent requests using the same seed, and
        # Image.open() reads pixel data lazily, so one request could end up
        # reading a file another request is overwriting.
        fd, png_path = tempfile.mkstemp(prefix=f"z_mcp_gen_{seed}_", suffix=".png")
        os.close(fd)
        image.save(png_path, format="PNG")
        result = Image.open(png_path)
        result.load()  # read pixels now and release the file handle
        return result, seed
    except Exception as e:
        logger.error(f"MCP Generate failed: {type(e).__name__}: {str(e)}")
        return None, seed
def mcp_transform(image: Optional[Image.Image], prompt: str, style: str = "None",
                  strength: float = 0.6, steps: int = 9, seed: int = 42,
                  randomize: bool = True) -> Tuple[Optional[Image.Image], int]:
    """MCP-friendly image transformation. Takes all parameters directly.

    The input is converted to RGB, each side is rounded down to a multiple of
    ``IMAGE_ALIGNMENT`` and clamped to ``[MIN_IMAGE_DIM, MAX_IMAGE_DIM]``, and
    the image is resized to that size before being passed to the
    image-to-image pipeline.

    Args:
        image: Source image; ``None`` returns ``(None, 0)`` immediately.
        prompt: Guidance text. When missing, empty or whitespace-only,
            "high quality image" is used instead.
        style: Key into ``STYLE_SUFFIXES``; unknown keys add no suffix.
        strength: Transformation strength passed to the pipeline. The step
            count is scaled to ``steps / strength`` (at least 4) when
            strength is positive.
        steps: Requested number of inference steps.
        seed: Seed for the CUDA generator, ignored when *randomize* is true.
        randomize: Draw a fresh random seed instead of using *seed*.

    Returns:
        ``(image, seed)`` where *seed* is the seed actually used. ``image`` is
        ``None`` when the transformation fails; failures are logged rather
        than raised.
    """
    _ensure_gpu()

    if image is None:
        return None, 0

    if randomize:
        seed = torch.randint(0, 2**32 - 1, (1,)).item()
    seed = int(seed)

    # Apply style suffix. A whitespace-only prompt counts as "no prompt" so
    # the pipeline never receives a prompt that is just the style suffix
    # with its leading comma, or an empty string.
    base_prompt = (prompt or "").strip() or "high quality image"
    full_prompt = base_prompt + STYLE_SUFFIXES.get(style, "")

    try:
        image = image.convert("RGB")
        w, h = image.size
        w = (w // IMAGE_ALIGNMENT) * IMAGE_ALIGNMENT
        h = (h // IMAGE_ALIGNMENT) * IMAGE_ALIGNMENT
        w = max(MIN_IMAGE_DIM, min(MAX_IMAGE_DIM, w))
        h = max(MIN_IMAGE_DIM, min(MAX_IMAGE_DIM, h))
        image = image.resize((w, h), Image.Resampling.BILINEAR)

        strength = float(strength)
        effective_steps = max(4, int(steps / strength)) if strength > 0 else int(steps)

        generator = torch.Generator("cuda").manual_seed(seed)
        result = pipe_i2i(
            prompt=full_prompt,
            image=image,
            strength=strength,
            num_inference_steps=effective_steps,
            guidance_scale=0.0,
            generator=generator,
        ).images[0]

        # Force PNG format for MCP server output.
        # The file name must be unique per call: a name built from the seed
        # alone is shared by concurrent requests using the same seed, and
        # Image.open() reads pixel data lazily, so one request could end up
        # reading a file another request is overwriting.
        fd, png_path = tempfile.mkstemp(prefix=f"z_mcp_trans_{seed}_", suffix=".png")
        os.close(fd)
        result.save(png_path, format="PNG")
        output = Image.open(png_path)
        output.load()  # read pixels now and release the file handle
        return output, seed
    except Exception as e:
        logger.error(f"MCP Transform failed: {type(e).__name__}: {str(e)}")
        return None, seed
# Custom stylesheet handed to gr.Blocks(css=css).
#
# Layout of the sheet, top to bottom:
#   * design tokens (:root variables) and per-language font stacks
#   * RTL overrides keyed on [dir="rtl"] / .rtl
#   * tabs, buttons, blocks, inputs, checkboxes, images, examples tables
#   * generation-status banner and spinner animations
#   * reduced-motion / small-screen media queries and focus outlines
#   * a final group of broad colour overrides (labels, spans, tables, dropdowns)
#
# The status-banner colours carry !important because the broad
# ".block span { color: ... !important }" override near the end of the sheet
# would otherwise beat them in the cascade.
# NOTE(review): this assumes Gradio renders gr.HTML output inside an element
# with class "block" - confirm against the deployed DOM.
css = r"""
/* Google Fonts for multilingual support */
@import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&family=Noto+Sans+Arabic:wght@400;500;600;700&family=Noto+Sans+Devanagari:wght@400;500;600;700&display=swap');
:root {
--bg-primary: #0c0c0e;
--bg-secondary: #141416;
--bg-tertiary: #1c1c20;
--surface: #232328;
--surface-hover: #2a2a30;
--accent-primary: #818cf8;
--accent-secondary: #a78bfa;
--accent-hover: #6366f1;
--accent-gradient: linear-gradient(135deg, #6366f1 0%, #8b5cf6 100%);
--accent-glow: rgba(99, 102, 241, 0.4);
--text-primary: #f4f4f5;
--text-secondary: #a1a1aa;
--text-muted: #71717a;
--border-subtle: rgba(255, 255, 255, 0.08);
--border-default: rgba(255, 255, 255, 0.12);
--success: #10b981;
--warning: #f59e0b;
--error: #ef4444;
--shadow-sm: 0 1px 2px rgba(0,0,0,0.3);
--shadow-md: 0 4px 6px -1px rgba(0,0,0,0.4);
--shadow-lg: 0 10px 15px -3px rgba(0,0,0,0.5);
--shadow-glow: 0 0 20px var(--accent-glow);
--radius-sm: 8px;
--radius-md: 12px;
--radius-lg: 16px;
--transition: 0.2s ease;
/* Font stacks */
--font-latin: 'Inter', -apple-system, BlinkMacSystemFont, system-ui, sans-serif;
--font-arabic: 'Noto Sans Arabic', 'Tahoma', sans-serif;
--font-hindi: 'Noto Sans Devanagari', 'Mangal', sans-serif;
}
/* Arabic font */
.lang-ar, .lang-ar * { font-family: var(--font-arabic) !important; }
/* Hindi font */
.lang-hi, .lang-hi * { font-family: var(--font-hindi) !important; }
/* RTL Support for Arabic */
[dir="rtl"], .rtl { direction: rtl; text-align: right; }
[dir="rtl"] .tab-nav { flex-direction: row-reverse; }
[dir="rtl"] .gr-row, [dir="rtl"] [class*="row"] { flex-direction: row-reverse; }
[dir="rtl"] input, [dir="rtl"] textarea { text-align: right; direction: rtl; }
[dir="rtl"] input[type="number"] { direction: ltr; text-align: left; }
[dir="rtl"] label, [dir="rtl"] .gr-label { text-align: right; }
[dir="rtl"] .gr-checkbox { flex-direction: row-reverse; }
[dir="rtl"] .gr-slider { direction: ltr; }
[dir="rtl"] .gr-markdown ul, [dir="rtl"] .gr-markdown ol { padding-left: 0; padding-right: 1.5em; }
/* Language selector in header */
.lang-selector-row { display: flex; justify-content: flex-end; margin-bottom: 8px; }
[dir="rtl"] .lang-selector-row { justify-content: flex-start; }
.gradio-container {
background: var(--bg-primary) !important;
min-height: 100vh;
color: var(--text-primary);
}
.tabs { background: transparent !important; padding: 8px 0; }
.tab-nav {
background: var(--bg-secondary) !important;
border: 1px solid var(--border-subtle) !important;
border-radius: var(--radius-lg);
padding: 6px;
gap: 6px;
margin-bottom: 20px;
display: flex;
justify-content: center;
flex-wrap: wrap;
}
.tab-nav > button {
background: transparent !important;
color: var(--text-secondary) !important;
border: none !important;
border-radius: var(--radius-md);
padding: 12px 24px;
font-weight: 500;
font-size: 0.95rem;
cursor: pointer;
transition: all var(--transition);
}
.tab-nav > button:hover {
background: var(--bg-tertiary) !important;
color: var(--text-primary) !important;
}
.tab-nav > button.selected,
.tab-nav > button[aria-selected="true"],
[role="tab"][aria-selected="true"] {
background: var(--accent-gradient) !important;
color: white !important;
font-weight: 600;
box-shadow: var(--shadow-glow);
}
button.primary, .primary {
background: var(--accent-gradient) !important;
border: none !important;
border-radius: var(--radius-md);
font-weight: 600;
padding: 12px 24px;
color: white !important;
cursor: pointer;
transition: all var(--transition);
box-shadow: var(--shadow-md);
}
button.primary:hover, .primary:hover {
box-shadow: var(--shadow-glow), var(--shadow-lg);
filter: brightness(1.1);
}
button.secondary, .secondary {
background: var(--surface) !important;
color: var(--text-primary) !important;
border: 1px solid var(--border-default) !important;
border-radius: var(--radius-sm);
cursor: pointer;
transition: all var(--transition);
}
button.secondary:hover, .secondary:hover {
background: var(--surface-hover) !important;
border-color: var(--accent-primary) !important;
}
.block {
background: var(--bg-secondary) !important;
border: 1px solid var(--border-subtle) !important;
border-radius: var(--radius-lg) !important;
box-shadow: var(--shadow-sm);
padding: 20px;
margin: 8px 0;
transition: all var(--transition);
}
.tabitem { background: transparent !important; padding: 16px 0; }
input, textarea, .gr-input, .gr-textbox textarea {
background: var(--bg-tertiary) !important;
border: 1px solid var(--border-default) !important;
border-radius: var(--radius-sm) !important;
color: var(--text-primary) !important;
transition: all var(--transition);
}
input:focus, textarea:focus {
border-color: var(--accent-primary) !important;
box-shadow: 0 0 0 3px rgba(99, 102, 241, 0.2) !important;
outline: none !important;
}
.gr-dropdown, select {
background: var(--bg-tertiary) !important;
border: 1px solid var(--border-default) !important;
border-radius: var(--radius-sm) !important;
color: var(--text-primary) !important;
}
.gr-slider input[type="range"] { accent-color: var(--accent-primary); }
/* Enhanced checkbox styling for clear checked state */
.gr-checkbox,
.gr-form > div:has(input[type="checkbox"]) {
cursor: pointer;
padding: 8px 12px !important;
margin: 4px 0 !important;
border-radius: var(--radius-sm);
background: transparent;
transition: all var(--transition);
display: flex !important;
align-items: center !important;
gap: 10px !important;
}
.gr-checkbox:hover { background: rgba(129, 140, 248, 0.1) !important; }
.gr-checkbox input[type="checkbox"],
input[type="checkbox"] {
width: 20px !important;
height: 20px !important;
min-width: 20px !important;
min-height: 20px !important;
accent-color: #a78bfa !important;
cursor: pointer !important;
pointer-events: auto !important;
border: 2px solid var(--border-default) !important;
border-radius: 4px !important;
background: var(--bg-tertiary) !important;
transition: all 0.15s ease !important;
}
.gr-checkbox input[type="checkbox"]:hover,
input[type="checkbox"]:hover {
border-color: var(--accent-primary) !important;
background: var(--surface) !important;
}
.gr-checkbox input[type="checkbox"]:focus,
input[type="checkbox"]:focus {
outline: none !important;
border-color: var(--accent-primary) !important;
box-shadow: 0 0 0 3px rgba(129, 140, 248, 0.3) !important;
}
/* CHECKED STATE - Highly visible with glow */
.gr-checkbox input[type="checkbox"]:checked,
input[type="checkbox"]:checked {
background: linear-gradient(135deg, #818cf8 0%, #a78bfa 100%) !important;
border-color: #a78bfa !important;
box-shadow:
0 0 12px rgba(167, 139, 250, 0.6),
0 0 4px rgba(129, 140, 248, 0.8),
inset 0 0 0 1px rgba(255, 255, 255, 0.2) !important;
}
.gr-checkbox input[type="checkbox"]:checked:hover,
input[type="checkbox"]:checked:hover {
background: linear-gradient(135deg, #a78bfa 0%, #c4b5fd 100%) !important;
border-color: #c4b5fd !important;
box-shadow:
0 0 16px rgba(196, 181, 253, 0.7),
0 0 6px rgba(167, 139, 250, 0.9) !important;
}
.gr-checkbox:has(input[type="checkbox"]:checked) {
background: rgba(129, 140, 248, 0.15) !important;
border: 1px solid rgba(167, 139, 250, 0.3) !important;
}
.gr-checkbox:has(input[type="checkbox"]:checked) label,
.gr-checkbox:has(input[type="checkbox"]:checked) span {
color: var(--text-primary) !important;
}
.gr-checkbox label,
.gr-checkbox span,
input[type="checkbox"] + span {
color: var(--text-secondary) !important;
cursor: pointer !important;
user-select: none !important;
}
label, .gr-label { color: var(--text-secondary) !important; font-weight: 500; }
.gr-image, .image-container {
background: var(--bg-tertiary) !important;
border: 2px dashed var(--border-default) !important;
border-radius: var(--radius-lg) !important;
transition: all var(--transition);
}
.gr-image:hover { border-color: var(--accent-primary) !important; }
.gr-image img { border-radius: var(--radius-md); }
/* Examples table - Dark theme (stable selectors only) */
.examples, .gr-examples, [class*="example"], [class*="Example"],
div[class*="example"], div[class*="sample"], .sample-table,
[data-testid="examples"], [data-testid*="example"] {
background: var(--bg-secondary) !important;
border-radius: var(--radius-lg) !important;
}
/* Table itself */
.examples table, .gr-examples table, [class*="example"] table,
[data-testid="examples"] table {
background: var(--bg-secondary) !important;
border-collapse: collapse !important;
width: 100% !important;
}
/* All rows */
.examples tr, .gr-examples tr, [class*="example"] tr,
[data-testid="examples"] tr {
background: var(--bg-secondary) !important;
border-bottom: 1px solid var(--border-default) !important;
}
/* Row hover */
.examples tr:hover, .gr-examples tr:hover, [class*="example"] tr:hover,
[data-testid="examples"] tr:hover {
background: var(--surface) !important;
}
/* Table cells */
.examples td, .gr-examples td, [class*="example"] td,
[data-testid="examples"] td {
color: var(--text-secondary) !important;
background: transparent !important;
}
/* First column (prompts) - emphasized */
.examples td:first-child, [class*="example"] td:first-child,
[data-testid="examples"] td:first-child {
color: var(--text-primary) !important;
font-weight: 500 !important;
}
/* Headers */
.examples th, .gr-examples th, [class*="example"] th,
[data-testid="examples"] th {
background: var(--surface) !important;
color: var(--text-primary) !important;
font-weight: 600 !important;
border-bottom: 1px solid var(--border-default) !important;
}
/* Wrapper divs */
.examples > div, [class*="example"] > div {
background: var(--bg-secondary) !important;
}
h1, h2, h3, h4 { color: var(--text-primary) !important; }
h1 { font-size: clamp(1.5rem, 4vw, 2.2rem); font-weight: 700; }
.markdown-text, .gr-markdown { color: var(--text-secondary) !important; }
.gr-markdown a { color: var(--accent-primary) !important; }
.gr-group {
background: var(--surface) !important;
border: 1px solid var(--border-subtle) !important;
border-radius: var(--radius-lg) !important;
padding: 16px !important;
}
.gr-accordion {
background: var(--bg-secondary) !important;
border: 1px solid var(--border-subtle) !important;
border-radius: var(--radius-md) !important;
}
.footer-no-box { background: transparent !important; border: none !important; box-shadow: none !important; padding: 0; }
.gradio-container > footer {
background: var(--bg-secondary) !important;
border-top: 1px solid var(--border-subtle) !important;
padding: 12px 20px;
}
.gradio-container > footer span, .gradio-container > footer p { color: var(--text-muted) !important; }
.gradio-container > footer a { color: var(--accent-primary) !important; }
.progress-bar { background: var(--bg-tertiary) !important; border-radius: 4px; }
.progress-bar > div { background: var(--accent-gradient) !important; border-radius: 4px; }
/* ============================================
GENERATING IMAGE LOADING ANIMATIONS
============================================ */
@keyframes status-pulse {
0%, 100% {
opacity: 1;
text-shadow: 0 0 4px rgba(129, 140, 248, 0.4), 0 0 8px rgba(129, 140, 248, 0.2);
}
50% {
opacity: 0.7;
text-shadow: 0 0 8px rgba(129, 140, 248, 0.6), 0 0 20px rgba(167, 139, 250, 0.4);
}
}
@keyframes spinner-rotate {
0% { transform: rotate(0deg); }
100% { transform: rotate(360deg); }
}
@keyframes glow-pulse {
0%, 100% { opacity: 0.5; transform: scale(1); }
50% { opacity: 0.8; transform: scale(1.02); }
}
/* Generation status container */
.generation-status {
padding: 16px 20px;
border-radius: var(--radius-md);
margin: 8px 0;
transition: all 0.3s ease;
}
.generation-status.generating {
background: linear-gradient(135deg, rgba(99, 102, 241, 0.15) 0%, rgba(139, 92, 246, 0.1) 100%);
border: 1px solid rgba(129, 140, 248, 0.3);
box-shadow: 0 0 20px rgba(129, 140, 248, 0.2);
}
.generation-status.complete {
background: linear-gradient(135deg, rgba(16, 185, 129, 0.15) 0%, rgba(52, 211, 153, 0.1) 100%);
border: 1px solid rgba(16, 185, 129, 0.3);
}
.generation-status .status-content {
display: flex;
align-items: center;
gap: 14px;
}
.generation-status .status-text-container {
display: flex;
flex-direction: column;
gap: 4px;
}
/* Status colours use !important so they win over the global ".block span" colour override further down. */
.generation-status .status-text {
color: var(--accent-primary) !important;
font-weight: 600;
font-size: 1rem;
animation: status-pulse 2s ease-in-out infinite;
}
.generation-status .status-timer {
color: var(--text-muted) !important;
font-size: 0.85rem;
font-family: monospace;
}
.generation-status .status-complete {
color: var(--success) !important;
font-weight: 600;
font-size: 1rem;
}
.generation-status .status-timer-final {
color: var(--text-secondary);
font-size: 0.9rem;
font-family: monospace;
margin-left: auto;
}
/* Dual-ring spinner */
.generating-spinner-dual {
display: inline-block;
position: relative;
width: 28px;
height: 28px;
flex-shrink: 0;
}
.generating-spinner-dual::before,
.generating-spinner-dual::after {
content: '';
position: absolute;
inset: 0;
border-radius: 50%;
border: 3px solid transparent;
}
.generating-spinner-dual::before {
border-top-color: var(--accent-primary);
animation: spinner-rotate 1.2s linear infinite;
}
.generating-spinner-dual::after {
border-bottom-color: var(--accent-secondary);
animation: spinner-rotate 0.9s linear reverse infinite;
}
/* Image container glow while generating */
.generating .gr-image::after {
content: '';
position: absolute;
inset: -8px;
border-radius: inherit;
background: var(--accent-gradient);
filter: blur(20px);
opacity: 0.3;
animation: glow-pulse 2s ease-in-out infinite;
z-index: -1;
pointer-events: none;
}
@media (prefers-reduced-motion: reduce) {
*, *::before, *::after { animation-duration: 0.01ms !important; transition-duration: 0.01ms !important; }
.generation-status .status-text { animation: none; text-shadow: 0 0 8px rgba(129, 140, 248, 0.5); }
}
@media (max-width: 768px) {
.tab-nav { padding: 4px; gap: 4px; }
.tab-nav > button { padding: 10px 16px; font-size: 0.85rem; }
.block { padding: 12px; margin: 6px 0; }
button.primary { padding: 10px 16px; width: 100%; }
h1 { font-size: 1.4rem !important; }
}
/* Accessibility - keyboard focus indicators */
button:focus-visible, input:focus-visible, textarea:focus-visible,
select:focus-visible, [role="button"]:focus-visible {
outline: 2px solid var(--accent-primary) !important;
outline-offset: 2px !important;
}
.gr-image:focus-visible, [role="tab"]:focus-visible {
outline: 2px solid var(--accent-primary) !important;
outline-offset: 2px !important;
}
::-webkit-scrollbar { width: 8px; height: 8px; }
::-webkit-scrollbar-track { background: var(--bg-secondary); }
::-webkit-scrollbar-thumb { background: var(--bg-tertiary); border-radius: 4px; }
::-webkit-scrollbar-thumb:hover { background: var(--surface); }
/* Tab navigation text */
.tab-nav button, .tab-nav > button, button[role="tab"], .tabs button { color: var(--text-primary) !important; }
/* Labels and spans */
label, .gr-label, .label-wrap, .label-wrap span, .gr-box label, .gr-form label, .gr-group label { color: var(--text-secondary) !important; }
.gr-block span, .gr-box span, .gr-form span, .gr-group span, .block span { color: var(--text-secondary) !important; }
/* Table overrides */
table thead, table thead tr, table thead th, [class*="examples"] thead th { background: var(--surface) !important; color: var(--text-primary) !important; }
table tbody td, [class*="examples"] td { color: var(--text-secondary) !important; }
/* Accordion and markdown */
.gr-accordion summary, .gr-accordion button, details summary, summary span { color: var(--text-primary) !important; }
.gr-markdown, .gr-markdown p, .gr-markdown li, .markdown-text, .prose { color: var(--text-secondary) !important; }
/* Input placeholders and buttons */
input::placeholder, textarea::placeholder { color: var(--text-muted) !important; }
button.secondary, .secondary { color: var(--text-primary) !important; }
/* Dropdown menus - dark theme */
.gr-dropdown ul, .gr-dropdown li, [data-testid="dropdown"] ul,
.svelte-select-list, .dropdown-menu, select option,
[role="listbox"], [role="listbox"] [role="option"] {
background: var(--bg-tertiary) !important;
color: var(--text-primary) !important;
}
/* Dropdown hover/selected states */
.gr-dropdown li:hover, select option:hover,
[role="option"]:hover, [role="option"][aria-selected="true"] {
background: var(--surface) !important;
}
/* Portal dropdowns (rendered outside .gradio-container) */
[data-testid="dropdown-list"],
[role="listbox"]:not(.gradio-container [role="listbox"]) {
background-color: var(--bg-tertiary) !important;
color: var(--text-primary) !important;
border: 1px solid var(--border-default) !important;
border-radius: var(--radius-sm) !important;
}
/* Slider and checkbox labels */
.gr-slider span, .gr-slider output, .range-wrap span,
input[type="range"] + span { color: var(--text-primary) !important; }
.gr-checkbox label, .gr-checkbox span,
input[type="checkbox"] + span { color: var(--text-secondary) !important; }
/* Image upload text */
.gr-image span, .gr-image p, .upload-text,
[data-testid="image"] span { color: var(--text-secondary) !important; }
.gr-image svg, .upload-icon { fill: var(--text-muted) !important; }
/* Error/warning states */
.gr-error, [class*="error"] {
background: rgba(239,68,68,0.15) !important;
color: var(--error) !important;
border-color: var(--error) !important;
}
.gr-info, [class*="info-msg"] {
background: rgba(129,140,248,0.15) !important;
color: var(--accent-primary) !important;
}
/* Copy buttons and icons */
.gr-textbox button, button svg, .copy-button {
color: var(--text-secondary) !important;
fill: var(--text-secondary) !important;
}
.gr-textbox button:hover { color: var(--text-primary) !important; }
/* Tooltips */
[role="tooltip"], .gr-tooltip, .tooltip {
background: var(--surface) !important;
color: var(--text-primary) !important;
border: 1px solid var(--border-default) !important;
}
/* Progress/loading text */
.progress-text, .loading-text, [class*="loading"] span,
[class*="progress"] span { color: var(--text-secondary) !important; }
/* Number input spinners */
input[type="number"]::-webkit-inner-spin-button,
input[type="number"]::-webkit-outer-spin-button { filter: invert(0.8); }
"""
# Custom dark theme.
# Each entry below is applied twice: once to the theme variable itself and
# once to its "<name>_dark" counterpart, always with the same value.
_THEME_COLORS = {
    # Backgrounds
    "body_background_fill": "#0c0c0e",
    "background_fill_primary": "#141416",
    "background_fill_secondary": "#1c1c20",
    # Borders
    "border_color_primary": "rgba(255,255,255,0.12)",
    # Text
    "body_text_color": "#e5e5e5",
    "body_text_color_subdued": "#a1a1aa",
    # Blocks
    "block_background_fill": "#141416",
    "block_border_color": "rgba(255,255,255,0.08)",
    "block_label_background_fill": "#1c1c20",
    "block_label_text_color": "#a1a1aa",
    # Inputs
    "input_background_fill": "#1c1c20",
    "input_border_color": "rgba(255,255,255,0.12)",
    # Buttons
    "button_primary_background_fill": "linear-gradient(135deg, #6366f1 0%, #8b5cf6 100%)",
    "button_primary_text_color": "white",
    "button_secondary_background_fill": "#232328",
    "button_secondary_text_color": "#e5e5e5",
    # Table/Examples - CRITICAL for fixing white background
    "table_even_background_fill": "#1a1a1e",
    "table_odd_background_fill": "#1a1a1e",
    "table_row_focus": "#252528",
}

# Expand every colour into the (name, name_dark) pair expected by .set().
_theme_overrides = {
    variable: colour
    for name, colour in _THEME_COLORS.items()
    for variable in (name, f"{name}_dark")
}

_base_theme = gr.themes.Base(
    primary_hue=gr.themes.colors.indigo,
    secondary_hue=gr.themes.colors.purple,
    neutral_hue=gr.themes.colors.zinc,
)
dark_theme = _base_theme.set(**_theme_overrides)
# JavaScript run once by Gradio when the page loads (gr.Blocks(js=...)).
# It mirrors the language dropdown onto the document: Arabic switches the page
# to RTL and the Arabic font class, Hindi applies the Hindi font class, and
# anything else clears both.
# This logic used to live in a <script> tag inside gr.HTML, but gr.HTML only
# renders static markup, so it has to be supplied through `js` to execute.
# The listener is registered once (delegated on document) instead of once per
# DOM mutation.
_RTL_TOGGLE_JS = """() => {
    const SELECTOR = '.lang-selector-row select, .lang-selector-row input';
    const applyLanguage = () => {
        const dropdown = document.querySelector(SELECTOR);
        if (!dropdown) { return; }
        const val = dropdown.value || '';
        const html = document.documentElement;
        const body = document.body;
        if (val.includes('العربية')) {
            html.setAttribute('dir', 'rtl');
            body.classList.add('rtl', 'lang-ar');
            body.classList.remove('lang-hi');
        } else if (val.includes('हिंदी')) {
            html.removeAttribute('dir');
            body.classList.remove('rtl', 'lang-ar');
            body.classList.add('lang-hi');
        } else {
            html.removeAttribute('dir');
            body.classList.remove('rtl', 'lang-ar', 'lang-hi');
        }
    };
    document.addEventListener('change', (event) => {
        if (event.target instanceof Element && event.target.matches(SELECTOR)) {
            applyLanguage();
        }
    });
    // Re-evaluate after every re-render: the dropdown value is updated by
    // Gradio without necessarily firing a native change event.
    new MutationObserver(applyLanguage).observe(document.body, { childList: true, subtree: true });
    applyLanguage();
}"""

# Main UI definition: language selector, header, three tabs (Generate,
# AI Assistant, Transform), footer, and hidden components backing the
# "mcp_generate" API endpoint.
# NOTE(review): the layout nesting below was reconstructed from the order of
# the `with` statements - confirm it against the deployed app.
with gr.Blocks(title="Z Image Turbo", css=css, theme=dark_theme, js=_RTL_TOGGLE_JS) as demo:
    # Language selector at top
    with gr.Row(elem_classes="lang-selector-row"):
        lang_selector = gr.Dropdown(
            choices=LANGUAGES,
            value="English",
            label="🌐 Language",
            scale=0,
            min_width=160,
            interactive=True,
        )

    # Static page header (no script here: gr.HTML does not execute JavaScript).
    gr.HTML("""
<div style="text-align: center; padding: 8px 16px 16px 16px;">
<h1 style="background: linear-gradient(135deg, #818cf8 0%, #a78bfa 100%); -webkit-background-clip: text; -webkit-text-fill-color: transparent; background-clip: text; font-size: clamp(1.5rem, 4vw, 2.2rem); margin-bottom: 8px; font-weight: 700;">
Z-Image Turbo + GLM-4.6V / DeepSeek-3.2 Thinking
</h1>
<p style="color: #a1a1aa; font-size: 1rem; margin: 0;">
Image Gen & Transformation. Van Gogh and Picasso Style included
</p>
<p style="color: #ef4444; font-size: 0.95rem; margin-top: 12px; font-weight: 500;">
If you liked it, please ❤️ like it. Thank you!
</p>
</div>
""")

    with gr.Tabs():
        # TAB 1: Generate Image
        with gr.Tab("Generate"):
            with gr.Row():
                with gr.Column(scale=2):
                    gen_prompt = gr.Textbox(label="Prompt", placeholder="Describe your image in detail...", lines=4)
                    gen_polish = gr.Checkbox(label="Prompt+ by deepseek-reasoner", value=False)
                    with gr.Row():
                        gen_style = gr.Dropdown(choices=STYLES, value="None", label="Style")
                        gen_ratio = gr.Dropdown(choices=RATIOS, value="1:1 Square (1024x1024)", label="Aspect Ratio")
                    with gr.Accordion("Advanced Settings", open=False):
                        gen_steps = gr.Slider(minimum=4, maximum=16, value=9, step=1, label="Steps")
                        with gr.Row():
                            gen_seed = gr.Number(label="Seed", value=42, precision=0)
                            gen_randomize = gr.Checkbox(label="Random Seed", value=True)
                    gen_btn = gr.Button("Generate", variant="primary", size="lg")
                with gr.Column(scale=3):
                    gen_output = gr.Image(label="Generated Image", type="pil", interactive=False, height=512, format="png")
                    gen_polished_prompt = gr.HTML(label="Status", value="")
                    gen_seed_out = gr.Number(label="Seed Used", interactive=False)
                    with gr.Row():
                        gen_share_btn = gr.Button("Share", variant="secondary")
                        gen_share_link = gr.Textbox(label="", interactive=False, show_copy_button=True, show_label=False)
            gr.Examples(examples=EXAMPLES_GENERATE, inputs=[gen_prompt, gen_style, gen_ratio, gen_steps, gen_seed, gen_randomize])

            # The button click and Enter-in-prompt share one handler and one I/O signature.
            gen_inputs = [gen_prompt, gen_style, gen_polish, gen_ratio, gen_steps, gen_seed, gen_randomize]
            gen_outputs = [gen_output, gen_polished_prompt, gen_seed_out]
            gen_btn.click(fn=generate_with_polish, inputs=gen_inputs, outputs=gen_outputs)
            gen_prompt.submit(fn=generate_with_polish, inputs=gen_inputs, outputs=gen_outputs)
            gen_share_btn.click(fn=upload_to_hf_cdn, inputs=[gen_output], outputs=[gen_share_link])

        # TAB 2: AI Vision Assistant
        with gr.Tab("AI Assistant"):
            ai_desc_md = gr.Markdown("**AI-Powered Prompt Generator** - Upload an image, analyze it with GLM-4.6V, then generate optimized prompts.")
            with gr.Row():
                with gr.Column(scale=1):
                    ai_image = gr.Image(label="Upload Image", type="pil", height=300)
                    ai_analyze_btn = gr.Button("Analyze Image", variant="primary")
                    ai_description = gr.Textbox(label="Image Description", lines=12, interactive=False)
                with gr.Column(scale=1):
                    ai_request = gr.Textbox(label="What changes do you want?", placeholder="e.g., 'watercolor style' or 'dramatic sunset lighting'", lines=2)
                    ai_style = gr.Dropdown(choices=STYLES, value="None", label="Target Style")
                    ai_generate_btn = gr.Button("Generate Prompt", variant="primary")
                    ai_generated_prompt = gr.Textbox(label="Generated Prompt", lines=6, interactive=False)
                    ai_send_btn = gr.Button("Send to Transform Tab", variant="primary")
            with gr.Accordion("How to Use", open=False):
                ai_howto_md = gr.Markdown("""
1. **Upload** an image and click "Analyze Image"
2. **Describe** the changes you want
3. **Generate** an optimized prompt
4. **Send** to Transform tab to apply changes
""")
            ai_analyze_btn.click(
                fn=analyze_image_with_glm,
                inputs=[ai_image],
                outputs=[ai_description],
            )
            ai_generate_btn.click(
                fn=generate_prompt_with_glm,
                inputs=[ai_description, ai_request, ai_style],
                outputs=[ai_generated_prompt],
            )

        # TAB 3: Transform Image
        with gr.Tab("Transform"):
            trans_desc_md = gr.Markdown("**Transform your image** - Upload and describe the transformation. Lower strength = subtle, higher = dramatic.")
            with gr.Row():
                with gr.Column(scale=2):
                    trans_input = gr.Image(label="Upload Image", type="pil", height=300)
                    trans_prompt = gr.Textbox(label="Transformation Prompt (optional if style selected)", placeholder="Optional: describe changes, or just select a Style below (Van Gogh, Picasso, etc.)", lines=3)
                    trans_polish = gr.Checkbox(label="Prompt+ by deepseek-reasoner", value=False)
                    with gr.Row():
                        trans_style = gr.Dropdown(choices=STYLES, value="None", label="Style")
                        trans_strength = gr.Slider(minimum=0.1, maximum=1.0, value=0.6, step=0.05, label="Strength")
                    with gr.Accordion("Advanced Settings", open=False):
                        trans_steps = gr.Slider(minimum=4, maximum=16, value=9, step=1, label="Steps")
                        with gr.Row():
                            trans_seed = gr.Number(label="Seed", value=42, precision=0)
                            trans_randomize = gr.Checkbox(label="Random Seed", value=True)
                    trans_btn = gr.Button("Transform", variant="primary", size="lg")
                with gr.Column(scale=3):
                    trans_output = gr.Image(label="Transformed Image", type="pil", interactive=False, height=512, format="png")
                    trans_polished_prompt = gr.HTML(label="Status", value="")
                    trans_seed_out = gr.Number(label="Seed Used", interactive=False)
                    with gr.Row():
                        trans_share_btn = gr.Button("Share", variant="secondary")
                        trans_share_link = gr.Textbox(label="", interactive=False, show_copy_button=True, show_label=False)
            with gr.Accordion("Example Prompts", open=False):
                gr.Examples(examples=EXAMPLES_TRANSFORM, inputs=[trans_prompt, trans_style, trans_strength, trans_steps, trans_seed, trans_randomize])

            # The button click and Enter-in-prompt share one handler and one I/O signature.
            trans_inputs = [trans_input, trans_prompt, trans_style, trans_polish, trans_strength, trans_steps, trans_seed, trans_randomize]
            trans_outputs = [trans_output, trans_polished_prompt, trans_seed_out]
            trans_btn.click(fn=transform_with_polish, inputs=trans_inputs, outputs=trans_outputs)
            trans_prompt.submit(fn=transform_with_polish, inputs=trans_inputs, outputs=trans_outputs)
            trans_share_btn.click(fn=upload_to_hf_cdn, inputs=[trans_output], outputs=[trans_share_link])

    # Cross-tab handler: copy the generated prompt and the analysed image
    # from the AI Assistant tab into the Transform tab inputs.
    ai_send_btn.click(
        fn=lambda prompt, img: (prompt, img),
        inputs=[ai_generated_prompt, ai_image],
        outputs=[trans_prompt, trans_input],
    )

    # Language selector - update all UI labels when language changes.
    # change_language must return one update per component, in this order.
    lang_selector.change(
        fn=change_language,
        inputs=[lang_selector],
        outputs=[
            # Generate tab (12 components)
            gen_prompt, gen_polish, gen_style, gen_ratio, gen_steps, gen_seed,
            gen_randomize, gen_btn, gen_output, gen_polished_prompt, gen_seed_out, gen_share_btn,
            # AI Assistant tab (10 components)
            ai_desc_md, ai_image, ai_analyze_btn, ai_description, ai_request, ai_style,
            ai_generate_btn, ai_generated_prompt, ai_send_btn, ai_howto_md,
            # Transform tab (14 components)
            trans_desc_md, trans_input, trans_prompt, trans_polish, trans_style, trans_strength,
            trans_steps, trans_seed, trans_randomize, trans_btn, trans_output, trans_polished_prompt,
            trans_seed_out, trans_share_btn,
        ],
    )

    # Credits footer
    gr.HTML(
        """
<div style="text-align: center; width: 100%; font-size: 0.9rem; padding: 1rem; margin-top: 1.5rem; background: #141416; border: 1px solid rgba(255,255,255,0.08); border-radius: 12px; color: #71717a;">
<div style="margin-bottom: 8px;">
<strong style="color: #a1a1aa;">Image Generation:</strong>
<a href="https://huggingface.co/Tongyi-MAI/Z-Image-Turbo" target="_blank" style="color: #818cf8; font-weight: 500;">Z-Image-Turbo</a>
<span style="color: #52525b;">(Tongyi-MAI)</span>
</div>
<div style="margin-bottom: 8px;">
<strong style="color: #a1a1aa;">Vision AI:</strong>
<a href="https://huggingface.co/zai-org/GLM-4.6V" target="_blank" style="color: #818cf8; font-weight: 500;">GLM-4.6V</a>
<span style="color: #52525b;">(Z.AI / Zhipu)</span> |
<strong style="color: #a1a1aa;">Prompt+:</strong>
<a href="https://deepseek.com" target="_blank" style="color: #818cf8; font-weight: 500;">DeepSeek Reasoner</a>
</div>
<div>
<strong style="color: #a1a1aa;">Built by</strong>
<a href="https://huggingface.co/lulavc" target="_blank" style="color: #a78bfa; font-weight: 600;">@lulavc</a> |
<a href="https://huggingface.co/spaces/lulavc/Z-Image-Turbo" target="_blank" style="color: #6366f1; font-weight: 500;">MCP Server Enabled</a>
</div>
</div>
""",
        elem_classes="footer-no-box",
    )

    # MCP API Endpoints - Hidden components for direct API access
    # NOTE(review): only the "mcp_generate" endpoint is wired in this block; if
    # an MCP transform endpoint is also meant to be public it needs its own
    # hidden components and api_name - confirm.
    with gr.Row(visible=False):
        mcp_prompt_in = gr.Textbox()
        mcp_style_in = gr.Dropdown(choices=STYLES, value="None")
        mcp_ratio_in = gr.Dropdown(choices=RATIOS, value="1:1 Square (1024x1024)")
        mcp_steps_in = gr.Slider(minimum=4, maximum=16, value=9)
        mcp_seed_in = gr.Number(value=42)
        mcp_random_in = gr.Checkbox(value=True)
        mcp_image_out = gr.Image(type="pil", format="png")
        mcp_seed_out = gr.Number()
        mcp_gen_btn = gr.Button()
        mcp_gen_btn.click(
            fn=mcp_generate,
            inputs=[mcp_prompt_in, mcp_style_in, mcp_ratio_in, mcp_steps_in, mcp_seed_in, mcp_random_in],
            outputs=[mcp_image_out, mcp_seed_out],
            api_name="mcp_generate",
        )
# Start the Gradio app. mcp_server=True is passed to launch() so the app is
# also served in Gradio's MCP server mode (the footer advertises
# "MCP Server Enabled").
demo.launch(mcp_server=True)