Spaces: Running on Zero
Update app.py
app.py CHANGED
@@ -61,7 +61,7 @@ except ImportError:
 
 # Import Lyra loader module
 try:
-    from geofractal.model.vae.
+    from geofractal.model.vae.loader import load_vae_lyra, load_lyra_illustrious
     LYRA_LOADER_AVAILABLE = True
 except ImportError:
     print("⚠️ Lyra loader module not available, using fallback")
@@ -154,7 +154,10 @@ class LazyT5Encoder:
 
 
 class LazyLyraModel:
-    """Lazy loader for Lyra VAE - only downloads/loads when first accessed."""
+    """Lazy loader for Lyra VAE - only downloads/loads when first accessed.
+
+    Exposes config with modality_seq_lens for proper tokenization lengths.
+    """
 
     def __init__(
         self,
@@ -167,7 +170,118 @@ class LazyLyraModel:
         self.checkpoint = checkpoint
         self._model = None
         self._info = None
+        self._config = None
         self._loaded = False
+
+        # Pre-fetch config without loading model (lightweight)
+        self._prefetch_config()
+
+    def _prefetch_config(self):
+        """Fetch config.json to get sequence lengths without loading the full model."""
+        try:
+            config_path = hf_hub_download(
+                repo_id=self.repo_id,
+                filename="config.json",
+                repo_type="model"
+            )
+            with open(config_path, 'r') as f:
+                self._config = json.load(f)
+
+            # Detect version from config
+            is_v2 = 'modality_seq_lens' in self._config or 'binding_config' in self._config
+            version = "v2" if is_v2 else "v1"
+
+            print(f"📋 Lyra config prefetched: {self.repo_id} ({version})")
+
+            if is_v2:
+                print(f"   Sequence lengths: {self._config.get('modality_seq_lens', {})}")
+            else:
+                print(f"   Sequence length: {self._config.get('seq_len', 77)}")
+
+        except Exception as e:
+            print(f"⚠️ Could not prefetch Lyra config: {e}")
+            # Detect version from repo name and use appropriate defaults
+            is_illustrious = 'illustrious' in self.repo_id.lower() or 'xl' in self.repo_id.lower()
+
+            if is_illustrious:
+                # v2 defaults for SDXL/Illustrious
+                self._config = {
+                    "modality_dims": {
+                        "clip_l": 768,
+                        "clip_g": 1280,
+                        "t5_xl_l": 2048,
+                        "t5_xl_g": 2048
+                    },
+                    "modality_seq_lens": {
+                        "clip_l": 77,
+                        "clip_g": 77,
+                        "t5_xl_l": 512,
+                        "t5_xl_g": 512
+                    },
+                    "fusion_strategy": "adaptive_cantor",
+                    "latent_dim": 2048
+                }
+            else:
+                # v1 defaults for SD1.5
+                self._config = {
+                    "modality_dims": {
+                        "clip": 768,
+                        "t5": 768
+                    },
+                    "seq_len": 77,
+                    "fusion_strategy": "cantor",
+                    "latent_dim": 768
+                }
+
+    @property
+    def config(self) -> Dict:
+        """Get model config (available before full model load)."""
+        return self._config or {}
+
+    @property
+    def modality_seq_lens(self) -> Dict[str, int]:
+        """Get sequence lengths for each modality.
+
+        Handles both v1 (seq_len) and v2 (modality_seq_lens) config formats.
+        """
+        # v2 format: modality_seq_lens dict
+        if 'modality_seq_lens' in self.config:
+            return self.config['modality_seq_lens']
+
+        # v1 format: derive from single seq_len
+        seq_len = self.config.get('seq_len', 77)
+        modality_dims = self.config.get('modality_dims', {})
+
+        # Return seq_len for all modalities in v1
+        return {name: seq_len for name in modality_dims.keys()}
+
+    @property
+    def t5_max_length(self) -> int:
+        """Get T5 max sequence length from config.
+
+        Handles both v1 (seq_len) and v2 (modality_seq_lens) config formats.
+        """
+        # v2 format: modality_seq_lens dict
+        if 'modality_seq_lens' in self.config:
+            seq_lens = self.config['modality_seq_lens']
+            return seq_lens.get('t5_xl_l', seq_lens.get('t5_xl_g', 512))
+
+        # v1 format: single seq_len
+        return self.config.get('seq_len', 77)
+
+    @property
+    def clip_max_length(self) -> int:
+        """Get CLIP max sequence length from config.
+
+        Handles both v1 (seq_len) and v2 (modality_seq_lens) config formats.
+        """
+        # v2 format: modality_seq_lens dict
+        if 'modality_seq_lens' in self.config:
+            seq_lens = self.config['modality_seq_lens']
+            return seq_lens.get('clip_l', 77)
+
+        # v1 format: single seq_len (same for all modalities)
+        return self.config.get('seq_len', 77)
 
     @property
     def model(self):
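Reviewer note: the prefetch above downloads only config.json, so sequence lengths are known before any weights move. A minimal standalone sketch of the same idea, assuming only that the target repo ships a config.json (the repo id below is a placeholder, not the Space's actual repo):

```python
# Sketch of the config-prefetch pattern, under the assumptions stated above.
import json
from huggingface_hub import hf_hub_download

def prefetch_seq_lens(repo_id: str) -> dict:
    """Download only config.json and derive per-modality sequence lengths."""
    config_path = hf_hub_download(repo_id=repo_id, filename="config.json", repo_type="model")
    with open(config_path, "r") as f:
        config = json.load(f)
    # v2 configs carry an explicit map; v1 configs have a single seq_len
    if "modality_seq_lens" in config:
        return config["modality_seq_lens"]
    seq_len = config.get("seq_len", 77)
    return {name: seq_len for name in config.get("modality_dims", {})}

# Usage (placeholder repo id):
# seq_lens = prefetch_seq_lens("your-org/your-lyra-repo")
```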
@@ -182,10 +296,13 @@ class LazyLyraModel:
                 device=self.device,
                 return_info=True
             )
+            # Update config from loaded info
+            if self._info and 'config' in self._info:
+                self._config = self._info['config']
         else:
             # Fallback to manual loading
             self._model = self._load_fallback()
-            self._info = {"repo_id": self.repo_id, "version": "v2"}
+            self._info = {"repo_id": self.repo_id, "version": "v2", "config": self._config}
 
         self._model.eval()
         self._loaded = True
@@ -194,8 +311,8 @@ class LazyLyraModel:
 
     @property
    def info(self) -> Optional[Dict]:
-        if self._info is None:
-            return {"repo_id": self.repo_id}
+        if self._info is None:
+            return {"repo_id": self.repo_id, "config": self._config}
         return self._info
 
     @property
@@ -207,18 +324,11 @@ class LazyLyraModel:
         if not LYRA_V2_AVAILABLE:
             raise ImportError("Lyra VAE v2 not available")
 
-        config_path = hf_hub_download(
-            repo_id=self.repo_id,
-            filename="config.json",
-            repo_type="model"
-        )
-
-        with open(config_path, 'r') as f:
-            config_dict = json.load(f)
+        # Config already prefetched
+        config_dict = self._config
 
         # Find checkpoint
         from huggingface_hub import list_repo_files
-        import re
 
         repo_files = list_repo_files(self.repo_id, repo_type="model")
         checkpoint_files = [f for f in repo_files if f.endswith('.safetensors') or f.endswith('.pt')]
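Reviewer note: the fallback loader discovers a checkpoint by listing the repo's files. A sketch of that discovery step; the tie-break preferring .safetensors over .pt is an assumption, the diff only shows the filtering:

```python
# Sketch of checkpoint discovery via the HF Hub file listing.
from huggingface_hub import list_repo_files

def find_checkpoint(repo_id: str) -> str:
    files = list_repo_files(repo_id, repo_type="model")
    candidates = [f for f in files if f.endswith(".safetensors") or f.endswith(".pt")]
    if not candidates:
        raise FileNotFoundError(f"No checkpoint files found in {repo_id}")
    # Assumption: prefer safetensors when both formats are present
    candidates.sort(key=lambda f: (not f.endswith(".safetensors"), f))
    return candidates[0]
```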
@@ -245,7 +355,7 @@ class LazyLyraModel:
         checkpoint = torch.load(checkpoint_path, map_location="cpu")
         state_dict = checkpoint.get('model_state_dict', checkpoint)
 
-        # Build config
+        # Build config with all fields from prefetched config
         vae_config = LyraV2Config(
             modality_dims=config_dict.get('modality_dims'),
             modality_seq_lens=config_dict.get('modality_seq_lens'),
@@ -253,6 +363,13 @@ class LazyLyraModel:
             latent_dim=config_dict.get('latent_dim', 2048),
             hidden_dim=config_dict.get('hidden_dim', 2048),
             fusion_strategy=config_dict.get('fusion_strategy', 'adaptive_cantor'),
+            encoder_layers=config_dict.get('encoder_layers', 3),
+            decoder_layers=config_dict.get('decoder_layers', 3),
+            fusion_heads=config_dict.get('fusion_heads', 8),
+            cantor_depth=config_dict.get('cantor_depth', 8),
+            cantor_local_window=config_dict.get('cantor_local_window', 3),
+            alpha_init=config_dict.get('alpha_init', 1.0),
+            beta_init=config_dict.get('beta_init', 0.3),
         )
 
         model = LyraV2(vae_config)
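Reviewer note: the pattern here is "typed config from a prefetched dict, with the same defaults on every field". The dataclass below is a hypothetical stand-in (the real LyraV2Config lives in the geofractal package); it only illustrates the construction pattern:

```python
# Stand-in illustration; field names mirror the diff, the class itself is hypothetical.
from dataclasses import dataclass
from typing import Dict, Optional

@dataclass
class LyraV2ConfigSketch:
    modality_dims: Optional[Dict[str, int]] = None
    modality_seq_lens: Optional[Dict[str, int]] = None
    latent_dim: int = 2048
    hidden_dim: int = 2048
    fusion_strategy: str = "adaptive_cantor"
    encoder_layers: int = 3
    decoder_layers: int = 3
    fusion_heads: int = 8
    cantor_depth: int = 8
    cantor_local_window: int = 3
    alpha_init: float = 1.0
    beta_init: float = 0.3

def build_config(config_dict: dict) -> LyraV2ConfigSketch:
    # Unknown keys are dropped; missing keys fall back to the dataclass defaults
    known = set(LyraV2ConfigSketch.__dataclass_fields__)
    return LyraV2ConfigSketch(**{k: v for k, v in config_dict.items() if k in known})
```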
@@ -337,39 +454,6 @@ def get_scheduler(
 # UTILITIES
 # ============================================================================
 
-def extract_comfyui_components(state_dict: Dict[str, torch.Tensor]) -> Dict[str, Dict[str, torch.Tensor]]:
-    """Extract UNet, CLIP-L, CLIP-G, and VAE from ComfyUI single-file checkpoint."""
-
-    components = {
-        "unet": {},
-        "clip_l": {},
-        "clip_g": {},
-        "vae": {}
-    }
-
-    for key, value in state_dict.items():
-        if key.startswith(COMFYUI_UNET_PREFIX):
-            new_key = key[len(COMFYUI_UNET_PREFIX):]
-            components["unet"][new_key] = value
-        elif key.startswith(COMFYUI_CLIP_L_PREFIX):
-            new_key = key[len(COMFYUI_CLIP_L_PREFIX):]
-            components["clip_l"][new_key] = value
-        elif key.startswith(COMFYUI_CLIP_G_PREFIX):
-            new_key = key[len(COMFYUI_CLIP_G_PREFIX):]
-            components["clip_g"][new_key] = value
-        elif key.startswith(COMFYUI_VAE_PREFIX):
-            new_key = key[len(COMFYUI_VAE_PREFIX):]
-            components["vae"][new_key] = value
-
-    print(f"  Extracted components:")
-    print(f"    UNet: {len(components['unet'])} keys")
-    print(f"    CLIP-L: {len(components['clip_l'])} keys")
-    print(f"    CLIP-G: {len(components['clip_g'])} keys")
-    print(f"    VAE: {len(components['vae'])} keys")
-
-    return components
-
-
 def get_clip_hidden_state(
     model_output,
     clip_skip: int = 1,
@@ -551,40 +635,77 @@ class SDXLFlowMatchingPipeline:
 
     def encode_prompt_lyra(
         self,
-        prompt: str,
+        prompt: str,
         negative_prompt: str = "",
         clip_skip: int = 1,
         t5_summary: str = "",
-        lyra_strength: float = 0.3
+        lyra_strength: float = 0.3,
+        use_separator: bool = True,
+        clip_include_summary: bool = False
     ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:
         """Encode prompts using Lyra VAE v2 fusion (CLIP + T5).
 
+        CLIP encoders receive tags only (prompt field).
+        T5 encoder receives tags + separator + summary.
+
+        Args:
+            prompt: Tags/keywords for CLIP encoding
+            negative_prompt: Negative tags
+            clip_skip: CLIP skip layers
+            t5_summary: Natural language summary for T5
+            lyra_strength: Blend factor (0=pure CLIP, 1=pure Lyra)
+            use_separator: If True, use ¶ separator between tags and summary
+            clip_include_summary: If True, append summary to CLIP input (default False)
+
         This triggers lazy loading of T5 and Lyra if not already loaded.
+        Uses sequence lengths from Lyra config for proper tokenization.
         """
         if not self.lyra_available:
             raise ValueError("Lyra VAE components not configured")
 
+        # Get sequence lengths from Lyra config (available before full load)
+        t5_max_length = self.lyra_loader.t5_max_length  # 512 for Illustrious
+        clip_max_length = self.lyra_loader.clip_max_length  # 77 for Illustrious
+
+        print(f"[Lyra] Using sequence lengths: CLIP={clip_max_length}, T5={t5_max_length}")
+
         # Access properties triggers lazy load
         t5_encoder = self.t5_encoder
         t5_tokenizer = self.t5_tokenizer
         lyra_model = self.lyra_model
 
-        #
+        # === CLIP ENCODING ===
+        # CLIP sees tags only (unless clip_include_summary is True)
+        if clip_include_summary and t5_summary.strip():
+            clip_prompt = f"{prompt} {t5_summary}"
+        else:
+            clip_prompt = prompt
+
+        # Get CLIP embeddings with tags only
         prompt_embeds, negative_prompt_embeds, pooled, negative_pooled = self.encode_prompt(
-            prompt, negative_prompt, clip_skip
+            clip_prompt, negative_prompt, clip_skip
         )
 
-        #
+        # === T5 ENCODING ===
+        # T5 sees tags + separator + summary (or tags + summary if no separator)
         SUMMARY_SEPARATOR = "¶"
+
         if t5_summary.strip():
-            t5_prompt = f"{prompt} {SUMMARY_SEPARATOR} {t5_summary}"
+            if use_separator:
+                t5_prompt = f"{prompt} {SUMMARY_SEPARATOR} {t5_summary}"
+            else:
+                t5_prompt = f"{prompt} {t5_summary}"
         else:
-            t5_prompt = prompt
+            # No summary provided - T5 just sees the tags
+            t5_prompt = prompt
 
-
+        print(f"[Lyra] CLIP input: {clip_prompt[:80]}...")
+        print(f"[Lyra] T5 input: {t5_prompt[:80]}...")
+
+        # Get T5 embeddings with config-specified max_length
         t5_inputs = t5_tokenizer(
             t5_prompt,
-            max_length=
+            max_length=t5_max_length,
             padding='max_length',
             truncation=True,
             return_tensors='pt'
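Reviewer note: the tags-vs-summary split is the core of this hunk: CLIP sees the tag prompt, T5 sees tags plus an optional ¶-separated summary, tokenized to the config's max length. A sketch of that construction; the tokenizer checkpoint is an arbitrary T5 stand-in for illustration, not the one the Space uses:

```python
# Sketch of the T5 prompt construction used above.
from transformers import AutoTokenizer

SUMMARY_SEPARATOR = "¶"

def build_t5_prompt(tags: str, summary: str, use_separator: bool = True) -> str:
    """CLIP gets `tags` as-is; T5 gets tags joined with the summary."""
    if not summary.strip():
        return tags
    joiner = f" {SUMMARY_SEPARATOR} " if use_separator else " "
    return f"{tags}{joiner}{summary}"

tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-base")  # stand-in checkpoint
t5_prompt = build_t5_prompt("1girl, blue hair", "A girl with blue hair under cherry blossoms")
inputs = tokenizer(t5_prompt, max_length=512, padding="max_length",
                   truncation=True, return_tensors="pt")
```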
@@ -593,6 +714,7 @@ class SDXLFlowMatchingPipeline:
         with torch.no_grad():
             t5_embeds = t5_encoder(**t5_inputs).last_hidden_state
 
+        # === LYRA FUSION ===
         clip_l_dim = 768
         clip_g_dim = 1280
 
@@ -632,14 +754,17 @@ class SDXLFlowMatchingPipeline:
 
         prompt_embeds_fused = torch.cat([fused_clip_l, fused_clip_g], dim=-1)
 
-        #
+        # === NEGATIVE PROMPT ===
+        # Negative uses same logic: CLIP sees negative tags only
         if negative_prompt:
             neg_strength = lyra_strength * 0.5  # Less aggressive for negative
 
-            t5_neg_prompt = negative_prompt
+            # T5 negative: tags only (no summary for negative)
+            t5_neg_prompt = negative_prompt
+
             t5_inputs_neg = t5_tokenizer(
                 t5_neg_prompt,
-                max_length=
+                max_length=t5_max_length,
                 padding='max_length',
                 truncation=True,
                 return_tensors='pt'
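Reviewer note: lyra_strength blends the raw CLIP embeddings with the Lyra-fused reconstruction, and the negative branch halves it. A sketch assuming the blend is a plain linear interpolation, consistent with the UI hint "0.0 = pure CLIP, 1.0 = pure Lyra" (the actual fusion math lives in LyraV2 and may differ):

```python
# Assumed linear blend between CLIP embeddings and Lyra reconstruction.
import torch

def blend(clip_embeds: torch.Tensor, lyra_embeds: torch.Tensor, strength: float) -> torch.Tensor:
    return (1.0 - strength) * clip_embeds + strength * lyra_embeds

clip_e = torch.randn(1, 77, 2048)
lyra_e = torch.randn(1, 77, 2048)
positive = blend(clip_e, lyra_e, 0.3)
negative = blend(clip_e, lyra_e, 0.3 * 0.5)  # negative uses half strength, as in the diff
```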
@@ -710,9 +835,19 @@ class SDXLFlowMatchingPipeline:
         clip_skip: int = 1,
         t5_summary: str = "",
         lyra_strength: float = 1.0,
+        use_separator: bool = True,
+        clip_include_summary: bool = False,
         progress_callback=None
     ):
-        """Generate image using SDXL architecture."""
+        """Generate image using SDXL architecture.
+
+        Args:
+            prompt: Tags/keywords for image generation
+            negative_prompt: Negative tags
+            t5_summary: Natural language summary (T5 only, unless clip_include_summary=True)
+            use_separator: Use ¶ separator between tags and summary in T5 input
+            clip_include_summary: If True, append summary to CLIP input (default False)
+        """
 
         if seed is not None:
             generator = torch.Generator(device=self.device).manual_seed(seed)
@@ -722,7 +857,9 @@ class SDXLFlowMatchingPipeline:
         # Encode prompts (Lyra triggers lazy load only if use_lyra=True)
         if use_lyra and self.lyra_available:
             prompt_embeds, negative_prompt_embeds, pooled, negative_pooled = self.encode_prompt_lyra(
-                prompt, negative_prompt, clip_skip, t5_summary, lyra_strength
+                prompt, negative_prompt, clip_skip, t5_summary, lyra_strength,
+                use_separator=use_separator,
+                clip_include_summary=clip_include_summary
             )
         else:
             prompt_embeds, negative_prompt_embeds, pooled, negative_pooled = self.encode_prompt(
@@ -912,10 +1049,19 @@ class SD15FlowMatchingPipeline:
         return prompt_embeds, negative_prompt_embeds
 
     def encode_prompt_lyra(self, prompt: str, negative_prompt: str = ""):
-        """Encode using Lyra VAE (CLIP + T5 fusion)."""
+        """Encode using Lyra VAE v1 (CLIP + T5 fusion).
+
+        Uses sequence lengths from Lyra config for proper tokenization.
+        """
         if not self.lyra_available:
             raise ValueError("Lyra VAE components not configured")
 
+        # Get sequence length from config (v1 uses same length for clip and t5)
+        # Default to 77 for SD1.5/v1
+        t5_max_length = self.lyra_loader.config.get('seq_len', 77)
+
+        print(f"[Lyra v1] Using sequence length: {t5_max_length}")
+
         t5_encoder = self.t5_encoder
         t5_tokenizer = self.t5_tokenizer
         lyra_model = self.lyra_model
@@ -933,10 +1079,10 @@ class SD15FlowMatchingPipeline:
         with torch.no_grad():
             clip_embeds = self.text_encoder(text_input_ids)[0]
 
-        # T5
+        # T5 with config-specified max_length
         t5_inputs = t5_tokenizer(
             prompt,
-            max_length=
+            max_length=t5_max_length,
             padding='max_length',
             truncation=True,
             return_tensors='pt'
@@ -971,7 +1117,7 @@ class SD15FlowMatchingPipeline:
 
         t5_inputs_uncond = t5_tokenizer(
             negative_prompt,
-            max_length=
+            max_length=t5_max_length,
             padding='max_length',
             truncation=True,
             return_tensors='pt'
@@ -1363,7 +1509,7 @@ def estimate_duration(num_steps: int, width: int, height: int, use_lyra: bool =
 
 
 @spaces.GPU(duration=lambda *args: estimate_duration(
-    args[6], args[8], args[9], args[
+    args[6], args[8], args[9], args[14],
     "SDXL" in args[3] or "Illustrious" in args[3]
 ))
 def generate_image(
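Reviewer note: the duration lambda receives the same positional args Gradio passes to generate_image, so every hard-coded index must track the inputs list; inserting use_separator and clip_include_summary shifts everything after lyra_strength by two. A small sanity-check sketch of that mapping (parameter order copied from the updated signature):

```python
# Positional-argument index map for the ZeroGPU duration lambda.
PARAM_ORDER = [
    "prompt", "t5_summary", "negative_prompt", "model_choice",
    "scheduler_choice", "clip_skip", "num_steps", "cfg_scale",
    "width", "height", "shift", "use_flow_matching", "use_lyra",
    "lyra_strength", "use_separator", "clip_include_summary",
    "seed", "randomize_seed",
]

def arg_index(name: str) -> int:
    return PARAM_ORDER.index(name)

assert arg_index("num_steps") == 6
assert arg_index("width") == 8 and arg_index("height") == 9
assert arg_index("use_separator") == 14  # the index the updated lambda passes
```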
@@ -1381,11 +1527,20 @@ def generate_image(
     use_flow_matching: bool,
     use_lyra: bool,
     lyra_strength: float,
+    use_separator: bool,
+    clip_include_summary: bool,
     seed: int,
     randomize_seed: bool,
     progress=gr.Progress()
 ):
-    """Generate image with ZeroGPU support."""
+    """Generate image with ZeroGPU support.
+
+    Args:
+        prompt: Tags/keywords (CLIP input)
+        t5_summary: Natural language summary (T5 input, unless clip_include_summary)
+        use_separator: Use ¶ separator between tags and summary
+        clip_include_summary: If True, CLIP also sees the summary
+    """
 
     if randomize_seed:
         seed = np.random.randint(0, 2**32 - 1)
@@ -1464,6 +1619,8 @@ def generate_image(
             clip_skip=clip_skip,
             t5_summary=t5_summary,
             lyra_strength=lyra_strength,
+            use_separator=use_separator,
+            clip_include_summary=clip_include_summary,
             progress_callback=lambda s, t, d: progress(0.5 + (s/t) * 0.45, desc=d)
         )
 
@@ -1507,14 +1664,15 @@ def create_demo():
         prompt = gr.TextArea(
             label="Prompt (Tags for CLIP)",
             value="masterpiece, best quality, 1girl, blue hair, school uniform, cherry blossoms, detailed background",
-            lines=3
+            lines=3,
+            info="CLIP encoders see these tags. T5 also sees these + the summary below."
         )
 
         t5_summary = gr.TextArea(
-            label="T5 Summary (Natural Language
+            label="T5 Summary (Natural Language - T5 Only)",
             value="A beautiful anime girl with flowing blue hair wearing a school uniform, surrounded by delicate pink cherry blossoms against a bright sky",
             lines=2,
-            info="
+            info="T5 sees: tags ¶ summary. CLIP sees: tags only (unless 'Include Summary in CLIP' is enabled)."
         )
 
         negative_prompt = gr.TextArea(
@@ -1565,6 +1723,19 @@ def create_demo():
             info="0.0 = pure CLIP, 1.0 = pure Lyra reconstruction"
         )
 
+        with gr.Accordion("Lyra Advanced Settings", open=False):
+            use_separator = gr.Checkbox(
+                label="Use ¶ Separator",
+                value=True,
+                info="Insert ¶ between tags and summary in T5 input"
+            )
+
+            clip_include_summary = gr.Checkbox(
+                label="Include Summary in CLIP",
+                value=False,
+                info="By default CLIP sees tags only. Enable to append summary to CLIP input."
+            )
+
         with gr.Accordion("Generation Settings", open=True):
             num_steps = gr.Slider(
                 label="Steps",
@@ -1725,7 +1896,8 @@ def create_demo():
         inputs=[
             prompt, t5_summary, negative_prompt, model_choice, scheduler_choice, clip_skip,
             num_steps, cfg_scale, width, height, shift,
-            use_flow_matching, use_lyra, lyra_strength,
+            use_flow_matching, use_lyra, lyra_strength, use_separator, clip_include_summary,
+            seed, randomize_seed
         ],
         outputs=[output_image_standard, output_image_lyra, output_seed]
     )
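Reviewer note: since the click wiring passes inputs positionally, the list order must match generate_image's parameter order exactly. An abbreviated, self-contained sketch of the new accordion and wiring; everything outside the two checkboxes is a stand-in for the real components in create_demo():

```python
# Minimal Gradio sketch of the Lyra accordion and positional input wiring.
import gradio as gr

with gr.Blocks() as demo:
    prompt = gr.TextArea(label="Prompt (Tags for CLIP)", lines=3)
    t5_summary = gr.TextArea(label="T5 Summary (Natural Language - T5 Only)", lines=2)

    with gr.Accordion("Lyra Advanced Settings", open=False):
        use_separator = gr.Checkbox(label="Use ¶ Separator", value=True)
        clip_include_summary = gr.Checkbox(label="Include Summary in CLIP", value=False)

    out = gr.Textbox(label="Echo")

    def run(p, s, sep, inc):
        # The inputs list below must match this signature positionally
        return f"sep={sep} inc={inc} :: {p[:20]} / {s[:20]}"

    btn = gr.Button("Generate")
    btn.click(run, inputs=[prompt, t5_summary, use_separator, clip_include_summary], outputs=[out])
```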