lulavc committed on
Commit
67fd625
·
verified ·
1 Parent(s): 779976d

Enable FlashAttention-3 + torch.compile (AoTI) for H200

Browse files
Files changed (1) hide show
  1. app.py +19 -1
app.py CHANGED
@@ -711,6 +711,24 @@ pipe_t2i = DiffusionPipeline.from_pretrained(
711
  )
712
  pipe_t2i.to("cuda")
713
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
714
  # Note: ZImagePipeline custom pipeline doesn't support VAE slicing/tiling optimization
715
 
716
  pipe_i2i = ZImageImg2ImgPipeline(
@@ -721,7 +739,7 @@ pipe_i2i = ZImageImg2ImgPipeline(
721
  scheduler=pipe_t2i.scheduler,
722
  )
723
 
724
- logger.info("Pipelines ready! (TF32 + SDPA optimizations enabled)")
725
 
726
  STYLES = ["None", "Photorealistic", "Cinematic", "Anime", "Digital Art",
727
  "Oil Painting", "Watercolor", "3D Render", "Fantasy", "Sci-Fi"]
 
711
  )
712
  pipe_t2i.to("cuda")
713
 
714
+ # Enable FlashAttention-3 on Hopper GPUs (H100/H200) via kernels library
715
+ try:
716
+ pipe_t2i.transformer.set_attention_backend("_flash_3_hub")
717
+ logger.info("FlashAttention-3 enabled via kernels library")
718
+ except Exception as e:
719
+ logger.warning(f"FA3 not available, using default attention: {e}")
720
+
721
+ # Enable torch.compile with AoTI (Ahead of Time Inductor) for faster inference
722
+ try:
723
+ pipe_t2i.transformer = torch.compile(
724
+ pipe_t2i.transformer,
725
+ mode="max-autotune", # Best performance on H200
726
+ fullgraph=True,
727
+ )
728
+ logger.info("torch.compile (AoTI) enabled for transformer")
729
+ except Exception as e:
730
+ logger.warning(f"torch.compile not available: {e}")
731
+
732
  # Note: ZImagePipeline custom pipeline doesn't support VAE slicing/tiling optimization
733
 
734
  pipe_i2i = ZImageImg2ImgPipeline(
 
739
  scheduler=pipe_t2i.scheduler,
740
  )
741
 
742
+ logger.info("Pipelines ready! (TF32 + FA3 + AoTI optimizations enabled)")
743
 
744
  STYLES = ["None", "Photorealistic", "Cinematic", "Anime", "Digital Art",
745
  "Oil Painting", "Watercolor", "3D Render", "Fantasy", "Sci-Fi"]