Spaces:
Running
on
Zero
Running
on
Zero
Enable FlashAttention-3 + torch.compile (AoTI) for H200
Browse files
app.py
CHANGED
|
@@ -711,6 +711,24 @@ pipe_t2i = DiffusionPipeline.from_pretrained(
|
|
| 711 |
)
|
| 712 |
pipe_t2i.to("cuda")
|
| 713 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 714 |
# Note: ZImagePipeline custom pipeline doesn't support VAE slicing/tiling optimization
|
| 715 |
|
| 716 |
pipe_i2i = ZImageImg2ImgPipeline(
|
|
@@ -721,7 +739,7 @@ pipe_i2i = ZImageImg2ImgPipeline(
|
|
| 721 |
scheduler=pipe_t2i.scheduler,
|
| 722 |
)
|
| 723 |
|
| 724 |
-
logger.info("Pipelines ready! (TF32 +
|
| 725 |
|
| 726 |
STYLES = ["None", "Photorealistic", "Cinematic", "Anime", "Digital Art",
|
| 727 |
"Oil Painting", "Watercolor", "3D Render", "Fantasy", "Sci-Fi"]
|
|
|
|
| 711 |
)
|
| 712 |
pipe_t2i.to("cuda")
|
| 713 |
|
| 714 |
+
# Opt in to FlashAttention-3 on Hopper GPUs (H100/H200) via the kernels library.
# Best-effort: on any failure a warning is logged and the transformer keeps
# whatever attention backend it already had.
try:
    pipe_t2i.transformer.set_attention_backend("_flash_3_hub")
except Exception as e:
    logger.warning(f"FA3 not available, using default attention: {e}")
else:
    logger.info("FlashAttention-3 enabled via kernels library")
|
| 720 |
+
|
| 721 |
+
# Wrap the transformer with torch.compile for faster inference.
#
# NOTE: torch.compile is just-in-time compilation through TorchInductor; it is
# not AOTInductor (AoTI), which is a separate export-and-package workflow.
# The call below only installs a lazy wrapper: tracing and autotuning run on
# the first forward pass. Consequently this try/except only guards wrapper
# construction (e.g. an unsupported platform); compile-time failures, including
# graph breaks rejected by fullgraph=True, surface at the first inference call.
#
# NOTE(review): this Space appears to run on ZeroGPU -- confirm JIT compilation
# is usable there, or switch to the ahead-of-time flow from the ZeroGPU docs.
try:
    pipe_t2i.transformer = torch.compile(
        pipe_t2i.transformer,
        mode="max-autotune",  # Most aggressive tuning; longest first-call compile
        fullgraph=True,  # Treat any graph break as an error instead of falling back
    )
except Exception as e:
    logger.warning(f"torch.compile not available: {e}")
else:
    logger.info(
        "torch.compile wrapper installed for transformer "
        "(compilation runs on first call)"
    )
|
| 731 |
+
|
| 732 |
# Note: ZImagePipeline custom pipeline doesn't support VAE slicing/tiling optimization
|
| 733 |
|
| 734 |
pipe_i2i = ZImageImg2ImgPipeline(
|
|
|
|
| 739 |
scheduler=pipe_t2i.scheduler,
|
| 740 |
)
|
| 741 |
|
| 742 |
+
# The FA3 and torch.compile steps above are best-effort and may have fallen
# back, so report them as attempted; the earlier log lines carry the outcome.
logger.info(
    "Pipelines ready! (TF32 enabled; FA3 and torch.compile attempted, see log above)"
)
|
| 743 |
|
| 744 |
# Style names, in their original order ("None" first).
STYLES = [
    "None",
    "Photorealistic",
    "Cinematic",
    "Anime",
    "Digital Art",
    "Oil Painting",
    "Watercolor",
    "3D Render",
    "Fantasy",
    "Sci-Fi",
]
|