Spaces: Running on Zero
Upload app.py with huggingface_hub
app.py CHANGED
@@ -382,13 +382,8 @@ API_TIMEOUT = 90.0
 API_MAX_RETRIES = 2
 MAX_DESCRIPTION_LENGTH = 6000 # For GLM prompt generation - doubled for very detailed descriptions
 
-#
-
-torch.backends.cuda.enable_mem_efficient_sdp(True)
-torch.backends.cudnn.benchmark = True
-# Enable TF32 for better performance on Ampere+ GPUs
-torch.backends.cuda.matmul.allow_tf32 = True
-torch.backends.cudnn.allow_tf32 = True
+# Backend settings will be applied when GPU is available (inside @spaces.GPU functions)
+# Don't set them here to avoid CUDA initialization at module load time
 
 # Singleton clients with timeout and retry
 _deepseek_client: Optional[OpenAI] = None
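The removed block set CUDA/cuDNN backend flags at import time; the replacement comments defer that work until a GPU is actually attached inside a @spaces.GPU call. A minimal sketch, not part of this commit and assuming only torch is installed, of how to confirm that importing the module stays CPU-only:

import torch

# True only if something has already created a CUDA context (for example an eager
# .to("cuda") during import); with all GPU work deferred into @spaces.GPU functions,
# this should print False right after app.py finishes importing.
print("CUDA context created:", torch.cuda.is_initialized())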
@@ -861,64 +856,15 @@ MAX_SEQ_LEN = 65536 # 2048x2048 -> 256x256 -> 65,536
 # which requires special handling in torch.export. Enable with ENABLE_AOTI=true once fixed.
 ENABLE_AOTI = os.environ.get("ENABLE_AOTI", "false").lower() == "true"
 
-logger.info("Loading Z-Image-Turbo pipeline...")
+logger.info("Loading Z-Image-Turbo pipeline (CPU)...")
 
 pipe_t2i = DiffusionPipeline.from_pretrained(
     "Tongyi-MAI/Z-Image-Turbo",
     torch_dtype=torch.bfloat16,
 )
-
+# Don't move to CUDA here - ZeroGPU requires GPU ops inside @spaces.GPU functions
 
-#
-try:
-    pipe_t2i.transformer.set_attention_backend("_flash_3_hub")
-    logger.info("FlashAttention-3 enabled via kernels library")
-except Exception as e:
-    logger.warning(f"FA3 not available, using default SDPA attention: {e}")
-
-# =============================================================================
-# APPLY AOTI COMPILATION
-# =============================================================================
-
-if ENABLE_AOTI:
-    try:
-        # Use the corrected compile function that handles positional args properly
-        @spaces.GPU(duration=1500)
-        def _compile_wrapper():
-            return compile_transformer_aoti(
-                pipe=pipe_t2i,
-                example_prompt="example prompt for compilation",
-                height=1024,
-                width=1024,
-                num_inference_steps=1,
-                inductor_configs=INDUCTOR_CONFIGS,
-                min_seq_len=MIN_SEQ_LEN,
-                max_seq_len=MAX_SEQ_LEN,
-            )
-
-        compiled_transformer = _compile_wrapper()
-        if compiled_transformer is not None:
-            spaces.aoti_apply(compiled_transformer, pipe_t2i.transformer)
-            logger.info("AoTI transformer applied successfully (1.3x-1.8x speedup expected)")
-        else:
-            logger.warning("Using non-compiled transformer (no AoTI speedup)")
-    except Exception as e:
-        logger.error(f"AoTI application failed: {type(e).__name__}: {str(e)}")
-        logger.warning("Using non-compiled transformer")
-else:
-    logger.info("AoTI compilation disabled via ENABLE_AOTI=false")
-
-# Enable torch.compile for VAE decoder (keep existing optimization)
-try:
-    pipe_t2i.vae.decode = torch.compile(
-        pipe_t2i.vae.decode,
-        mode="reduce-overhead",
-    )
-    logger.info("torch.compile enabled for VAE decoder")
-except Exception as e:
-    logger.warning(f"VAE torch.compile failed: {e}")
-
-# Create image-to-image pipeline (shares compiled transformer)
+# Create image-to-image pipeline (shares components)
 pipe_i2i = ZImageImg2ImgPipeline(
     transformer=pipe_t2i.transformer,
     vae=pipe_t2i.vae,
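This hunk keeps the bfloat16 weights on the CPU at load time and drops the import-time FlashAttention-3, AoTI, and VAE torch.compile setup; their equivalents reappear inside _ensure_gpu() in the next hunk. A minimal sketch, not from the commit, that checks the components the img2img pipeline reuses are still CPU-resident after loading:

import torch
from diffusers import DiffusionPipeline

pipe = DiffusionPipeline.from_pretrained(
    "Tongyi-MAI/Z-Image-Turbo",
    torch_dtype=torch.bfloat16,
)
# No .to("cuda") has run yet, so the shared modules report the CPU device; the
# img2img pipeline reuses these exact objects, so nothing is loaded twice.
print(next(pipe.transformer.parameters()).device)  # expected: cpu
print(next(pipe.vae.parameters()).device)          # expected: cpu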
@@ -927,10 +873,46 @@ pipe_i2i = ZImageImg2ImgPipeline(
     scheduler=pipe_t2i.scheduler,
 )
 
-if
-
-
-
+# Track if pipelines have been moved to GPU and optimized
+_gpu_initialized = False
+
+def _ensure_gpu():
+    """Move pipelines to GPU and apply optimizations (called inside @spaces.GPU functions)."""
+    global _gpu_initialized
+    if _gpu_initialized:
+        return
+
+    # Enable optimized backends
+    torch.backends.cuda.enable_flash_sdp(True)
+    torch.backends.cuda.enable_mem_efficient_sdp(True)
+    torch.backends.cudnn.benchmark = True
+    torch.backends.cuda.matmul.allow_tf32 = True
+    torch.backends.cudnn.allow_tf32 = True
+
+    # Move to GPU
+    pipe_t2i.to("cuda")
+
+    # Enable FlashAttention-3 via kernels library (H100/H200 Hopper GPUs)
+    try:
+        pipe_t2i.transformer.set_attention_backend("_flash_3_hub")
+        logger.info("[GPU] FlashAttention-3 enabled via kernels library")
+    except Exception as e:
+        logger.warning(f"[GPU] FA3 not available, using default SDPA attention: {e}")
+
+    # Enable torch.compile for VAE decoder
+    try:
+        pipe_t2i.vae.decode = torch.compile(
+            pipe_t2i.vae.decode,
+            mode="reduce-overhead",
+        )
+        logger.info("[GPU] torch.compile enabled for VAE decoder")
+    except Exception as e:
+        logger.warning(f"[GPU] VAE torch.compile failed: {e}")
+
+    _gpu_initialized = True
+    logger.info("[GPU] Pipelines ready! (TF32 + FA3 + VAE compile)")
+
+logger.info("Pipelines loaded on CPU - will move to GPU on first generation")
 
 STYLES = ["None", "Photorealistic", "Cinematic", "Anime", "Digital Art",
           "Oil Painting", "Watercolor", "3D Render", "Fantasy", "Sci-Fi"]
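This is the core of the change: all CUDA work now happens lazily, on the first call, from inside a function that already holds a ZeroGPU allocation. A self-contained sketch of the same pattern (the Linear model and the names below are placeholders, not the real app.py objects):

import torch
import spaces  # ZeroGPU helper provided on Hugging Face Spaces

_ready = False
model = torch.nn.Linear(4, 4, dtype=torch.bfloat16)  # stand-in for the diffusion pipeline

def ensure_gpu_sketch() -> None:
    """One-time GPU setup: backend flags, device move, optional compiles."""
    global _ready
    if _ready:
        return
    torch.backends.cuda.matmul.allow_tf32 = True  # safe now that a GPU is attached
    model.to("cuda")
    _ready = True

@spaces.GPU(duration=60)
def run(x: torch.Tensor) -> torch.Tensor:
    ensure_gpu_sketch()  # the first call pays the setup cost; later calls skip it
    return model(x.to("cuda", dtype=torch.bfloat16)).cpu()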
@@ -1130,6 +1112,7 @@ def transform_with_polish(input_image: Optional[Image.Image], prompt: str, style
 @spaces.GPU(duration=120)
 def generate(full_prompt: str, polished_display: str, ratio: str, steps: int, seed: int, randomize: bool, progress=gr.Progress(track_tqdm=True)) -> Tuple[Optional[Image.Image], int]:
     """Generate image from text prompt."""
+    _ensure_gpu()
     if randomize:
         seed = torch.randint(0, 2**32 - 1, (1,)).item()
     seed = int(seed)
@@ -1159,6 +1142,7 @@ def generate(full_prompt: str, polished_display: str, ratio: str, steps: int, se
 @spaces.GPU(duration=90)
 def transform(input_image: Optional[Image.Image], full_prompt: str, polished_display: str, strength: float, steps: int, seed: int, randomize: bool, progress=gr.Progress(track_tqdm=True)) -> Tuple[Optional[Image.Image], int]:
     """Transform image using prompt guidance."""
+    _ensure_gpu()
     if input_image is None:
         return None, 0
 
@@ -1207,6 +1191,7 @@ def transform(input_image: Optional[Image.Image], full_prompt: str, polished_dis
 def mcp_generate(prompt: str, style: str = "None", ratio: str = "1:1 Square (1024x1024)",
                  steps: int = 9, seed: int = 42, randomize: bool = True) -> Tuple[Optional[Image.Image], int]:
     """MCP-friendly image generation. Takes prompt directly and handles polish internally."""
+    _ensure_gpu()
     if randomize:
         seed = torch.randint(0, 2**32 - 1, (1,)).item()
     seed = int(seed)
@@ -1241,6 +1226,7 @@ def mcp_transform(image: Optional[Image.Image], prompt: str, style: str = "None"
                   strength: float = 0.6, steps: int = 9, seed: int = 42,
                   randomize: bool = True) -> Tuple[Optional[Image.Image], int]:
     """MCP-friendly image transformation. Takes all parameters directly."""
+    _ensure_gpu()
     if image is None:
         return None, 0
 
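The last four hunks repeat one line, _ensure_gpu(), at the top of every @spaces.GPU entry point (generate, transform, mcp_generate, mcp_transform). A possible follow-up, sketched here and not part of the commit, is to fold that call into a small decorator so a future entry point cannot forget it:

import functools

def gpu_entrypoint(fn):
    """Run the one-time GPU setup before the wrapped function body."""
    @functools.wraps(fn)
    def wrapper(*args, **kwargs):
        _ensure_gpu()  # assumes the _ensure_gpu() defined earlier in app.py
        return fn(*args, **kwargs)
    return wrapper

# Hypothetical usage: stack it under @spaces.GPU so the GPU is claimed first,
# then the pipelines are moved and optimized exactly once.
# @spaces.GPU(duration=120)
# @gpu_entrypoint
# def generate(...):
#     ...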