Spaces:
Running
on
Zero
Running
on
Zero
Alexander Bagus
committed on
Commit
·
52fdd94
1
Parent(s):
31717b2
22
Browse files
- app.py +35 -22
- utils/image_utils.py +3 -3
app.py
CHANGED
|
@@ -8,7 +8,8 @@ from transformers import AutoTokenizer, Qwen3ForCausalLM
|
|
| 8 |
from diffusers import AutoencoderKL
|
| 9 |
from utils.image_utils import get_image_latent, scale_image
|
| 10 |
from utils.prompt_utils import polish_prompt
|
| 11 |
-
from controlnet_aux import HEDdetector, MLSDdetector, OpenposeDetector, CannyDetector, MidasDetector
|
|
|
|
| 12 |
# from videox_fun.utils.utils import get_image_latent
|
| 13 |
|
| 14 |
|
|
@@ -76,30 +77,17 @@ pipe.transformer.layers._repeated_blocks = ["ZImageTransformerBlock"]
|
|
| 76 |
spaces.aoti_blocks_load(pipe.transformer.layers,
|
| 77 |
"zerogpu-aoti/Z-Image", variant="fa3")
|
| 78 |
|
| 79 |
-
def prepare(prompt
|
| 80 |
polished_prompt = polish_prompt(prompt)
|
| 81 |
|
| 82 |
return polished_prompt
|
| 83 |
|
| 84 |
-
# if control_mode == 'HED':
|
| 85 |
-
# processor = HEDdetector.from_pretrained("lllyasviel/Annotators")
|
| 86 |
-
# if control_mode =='Midas':
|
| 87 |
-
# processor = MidasDetector.from_pretrained("lllyasviel/Annotators")
|
| 88 |
-
# if control_mode =='MLSD':
|
| 89 |
-
# processor = MLSDdetector.from_pretrained("lllyasviel/Annotators")
|
| 90 |
-
# if control_mode =='Pose':
|
| 91 |
-
# processor = OpenposeDetector.from_pretrained("lllyasviel/Annotators")
|
| 92 |
-
# else:
|
| 93 |
-
# processor = CannyDetector()
|
| 94 |
-
# control_image = processor(input_image)
|
| 95 |
-
|
| 96 |
-
# return polished_prompt, control_image
|
| 97 |
-
|
| 98 |
@spaces.GPU
|
| 99 |
def inference(
|
| 100 |
prompt,
|
| 101 |
input_image,
|
| 102 |
image_scale=1.0,
|
|
|
|
| 103 |
control_context_scale = 0.75,
|
| 104 |
seed=42,
|
| 105 |
randomize_seed=True,
|
|
@@ -114,12 +102,24 @@ def inference(
|
|
| 114 |
|
| 115 |
input_image, width, height = scale_image(input_image, image_scale)
|
| 116 |
|
| 117 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 118 |
|
| 119 |
-
|
| 120 |
-
if randomize_seed:
|
| 121 |
-
seed = random.randint(0, MAX_SEED)
|
| 122 |
|
|
|
|
|
|
|
| 123 |
generator = torch.Generator().manual_seed(seed)
|
| 124 |
|
| 125 |
image = pipe(
|
|
@@ -128,12 +128,12 @@ def inference(
|
|
| 128 |
width=width,
|
| 129 |
generator=generator,
|
| 130 |
guidance_scale=guidance_scale,
|
| 131 |
-
control_image=
|
| 132 |
num_inference_steps=num_inference_steps,
|
| 133 |
control_context_scale=control_context_scale,
|
| 134 |
).images[0]
|
| 135 |
|
| 136 |
-
return image, seed
|
| 137 |
|
| 138 |
|
| 139 |
def read_file(path: str) -> str:
|
|
@@ -236,6 +236,19 @@ with gr.Blocks(css=css) as demo:
|
|
| 236 |
# fn=generate_image,
|
| 237 |
# inputs=None, # This will automatically use the previous result
|
| 238 |
# outputs=output
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 239 |
)
|
| 240 |
# gr.on(
|
| 241 |
# triggers=[run_button.click, prompt.submit],
|
|
|
|
| 8 |
from diffusers import AutoencoderKL
|
| 9 |
from utils.image_utils import get_image_latent, scale_image
|
| 10 |
from utils.prompt_utils import polish_prompt
|
| 11 |
+
# from controlnet_aux import HEDdetector, MLSDdetector, OpenposeDetector, CannyDetector, MidasDetector
|
| 12 |
+
from controlnet_aux.processor import Processor
|
| 13 |
# from videox_fun.utils.utils import get_image_latent
|
| 14 |
|
| 15 |
|
|
|
|
| 77 |
spaces.aoti_blocks_load(pipe.transformer.layers,
|
| 78 |
"zerogpu-aoti/Z-Image", variant="fa3")
|
| 79 |
|
| 80 |
+
def prepare(prompt):
    """Return ``prompt`` after passing it through ``polish_prompt``."""
    return polish_prompt(prompt)
|
| 84 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 85 |
@spaces.GPU
|
| 86 |
def inference(
|
| 87 |
prompt,
|
| 88 |
input_image,
|
| 89 |
image_scale=1.0,
|
| 90 |
+
control_mode='Canny',
|
| 91 |
control_context_scale = 0.75,
|
| 92 |
seed=42,
|
| 93 |
randomize_seed=True,
|
|
|
|
| 102 |
|
| 103 |
input_image, width, height = scale_image(input_image, image_scale)
|
| 104 |
|
| 105 |
+
if control_mode == 'HED':
|
| 106 |
+
processor = Processor('softedge_hed')
|
| 107 |
+
if control_mode =='Midas':
|
| 108 |
+
processor = Processor('depth_midas')
|
| 109 |
+
if control_mode =='MLSD':
|
| 110 |
+
processor = Processor('mlsd')
|
| 111 |
+
if control_mode =='Pose':
|
| 112 |
+
processor = Processor('openpose_full')
|
| 113 |
+
else:
|
| 114 |
+
processor = Processor('canny')
|
| 115 |
+
|
| 116 |
+
control_image = scale_image(input_image, image_scale, 64)
|
| 117 |
+
control_image = processor(control_image, to_pil=True)
|
| 118 |
|
| 119 |
+
control_image_torch = get_image_latent(control_image, sample_size=[height, width])[:, :, 0]
|
|
|
|
|
|
|
| 120 |
|
| 121 |
+
# generation
|
| 122 |
+
if randomize_seed: seed = random.randint(0, MAX_SEED)
|
| 123 |
generator = torch.Generator().manual_seed(seed)
|
| 124 |
|
| 125 |
image = pipe(
|
|
|
|
| 128 |
width=width,
|
| 129 |
generator=generator,
|
| 130 |
guidance_scale=guidance_scale,
|
| 131 |
+
control_image=control_image_torch,
|
| 132 |
num_inference_steps=num_inference_steps,
|
| 133 |
control_context_scale=control_context_scale,
|
| 134 |
).images[0]
|
| 135 |
|
| 136 |
+
return image, seed, control_image
|
| 137 |
|
| 138 |
|
| 139 |
def read_file(path: str) -> str:
|
|
|
|
| 236 |
# fn=generate_image,
|
| 237 |
# inputs=None, # This will automatically use the previous result
|
| 238 |
# outputs=output
|
| 239 |
+
fn=inference,
|
| 240 |
+
inputs=[
|
| 241 |
+
polished_prompt,
|
| 242 |
+
input_image,
|
| 243 |
+
image_scale,
|
| 244 |
+
"Canny",
|
| 245 |
+
control_context_scale,
|
| 246 |
+
seed,
|
| 247 |
+
randomize_seed,
|
| 248 |
+
guidance_scale,
|
| 249 |
+
num_inference_steps,
|
| 250 |
+
],
|
| 251 |
+
outputs=[output_image, seed],
|
| 252 |
)
|
| 253 |
# gr.on(
|
| 254 |
# triggers=[run_button.click, prompt.submit],
|
utils/image_utils.py
CHANGED
|
@@ -2,14 +2,14 @@ import torch
|
|
| 2 |
from PIL import Image
|
| 3 |
import numpy as np
|
| 4 |
|
| 5 |
-
def scale_image(img, scale):
|
| 6 |
w, h = img.size
|
| 7 |
new_w = int(w * scale)
|
| 8 |
new_h = int(h * scale)
|
| 9 |
|
| 10 |
# Adjust to nearest multiple of 32
|
| 11 |
-
new_w = (new_w //
|
| 12 |
-
new_h = (new_h //
|
| 13 |
|
| 14 |
return img.resize((new_w, new_h), Image.LANCZOS), new_w, new_h
|
| 15 |
|
|
|
|
| 2 |
from PIL import Image
|
| 3 |
import numpy as np
|
| 4 |
|
| 5 |
+
def scale_image(img, scale, nearest=32):
    """Resize ``img`` by ``scale`` with both sides snapped to a multiple of ``nearest``.

    Args:
        img: Image-like object exposing ``size`` as ``(width, height)`` and a
            ``resize((width, height), resample)`` method.
        scale: Multiplier applied to both dimensions before snapping.
        nearest: Step size; each scaled dimension is rounded *down* to a
            multiple of it. Defaults to 32.

    Returns:
        Tuple ``(resized_image, new_width, new_height)``.
    """
    w, h = img.size

    # Floor each scaled side to a multiple of `nearest`, but never below one
    # full step: without the clamp a small input (or small scale) would
    # produce a 0-pixel side, which is not a usable resize target.
    new_w = max(nearest, (int(w * scale) // nearest) * nearest)
    new_h = max(nearest, (int(h * scale) // nearest) * nearest)

    return img.resize((new_w, new_h), Image.LANCZOS), new_w, new_h
|
| 15 |
|