Alexander Bagus committed
Commit 52fdd94 · 1 parent: 31717b2
Files changed (2):
  1. app.py +35 -22
  2. utils/image_utils.py +3 -3
app.py CHANGED
@@ -8,7 +8,8 @@ from transformers import AutoTokenizer, Qwen3ForCausalLM
 from diffusers import AutoencoderKL
 from utils.image_utils import get_image_latent, scale_image
 from utils.prompt_utils import polish_prompt
-from controlnet_aux import HEDdetector, MLSDdetector, OpenposeDetector, CannyDetector, MidasDetector
+# from controlnet_aux import HEDdetector, MLSDdetector, OpenposeDetector, CannyDetector, MidasDetector
+from controlnet_aux.processor import Processor
 # from videox_fun.utils.utils import get_image_latent
 
 
@@ -76,30 +77,17 @@ pipe.transformer.layers._repeated_blocks = ["ZImageTransformerBlock"]
 spaces.aoti_blocks_load(pipe.transformer.layers,
                         "zerogpu-aoti/Z-Image", variant="fa3")
 
-def prepare(prompt, input_image, control_mode='Canny'):
+def prepare(prompt):
     polished_prompt = polish_prompt(prompt)
 
     return polished_prompt
 
-    # if control_mode == 'HED':
-    #     processor = HEDdetector.from_pretrained("lllyasviel/Annotators")
-    # if control_mode == 'Midas':
-    #     processor = MidasDetector.from_pretrained("lllyasviel/Annotators")
-    # if control_mode == 'MLSD':
-    #     processor = MLSDdetector.from_pretrained("lllyasviel/Annotators")
-    # if control_mode == 'Pose':
-    #     processor = OpenposeDetector.from_pretrained("lllyasviel/Annotators")
-    # else:
-    #     processor = CannyDetector()
-    # control_image = processor(input_image)
-
-    # return polished_prompt, control_image
-
 @spaces.GPU
 def inference(
     prompt,
     input_image,
     image_scale=1.0,
+    control_mode='Canny',
     control_context_scale=0.75,
     seed=42,
     randomize_seed=True,
@@ -114,12 +102,24 @@ def inference(
 
     input_image, width, height = scale_image(input_image, image_scale)
 
-    control_image = get_image_latent(input_image, sample_size=[height, width])[:, :, 0]
+    if control_mode == 'HED':
+        processor = Processor('softedge_hed')
+    elif control_mode == 'Midas':
+        processor = Processor('depth_midas')
+    elif control_mode == 'MLSD':
+        processor = Processor('mlsd')
+    elif control_mode == 'Pose':
+        processor = Processor('openpose_full')
+    else:
+        processor = Processor('canny')
+
+    control_image, _, _ = scale_image(input_image, image_scale, 64)
+    control_image = processor(control_image, to_pil=True)
 
+    control_image_torch = get_image_latent(control_image, sample_size=[height, width])[:, :, 0]
-    # generation
-    if randomize_seed:
-        seed = random.randint(0, MAX_SEED)
 
+    # generation
+    if randomize_seed: seed = random.randint(0, MAX_SEED)
     generator = torch.Generator().manual_seed(seed)
 
     image = pipe(
@@ -128,12 +128,12 @@ def inference(
         width=width,
         generator=generator,
        guidance_scale=guidance_scale,
-        control_image=control_image,
+        control_image=control_image_torch,
         num_inference_steps=num_inference_steps,
         control_context_scale=control_context_scale,
     ).images[0]
 
-    return image, seed
+    return image, seed, control_image
 
 
 def read_file(path: str) -> str:
@@ -236,6 +236,19 @@ with gr.Blocks(css=css) as demo:
     #     fn=generate_image,
     #     inputs=None,  # This will automatically use the previous result
     #     outputs=output
+        fn=inference,
+        inputs=[
+            polished_prompt,
+            input_image,
+            image_scale,
+            "Canny",
+            control_context_scale,
+            seed,
+            randomize_seed,
+            guidance_scale,
+            num_inference_steps,
+        ],
+        outputs=[output_image, seed],
     )
     # gr.on(
     #     triggers=[run_button.click, prompt.submit],
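Note on the control branch above: the chained tests must be `elif`s, because with independent `if`s the final `else` pairs only with the 'Pose' test, so every mode except 'Pose' would be overwritten by the Canny processor; likewise `scale_image` returns an `(image, width, height)` tuple, which is why the control branch unpacks it before calling the annotator. The same dispatch can also be written as a lookup table; a minimal sketch follows, in which the processor IDs are the ones controlnet_aux ships, but the dict and helper names are illustrative and not part of this commit:

from controlnet_aux.processor import Processor

# Illustrative mapping of the app's control modes to controlnet_aux processor IDs.
PROCESSOR_IDS = {
    'HED': 'softedge_hed',
    'Midas': 'depth_midas',
    'MLSD': 'mlsd',
    'Pose': 'openpose_full',
}

def get_control_processor(control_mode: str) -> Processor:
    # Unknown modes fall back to Canny, mirroring the else branch above.
    return Processor(PROCESSOR_IDS.get(control_mode, 'canny'))

A dict lookup keeps the mode list in one place and cannot silently shadow an earlier branch the way chained `if`s can.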
 
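The event wiring in the last hunk has two loose ends: `inputs=` contains the literal string "Canny" where Gradio expects a component, and `outputs=` lists two components while `inference` now returns three values (`image, seed, control_image`). A sketch of wiring that matches the new signature, assuming a `control_mode` dropdown and a `control_image_preview` image component that are hypothetical here and not part of this commit:

import gradio as gr

# Hypothetical components; all other names come from the commit's inputs list.
control_mode = gr.Dropdown(
    choices=['Canny', 'HED', 'Midas', 'MLSD', 'Pose'],
    value='Canny',
    label='Control mode',
)
control_image_preview = gr.Image(label='Control image')

run_button.click(
    fn=inference,
    inputs=[
        polished_prompt,
        input_image,
        image_scale,
        control_mode,  # a component, not the literal "Canny"
        control_context_scale,
        seed,
        randomize_seed,
        guidance_scale,
        num_inference_steps,
    ],
    outputs=[output_image, seed, control_image_preview],  # three return values
)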
utils/image_utils.py CHANGED
@@ -2,14 +2,14 @@ import torch
 from PIL import Image
 import numpy as np
 
-def scale_image(img, scale):
+def scale_image(img, scale, nearest=32):
     w, h = img.size
     new_w = int(w * scale)
     new_h = int(h * scale)
 
-    # Adjust to nearest multiple of 32
-    new_w = (new_w // 32) * 32
-    new_h = (new_h // 32) * 32
+    # Adjust to the nearest (floored) multiple of `nearest`
+    new_w = (new_w // nearest) * nearest
+    new_h = (new_h // nearest) * nearest
 
     return img.resize((new_w, new_h), Image.LANCZOS), new_w, new_h
 