Alexander Bagus committed
Commit 52fdd94 · 1 parent: 31717b2
Files changed (2):
  1. app.py +35 -22
  2. utils/image_utils.py +3 -3
app.py CHANGED
@@ -8,7 +8,8 @@ from transformers import AutoTokenizer, Qwen3ForCausalLM
 from diffusers import AutoencoderKL
 from utils.image_utils import get_image_latent, scale_image
 from utils.prompt_utils import polish_prompt
-from controlnet_aux import HEDdetector, MLSDdetector, OpenposeDetector, CannyDetector, MidasDetector
+# from controlnet_aux import HEDdetector, MLSDdetector, OpenposeDetector, CannyDetector, MidasDetector
+from controlnet_aux.processor import Processor
 # from videox_fun.utils.utils import get_image_latent
 
 
@@ -76,30 +77,17 @@ pipe.transformer.layers._repeated_blocks = ["ZImageTransformerBlock"]
 spaces.aoti_blocks_load(pipe.transformer.layers,
                         "zerogpu-aoti/Z-Image", variant="fa3")
 
-def prepare(prompt, input_image, control_mode='Canny'):
+def prepare(prompt):
     polished_prompt = polish_prompt(prompt)
 
     return polished_prompt
 
-    # if control_mode == 'HED':
-    #     processor = HEDdetector.from_pretrained("lllyasviel/Annotators")
-    # if control_mode == 'Midas':
-    #     processor = MidasDetector.from_pretrained("lllyasviel/Annotators")
-    # if control_mode == 'MLSD':
-    #     processor = MLSDdetector.from_pretrained("lllyasviel/Annotators")
-    # if control_mode == 'Pose':
-    #     processor = OpenposeDetector.from_pretrained("lllyasviel/Annotators")
-    # else:
-    #     processor = CannyDetector()
-    # control_image = processor(input_image)
-
-    # return polished_prompt, control_image
-
 @spaces.GPU
 def inference(
     prompt,
     input_image,
     image_scale=1.0,
+    control_mode='Canny',
     control_context_scale=0.75,
     seed=42,
     randomize_seed=True,
@@ -114,12 +102,24 @@ def inference(
 
     input_image, width, height = scale_image(input_image, image_scale)
 
-    control_image = get_image_latent(input_image, sample_size=[height, width])[:, :, 0]
+    if control_mode == 'HED':
+        processor = Processor('softedge_hed')
+    elif control_mode == 'Midas':
+        processor = Processor('depth_midas')
+    elif control_mode == 'MLSD':
+        processor = Processor('mlsd')
+    elif control_mode == 'Pose':
+        processor = Processor('openpose_full')
+    else:
+        processor = Processor('canny')
+
+    control_image, _, _ = scale_image(input_image, image_scale, 64)
+    control_image = processor(control_image, to_pil=True)
 
+    control_image_torch = get_image_latent(control_image, sample_size=[height, width])[:, :, 0]
-    # generation
-    if randomize_seed:
-        seed = random.randint(0, MAX_SEED)
 
+    # generation
+    if randomize_seed: seed = random.randint(0, MAX_SEED)
     generator = torch.Generator().manual_seed(seed)
 
     image = pipe(
@@ -128,12 +128,12 @@ def inference(
         width=width,
         generator=generator,
        guidance_scale=guidance_scale,
-        control_image=control_image,
+        control_image=control_image_torch,
         num_inference_steps=num_inference_steps,
         control_context_scale=control_context_scale,
     ).images[0]
 
-    return image, seed
+    return image, seed, control_image
 
 
 def read_file(path: str) -> str:
@@ -236,6 +236,19 @@ with gr.Blocks(css=css) as demo:
     #     fn=generate_image,
     #     inputs=None,  # This will automatically use the previous result
     #     outputs=output
+        fn=inference,
+        inputs=[
+            polished_prompt,
+            input_image,
+            image_scale,
+            "Canny",
+            control_context_scale,
+            seed,
+            randomize_seed,
+            guidance_scale,
+            num_inference_steps,
+        ],
+        outputs=[output_image, seed],
     )
     # gr.on(
     #     triggers=[run_button.click, prompt.submit],
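Note on the control branch above: the chained tests must be `elif`s, because with independent `if`s the final `else` pairs only with the 'Pose' test, so every mode except 'Pose' would be overwritten by the Canny processor; likewise `scale_image` returns an `(image, width, height)` tuple, which is why the control branch unpacks it before calling the annotator. The same dispatch can also be written as a lookup table; a minimal sketch follows, in which the processor IDs are the ones controlnet_aux ships, but the dict and helper names are illustrative and not part of this commit:

from controlnet_aux.processor import Processor

# Illustrative mapping of the app's control modes to controlnet_aux processor IDs.
PROCESSOR_IDS = {
    'HED': 'softedge_hed',
    'Midas': 'depth_midas',
    'MLSD': 'mlsd',
    'Pose': 'openpose_full',
}

def get_control_processor(control_mode: str) -> Processor:
    # Unknown modes fall back to Canny, mirroring the else branch above.
    return Processor(PROCESSOR_IDS.get(control_mode, 'canny'))

A dict lookup keeps the mode list in one place and cannot silently shadow an earlier branch the way chained `if`s can.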
 
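The event wiring in the last hunk has two loose ends: `inputs=` contains the literal string "Canny" where Gradio expects a component, and `outputs=` lists two components while `inference` now returns three values (`image, seed, control_image`). A sketch of wiring that matches the new signature, assuming a `control_mode` dropdown and a `control_image_preview` image component that are hypothetical here and not part of this commit:

import gradio as gr

# Hypothetical components; all other names come from the commit's inputs list.
control_mode = gr.Dropdown(
    choices=['Canny', 'HED', 'Midas', 'MLSD', 'Pose'],
    value='Canny',
    label='Control mode',
)
control_image_preview = gr.Image(label='Control image')

run_button.click(
    fn=inference,
    inputs=[
        polished_prompt,
        input_image,
        image_scale,
        control_mode,  # a component, not the literal "Canny"
        control_context_scale,
        seed,
        randomize_seed,
        guidance_scale,
        num_inference_steps,
    ],
    outputs=[output_image, seed, control_image_preview],  # three return values
)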
utils/image_utils.py CHANGED
@@ -2,14 +2,14 @@ import torch
 from PIL import Image
 import numpy as np
 
-def scale_image(img, scale):
+def scale_image(img, scale, nearest=32):
     w, h = img.size
     new_w = int(w * scale)
     new_h = int(h * scale)
 
-    # Adjust to nearest multiple of 32
-    new_w = (new_w // 32) * 32
-    new_h = (new_h // 32) * 32
+    # Adjust to the nearest (floored) multiple of `nearest`
+    new_w = (new_w // nearest) * nearest
+    new_h = (new_h // nearest) * nearest
 
     return img.resize((new_w, new_h), Image.LANCZOS), new_w, new_h
 