Spaces:
Running
on
Zero
Running
on
Zero
Darius Morawiec
committed on
Commit
·
a84c724
1
Parent(s):
bc51dfa
Load model on CPU during download and move to CUDA if available
Browse files
app.py
CHANGED
|
@@ -184,11 +184,10 @@ with gr.Blocks() as demo:
|
|
| 184 |
elif model_id.startswith("Qwen/Qwen3-VL"):
|
| 185 |
model_loader = Qwen3VLForConditionalGeneration
|
| 186 |
assert model_loader is not None, f"Unsupported model ID: {model_id}"
|
|
|
|
| 187 |
current_model = model_loader.from_pretrained(
|
| 188 |
-
model_id,
|
| 189 |
-
|
| 190 |
-
device_map="auto",
|
| 191 |
-
).eval()
|
| 192 |
current_processor = AutoProcessor.from_pretrained(model_id)
|
| 193 |
current_model_id = model_id
|
| 194 |
return current_model, current_processor
|
|
@@ -205,6 +204,10 @@ with gr.Blocks() as demo:
|
|
| 205 |
image_resize: str,
|
| 206 |
image_target_size: int | None,
|
| 207 |
):
|
|
|
|
|
|
|
|
|
|
|
|
|
| 208 |
base64_image = image_to_base64(
|
| 209 |
resize_image(image, image_target_size)
|
| 210 |
if image_resize == "Yes" and image_target_size
|
|
|
|
| 184 |
elif model_id.startswith("Qwen/Qwen3-VL"):
|
| 185 |
model_loader = Qwen3VLForConditionalGeneration
|
| 186 |
assert model_loader is not None, f"Unsupported model ID: {model_id}"
|
| 187 |
+
# Load model on CPU to avoid using CUDA resources during download
|
| 188 |
current_model = model_loader.from_pretrained(
|
| 189 |
+
model_id, torch_dtype=torch.bfloat16, device_map="cpu"
|
| 190 |
+
)
|
|
|
|
|
|
|
| 191 |
current_processor = AutoProcessor.from_pretrained(model_id)
|
| 192 |
current_model_id = model_id
|
| 193 |
return current_model, current_processor
|
|
|
|
| 204 |
image_resize: str,
|
| 205 |
image_target_size: int | None,
|
| 206 |
):
|
| 207 |
+
# Move model to CUDA if available (inside @spaces.GPU decorated function)
|
| 208 |
+
model = model.to(DEVICE)
|
| 209 |
+
model.eval()
|
| 210 |
+
|
| 211 |
base64_image = image_to_base64(
|
| 212 |
resize_image(image, image_target_size)
|
| 213 |
if image_resize == "Yes" and image_target_size
|