Spaces:
Running
on
Zero
Running
on
Zero
Darius Morawiec
committed on
Commit
·
449fa2a
1
Parent(s):
3d8d21b
Add download notification
Browse files
app.py
CHANGED
|
@@ -217,7 +217,9 @@ with gr.Blocks() as demo:
|
|
| 217 |
with gr.Row():
|
| 218 |
run_button = gr.Button("Run")
|
| 219 |
|
| 220 |
-
def load_model(
|
|
|
|
|
|
|
| 221 |
global current_model, current_processor, current_model_id
|
| 222 |
|
| 223 |
# Only load model if it's different from the currently loaded one
|
|
@@ -237,11 +239,17 @@ with gr.Blocks() as demo:
|
|
| 237 |
torch.cuda.empty_cache()
|
| 238 |
torch.cuda.synchronize()
|
| 239 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 240 |
current_model = AutoModel.from_pretrained(
|
| 241 |
model_id, dtype="auto", device_map="cpu"
|
| 242 |
)
|
| 243 |
current_processor = AutoProcessor.from_pretrained(model_id)
|
| 244 |
current_model_id = model_id
|
|
|
|
| 245 |
return current_model, current_processor
|
| 246 |
|
| 247 |
@spaces.GPU
|
|
@@ -342,10 +350,10 @@ with gr.Blocks() as demo:
|
|
| 342 |
image_resize: str = "Yes",
|
| 343 |
image_target_size: int | None = None,
|
| 344 |
):
|
| 345 |
-
# Load the model and processor on CPU
|
| 346 |
model, processor = load_model(model_id)
|
| 347 |
|
| 348 |
-
# Run inference on GPU
|
| 349 |
return generate(
|
| 350 |
model,
|
| 351 |
processor,
|
|
|
|
| 217 |
with gr.Row():
|
| 218 |
run_button = gr.Button("Run")
|
| 219 |
|
| 220 |
+
def load_model(
|
| 221 |
+
model_id: str,
|
| 222 |
+
):
|
| 223 |
global current_model, current_processor, current_model_id
|
| 224 |
|
| 225 |
# Only load model if it's different from the currently loaded one
|
|
|
|
| 239 |
torch.cuda.empty_cache()
|
| 240 |
torch.cuda.synchronize()
|
| 241 |
|
| 242 |
+
gr.Info(
|
| 243 |
+
f"Downloading and loading <strong>{model_id.removeprefix('Qwen/')}</strong> model files ...",
|
| 244 |
+
duration=10,
|
| 245 |
+
)
|
| 246 |
+
|
| 247 |
current_model = AutoModel.from_pretrained(
|
| 248 |
model_id, dtype="auto", device_map="cpu"
|
| 249 |
)
|
| 250 |
current_processor = AutoProcessor.from_pretrained(model_id)
|
| 251 |
current_model_id = model_id
|
| 252 |
+
|
| 253 |
return current_model, current_processor
|
| 254 |
|
| 255 |
@spaces.GPU
|
|
|
|
| 350 |
image_resize: str = "Yes",
|
| 351 |
image_target_size: int | None = None,
|
| 352 |
):
|
| 353 |
+
# Load the model and processor (on CPU)
|
| 354 |
model, processor = load_model(model_id)
|
| 355 |
|
| 356 |
+
# Run inference (on GPU *if available)
|
| 357 |
return generate(
|
| 358 |
model,
|
| 359 |
processor,
|