Spaces: Running on Zero
Commit 9afc0f5
Darius Morawiec committed
1 Parent(s): 028f4ca

Refactor model loading and processing
app.py CHANGED
@@ -145,17 +145,8 @@ with gr.Blocks() as demo:
     current_processor = None
     current_model_id = None
 
-
-    def run(
-        image,
-        model_id: str,
-        system_prompt: str,
-        user_prompt: str,
-        max_new_tokens: int = 1024,
-        image_target_size: int | None = None,
-    ):
+    def load_model(model_id: str):
         global current_model, current_processor, current_model_id
-        scale = False if model_id.startswith("Qwen/Qwen2.5-VL") else True
 
         # Only load model if it's different from the currently loaded one
         if current_model_id != model_id or current_model is None:
@@ -176,12 +167,14 @@ with gr.Blocks() as demo:
             torch.cuda.synchronize()
 
             # Load new model
+            model_loader = None
             if model_id.startswith("Qwen/Qwen2-VL"):
                 model_loader = Qwen2VLForConditionalGeneration
             elif model_id.startswith("Qwen/Qwen2.5-VL"):
                 model_loader = Qwen2_5_VLForConditionalGeneration
             elif model_id.startswith("Qwen/Qwen3-VL"):
                 model_loader = Qwen3VLForConditionalGeneration
+            assert model_loader is not None, f"Unsupported model ID: {model_id}"
             current_model = model_loader.from_pretrained(
                 model_id,
                 torch_dtype="auto",
@@ -190,13 +183,21 @@ with gr.Blocks() as demo:
             current_processor = AutoProcessor.from_pretrained(model_id)
             current_model_id = model_id
 
-
-
+        return current_model, current_processor
+
+    def run(
+        image,
+        model_id: str,
+        system_prompt: str,
+        user_prompt: str,
+        max_new_tokens: int = 1024,
+        image_target_size: int | None = None,
+    ):
+        model, processor = load_model(model_id)
 
         base64_image = image_to_base64(
             scale_image(image, image_target_size) if image_target_size else image
         )
-
         messages = [
             {
                 "role": "user",
@@ -226,7 +227,11 @@ with gr.Blocks() as demo:
         )
         inputs = inputs.to(DEVICE)
 
-        generated_ids = current_model.generate(**inputs, max_new_tokens=max_new_tokens)
+        @spaces.GPU(duration=300)
+        def _generate(**kwargs):
+            return model.generate(**kwargs)
+
+        generated_ids = _generate(**inputs, max_new_tokens=max_new_tokens)
         generated_ids_trimmed = [
             out_ids[len(in_ids) :]
             for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
@@ -241,6 +246,7 @@ with gr.Blocks() as demo:
         output_text = repair_json(output_text)
         output_json = json.loads(output_text)
 
+        scale = False if model_id.startswith("Qwen/Qwen2.5-VL") else True
         x_scale = float(image.width / 1000) if scale else 1.0
         y_scale = float(image.height / 1000) if scale else 1.0
         bboxes = []
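Note: the core of this refactor is that the lazy, module-level model cache now lives in its own load_model() helper, which run() calls instead of mutating globals inline. A minimal standalone sketch of the same caching pattern, assuming a generic Auto class (the Space itself picks an explicit Qwen class per model family; names here are hypothetical):

from transformers import AutoModelForVision2Seq, AutoProcessor

_model = None
_processor = None
_model_id = None

def load_model(model_id: str):
    """Load model and processor once; reuse them until model_id changes."""
    global _model, _processor, _model_id
    if _model_id != model_id or _model is None:
        # A different ID replaces the previously cached model.
        _model = AutoModelForVision2Seq.from_pretrained(model_id, torch_dtype="auto")
        _processor = AutoProcessor.from_pretrained(model_id)
        _model_id = model_id
    return _model, _processor

Returning the pair also means run() no longer needs its own global declaration; only the cache owner touches the module state.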
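The new model_loader = None seed plus the assert turn an unrecognized model_id into an immediate, descriptive failure instead of a NameError further down. The same prefix dispatch could be written table-driven; a sketch under the assumption that all three Qwen classes are importable from transformers (Qwen3VLForConditionalGeneration needs a recent release):

from transformers import (
    Qwen2VLForConditionalGeneration,
    Qwen2_5_VLForConditionalGeneration,
    Qwen3VLForConditionalGeneration,  # requires a recent transformers release
)

_LOADERS = {
    "Qwen/Qwen2-VL": Qwen2VLForConditionalGeneration,
    "Qwen/Qwen2.5-VL": Qwen2_5_VLForConditionalGeneration,
    "Qwen/Qwen3-VL": Qwen3VLForConditionalGeneration,
}

def pick_loader(model_id: str):
    # "Qwen/Qwen2.5-VL..." does not start with "Qwen/Qwen2-VL",
    # so the prefixes cannot shadow each other.
    for prefix, loader in _LOADERS.items():
        if model_id.startswith(prefix):
            return loader
    raise ValueError(f"Unsupported model ID: {model_id}")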
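Wrapping only model.generate() in a nested function decorated with @spaces.GPU(duration=300) is the ZeroGPU pattern: on a "Running on Zero" Space, a GPU is attached only while a decorated function executes, and duration extends the default allocation window. A minimal sketch of the same idea (function and argument names are illustrative):

import spaces  # available inside Hugging Face ZeroGPU Spaces

@spaces.GPU(duration=300)  # hold the GPU for up to ~300 s per call
def gpu_generate(model, inputs, max_new_tokens: int = 1024):
    # CUDA is only guaranteed to be available inside this call.
    return model.generate(**inputs, max_new_tokens=max_new_tokens)

Defining the wrapper inside run() lets it close over the model returned by load_model() for the current request, so preprocessing and postprocessing stay on CPU time.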
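Finally, computing scale right before the bbox loop (instead of at the top of run()) keeps it next to its only use. The flag encodes an output-convention difference between the model families: Qwen2-VL emits grounding coordinates on a 0-1000 normalized grid that must be mapped to pixel space, while Qwen2.5-VL emits absolute pixel coordinates (and this code treats Qwen3-VL like Qwen2-VL). A small sketch of the mapping, assuming [x1, y1, x2, y2] boxes:

def rescale_bbox(bbox, image_width, image_height, normalized=True):
    """Map a [x1, y1, x2, y2] box from a 0-1000 grid to pixels.

    With normalized=False (e.g. Qwen2.5-VL output) the box is
    already in pixels and is returned unchanged.
    """
    if not normalized:
        return bbox
    x_scale = image_width / 1000
    y_scale = image_height / 1000
    x1, y1, x2, y2 = bbox
    return [x1 * x_scale, y1 * y_scale, x2 * x_scale, y2 * y_scale]

# e.g. rescale_bbox([100, 250, 500, 750], 1920, 1080) -> [192.0, 270.0, 960.0, 810.0]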