Commit b3d45f6
Parent(s): 8a9e9ed

feat: merged checkpoint, modified qwen, readme

Signed-off-by: jupyterjazz <saba.sturua@jina.ai>
- README.md +5 -9
- adapters/{retrieval/adapter_config.json → adapter_config.json} +0 -0
- adapters/{text-matching/adapter_model.safetensors → adapter_model.safetensors} +2 -2
- adapters/code/adapter_config.json +0 -26
- adapters/code/adapter_model.safetensors +0 -3
- adapters/retrieval/adapter_model.safetensors +0 -3
- adapters/text-matching/adapter_config.json +0 -26
- modeling_jina_embeddings_v4.py +5 -2
- qwen2_5_vl.py +10 -9
README.md
CHANGED
@@ -22,11 +22,9 @@ image_paths = ['/<path_to_image>']
 images = [Image.open(path) for path in image_paths]
 
 # Example 1: Text matching task with single vector embeddings
-model.set_task(task='text-matching')
-
 # Generate embeddings with dimension truncation (256), decrease max_pixels
-img_embeddings = model.encode_images(images=images, truncate_dim=256, max_pixels=602112)
-text_embeddings = model.encode_texts(texts=texts, truncate_dim=256, max_length=512)
+img_embeddings = model.encode_images(images=images, truncate_dim=256, max_pixels=602112, task='text-matching')
+text_embeddings = model.encode_texts(texts=texts, truncate_dim=256, max_length=512, task='text-matching')
 
 # Example 2: Retrieval task with multi-vector embeddings
 model.set_task(task='retrieval')
@@ -36,10 +34,8 @@ img_embeddings = model.encode_images(images=images, vector_type='multi_vector')
 text_embeddings = model.encode_texts(texts=texts, vector_type='multi_vector', prompt_name='passage')
 
 # Example 3: Code task with single vector embeddings
-model.set_task(task='code')
-
 code = ["def hello_world():\n print('Hello, World!')"]
-code_embeddings = model.encode_texts(texts=code)
+code_embeddings = model.encode_texts(texts=code, task='code')
 
 ```
@@ -75,8 +71,8 @@ with torch.no_grad():
 
 with torch.autocast(device_type='cuda' if torch.cuda.is_available() else 'cpu'):
     # Get embeddings
-    text_embeddings = model.model(**text_batch).single_vec_emb
-    img_embeddings = model.model(**image_batch).single_vec_emb
+    text_embeddings = model.model(**text_batch, task_label='retrieval').single_vec_emb
+    img_embeddings = model.model(**image_batch, task_label='retrieval').single_vec_emb
 
 
 ```
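Read as a whole, the README diff drops the explicit `model.set_task(...)` calls for the text-matching and code examples in favor of a `task` argument passed directly to `encode_images` / `encode_texts`, and threads a `task_label` into the raw `model.model(...)` calls. The snippet below is a consolidated sketch of the updated usage; it assumes `model` has already been loaded as in the unchanged top of the README, and the `texts` list here is a placeholder.

```python
# Sketch of the post-commit README usage (assumes `model` is an already-loaded
# JinaEmbeddingsV4Model; `texts` is placeholder example data).
from PIL import Image

image_paths = ['/<path_to_image>']
images = [Image.open(path) for path in image_paths]
texts = ["example query", "example passage"]  # placeholder texts

# Text matching: the task is now passed per call instead of via model.set_task(...)
img_embeddings = model.encode_images(
    images=images, truncate_dim=256, max_pixels=602112, task='text-matching'
)
text_embeddings = model.encode_texts(
    texts=texts, truncate_dim=256, max_length=512, task='text-matching'
)

# Code embeddings follow the same pattern
code = ["def hello_world():\n    print('Hello, World!')"]
code_embeddings = model.encode_texts(texts=code, task='code')
```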
adapters/{retrieval/adapter_config.json → adapter_config.json}
RENAMED
File without changes
adapters/{text-matching/adapter_model.safetensors → adapter_model.safetensors}
RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:7a5cb8cc0f4e10f184ccc10f8864999098b887dbc4107221ec0e400d927f4555
+size 360095344
adapters/code/adapter_config.json
DELETED
@@ -1,26 +0,0 @@
-{
-  "alpha_pattern": {},
-  "auto_mapping": null,
-  "base_model_name_or_path": "jinaai/colqwen25-duo-base",
-  "bias": "none",
-  "fan_in_fan_out": false,
-  "inference_mode": false,
-  "init_lora_weights": "gaussian",
-  "layer_replication": null,
-  "layers_pattern": null,
-  "layers_to_transform": null,
-  "loftq_config": {},
-  "lora_alpha": 32,
-  "lora_dropout": 0.1,
-  "megatron_config": null,
-  "megatron_core": "megatron.core",
-  "modules_to_save": null,
-  "peft_type": "LORA",
-  "r": 32,
-  "rank_pattern": {},
-  "revision": null,
-  "target_modules": "(.*(model).*(down_proj|gate_proj|up_proj|k_proj|q_proj|v_proj|o_proj).*$|.*(single_vector_projector|multi_vector_projector).*$)",
-  "task_type": "FEATURE_EXTRACTION",
-  "use_dora": false,
-  "use_rslora": false
-}
adapters/code/adapter_model.safetensors
DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:510d017efc64c97e2db985ed1a96b17477ac97e1a5470996209041ad35beeee7
-size 119802032
adapters/retrieval/adapter_model.safetensors
DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:0c2b1d85506d01bd29a942975cb0abbd8c4af3487fb80b5ad408ae0e55f8bb3a
-size 120138416
adapters/text-matching/adapter_config.json
DELETED
@@ -1,26 +0,0 @@
-{
-  "alpha_pattern": {},
-  "auto_mapping": null,
-  "base_model_name_or_path": "jinaai/colqwen25-duo-base",
-  "bias": "none",
-  "fan_in_fan_out": false,
-  "inference_mode": true,
-  "init_lora_weights": "gaussian",
-  "layer_replication": null,
-  "layers_pattern": null,
-  "layers_to_transform": null,
-  "loftq_config": {},
-  "lora_alpha": 32,
-  "lora_dropout": 0.1,
-  "megatron_config": null,
-  "megatron_core": "megatron.core",
-  "modules_to_save": null,
-  "peft_type": "LORA",
-  "r": 32,
-  "rank_pattern": {},
-  "revision": null,
-  "target_modules": "(.*(model).*(down_proj|gate_proj|up_proj|k_proj|q_proj|v_proj|o_proj).*$|.*(single_vector_projector|multi_vector_projector).*$)",
-  "task_type": "FEATURE_EXTRACTION",
-  "use_dora": false,
-  "use_rslora": false
-}
modeling_jina_embeddings_v4.py
CHANGED
@@ -522,6 +522,9 @@ class JinaEmbeddingsV4Model(Qwen2_5_VLForConditionalGeneration):
         """
         if "torch_dtype" not in kwargs:
            kwargs["torch_dtype"] = "auto"
+
+        if torch.cuda.is_available() and "attn_implementation" not in kwargs:
+            kwargs["attn_implementation"] = "flash_attention_2"
 
         base_model = super().from_pretrained(
             pretrained_model_name_or_path, *args, **kwargs
@@ -536,7 +539,7 @@ class JinaEmbeddingsV4Model(Qwen2_5_VLForConditionalGeneration):
         )
         adapter_dir = os.path.join(adapter_cache_path, "adapters")
 
-        lora_config = LoraConfig.from_pretrained(
+        lora_config = LoraConfig.from_pretrained(adapter_dir)
         lora_config._custom_modules = {
             torch.nn.modules.linear.Linear: partial(
                 MultiAdapterLinear,
@@ -545,7 +548,7 @@ class JinaEmbeddingsV4Model(Qwen2_5_VLForConditionalGeneration):
         }
         peft_model = PeftModel.from_pretrained(
             model=base_model,
-            model_id=
+            model_id=adapter_dir,
             config=lora_config,
         )
 
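The `from_pretrained` change only injects `attn_implementation="flash_attention_2"` when CUDA is available and the caller has not set the key, so it remains overridable. Below is a hedged, caller-side sketch of that behavior; the checkpoint path is a placeholder and the import path is assumed from this file's name, not confirmed by the diff.

```python
# Caller-side sketch of the new attn_implementation default
# (placeholder model path; import path assumed for illustration).
import torch
from modeling_jina_embeddings_v4 import JinaEmbeddingsV4Model

# On a CUDA machine this now implicitly adds attn_implementation="flash_attention_2".
model = JinaEmbeddingsV4Model.from_pretrained("<model_name_or_path>")

# The default is applied only when the key is absent, so environments without
# flash-attn can still choose another implementation explicitly.
model = JinaEmbeddingsV4Model.from_pretrained(
    "<model_name_or_path>", attn_implementation="sdpa"
)
```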
qwen2_5_vl.py
CHANGED
@@ -945,6 +945,7 @@ class Qwen2_5_VLAttention(nn.Module):
 
     def forward(
         self,
+        task_label: Union[str, List[str]],
         hidden_states: torch.Tensor,
         attention_mask: Optional[torch.Tensor] = None,
         position_ids: Optional[torch.LongTensor] = None,
@@ -956,9 +957,9 @@ class Qwen2_5_VLAttention(nn.Module):
     ) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]:
         bsz, q_len, _ = hidden_states.size()
 
-        query_states = self.q_proj(hidden_states)
-        key_states = self.k_proj(hidden_states)
-        value_states = self.v_proj(hidden_states)
+        query_states = self.q_proj(hidden_states, task_label=task_label)
+        key_states = self.k_proj(hidden_states, task_label=task_label)
+        value_states = self.v_proj(hidden_states, task_label=task_label)
 
         query_states = query_states.view(bsz, q_len, -1, self.head_dim).transpose(1, 2)
         key_states = key_states.view(bsz, q_len, -1, self.head_dim).transpose(1, 2)
@@ -1002,7 +1003,7 @@ class Qwen2_5_VLAttention(nn.Module):
         attn_output = attn_output.transpose(1, 2).contiguous()
         attn_output = attn_output.reshape(bsz, q_len, -1)
 
-        attn_output = self.o_proj(attn_output)
+        attn_output = self.o_proj(attn_output, task_label=task_label)
 
         if not output_attentions:
             attn_weights = None
@@ -1021,7 +1022,6 @@ class Qwen2_5_VLFlashAttention2(Qwen2_5_VLAttention):
 
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
-
         # TODO: Should be removed once Flash Attention for RoCm is bumped to 2.1.
         # flash_attn<2.1 generates top-left aligned causal mask, while what is needed here is bottom-right alignement, that was made default for flash_attn>=2.1. This attribute is used to handle this difference. Reference: https://github.com/Dao-AILab/flash-attention/releases/tag/v2.1.0.
         # Beware that with flash_attn<2.1, using q_seqlen != k_seqlen (except for the case q_seqlen == 1) produces a wrong mask (top-left).
@@ -1029,6 +1029,7 @@ class Qwen2_5_VLFlashAttention2(Qwen2_5_VLAttention):
 
     def forward(
         self,
+        task_label: Union[str, List[str]],
         hidden_states: torch.Tensor,
         attention_mask: Optional[torch.Tensor] = None,
         position_ids: Optional[torch.LongTensor] = None,
@@ -1040,9 +1041,9 @@ class Qwen2_5_VLFlashAttention2(Qwen2_5_VLAttention):
     ):
         bsz, q_len, _ = hidden_states.size()
 
-        query_states = self.q_proj(hidden_states)
-        key_states = self.k_proj(hidden_states)
-        value_states = self.v_proj(hidden_states)
+        query_states = self.q_proj(hidden_states, task_label=task_label)
+        key_states = self.k_proj(hidden_states, task_label=task_label)
+        value_states = self.v_proj(hidden_states, task_label=task_label)
 
         query_states = query_states.view(bsz, q_len, -1, self.head_dim).transpose(1, 2)
         key_states = key_states.view(bsz, q_len, -1, self.head_dim).transpose(1, 2)
@@ -1113,7 +1114,7 @@ class Qwen2_5_VLFlashAttention2(Qwen2_5_VLAttention):
         )
 
         attn_output = attn_output.reshape(bsz, q_len, self.hidden_size).contiguous()
-        attn_output = self.o_proj(attn_output)
+        attn_output = self.o_proj(attn_output, task_label=task_label)
 
         if not output_attentions:
             attn_weights = None
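The attention changes thread `task_label` through `q_proj`, `k_proj`, `v_proj`, and `o_proj`, which only makes sense because those Linears are wrapped by the `MultiAdapterLinear` custom module registered in `modeling_jina_embeddings_v4.py`: with the three task adapters merged into a single checkpoint, each projection must select the right LoRA weights at call time. The sketch below is an illustrative stand-in, not the repository's `MultiAdapterLinear`; the class name, rank, and task list are assumptions.

```python
# Illustrative sketch of a task-routed Linear: a frozen base projection plus
# one low-rank (LoRA-style) update per task, selected by task_label at call time.
# This is NOT the repository's MultiAdapterLinear implementation.
import torch
import torch.nn as nn


class TaskRoutedLinear(nn.Module):
    """Base Linear plus one low-rank delta per task, chosen per forward call."""

    def __init__(self, base: nn.Linear, tasks, rank: int = 32):
        super().__init__()
        self.base = base
        self.lora_A = nn.ModuleDict(
            {t: nn.Linear(base.in_features, rank, bias=False) for t in tasks}
        )
        self.lora_B = nn.ModuleDict(
            {t: nn.Linear(rank, base.out_features, bias=False) for t in tasks}
        )

    def forward(self, x: torch.Tensor, task_label: str) -> torch.Tensor:
        # Frozen base projection plus the task-specific low-rank update.
        return self.base(x) + self.lora_B[task_label](self.lora_A[task_label](x))


# Usage mirroring the attention change: the projection is told which task it serves.
proj = TaskRoutedLinear(nn.Linear(16, 16), tasks=["retrieval", "text-matching", "code"])
hidden_states = torch.randn(2, 4, 16)
query_states = proj(hidden_states, task_label="retrieval")
```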