velocity-ai
/

phi-3.5-address-validation-pretrained

Text Classification

classification-inference

text-generation-inference

Model card Files Files and versions

velocity-ai committed on Jul 28, 2025

Commit

0ebdffc

·

verified ·

1 Parent(s): c410e3a

Update code/inference.py

Files changed (1) hide show

code/inference.py +30 -4

code/inference.py CHANGED Viewed

@@ -1,7 +1,8 @@
 import os
 import json
 import torch
-from transformers import AutoTokenizer, AutoModelForSequenceClassification
 import logging
 logger = logging.getLogger(__name__)
@@ -11,6 +12,21 @@ logger = logging.getLogger(__name__)
 # Can specify GPU device with:
 # CUDA_VISIBLE_DEVICES="1" python script.py
 def model_fn(model_dir, context=None):
     """Load the model for inference"""
     try:
@@ -22,15 +38,25 @@ def model_fn(model_dir, context=None):
             torch.cuda.empty_cache()
         logger.info(f"Using device: {device}")
-        # Load tokenizer and model directly using AutoModelForSequenceClassification
         tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
-        model = AutoModelForSequenceClassification.from_pretrained(
             model_id,
-            num_labels=2,
             torch_dtype=torch.bfloat16 if device.type == 'cuda' else torch.float32,
             trust_remote_code=True
         )
         # Move model to device
         model = model.to(device)

 import os
 import json
 import torch
+import torch.nn as nn
+from transformers import AutoModel, AutoTokenizer, AutoConfig
 import logging
 logger = logging.getLogger(__name__)
 # Can specify GPU device with:
 # CUDA_VISIBLE_DEVICES="1" python script.py
class PhiForSequenceClassification(nn.Module):
    """Sequence-classification wrapper around a base model.

    A linear head is applied to the final-layer hidden state of the last
    real (non-padding) token of every sequence.

    Args:
        base_model: Module exposing ``config.hidden_size`` and returning an
            object with ``hidden_states`` when called with
            ``output_hidden_states=True``.
        num_labels: Number of output classes produced by the linear head.
    """

    def __init__(self, base_model, num_labels=2):
        super().__init__()
        self.phi = base_model
        # Create classifier with same dtype as base model so the head can
        # consume the base model's hidden states without a cast.
        dtype = next(base_model.parameters()).dtype
        self.classifier = nn.Linear(self.phi.config.hidden_size, num_labels, dtype=dtype)

    def forward(self, **inputs):
        """Run the base model and classify each sequence.

        Args:
            **inputs: Keyword arguments forwarded unchanged to the base model.
                When a 2-D ``attention_mask`` is present it is also used to
                locate the last attended token of each row.

        Returns:
            An object whose ``logits`` attribute holds the classifier output,
            one row per input sequence.
        """
        outputs = self.phi(**inputs, output_hidden_states=True)
        # Final-layer hidden states; indexed below as (batch, seq, hidden).
        hidden = outputs.hidden_states[-1]

        attention_mask = inputs.get('attention_mask')
        if attention_mask is None or attention_mask.dim() != 2:
            # Without a usable mask we cannot tell padding from content, so
            # fall back to the final position of every sequence.
            pooled = hidden[:, -1, :]
        else:
            # Position -1 is a padding token for right-padded batches. Pick
            # the highest attended position per row instead; this works for
            # both left and right padding.
            positions = torch.arange(hidden.shape[1], device=hidden.device)
            mask = attention_mask.to(hidden.device).long()
            last_index = (mask * positions).argmax(dim=-1)
            rows = torch.arange(hidden.shape[0], device=hidden.device)
            pooled = hidden[rows, last_index]

        logits = self.classifier(pooled)
        # Lightweight container so callers can read ``.logits``.
        return type('Outputs', (), {'logits': logits})()
 def model_fn(model_dir, context=None):
     """Load the model for inference"""
     try:
             torch.cuda.empty_cache()
         logger.info(f"Using device: {device}")
+        # Load tokenizer
         tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
+        # Load config and specify it's a Phi3Config
+        config = AutoConfig.from_pretrained(model_id,
+                                          num_labels=2,
+                                          trust_remote_code=True)
+        # Load base model
+        base_model = AutoModel.from_pretrained(
             model_id,
+            config=config,
             torch_dtype=torch.bfloat16 if device.type == 'cuda' else torch.float32,
             trust_remote_code=True
         )
+        # Create classification model
+        model = PhiForSequenceClassification(base_model, num_labels=2)
         # Move model to device
         model = model.to(device)