Upload 2 files

- app.py  +13 -6
- requirements.txt  +2 -1
app.py
CHANGED
@@ -24,8 +24,9 @@ def load_model():
     # Load the LoRA adapter model for text generation
     model = AutoPeftModelForCausalLM.from_pretrained(
         "./lora_adapter",  # Path to your adapter files
-        torch_dtype=torch.
-        device_map="
+        torch_dtype=torch.float32,  # Use float32 for CPU
+        device_map="cpu",  # Force CPU
+        low_cpu_mem_usage=True  # Optimize for low memory
     )

     # Load tokenizer from the same directory
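For reference, here is a minimal sketch of how the patched loader fits together, assuming the usual PEFT/Transformers pattern; everything outside the diffed lines (the imports, the function wrapper, the tokenizer call, the return value) is an assumption rather than the Space's actual code:

import torch
from peft import AutoPeftModelForCausalLM
from transformers import AutoTokenizer

def load_model():
    # Load the LoRA adapter model for text generation
    model = AutoPeftModelForCausalLM.from_pretrained(
        "./lora_adapter",           # Path to your adapter files
        torch_dtype=torch.float32,  # Use float32 for CPU
        device_map="cpu",           # Force CPU
        low_cpu_mem_usage=True      # Optimize for low memory
    )
    # Load tokenizer from the same directory (assumed to contain tokenizer files)
    tokenizer = AutoTokenizer.from_pretrained("./lora_adapter")
    return model, tokenizer

low_cpu_mem_usage=True streams weights from disk instead of first materializing an empty full-size model, which keeps peak RAM closer to the final model footprint on small CPU instances.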
@@ -75,6 +76,11 @@ Respond *only* with a valid JSON object that follows this exact schema:
 Do NOT add any text or explanations before or after the JSON object.
 """

+# Add this import at the top
+import spaces
+
+# Add this decorator to the classify function
+@spaces.GPU
 def classify_solution(question: str, solution: str):
     """
     Classify the math solution using the exact training format
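For context, spaces.GPU is the ZeroGPU decorator from Hugging Face's spaces package: on ZeroGPU hardware it borrows a GPU for the duration of each decorated call, and on other hardware it degrades to a no-op. A minimal usage sketch (the duration argument is optional, and the body is elided here):

import spaces

@spaces.GPU  # also accepts e.g. @spaces.GPU(duration=60) to cap the allocation time
def classify_solution(question: str, solution: str):
    ...  # any CUDA work must happen inside the decorated call

Note that the decorator only pays off if the model is actually moved to the GPU inside the call; with device_map="cpu" in the loader above, generation still runs on the CPU.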
@@ -113,14 +119,15 @@ def classify_solution(question: str, solution: str):
         max_length=2048  # Increased for longer prompts
     )

-    # Generate response
+    # Generate response with CPU optimization
     with torch.no_grad():
         outputs = model.generate(
             **inputs,
-            max_new_tokens=200,
+            max_new_tokens=150,  # Reduced from 200
             temperature=0.1,
-            do_sample=True,
-            pad_token_id=tokenizer.pad_token_id
+            do_sample=False,  # Faster greedy decoding
+            pad_token_id=tokenizer.pad_token_id,
+            use_cache=True  # Speed up generation
         )

     # Decode the generated response
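For reference, a sketch of how the tuned generate() call plugs into tokenization and decoding; variable names outside the diffed lines (prompt, response) and the decode slicing are assumptions:

# Hypothetical surrounding code; only the generate() arguments come from the diff
inputs = tokenizer(
    prompt,
    return_tensors="pt",
    truncation=True,
    max_length=2048  # Increased for longer prompts
)

with torch.no_grad():
    outputs = model.generate(
        **inputs,
        max_new_tokens=150,  # Reduced from 200
        temperature=0.1,
        do_sample=False,  # Faster greedy decoding
        pad_token_id=tokenizer.pad_token_id,
        use_cache=True  # Speed up generation
    )

# Decode only the newly generated tokens, skipping the echoed prompt
response = tokenizer.decode(
    outputs[0][inputs["input_ids"].shape[-1]:],
    skip_special_tokens=True
)

With do_sample=False decoding is greedy, so the temperature=0.1 setting is effectively ignored; recent transformers versions emit a warning about exactly this combination.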
requirements.txt
CHANGED
@@ -2,4 +2,5 @@ gradio
 torch
 transformers
 peft
-accelerate
+accelerate
+spaces
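spaces is the package that provides the @spaces.GPU decorator used in app.py; listing it here keeps the import resolvable both on Spaces and in local runs installed via pip install -r requirements.txt, where the decorator simply becomes a no-op.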