Upload 2 files

- app.py  +13 -6
- requirements.txt  +2 -1
app.py
CHANGED
@@ -24,8 +24,9 @@ def load_model():
     # Load the LoRA adapter model for text generation
     model = AutoPeftModelForCausalLM.from_pretrained(
         "./lora_adapter",  # Path to your adapter files
-        torch_dtype=torch.
-        device_map="
+        torch_dtype=torch.float32,  # Use float32 for CPU
+        device_map="cpu",  # Force CPU
+        low_cpu_mem_usage=True  # Optimize for low memory
     )

     # Load tokenizer from the same directory
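For reference, here is a minimal sketch of how the patched loader fits together, assuming the usual PEFT/Transformers pattern; everything outside the diffed lines (the imports, the function wrapper, the tokenizer call, the return value) is an assumption rather than the Space's actual code:

import torch
from peft import AutoPeftModelForCausalLM
from transformers import AutoTokenizer

def load_model():
    # Load the LoRA adapter model for text generation
    model = AutoPeftModelForCausalLM.from_pretrained(
        "./lora_adapter",           # Path to your adapter files
        torch_dtype=torch.float32,  # Use float32 for CPU
        device_map="cpu",           # Force CPU
        low_cpu_mem_usage=True      # Optimize for low memory
    )
    # Load tokenizer from the same directory (assumed to contain tokenizer files)
    tokenizer = AutoTokenizer.from_pretrained("./lora_adapter")
    return model, tokenizer

low_cpu_mem_usage=True streams weights from disk instead of first materializing an empty full-size model, which keeps peak RAM closer to the final model footprint on small CPU instances.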
@@ -75,6 +76,11 @@ Respond *only* with a valid JSON object that follows this exact schema:
 Do NOT add any text or explanations before or after the JSON object.
 """

+# Add this import at the top
+import spaces
+
+# Add this decorator to the classify function
+@spaces.GPU
 def classify_solution(question: str, solution: str):
     """
     Classify the math solution using the exact training format
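For context, spaces.GPU is the ZeroGPU decorator from Hugging Face's spaces package: on ZeroGPU hardware it borrows a GPU for the duration of each decorated call, and on other hardware it degrades to a no-op. A minimal usage sketch (the duration argument is optional, and the body is elided here):

import spaces

@spaces.GPU  # also accepts e.g. @spaces.GPU(duration=60) to cap the allocation time
def classify_solution(question: str, solution: str):
    ...  # any CUDA work must happen inside the decorated call

Note that the decorator only pays off if the model is actually moved to the GPU inside the call; with device_map="cpu" in the loader above, generation still runs on the CPU.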
@@ -113,14 +119,15 @@ def classify_solution(question: str, solution: str):
         max_length=2048  # Increased for longer prompts
     )

-    # Generate response
+    # Generate response with CPU optimization
     with torch.no_grad():
         outputs = model.generate(
             **inputs,
-            max_new_tokens=200,
+            max_new_tokens=150,  # Reduced from 200
             temperature=0.1,
-            do_sample=True,
-            pad_token_id=tokenizer.pad_token_id
+            do_sample=False,  # Faster greedy decoding
+            pad_token_id=tokenizer.pad_token_id,
+            use_cache=True  # Speed up generation
         )

     # Decode the generated response
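For reference, a sketch of how the tuned generate() call plugs into tokenization and decoding; variable names outside the diffed lines (prompt, response) and the decode slicing are assumptions:

# Hypothetical surrounding code; only the generate() arguments come from the diff
inputs = tokenizer(
    prompt,
    return_tensors="pt",
    truncation=True,
    max_length=2048  # Increased for longer prompts
)

with torch.no_grad():
    outputs = model.generate(
        **inputs,
        max_new_tokens=150,  # Reduced from 200
        temperature=0.1,
        do_sample=False,  # Faster greedy decoding
        pad_token_id=tokenizer.pad_token_id,
        use_cache=True  # Speed up generation
    )

# Decode only the newly generated tokens, skipping the echoed prompt
response = tokenizer.decode(
    outputs[0][inputs["input_ids"].shape[-1]:],
    skip_special_tokens=True
)

With do_sample=False decoding is greedy, so the temperature=0.1 setting is effectively ignored; recent transformers versions emit a warning about exactly this combination.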
requirements.txt
CHANGED
@@ -2,4 +2,5 @@ gradio
 torch
 transformers
 peft
-accelerate
+accelerate
+spaces
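spaces is the package that provides the @spaces.GPU decorator used in app.py; listing it here keeps the import resolvable both on Spaces and in local runs installed via pip install -r requirements.txt, where the decorator simply becomes a no-op.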