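"""Gradio demo for LAPA quality estimation: scores input text with each model
in MODEL_OPTIONS and reports the per-model scores plus a combined score."""
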
import os
from typing import Any, Dict

import gradio as gr
import numpy as np
import spaces
import torch
from huggingface_hub import login
from transformers import AutoModelForSequenceClassification, AutoTokenizer

# Log in to the Hugging Face Hub to get access to the model weights
HF_LE_LLM_READ_TOKEN = os.environ.get("HF_LE_LLM_READ_TOKEN")
login(token=HF_LE_LLM_READ_TOKEN)
# Constants
DEFAULT_MODEL = "lapa-llm/manipulative-score-model"
DEVICE = "cuda"

MODEL_OPTIONS = [
    "lapa-llm/manipulative-score-model",
    "lapa-llm/gec-score-model",
    "lapa-llm/fineweb-mixtral-edu-score",
    "lapa-llm/fineweb-nemotron-edu-score",
    "lapa-llm/alignment-score-model",
    "lapa-llm/fasttext-quality-score",
]
# --- Cache to avoid repeated reloads ---
_model_cache: Dict[str, tuple[torch.nn.Module, AutoTokenizer]] = {}


def load_model(model_id: str):
    """Load a sequence-classification scorer and its tokenizer, caching both."""
    if model_id in _model_cache:
        return _model_cache[model_id]

    print(f"🔹 Loading model: {model_id}")
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    model = AutoModelForSequenceClassification.from_pretrained(model_id, torch_dtype=torch.bfloat16)
    model.to(DEVICE).eval()

    _model_cache[model_id] = (model, tokenizer)
    print(f"✅ Loaded {model_id} on {DEVICE}")
    return model, tokenizer


def compute_score(text: str, model: torch.nn.Module, tokenizer: AutoTokenizer) -> dict:
    """Run the scorer on `text` and return the raw score plus a clamped integer score."""
    inputs = tokenizer(
        text,
        return_tensors="pt",
        padding="longest",
        truncation=True,
    ).to(DEVICE)

    with torch.no_grad():
        outputs = model(**inputs)

    logits = outputs.logits.squeeze(-1).float().cpu().numpy()
    res = {}
    res["score"] = logits.tolist()[0]
    # Round to the nearest integer and clamp to the 0-5 range used by the scorers.
    res["int_score"] = [int(round(max(0, min(score, 5)))) for score in logits]
    return res
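
# Illustrative usage of compute_score (the values below are made up, not real
# model outputs):
#   model, tokenizer = load_model("lapa-llm/gec-score-model")
#   compute_score("Some example text.", model, tokenizer)
#   # -> {"score": 3.7, "int_score": [4]}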


# --- Main scoring logic ---
@spaces.GPU
def bot(user_message: str, history: list[dict[str, Any]]):
    if not user_message.strip():
        return "", history

    res = ""
    history = history + [{"role": "user", "content": user_message}]

    scores = {}
    for model_choice in MODEL_OPTIONS:
        model, tokenizer = load_model(model_choice)  # sequence-classification scorer
        score = compute_score(user_message, model, tokenizer)["score"]
        scores[model_choice] = score
        res += f"{model_choice}: {score}\n"

    # Combined score: median of the three corpus-quality scores, scaled by the
    # alignment, manipulative and GEC scores.
    formula_score = (
        np.median([
            scores["lapa-llm/fineweb-nemotron-edu-score"],
            scores["lapa-llm/fineweb-mixtral-edu-score"],
            scores["lapa-llm/fasttext-quality-score"],
        ])
        * scores["lapa-llm/alignment-score-model"]
        * scores["lapa-llm/manipulative-score-model"]
        * scores["lapa-llm/gec-score-model"]
    )
    res += f"Formula (combined) score: {formula_score}\n"

    history.append({"role": "assistant", "content": res.strip()})
    return "", history


# --- UI ---
THEME = gr.themes.Soft(primary_hue="blue", secondary_hue="amber", neutral_hue="stone")


def _clear_chat():
    return "", []


with gr.Blocks(theme=THEME, fill_height=True) as demo:
    gr.Markdown("### 🤔 LAPA Quality Estimation")

    chatbot = gr.Chatbot(type="messages", height=480)
    msg = gr.Textbox(label=None, placeholder="Type your text…", lines=1)
    clear_btn = gr.Button("Clear")

    msg.submit(bot, inputs=[msg, chatbot], outputs=[msg, chatbot])
    clear_btn.click(_clear_chat, outputs=[msg, chatbot])


if __name__ == "__main__":
    demo.queue().launch()
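
# Note: running this app outside a Hugging Face Space assumes that
# HF_LE_LLM_READ_TOKEN is set to a token with access to the lapa-llm models,
# that a CUDA device is available (DEVICE is hard-coded to "cuda"), and that
# the `spaces` package is installed for the @spaces.GPU decorator.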