import os
from typing import Any, Dict

import gradio as gr
import numpy as np
import spaces
import torch
from huggingface_hub import login
from transformers import AutoModelForSequenceClassification, AutoTokenizer

# Log in to the Hub to get access to the model weights
HF_LE_LLM_READ_TOKEN = os.environ.get("HF_LE_LLM_READ_TOKEN")
if HF_LE_LLM_READ_TOKEN:
    login(token=HF_LE_LLM_READ_TOKEN)
# Constants
DEFAULT_MODEL = "lapa-llm/manipulative-score-model"
DEVICE = "cuda"
MODEL_OPTIONS = [
    "lapa-llm/manipulative-score-model",
    "lapa-llm/gec-score-model",
    "lapa-llm/fineweb-mixtral-edu-score",
    "lapa-llm/fineweb-nemotron-edu-score",
    "lapa-llm/alignment-score-model",
    "lapa-llm/fasttext-quality-score",
]
# --- Cache to avoid repeated reloads ---
_model_cache: Dict[str, tuple[torch.nn.Module, AutoTokenizer]] = {}
def load_model(model_id: str):
    """Load a scoring model and its tokenizer, reusing cached instances."""
    if model_id in _model_cache:
        return _model_cache[model_id]
    print(f"🔹 Loading model: {model_id}")
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    model = AutoModelForSequenceClassification.from_pretrained(
        model_id, torch_dtype=torch.bfloat16
    )
    model.to(DEVICE).eval()
    _model_cache[model_id] = (model, tokenizer)
    print(f"✅ Loaded {model_id} on {DEVICE}")
    return model, tokenizer
def compute_score(text: str, model: torch.nn.Module, tokenizer: AutoTokenizer) -> dict:
    inputs = tokenizer(
        text,
        return_tensors="pt",
        padding="longest",
        truncation=True,
    ).to(DEVICE)
    with torch.no_grad():
        outputs = model(**inputs)
    logits = outputs.logits.squeeze(-1).float().cpu().numpy()
    res = {}
    res["score"] = logits.tolist()[0]
    # Rounded score, clamped to the 0-5 range used by FineWeb-Edu-style classifiers
    res["int_score"] = [int(round(max(0, min(score, 5)))) for score in logits]
    return res
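
# Hypothetical usage sketch (comment only; not executed by the app):
#   model, tokenizer = load_model(DEFAULT_MODEL)
#   compute_score("Example text to score.", model, tokenizer)
#   -> {"score": <raw logit>, "int_score": [<score rounded and clamped to 0-5>]}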
# --- Main scoring logic ---
@spaces.GPU
def bot(user_message: str, history: list[dict[str, Any]]):
    if not user_message.strip():
        return "", history
    res = ""
    history = history + [{"role": "user", "content": user_message}]
    scores = {}
    for model_choice in MODEL_OPTIONS:
        model, tokenizer = load_model(model_choice)  # cached (model, tokenizer) pair
        score = compute_score(user_message, model, tokenizer)["score"]
        scores[model_choice] = score
        res += f"{model_choice}: {score}\n"
    # Combined score: median of the three quality scores, multiplied by the
    # alignment, manipulative, and GEC scores
    formula_score = (
        np.median(
            [
                scores["lapa-llm/fineweb-nemotron-edu-score"],
                scores["lapa-llm/fineweb-mixtral-edu-score"],
                scores["lapa-llm/fasttext-quality-score"],
            ]
        )
        * scores["lapa-llm/alignment-score-model"]
        * scores["lapa-llm/manipulative-score-model"]
        * scores["lapa-llm/gec-score-model"]
    )
    res += f"Formula (combined) score: {formula_score}\n"
    history.append({"role": "assistant", "content": res.strip()})
    return "", history
# --- UI ---
THEME = gr.themes.Soft(primary_hue="blue", secondary_hue="amber", neutral_hue="stone")


def _clear_chat():
    return "", []


with gr.Blocks(theme=THEME, fill_height=True) as demo:
    gr.Markdown("### 🤔 LAPA Quality Estimation")
    chatbot = gr.Chatbot(type="messages", height=480)
    msg = gr.Textbox(show_label=False, placeholder="Type your text…", lines=1)
    clear_btn = gr.Button("Clear")
    msg.submit(bot, inputs=[msg, chatbot], outputs=[msg, chatbot])
    clear_btn.click(_clear_chat, outputs=[msg, chatbot])

if __name__ == "__main__":
    demo.queue().launch()