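"""Gradio Space for LAPA quality estimation.

Runs the submitted text through each scoring model in MODEL_OPTIONS, shows the
individual scores in a chat window, and reports a combined "formula" score.
"""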
import os
from typing import Any, Dict

import gradio as gr
import numpy as np
import spaces
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer


# Log in to HF to get access to the model weights
from huggingface_hub import login

HF_LE_LLM_READ_TOKEN = os.environ.get("HF_LE_LLM_READ_TOKEN")
if HF_LE_LLM_READ_TOKEN:
    login(token=HF_LE_LLM_READ_TOKEN)

# Constants
DEFAULT_MODEL = "lapa-llm/manipulative-score-model"
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

MODEL_OPTIONS = [
    "lapa-llm/manipulative-score-model",
    "lapa-llm/gec-score-model",
    "lapa-llm/fineweb-mixtral-edu-score",
    "lapa-llm/fineweb-nemotron-edu-score",
    "lapa-llm/alignment-score-model",
    "lapa-llm/fasttext-quality-score",

]

# --- Cache to avoid repeated reloads ---
_model_cache: Dict[str, tuple[torch.nn.Module, AutoTokenizer]] = {}


def load_model(model_id: str):
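    """Load a sequence-classification scorer and its tokenizer, caching them per model id."""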
    if model_id in _model_cache:
        return _model_cache[model_id]

    print(f"🔹 Loading model: {model_id}")
    tokenizer = AutoTokenizer.from_pretrained(model_id)

    model = AutoModelForSequenceClassification.from_pretrained(model_id, torch_dtype=torch.bfloat16)
    print(f"Detected model: {model_id}")

    model.to(DEVICE).eval()
    _model_cache[model_id] = (model, tokenizer)
    print(f"✅ Loaded model on {DEVICE}")
    return model, tokenizer


def compute_score(text: str, model: torch.nn.Module, tokenizer: AutoTokenizer) -> dict:
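    """Tokenize `text`, run the scorer, and return the raw score plus a clamped 0-5 integer score."""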
    inputs = tokenizer(
        text,
        return_tensors="pt",
        padding="longest",
        truncation=True,
    ).to(DEVICE)

    with torch.no_grad():
        outputs = model(**inputs)
        logits = outputs.logits.squeeze(-1).float().cpu().numpy()

    res = {}
    res["score"] = logits.tolist()[0]
    # Clamp scores to the 0-5 range and round to the nearest integer bucket.
    res["int_score"] = [int(round(max(0, min(score, 5)))) for score in logits]
    return res

# --- Main scoring logic ---
@spaces.GPU
def bot(user_message: str, history: list[dict[str, Any]]):
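    """Score the submitted text with every model in MODEL_OPTIONS and append the results to the chat history."""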
    if not user_message.strip():
        return "", history

    res = ""
    history = history + [{"role": "user", "content": user_message}]
    scores = {}
    for model_choice in MODEL_OPTIONS:
        model, tokenizer = load_model(model_choice)  # cached sequence-classification scorer
        score = compute_score(user_message, model, tokenizer)["score"]
        scores[model_choice] = score
        res += f"{model_choice}: {score}\n"



    formula_score = np.median([scores["lapa-llm/fineweb-nemotron-edu-score"], scores["lapa-llm/fineweb-mixtral-edu-score"], scores["lapa-llm/fasttext-quality-score"],]) \
    * scores["lapa-llm/alignment-score-model"] * scores["lapa-llm/manipulative-score-model"] * scores["lapa-llm/gec-score-model"]

    res += f"Formula (combined) score: {formula_score}\n"

    history.append({"role": "assistant", "content": res.strip()})
    return "", history

# --- UI ---
THEME = gr.themes.Soft(primary_hue="blue", secondary_hue="amber", neutral_hue="stone")


def _clear_chat():
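    """Clear the textbox and the chat history."""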
    return "", []


with gr.Blocks(theme=THEME, fill_height=True) as demo:
    gr.Markdown("### 🤔 LAPA Quality Estimation")

    chatbot = gr.Chatbot(type="messages", height=480)
    msg = gr.Textbox(show_label=False, placeholder="Type your text…", lines=1)
    clear_btn = gr.Button("Clear")

    msg.submit(bot, inputs=[msg, chatbot], outputs=[msg, chatbot])
    clear_btn.click(_clear_chat, outputs=[msg, chatbot])

if __name__ == "__main__":
    demo.queue().launch()