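"""Gradio demo for LAPA quality estimation: scores input text with each model
in MODEL_OPTIONS and reports the per-model scores plus a combined score."""
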
import os
from typing import Any, Dict

import gradio as gr
import numpy as np
import spaces
import torch
from huggingface_hub import login
from transformers import AutoModelForSequenceClassification, AutoTokenizer

# Log in to the Hugging Face Hub to get access to the model weights
HF_LE_LLM_READ_TOKEN = os.environ.get("HF_LE_LLM_READ_TOKEN")
login(token=HF_LE_LLM_READ_TOKEN)
# Constants
DEFAULT_MODEL = "lapa-llm/manipulative-score-model"
DEVICE = "cuda"

MODEL_OPTIONS = [
    "lapa-llm/manipulative-score-model",
    "lapa-llm/gec-score-model",
    "lapa-llm/fineweb-mixtral-edu-score",
    "lapa-llm/fineweb-nemotron-edu-score",
    "lapa-llm/alignment-score-model",
    "lapa-llm/fasttext-quality-score",
]
# --- Cache to avoid repeated reloads ---
_model_cache: Dict[str, tuple[torch.nn.Module, AutoTokenizer]] = {}


def load_model(model_id: str):
    """Load a sequence-classification scorer and its tokenizer, caching both."""
    if model_id in _model_cache:
        return _model_cache[model_id]

    print(f"🔹 Loading model: {model_id}")
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    model = AutoModelForSequenceClassification.from_pretrained(model_id, torch_dtype=torch.bfloat16)
    model.to(DEVICE).eval()

    _model_cache[model_id] = (model, tokenizer)
    print(f"✅ Loaded {model_id} on {DEVICE}")
    return model, tokenizer


def compute_score(text: str, model: torch.nn.Module, tokenizer: AutoTokenizer) -> dict:
    """Run the scorer on `text` and return the raw score plus a clamped integer score."""
    inputs = tokenizer(
        text,
        return_tensors="pt",
        padding="longest",
        truncation=True,
    ).to(DEVICE)

    with torch.no_grad():
        outputs = model(**inputs)

    logits = outputs.logits.squeeze(-1).float().cpu().numpy()
    res = {}
    res["score"] = logits.tolist()[0]
    # Round to the nearest integer and clamp to the 0-5 range used by the scorers.
    res["int_score"] = [int(round(max(0, min(score, 5)))) for score in logits]
    return res
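
# Illustrative usage of compute_score (the values below are made up, not real
# model outputs):
#   model, tokenizer = load_model("lapa-llm/gec-score-model")
#   compute_score("Some example text.", model, tokenizer)
#   # -> {"score": 3.7, "int_score": [4]}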


# --- Main scoring logic ---
@spaces.GPU
def bot(user_message: str, history: list[dict[str, Any]]):
    if not user_message.strip():
        return "", history

    res = ""
    history = history + [{"role": "user", "content": user_message}]

    scores = {}
    for model_choice in MODEL_OPTIONS:
        model, tokenizer = load_model(model_choice)  # sequence-classification scorer
        score = compute_score(user_message, model, tokenizer)["score"]
        scores[model_choice] = score
        res += f"{model_choice}: {score}\n"

    # Combined score: median of the three corpus-quality scores, scaled by the
    # alignment, manipulative and GEC scores.
    formula_score = (
        np.median([
            scores["lapa-llm/fineweb-nemotron-edu-score"],
            scores["lapa-llm/fineweb-mixtral-edu-score"],
            scores["lapa-llm/fasttext-quality-score"],
        ])
        * scores["lapa-llm/alignment-score-model"]
        * scores["lapa-llm/manipulative-score-model"]
        * scores["lapa-llm/gec-score-model"]
    )
    res += f"Formula (combined) score: {formula_score}\n"

    history.append({"role": "assistant", "content": res.strip()})
    return "", history


# --- UI ---
THEME = gr.themes.Soft(primary_hue="blue", secondary_hue="amber", neutral_hue="stone")


def _clear_chat():
    return "", []


with gr.Blocks(theme=THEME, fill_height=True) as demo:
    gr.Markdown("### 🤔 LAPA Quality Estimation")

    chatbot = gr.Chatbot(type="messages", height=480)
    msg = gr.Textbox(label=None, placeholder="Type your text…", lines=1)
    clear_btn = gr.Button("Clear")

    msg.submit(bot, inputs=[msg, chatbot], outputs=[msg, chatbot])
    clear_btn.click(_clear_chat, outputs=[msg, chatbot])


if __name__ == "__main__":
    demo.queue().launch()
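
# Note: running this app outside a Hugging Face Space assumes that
# HF_LE_LLM_READ_TOKEN is set to a token with access to the lapa-llm models,
# that a CUDA device is available (DEVICE is hard-coded to "cuda"), and that
# the `spaces` package is installed for the @spaces.GPU decorator.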