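# Gradio Space that scores input text with several LAPA quality / safety
# classifiers and combines them into a single "formula" score.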
import os
from typing import Any, Dict

import gradio as gr
import numpy as np
import spaces  # Hugging Face Spaces runtime helper
import torch
from huggingface_hub import login
from transformers import AutoModelForSequenceClassification, AutoTokenizer

# Log in to HF to get access to the (gated) model weights
HF_LE_LLM_READ_TOKEN = os.environ.get("HF_LE_LLM_READ_TOKEN")
if HF_LE_LLM_READ_TOKEN:
    login(token=HF_LE_LLM_READ_TOKEN)

# Constants
DEFAULT_MODEL = "lapa-llm/manipulative-score-model"
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
MODEL_OPTIONS = [
    "lapa-llm/manipulative-score-model",
    "lapa-llm/gec-score-model",
    "lapa-llm/fineweb-mixtral-edu-score",
    "lapa-llm/fineweb-nemotron-edu-score",
    "lapa-llm/alignment-score-model",
    "lapa-llm/fasttext-quality-score",
]

# --- Cache to avoid repeated reloads ---
_model_cache: Dict[str, tuple[torch.nn.Module, AutoTokenizer]] = {}


def load_model(model_id: str):
    if model_id in _model_cache:
        return _model_cache[model_id]

    print(f"🔹 Loading model: {model_id}")
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    model = AutoModelForSequenceClassification.from_pretrained(
        model_id, torch_dtype=torch.bfloat16
    )
    model.to(DEVICE).eval()

    _model_cache[model_id] = (model, tokenizer)
    print(f"✅ Loaded {model_id} on {DEVICE}")
    return model, tokenizer

def compute_score(text: str, model: torch.nn.Module, tokenizer: AutoTokenizer) -> dict:
    inputs = tokenizer(
        text,
        return_tensors="pt",
        padding="longest",
        truncation=True,
    ).to(DEVICE)
    with torch.no_grad():
        outputs = model(**inputs)
        logits = outputs.logits.squeeze(-1).float().cpu().numpy()

    res = {}
    res["score"] = logits.tolist()[0]
    # Clamp the raw score to the 0-5 range used by the edu-score style classifiers
    res["int_score"] = [int(round(max(0, min(score, 5)))) for score in logits]
    return res

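# Standalone usage sketch (assumes GPU access and a token that unlocks the
# model; the output values below are illustrative, not real results):
#
#     model, tokenizer = load_model(DEFAULT_MODEL)
#     compute_score("A short example sentence.", model, tokenizer)
#     # -> {"score": 0.87, "int_score": [1]}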
# --- Main scoring logic ---
def bot(user_message: str, history: list[dict[str, Any]]):
    if not user_message.strip():
        return "", history

    history = history + [{"role": "user", "content": user_message}]

    res = ""
    scores = {}
    for model_choice in MODEL_OPTIONS:
        model, tokenizer = load_model(model_choice)  # cached scoring model + tokenizer
        score = compute_score(user_message, model, tokenizer)["score"]
        scores[model_choice] = score
        res += f"{model_choice}: {score}\n"

    # Combined score: median of the three quality scores, multiplied by the
    # alignment, manipulative, and GEC scores.
    formula_score = (
        np.median([
            scores["lapa-llm/fineweb-nemotron-edu-score"],
            scores["lapa-llm/fineweb-mixtral-edu-score"],
            scores["lapa-llm/fasttext-quality-score"],
        ])
        * scores["lapa-llm/alignment-score-model"]
        * scores["lapa-llm/manipulative-score-model"]
        * scores["lapa-llm/gec-score-model"]
    )
    res += f"Formula (combined) score: {formula_score}\n"

    history.append({"role": "assistant", "content": res.strip()})
    return "", history

# --- UI ---
THEME = gr.themes.Soft(primary_hue="blue", secondary_hue="amber", neutral_hue="stone")


def _clear_chat():
    return "", []


with gr.Blocks(theme=THEME, fill_height=True) as demo:
    gr.Markdown("### 🤔 LAPA Quality Estimation")
    chatbot = gr.Chatbot(type="messages", height=480)
    msg = gr.Textbox(label=None, placeholder="Type your text…", lines=1)
    clear_btn = gr.Button("Clear")

    msg.submit(bot, inputs=[msg, chatbot], outputs=[msg, chatbot])
    clear_btn.click(_clear_chat, outputs=[msg, chatbot])

if __name__ == "__main__":
    demo.queue().launch()
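# Running locally (assumption, not part of the Space config):
#   pip install gradio torch transformers numpy spaces huggingface_hub
#   HF_LE_LLM_READ_TOKEN=<your token> python app.py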