Spaces:
Sleeping
Sleeping
File size: 4,728 Bytes
10e9b7d eccf8e4 3c4371f 97fb162 10e9b7d e80aab9 3db6293 e80aab9 97fb162 31243f4 97fb162 b4bc036 97fb162 b4bc036 97fb162 b4bc036 97fb162 b4bc036 31243f4 97fb162 b4bc036 97fb162 b4bc036 97fb162 4021bf3 97fb162 7d65c66 97fb162 3c4371f 7e4a06b 31243f4 e80aab9 31243f4 97fb162 36ed51a 3c4371f 97fb162 eccf8e4 97fb162 7d65c66 97fb162 e80aab9 7d65c66 31243f4 97fb162 31243f4 97fb162 31243f4 97fb162 31243f4 97fb162 e80aab9 97fb162 e80aab9 97fb162 e80aab9 97fb162 7d65c66 97fb162 e80aab9 97fb162 e80aab9 97fb162 0ee0419 e514fd7 97fb162 e514fd7 e80aab9 7e4a06b 31243f4 97fb162 7d65c66 e80aab9 97fb162 e80aab9 97fb162 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 |
import os
import gradio as gr
import requests
import pandas as pd
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import torch
# --- Constants ---
# Scoring service for the HF Agents course (unit 4): GET {url}/questions
# returns the task list, POST {url}/submit scores the answers.
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
# --- Agent Definition ---
class BasicAgent:
    """Minimal LLM agent: greedily generates a short, single-line answer.

    Loads a causal LM once at construction time; each call runs the question
    through a text-generation pipeline and post-processes the output down to
    one trimmed line.
    """

    def __init__(self):
        # Change this model to one you have access to.
        # FIX: the previous "Qwen/Qwen3-0.6B-MLX-bf16" checkpoint is in Apple
        # MLX format, which AutoModelForCausalLM cannot load; use the standard
        # transformers checkpoint instead.
        model_name = "Qwen/Qwen3-0.6B"
        print(f"Loading model {model_name}")
        # Load tokenizer and model (fp16, auto device placement).
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.model = AutoModelForCausalLM.from_pretrained(
            model_name,
            torch_dtype=torch.float16,
            device_map="auto"
        )
        # Greedy decoding: do_sample=False already makes generation
        # deterministic. FIX: passing temperature=0.0 alongside it trips
        # transformers' generation-config validation (temperature must be
        # strictly positive) and is ignored under greedy decoding, so it
        # is omitted.
        self.generator = pipeline(
            "text-generation",
            model=self.model,
            tokenizer=self.tokenizer,
            max_new_tokens=100,
            do_sample=False
        )

    def __call__(self, question: str) -> str:
        """Return a one-line answer to *question*, stripped of the prompt echo.

        Args:
            question: The raw question text.

        Returns:
            The first line of the model's continuation, with surrounding
            whitespace and trailing punctuation removed.
        """
        print("Question:", question)
        prompt = question.strip()
        output = self.generator(prompt)[0]["generated_text"]
        # text-generation pipelines echo the prompt by default; remove the
        # prefix so only the continuation remains.
        if output.startswith(prompt):
            answer = output[len(prompt):].strip()
        else:
            answer = output.strip()
        # Take first line if multiple lines.
        answer = answer.split("\n")[0].strip()
        # Strip trailing punctuation so exact-match scoring is more forgiving.
        answer = answer.rstrip(" .,:;!?")
        print("Answer:", answer)
        return answer
def run_and_submit_all(profile: gr.OAuthProfile | None):
    """Run the BasicAgent on every question from the scoring API and submit.

    Args:
        profile: OAuth profile injected by Gradio's login flow, or None when
            the user is not logged in.

    Returns:
        A (status_message, results_dataframe) pair for the two Gradio outputs;
        the dataframe is None on early failures.
    """
    # Require an authenticated Hugging Face user before doing anything.
    if not profile:
        return "Please Login to Hugging Face with the button.", None
    username = profile.username
    print("User:", username)

    space_id = os.getenv("SPACE_ID")
    base_url = DEFAULT_API_URL
    questions_endpoint = f"{base_url}/questions"
    submit_endpoint = f"{base_url}/submit"

    try:
        agent = BasicAgent()
    except Exception as e:
        return f"Error initializing agent: {e}", None

    # Link back to this Space's source, sent along with the submission.
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"

    # Fetch the question list.
    try:
        response = requests.get(questions_endpoint, timeout=15)
        response.raise_for_status()
        question_items = response.json()
    except Exception as e:
        return f"Error fetching questions: {e}", None

    results_log = []
    answers_payload = []
    for entry in question_items:
        task_id = entry.get("task_id")
        question_text = entry.get("question")
        # Skip malformed entries that lack an id or a question.
        if not task_id or question_text is None:
            continue
        try:
            answer = agent(question_text)
        except Exception as e:
            # Record the failure but keep processing remaining questions.
            results_log.append({
                "Task ID": task_id,
                "Question": question_text,
                "Submitted Answer": f"ERROR: {e}"
            })
            continue
        answers_payload.append({"task_id": task_id, "submitted_answer": answer})
        results_log.append({
            "Task ID": task_id,
            "Question": question_text,
            "Submitted Answer": answer
        })

    if not answers_payload:
        return "Agent did not produce any answers.", pd.DataFrame(results_log)

    submission_data = {
        "username": username.strip(),
        "agent_code": agent_code,
        "answers": answers_payload
    }

    # Submit and format the score report; any failure (HTTP or JSON) falls
    # through to the error return below.
    try:
        post_resp = requests.post(submit_endpoint, json=submission_data, timeout=60)
        post_resp.raise_for_status()
        result = post_resp.json()
        status_text = (
            "Submission Successful!\n"
            f"User: {result.get('username')}\n"
            f"Overall Score: {result.get('score', 'N/A')}% "
            f"({result.get('correct_count', '?')}/{result.get('total_attempted', '?')} correct)\n"
            f"Message: {result.get('message', '')}"
        )
        return status_text, pd.DataFrame(results_log)
    except Exception as e:
        return f"Submission Failed: {e}", pd.DataFrame(results_log)
# --- Gradio Interface ---
# `demo` must stay at module level: the Spaces runtime looks it up by name.
with gr.Blocks() as demo:
    gr.Markdown("# Agent Evaluation Runner")
    gr.Markdown(
        """
1. Login with Hugging Face
2. Click “Run Evaluation & Submit All Answers”
3. Wait for score and see your answers
"""
    )
    # Login button wires the OAuth profile into run_and_submit_all via its
    # gr.OAuthProfile type annotation (no explicit `inputs` needed).
    gr.LoginButton()
    run_btn = gr.Button("Run Evaluation & Submit All Answers")
    status_box = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
    answers_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
    run_btn.click(fn=run_and_submit_all, outputs=[status_box, answers_table])

if __name__ == "__main__":
    demo.launch(debug=True, share=False)
|