import os
import gradio as gr
import requests
import pandas as pd
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import torch

# --- Constants ---
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
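# The scoring service exposes GET .../questions and POST .../submit;
# run_and_submit_all below calls both endpoints in turn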

# --- Agent Definition ---
class BasicAgent:
    def __init__(self):
        # Swap in any causal LM you have access to; it must be a standard
        # PyTorch/safetensors checkpoint (MLX-format repos such as
        # Qwen/Qwen3-0.6B-MLX-bf16 will not load via AutoModelForCausalLM)
        model_name = "Qwen/Qwen3-0.6B"
        print(f"Loading model {model_name}")
        # Load tokenizer and model
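        # (device_map="auto" requires the `accelerate` package to be installed)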
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.model = AutoModelForCausalLM.from_pretrained(
            model_name,
            torch_dtype=torch.float16,
            device_map="auto"
        )
        # Create generation pipeline (greedy decoding; temperature is ignored
        # when do_sample=False, so it is omitted to avoid a transformers warning)
        self.generator = pipeline(
            "text-generation",
            model=self.model,
            tokenizer=self.tokenizer,
            max_new_tokens=100,
            do_sample=False
        )

    def __call__(self, question: str) -> str:
        print("Question:", question)
        prompt = question.strip()
        output = self.generator(prompt)[0]["generated_text"]
        # Remove the prompt prefix so only the answer remains
        if output.startswith(prompt):
            answer = output[len(prompt):].strip()
        else:
            answer = output.strip()
        # Take first line if multiple lines
        answer = answer.split("\n")[0].strip()
        # Optionally strip trailing punctuation
        answer = answer.rstrip(" .,:;!?")
        print("Answer:", answer)
        return answer
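
    # Sketch of an alternative prompt builder (not wired into __call__): chat-tuned
    # checkpoints such as Qwen3 usually answer better when the question is wrapped
    # with the tokenizer's chat template. enable_thinking=False is a Qwen3-specific
    # template kwarg that suppresses the model's reasoning preamble.
    def build_chat_prompt(self, question: str) -> str:
        messages = [{"role": "user", "content": question.strip()}]
        return self.tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True,
            enable_thinking=False,
        )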

def run_and_submit_all(profile: gr.OAuthProfile | None):
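    """Run the BasicAgent on every fetched question, submit all answers, and report the score."""
    # SPACE_ID is set automatically inside a Hugging Face Space; it is used below
    # to link back to this Space's code for verification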
    space_id = os.getenv("SPACE_ID")

    if not profile:
        return "Please Login to Hugging Face with the button.", None
    username = profile.username
    print("User:", username)

    api_url = DEFAULT_API_URL
    questions_url = f"{api_url}/questions"
    submit_url = f"{api_url}/submit"

    try:
        agent = BasicAgent()
    except Exception as e:
        return f"Error initializing agent: {e}", None

    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"

    # Fetch questions
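    # (expected response: a JSON list of {"task_id": ..., "question": ...} records)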
    try:
        resp = requests.get(questions_url, timeout=15)
        resp.raise_for_status()
        questions_data = resp.json()
    except Exception as e:
        return f"Error fetching questions: {e}", None

    results_log = []
    answers_payload = []
    for item in questions_data:
        task_id = item.get("task_id")
        question_text = item.get("question")
        if not task_id or question_text is None:
            continue
        try:
            ans = agent(question_text)
            answers_payload.append({"task_id": task_id, "submitted_answer": ans})
            results_log.append({
                "Task ID": task_id,
                "Question": question_text,
                "Submitted Answer": ans
            })
        except Exception as e:
            results_log.append({
                "Task ID": task_id,
                "Question": question_text,
                "Submitted Answer": f"ERROR: {e}"
            })

    if not answers_payload:
        return "Agent did not produce any answers.", pd.DataFrame(results_log)

    submission_data = {
        "username": username.strip(),
        "agent_code": agent_code,
        "answers": answers_payload
    }

    try:
        post_resp = requests.post(submit_url, json=submission_data, timeout=60)
        post_resp.raise_for_status()
        result = post_resp.json()
        status_text = (
            f"Submission Successful!\n"
            f"User: {result.get('username')}\n"
            f"Overall Score: {result.get('score', 'N/A')}% "
            f"({result.get('correct_count', '?')}/{result.get('total_attempted', '?')} correct)\n"
            f"Message: {result.get('message', '')}"
        )
        return status_text, pd.DataFrame(results_log)
    except Exception as e:
        return f"Submission Failed: {e}", pd.DataFrame(results_log)

# --- Gradio Interface ---
with gr.Blocks() as demo:
    gr.Markdown("# Agent Evaluation Runner")
    gr.Markdown(
        """
        1. Login with Hugging Face  
        2. Click “Run Evaluation & Submit All Answers”  
        3. Wait for score and see your answers  
        """
    )

    gr.LoginButton()
    run_button = gr.Button("Run Evaluation & Submit All Answers")
    status_out = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
    results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)

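    # Gradio injects the gr.OAuthProfile argument automatically from the login
    # state, so only outputs need to be declared here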
    run_button.click(fn=run_and_submit_all, outputs=[status_out, results_table])

if __name__ == "__main__":
    demo.launch(debug=True, share=False)