|
|
|
|
|
""" |
|
|
Integration test for GAIA agents.

Tests the Web Researcher, File Processor, and Reasoning agents.
|
|
""" |
|
|
|
|
|
import os |
|
|
import sys |
|
|
import time |
|
|
import tempfile |
|
|
from pathlib import Path |
|
|
|
|
|
|
|
|
# Put this script's own directory first on the import path so that the
# `agents` and `models` imports below can be resolved from there.
sys.path.insert(0, os.fspath(Path(__file__).parent))
|
|
|
|
|
from agents.state import GAIAAgentState, QuestionType |
|
|
from agents.web_researcher import WebResearchAgent |
|
|
from agents.file_processor_agent import FileProcessorAgent |
|
|
from agents.reasoning_agent import ReasoningAgent |
|
|
from models.qwen_client import QwenClient |
|
|
|
|
|
# Minimum pass rate (in percent) at which a run is still reported as healthy.
_READY_PASS_RATE = 80


def _last_agent_result(result_state):
    """Return the last value of ``result_state.agent_results``, or None if it is empty."""
    agent_results = result_state.agent_results
    if not agent_results:
        return None
    return list(agent_results.values())[-1]


def _run_agent_cases(agent_label, agent, test_cases, results):
    """Run every test case through ``agent.process`` and record the outcome.

    For each case a fresh ``GAIAAgentState`` is populated from the case dict
    (``question``, ``question_type``, ``complexity`` and, when present,
    ``file_path``).  One ``(agent_label, 'Test N', success, processing_time)``
    tuple is appended to ``results`` per case, and a PASS/FAIL line is printed.

    A case passes when the agent recorded at least one result and the last
    recorded result has a truthy ``success``.  Any exception raised while
    processing is reported as a failure with a processing time of 0.
    """
    for index, case in enumerate(test_cases, 1):
        state = GAIAAgentState()
        state.question = case["question"]
        if "file_path" in case:
            state.file_path = case["file_path"]
        state.question_type = case["question_type"]
        state.complexity_assessment = case["complexity"]

        try:
            result_state = agent.process(state)
            # Look the last result up once instead of rebuilding the list twice.
            last_result = _last_agent_result(result_state)
            success = last_result is not None and bool(last_result.success)
            elapsed = last_result.processing_time if last_result is not None else 0
        except Exception as e:
            # Top-level test boundary: a crashing agent is a failed test,
            # not a reason to abort the remaining cases.
            results.append((agent_label, f'Test {index}', False, 0))
            print(f" Test {index}: ❌ FAIL ({e})")
        else:
            results.append((agent_label, f'Test {index}', success, elapsed))
            status = "✅ PASS" if success else "❌ FAIL"
            print(f" Test {index}: {status}")


def _report_results(results, total_time):
    """Print the per-agent and overall summary and return the overall pass rate (percent).

    ``results`` is the list of ``(agent, test, success, exec_time)`` tuples
    built by ``_run_agent_cases``.  An empty list yields a pass rate of 0.0
    rather than a division by zero.
    """
    print("\n" + "=" * 50)
    print("📊 AGENT TEST RESULTS")
    print("=" * 50)

    # Aggregate per agent; dict insertion order keeps agents in run order.
    per_agent = {}
    for agent, _test, success, exec_time in results:
        stats = per_agent.setdefault(agent, {'passed': 0, 'total': 0, 'time': 0})
        stats['total'] += 1
        stats['time'] += exec_time
        if success:
            stats['passed'] += 1

    for agent, stats in per_agent.items():
        pass_rate = (stats['passed'] / stats['total']) * 100
        avg_time = stats['time'] / stats['total']
        if pass_rate == 100:
            status = "✅"
        elif pass_rate >= _READY_PASS_RATE:
            status = "⚠️"
        else:
            status = "❌"
        print(f"{status} {agent:15}: {stats['passed']}/{stats['total']} ({pass_rate:5.1f}%) - Avg: {avg_time:.3f}s")

    total_tests = len(results)
    passed_tests = sum(1 for _, _, success, _ in results if success)
    overall_pass_rate = (passed_tests / total_tests) * 100 if total_tests else 0.0

    print(f"\n🎯 OVERALL: {passed_tests}/{total_tests} tests passed ({overall_pass_rate:.1f}%)")
    print(f"⏱️ TOTAL TIME: {total_time:.2f} seconds")
    return overall_pass_rate


def test_agents():
    """Test all implemented agents.

    Builds one ``QwenClient`` and uses it to construct the web research,
    file processor and reasoning agents, runs a fixed set of questions
    through each agent's ``process`` method, and prints a summary.

    Returns:
        True when at least 80% of all test cases passed; False otherwise,
        including when the LLM client cannot be initialized.
    """
    print("🤖 GAIA Agents Integration Test")
    print("=" * 50)

    try:
        llm_client = QwenClient()
    except Exception as e:
        # Without a client none of the agents can be built, so stop here.
        print(f"❌ Failed to initialize LLM client: {e}")
        return False

    results = []
    start_time = time.time()

    print("\n🌐 Testing Web Research Agent...")
    web_agent = WebResearchAgent(llm_client)
    web_test_cases = [
        {
            "question": "What is the capital of France?",
            "question_type": QuestionType.WIKIPEDIA,
            "complexity": "simple",
        },
        {
            "question": "Find information about Python programming language",
            "question_type": QuestionType.WEB_RESEARCH,
            "complexity": "medium",
        },
    ]
    _run_agent_cases('Web Research', web_agent, web_test_cases, results)

    print("\n📁 Testing File Processor Agent...")
    file_agent = FileProcessorAgent(llm_client)

    # The fixture files only need to exist while the file cases run, so the
    # cases are executed inside the temporary-directory context.
    with tempfile.TemporaryDirectory() as temp_dir:
        csv_path = os.path.join(temp_dir, "test.csv")
        with open(csv_path, 'w', encoding="utf-8") as f:
            f.write("name,age,salary\nAlice,25,50000\nBob,30,60000\nCharlie,35,70000")

        py_path = os.path.join(temp_dir, "test.py")
        with open(py_path, 'w', encoding="utf-8") as f:
            f.write("def calculate_sum(a, b):\n return a + b\n\nresult = calculate_sum(5, 3)")

        file_test_cases = [
            {
                "question": "What is the average salary in this data?",
                "file_path": csv_path,
                "question_type": QuestionType.FILE_PROCESSING,
                "complexity": "medium",
            },
            {
                "question": "What does this Python code do?",
                "file_path": py_path,
                "question_type": QuestionType.FILE_PROCESSING,
                "complexity": "simple",
            },
        ]
        _run_agent_cases('File Processor', file_agent, file_test_cases, results)

    print("\n🧠 Testing Reasoning Agent...")
    reasoning_agent = ReasoningAgent(llm_client)
    reasoning_test_cases = [
        {
            "question": "Calculate 15% of 200",
            "question_type": QuestionType.REASONING,
            "complexity": "simple",
        },
        {
            "question": "Convert 100 celsius to fahrenheit",
            "question_type": QuestionType.REASONING,
            "complexity": "simple",
        },
        {
            "question": "What is the average of 10, 15, 20, 25, 30?",
            "question_type": QuestionType.REASONING,
            "complexity": "medium",
        },
    ]
    _run_agent_cases('Reasoning', reasoning_agent, reasoning_test_cases, results)

    total_time = time.time() - start_time
    overall_pass_rate = _report_results(results, total_time)

    if overall_pass_rate >= _READY_PASS_RATE:
        print("🚀 AGENTS READY! Multi-agent system is working correctly!")
        return True

    print("⚠️ ISSUES FOUND! Check individual agent failures above")
    return False
|
|
|
|
|
if __name__ == "__main__":
    # Report the outcome to the calling shell: exit status 0 when the
    # agent tests passed, 1 otherwise.
    exit_code = 0 if test_agents() else 1
    sys.exit(exit_code)