File size: 8,043 Bytes
225a75e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 |
#!/usr/bin/env python3
"""
Complete Integration Test for GAIA Agent System
Tests the full pipeline: Router -> Agents -> Tools -> Results
"""
import os
import sys
import time
import tempfile
from pathlib import Path
# Put this file's directory on sys.path so the agents/ and models/ packages import
sys.path.insert(0, str(Path(__file__).parent))
from agents.state import GAIAAgentState, QuestionType, AgentRole
from agents.router import RouterAgent
from agents.web_researcher import WebResearchAgent
from agents.file_processor_agent import FileProcessorAgent
from agents.reasoning_agent import ReasoningAgent
from models.qwen_client import QwenClient
def test_complete_pipeline() -> bool:
    """Run the end-to-end GAIA pipeline checks and report whether they pass.

    Builds the LLM client plus the router, web-research, file-processing and
    reasoning agents, sends three text questions and one CSV-backed question
    through them, prints per-test details, and finishes with a cost/time
    summary and a budget estimate.

    Returns:
        bool: ``True`` when at least 80% of the recorded tests succeeded,
        ``False`` otherwise (including when system initialization fails).
    """
    print("🚀 GAIA Complete Integration Test")
    print("=" * 50)

    # Initialize system; a failure here makes every later step meaningless,
    # so report it and bail out instead of raising.
    try:
        llm_client = QwenClient()
        router = RouterAgent(llm_client)
        web_agent = WebResearchAgent(llm_client)
        file_agent = FileProcessorAgent(llm_client)
        reasoning_agent = ReasoningAgent(llm_client)
    except Exception as e:
        print(f"❌ Failed to initialize system: {e}")
        return False

    # End-to-end test cases
    test_cases = [
        {
            "question": "What is the population of Paris?",
            "description": "Simple Wikipedia/web research question",
            "expected_agent": AgentRole.WEB_RESEARCHER,
        },
        {
            "question": "Calculate the area of a circle with radius 5 meters",
            "description": "Mathematical reasoning with unit conversion",
            "expected_agent": AgentRole.REASONING_AGENT,
        },
        {
            "question": "What is the average of these numbers: 10, 20, 30, 40, 50?",
            "description": "Statistical calculation",
            "expected_agent": AgentRole.REASONING_AGENT,
        },
    ]

    results = []  # one bool per recorded test
    total_cost = 0.0
    start_time = time.time()

    for i, test_case in enumerate(test_cases, 1):
        print(f"\n🧪 Test {i}: {test_case['description']}")
        print(f" Question: {test_case['question']}")
        expected_agent = test_case["expected_agent"]
        try:
            # Step 1: Initialize state
            state = GAIAAgentState()
            state.task_id = f"test_{i}"
            state.question = test_case["question"]

            # Step 2: Route question
            routed_state = router.route_question(state)
            print(
                f" ✅ Router: {routed_state.question_type.value} -> "
                f"{[a.value for a in routed_state.selected_agents]}"
            )

            # Step 3: Process with appropriate agent
            if expected_agent not in routed_state.selected_agents:
                print(f" ⚠️ Expected agent {expected_agent.value} not selected")
                results.append(False)
                continue

            if expected_agent == AgentRole.WEB_RESEARCHER:
                processed_state = web_agent.process(routed_state)
            elif expected_agent == AgentRole.REASONING_AGENT:
                processed_state = reasoning_agent.process(routed_state)
            elif expected_agent == AgentRole.FILE_PROCESSOR:
                processed_state = file_agent.process(routed_state)
            else:
                # Skipped cases are deliberately not counted in `results`.
                print(f" ⚠️ Agent {expected_agent.value} not implemented in test")
                continue

            # Check results
            if not processed_state.agent_results:
                print(" ❌ No agent results produced")
                results.append(False)
                continue

            # The last entry is taken as the outcome of the agent just run.
            agent_result = list(processed_state.agent_results.values())[-1]
            success = agent_result.success
            confidence = agent_result.confidence
            cost = processed_state.total_cost
            processing_time = processed_state.total_processing_time

            print(f" ✅ Agent: {agent_result.agent_role.value}")
            print(f" ✅ Result: {agent_result.result[:100]}...")
            print(f" 📊 Confidence: {confidence:.2f}")
            print(f" 💰 Cost: ${cost:.4f}")
            print(f" ⏱️ Time: {processing_time:.2f}s")

            total_cost += cost
            results.append(success)
            print(f" 🎯 Overall: {'✅ PASS' if success else '❌ FAIL'}")
        except Exception as e:
            # One failing case must not abort the remaining cases.
            print(f" ❌ Pipeline failed: {e}")
            results.append(False)

    # File processing test with actual file
    print("\n🧪 Test 4: File Processing with CSV")
    print(" Description: Complete file analysis pipeline")
    try:
        with tempfile.TemporaryDirectory() as temp_dir:
            # Create test CSV
            csv_path = os.path.join(temp_dir, "sales_data.csv")
            with open(csv_path, "w", encoding="utf-8") as f:
                f.write(
                    "product,sales,price\n"
                    "Widget A,100,25.50\n"
                    "Widget B,150,30.00\n"
                    "Widget C,80,22.75"
                )

            # Initialize state with file
            state = GAIAAgentState()
            state.task_id = "test_file"
            state.question = "What is the total sales value across all products?"
            state.file_name = "sales_data.csv"
            state.file_path = csv_path

            # Route and process (inside the `with` so the file still exists)
            routed_state = router.route_question(state)
            processed_state = file_agent.process(routed_state)

            if processed_state.agent_results:
                agent_result = list(processed_state.agent_results.values())[-1]
                success = agent_result.success
                total_cost += processed_state.total_cost
                results.append(success)
                print(f" ✅ Router: {routed_state.question_type.value}")
                print(" ✅ Agent: File processor")
                print(f" ✅ Result: {agent_result.result[:100]}...")
                print(f" 💰 Cost: ${processed_state.total_cost:.4f}")
                print(f" 🎯 Overall: {'✅ PASS' if success else '❌ FAIL'}")
            else:
                print(" ❌ File processing failed")
                results.append(False)
    except Exception as e:
        print(f" ❌ File test failed: {e}")
        results.append(False)

    # Final summary
    total_time = time.time() - start_time
    passed = sum(results)
    total = len(results)
    # Test 4 always records a result, but guard the divisions regardless.
    pass_rate = (passed / total) * 100 if total else 0.0
    avg_cost = total_cost / total if total else 0.0
    avg_time = total_time / total if total else 0.0

    print("\n" + "=" * 50)
    print("📊 COMPLETE INTEGRATION RESULTS")
    print("=" * 50)
    print(f"🎯 Tests Passed: {passed}/{total} ({pass_rate:.1f}%)")
    print(f"💰 Total Cost: ${total_cost:.4f}")
    print(f"⏱️ Total Time: {total_time:.2f} seconds")
    print(f"📊 Average Cost per Test: ${avg_cost:.4f}")
    print(f"⚡ Average Time per Test: {avg_time:.2f}s")

    # Budget analysis
    monthly_budget = 0.10  # $0.10/month
    if total_cost <= monthly_budget:
        remaining_budget = monthly_budget - total_cost
        if avg_cost > 0:
            estimated_questions = int(remaining_budget / avg_cost)
            print(
                f"💰 Budget Status: ✅ ${remaining_budget:.4f} remaining "
                f"(~{estimated_questions} more tests)"
            )
        else:
            # Nothing was spent (e.g. every pipeline failed before incurring
            # cost), so a per-test estimate would divide by zero.
            print(
                f"💰 Budget Status: ✅ ${remaining_budget:.4f} remaining "
                "(no cost incurred)"
            )
    else:
        print(f"💰 Budget Status: ⚠️ Over budget by ${total_cost - monthly_budget:.4f}")

    # Success criteria
    if pass_rate >= 80 and total_cost <= 0.05:  # 80% success, reasonable cost
        print("\n🎉 INTEGRATION SUCCESS! System ready for GAIA benchmark!")
        return True
    if pass_rate >= 80:
        print("\n✅ FUNCTIONALITY SUCCESS! (Higher cost than ideal)")
        return True
    print("\n⚠️ INTEGRATION ISSUES! Check individual test failures")
    return False
if __name__ == "__main__":
    # Map the boolean outcome onto a process exit status: 0 on success,
    # 1 on failure, so shells and CI can act on the result.
    success = test_complete_pipeline()
    sys.exit(0 if success else 1)