Agent_Course_Final_Assignment / src /test_integration.py
Chris
Final 4
225a75e
#!/usr/bin/env python3
"""
Complete Integration Test for GAIA Agent System
Tests the full pipeline: Router -> Agents -> Tools -> Results
"""
import os
import sys
import time
import tempfile
from pathlib import Path
# Add src to path for imports
sys.path.insert(0, str(Path(__file__).parent))
from agents.state import GAIAAgentState, QuestionType, AgentRole
from agents.router import RouterAgent
from agents.web_researcher import WebResearchAgent
from agents.file_processor_agent import FileProcessorAgent
from agents.reasoning_agent import ReasoningAgent
from models.qwen_client import QwenClient
def _run_routed_case(index, test_case, router, web_agent, reasoning_agent, file_agent):
    """Route one question and run it through the agent the case expects.

    Args:
        index: 1-based case number, used to build the state's task id.
        test_case: Dict with "question" and "expected_agent" keys.
        router: Object whose ``route_question(state)`` returns the routed state.
        web_agent: Agent used when the expected role is WEB_RESEARCHER.
        reasoning_agent: Agent used when the expected role is REASONING_AGENT.
        file_agent: Agent used when the expected role is FILE_PROCESSOR.

    Returns:
        ``None`` when the expected role has no handler in this test (the case
        is skipped and not counted), otherwise a ``(success, cost)`` tuple.
    """
    expected_agent = test_case["expected_agent"]

    # Step 1: Initialize state
    state = GAIAAgentState()
    state.task_id = f"test_{index}"
    state.question = test_case["question"]

    # Step 2: Route question
    routed_state = router.route_question(state)
    print(f" βœ… Router: {routed_state.question_type.value} -> {[a.value for a in routed_state.selected_agents]}")

    if expected_agent not in routed_state.selected_agents:
        print(f" ⚠️ Expected agent {expected_agent.value} not selected")
        return False, 0.0

    # Step 3: Process with the agent matching the expected role
    if expected_agent == AgentRole.WEB_RESEARCHER:
        processed_state = web_agent.process(routed_state)
    elif expected_agent == AgentRole.REASONING_AGENT:
        processed_state = reasoning_agent.process(routed_state)
    elif expected_agent == AgentRole.FILE_PROCESSOR:
        processed_state = file_agent.process(routed_state)
    else:
        print(f" ⚠️ Agent {expected_agent.value} not implemented in test")
        return None

    if not processed_state.agent_results:
        print(" ❌ No agent results produced")
        return False, 0.0

    # The last entry of agent_results is the one reported for this case.
    agent_result = list(processed_state.agent_results.values())[-1]
    success = agent_result.success
    cost = processed_state.total_cost

    print(f" βœ… Agent: {agent_result.agent_role.value}")
    print(f" βœ… Result: {agent_result.result[:100]}...")
    print(f" πŸ“Š Confidence: {agent_result.confidence:.2f}")
    print(f" πŸ’° Cost: ${cost:.4f}")
    print(f" ⏱️ Time: {processed_state.total_processing_time:.2f}s")
    print(f" 🎯 Overall: {'βœ… PASS' if success else '❌ FAIL'}")
    return success, cost


def _run_file_case(router, file_agent):
    """Run the CSV file-processing case against a temporary file.

    Args:
        router: Object whose ``route_question(state)`` returns the routed state.
        file_agent: Agent whose ``process(state)`` handles the routed state.

    Returns:
        A ``(success, cost)`` tuple; ``(False, 0.0)`` when the agent produced
        no results.
    """
    with tempfile.TemporaryDirectory() as temp_dir:
        # Create test CSV
        csv_path = os.path.join(temp_dir, "sales_data.csv")
        with open(csv_path, "w", encoding="utf-8") as f:
            f.write("product,sales,price\nWidget A,100,25.50\nWidget B,150,30.00\nWidget C,80,22.75")

        # Initialize state with file
        state = GAIAAgentState()
        state.task_id = "test_file"
        state.question = "What is the total sales value across all products?"
        state.file_name = "sales_data.csv"
        state.file_path = csv_path

        # Route and process while the temporary file still exists
        routed_state = router.route_question(state)
        processed_state = file_agent.process(routed_state)

        if not processed_state.agent_results:
            print(" ❌ File processing failed")
            return False, 0.0

        agent_result = list(processed_state.agent_results.values())[-1]
        success = agent_result.success

        print(f" βœ… Router: {routed_state.question_type.value}")
        print(" βœ… Agent: File processor")
        print(f" βœ… Result: {agent_result.result[:100]}...")
        print(f" πŸ’° Cost: ${processed_state.total_cost:.4f}")
        print(f" 🎯 Overall: {'βœ… PASS' if success else '❌ FAIL'}")
        # Report the outcome only after every print succeeded, so a failure
        # while printing cannot record this case twice.
        return success, processed_state.total_cost


def _print_budget_status(total_cost, total, monthly_budget):
    """Print how the accumulated cost compares with the monthly budget.

    Args:
        total_cost: Sum of the costs recorded across all tests.
        total: Number of recorded tests (must be non-zero).
        monthly_budget: Budget ceiling in dollars.
    """
    if total_cost > monthly_budget:
        print(f"πŸ’° Budget Status: ⚠️ Over budget by ${total_cost - monthly_budget:.4f}")
        return

    remaining_budget = monthly_budget - total_cost
    if total_cost > 0:
        estimated_questions = int(remaining_budget / (total_cost / total))
        print(f"πŸ’° Budget Status: βœ… ${remaining_budget:.4f} remaining (~{estimated_questions} more tests)")
    else:
        # With no recorded cost the per-test average is zero, so an estimate
        # of remaining tests would divide by zero.
        print(f"πŸ’° Budget Status: βœ… ${remaining_budget:.4f} remaining (no cost recorded)")


def test_complete_pipeline():
    """Test the complete GAIA agent pipeline.

    Builds the LLM client, router and agents, runs three routed question
    cases plus one CSV file-processing case, then prints a pass/cost/time
    summary and a budget analysis.

    Returns:
        True when at least 80% of the recorded tests passed, False otherwise
        (including when the system could not be initialized).
    """
    print("πŸš€ GAIA Complete Integration Test")
    print("=" * 50)

    # Initialize system
    try:
        llm_client = QwenClient()
        router = RouterAgent(llm_client)
        web_agent = WebResearchAgent(llm_client)
        file_agent = FileProcessorAgent(llm_client)
        reasoning_agent = ReasoningAgent(llm_client)
    except Exception as e:
        print(f"❌ Failed to initialize system: {e}")
        return False

    # End-to-end test cases
    test_cases = [
        {
            "question": "What is the population of Paris?",
            "description": "Simple Wikipedia/web research question",
            "expected_agent": AgentRole.WEB_RESEARCHER,
        },
        {
            "question": "Calculate the area of a circle with radius 5 meters",
            "description": "Mathematical reasoning with unit conversion",
            "expected_agent": AgentRole.REASONING_AGENT,
        },
        {
            "question": "What is the average of these numbers: 10, 20, 30, 40, 50?",
            "description": "Statistical calculation",
            "expected_agent": AgentRole.REASONING_AGENT,
        },
    ]

    results = []
    total_cost = 0.0
    start_time = time.time()

    for i, test_case in enumerate(test_cases, 1):
        print(f"\nπŸ§ͺ Test {i}: {test_case['description']}")
        print(f" Question: {test_case['question']}")
        try:
            outcome = _run_routed_case(
                i, test_case, router, web_agent, reasoning_agent, file_agent
            )
        except Exception as e:
            # Any failure inside the pipeline counts as a failed test.
            print(f" ❌ Pipeline failed: {e}")
            results.append(False)
            continue

        if outcome is None:
            # Skipped: the expected role has no handler in this test.
            continue
        success, cost = outcome
        total_cost += cost
        results.append(success)

    # File processing test with actual file
    print("\nπŸ§ͺ Test 4: File Processing with CSV")
    print(" Description: Complete file analysis pipeline")
    try:
        success, cost = _run_file_case(router, file_agent)
    except Exception as e:
        print(f" ❌ File test failed: {e}")
        results.append(False)
    else:
        total_cost += cost
        results.append(success)

    # Final summary. The file test always records exactly one outcome, so
    # `total` is never zero here.
    total_time = time.time() - start_time
    passed = sum(results)
    total = len(results)
    pass_rate = (passed / total) * 100

    print("\n" + "=" * 50)
    print("πŸ“Š COMPLETE INTEGRATION RESULTS")
    print("=" * 50)
    print(f"🎯 Tests Passed: {passed}/{total} ({pass_rate:.1f}%)")
    print(f"πŸ’° Total Cost: ${total_cost:.4f}")
    print(f"⏱️ Total Time: {total_time:.2f} seconds")
    print(f"πŸ“ˆ Average Cost per Test: ${total_cost/total:.4f}")
    print(f"⚑ Average Time per Test: {total_time/total:.2f}s")

    # Budget analysis
    monthly_budget = 0.10  # $0.10/month
    _print_budget_status(total_cost, total, monthly_budget)

    # Success criteria
    if pass_rate >= 80 and total_cost <= 0.05:  # 80% success, reasonable cost
        print("\nπŸš€ INTEGRATION SUCCESS! System ready for GAIA benchmark!")
        return True
    elif pass_rate >= 80:
        print("\nβœ… FUNCTIONALITY SUCCESS! (Higher cost than ideal)")
        return True
    else:
        print("\n⚠️ INTEGRATION ISSUES! Check individual test failures")
        return False
if __name__ == "__main__":
    # Map the boolean outcome onto the process exit status: 0 = pass, 1 = fail.
    pipeline_ok = test_complete_pipeline()
    sys.exit(int(not pipeline_ok))