#!/usr/bin/env python3
"""
Complete Integration Test for GAIA Agent System
Tests the full pipeline: Router -> Agents -> Tools -> Results
"""

import os
import sys
import time
import tempfile
from pathlib import Path

# Put this script's own directory on sys.path so the ``agents`` and
# ``models`` packages imported below resolve when run as a script.
sys.path.insert(0, str(Path(__file__).parent))

from agents.state import GAIAAgentState, QuestionType, AgentRole
from agents.router import RouterAgent
from agents.web_researcher import WebResearchAgent
from agents.file_processor_agent import FileProcessorAgent
from agents.reasoning_agent import ReasoningAgent
from models.qwen_client import QwenClient


def test_complete_pipeline():
    """Run the end-to-end GAIA pipeline checks and print a summary report.

    The run has three phases:

    1. Build the LLM client, the router and the three worker agents.
    2. For each question in ``test_cases``: route it, hand the routed state
       to the expected agent, and record that agent's ``success`` flag.
       A final test writes a small CSV into a temporary directory and sends
       it through the router and the file processor agent.
    3. Print pass rate, cost and timing, plus a budget estimate.

    Any exception raised while running a single test is caught, reported,
    and recorded as a failure so that the remaining tests still run.

    Returns:
        bool: ``True`` when at least 80% of the recorded tests passed
        (regardless of cost), ``False`` otherwise or when the system could
        not be initialized. The ``__main__`` guard maps this to the
        process exit code.
    """
    print("๐Ÿš€ GAIA Complete Integration Test")
    print("=" * 50)

    # Initialize system
    try:
        llm_client = QwenClient()
        router = RouterAgent(llm_client)
        web_agent = WebResearchAgent(llm_client)
        file_agent = FileProcessorAgent(llm_client)
        reasoning_agent = ReasoningAgent(llm_client)
    except Exception as e:
        print(f"โŒ Failed to initialize system: {e}")
        return False

    # End-to-end test cases
    test_cases = [
        {
            "question": "What is the population of Paris?",
            "description": "Simple Wikipedia/web research question",
            "expected_agent": AgentRole.WEB_RESEARCHER,
        },
        {
            "question": "Calculate the area of a circle with radius 5 meters",
            "description": "Mathematical reasoning with unit conversion",
            "expected_agent": AgentRole.REASONING_AGENT,
        },
        {
            "question": "What is the average of these numbers: 10, 20, 30, 40, 50?",
            "description": "Statistical calculation",
            "expected_agent": AgentRole.REASONING_AGENT,
        },
    ]

    results = []
    total_cost = 0.0
    start_time = time.time()

    for i, test_case in enumerate(test_cases, 1):
        print(f"\n๐Ÿงช Test {i}: {test_case['description']}")
        print(f" Question: {test_case['question']}")

        try:
            # Step 1: Initialize state
            state = GAIAAgentState()
            state.task_id = f"test_{i}"
            state.question = test_case["question"]

            # Step 2: Route question
            routed_state = router.route_question(state)
            print(f" โœ… Router: {routed_state.question_type.value} -> {[a.value for a in routed_state.selected_agents]}")

            # Step 3: Process with appropriate agent
            if test_case["expected_agent"] in routed_state.selected_agents:
                if test_case["expected_agent"] == AgentRole.WEB_RESEARCHER:
                    processed_state = web_agent.process(routed_state)
                elif test_case["expected_agent"] == AgentRole.REASONING_AGENT:
                    processed_state = reasoning_agent.process(routed_state)
                elif test_case["expected_agent"] == AgentRole.FILE_PROCESSOR:
                    processed_state = file_agent.process(routed_state)
                else:
                    # No handler for this role: the case is skipped and is
                    # NOT counted in ``results`` (neither pass nor fail).
                    print(f" โš ๏ธ Agent {test_case['expected_agent'].value} not implemented in test")
                    continue

                # Check results
                if processed_state.agent_results:
                    # Only the most recently stored agent result is judged.
                    agent_result = list(processed_state.agent_results.values())[-1]
                    success = agent_result.success
                    confidence = agent_result.confidence
                    cost = processed_state.total_cost
                    processing_time = processed_state.total_processing_time

                    print(f" โœ… Agent: {agent_result.agent_role.value}")
                    print(f" โœ… Result: {agent_result.result[:100]}...")
                    print(f" ๐Ÿ“Š Confidence: {confidence:.2f}")
                    print(f" ๐Ÿ’ฐ Cost: ${cost:.4f}")
                    print(f" โฑ๏ธ Time: {processing_time:.2f}s")

                    total_cost += cost
                    results.append(success)
                    print(f" ๐ŸŽฏ Overall: {'โœ… PASS' if success else 'โŒ FAIL'}")
                else:
                    print(f" โŒ No agent results produced")
                    results.append(False)
            else:
                print(f" โš ๏ธ Expected agent {test_case['expected_agent'].value} not selected")
                results.append(False)

        except Exception as e:
            # Top-level boundary for one test: report and keep going.
            print(f" โŒ Pipeline failed: {e}")
            results.append(False)

    # File processing test with actual file.
    # The label is derived from the case list so it stays correct if
    # ``test_cases`` grows or shrinks.
    print(f"\n๐Ÿงช Test {len(test_cases) + 1}: File Processing with CSV")
    print(f" Description: Complete file analysis pipeline")

    try:
        # Everything that touches the CSV stays inside the ``with`` so the
        # file still exists while the agent reads it.
        with tempfile.TemporaryDirectory() as temp_dir:
            # Create test CSV
            csv_path = os.path.join(temp_dir, "sales_data.csv")
            with open(csv_path, 'w', encoding="utf-8") as f:
                f.write("product,sales,price\nWidget A,100,25.50\nWidget B,150,30.00\nWidget C,80,22.75")

            # Initialize state with file
            state = GAIAAgentState()
            state.task_id = "test_file"
            state.question = "What is the total sales value across all products?"
            state.file_name = "sales_data.csv"
            state.file_path = csv_path

            # Route and process
            routed_state = router.route_question(state)
            processed_state = file_agent.process(routed_state)

            if processed_state.agent_results:
                agent_result = list(processed_state.agent_results.values())[-1]
                success = agent_result.success
                total_cost += processed_state.total_cost
                results.append(success)

                print(f" โœ… Router: {routed_state.question_type.value}")
                print(f" โœ… Agent: File processor")
                print(f" โœ… Result: {agent_result.result[:100]}...")
                print(f" ๐Ÿ’ฐ Cost: ${processed_state.total_cost:.4f}")
                print(f" ๐ŸŽฏ Overall: {'โœ… PASS' if success else 'โŒ FAIL'}")
            else:
                print(f" โŒ File processing failed")
                results.append(False)

    except Exception as e:
        print(f" โŒ File test failed: {e}")
        results.append(False)

    # Final summary.
    # ``results`` always holds at least the file-test entry, so ``total``
    # is never zero here.
    total_time = time.time() - start_time
    passed = sum(results)
    total = len(results)
    pass_rate = (passed / total) * 100
    average_cost = total_cost / total

    print("\n" + "=" * 50)
    print("๐Ÿ“Š COMPLETE INTEGRATION RESULTS")
    print("=" * 50)
    print(f"๐ŸŽฏ Tests Passed: {passed}/{total} ({pass_rate:.1f}%)")
    print(f"๐Ÿ’ฐ Total Cost: ${total_cost:.4f}")
    print(f"โฑ๏ธ Total Time: {total_time:.2f} seconds")
    print(f"๐Ÿ“ˆ Average Cost per Test: ${average_cost:.4f}")
    print(f"โšก Average Time per Test: {total_time/total:.2f}s")

    # Budget analysis
    monthly_budget = 0.10  # $0.10/month
    if total_cost <= monthly_budget:
        remaining_budget = monthly_budget - total_cost
        if average_cost > 0:
            estimated_questions = int(remaining_budget / average_cost)
            print(f"๐Ÿ’ฐ Budget Status: โœ… ${remaining_budget:.4f} remaining (~{estimated_questions} more tests)")
        else:
            # Nothing was spent (e.g. every test failed before incurring
            # cost), so a "tests remaining" estimate would divide by zero.
            print(f"๐Ÿ’ฐ Budget Status: โœ… ${remaining_budget:.4f} remaining")
    else:
        print(f"๐Ÿ’ฐ Budget Status: โš ๏ธ Over budget by ${total_cost - monthly_budget:.4f}")

    # Success criteria
    required_pass_rate = 80  # percent of recorded tests that must pass
    ideal_cost_ceiling = 0.05  # total spend considered "reasonable"
    if pass_rate >= required_pass_rate and total_cost <= ideal_cost_ceiling:
        print("\n๐Ÿš€ INTEGRATION SUCCESS! System ready for GAIA benchmark!")
        return True
    elif pass_rate >= required_pass_rate:
        print("\nโœ… FUNCTIONALITY SUCCESS! (Higher cost than ideal)")
        return True
    else:
        print("\nโš ๏ธ INTEGRATION ISSUES! Check individual test failures")
        return False


if __name__ == "__main__":
    success = test_complete_pipeline()
    sys.exit(0 if success else 1)