#!/usr/bin/env python3
"""Integration test for all GAIA Agent tools.

Tests the Wikipedia, Web Search, Calculator, and File Processor tools, then
runs a small workflow that chains three of them together. Run as a script,
the process exits with status 0 when both checks pass and 1 otherwise.
"""

import os
import sys
import time
import tempfile
from pathlib import Path

# Add this file's directory to the path so the `tools` package is importable
sys.path.insert(0, str(Path(__file__).parent))

from tools.wikipedia_tool import WikipediaTool
from tools.web_search_tool import WebSearchTool
from tools.calculator import CalculatorTool
from tools.file_processor import FileProcessorTool


def _run_cases(tool_name, tool, cases, success_key, results, *,
               precision=2, labels=None):
    """Execute every case against one tool and record/print the outcome.

    Args:
        tool_name: Display name used as the first field of each result tuple.
        tool: Object exposing ``execute(case)``; the returned object is read
            for ``success``, ``result`` and ``execution_time``.
        cases: Inputs passed one at a time to ``tool.execute``.
        success_key: Key looked up in ``result.result`` that must be truthy
            (in addition to ``result.success``) for the case to pass.
        results: List that receives one
            ``(tool_name, 'Test N', success, seconds)`` tuple per case.
        precision: Number of decimals used when printing the elapsed time.
        labels: Optional per-case labels printed in parentheses after the
            test number; must be as long as ``cases`` when given.
    """
    for index, case in enumerate(cases, 1):
        started = time.perf_counter()
        error = None
        try:
            result = tool.execute(case)
            success = bool(
                result.success and result.result.get(success_key, False)
            )
            elapsed = result.execution_time
        except Exception as exc:
            # One misbehaving tool must not abort the whole run: record the
            # case as failed and fall back to the locally measured time.
            success = False
            elapsed = time.perf_counter() - started
            error = exc

        results.append((tool_name, f'Test {index}', success, elapsed))

        status = "✅ PASS" if success else "❌ FAIL"
        label = f" ({labels[index - 1]})" if labels else ""
        print(f" Test {index}{label}: {status} ({elapsed:.{precision}f}s)")
        if error is not None:
            print(f"   Error: {type(error).__name__}: {error}")


def _print_summary(results, total_time):
    """Print per-tool and overall statistics; return the overall verdict.

    Args:
        results: List of ``(tool, test, success, seconds)`` tuples.
        total_time: Wall-clock duration of the whole run in seconds.

    Returns:
        True when at least 80% of the recorded tests passed, False otherwise
        (including when no tests were recorded).
    """
    print("\n" + "=" * 50)
    print("📊 INTEGRATION TEST RESULTS")
    print("=" * 50)

    if not results:
        # Avoid dividing by zero below; an empty run cannot count as passing.
        print("⚠️ No tests were executed")
        return False

    # Aggregate results by tool (dicts keep first-seen order)
    per_tool = {}
    for tool, _test, success, exec_time in results:
        stats = per_tool.setdefault(tool, {'passed': 0, 'total': 0, 'time': 0})
        stats['total'] += 1
        stats['time'] += exec_time
        if success:
            stats['passed'] += 1

    for tool, stats in per_tool.items():
        pass_rate = (stats['passed'] / stats['total']) * 100
        avg_time = stats['time'] / stats['total']
        if pass_rate == 100:
            status = "✅"
        elif pass_rate >= 80:
            status = "⚠️"
        else:
            status = "❌"
        print(f"{status} {tool:15}: {stats['passed']}/{stats['total']} "
              f"({pass_rate:5.1f}%) - Avg: {avg_time:.3f}s")

    # Overall results
    passed_tests = sum(1 for _, _, success, _ in results if success)
    total_tests = len(results)
    overall_pass_rate = (passed_tests / total_tests) * 100
    print(f"\n🎯 OVERALL: {passed_tests}/{total_tests} tests passed "
          f"({overall_pass_rate:.1f}%)")
    print(f"⏱️ TOTAL TIME: {total_time:.2f} seconds")

    # Success criteria
    if overall_pass_rate >= 90:
        print("🚀 EXCELLENT! All tools working correctly - "
              "Ready for agent integration!")
        return True
    if overall_pass_rate >= 80:
        print("✅ GOOD! Most tools working - Minor issues to address")
        return True
    print("⚠️ NEEDS WORK! Significant issues found - "
          "Check individual tool failures")
    return False


def test_all_tools():
    """Comprehensive test of all GAIA agent tools.

    Runs a fixed set of cases against each tool, prints a per-test line and
    a summary table, and returns True when the overall pass rate is at
    least 80%.
    """
    print("🧪 GAIA Agent Tools Integration Test")
    print("=" * 50)

    results = []
    start_time = time.time()

    # Test 1: Wikipedia Tool
    print("\n📚 Testing Wikipedia Tool...")
    _run_cases(
        'Wikipedia',
        WikipediaTool(),
        [
            "Albert Einstein",
            {"query": "Machine Learning", "action": "summary"},
        ],
        'found',
        results,
    )

    # Test 2: Web Search Tool
    print("\n🔍 Testing Web Search Tool...")
    _run_cases(
        'Web Search',
        WebSearchTool(),
        [
            "Python programming",
            {"query": "https://www.python.org", "action": "extract"},
        ],
        'found',
        results,
    )

    # Test 3: Calculator Tool
    print("\n🧮 Testing Calculator Tool...")
    _run_cases(
        'Calculator',
        CalculatorTool(),
        [
            "2 + 3 * 4",
            {"operation": "statistics", "data": [1, 2, 3, 4, 5]},
            {"operation": "convert", "value": 100,
             "from_unit": "cm", "to_unit": "m"},
        ],
        'success',
        results,
        precision=3,
    )

    # Test 4: File Processor Tool
    print("\n📁 Testing File Processor Tool...")
    file_processor_tool = FileProcessorTool()

    # The fixture files only need to exist while the tool processes them,
    # so the cases run inside the temporary-directory context.
    with tempfile.TemporaryDirectory() as temp_dir:
        # Create CSV test file
        csv_path = os.path.join(temp_dir, "test.csv")
        with open(csv_path, 'w', encoding="utf-8") as f:
            f.write("name,value\nTest,42\nData,100")

        # Create Python test file
        py_path = os.path.join(temp_dir, "test.py")
        with open(py_path, 'w', encoding="utf-8") as f:
            f.write("def test_function():\n return 'Hello, World!'")

        test_files = [csv_path, py_path]
        _run_cases(
            'File Processor',
            file_processor_tool,
            test_files,
            'success',
            results,
            precision=3,
            labels=[os.path.splitext(path)[1] for path in test_files],
        )

    # Summary
    total_time = time.time() - start_time
    return _print_summary(results, total_time)


def test_tool_coordination():
    """Test how tools can work together in a coordinated workflow.

    Scenario: look up Python on Wikipedia, run a web search, then feed the
    number of search results into the calculator.

    Returns:
        True when all three steps report success; False when any step
        reports failure or raises.
    """
    print("\n🤝 Testing Tool Coordination...")
    print("-" * 30)

    try:
        # Step 1: Get information about Python
        wiki_tool = WikipediaTool()
        wiki_result = wiki_tool.execute("Python (programming language)")
        if not wiki_result.success:
            print("❌ Step 1: Wikipedia lookup failed")
            return False
        print("✅ Step 1: Wikipedia lookup successful")

        # Step 2: Get additional web information
        web_tool = WebSearchTool()
        web_result = web_tool.execute("Python programming language features")
        if not web_result.success:
            print("❌ Step 2: Web search failed")
            return False
        print("✅ Step 2: Web search successful")

        # Step 3: Calculate some metrics
        calc_tool = CalculatorTool()
        search_count = len(web_result.result.get('results', []))
        calc_result = calc_tool.execute(f"sqrt({search_count}) * 10")
        if not calc_result.success:
            print("❌ Step 3: Calculation failed")
            return False
        print("✅ Step 3: Calculation successful")
        print(f" Coordinated result: Found {search_count} web results, "
              f"computed metric: "
              f"{calc_result.result['calculation']['result']}")
        return True

    except Exception as e:
        # Top-level boundary of the workflow: report and fail, don't crash.
        print(f"❌ Coordination test failed: {e}")
        return False


def main():
    """Run both test suites and return the process exit status (0 or 1)."""
    # Both suites always run so the output covers everything even when the
    # first one fails.
    success = test_all_tools()
    coordination_success = test_tool_coordination()

    if success and coordination_success:
        print("\n🎉 ALL TESTS PASSED! Tools are ready for agent integration!")
        return 0
    print("\n⚠️ Some tests failed. Check output above.")
    return 1


if __name__ == "__main__":
    sys.exit(main())