Chris
Final 4
225a75e
#!/usr/bin/env python3
"""
Complete Workflow Test for GAIA Agent System
Tests both LangGraph and simplified workflow implementations
"""
import os
import sys
import time
import tempfile
from pathlib import Path
# Add src to path for imports
sys.path.insert(0, str(Path(__file__).parent))
from workflow.gaia_workflow import GAIAWorkflow, SimpleGAIAWorkflow
from models.qwen_client import QwenClient
def test_simple_workflow():
"""Test the simplified workflow implementation"""
print("πŸ§ͺ Testing Simple GAIA Workflow")
print("=" * 50)
# Initialize workflow
try:
llm_client = QwenClient()
workflow = SimpleGAIAWorkflow(llm_client)
except Exception as e:
print(f"❌ Failed to initialize workflow: {e}")
return False
# Test cases
test_cases = [
{
"question": "What is the capital of France?",
"description": "Simple web research question",
"expected_agents": ["web_researcher"]
},
{
"question": "Calculate 25% of 200",
"description": "Mathematical reasoning question",
"expected_agents": ["reasoning_agent"]
},
{
"question": "What is the average of 10, 15, 20?",
"description": "Statistical calculation",
"expected_agents": ["reasoning_agent"]
}
]
results = []
total_cost = 0.0
start_time = time.time()
for i, test_case in enumerate(test_cases, 1):
print(f"\nπŸ” Test {i}: {test_case['description']}")
print(f" Question: {test_case['question']}")
try:
# Process question
result_state = workflow.process_question(
question=test_case["question"],
task_id=f"simple_test_{i}"
)
# Check results
success = result_state.is_complete and result_state.final_answer
confidence = result_state.final_confidence
cost = result_state.total_cost
print(f" βœ… Router: {result_state.question_type.value}")
print(f" βœ… Agents: {[a.value for a in result_state.selected_agents]}")
print(f" βœ… Final Answer: {result_state.final_answer[:100]}...")
print(f" πŸ“Š Confidence: {confidence:.2f}")
print(f" πŸ’° Cost: ${cost:.4f}")
print(f" 🎯 Success: {'βœ… PASS' if success else '❌ FAIL'}")
total_cost += cost
results.append(bool(success))
except Exception as e:
print(f" ❌ Test failed: {e}")
results.append(False)
# Summary
total_time = time.time() - start_time
passed = sum(results)
total = len(results)
print(f"\nπŸ“Š Simple Workflow Results:")
print(f" 🎯 Tests Passed: {passed}/{total} ({passed/total*100:.1f}%)")
print(f" πŸ’° Total Cost: ${total_cost:.4f}")
print(f" ⏱️ Total Time: {total_time:.2f}s")
return passed >= total * 0.8 # 80% success rate
def test_complete_workflow_with_files():
"""Test workflow with file processing"""
print("\nπŸ§ͺ Testing Complete Workflow with Files")
print("=" * 50)
try:
llm_client = QwenClient()
workflow = SimpleGAIAWorkflow(llm_client)
except Exception as e:
print(f"❌ Failed to initialize workflow: {e}")
return False
# Create test file
with tempfile.TemporaryDirectory() as temp_dir:
csv_path = os.path.join(temp_dir, "test_data.csv")
with open(csv_path, 'w') as f:
f.write("item,quantity,price\nApple,10,1.50\nBanana,20,0.75\nOrange,15,2.00")
print(f"πŸ“ Created test file: {csv_path}")
try:
result_state = workflow.process_question(
question="What is the total value of all items in this data?",
file_path=csv_path,
file_name="test_data.csv",
task_id="file_test"
)
success = result_state.is_complete and result_state.final_answer
print(f" βœ… Router: {result_state.question_type.value}")
print(f" βœ… Agents: {[a.value for a in result_state.selected_agents]}")
print(f" βœ… Final Answer: {result_state.final_answer[:150]}...")
print(f" πŸ“Š Confidence: {result_state.final_confidence:.2f}")
print(f" πŸ’° Cost: ${result_state.total_cost:.4f}")
print(f" 🎯 File Processing: {'βœ… PASS' if success else '❌ FAIL'}")
return bool(success)
except Exception as e:
print(f" ❌ File test failed: {e}")
return False
def test_workflow_error_handling():
"""Test workflow error handling and edge cases"""
print("\nπŸ§ͺ Testing Workflow Error Handling")
print("=" * 50)
try:
llm_client = QwenClient()
workflow = SimpleGAIAWorkflow(llm_client)
except Exception as e:
print(f"❌ Failed to initialize workflow: {e}")
return False
# Test cases that might cause errors
error_test_cases = [
{
"question": "", # Empty question
"description": "Empty question"
},
{
"question": "x" * 5000, # Very long question
"description": "Extremely long question"
},
{
"question": "What is this file about?",
"file_path": "/nonexistent/file.txt", # Non-existent file
"description": "Non-existent file"
}
]
results = []
for i, test_case in enumerate(error_test_cases, 1):
print(f"\nπŸ” Error Test {i}: {test_case['description']}")
try:
result_state = workflow.process_question(
question=test_case["question"],
file_path=test_case.get("file_path"),
task_id=f"error_test_{i}"
)
# Check if error was handled gracefully
graceful_handling = (
result_state.is_complete and
result_state.final_answer and
not result_state.final_answer.startswith("Traceback")
)
print(f" βœ… Graceful Handling: {'βœ… PASS' if graceful_handling else '❌ FAIL'}")
print(f" βœ… Error Messages: {len(result_state.error_messages)}")
print(f" βœ… Final Answer: {result_state.final_answer[:100]}...")
results.append(graceful_handling)
except Exception as e:
print(f" ❌ Unhandled exception: {e}")
results.append(False)
passed = sum(results)
total = len(results)
print(f"\nπŸ“Š Error Handling Results:")
print(f" 🎯 Tests Passed: {passed}/{total} ({passed/total*100:.1f}%)")
return passed >= total * 0.8
def test_workflow_state_management():
"""Test workflow state management and tracking"""
print("\nπŸ§ͺ Testing Workflow State Management")
print("=" * 50)
try:
llm_client = QwenClient()
workflow = SimpleGAIAWorkflow(llm_client)
except Exception as e:
print(f"❌ Failed to initialize workflow: {e}")
return False
try:
result_state = workflow.process_question(
question="What is the square root of 144?",
task_id="state_test"
)
# Verify state completeness
state_checks = {
"has_task_id": bool(result_state.task_id),
"has_question": bool(result_state.question),
"has_routing_decision": bool(result_state.routing_decision),
"has_processing_steps": len(result_state.processing_steps) > 0,
"has_final_answer": bool(result_state.final_answer),
"is_complete": result_state.is_complete,
"has_cost_tracking": result_state.total_cost >= 0,
"has_timing": result_state.total_processing_time >= 0
}
print(" πŸ“Š State Management Checks:")
for check, passed in state_checks.items():
status = "βœ…" if passed else "❌"
print(f" {status} {check}: {passed}")
# Check state summary
summary = result_state.get_summary()
print(f"\n πŸ“‹ State Summary:")
for key, value in summary.items():
print(f" {key}: {value}")
# Verify processing steps
print(f"\n πŸ”„ Processing Steps ({len(result_state.processing_steps)}):")
for i, step in enumerate(result_state.processing_steps[-5:], 1): # Last 5 steps
print(f" {i}. {step}")
all_passed = all(state_checks.values())
print(f"\n 🎯 State Management: {'βœ… PASS' if all_passed else '❌ FAIL'}")
return all_passed
except Exception as e:
print(f" ❌ State test failed: {e}")
return False
def main():
"""Run all workflow tests"""
print("πŸš€ GAIA Workflow Integration Tests")
print("=" * 60)
test_results = []
start_time = time.time()
# Run all tests
test_results.append(test_simple_workflow())
test_results.append(test_complete_workflow_with_files())
test_results.append(test_workflow_error_handling())
test_results.append(test_workflow_state_management())
# Summary
total_time = time.time() - start_time
passed = sum(test_results)
total = len(test_results)
print("\n" + "=" * 60)
print("πŸ“Š COMPLETE WORKFLOW TEST RESULTS")
print("=" * 60)
print(f"🎯 Test Suites Passed: {passed}/{total} ({passed/total*100:.1f}%)")
print(f"⏱️ Total Time: {total_time:.2f} seconds")
# Test breakdown
test_names = [
"Simple Workflow",
"File Processing",
"Error Handling",
"State Management"
]
print(f"\nπŸ“‹ Test Breakdown:")
for i, (name, result) in enumerate(zip(test_names, test_results)):
status = "βœ…" if result else "❌"
print(f" {status} {name}")
if passed == total:
print("\nπŸš€ ALL WORKFLOW TESTS PASSED! System ready for production!")
return True
elif passed >= total * 0.8:
print("\nβœ… MOST TESTS PASSED! System functional with minor issues.")
return True
else:
print("\n⚠️ SIGNIFICANT ISSUES! Review failures above.")
return False
if __name__ == "__main__":
success = main()
sys.exit(0 if success else 1)