File size: 10,748 Bytes
225a75e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
#!/usr/bin/env python3
"""
Complete Workflow Test for GAIA Agent System
Tests both LangGraph and simplified workflow implementations
"""

import os
import sys
import time
import tempfile
from pathlib import Path

# Add src to path for imports
sys.path.insert(0, str(Path(__file__).parent))

from workflow.gaia_workflow import GAIAWorkflow, SimpleGAIAWorkflow
from models.qwen_client import QwenClient

def test_simple_workflow():
    """Run three plain-text questions through ``SimpleGAIAWorkflow``.

    Every question is submitted with ``process_question``. A case counts as
    passed when the returned state is complete and has a non-empty final
    answer; any exception raised while processing or reporting a case counts
    as a failure for that case only.

    Returns:
        bool: ``False`` when the client or workflow cannot be constructed,
        otherwise whether at least 80% of the cases passed.
    """

    print("πŸ§ͺ Testing Simple GAIA Workflow")
    print("=" * 50)

    # Build the workflow; a construction failure aborts the whole suite.
    try:
        workflow = SimpleGAIAWorkflow(QwenClient())
    except Exception as e:
        print(f"❌ Failed to initialize workflow: {e}")
        return False

    # NOTE: "expected_agents" is carried along as data only; nothing below
    # compares it with the agents the workflow actually selected.
    cases = [
        {
            "question": "What is the capital of France?",
            "description": "Simple web research question",
            "expected_agents": ["web_researcher"]
        },
        {
            "question": "Calculate 25% of 200",
            "description": "Mathematical reasoning question",
            "expected_agents": ["reasoning_agent"]
        },
        {
            "question": "What is the average of 10, 15, 20?",
            "description": "Statistical calculation",
            "expected_agents": ["reasoning_agent"]
        }
    ]

    outcomes = []
    spent = 0.0
    started = time.time()

    for index, case in enumerate(cases, start=1):
        print(f"\nπŸ” Test {index}: {case['description']}")
        print(f"   Question: {case['question']}")

        try:
            state = workflow.process_question(
                question=case["question"],
                task_id=f"simple_test_{index}"
            )

            # Pass criterion: finished state with a non-empty answer.
            ok = bool(state.is_complete and state.final_answer)
            confidence = state.final_confidence
            cost = state.total_cost
            verdict = 'βœ… PASS' if ok else '❌ FAIL'

            print(f"   βœ… Router: {state.question_type.value}")
            print(f"   βœ… Agents: {[a.value for a in state.selected_agents]}")
            print(f"   βœ… Final Answer: {state.final_answer[:100]}...")
            print(f"   πŸ“Š Confidence: {confidence:.2f}")
            print(f"   πŸ’° Cost: ${cost:.4f}")
            print(f"   🎯 Success: {verdict}")

            # Cost is only accumulated for cases that got this far.
            spent += cost
            outcomes.append(ok)

        except Exception as e:
            print(f"   ❌ Test failed: {e}")
            outcomes.append(False)

    # Summary
    elapsed = time.time() - started
    passed = sum(outcomes)
    total = len(outcomes)

    print("\nπŸ“Š Simple Workflow Results:")
    print(f"   🎯 Tests Passed: {passed}/{total} ({passed/total*100:.1f}%)")
    print(f"   πŸ’° Total Cost: ${spent:.4f}")
    print(f"   ⏱️  Total Time: {elapsed:.2f}s")

    # 80% success rate required.
    return passed >= total * 0.8

def test_complete_workflow_with_files():
    """Ask one question about a small CSV file written to a temp directory.

    A three-row CSV (item, quantity, price) is created inside a
    ``tempfile.TemporaryDirectory`` and its path is handed to
    ``process_question``. The directory is removed when the ``with`` block
    exits, whatever the outcome.

    Returns:
        bool: ``True`` when the returned state is complete and has a
        non-empty final answer; ``False`` on initialization failure or on
        any exception during processing or reporting.
    """

    print("\nπŸ§ͺ Testing Complete Workflow with Files")
    print("=" * 50)

    try:
        workflow = SimpleGAIAWorkflow(QwenClient())
    except Exception as e:
        print(f"❌ Failed to initialize workflow: {e}")
        return False

    csv_content = "item,quantity,price\nApple,10,1.50\nBanana,20,0.75\nOrange,15,2.00"

    # The fixture file lives only for the duration of this block.
    with tempfile.TemporaryDirectory() as temp_dir:
        csv_path = os.path.join(temp_dir, "test_data.csv")
        with open(csv_path, 'w') as handle:
            handle.write(csv_content)

        print(f"πŸ“ Created test file: {csv_path}")

        try:
            state = workflow.process_question(
                question="What is the total value of all items in this data?",
                file_path=csv_path,
                file_name="test_data.csv",
                task_id="file_test"
            )

            # Pass criterion: finished state with a non-empty answer.
            ok = bool(state.is_complete and state.final_answer)
            verdict = 'βœ… PASS' if ok else '❌ FAIL'

            print(f"   βœ… Router: {state.question_type.value}")
            print(f"   βœ… Agents: {[a.value for a in state.selected_agents]}")
            print(f"   βœ… Final Answer: {state.final_answer[:150]}...")
            print(f"   πŸ“Š Confidence: {state.final_confidence:.2f}")
            print(f"   πŸ’° Cost: ${state.total_cost:.4f}")
            print(f"   🎯 File Processing: {verdict}")

            return ok

        except Exception as e:
            print(f"   ❌ File test failed: {e}")
            return False

def test_workflow_error_handling():
    """Feed the workflow awkward inputs and check that it degrades gracefully.

    Three inputs are tried: an empty question, a 5000-character question,
    and a question that points at a file path which does not exist. A case
    passes when ``process_question`` returns (rather than raises) a state
    that is complete and whose final answer is non-empty and does not start
    with ``"Traceback"``.

    Returns:
        bool: ``False`` when the client or workflow cannot be constructed,
        otherwise whether at least 80% of the cases passed.
    """

    print("\nπŸ§ͺ Testing Workflow Error Handling")
    print("=" * 50)

    try:
        llm_client = QwenClient()
        workflow = SimpleGAIAWorkflow(llm_client)
    except Exception as e:
        print(f"❌ Failed to initialize workflow: {e}")
        return False

    # Test cases that might cause errors
    error_test_cases = [
        {
            "question": "",  # Empty question
            "description": "Empty question"
        },
        {
            "question": "x" * 5000,  # Very long question
            "description": "Extremely long question"
        },
        {
            "question": "What is this file about?",
            "file_path": "/nonexistent/file.txt",  # Non-existent file
            "description": "Non-existent file"
        }
    ]

    results = []

    for i, test_case in enumerate(error_test_cases, 1):
        print(f"\nπŸ” Error Test {i}: {test_case['description']}")

        try:
            result_state = workflow.process_question(
                question=test_case["question"],
                file_path=test_case.get("file_path"),  # None when the case has no file
                task_id=f"error_test_{i}"
            )

            # Check if error was handled gracefully.
            # An ``and`` chain yields its last evaluated operand, which here
            # may be "" or None. Coerce to bool so that ``sum(results)``
            # below always adds numbers and never hits a str/None entry.
            graceful_handling = bool(
                result_state.is_complete and
                result_state.final_answer and
                not result_state.final_answer.startswith("Traceback")
            )

            # A missing answer is shown as an empty preview, so the case is
            # reported as a FAIL and not as an unhandled exception.
            answer_preview = (result_state.final_answer or "")[:100]

            print(f"   βœ… Graceful Handling: {'βœ… PASS' if graceful_handling else '❌ FAIL'}")
            print(f"   βœ… Error Messages: {len(result_state.error_messages)}")
            print(f"   βœ… Final Answer: {answer_preview}...")

            results.append(graceful_handling)

        except Exception as e:
            # The workflow let an exception escape: that is a failed case.
            print(f"   ❌ Unhandled exception: {e}")
            results.append(False)

    passed = sum(results)
    total = len(results)

    print(f"\nπŸ“Š Error Handling Results:")
    print(f"   🎯 Tests Passed: {passed}/{total} ({passed/total*100:.1f}%)")

    return passed >= total * 0.8

def test_workflow_state_management():
    """Check that one processed question leaves a fully populated state.

    A single question is processed, then the returned state is inspected
    for: task id, question, routing decision, at least one processing step,
    final answer, completion flag, non-negative total cost and non-negative
    total processing time. The state's ``get_summary()`` mapping and up to
    the last five processing steps are printed for inspection.

    Returns:
        bool: ``True`` only when every check is truthy; ``False`` on
        initialization failure or on any exception while processing or
        inspecting the state.
    """

    print("\nπŸ§ͺ Testing Workflow State Management")
    print("=" * 50)

    try:
        workflow = SimpleGAIAWorkflow(QwenClient())
    except Exception as e:
        print(f"❌ Failed to initialize workflow: {e}")
        return False

    try:
        state = workflow.process_question(
            question="What is the square root of 144?",
            task_id="state_test"
        )

        # One entry per property the state is expected to expose.
        checks = {
            "has_task_id": bool(state.task_id),
            "has_question": bool(state.question),
            "has_routing_decision": bool(state.routing_decision),
            "has_processing_steps": len(state.processing_steps) > 0,
            "has_final_answer": bool(state.final_answer),
            "is_complete": state.is_complete,
            "has_cost_tracking": state.total_cost >= 0,
            "has_timing": state.total_processing_time >= 0
        }

        print("   πŸ“Š State Management Checks:")
        for label, ok in checks.items():
            marker = "βœ…" if ok else "❌"
            print(f"      {marker} {label}: {ok}")

        # Dump whatever the state reports about itself.
        print("\n   πŸ“‹ State Summary:")
        for field, content in state.get_summary().items():
            print(f"      {field}: {content}")

        # Show at most the five most recent steps, renumbered from 1.
        print(f"\n   πŸ”„ Processing Steps ({len(state.processing_steps)}):")
        recent_steps = state.processing_steps[-5:]
        for position, step in enumerate(recent_steps, start=1):
            print(f"      {position}. {step}")

        all_passed = all(checks.values())
        verdict = 'βœ… PASS' if all_passed else '❌ FAIL'
        print(f"\n   🎯 State Management: {verdict}")

        return all_passed

    except Exception as e:
        print(f"   ❌ State test failed: {e}")
        return False

def main():
    """Run every workflow test suite in order and print a combined report.

    The four suites run sequentially: simple workflow, file processing,
    error handling, state management. Each returns a pass/fail flag, and the
    report lists the flags next to the suite names.

    Returns:
        bool: ``True`` when all suites passed, or when at least 80% of them
        did; ``False`` otherwise.
    """

    print("πŸš€ GAIA Workflow Integration Tests")
    print("=" * 60)

    # Display name paired with the suite it labels, in execution order.
    suites = (
        ("Simple Workflow", test_simple_workflow),
        ("File Processing", test_complete_workflow_with_files),
        ("Error Handling", test_workflow_error_handling),
        ("State Management", test_workflow_state_management),
    )

    started = time.time()
    outcomes = [run_suite() for _, run_suite in suites]

    # Summary
    elapsed = time.time() - started
    passed = sum(outcomes)
    total = len(outcomes)

    print("\n" + "=" * 60)
    print("πŸ“Š COMPLETE WORKFLOW TEST RESULTS")
    print("=" * 60)
    print(f"🎯 Test Suites Passed: {passed}/{total} ({passed/total*100:.1f}%)")
    print(f"⏱️  Total Time: {elapsed:.2f} seconds")

    # Test breakdown
    print("\nπŸ“‹ Test Breakdown:")
    for (label, _), outcome in zip(suites, outcomes):
        marker = "βœ…" if outcome else "❌"
        print(f"   {marker} {label}")

    if passed == total:
        print("\nπŸš€ ALL WORKFLOW TESTS PASSED! System ready for production!")
        return True

    # With exactly four suites, 3/4 is 75% and falls short of this 80%
    # threshold, so this branch can only trigger if more suites are added.
    if passed >= total * 0.8:
        print("\nβœ… MOST TESTS PASSED! System functional with minor issues.")
        return True

    print("\n⚠️  SIGNIFICANT ISSUES! Review failures above.")
    return False

# Script entry point: exit status 0 when main() reports success, 1 otherwise.
if __name__ == "__main__":
    exit_code = 0 if main() else 1
    sys.exit(exit_code)