"""
TTRLVR + AZR Integration Validation Suite

An integrated script that validates the whole system:

1. Environment validation
2. Unit tests
3. Mini integration test (1 problem, 2 rounds)
4. Disk space check
5. Performance benchmark
6. Final validation report generation
"""
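
# Typical invocation (all paths below are hardcoded for this host; the filename shown
# here is hypothetical -- use whatever this script is actually saved as):
#   python run_validation_suite.py
# Exit code 0 means every check passed; exit code 1 means at least one check failed.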

import os
import sys
import json
import subprocess
import time
from datetime import datetime
from pathlib import Path

# Make the local TestTime-RLVR-v2 checkout importable.
sys.path.append('/home/ubuntu/RLVR/TestTime-RLVR-v2')


def run_command(command, description, timeout=300):
    """Run a shell command and return a (success, stdout, stderr) triple."""
    print(f"🔄 {description}")
    print(f"   Command: {command}")

    start_time = time.time()

    try:
        result = subprocess.run(
            command,
            shell=True,
            capture_output=True,
            text=True,
            timeout=timeout,
            cwd='/home/ubuntu/RLVR/TestTime-RLVR-v2'
        )

        duration = time.time() - start_time

        if result.returncode == 0:
            print(f"✅ {description} completed ({duration:.1f}s)")
            return True, result.stdout, result.stderr
        else:
            print(f"❌ {description} failed ({duration:.1f}s)")
            print(f"   Error: {result.stderr}")
            return False, result.stdout, result.stderr

    except subprocess.TimeoutExpired:
        print(f"⏰ {description} timed out after {timeout}s")
        return False, "", "Timeout"
    except Exception as e:
        print(f"💥 {description} crashed: {e}")
        return False, "", str(e)
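
# Illustrative use of the helper above: callers unpack the (success, stdout, stderr)
# triple directly, e.g.
#   ok, out, err = run_command("python --version", "Python version check", timeout=30)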


def run_environment_validation():
    """Run environment validation."""
    print("\n" + "="*60)
    print("1️⃣ Environment Validation")
    print("="*60)

    success, _, _ = run_command(
        "cd /home/ubuntu/RLVR/TestTime-RLVR-v2/test && python validate_environment.py",
        "Environment validation"
    )

    return success


def run_unit_tests():
    """Run unit tests."""
    print("\n" + "="*60)
    print("2️⃣ Unit Tests")
    print("="*60)

    success, _, _ = run_command(
        "cd /home/ubuntu/RLVR/TestTime-RLVR-v2/test && python test_ttrlvr_azr_integration.py",
        "Unit tests"
    )

    return success


def run_mini_integration_test():
    """Mini integration test (1 problem, 2 rounds)."""
    print("\n" + "="*60)
    print("3️⃣ Mini Integration Test")
    print("="*60)

    success, stdout, stderr = run_command(
        "cd /home/ubuntu/RLVR/TestTime-RLVR-v2/test && python train_ttrlvr_azr.py --benchmark mbpp --problems 1 --rounds 2 --debug",
        "Mini integration test (1 problem, 2 rounds)",
        timeout=1800
    )

    if success:
        print("✅ Mini integration test completed successfully")

        # Locate the most recently created result directory and summarise it.
        results_dir = Path("/home/ubuntu/RLVR/TestTime-RLVR-v2/test/results/ttrlvr_azr")
        if results_dir.exists():
            latest_result = max(results_dir.glob("*"), key=os.path.getctime, default=None)
            if latest_result:
                print(f"📁 Results saved to: {latest_result}")

                result_file = latest_result / "training_results.json"
                if result_file.exists():
                    with open(result_file, 'r') as f:
                        results = json.load(f)

                    print("📊 Test summary:")
                    print(f"   - Success: {results.get('success', False)}")
                    print(f"   - Completed rounds: {len(results.get('rounds', {}))}")
                    print(f"   - Final model: {results.get('final_model', 'N/A')}")

    return success
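
# Note: only the keys read above (success, rounds, final_model) are assumed to exist in
# training_results.json; an illustrative shape would be
#   {"success": true, "rounds": {"1": {...}, "2": {...}}, "final_model": "/path/to/checkpoint"}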


def check_disk_space():
    """Check available disk space."""
    print("\n" + "="*60)
    print("4️⃣ Disk Space Check")
    print("="*60)

    paths_to_check = [
        "/home/ubuntu/RLVR",
        "/data",
        "/tmp"
    ]

    all_good = True

    for path in paths_to_check:
        if os.path.exists(path):
            success, stdout, _ = run_command(f"df -h {path}", f"Disk usage for {path}")
            if success:
                # `df -h` prints a header row and one data row; field 5 is the Use% column.
                lines = stdout.strip().split('\n')
                if len(lines) > 1:
                    fields = lines[1].split()
                    if len(fields) >= 5:
                        used_percent = fields[4].rstrip('%')
                        if used_percent.isdigit() and int(used_percent) > 90:
                            print(f"⚠️ Warning: {path} is {used_percent}% full")
                            all_good = False
                        else:
                            print(f"✅ {path}: {used_percent}% used")
        else:
            print(f"⚠️ Path not found: {path}")

    return all_good


def run_performance_benchmark():
    """Performance benchmark: GPU, system memory, and CPU usage."""
    print("\n" + "="*60)
    print("5️⃣ Performance Benchmark")
    print("="*60)

    print("🖥️ GPU memory status:")
    gpu_success, gpu_output, _ = run_command("nvidia-smi --query-gpu=memory.used,memory.total --format=csv,noheader,nounits", "GPU memory check")

    if gpu_success:
        for i, line in enumerate(gpu_output.strip().split('\n')):
            if line.strip():
                try:
                    used, total = map(int, line.split(', '))
                    usage_percent = (used / total) * 100
                    print(f"   GPU {i}: {used}MB / {total}MB ({usage_percent:.1f}%)")
                except (ValueError, ZeroDivisionError):
                    # Fall back to the raw line if it is not a "used, total" integer pair.
                    print(f"   GPU {i}: {line}")

    print("\n💾 System memory status:")
    mem_success, mem_output, _ = run_command("free -h", "System memory check")
    if mem_success:
        for line in mem_output.split('\n')[:2]:
            print(f"   {line}")

    print("\n🖥️ CPU status:")
    cpu_success, cpu_output, _ = run_command("top -bn1 | grep 'Cpu(s)' | head -1", "CPU usage check")
    if cpu_success:
        print(f"   {cpu_output.strip()}")

    return gpu_success and mem_success
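
# With --format=csv,noheader,nounits, the nvidia-smi query above emits one line per GPU
# such as "11178, 81920" (used MiB, total MiB), which the loop reports as
# "GPU 0: 11178MB / 81920MB (13.6%)". The values shown here are illustrative.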


def generate_validation_report(results):
    """Generate the validation report (JSON + HTML)."""
    print("\n" + "="*60)
    print("6️⃣ Validation Report Generation")
    print("="*60)

    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    report_file = f"/tmp/ttrlvr_azr_validation_report_{timestamp}.json"

    report = {
        'timestamp': datetime.now().isoformat(),
        'validation_results': results,
        'summary': {
            'total_tests': len(results),
            'passed_tests': sum(1 for result in results.values() if result['success']),
            'overall_success': all(result['success'] for result in results.values())
        },
        'recommendations': []
    }

    html_report = f"/tmp/ttrlvr_azr_validation_report_{timestamp}.html"

    html_content = f"""
    <!DOCTYPE html>
    <html>
    <head>
        <title>TTRLVR + AZR Validation Report</title>
        <style>
            body {{ font-family: Arial, sans-serif; margin: 40px; }}
            .header {{ background-color: #f0f0f0; padding: 20px; border-radius: 5px; }}
            .success {{ color: green; }}
            .failure {{ color: red; }}
            .test-section {{ margin: 20px 0; padding: 15px; border: 1px solid #ddd; border-radius: 5px; }}
            .recommendations {{ background-color: #fff3cd; padding: 15px; border-radius: 5px; }}
        </style>
    </head>
    <body>
        <div class="header">
            <h1>TTRLVR + AZR Integration Validation Report</h1>
            <p>Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}</p>
            <p>Overall Status: <span class="{'success' if report['summary']['overall_success'] else 'failure'}">
                {'✅ ALL TESTS PASSED' if report['summary']['overall_success'] else '❌ SOME TESTS FAILED'}
            </span></p>
            <p>Tests: {report['summary']['passed_tests']}/{report['summary']['total_tests']} passed</p>
        </div>

        <h2>Test Results</h2>
    """

    for test_name, result in results.items():
        status = "success" if result['success'] else "failure"
        icon = "✅" if result['success'] else "❌"

        html_content += f"""
        <div class="test-section">
            <h3 class="{status}">{icon} {test_name}</h3>
            <p><strong>Duration:</strong> {result.get('duration', 'N/A')}</p>
            <p><strong>Details:</strong> {result.get('details', 'No details available')}</p>
        </div>
        """

    if report['recommendations']:
        html_content += """
        <div class="recommendations">
            <h2>Recommendations</h2>
            <ul>
        """
        for rec in report['recommendations']:
            html_content += f"<li>{rec}</li>"

        html_content += """
            </ul>
        </div>
        """

    html_content += """
    </body>
    </html>
    """

    with open(report_file, 'w') as f:
        json.dump(report, f, indent=2)

    with open(html_report, 'w') as f:
        f.write(html_content)

    print(f"📄 JSON report: {report_file}")
    print(f"📄 HTML report: {html_report}")

    return report
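
# The JSON report mirrors the `report` dict built above, e.g. (illustrative values):
#   {"timestamp": "...", "validation_results": {...},
#    "summary": {"total_tests": 5, "passed_tests": 5, "overall_success": true},
#    "recommendations": []}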


def main():
    """Main entry point."""
    print("🧪 Starting the TTRLVR + AZR integration validation suite")
    print("=" * 60)
    print(f"Start time: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    print("=" * 60)

    results = {}
    start_time = time.time()

    # 1. Environment validation
    test_start = time.time()
    success = run_environment_validation()
    results['environment_validation'] = {
        'success': success,
        'duration': f"{time.time() - test_start:.1f}s",
        'details': 'Environment setup and dependencies check'
    }

    # 2. Unit tests (skipped if environment validation failed)
    if success:
        test_start = time.time()
        success = run_unit_tests()
        results['unit_tests'] = {
            'success': success,
            'duration': f"{time.time() - test_start:.1f}s",
            'details': 'Component unit tests and integration tests'
        }
    else:
        results['unit_tests'] = {
            'success': False,
            'duration': '0s',
            'details': 'Skipped due to environment validation failure'
        }

    # 3. Mini integration test (skipped if the unit tests failed)
    if results['unit_tests']['success']:
        test_start = time.time()
        success = run_mini_integration_test()
        results['mini_integration_test'] = {
            'success': success,
            'duration': f"{time.time() - test_start:.1f}s",
            'details': 'End-to-end pipeline test with 1 problem, 2 rounds'
        }
    else:
        results['mini_integration_test'] = {
            'success': False,
            'duration': '0s',
            'details': 'Skipped due to previous test failures'
        }

    # 4. Disk space check
    test_start = time.time()
    success = check_disk_space()
    results['disk_space_check'] = {
        'success': success,
        'duration': f"{time.time() - test_start:.1f}s",
        'details': 'Available disk space in critical directories'
    }

    # 5. Performance benchmark
    test_start = time.time()
    success = run_performance_benchmark()
    results['performance_benchmark'] = {
        'success': success,
        'duration': f"{time.time() - test_start:.1f}s",
        'details': 'System resource usage and performance metrics'
    }

    total_duration = time.time() - start_time
    print(f"\n⏱️ Total run time: {total_duration:.1f}s ({total_duration/60:.1f} min)")

    report = generate_validation_report(results)

    print("\n" + "="*60)
    print("🏁 Validation suite complete")
    print("="*60)

    passed = sum(1 for result in results.values() if result['success'])
    total = len(results)

    print(f"📊 Final result: {passed}/{total} tests passed")

    if report['summary']['overall_success']:
        print("🎉 All checks passed! The TTRLVR + AZR system is ready to run.")
        return 0
    else:
        print("⚠️ Some checks failed. Review the results above and fix the issues.")
        return 1


if __name__ == '__main__':
    exit_code = main()
    sys.exit(exit_code)