396 lines
12 KiB
Python
396 lines
12 KiB
Python
|
|
"""
|
||
|
|
Monte Carlo Runner
|
||
|
|
==================
|
||
|
|
|
||
|
|
Orchestration and parallel execution for MC envelope mapping.
|
||
|
|
|
||
|
|
Features:
|
||
|
|
- Parallel execution using multiprocessing
|
||
|
|
- Checkpointing and resume capability
|
||
|
|
- Batch processing
|
||
|
|
- Progress tracking
|
||
|
|
|
||
|
|
Reference: MONTE_CARLO_SYSTEM_ENVELOPE_SPEC.md Section 1, 5.4
|
||
|
|
"""
|
||
|
|
|
||
|
|
import time
|
||
|
|
import json
|
||
|
|
from typing import Dict, List, Optional, Any, Callable
|
||
|
|
from pathlib import Path
|
||
|
|
from datetime import datetime
|
||
|
|
import multiprocessing as mp
|
||
|
|
from functools import partial
|
||
|
|
|
||
|
|
from .mc_sampler import MCSampler, MCTrialConfig
|
||
|
|
from .mc_validator import MCValidator, ValidationResult
|
||
|
|
from .mc_executor import MCExecutor
|
||
|
|
from .mc_store import MCStore
|
||
|
|
from .mc_metrics import MCTrialResult
|
||
|
|
|
||
|
|
|
||
|
|
class MCRunner:
    """
    Monte Carlo Runner.

    Orchestrates the full MC envelope mapping pipeline:
    1. Generate trial configurations
    2. Validate configurations
    3. Execute trials (parallel)
    4. Store results
    """

    def __init__(
        self,
        output_dir: str = "mc_results",
        n_workers: int = -1,
        batch_size: int = 1000,
        base_seed: int = 42,
        verbose: bool = True,
        initial_capital: float = 25000.0
    ):
        """
        Initialize the runner.

        Parameters
        ----------
        output_dir : str
            Directory for results
        n_workers : int
            Number of parallel workers (-1 for auto)
        batch_size : int
            Trials per batch
        base_seed : int
            Master RNG seed
        verbose : bool
            Print progress
        initial_capital : float
            Starting capital handed to every MCExecutor. Defaults to the
            value previously hard-coded in the parallel worker (25000.0),
            so existing callers see no behavior change. Previously the
            sequential path used MCExecutor's own default while the
            parallel path used 25000.0; both now use this value.
        """
        self.output_dir = Path(output_dir)
        # -1 (or any non-positive value) means auto-detect: use all cores
        # but leave one free for the main process / OS.
        self.n_workers = n_workers if n_workers > 0 else max(1, mp.cpu_count() - 1)
        self.batch_size = batch_size
        self.base_seed = base_seed
        self.verbose = verbose
        self.initial_capital = initial_capital

        # Components
        self.sampler = MCSampler(base_seed=base_seed)
        self.store = MCStore(output_dir=output_dir, batch_size=batch_size)

        # State
        self.completed_trials: set = set()  # trial_ids already executed (resume support)
        self.stats: Dict[str, Any] = {}     # accumulated run statistics

    def generate_and_validate(
        self,
        n_samples_per_switch: int = 500,
        max_trials: Optional[int] = None
    ) -> List["MCTrialConfig"]:
        """
        Generate and validate trial configurations.

        Parameters
        ----------
        n_samples_per_switch : int
            Samples per switch vector
        max_trials : int, optional
            Maximum total trials

        Returns
        -------
        List[MCTrialConfig]
            Valid trial configurations
        """
        print("="*70)
        print("PHASE 1: GENERATE & VALIDATE CONFIGURATIONS")
        print("="*70)

        # Generate trials
        print(f"\n[1/3] Generating trials (n_samples_per_switch={n_samples_per_switch})...")
        all_configs = self.sampler.generate_trials(
            n_samples_per_switch=n_samples_per_switch,
            max_trials=max_trials
        )

        # Validate
        print(f"\n[2/3] Validating {len(all_configs)} configurations...")
        validator = MCValidator(verbose=False)
        validation_results = validator.validate_batch(all_configs)

        # Filter valid configs (validate_batch returns one result per config,
        # in order, so zip pairs them correctly)
        valid_configs = [
            config for config, result in zip(all_configs, validation_results)
            if result.is_valid()
        ]

        # Save validation results; batch_id=0 is reserved for validation output
        self.store.save_validation_results(validation_results, batch_id=0)

        # Stats
        stats = validator.get_validity_stats(validation_results)
        print(f"\n[3/3] Validation complete:")
        print(f" Total: {stats['total']}")
        print(f" Valid: {stats['valid']} ({stats['validity_rate']*100:.1f}%)")
        print(f" Rejected: {stats['total'] - stats['valid']}")

        self.stats['validation'] = stats

        return valid_configs

    def run_envelope_mapping(
        self,
        n_samples_per_switch: int = 500,
        max_trials: Optional[int] = None,
        resume: bool = True
    ) -> Dict[str, Any]:
        """
        Run full envelope mapping.

        Parameters
        ----------
        n_samples_per_switch : int
            Samples per switch vector
        max_trials : int, optional
            Maximum total trials
        resume : bool
            Resume from existing results

        Returns
        -------
        Dict[str, Any]
            Run statistics
        """
        start_time = time.time()

        # Generate and validate
        valid_configs = self.generate_and_validate(
            n_samples_per_switch=n_samples_per_switch,
            max_trials=max_trials
        )

        # Check for resume: drop trials whose IDs are already in the store index
        if resume:
            self._load_completed_trials()
            valid_configs = [c for c in valid_configs if c.trial_id not in self.completed_trials]
            print(f"\n[Resume] {len(self.completed_trials)} trials already completed")
            print(f"[Resume] {len(valid_configs)} trials remaining")

        if not valid_configs:
            print("\n[OK] All trials already completed!")
            return self._get_run_stats(start_time)

        # Execute trials
        print("\n" + "="*70)
        print("PHASE 2: EXECUTE TRIALS")
        print("="*70)
        print(f"\nRunning {len(valid_configs)} trials with {self.n_workers} workers...")

        # Split into batches
        batches = self._split_into_batches(valid_configs)
        print(f"Split into {len(batches)} batches (batch_size={self.batch_size})")

        # Process batches
        total_completed = 0
        for batch_idx, batch_configs in enumerate(batches):
            print(f"\n--- Batch {batch_idx+1}/{len(batches)} ({len(batch_configs)} trials) ---")

            batch_start = time.time()

            if self.n_workers > 1 and len(batch_configs) > 1:
                # Parallel execution
                results = self._execute_parallel(batch_configs)
            else:
                # Sequential execution (single worker or single trial)
                results = self._execute_sequential(batch_configs)

            # Save results; batch_id starts at 1 because 0 holds validation results
            self.store.save_trial_results(results, batch_id=batch_idx+1)

            batch_time = time.time() - batch_start
            total_completed += len(results)

            # Guard against a zero-duration batch (coarse clock / trivially
            # fast trials) which would otherwise raise ZeroDivisionError.
            rate = len(results) / batch_time if batch_time > 0 else float('inf')
            print(f"Batch {batch_idx+1} complete in {batch_time:.1f}s "
                  f"({rate:.1f} trials/sec)")

            # Progress + naive ETA: elapsed scaled by remaining fraction
            progress = total_completed / len(valid_configs)
            eta_seconds = (time.time() - start_time) / progress * (1 - progress) if progress > 0 else 0
            print(f"Overall: {total_completed}/{len(valid_configs)} ({progress*100:.1f}%) "
                  f"ETA: {eta_seconds/60:.1f} min")

        return self._get_run_stats(start_time)

    def _split_into_batches(
        self,
        configs: List["MCTrialConfig"]
    ) -> List[List["MCTrialConfig"]]:
        """Split configurations into batches of at most ``self.batch_size``."""
        batches = []
        for i in range(0, len(configs), self.batch_size):
            batches.append(configs[i:i+self.batch_size])
        return batches

    def _execute_sequential(
        self,
        configs: List["MCTrialConfig"]
    ) -> List["MCTrialResult"]:
        """Execute trials sequentially in the current process."""
        # Same initial capital as the parallel path so both execution
        # modes produce comparable results.
        executor = MCExecutor(initial_capital=self.initial_capital, verbose=self.verbose)
        return executor.execute_batch(configs, progress_interval=max(1, len(configs)//10))

    def _execute_parallel(
        self,
        configs: List["MCTrialConfig"]
    ) -> List["MCTrialResult"]:
        """Execute trials in parallel using multiprocessing."""
        # Bind the capital setting into the picklable module-level worker
        worker = partial(_execute_trial_worker, initial_capital=self.initial_capital)

        # Run in pool
        with mp.Pool(processes=self.n_workers) as pool:
            results = pool.map(worker, configs)

        return results

    def _load_completed_trials(self):
        """Load IDs of already completed trials from the store index."""
        entries = self.store.query_index(status='completed', limit=1000000)
        self.completed_trials = {e['trial_id'] for e in entries}

    def _get_run_stats(self, start_time: float) -> Dict[str, Any]:
        """Build final run statistics (timing + corpus stats) and print a summary."""
        total_time = time.time() - start_time
        corpus_stats = self.store.get_corpus_stats()

        stats = {
            'total_time_sec': total_time,
            'total_time_min': total_time / 60,
            'total_time_hours': total_time / 3600,
            **corpus_stats,
        }

        print("\n" + "="*70)
        print("ENVELOPE MAPPING COMPLETE")
        print("="*70)
        print(f"\nTotal time: {total_time/3600:.2f} hours")
        print(f"Total trials: {stats['total_trials']}")
        print(f"Champion region: {stats['champion_count']}")
        print(f"Catastrophic: {stats['catastrophic_count']}")
        print(f"Avg ROI: {stats['avg_roi_pct']:.2f}%")
        print(f"Avg Sharpe: {stats['avg_sharpe']:.2f}")

        return stats

    def generate_report(self, output_path: Optional[str] = None):
        """
        Generate a markdown summary report.

        Parameters
        ----------
        output_path : str, optional
            If given, the report text is also written to this path.

        Returns
        -------
        str
            The report text.
        """
        stats = self.store.get_corpus_stats()

        report = f"""
# Monte Carlo Envelope Mapping Report

Generated: {datetime.now().isoformat()}

## Corpus Statistics

- Total trials: {stats['total_trials']}
- Champion region: {stats['champion_count']} ({stats['champion_count']/max(1,stats['total_trials'])*100:.1f}%)
- Catastrophic: {stats['catastrophic_count']} ({stats['catastrophic_count']/max(1,stats['total_trials'])*100:.1f}%)

## Performance Metrics

- Average ROI: {stats['avg_roi_pct']:.2f}%
- Min ROI: {stats['min_roi_pct']:.2f}%
- Max ROI: {stats['max_roi_pct']:.2f}%
- Average Sharpe: {stats['avg_sharpe']:.2f}
- Average Max DD: {stats['avg_max_dd_pct']:.2f}%

## Validation Summary

"""
        # Validation stats exist only if generate_and_validate ran this session
        if 'validation' in self.stats:
            vstats = self.stats['validation']
            report += f"""
- Total configs: {vstats['total']}
- Valid configs: {vstats['valid']} ({vstats['validity_rate']*100:.1f}%)
- Rejected V1 (range): {vstats.get('rejected_v1', 0)}
- Rejected V2 (constraints): {vstats.get('rejected_v2', 0)}
- Rejected V3 (cross-group): {vstats.get('rejected_v3', 0)}
- Rejected V4 (degenerate): {vstats.get('rejected_v4', 0)}
"""

        if output_path:
            with open(output_path, 'w') as f:
                f.write(report)
            print(f"\n[OK] Report saved: {output_path}")

        return report
def _execute_trial_worker(
    config: MCTrialConfig,
    initial_capital: float = 25000.0
) -> MCTrialResult:
    """
    Execute one trial inside a worker process.

    Kept at module level so multiprocessing can pickle it. Validation is
    skipped here because configs are validated before being dispatched.
    """
    worker_executor = MCExecutor(initial_capital=initial_capital, verbose=False)
    return worker_executor.execute_trial(config, skip_validation=True)
def run_mc_envelope(
    n_samples_per_switch: int = 100,  # Reduced default for testing
    max_trials: Optional[int] = None,
    n_workers: int = -1,
    output_dir: str = "mc_results",
    resume: bool = True,
    base_seed: int = 42,
    batch_size: int = 1000
) -> Dict[str, Any]:
    """
    Convenience function to run full MC envelope mapping.

    Parameters
    ----------
    n_samples_per_switch : int
        Samples per switch vector
    max_trials : int, optional
        Maximum total trials
    n_workers : int
        Number of parallel workers (-1 for auto)
    output_dir : str
        Output directory
    resume : bool
        Resume from existing results
    base_seed : int
        Master RNG seed
    batch_size : int
        Trials per batch. Previously this could not be set through the
        convenience wrapper; it defaults to the MCRunner default, so
        existing callers are unaffected.

    Returns
    -------
    Dict[str, Any]
        Run statistics
    """
    runner = MCRunner(
        output_dir=output_dir,
        n_workers=n_workers,
        batch_size=batch_size,
        base_seed=base_seed
    )

    stats = runner.run_envelope_mapping(
        n_samples_per_switch=n_samples_per_switch,
        max_trials=max_trials,
        resume=resume
    )

    # Generate report alongside the result corpus
    runner.generate_report(output_path=str(Path(output_dir) / "envelope_report.md"))

    return stats
if __name__ == "__main__":
|
||
|
|
# Test run
|
||
|
|
stats = run_mc_envelope(
|
||
|
|
n_samples_per_switch=10,
|
||
|
|
max_trials=100,
|
||
|
|
n_workers=1,
|
||
|
|
output_dir="mc_results_test"
|
||
|
|
)
|
||
|
|
print("\nTest complete!")
|