"""
Monte Carlo Runner
==================

Orchestration and parallel execution for MC envelope mapping.

Features:
- Parallel execution using multiprocessing
- Checkpointing and resume capability
- Batch processing
- Progress tracking

Reference: MONTE_CARLO_SYSTEM_ENVELOPE_SPEC.md Section 1, 5.4
"""

import time
import json
from typing import Dict, List, Optional, Any, Callable
from pathlib import Path
from datetime import datetime
import multiprocessing as mp
from functools import partial

from .mc_sampler import MCSampler, MCTrialConfig
from .mc_validator import MCValidator, ValidationResult
from .mc_executor import MCExecutor
from .mc_store import MCStore
from .mc_metrics import MCTrialResult


class MCRunner:
    """
    Monte Carlo Runner.

    Orchestrates the full MC envelope mapping pipeline:
    1. Generate trial configurations
    2. Validate configurations
    3. Execute trials (parallel)
    4. Store results
    """

    def __init__(
        self,
        output_dir: str = "mc_results",
        n_workers: int = -1,
        batch_size: int = 1000,
        base_seed: int = 42,
        verbose: bool = True
    ):
        """
        Initialize the runner.

        Parameters
        ----------
        output_dir : str
            Directory for results
        n_workers : int
            Number of parallel workers (-1 for auto)
        batch_size : int
            Trials per batch
        base_seed : int
            Master RNG seed
        verbose : bool
            Print progress.
            NOTE(review): currently only forwarded to the sequential
            executor; phase banners and batch progress always print.
        """
        self.output_dir = Path(output_dir)
        # Non-positive worker count means "auto": use all cores but one.
        self.n_workers = n_workers if n_workers > 0 else max(1, mp.cpu_count() - 1)
        self.batch_size = batch_size
        self.base_seed = base_seed
        self.verbose = verbose

        # Components
        self.sampler = MCSampler(base_seed=base_seed)
        self.store = MCStore(output_dir=output_dir, batch_size=batch_size)

        # State
        self.completed_trials: set = set()          # trial_ids already finished (for resume)
        self.stats: Dict[str, Any] = {}             # accumulated run statistics

    def generate_and_validate(
        self,
        n_samples_per_switch: int = 500,
        max_trials: Optional[int] = None
    ) -> List[MCTrialConfig]:
        """
        Generate and validate trial configurations.

        Parameters
        ----------
        n_samples_per_switch : int
            Samples per switch vector
        max_trials : int, optional
            Maximum total trials

        Returns
        -------
        List[MCTrialConfig]
            Valid trial configurations
        """
        print("="*70)
        print("PHASE 1: GENERATE & VALIDATE CONFIGURATIONS")
        print("="*70)

        # Generate trials
        print(f"\n[1/3] Generating trials (n_samples_per_switch={n_samples_per_switch})...")
        all_configs = self.sampler.generate_trials(
            n_samples_per_switch=n_samples_per_switch,
            max_trials=max_trials
        )

        # Validate
        print(f"\n[2/3] Validating {len(all_configs)} configurations...")
        validator = MCValidator(verbose=False)
        validation_results = validator.validate_batch(all_configs)

        # Keep only configs whose paired validation result passed.
        valid_configs = [
            config
            for config, result in zip(all_configs, validation_results)
            if result.is_valid()
        ]

        # Persist validation results so rejections can be audited later.
        self.store.save_validation_results(validation_results, batch_id=0)

        # Stats
        stats = validator.get_validity_stats(validation_results)
        print(f"\n[3/3] Validation complete:")
        print(f" Total: {stats['total']}")
        print(f" Valid: {stats['valid']} ({stats['validity_rate']*100:.1f}%)")
        print(f" Rejected: {stats['total'] - stats['valid']}")

        self.stats['validation'] = stats
        return valid_configs

    def run_envelope_mapping(
        self,
        n_samples_per_switch: int = 500,
        max_trials: Optional[int] = None,
        resume: bool = True
    ) -> Dict[str, Any]:
        """
        Run full envelope mapping.

        Parameters
        ----------
        n_samples_per_switch : int
            Samples per switch vector
        max_trials : int, optional
            Maximum total trials
        resume : bool
            Resume from existing results

        Returns
        -------
        Dict[str, Any]
            Run statistics
        """
        start_time = time.time()

        # Phase 1: generate and validate candidate configurations.
        valid_configs = self.generate_and_validate(
            n_samples_per_switch=n_samples_per_switch,
            max_trials=max_trials
        )

        # Optionally skip trials that already completed in a prior run.
        if resume:
            self._load_completed_trials()
            valid_configs = [
                c for c in valid_configs
                if c.trial_id not in self.completed_trials
            ]
            print(f"\n[Resume] {len(self.completed_trials)} trials already completed")
            print(f"[Resume] {len(valid_configs)} trials remaining")

        if not valid_configs:
            print("\n[OK] All trials already completed!")
            return self._get_run_stats(start_time)

        # Phase 2: execute the surviving trials in batches.
        print("\n" + "="*70)
        print("PHASE 2: EXECUTE TRIALS")
        print("="*70)
        print(f"\nRunning {len(valid_configs)} trials with {self.n_workers} workers...")

        batches = self._split_into_batches(valid_configs)
        print(f"Split into {len(batches)} batches (batch_size={self.batch_size})")

        total_completed = 0
        for batch_idx, batch_configs in enumerate(batches):
            print(f"\n--- Batch {batch_idx+1}/{len(batches)} ({len(batch_configs)} trials) ---")
            batch_start = time.time()

            # Parallel execution only pays off with >1 worker and >1 trial.
            if self.n_workers > 1 and len(batch_configs) > 1:
                results = self._execute_parallel(batch_configs)
            else:
                results = self._execute_sequential(batch_configs)

            # Persist each batch immediately so a crash loses at most one batch.
            # batch_id=0 is reserved for validation results.
            self.store.save_trial_results(results, batch_id=batch_idx+1)

            batch_time = time.time() - batch_start
            total_completed += len(results)
            # Guard against a zero-duration batch (ZeroDivisionError).
            rate = len(results) / batch_time if batch_time > 0 else float('inf')
            print(f"Batch {batch_idx+1} complete in {batch_time:.1f}s "
                  f"({rate:.1f} trials/sec)")

            # Progress / ETA (ETA assumes a constant per-trial rate).
            progress = total_completed / len(valid_configs)
            eta_seconds = (time.time() - start_time) / progress * (1 - progress) if progress > 0 else 0
            print(f"Overall: {total_completed}/{len(valid_configs)} ({progress*100:.1f}%) "
                  f"ETA: {eta_seconds/60:.1f} min")

        return self._get_run_stats(start_time)

    def _split_into_batches(
        self,
        configs: List[MCTrialConfig]
    ) -> List[List[MCTrialConfig]]:
        """Split configurations into batches of at most ``self.batch_size``."""
        return [
            configs[i:i + self.batch_size]
            for i in range(0, len(configs), self.batch_size)
        ]

    def _execute_sequential(
        self,
        configs: List[MCTrialConfig]
    ) -> List[MCTrialResult]:
        """Execute trials sequentially in this process."""
        executor = MCExecutor(verbose=self.verbose)
        # Report progress roughly ten times over the batch.
        return executor.execute_batch(configs, progress_interval=max(1, len(configs)//10))

    def _execute_parallel(
        self,
        configs: List[MCTrialConfig]
    ) -> List[MCTrialResult]:
        """Execute trials in parallel using multiprocessing."""
        # partial() binds keyword args; the worker itself must be a
        # module-level function so it can be pickled by mp.Pool.
        worker = partial(_execute_trial_worker, initial_capital=25000.0)

        with mp.Pool(processes=self.n_workers) as pool:
            results = pool.map(worker, configs)

        return results

    def _load_completed_trials(self):
        """Load IDs of already completed trials from the store's index."""
        # Large limit acts as "no limit" for any realistic corpus size.
        entries = self.store.query_index(status='completed', limit=1000000)
        self.completed_trials = {e['trial_id'] for e in entries}

    def _get_run_stats(self, start_time: float) -> Dict[str, Any]:
        """Assemble final run statistics (timing + corpus stats) and print a summary."""
        total_time = time.time() - start_time
        corpus_stats = self.store.get_corpus_stats()

        stats = {
            'total_time_sec': total_time,
            'total_time_min': total_time / 60,
            'total_time_hours': total_time / 3600,
            **corpus_stats,
        }

        print("\n" + "="*70)
        print("ENVELOPE MAPPING COMPLETE")
        print("="*70)
        print(f"\nTotal time: {total_time/3600:.2f} hours")
        print(f"Total trials: {stats['total_trials']}")
        print(f"Champion region: {stats['champion_count']}")
        print(f"Catastrophic: {stats['catastrophic_count']}")
        print(f"Avg ROI: {stats['avg_roi_pct']:.2f}%")
        print(f"Avg Sharpe: {stats['avg_sharpe']:.2f}")

        return stats

    def generate_report(self, output_path: Optional[str] = None):
        """
        Generate a markdown summary report.

        Parameters
        ----------
        output_path : str, optional
            If given, the report is also written to this path.

        Returns
        -------
        str
            The report text.
        """
        stats = self.store.get_corpus_stats()

        # max(1, ...) guards the percentage math against an empty corpus.
        report = f"""
# Monte Carlo Envelope Mapping Report

Generated: {datetime.now().isoformat()}

## Corpus Statistics

- Total trials: {stats['total_trials']}
- Champion region: {stats['champion_count']} ({stats['champion_count']/max(1,stats['total_trials'])*100:.1f}%)
- Catastrophic: {stats['catastrophic_count']} ({stats['catastrophic_count']/max(1,stats['total_trials'])*100:.1f}%)

## Performance Metrics

- Average ROI: {stats['avg_roi_pct']:.2f}%
- Min ROI: {stats['min_roi_pct']:.2f}%
- Max ROI: {stats['max_roi_pct']:.2f}%
- Average Sharpe: {stats['avg_sharpe']:.2f}
- Average Max DD: {stats['avg_max_dd_pct']:.2f}%

## Validation Summary
"""

        # Validation stats are only available if generate_and_validate ran
        # in this process (they are not persisted on the runner itself).
        if 'validation' in self.stats:
            vstats = self.stats['validation']
            report += f"""
- Total configs: {vstats['total']}
- Valid configs: {vstats['valid']} ({vstats['validity_rate']*100:.1f}%)
- Rejected V1 (range): {vstats.get('rejected_v1', 0)}
- Rejected V2 (constraints): {vstats.get('rejected_v2', 0)}
- Rejected V3 (cross-group): {vstats.get('rejected_v3', 0)}
- Rejected V4 (degenerate): {vstats.get('rejected_v4', 0)}
"""

        if output_path:
            with open(output_path, 'w') as f:
                f.write(report)
            print(f"\n[OK] Report saved: {output_path}")

        return report


def _execute_trial_worker(
    config: MCTrialConfig,
    initial_capital: float = 25000.0
) -> MCTrialResult:
    """
    Worker function for parallel execution.

    Must be at module level for pickle serialization (mp.Pool cannot
    pickle lambdas or instance methods of the runner).
    Validation is skipped because configs were validated in Phase 1.
    """
    executor = MCExecutor(initial_capital=initial_capital, verbose=False)
    return executor.execute_trial(config, skip_validation=True)


def run_mc_envelope(
    n_samples_per_switch: int = 100,  # Reduced default for testing
    max_trials: Optional[int] = None,
    n_workers: int = -1,
    output_dir: str = "mc_results",
    resume: bool = True,
    base_seed: int = 42,
    batch_size: int = 1000,
    verbose: bool = True
) -> Dict[str, Any]:
    """
    Convenience function to run full MC envelope mapping.

    Parameters
    ----------
    n_samples_per_switch : int
        Samples per switch vector
    max_trials : int, optional
        Maximum total trials
    n_workers : int
        Number of parallel workers (-1 for auto)
    output_dir : str
        Output directory
    resume : bool
        Resume from existing results
    base_seed : int
        Master RNG seed
    batch_size : int
        Trials per batch (forwarded to MCRunner; default matches MCRunner)
    verbose : bool
        Print progress (forwarded to MCRunner; default matches MCRunner)

    Returns
    -------
    Dict[str, Any]
        Run statistics
    """
    runner = MCRunner(
        output_dir=output_dir,
        n_workers=n_workers,
        base_seed=base_seed,
        batch_size=batch_size,
        verbose=verbose
    )

    stats = runner.run_envelope_mapping(
        n_samples_per_switch=n_samples_per_switch,
        max_trials=max_trials,
        resume=resume
    )

    # Write a markdown summary next to the result corpus.
    runner.generate_report(output_path=f"{output_dir}/envelope_report.md")

    return stats


if __name__ == "__main__":
    # Test run
    stats = run_mc_envelope(
        n_samples_per_switch=10,
        max_trials=100,
        n_workers=1,
        output_dir="mc_results_test"
    )
    print("\nTest complete!")