"""
Monte Carlo Runner
==================

Orchestration and parallel execution for MC envelope mapping.

Features:
- Parallel execution using multiprocessing
- Checkpointing and resume capability
- Batch processing
- Progress tracking

Reference: MONTE_CARLO_SYSTEM_ENVELOPE_SPEC.md Section 1, 5.4
"""

import time
import json
from typing import Dict, List, Optional, Any, Callable
from pathlib import Path
from datetime import datetime
import multiprocessing as mp
from functools import partial

from .mc_sampler import MCSampler, MCTrialConfig
from .mc_validator import MCValidator, ValidationResult
from .mc_executor import MCExecutor
from .mc_store import MCStore
from .mc_metrics import MCTrialResult


class MCRunner:
    """
    Monte Carlo Runner.

    Orchestrates the full MC envelope mapping pipeline:
    1. Generate trial configurations
    2. Validate configurations
    3. Execute trials (parallel)
    4. Store results
    """

    def __init__(
        self,
        output_dir: str = "mc_results",
        n_workers: int = -1,
        batch_size: int = 1000,
        base_seed: int = 42,
        verbose: bool = True
    ):
        """
        Initialize the runner.

        Parameters
        ----------
        output_dir : str
            Directory for results
        n_workers : int
            Number of parallel workers (-1 for auto)
        batch_size : int
            Trials per batch
        base_seed : int
            Master RNG seed
        verbose : bool
            Print progress.
            NOTE(review): currently only forwarded to the sequential
            executor; phase banners and batch progress always print.
        """
        self.output_dir = Path(output_dir)
        # Non-positive worker count means "auto": use all cores but one.
        self.n_workers = n_workers if n_workers > 0 else max(1, mp.cpu_count() - 1)
        self.batch_size = batch_size
        self.base_seed = base_seed
        self.verbose = verbose

        # Components
        self.sampler = MCSampler(base_seed=base_seed)
        self.store = MCStore(output_dir=output_dir, batch_size=batch_size)

        # State
        self.completed_trials: set = set()          # trial_ids already finished (for resume)
        self.stats: Dict[str, Any] = {}             # accumulated run statistics

    def generate_and_validate(
        self,
        n_samples_per_switch: int = 500,
        max_trials: Optional[int] = None
    ) -> List[MCTrialConfig]:
        """
        Generate and validate trial configurations.

        Parameters
        ----------
        n_samples_per_switch : int
            Samples per switch vector
        max_trials : int, optional
            Maximum total trials

        Returns
        -------
        List[MCTrialConfig]
            Valid trial configurations
        """
        print("="*70)
        print("PHASE 1: GENERATE & VALIDATE CONFIGURATIONS")
        print("="*70)

        # Generate trials
        print(f"\n[1/3] Generating trials (n_samples_per_switch={n_samples_per_switch})...")
        all_configs = self.sampler.generate_trials(
            n_samples_per_switch=n_samples_per_switch,
            max_trials=max_trials
        )

        # Validate
        print(f"\n[2/3] Validating {len(all_configs)} configurations...")
        validator = MCValidator(verbose=False)
        validation_results = validator.validate_batch(all_configs)

        # Keep only configs whose paired validation result passed.
        valid_configs = [
            config
            for config, result in zip(all_configs, validation_results)
            if result.is_valid()
        ]

        # Persist validation results so rejections can be audited later.
        self.store.save_validation_results(validation_results, batch_id=0)

        # Stats
        stats = validator.get_validity_stats(validation_results)
        print(f"\n[3/3] Validation complete:")
        print(f" Total: {stats['total']}")
        print(f" Valid: {stats['valid']} ({stats['validity_rate']*100:.1f}%)")
        print(f" Rejected: {stats['total'] - stats['valid']}")

        self.stats['validation'] = stats
        return valid_configs

    def run_envelope_mapping(
        self,
        n_samples_per_switch: int = 500,
        max_trials: Optional[int] = None,
        resume: bool = True
    ) -> Dict[str, Any]:
        """
        Run full envelope mapping.

        Parameters
        ----------
        n_samples_per_switch : int
            Samples per switch vector
        max_trials : int, optional
            Maximum total trials
        resume : bool
            Resume from existing results

        Returns
        -------
        Dict[str, Any]
            Run statistics
        """
        start_time = time.time()

        # Phase 1: generate and validate candidate configurations.
        valid_configs = self.generate_and_validate(
            n_samples_per_switch=n_samples_per_switch,
            max_trials=max_trials
        )

        # Optionally skip trials that already completed in a prior run.
        if resume:
            self._load_completed_trials()
            valid_configs = [
                c for c in valid_configs
                if c.trial_id not in self.completed_trials
            ]
            print(f"\n[Resume] {len(self.completed_trials)} trials already completed")
            print(f"[Resume] {len(valid_configs)} trials remaining")

        if not valid_configs:
            print("\n[OK] All trials already completed!")
            return self._get_run_stats(start_time)

        # Phase 2: execute the surviving trials in batches.
        print("\n" + "="*70)
        print("PHASE 2: EXECUTE TRIALS")
        print("="*70)
        print(f"\nRunning {len(valid_configs)} trials with {self.n_workers} workers...")

        batches = self._split_into_batches(valid_configs)
        print(f"Split into {len(batches)} batches (batch_size={self.batch_size})")

        total_completed = 0
        for batch_idx, batch_configs in enumerate(batches):
            print(f"\n--- Batch {batch_idx+1}/{len(batches)} ({len(batch_configs)} trials) ---")
            batch_start = time.time()

            # Parallel execution only pays off with >1 worker and >1 trial.
            if self.n_workers > 1 and len(batch_configs) > 1:
                results = self._execute_parallel(batch_configs)
            else:
                results = self._execute_sequential(batch_configs)

            # Persist each batch immediately so a crash loses at most one batch.
            # batch_id=0 is reserved for validation results.
            self.store.save_trial_results(results, batch_id=batch_idx+1)

            batch_time = time.time() - batch_start
            total_completed += len(results)
            # Guard against a zero-duration batch (ZeroDivisionError).
            rate = len(results) / batch_time if batch_time > 0 else float('inf')
            print(f"Batch {batch_idx+1} complete in {batch_time:.1f}s "
                  f"({rate:.1f} trials/sec)")

            # Progress / ETA (ETA assumes a constant per-trial rate).
            progress = total_completed / len(valid_configs)
            eta_seconds = (time.time() - start_time) / progress * (1 - progress) if progress > 0 else 0
            print(f"Overall: {total_completed}/{len(valid_configs)} ({progress*100:.1f}%) "
                  f"ETA: {eta_seconds/60:.1f} min")

        return self._get_run_stats(start_time)

    def _split_into_batches(
        self,
        configs: List[MCTrialConfig]
    ) -> List[List[MCTrialConfig]]:
        """Split configurations into batches of at most ``self.batch_size``."""
        return [
            configs[i:i + self.batch_size]
            for i in range(0, len(configs), self.batch_size)
        ]

    def _execute_sequential(
        self,
        configs: List[MCTrialConfig]
    ) -> List[MCTrialResult]:
        """Execute trials sequentially in this process."""
        executor = MCExecutor(verbose=self.verbose)
        # Report progress roughly ten times over the batch.
        return executor.execute_batch(configs, progress_interval=max(1, len(configs)//10))

    def _execute_parallel(
        self,
        configs: List[MCTrialConfig]
    ) -> List[MCTrialResult]:
        """Execute trials in parallel using multiprocessing."""
        # partial() binds keyword args; the worker itself must be a
        # module-level function so it can be pickled by mp.Pool.
        worker = partial(_execute_trial_worker, initial_capital=25000.0)

        with mp.Pool(processes=self.n_workers) as pool:
            results = pool.map(worker, configs)

        return results

    def _load_completed_trials(self):
        """Load IDs of already completed trials from the store's index."""
        # Large limit acts as "no limit" for any realistic corpus size.
        entries = self.store.query_index(status='completed', limit=1000000)
        self.completed_trials = {e['trial_id'] for e in entries}

    def _get_run_stats(self, start_time: float) -> Dict[str, Any]:
        """Assemble final run statistics (timing + corpus stats) and print a summary."""
        total_time = time.time() - start_time
        corpus_stats = self.store.get_corpus_stats()

        stats = {
            'total_time_sec': total_time,
            'total_time_min': total_time / 60,
            'total_time_hours': total_time / 3600,
            **corpus_stats,
        }

        print("\n" + "="*70)
        print("ENVELOPE MAPPING COMPLETE")
        print("="*70)
        print(f"\nTotal time: {total_time/3600:.2f} hours")
        print(f"Total trials: {stats['total_trials']}")
        print(f"Champion region: {stats['champion_count']}")
        print(f"Catastrophic: {stats['catastrophic_count']}")
        print(f"Avg ROI: {stats['avg_roi_pct']:.2f}%")
        print(f"Avg Sharpe: {stats['avg_sharpe']:.2f}")

        return stats

    def generate_report(self, output_path: Optional[str] = None):
        """
        Generate a markdown summary report.

        Parameters
        ----------
        output_path : str, optional
            If given, the report is also written to this path.

        Returns
        -------
        str
            The report text.
        """
        stats = self.store.get_corpus_stats()

        # max(1, ...) guards the percentage math against an empty corpus.
        report = f"""
# Monte Carlo Envelope Mapping Report

Generated: {datetime.now().isoformat()}

## Corpus Statistics

- Total trials: {stats['total_trials']}
- Champion region: {stats['champion_count']} ({stats['champion_count']/max(1,stats['total_trials'])*100:.1f}%)
- Catastrophic: {stats['catastrophic_count']} ({stats['catastrophic_count']/max(1,stats['total_trials'])*100:.1f}%)

## Performance Metrics

- Average ROI: {stats['avg_roi_pct']:.2f}%
- Min ROI: {stats['min_roi_pct']:.2f}%
- Max ROI: {stats['max_roi_pct']:.2f}%
- Average Sharpe: {stats['avg_sharpe']:.2f}
- Average Max DD: {stats['avg_max_dd_pct']:.2f}%

## Validation Summary
"""

        # Validation stats are only available if generate_and_validate ran
        # in this process (they are not persisted on the runner itself).
        if 'validation' in self.stats:
            vstats = self.stats['validation']
            report += f"""
- Total configs: {vstats['total']}
- Valid configs: {vstats['valid']} ({vstats['validity_rate']*100:.1f}%)
- Rejected V1 (range): {vstats.get('rejected_v1', 0)}
- Rejected V2 (constraints): {vstats.get('rejected_v2', 0)}
- Rejected V3 (cross-group): {vstats.get('rejected_v3', 0)}
- Rejected V4 (degenerate): {vstats.get('rejected_v4', 0)}
"""

        if output_path:
            with open(output_path, 'w') as f:
                f.write(report)
            print(f"\n[OK] Report saved: {output_path}")

        return report


def _execute_trial_worker(
    config: MCTrialConfig,
    initial_capital: float = 25000.0
) -> MCTrialResult:
    """
    Worker function for parallel execution.

    Must be at module level for pickle serialization (mp.Pool cannot
    pickle lambdas or instance methods of the runner).
    Validation is skipped because configs were validated in Phase 1.
    """
    executor = MCExecutor(initial_capital=initial_capital, verbose=False)
    return executor.execute_trial(config, skip_validation=True)


def run_mc_envelope(
    n_samples_per_switch: int = 100,  # Reduced default for testing
    max_trials: Optional[int] = None,
    n_workers: int = -1,
    output_dir: str = "mc_results",
    resume: bool = True,
    base_seed: int = 42,
    batch_size: int = 1000,
    verbose: bool = True
) -> Dict[str, Any]:
    """
    Convenience function to run full MC envelope mapping.

    Parameters
    ----------
    n_samples_per_switch : int
        Samples per switch vector
    max_trials : int, optional
        Maximum total trials
    n_workers : int
        Number of parallel workers (-1 for auto)
    output_dir : str
        Output directory
    resume : bool
        Resume from existing results
    base_seed : int
        Master RNG seed
    batch_size : int
        Trials per batch (forwarded to MCRunner; default matches MCRunner)
    verbose : bool
        Print progress (forwarded to MCRunner; default matches MCRunner)

    Returns
    -------
    Dict[str, Any]
        Run statistics
    """
    runner = MCRunner(
        output_dir=output_dir,
        n_workers=n_workers,
        base_seed=base_seed,
        batch_size=batch_size,
        verbose=verbose
    )

    stats = runner.run_envelope_mapping(
        n_samples_per_switch=n_samples_per_switch,
        max_trials=max_trials,
        resume=resume
    )

    # Write a markdown summary next to the result corpus.
    runner.generate_report(output_path=f"{output_dir}/envelope_report.md")

    return stats


if __name__ == "__main__":
    # Test run
    stats = run_mc_envelope(
        n_samples_per_switch=10,
        max_trials=100,
        n_workers=1,
        output_dir="mc_results_test"
    )
    print("\nTest complete!")