"""
Monte Carlo Parameter Sampler
=============================

Parameter space definition and Latin Hypercube Sampling (LHS) implementation.

This module defines the complete 33-parameter space across 7 sub-systems and
implements the two-phase sampling strategy:

1. Phase A: Switch grid (boolean combinations)
2. Phase B: LHS continuous sampling per switch-vector

Reference: MONTE_CARLO_SYSTEM_ENVELOPE_SPEC.md Section 2, 3
"""

import numpy as np
from typing import Dict, List, Optional, Tuple, NamedTuple, Any, Union
from dataclasses import dataclass, field
from enum import Enum
import json
from pathlib import Path

# Try to import scipy for LHS; if unavailable we fall back to plain
# uniform random sampling (see MCSampler.sample_continuous_params).
try:
    from scipy.stats import qmc
    SCIPY_AVAILABLE = True
except ImportError:
    SCIPY_AVAILABLE = False


class ParamType(Enum):
    """Parameter sampling types."""
    CONTINUOUS = "continuous"
    DISCRETE = "discrete"
    CATEGORICAL = "categorical"
    BOOLEAN = "boolean"
    DERIVED = "derived"
    FIXED = "fixed"


@dataclass
class ParameterDef:
    """Definition of a single parameter.

    Fields
    ------
    id : str
        Spec identifier (e.g. 'P1.01').
    name : str
        Parameter name as used in trial configs.
    champion : Any
        Baseline (champion) value.
    param_type : ParamType
        How the parameter is sampled.
    lo, hi : float, optional
        Sampling bounds. ``hi`` may be None for parameters whose upper
        bound is derived at sample time (e.g. vel_div_extreme).
    log_transform : bool
        If True, continuous sampling is done in log space.
    constraint_group : str, optional
        Constraint-group tag from the spec.
    depends_on : str, optional
        Parent parameter for conditional parameters.
    categories : list of str, optional
        Required for CATEGORICAL parameters.
    """
    id: str
    name: str
    champion: Any
    param_type: ParamType
    lo: Optional[float] = None
    hi: Optional[float] = None
    log_transform: bool = False
    constraint_group: Optional[str] = None
    depends_on: Optional[str] = None  # For conditional parameters
    categories: Optional[List[str]] = None  # For CATEGORICAL

    def __post_init__(self):
        if self.param_type == ParamType.CATEGORICAL and self.categories is None:
            raise ValueError(f"Categorical parameter {self.name} must have categories")
        # Guard against inverted bounds; hi may legitimately be None for
        # parameters with a dependent upper bound, so only check when both set.
        if self.lo is not None and self.hi is not None and self.lo > self.hi:
            raise ValueError(f"Parameter {self.name} has lo > hi ({self.lo} > {self.hi})")


class MCTrialConfig(NamedTuple):
    """Complete parameter vector for a Monte Carlo trial.

    One field per parameter across the 7 sub-systems (P1-P7), plus the
    trial_id. Field order matches the spec's parameter numbering.
    """
    trial_id: int
    # P1 Signal
    vel_div_threshold: float
    vel_div_extreme: float
    use_direction_confirm: bool
    dc_lookback_bars: int
    dc_min_magnitude_bps: float
    dc_skip_contradicts: bool
    dc_leverage_boost: float
    dc_leverage_reduce: float
    vd_trend_lookback: int
    # P2 Leverage
    min_leverage: float
    max_leverage: float
    leverage_convexity: float
    fraction: float
    use_alpha_layers: bool
    use_dynamic_leverage: bool
    # P3 Exit
    fixed_tp_pct: float
    stop_pct: float
    max_hold_bars: int
    # P4 Fees
    use_sp_fees: bool
    use_sp_slippage: bool
    sp_maker_entry_rate: float
    sp_maker_exit_rate: float
    # P5 OB
    use_ob_edge: bool
    ob_edge_bps: float
    ob_confirm_rate: float
    ob_imbalance_bias: float
    ob_depth_scale: float
    # P6 Asset Selection
    use_asset_selection: bool
    min_irp_alignment: float
    lookback: int
    # P7 ACB
    acb_beta_high: float
    acb_beta_low: float
    acb_w750_threshold_pct: int

    def to_dict(self) -> Dict[str, Any]:
        """Convert to a plain dictionary (field order preserved)."""
        # NamedTuple._asdict yields exactly {field: value} in declaration
        # order - identical to the previous hand-written mapping.
        return self._asdict()

    @classmethod
    def from_dict(cls, d: Dict[str, Any]) -> 'MCTrialConfig':
        """Create from a dictionary, ignoring any unknown keys.

        Raises TypeError (from the NamedTuple constructor) if any required
        field is missing.
        """
        valid_fields = cls._fields
        filtered = {k: v for k, v in d.items() if k in valid_fields}
        return cls(**filtered)


class MCSampler:
    """
    Monte Carlo Parameter Sampler.

    Implements two-phase sampling:
    1. Phase A: Enumerate all boolean switch combinations
    2. Phase B: LHS continuous sampling per switch-vector
    """

    # Champion configuration (baseline)
    CHAMPION = {
        'vel_div_threshold': -0.020,
        'vel_div_extreme': -0.050,
        'use_direction_confirm': True,
        'dc_lookback_bars': 7,
        'dc_min_magnitude_bps': 0.75,
        'dc_skip_contradicts': True,
        'dc_leverage_boost': 1.00,
        'dc_leverage_reduce': 0.50,
        'vd_trend_lookback': 10,
        'min_leverage': 0.50,
        'max_leverage': 5.00,
        'leverage_convexity': 3.00,
        'fraction': 0.20,
        'use_alpha_layers': True,
        'use_dynamic_leverage': True,
        'fixed_tp_pct': 0.0099,
        'stop_pct': 1.00,
        'max_hold_bars': 120,
        'use_sp_fees': True,
        'use_sp_slippage': True,
        'sp_maker_entry_rate': 0.62,
        'sp_maker_exit_rate': 0.50,
        'use_ob_edge': True,
        'ob_edge_bps': 5.00,
        'ob_confirm_rate': 0.40,
        'ob_imbalance_bias': -0.09,
        'ob_depth_scale': 1.00,
        'use_asset_selection': True,
        'min_irp_alignment': 0.45,
        'lookback': 100,
        'acb_beta_high': 0.80,
        'acb_beta_low': 0.20,
        'acb_w750_threshold_pct': 60,
    }

    # Parameter definitions (positional args: id, name, champion, type,
    # lo, hi, log_transform, constraint_group)
    PARAMS = {
        # P1 Signal Generator
        'vel_div_threshold': ParameterDef('P1.01', 'vel_div_threshold', -0.020, ParamType.CONTINUOUS, -0.040, -0.008, False, 'CG-VD'),
        'vel_div_extreme': ParameterDef('P1.02', 'vel_div_extreme', -0.050, ParamType.CONTINUOUS, -0.120, None, False, 'CG-VD'),  # hi depends on threshold
        'use_direction_confirm': ParameterDef('P1.03', 'use_direction_confirm', True, ParamType.BOOLEAN, constraint_group='CG-DC'),
        'dc_lookback_bars': ParameterDef('P1.04', 'dc_lookback_bars', 7, ParamType.DISCRETE, 3, 25, False, 'CG-DC'),
        'dc_min_magnitude_bps': ParameterDef('P1.05', 'dc_min_magnitude_bps', 0.75, ParamType.CONTINUOUS, 0.20, 3.00, False, 'CG-DC'),
        'dc_skip_contradicts': ParameterDef('P1.06', 'dc_skip_contradicts', True, ParamType.BOOLEAN, constraint_group='CG-DC'),
        'dc_leverage_boost': ParameterDef('P1.07', 'dc_leverage_boost', 1.00, ParamType.CONTINUOUS, 1.00, 1.50, False, 'CG-DC-LEV'),
        'dc_leverage_reduce': ParameterDef('P1.08', 'dc_leverage_reduce', 0.50, ParamType.CONTINUOUS, 0.25, 0.90, False, 'CG-DC-LEV'),
        'vd_trend_lookback': ParameterDef('P1.09', 'vd_trend_lookback', 10, ParamType.DISCRETE, 5, 30, False),
        # P2 Leverage
        'min_leverage': ParameterDef('P2.01', 'min_leverage', 0.50, ParamType.CONTINUOUS, 0.10, 1.50, False, 'CG-LEV'),
        'max_leverage': ParameterDef('P2.02', 'max_leverage', 5.00, ParamType.CONTINUOUS, 1.50, 12.00, False, 'CG-LEV'),
        'leverage_convexity': ParameterDef('P2.03', 'leverage_convexity', 3.00, ParamType.CONTINUOUS, 0.75, 6.00, False),
        'fraction': ParameterDef('P2.04', 'fraction', 0.20, ParamType.CONTINUOUS, 0.05, 0.40, False, 'CG-RISK'),
        'use_alpha_layers': ParameterDef('P2.05', 'use_alpha_layers', True, ParamType.BOOLEAN),
        'use_dynamic_leverage': ParameterDef('P2.06', 'use_dynamic_leverage', True, ParamType.BOOLEAN, constraint_group='CG-DYNLEV'),
        # P3 Exit
        'fixed_tp_pct': ParameterDef('P3.01', 'fixed_tp_pct', 0.0099, ParamType.CONTINUOUS, 0.0030, 0.0300, True, 'CG-EXIT'),
        'stop_pct': ParameterDef('P3.02', 'stop_pct', 1.00, ParamType.CONTINUOUS, 0.20, 5.00, True, 'CG-EXIT'),
        'max_hold_bars': ParameterDef('P3.03', 'max_hold_bars', 120, ParamType.DISCRETE, 20, 600, False, 'CG-EXIT'),
        # P4 Fees
        'use_sp_fees': ParameterDef('P4.01', 'use_sp_fees', True, ParamType.BOOLEAN),
        'use_sp_slippage': ParameterDef('P4.02', 'use_sp_slippage', True, ParamType.BOOLEAN, constraint_group='CG-SP'),
        'sp_maker_entry_rate': ParameterDef('P4.03', 'sp_maker_entry_rate', 0.62, ParamType.CONTINUOUS, 0.20, 0.85, False, 'CG-SP'),
        'sp_maker_exit_rate': ParameterDef('P4.04', 'sp_maker_exit_rate', 0.50, ParamType.CONTINUOUS, 0.20, 0.85, False, 'CG-SP'),
        # P5 OB Intelligence
        'use_ob_edge': ParameterDef('P5.01', 'use_ob_edge', True, ParamType.BOOLEAN, constraint_group='CG-OB'),
        'ob_edge_bps': ParameterDef('P5.02', 'ob_edge_bps', 5.00, ParamType.CONTINUOUS, 1.00, 20.00, True, 'CG-OB'),
        'ob_confirm_rate': ParameterDef('P5.03', 'ob_confirm_rate', 0.40, ParamType.CONTINUOUS, 0.10, 0.80, False, 'CG-OB'),
        'ob_imbalance_bias': ParameterDef('P5.04', 'ob_imbalance_bias', -0.09, ParamType.CONTINUOUS, -0.25, 0.15, False, 'CG-OB-SIG'),
        'ob_depth_scale': ParameterDef('P5.05', 'ob_depth_scale', 1.00, ParamType.CONTINUOUS, 0.30, 2.00, True, 'CG-OB-SIG'),
        # P6 Asset Selection
        'use_asset_selection': ParameterDef('P6.01', 'use_asset_selection', True, ParamType.BOOLEAN, constraint_group='CG-IRP'),
        'min_irp_alignment': ParameterDef('P6.02', 'min_irp_alignment', 0.45, ParamType.CONTINUOUS, 0.10, 0.80, False, 'CG-IRP'),
        'lookback': ParameterDef('P6.03', 'lookback', 100, ParamType.DISCRETE, 30, 300, False, 'CG-IRP'),
        # P7 ACB
        'acb_beta_high': ParameterDef('P7.01', 'acb_beta_high', 0.80, ParamType.CONTINUOUS, 0.40, 1.50, False, 'CG-ACB'),
        'acb_beta_low': ParameterDef('P7.02', 'acb_beta_low', 0.20, ParamType.CONTINUOUS, 0.00, 0.60, False, 'CG-ACB'),
        'acb_w750_threshold_pct': ParameterDef('P7.03', 'acb_w750_threshold_pct', 60, ParamType.DISCRETE, 20, 80, False),
    }

    # Boolean parameters for switch grid
    BOOLEAN_PARAMS = [
        'use_direction_confirm',
        'dc_skip_contradicts',
        'use_alpha_layers',
        'use_dynamic_leverage',
        'use_sp_fees',
        'use_sp_slippage',
        'use_ob_edge',
        'use_asset_selection',
    ]

    # Parameters that become FIXED when their parent switch is False
    CONDITIONAL_PARAMS = {
        'use_direction_confirm': ['dc_lookback_bars', 'dc_min_magnitude_bps',
                                  'dc_skip_contradicts', 'dc_leverage_boost',
                                  'dc_leverage_reduce'],
        'use_sp_slippage': ['sp_maker_entry_rate', 'sp_maker_exit_rate'],
        'use_ob_edge': ['ob_edge_bps', 'ob_confirm_rate'],
        'use_asset_selection': ['min_irp_alignment', 'lookback'],
    }

    def __init__(self, base_seed: int = 42):
        """
        Initialize the sampler.

        Parameters
        ----------
        base_seed : int
            Master RNG seed for reproducibility
        """
        self.base_seed = base_seed
        self.rng = np.random.RandomState(base_seed)

    def generate_switch_vectors(self) -> List[Dict[str, Any]]:
        """
        Phase A: Generate all unique boolean switch combinations.

        After canonicalisation (collapsing equivalent configs), returns
        approximately 64-96 unique switch vectors.

        Returns
        -------
        List[Dict[str, Any]]
            List of switch vectors (boolean parameter assignments)
        """
        n_bool = len(self.BOOLEAN_PARAMS)
        n_combinations = 2 ** n_bool

        switch_vectors = []
        seen_canonical = set()

        for i in range(n_combinations):
            # Decode integer to boolean switches (bit j -> j-th boolean param)
            switches = {}
            for j, param_name in enumerate(self.BOOLEAN_PARAMS):
                switches[param_name] = bool((i >> j) & 1)

            # Create canonical form (conditional params fixed to champion when parent is False)
            canonical = self._canonicalize_switch_vector(switches)
            # Only the boolean entries identify a canonical config; fixed
            # champion floats/ints are excluded from the dedup key.
            canonical_key = tuple(sorted((k, v) for k, v in canonical.items()
                                         if isinstance(v, bool)))

            if canonical_key not in seen_canonical:
                seen_canonical.add(canonical_key)
                switch_vectors.append(canonical)

        return switch_vectors

    def _canonicalize_switch_vector(self, switches: Dict[str, bool]) -> Dict[str, Any]:
        """
        Convert a raw switch vector to canonical form.

        When a parent switch is False, its conditional parameters
        are set to FIXED champion values.
        """
        canonical = dict(switches)

        for parent, children in self.CONDITIONAL_PARAMS.items():
            if not switches.get(parent, False):
                # Parent is disabled - fix children to champion
                for child in children:
                    canonical[child] = self.CHAMPION[child]

        return canonical

    def get_free_continuous_params(self, switch_vector: Dict[str, Any]) -> List[str]:
        """
        Get list of continuous/discrete parameters that are NOT fixed
        by the switch vector.
        """
        free_params = []

        for name, pdef in self.PARAMS.items():
            if pdef.param_type in (ParamType.CONTINUOUS, ParamType.DISCRETE):
                # Check if this param is fixed by any (disabled) parent switch
                is_fixed = False
                for parent, children in self.CONDITIONAL_PARAMS.items():
                    if name in children and not switch_vector.get(parent, True):
                        is_fixed = True
                        break

                if not is_fixed:
                    free_params.append(name)

        return free_params

    def sample_continuous_params(
        self,
        switch_vector: Dict[str, Any],
        n_samples: int,
        seed: int
    ) -> List[Dict[str, Any]]:
        """
        Phase B: Generate n LHS samples for continuous/discrete parameters.

        Parameters
        ----------
        switch_vector : dict
            Fixed boolean parameters
        n_samples : int
            Number of samples to generate
        seed : int
            RNG seed for this batch

        Returns
        -------
        List[Dict[str, Any]]
            List of complete parameter dicts (switch + continuous).
            If the switch vector leaves no free parameters, a single copy
            of the switch vector is returned regardless of n_samples.
        """
        free_params = self.get_free_continuous_params(switch_vector)
        n_free = len(free_params)

        if n_free == 0:
            # No free parameters - just return the switch vector
            return [dict(switch_vector)]

        # Generate LHS samples in unit hypercube
        if SCIPY_AVAILABLE:
            sampler = qmc.LatinHypercube(d=n_free, seed=seed)
            unit_samples = sampler.random(n=n_samples)
        else:
            # Fallback: plain uniform sampling (weaker space coverage than LHS)
            print("[WARN] scipy not available, using random sampling instead of LHS")
            rng = np.random.RandomState(seed)
            unit_samples = rng.rand(n_samples, n_free)

        # Scale each unit sample to the parameter ranges
        samples = []
        for i in range(n_samples):
            sample = dict(switch_vector)

            for j, param_name in enumerate(free_params):
                pdef = self.PARAMS[param_name]
                u = unit_samples[i, j]

                # Handle dependent bounds
                lo = pdef.lo
                hi = pdef.hi
                if hi is None:
                    # Compute dependent bound. vel_div_threshold is defined
                    # before vel_div_extreme in PARAMS, so it has already
                    # been written into `sample` by this point.
                    if param_name == 'vel_div_extreme':
                        hi = sample['vel_div_threshold'] * 1.5

                if pdef.param_type == ParamType.CONTINUOUS:
                    if pdef.log_transform:
                        # Log-space sampling: value = lo * (hi/lo) ** u
                        value = lo * (hi / lo) ** u
                    else:
                        # Linear sampling
                        value = lo + u * (hi - lo)
                elif pdef.param_type == ParamType.DISCRETE:
                    # Discrete sampling, clamped to [lo, hi] after rounding
                    value = int(round(lo + u * (hi - lo)))
                    value = max(int(lo), min(int(hi), value))
                else:
                    value = pdef.champion

                sample[param_name] = value

            samples.append(sample)

        return samples

    def generate_trials(
        self,
        n_samples_per_switch: int = 500,
        max_trials: Optional[int] = None
    ) -> List[MCTrialConfig]:
        """
        Generate all MC trial configurations.

        Parameters
        ----------
        n_samples_per_switch : int
            Samples per unique switch vector
        max_trials : int, optional
            Maximum total trials (for testing)

        Returns
        -------
        List[MCTrialConfig]
            All trial configurations
        """
        switch_vectors = self.generate_switch_vectors()
        print(f"[INFO] Generated {len(switch_vectors)} unique switch vectors")

        trials = []
        trial_id = 0

        for switch_idx, switch_vector in enumerate(switch_vectors):
            # Deterministic per-switch seed derived from the master seed
            switch_seed = (self.base_seed * 1000003 + switch_idx) % 2**31

            # Generate continuous samples
            samples = self.sample_continuous_params(
                switch_vector, n_samples_per_switch, switch_seed
            )

            for sample in samples:
                # `is not None` (not truthiness) so max_trials=0 means zero
                # trials rather than unlimited.
                if max_trials is not None and trial_id >= max_trials:
                    break

                # Fill in any missing parameters with champion values
                full_params = dict(self.CHAMPION)
                full_params.update(sample)
                full_params['trial_id'] = trial_id

                # Create trial config
                try:
                    config = MCTrialConfig(**full_params)
                    trials.append(config)
                    trial_id += 1
                except Exception as e:
                    print(f"[WARN] Failed to create trial {trial_id}: {e}")

            if max_trials is not None and trial_id >= max_trials:
                break

        print(f"[INFO] Generated {len(trials)} total trial configurations")
        return trials

    def generate_champion_trial(self) -> MCTrialConfig:
        """Generate the champion configuration as a single trial."""
        params = dict(self.CHAMPION)
        params['trial_id'] = -1  # Special ID for champion
        return MCTrialConfig(**params)

    def save_trials(self, trials: List[MCTrialConfig], path: Union[str, Path]):
        """Save trials to JSON."""
        path = Path(path)
        path.parent.mkdir(parents=True, exist_ok=True)

        data = [t.to_dict() for t in trials]
        with open(path, 'w') as f:
            json.dump(data, f, indent=2)

        print(f"[OK] Saved {len(trials)} trials to {path}")

    def load_trials(self, path: Union[str, Path]) -> List[MCTrialConfig]:
        """Load trials from JSON."""
        with open(path, 'r') as f:
            data = json.load(f)

        trials = [MCTrialConfig.from_dict(d) for d in data]
        print(f"[OK] Loaded {len(trials)} trials from {path}")
        return trials


def test_sampler():
    """Quick test of the sampler."""
    sampler = MCSampler(base_seed=42)

    # Test switch vector generation
    switches = sampler.generate_switch_vectors()
    print(f"Unique switch vectors: {len(switches)}")

    # Test trial generation (small)
    trials = sampler.generate_trials(n_samples_per_switch=10, max_trials=100)
    print(f"Generated trials: {len(trials)}")

    # Check parameter ranges
    for trial in trials[:5]:
        print(f"Trial {trial.trial_id}: vel_div_threshold={trial.vel_div_threshold:.4f}, "
              f"max_leverage={trial.max_leverage:.2f}, use_direction_confirm={trial.use_direction_confirm}")

    return trials


if __name__ == "__main__":
    test_sampler()