Files
DOLPHIN/nautilus_dolphin/mc/mc_sampler.py

535 lines
21 KiB
Python
Raw Normal View History

"""
Monte Carlo Parameter Sampler
=============================
Parameter space definition and Latin Hypercube Sampling (LHS) implementation.
This module defines the complete 33-parameter space across 7 sub-systems
and implements the two-phase sampling strategy:
1. Phase A: Switch grid (boolean combinations)
2. Phase B: LHS continuous sampling per switch-vector
Reference: MONTE_CARLO_SYSTEM_ENVELOPE_SPEC.md Section 2, 3
"""
import numpy as np
from typing import Dict, List, Optional, Tuple, NamedTuple, Any, Union
from dataclasses import dataclass, field
from enum import Enum
import json
from pathlib import Path
# Try to import scipy for LHS
try:
from scipy.stats import qmc
SCIPY_AVAILABLE = True
except ImportError:
SCIPY_AVAILABLE = False
class ParamType(Enum):
    """Kinds of parameter a ``ParameterDef`` can describe.

    The member value is the lower-case string form of the member name.
    """

    # Real-valued parameter with numeric bounds.
    CONTINUOUS = "continuous"
    # Integer-valued parameter with numeric bounds.
    DISCRETE = "discrete"
    # One of an explicit list of category labels.
    CATEGORICAL = "categorical"
    # True/False switch.
    BOOLEAN = "boolean"
    # NOTE(review): presumably computed from other parameters rather than
    # sampled -- no use is visible in this module; confirm.
    DERIVED = "derived"
    # NOTE(review): presumably held constant -- no use is visible in this
    # module; confirm.
    FIXED = "fixed"
@dataclass
class ParameterDef:
    """Definition of a single sampled parameter.

    Raises
    ------
    ValueError
        If ``param_type`` is ``ParamType.CATEGORICAL`` and ``categories``
        is missing or empty.
    """

    # Identifier string (the definitions in this file use e.g. 'P1.01').
    id: str
    # Parameter name.
    name: str
    # Baseline ("champion") value of the parameter.
    champion: Any
    # Kind of parameter (see ParamType).
    param_type: ParamType
    # Lower / upper bound. ``hi`` may be None when the bound depends on
    # another parameter and has to be resolved at sampling time.
    lo: Optional[float] = None
    hi: Optional[float] = None
    # When True the value is meant to be sampled in log space.
    log_transform: bool = False
    # Constraint-group label (e.g. 'CG-VD').
    # NOTE(review): semantics of the groups are not visible here -- confirm
    # against the spec.
    constraint_group: Optional[str] = None
    # For conditional parameters.
    depends_on: Optional[str] = None
    # Category labels; required (non-empty) for CATEGORICAL parameters.
    categories: Optional[List[str]] = None

    def __post_init__(self):
        # An empty list is as unusable as None for a categorical parameter,
        # so reject both.
        if self.param_type == ParamType.CATEGORICAL and not self.categories:
            raise ValueError(f"Categorical parameter {self.name} must have categories")
class MCTrialConfig(NamedTuple):
    """Complete parameter vector for a Monte Carlo trial.

    An immutable record holding ``trial_id`` plus one field per sampled
    parameter, grouped below by sub-system (P1..P7).
    """

    trial_id: int
    # P1 Signal
    vel_div_threshold: float
    vel_div_extreme: float
    use_direction_confirm: bool
    dc_lookback_bars: int
    dc_min_magnitude_bps: float
    dc_skip_contradicts: bool
    dc_leverage_boost: float
    dc_leverage_reduce: float
    vd_trend_lookback: int
    # P2 Leverage
    min_leverage: float
    max_leverage: float
    leverage_convexity: float
    fraction: float
    use_alpha_layers: bool
    use_dynamic_leverage: bool
    # P3 Exit
    fixed_tp_pct: float
    stop_pct: float
    max_hold_bars: int
    # P4 Fees
    use_sp_fees: bool
    use_sp_slippage: bool
    sp_maker_entry_rate: float
    sp_maker_exit_rate: float
    # P5 OB
    use_ob_edge: bool
    ob_edge_bps: float
    ob_confirm_rate: float
    ob_imbalance_bias: float
    ob_depth_scale: float
    # P6 Asset Selection
    use_asset_selection: bool
    min_irp_alignment: float
    lookback: int
    # P7 ACB
    acb_beta_high: float
    acb_beta_low: float
    acb_w750_threshold_pct: int

    def to_dict(self) -> Dict[str, Any]:
        """Return the trial as a plain ``dict`` keyed by field name.

        Keys appear in field-declaration order. Built from ``_asdict()`` so
        the mapping can never drift out of sync with the field list.
        """
        # dict(...) guarantees a plain dict even on Python versions where
        # _asdict() returns an OrderedDict.
        return dict(self._asdict())

    @classmethod
    def from_dict(cls, d: Dict[str, Any]) -> 'MCTrialConfig':
        """Build a trial from a dictionary.

        Keys that are not fields of this tuple are ignored.

        Raises
        ------
        TypeError
            If any field is missing from ``d``.
        """
        missing = [name for name in cls._fields if name not in d]
        if missing:
            # Same exception type the constructor would raise, but naming
            # the offending keys instead of just counting them.
            raise TypeError(f"MCTrialConfig.from_dict missing fields: {missing}")
        return cls(**{name: d[name] for name in cls._fields})
class MCSampler:
    """
    Monte Carlo Parameter Sampler.

    Implements two-phase sampling:
    1. Phase A: Enumerate all boolean switch combinations
    2. Phase B: LHS continuous sampling per switch-vector

    Class attributes
    ----------------
    CHAMPION : dict
        Baseline value for every parameter.
    PARAMS : dict
        ``ParameterDef`` for every parameter, keyed by name. Iteration
        order matters: a parameter whose bound depends on another one
        (``vel_div_extreme`` -> ``vel_div_threshold``) must come after it.
    BOOLEAN_PARAMS : list
        Names of the switches enumerated in Phase A.
    CONDITIONAL_PARAMS : dict
        Parent switch -> parameters pinned to their champion value while
        that switch is False.
    """

    # Champion configuration (baseline)
    CHAMPION = {
        'vel_div_threshold': -0.020,
        'vel_div_extreme': -0.050,
        'use_direction_confirm': True,
        'dc_lookback_bars': 7,
        'dc_min_magnitude_bps': 0.75,
        'dc_skip_contradicts': True,
        'dc_leverage_boost': 1.00,
        'dc_leverage_reduce': 0.50,
        'vd_trend_lookback': 10,
        'min_leverage': 0.50,
        'max_leverage': 5.00,
        'leverage_convexity': 3.00,
        'fraction': 0.20,
        'use_alpha_layers': True,
        'use_dynamic_leverage': True,
        'fixed_tp_pct': 0.0099,
        'stop_pct': 1.00,
        'max_hold_bars': 120,
        'use_sp_fees': True,
        'use_sp_slippage': True,
        'sp_maker_entry_rate': 0.62,
        'sp_maker_exit_rate': 0.50,
        'use_ob_edge': True,
        'ob_edge_bps': 5.00,
        'ob_confirm_rate': 0.40,
        'ob_imbalance_bias': -0.09,
        'ob_depth_scale': 1.00,
        'use_asset_selection': True,
        'min_irp_alignment': 0.45,
        'lookback': 100,
        'acb_beta_high': 0.80,
        'acb_beta_low': 0.20,
        'acb_w750_threshold_pct': 60,
    }

    # Parameter definitions
    PARAMS = {
        # P1 Signal Generator
        'vel_div_threshold': ParameterDef('P1.01', 'vel_div_threshold', -0.020, ParamType.CONTINUOUS, -0.040, -0.008, False, 'CG-VD'),
        'vel_div_extreme': ParameterDef('P1.02', 'vel_div_extreme', -0.050, ParamType.CONTINUOUS, -0.120, None, False, 'CG-VD'),  # hi depends on threshold
        'use_direction_confirm': ParameterDef('P1.03', 'use_direction_confirm', True, ParamType.BOOLEAN, constraint_group='CG-DC'),
        'dc_lookback_bars': ParameterDef('P1.04', 'dc_lookback_bars', 7, ParamType.DISCRETE, 3, 25, False, 'CG-DC'),
        'dc_min_magnitude_bps': ParameterDef('P1.05', 'dc_min_magnitude_bps', 0.75, ParamType.CONTINUOUS, 0.20, 3.00, False, 'CG-DC'),
        'dc_skip_contradicts': ParameterDef('P1.06', 'dc_skip_contradicts', True, ParamType.BOOLEAN, constraint_group='CG-DC'),
        'dc_leverage_boost': ParameterDef('P1.07', 'dc_leverage_boost', 1.00, ParamType.CONTINUOUS, 1.00, 1.50, False, 'CG-DC-LEV'),
        'dc_leverage_reduce': ParameterDef('P1.08', 'dc_leverage_reduce', 0.50, ParamType.CONTINUOUS, 0.25, 0.90, False, 'CG-DC-LEV'),
        'vd_trend_lookback': ParameterDef('P1.09', 'vd_trend_lookback', 10, ParamType.DISCRETE, 5, 30, False),
        # P2 Leverage
        'min_leverage': ParameterDef('P2.01', 'min_leverage', 0.50, ParamType.CONTINUOUS, 0.10, 1.50, False, 'CG-LEV'),
        'max_leverage': ParameterDef('P2.02', 'max_leverage', 5.00, ParamType.CONTINUOUS, 1.50, 12.00, False, 'CG-LEV'),
        'leverage_convexity': ParameterDef('P2.03', 'leverage_convexity', 3.00, ParamType.CONTINUOUS, 0.75, 6.00, False),
        'fraction': ParameterDef('P2.04', 'fraction', 0.20, ParamType.CONTINUOUS, 0.05, 0.40, False, 'CG-RISK'),
        'use_alpha_layers': ParameterDef('P2.05', 'use_alpha_layers', True, ParamType.BOOLEAN),
        'use_dynamic_leverage': ParameterDef('P2.06', 'use_dynamic_leverage', True, ParamType.BOOLEAN, constraint_group='CG-DYNLEV'),
        # P3 Exit
        'fixed_tp_pct': ParameterDef('P3.01', 'fixed_tp_pct', 0.0099, ParamType.CONTINUOUS, 0.0030, 0.0300, True, 'CG-EXIT'),
        'stop_pct': ParameterDef('P3.02', 'stop_pct', 1.00, ParamType.CONTINUOUS, 0.20, 5.00, True, 'CG-EXIT'),
        'max_hold_bars': ParameterDef('P3.03', 'max_hold_bars', 120, ParamType.DISCRETE, 20, 600, False, 'CG-EXIT'),
        # P4 Fees
        'use_sp_fees': ParameterDef('P4.01', 'use_sp_fees', True, ParamType.BOOLEAN),
        'use_sp_slippage': ParameterDef('P4.02', 'use_sp_slippage', True, ParamType.BOOLEAN, constraint_group='CG-SP'),
        'sp_maker_entry_rate': ParameterDef('P4.03', 'sp_maker_entry_rate', 0.62, ParamType.CONTINUOUS, 0.20, 0.85, False, 'CG-SP'),
        'sp_maker_exit_rate': ParameterDef('P4.04', 'sp_maker_exit_rate', 0.50, ParamType.CONTINUOUS, 0.20, 0.85, False, 'CG-SP'),
        # P5 OB Intelligence
        'use_ob_edge': ParameterDef('P5.01', 'use_ob_edge', True, ParamType.BOOLEAN, constraint_group='CG-OB'),
        'ob_edge_bps': ParameterDef('P5.02', 'ob_edge_bps', 5.00, ParamType.CONTINUOUS, 1.00, 20.00, True, 'CG-OB'),
        'ob_confirm_rate': ParameterDef('P5.03', 'ob_confirm_rate', 0.40, ParamType.CONTINUOUS, 0.10, 0.80, False, 'CG-OB'),
        'ob_imbalance_bias': ParameterDef('P5.04', 'ob_imbalance_bias', -0.09, ParamType.CONTINUOUS, -0.25, 0.15, False, 'CG-OB-SIG'),
        'ob_depth_scale': ParameterDef('P5.05', 'ob_depth_scale', 1.00, ParamType.CONTINUOUS, 0.30, 2.00, True, 'CG-OB-SIG'),
        # P6 Asset Selection
        'use_asset_selection': ParameterDef('P6.01', 'use_asset_selection', True, ParamType.BOOLEAN, constraint_group='CG-IRP'),
        'min_irp_alignment': ParameterDef('P6.02', 'min_irp_alignment', 0.45, ParamType.CONTINUOUS, 0.10, 0.80, False, 'CG-IRP'),
        'lookback': ParameterDef('P6.03', 'lookback', 100, ParamType.DISCRETE, 30, 300, False, 'CG-IRP'),
        # P7 ACB
        'acb_beta_high': ParameterDef('P7.01', 'acb_beta_high', 0.80, ParamType.CONTINUOUS, 0.40, 1.50, False, 'CG-ACB'),
        'acb_beta_low': ParameterDef('P7.02', 'acb_beta_low', 0.20, ParamType.CONTINUOUS, 0.00, 0.60, False, 'CG-ACB'),
        'acb_w750_threshold_pct': ParameterDef('P7.03', 'acb_w750_threshold_pct', 60, ParamType.DISCRETE, 20, 80, False),
    }

    # Boolean parameters for switch grid
    BOOLEAN_PARAMS = [
        'use_direction_confirm',
        'dc_skip_contradicts',
        'use_alpha_layers',
        'use_dynamic_leverage',
        'use_sp_fees',
        'use_sp_slippage',
        'use_ob_edge',
        'use_asset_selection',
    ]

    # Parameters that become FIXED when their parent switch is False
    CONDITIONAL_PARAMS = {
        'use_direction_confirm': ['dc_lookback_bars', 'dc_min_magnitude_bps', 'dc_skip_contradicts', 'dc_leverage_boost', 'dc_leverage_reduce'],
        'use_sp_slippage': ['sp_maker_entry_rate', 'sp_maker_exit_rate'],
        'use_ob_edge': ['ob_edge_bps', 'ob_confirm_rate'],
        'use_asset_selection': ['min_irp_alignment', 'lookback'],
    }

    def __init__(self, base_seed: int = 42):
        """
        Initialize the sampler.

        Parameters
        ----------
        base_seed : int
            Master RNG seed for reproducibility
        """
        self.base_seed = base_seed
        # Not consumed by any method of this class (per-batch seeds are
        # derived from base_seed instead); kept as a public attribute.
        self.rng = np.random.RandomState(base_seed)

    def generate_switch_vectors(self) -> List[Dict[str, Any]]:
        """
        Phase A: Generate all unique boolean switch combinations.

        Every one of the 2**len(BOOLEAN_PARAMS) raw combinations is
        canonicalised; combinations that collapse onto an already-seen
        canonical form are dropped. With the definitions in this class
        that yields 192 vectors (256 raw combinations minus the 64
        duplicates created by pinning ``dc_skip_contradicts`` whenever
        ``use_direction_confirm`` is False).

        NOTE(review): the spec-derived estimate of roughly 64-96 vectors
        does not match these definitions -- confirm which is intended.

        Returns
        -------
        List[Dict[str, Any]]
            Canonical switch vectors: the boolean assignments plus the
            champion values of any parameters pinned by a disabled parent.
        """
        switch_vectors = []
        seen_canonical = set()
        for bits in range(2 ** len(self.BOOLEAN_PARAMS)):
            # Bit j of the counter is the value of the j-th switch.
            switches = {
                name: bool((bits >> j) & 1)
                for j, name in enumerate(self.BOOLEAN_PARAMS)
            }
            canonical = self._canonicalize_switch_vector(switches)
            # Identity of a vector is its boolean content only; the pinned
            # non-boolean children are fully determined by those booleans.
            canonical_key = tuple(sorted(
                (k, v) for k, v in canonical.items() if isinstance(v, bool)
            ))
            if canonical_key not in seen_canonical:
                seen_canonical.add(canonical_key)
                switch_vectors.append(canonical)
        return switch_vectors

    def _canonicalize_switch_vector(self, switches: Dict[str, bool]) -> Dict[str, Any]:
        """
        Convert a raw switch vector to canonical form.

        When a parent switch is False (or absent), its conditional
        parameters are set to their champion values. The input dict is
        not modified.
        """
        canonical = dict(switches)
        for parent, children in self.CONDITIONAL_PARAMS.items():
            if switches.get(parent, False):
                continue
            # Parent is disabled - fix children to champion
            for child in children:
                canonical[child] = self.CHAMPION[child]
        return canonical

    def get_free_continuous_params(self, switch_vector: Dict[str, Any]) -> List[str]:
        """
        Get list of continuous/discrete parameters that are NOT fixed
        by the switch vector.

        A parameter is fixed when it is listed in CONDITIONAL_PARAMS under
        a parent whose value in ``switch_vector`` is falsy. A parent that
        is missing from ``switch_vector`` is treated as enabled here.
        Names are returned in PARAMS order.
        """
        fixed = {
            child
            for parent, children in self.CONDITIONAL_PARAMS.items()
            if not switch_vector.get(parent, True)
            for child in children
        }
        sampled_types = (ParamType.CONTINUOUS, ParamType.DISCRETE)
        return [
            name
            for name, pdef in self.PARAMS.items()
            if pdef.param_type in sampled_types and name not in fixed
        ]

    def _scale_unit_value(self, param_name: str, u: float, sample: Dict[str, Any]) -> Any:
        """Map a unit-interval draw ``u`` onto the range of ``param_name``.

        ``sample`` is the partially built sample; it is consulted only to
        resolve bounds that depend on an already-sampled parameter.

        Raises
        ------
        ValueError
            If a bound of the parameter is still undefined after dependent
            bounds have been resolved.
        """
        pdef = self.PARAMS[param_name]
        if pdef.param_type not in (ParamType.CONTINUOUS, ParamType.DISCRETE):
            return pdef.champion

        lo, hi = pdef.lo, pdef.hi
        if hi is None and param_name == 'vel_div_extreme':
            # Dependent bound: relies on vel_div_threshold having been
            # sampled earlier in PARAMS order.
            hi = sample['vel_div_threshold'] * 1.5
        if lo is None or hi is None:
            # Fail loudly instead of hitting arithmetic on None below.
            raise ValueError(
                f"Parameter {param_name} has an unresolved bound (lo={lo}, hi={hi})"
            )

        if pdef.param_type == ParamType.CONTINUOUS:
            if pdef.log_transform:
                # Log-space sampling: value = lo * (hi/lo) ** u
                value = lo * (hi / lo) ** u
            else:
                # Linear sampling
                value = lo + u * (hi - lo)
            # Hand back a builtin float rather than a numpy scalar.
            return float(value)

        # Discrete sampling.
        # NOTE(review): rounding gives the two endpoint integers half the
        # probability mass of interior ones; left as-is so seeded output
        # stays reproducible.
        value = int(round(lo + u * (hi - lo)))
        return max(int(lo), min(int(hi), value))

    def sample_continuous_params(
        self,
        switch_vector: Dict[str, Any],
        n_samples: int,
        seed: int
    ) -> List[Dict[str, Any]]:
        """
        Phase B: Generate n LHS samples for continuous/discrete parameters.

        Parameters
        ----------
        switch_vector : dict
            Fixed boolean parameters
        n_samples : int
            Number of samples to generate
        seed : int
            RNG seed for this batch

        Returns
        -------
        List[Dict[str, Any]]
            List of complete parameter dicts (switch + continuous). When
            no parameter is free, a single copy of ``switch_vector`` is
            returned regardless of ``n_samples``.

        Raises
        ------
        ValueError
            If a free parameter has a bound that cannot be resolved.
        """
        free_params = self.get_free_continuous_params(switch_vector)
        n_free = len(free_params)
        if n_free == 0:
            # No free parameters - just return the switch vector
            return [dict(switch_vector)]

        # Generate samples in the unit hypercube, shape (n_samples, n_free)
        if SCIPY_AVAILABLE:
            sampler = qmc.LatinHypercube(d=n_free, seed=seed)
            unit_samples = sampler.random(n=n_samples)
        else:
            # Fallback: plain random sampling with warning
            print("[WARN] scipy not available, using random sampling instead of LHS")
            rng = np.random.RandomState(seed)
            unit_samples = rng.rand(n_samples, n_free)

        # Scale each row to the parameter ranges
        samples = []
        for row in unit_samples:
            sample = dict(switch_vector)
            for param_name, u in zip(free_params, row):
                sample[param_name] = self._scale_unit_value(param_name, u, sample)
            samples.append(sample)
        return samples

    def generate_trials(
        self,
        n_samples_per_switch: int = 500,
        max_trials: Optional[int] = None
    ) -> List[MCTrialConfig]:
        """
        Generate all MC trial configurations.

        Parameters
        ----------
        n_samples_per_switch : int
            Samples per unique switch vector
        max_trials : int, optional
            Maximum total trials (for testing). ``None`` or ``0`` means
            no cap.

        Returns
        -------
        List[MCTrialConfig]
            All trial configurations, with ``trial_id`` numbered from 0 in
            generation order.
        """
        switch_vectors = self.generate_switch_vectors()
        print(f"[INFO] Generated {len(switch_vectors)} unique switch vectors")

        # Truthiness on purpose: both None and 0 disable the cap.
        capped = bool(max_trials)
        trials = []
        trial_id = 0
        for switch_idx, switch_vector in enumerate(switch_vectors):
            # Deterministic per-vector seed derived from the master seed
            switch_seed = (self.base_seed * 1000003 + switch_idx) % 2**31
            samples = self.sample_continuous_params(
                switch_vector, n_samples_per_switch, switch_seed
            )
            for sample in samples:
                if capped and trial_id >= max_trials:
                    break
                # Fill in any missing parameters with champion values
                full_params = dict(self.CHAMPION)
                full_params.update(sample)
                full_params['trial_id'] = trial_id
                try:
                    config = MCTrialConfig(**full_params)
                except TypeError as e:
                    # Unknown or missing field names; skip this sample
                    # without consuming a trial id.
                    print(f"[WARN] Failed to create trial {trial_id}: {e}")
                else:
                    trials.append(config)
                    trial_id += 1
            if capped and trial_id >= max_trials:
                break

        print(f"[INFO] Generated {len(trials)} total trial configurations")
        return trials

    def generate_champion_trial(self) -> MCTrialConfig:
        """Generate the champion configuration as a single trial (trial_id -1)."""
        params = dict(self.CHAMPION)
        params['trial_id'] = -1  # Special ID for champion
        return MCTrialConfig(**params)

    def save_trials(self, trials: List[MCTrialConfig], path: Union[str, Path]) -> None:
        """Save trials to JSON, creating the parent directory if needed."""
        path = Path(path)
        path.parent.mkdir(parents=True, exist_ok=True)
        data = [t.to_dict() for t in trials]
        with open(path, 'w', encoding='utf-8') as f:
            json.dump(data, f, indent=2)
        print(f"[OK] Saved {len(trials)} trials to {path}")

    def load_trials(self, path: Union[str, Path]) -> List[MCTrialConfig]:
        """Load trials from a JSON file containing a list of trial dicts."""
        with open(path, 'r', encoding='utf-8') as f:
            data = json.load(f)
        trials = [MCTrialConfig.from_dict(d) for d in data]
        print(f"[OK] Loaded {len(trials)} trials from {path}")
        return trials
def test_sampler():
    """Smoke-test the sampler and return the generated trials.

    Builds a sampler with seed 42, reports how many unique switch vectors
    it produces, generates a small capped batch of trials and prints a
    short summary of the first five.
    """
    mc = MCSampler(base_seed=42)

    # Phase A on its own: how many canonical switch vectors are there?
    n_vectors = len(mc.generate_switch_vectors())
    print(f"Unique switch vectors: {n_vectors}")

    # Small end-to-end run: 10 samples per vector, at most 100 trials.
    trials = mc.generate_trials(n_samples_per_switch=10, max_trials=100)
    n_trials = len(trials)
    print(f"Generated trials: {n_trials}")

    # Eyeball a few sampled values.
    for trial in trials[:5]:
        summary = (
            f"Trial {trial.trial_id}: vel_div_threshold={trial.vel_div_threshold:.4f}, "
            f"max_leverage={trial.max_leverage:.2f}, use_direction_confirm={trial.use_direction_confirm}"
        )
        print(summary)
    return trials


if __name__ == "__main__":
    test_sampler()