initial: import DOLPHIN baseline 2026-04-21 from dolphinng5_predict working tree

Includes core prod + GREEN/BLUE subsystems:
- prod/ (BLUE harness, configs, scripts, docs)
- nautilus_dolphin/ (GREEN Nautilus-native impl + dvae/ preserved)
- adaptive_exit/ (AEM engine + models/bucket_assignments.pkl)
- Observability/ (EsoF advisor, TUI, dashboards)
- external_factors/ (EsoF producer)
- mc_forewarning_qlabs_fork/ (MC regime/envelope)

Excludes runtime caches, logs, backups, and reproducible artifacts per .gitignore.
This commit is contained in:
hjnormey
2026-04-21 16:58:38 +02:00
commit 01c19662cb
643 changed files with 260241 additions and 0 deletions

85
nautilus_dolphin/mc/__init__.py Executable file
View File

@@ -0,0 +1,85 @@
"""
Monte Carlo System Envelope Mapping for DOLPHIN NG
==================================================
Full-system operational envelope simulation and ML forewarning integration.
This package implements the Monte Carlo System Envelope Specification for
the Nautilus-Dolphin trading system. It provides:
1. Parameter space sampling (Latin Hypercube Sampling)
2. Internal consistency validation (V1-V4 constraint groups)
3. Trial execution harness (backtest runner)
4. Metric extraction (48 metrics, 10 classification labels)
5. Result persistence (Parquet + SQLite index)
6. ML envelope learning (One-Class SVM, XGBoost)
7. Live forewarning API (risk assessment for configurations)
Usage:
from nautilus_dolphin.mc import MCSampler, MCValidator, MCExecutor
# Run envelope testing
python run_mc_envelope.py --mode run --stage 1 --n-samples 500
# Train ML models on results
python run_mc_envelope.py --mode train --output-dir mc_results/
# Assess a live configuration
python run_mc_envelope.py --mode assess --assess my_config.json
Reference:
MONTE_CARLO_SYSTEM_ENVELOPE_SPEC.md - Complete specification document
"""
__version__ = "1.0.0"
__author__ = "DOLPHIN NG Team"
# Core modules (lazy import to avoid heavy dependencies on import)
def __getattr__(name):
    """Resolve public classes lazily on first attribute access (PEP 562).

    Each branch imports the owning submodule only when one of its exported
    names is requested, keeping `import nautilus_dolphin.mc` cheap.
    """
    if name in ("MCSampler", "MCTrialConfig"):
        from . import mc_sampler
        return getattr(mc_sampler, name)
    if name == "MCValidator":
        from . import mc_validator
        return mc_validator.MCValidator
    if name == "MCExecutor":
        from . import mc_executor
        return mc_executor.MCExecutor
    if name in ("MCMetrics", "MCTrialResult"):
        from . import mc_metrics
        return getattr(mc_metrics, name)
    if name == "MCStore":
        from . import mc_store
        return mc_store.MCStore
    if name == "MCRunner":
        from . import mc_runner
        return mc_runner.MCRunner
    if name in ("MCML", "DolphinForewarner"):
        from . import mc_ml
        return getattr(mc_ml, name)
    raise AttributeError(f"module '{__name__}' has no attribute '{name}'")
# Public API.  These names are not bound eagerly: each is resolved on first
# access by the module-level __getattr__ (PEP 562), so star-imports and
# IDE completion work without importing the heavy submodules up front.
__all__ = [
    # Core classes
    "MCSampler",
    "MCValidator",
    "MCExecutor",
    "MCMetrics",
    "MCStore",
    "MCRunner",
    "MCML",
    "DolphinForewarner",
    "MCTrialConfig",
    "MCTrialResult",
    # Version
    "__version__",
]

View File

@@ -0,0 +1,387 @@
"""
Monte Carlo Trial Executor
==========================
Trial execution harness for running backtests with parameter configurations.
This module interfaces with the Nautilus-Dolphin system to run backtests
with sampled parameter configurations and extract metrics.
Reference: MONTE_CARLO_SYSTEM_ENVELOPE_SPEC.md Section 5
"""
import time
from typing import Dict, List, Optional, Any, Tuple
from pathlib import Path
from datetime import datetime
import numpy as np
from .mc_sampler import MCTrialConfig
from .mc_validator import MCValidator, ValidationResult
from .mc_metrics import MCMetrics, MCTrialResult
class MCExecutor:
    """
    Monte Carlo Trial Executor.

    Runs backtests for parameter configurations and extracts metrics.
    Pipeline per trial: validate (V1-V4) -> lightweight preflight ->
    full backtest (real Nautilus-Dolphin if importable, otherwise a
    parameter-driven simulation) -> metric extraction via MCMetrics.
    """

    def __init__(
        self,
        initial_capital: float = 25000.0,
        data_period: Tuple[str, str] = ('2025-12-31', '2026-02-18'),
        preflight_bars: int = 500,
        preflight_min_trades: int = 2,
        verbose: bool = False
    ):
        """
        Initialize the executor.

        Parameters
        ----------
        initial_capital : float
            Starting capital for backtests
        data_period : Tuple[str, str]
            (start_date, end_date) for backtest
        preflight_bars : int
            Bars for preflight check (V4)
        preflight_min_trades : int
            Minimum trades for preflight to pass
        verbose : bool
            Print detailed execution info
        """
        self.initial_capital = initial_capital
        self.data_period = data_period
        self.preflight_bars = preflight_bars
        self.preflight_min_trades = preflight_min_trades
        self.verbose = verbose
        self.validator = MCValidator(verbose=verbose)
        self.metrics = MCMetrics(initial_capital=initial_capital)
        # Try to import Nautilus-Dolphin components
        self._init_nd_components()

    def _init_nd_components(self):
        """Initialize Nautilus-Dolphin components if available.

        Sets self.nd_available; on ImportError the executor silently falls
        back to the simulation backend so MC runs still work in dev
        environments without the full trading stack.
        """
        self.nd_available = False
        try:
            # Import key components from Nautilus-Dolphin
            from nautilus_dolphin.nautilus.strategy_config import DolphinStrategyConfig
            from nautilus_dolphin.nautilus.backtest_runner import run_backtest
            self.DolphinStrategyConfig = DolphinStrategyConfig
            self.run_nd_backtest = run_backtest
            self.nd_available = True
            if self.verbose:
                print("[OK] Nautilus-Dolphin components loaded")
        except ImportError as e:
            if self.verbose:
                print(f"[WARN] Nautilus-Dolphin not available: {e}")
                print("[WARN] Will use simulation mode for testing")

    def execute_trial(
        self,
        config: MCTrialConfig,
        skip_validation: bool = False
    ) -> MCTrialResult:
        """
        Execute a single MC trial.

        Parameters
        ----------
        config : MCTrialConfig
            Trial configuration
        skip_validation : bool
            Skip validation (if already validated)

        Returns
        -------
        MCTrialResult
            Complete trial result with metrics.  Validation/preflight
            failures and backtest exceptions are returned as results with
            an explanatory status rather than raised.
        """
        start_time = time.time()
        # Step 1: Validation (V1-V4)
        if not skip_validation:
            validation = self.validator.validate(config)
            if not validation.is_valid():
                result = MCTrialResult(
                    trial_id=config.trial_id,
                    config=config,
                    status=validation.status.value,
                    error_message=validation.reject_reason
                )
                result.execution_time_sec = time.time() - start_time
                return result
        # Step 2: Preflight check (V4 lightweight) -- runs even when
        # validation is skipped, since it is cheap and catches degenerate
        # configs before the expensive backtest.
        preflight_passed, preflight_msg = self._run_preflight(config)
        if not preflight_passed:
            result = MCTrialResult(
                trial_id=config.trial_id,
                config=config,
                status='PREFLIGHT_FAIL',
                error_message=preflight_msg
            )
            result.execution_time_sec = time.time() - start_time
            return result
        # Step 3: Full backtest
        try:
            if self.nd_available:
                trades, daily_pnls, date_stats, signal_stats = self._run_nd_backtest(config)
            else:
                trades, daily_pnls, date_stats, signal_stats = self._run_simulated_backtest(config)
            # Step 4: Compute metrics
            execution_time = time.time() - start_time
            result = self.metrics.compute(
                config, trades, daily_pnls, date_stats, signal_stats, execution_time
            )
            if self.verbose:
                print(f" Trial {config.trial_id}: ROI={result.roi_pct:.2f}%, "
                      f"Trades={result.n_trades}, Sharpe={result.sharpe_ratio:.2f}")
            return result
        except Exception as e:
            # Deliberate catch-all: one broken trial must not abort a batch.
            if self.verbose:
                print(f" Trial {config.trial_id}: ERROR - {e}")
            result = MCTrialResult(
                trial_id=config.trial_id,
                config=config,
                status='ERROR',
                error_message=str(e)
            )
            result.execution_time_sec = time.time() - start_time
            return result

    def _run_preflight(self, config: MCTrialConfig) -> Tuple[bool, str]:
        """
        Run lightweight preflight check (V4).

        Rejects configurations with degenerate parameter combinations
        before spending a full backtest on them.
        Returns (passed, message).
        """
        # Check for extreme values that would cause issues
        # Fraction too small
        if config.fraction < 0.02:
            return False, f"FRACTION_TOO_SMALL: {config.fraction}"
        # Leverage range issues
        leverage_range = config.max_leverage - config.min_leverage
        if leverage_range < 0.5 and config.leverage_convexity > 2.0:
            return False, "NARROW_RANGE_HIGH_CONVEXITY"
        # Hold period too short relative to the trend lookback window
        if config.max_hold_bars < config.vd_trend_lookback + 10:
            return False, "HOLD_TOO_SHORT"
        # Guard: a zero/negative stop would divide by zero below and
        # crash the whole batch (preflight runs outside execute_trial's
        # try/except).
        if config.stop_pct <= 0:
            return False, f"STOP_PCT_NON_POSITIVE: {config.stop_pct}"
        # TP/SL ratio check.  NOTE(review): stop_pct appears to be in
        # percent while fixed_tp_pct is a fraction -- confirm units
        # against MCTrialConfig.
        tp_sl_ratio = config.fixed_tp_pct / (config.stop_pct / 100)
        if tp_sl_ratio > 10:
            return False, f"TP_SL_RATIO_EXTREME: {tp_sl_ratio}"
        return True, "OK"

    def _run_nd_backtest(
        self,
        config: MCTrialConfig
    ) -> Tuple[List[Dict], List[float], List[Dict], Dict[str, Any]]:
        """
        Run actual Nautilus-Dolphin backtest.

        Returns (trades, daily_pnls, date_stats, signal_stats); missing
        keys in the backend's result dict default to empty containers.
        """
        # Convert MC config to ND config
        nd_config = self._mc_to_nd_config(config)
        # Run backtest
        backtest_result = self.run_nd_backtest(nd_config)
        # Extract results
        trades = backtest_result.get('trades', [])
        daily_pnls = backtest_result.get('daily_pnls', [])
        date_stats = backtest_result.get('date_stats', [])
        signal_stats = backtest_result.get('signal_stats', {})
        return trades, daily_pnls, date_stats, signal_stats

    def _mc_to_nd_config(self, config: MCTrialConfig) -> Dict[str, Any]:
        """Convert MC trial config to Nautilus-Dolphin config.

        Fixed fields (venue, direction, excluded assets, asset universe,
        risk limits) are hard-wired per the MC envelope spec; only the
        sampled parameters vary per trial.
        """
        return {
            'venue': 'BINANCE_FUTURES',
            'environment': 'BACKTEST',
            'trader_id': f'DOLPHIN-MC-{config.trial_id}',
            'strategy': {
                'venue': 'BINANCE_FUTURES',
                'direction': 'SHORT',
                'vel_div_threshold': config.vel_div_threshold,
                'vel_div_extreme': config.vel_div_extreme,
                'max_leverage': config.max_leverage,
                'min_leverage': config.min_leverage,
                'leverage_convexity': config.leverage_convexity,
                'capital_fraction': config.fraction,
                'max_hold_bars': config.max_hold_bars,
                # tp_bps derived from the fractional TP (1.0 == 10000 bps)
                'tp_bps': int(config.fixed_tp_pct * 10000),
                'fixed_tp_pct': config.fixed_tp_pct,
                'stop_pct': config.stop_pct,
                'use_trailing': False,
                'irp_alignment_min': config.min_irp_alignment,
                'lookback': config.lookback,
                'excluded_assets': ['TUSDUSDT', 'USDCUSDT'],
                'acb_enabled': True,
                'max_concurrent_positions': 1,
                'daily_loss_limit_pct': 10.0,
                'use_sp_fees': config.use_sp_fees,
                'use_sp_slippage': config.use_sp_slippage,
                'sp_maker_fill_rate': config.sp_maker_entry_rate,
                'sp_maker_exit_rate': config.sp_maker_exit_rate,
                'use_ob_edge': config.use_ob_edge,
                'ob_edge_bps': config.ob_edge_bps,
                'ob_confirm_rate': config.ob_confirm_rate,
                'ob_imbalance_bias': config.ob_imbalance_bias,
                'ob_depth_scale': config.ob_depth_scale,
                'use_direction_confirm': config.use_direction_confirm,
                'dc_lookback_bars': config.dc_lookback_bars,
                'dc_min_magnitude_bps': config.dc_min_magnitude_bps,
                'dc_skip_contradicts': config.dc_skip_contradicts,
                'dc_leverage_boost': config.dc_leverage_boost,
                'dc_leverage_reduce': config.dc_leverage_reduce,
                'use_alpha_layers': config.use_alpha_layers,
                'use_dynamic_leverage': config.use_dynamic_leverage,
                'acb_beta_high': config.acb_beta_high,
                'acb_beta_low': config.acb_beta_low,
                'acb_w750_threshold_pct': config.acb_w750_threshold_pct,
            },
            'data_catalog': {
                'eigenvalues_dir': '../eigenvalues',
                'catalog_path': 'nautilus_dolphin/catalog',
                'start_date': self.data_period[0],
                'end_date': self.data_period[1],
                'assets': [
                    'BTCUSDT', 'ETHUSDT', 'ADAUSDT', 'SOLUSDT', 'DOTUSDT',
                    'AVAXUSDT', 'MATICUSDT', 'LINKUSDT', 'UNIUSDT', 'ATOMUSDT'
                ],
            },
        }

    def _run_simulated_backtest(
        self,
        config: MCTrialConfig
    ) -> Tuple[List[Dict], List[float], List[Dict], Dict[str, Any]]:
        """
        Run simulated backtest for testing without Nautilus.

        This produces realistic-looking results based on parameter
        configuration without actually running a full backtest.  Results
        are stochastic (np.random) -- not deterministic across calls.
        """
        # Number of trades based on vel_div_threshold (lower = more trades)
        base_trades = 500
        threshold_factor = abs(-0.02 / config.vel_div_threshold)
        n_trades = int(base_trades * threshold_factor * np.random.uniform(0.8, 1.2))
        n_trades = max(20, min(2000, n_trades))
        # Win rate based on parameters
        base_wr = 0.48
        if config.use_direction_confirm:
            base_wr += 0.05
        if config.use_ob_edge:
            base_wr += 0.02
        win_rate = np.clip(base_wr + np.random.normal(0, 0.05), 0.3, 0.7)
        # Generate trades
        trades = []
        n_wins = int(n_trades * win_rate)
        n_losses = n_trades - n_wins
        for i in range(n_trades):
            is_win = i < n_wins
            if is_win:
                pnl_pct = np.random.exponential(0.008) + 0.002
                pnl = pnl_pct * self.initial_capital * config.fraction * config.max_leverage
                exit_type = 'tp' if np.random.random() < 0.7 else 'hold'
            else:
                pnl_pct = -np.random.exponential(0.006) - 0.001
                pnl = pnl_pct * self.initial_capital * config.fraction * config.max_leverage
                exit_type = np.random.choice(['stop', 'hold'], p=[0.3, 0.7])
            trades.append({
                'pnl': pnl,
                'pnl_pct': pnl_pct,
                'exit_type': exit_type,
                'bars_held': np.random.randint(10, config.max_hold_bars),
                'asset': np.random.choice(['BTCUSDT', 'ETHUSDT', 'SOLUSDT', 'ADAUSDT']),
            })
        # Shuffle trades so wins/losses are interleaved across days
        np.random.shuffle(trades)
        # Generate daily P&Ls (48 days).  NOTE: integer division means up
        # to 47 leftover trades are excluded from the daily buckets.
        daily_pnls = []
        date_stats = []
        trades_per_day = len(trades) // 48
        for day in range(48):
            day_trades = trades[day * trades_per_day:(day + 1) * trades_per_day]
            day_pnl = sum(t['pnl'] for t in day_trades)
            daily_pnls.append(day_pnl)
            date_str = f'2026-01-{day % 31 + 1:02d}' if day < 31 else f'2026-02-{day - 30:02d}'
            date_stats.append({
                'date': date_str,
                'pnl': day_pnl,
            })
        # Signal stats
        signal_stats = {
            'dc_skip_rate': 0.1 if config.use_direction_confirm else 0.0,
            'ob_skip_rate': 0.05 if config.use_ob_edge else 0.0,
            'dc_confirm_rate': 0.7 if config.use_direction_confirm else 0.0,
            'irp_match_rate': 0.6 if config.use_asset_selection else 0.0,
            'entry_attempt_rate': 0.3,
            'signal_to_trade_rate': len(trades) / (48 * 1000),  # Approximate
        }
        return trades, daily_pnls, date_stats, signal_stats

    def execute_batch(
        self,
        configs: List[MCTrialConfig],
        progress_interval: int = 10
    ) -> List[MCTrialResult]:
        """
        Execute a batch of trials.

        Parameters
        ----------
        configs : List[MCTrialConfig]
            Trial configurations
        progress_interval : int
            Print progress every N trials

        Returns
        -------
        List[MCTrialResult]
            Results for all trials, in input order
        """
        results = []
        total = len(configs)
        for i, config in enumerate(configs):
            result = self.execute_trial(config)
            results.append(result)
            if (i + 1) % progress_interval == 0 or i == total - 1:
                print(f" Progress: {i+1}/{total} ({(i+1)/total*100:.1f}%)")
        return results

737
nautilus_dolphin/mc/mc_metrics.py Executable file
View File

@@ -0,0 +1,737 @@
"""
Monte Carlo Metrics Extractor
=============================
Extract 48 metrics and 10 classification labels from trial results.
Metric Categories:
M01-M15: Primary Performance Metrics
M16-M32: Risk / Stability Metrics
M33-M38: Signal Quality Metrics
M39-M43: Capital Path Metrics
M44-M48: Regime Metrics
L01-L10: Derived Classification Labels
Reference: MONTE_CARLO_SYSTEM_ENVELOPE_SPEC.md Section 6
"""
from typing import Dict, List, Optional, NamedTuple, Any, Tuple
from dataclasses import dataclass, field
from datetime import datetime
import numpy as np
from .mc_sampler import MCTrialConfig
@dataclass
class MCTrialResult:
    """Complete result from a Monte Carlo trial.

    Flat container holding the trial's configuration, the extracted
    metrics (M01-M48), the derived classification labels (L01-L10) and
    execution metadata.  Instances are populated by MCMetrics.compute()
    and round-tripped through to_dict()/from_dict() using the column
    prefixes P_ (parameters), M_ (metrics) and L_ (labels).
    """
    trial_id: int
    config: MCTrialConfig
    # Primary Performance Metrics (M01-M15)
    roi_pct: float = 0.0
    profit_factor: float = 0.0
    win_rate: float = 0.0
    n_trades: int = 0
    max_drawdown_pct: float = 0.0
    sharpe_ratio: float = 0.0
    sortino_ratio: float = 0.0
    calmar_ratio: float = 0.0
    avg_win_pct: float = 0.0
    avg_loss_pct: float = 0.0
    win_loss_ratio: float = 0.0
    expectancy_pct: float = 0.0
    h1_roi_pct: float = 0.0   # ROI over the first half of the test period
    h2_roi_pct: float = 0.0   # ROI over the second half of the test period
    h2_h1_ratio: float = 0.0  # H2/H1 P&L ratio; < 0.5 flags degradation (L10)
    # Risk / Stability Metrics (M16-M32)
    n_consecutive_losses_max: int = 0
    n_stop_exits: int = 0
    n_tp_exits: int = 0
    n_hold_exits: int = 0
    stop_rate: float = 0.0
    tp_rate: float = 0.0
    hold_rate: float = 0.0
    avg_hold_bars: float = 0.0
    vol_of_daily_pnl: float = 0.0
    skew_daily_pnl: float = 0.0
    kurtosis_daily_pnl: float = 0.0
    worst_day_pct: float = 0.0
    best_day_pct: float = 0.0
    n_days_profitable: int = 0
    n_days_loss: int = 0
    profitable_day_rate: float = 0.0
    max_daily_drawdown_pct: float = 0.0
    # Signal Quality Metrics (M33-M38)
    dc_skip_rate: float = 0.0
    ob_skip_rate: float = 0.0
    dc_confirm_rate: float = 0.0
    irp_match_rate: float = 0.0
    entry_attempt_rate: float = 0.0
    signal_to_trade_rate: float = 0.0
    # Capital Path Metrics (M39-M43)
    equity_curve_slope: float = 0.0
    equity_curve_r2: float = 0.0
    equity_curve_autocorr: float = 0.0
    max_underwater_days: int = 0
    recovery_factor: float = 0.0
    # Regime Metrics (M44-M48)
    date_pnl_std: float = 0.0
    date_pnl_range: float = 0.0
    q10_date_pnl: float = 0.0
    q90_date_pnl: float = 0.0
    tail_ratio: float = 0.0   # |q10| / |q90| of daily P&L (downside vs upside tail)
    # Classification Labels (L01-L10)
    profitable: bool = False
    strongly_profitable: bool = False
    drawdown_ok: bool = False
    sharpe_ok: bool = False
    pf_ok: bool = False
    wr_ok: bool = False
    champion_region: bool = False
    catastrophic: bool = False
    inert: bool = False
    h2_degradation: bool = False
    # Metadata
    timestamp: str = field(default_factory=lambda: datetime.now().isoformat())
    execution_time_sec: float = 0.0
    status: str = "pending"
    error_message: Optional[str] = None

    def compute_labels(self):
        """Compute classification labels (L01-L10) from the metric fields.

        Must be called after metrics are populated; thresholds follow the
        MC envelope spec (ROI in percent, win rate as a fraction).
        """
        # L01: profitable
        self.profitable = self.roi_pct > 0
        # L02: strongly_profitable
        self.strongly_profitable = self.roi_pct > 30
        # L03: drawdown_ok
        self.drawdown_ok = self.max_drawdown_pct < 20
        # L04: sharpe_ok
        self.sharpe_ok = self.sharpe_ratio > 1.5
        # L05: pf_ok
        self.pf_ok = self.profit_factor > 1.10
        # L06: wr_ok
        self.wr_ok = self.win_rate > 0.45
        # L07: champion_region -- conjunction of L02-L06
        self.champion_region = (
            self.strongly_profitable and
            self.drawdown_ok and
            self.sharpe_ok and
            self.pf_ok and
            self.wr_ok
        )
        # L08: catastrophic
        self.catastrophic = (
            self.roi_pct < -30 or
            self.max_drawdown_pct > 40
        )
        # L09: inert -- too few trades to be statistically meaningful
        self.inert = self.n_trades < 50
        # L10: h2_degradation -- second half earned < 50% of first half
        self.h2_degradation = self.h2_h1_ratio < 0.50

    def to_dict(self) -> Dict[str, Any]:
        """Convert to dictionary (flat structure for DataFrame).

        Column naming: metadata keys unprefixed; config parameters
        prefixed 'P_'; metrics 'M_'; labels 'L_'.  Inverse of from_dict().
        """
        result = {
            # IDs
            'trial_id': self.trial_id,
            'timestamp': self.timestamp,
            'execution_time_sec': self.execution_time_sec,
            'status': self.status,
            'error_message': self.error_message,
        }
        # Add all config parameters with P_ prefix
        config_dict = self.config.to_dict()
        for k, v in config_dict.items():
            result[f'P_{k}'] = v
        # Add metrics with M_ prefix
        result.update({
            'M_roi_pct': self.roi_pct,
            'M_profit_factor': self.profit_factor,
            'M_win_rate': self.win_rate,
            'M_n_trades': self.n_trades,
            'M_max_drawdown_pct': self.max_drawdown_pct,
            'M_sharpe_ratio': self.sharpe_ratio,
            'M_sortino_ratio': self.sortino_ratio,
            'M_calmar_ratio': self.calmar_ratio,
            'M_avg_win_pct': self.avg_win_pct,
            'M_avg_loss_pct': self.avg_loss_pct,
            'M_win_loss_ratio': self.win_loss_ratio,
            'M_expectancy_pct': self.expectancy_pct,
            'M_h1_roi_pct': self.h1_roi_pct,
            'M_h2_roi_pct': self.h2_roi_pct,
            'M_h2_h1_ratio': self.h2_h1_ratio,
            'M_n_consecutive_losses_max': self.n_consecutive_losses_max,
            'M_n_stop_exits': self.n_stop_exits,
            'M_n_tp_exits': self.n_tp_exits,
            'M_n_hold_exits': self.n_hold_exits,
            'M_stop_rate': self.stop_rate,
            'M_tp_rate': self.tp_rate,
            'M_hold_rate': self.hold_rate,
            'M_avg_hold_bars': self.avg_hold_bars,
            'M_vol_of_daily_pnl': self.vol_of_daily_pnl,
            'M_skew_daily_pnl': self.skew_daily_pnl,
            'M_kurtosis_daily_pnl': self.kurtosis_daily_pnl,
            'M_worst_day_pct': self.worst_day_pct,
            'M_best_day_pct': self.best_day_pct,
            'M_n_days_profitable': self.n_days_profitable,
            'M_n_days_loss': self.n_days_loss,
            'M_profitable_day_rate': self.profitable_day_rate,
            'M_max_daily_drawdown_pct': self.max_daily_drawdown_pct,
            'M_dc_skip_rate': self.dc_skip_rate,
            'M_ob_skip_rate': self.ob_skip_rate,
            'M_dc_confirm_rate': self.dc_confirm_rate,
            'M_irp_match_rate': self.irp_match_rate,
            'M_entry_attempt_rate': self.entry_attempt_rate,
            'M_signal_to_trade_rate': self.signal_to_trade_rate,
            'M_equity_curve_slope': self.equity_curve_slope,
            'M_equity_curve_r2': self.equity_curve_r2,
            'M_equity_curve_autocorr': self.equity_curve_autocorr,
            'M_max_underwater_days': self.max_underwater_days,
            'M_recovery_factor': self.recovery_factor,
            'M_date_pnl_std': self.date_pnl_std,
            'M_date_pnl_range': self.date_pnl_range,
            'M_q10_date_pnl': self.q10_date_pnl,
            'M_q90_date_pnl': self.q90_date_pnl,
            'M_tail_ratio': self.tail_ratio,
        })
        # Add labels with L_ prefix
        result.update({
            'L_profitable': self.profitable,
            'L_strongly_profitable': self.strongly_profitable,
            'L_drawdown_ok': self.drawdown_ok,
            'L_sharpe_ok': self.sharpe_ok,
            'L_pf_ok': self.pf_ok,
            'L_wr_ok': self.wr_ok,
            'L_champion_region': self.champion_region,
            'L_catastrophic': self.catastrophic,
            'L_inert': self.inert,
            'L_h2_degradation': self.h2_degradation,
        })
        return result

    @classmethod
    def from_dict(cls, d: Dict[str, Any]) -> 'MCTrialResult':
        """Create from dictionary (inverse of to_dict()).

        Unknown M_/L_ keys are silently ignored via the hasattr checks,
        which keeps loading tolerant of schema drift in stored results.
        """
        # Extract config: strip the two-character 'P_' prefix.
        # NOTE(review): 'P_trial_id' is excluded -- assumes
        # MCTrialConfig.from_dict tolerates a missing trial_id; confirm.
        config_dict = {k[2:]: v for k, v in d.items() if k.startswith('P_') and k != 'P_trial_id'}
        config = MCTrialConfig.from_dict(config_dict)
        # Create result
        result = cls(trial_id=d.get('trial_id', 0), config=config)
        # Set metrics and labels: strip the 'M_' / 'L_' prefix to recover
        # the attribute name.
        for k, v in d.items():
            if k.startswith('M_'):
                attr_name = k[2:]
                if hasattr(result, attr_name):
                    setattr(result, attr_name, v)
            elif k.startswith('L_'):
                attr_name = k[2:]
                if hasattr(result, attr_name):
                    setattr(result, attr_name, v)
        # Set metadata
        result.timestamp = d.get('timestamp', datetime.now().isoformat())
        result.execution_time_sec = d.get('execution_time_sec', 0.0)
        result.status = d.get('status', 'completed')
        result.error_message = d.get('error_message')
        return result
class MCMetrics:
    """
    Monte Carlo Metrics Extractor.

    Computes all 48 metrics and 10 classification labels from backtest
    results.  Stateless apart from initial_capital; compute() is the
    single entry point, the private helpers implement individual metrics.
    """

    def __init__(self, initial_capital: float = 25000.0):
        """
        Initialize metrics extractor.

        Parameters
        ----------
        initial_capital : float
            Initial capital for ROI calculation
        """
        self.initial_capital = initial_capital

    def compute(
        self,
        config: MCTrialConfig,
        trades: List[Dict],
        daily_pnls: List[float],
        date_stats: List[Dict],
        signal_stats: Dict[str, Any],
        execution_time_sec: float = 0.0
    ) -> MCTrialResult:
        """
        Compute all metrics from backtest results.

        Parameters
        ----------
        config : MCTrialConfig
            Trial configuration
        trades : List[Dict]
            Trade records with keys: pnl, pnl_pct, exit_type, bars_held, etc.
        daily_pnls : List[float]
            Daily P&L values
        date_stats : List[Dict]
            Per-date statistics
        signal_stats : Dict[str, Any]
            Signal processing statistics
        execution_time_sec : float
            Trial execution time

        Returns
        -------
        MCTrialResult
            Complete trial result with all metrics and labels; status is
            set to "completed".
        """
        result = MCTrialResult(trial_id=config.trial_id, config=config)
        result.execution_time_sec = execution_time_sec
        # Compute metrics group by group
        self._compute_performance_metrics(result, trades, daily_pnls, date_stats)
        self._compute_risk_metrics(result, trades, daily_pnls)
        self._compute_signal_metrics(result, signal_stats)
        self._compute_capital_metrics(result, daily_pnls)
        self._compute_regime_metrics(result, daily_pnls)
        # Labels are derived from the metrics, so they come last
        result.compute_labels()
        result.status = "completed"
        return result

    def _compute_performance_metrics(
        self,
        result: MCTrialResult,
        trades: List[Dict],
        daily_pnls: List[float],
        date_stats: List[Dict]
    ):
        """Compute M01-M15: Primary Performance Metrics."""
        n_trades = len(trades)
        result.n_trades = n_trades
        if n_trades == 0:
            # No trades - all metrics stay at defaults
            return
        # Win/loss separation (zero-P&L trades count as losses)
        winning_trades = [t for t in trades if t.get('pnl', 0) > 0]
        losing_trades = [t for t in trades if t.get('pnl', 0) <= 0]
        n_wins = len(winning_trades)
        n_losses = len(losing_trades)
        # M01: roi_pct -- derived from daily P&L, not per-trade P&L
        final_capital = self.initial_capital + sum(daily_pnls) if daily_pnls else self.initial_capital
        result.roi_pct = (final_capital - self.initial_capital) / self.initial_capital * 100
        # M02: profit_factor (inf when there are no losing trades)
        gross_wins = sum(t.get('pnl', 0) for t in winning_trades)
        gross_losses = abs(sum(t.get('pnl', 0) for t in losing_trades))
        result.profit_factor = gross_wins / gross_losses if gross_losses > 0 else float('inf')
        # M03: win_rate
        result.win_rate = n_wins / n_trades if n_trades > 0 else 0
        # M05: max_drawdown_pct
        result.max_drawdown_pct = self._compute_max_drawdown_pct(daily_pnls)
        # M06: sharpe_ratio (annualized)
        result.sharpe_ratio = self._compute_sharpe_ratio(daily_pnls)
        # M07: sortino_ratio
        result.sortino_ratio = self._compute_sortino_ratio(daily_pnls)
        # M08: calmar_ratio
        result.calmar_ratio = result.roi_pct / result.max_drawdown_pct if result.max_drawdown_pct > 0 else float('inf')
        # M09: avg_win_pct
        win_pnls_pct = [t.get('pnl_pct', 0) * 100 for t in winning_trades]
        result.avg_win_pct = np.mean(win_pnls_pct) if win_pnls_pct else 0
        # M10: avg_loss_pct
        loss_pnls_pct = [t.get('pnl_pct', 0) * 100 for t in losing_trades]
        result.avg_loss_pct = np.mean(loss_pnls_pct) if loss_pnls_pct else 0
        # M11: win_loss_ratio
        result.win_loss_ratio = abs(result.avg_win_pct / result.avg_loss_pct) if result.avg_loss_pct != 0 else float('inf')
        # M12: expectancy_pct
        wr = result.win_rate
        result.expectancy_pct = wr * result.avg_win_pct + (1 - wr) * result.avg_loss_pct
        # M13-M15: H1/H2 metrics -- split the date range at the midpoint
        if len(date_stats) >= 2:
            mid = len(date_stats) // 2
            h1_pnl = sum(d.get('pnl', 0) for d in date_stats[:mid])
            h2_pnl = sum(d.get('pnl', 0) for d in date_stats[mid:])
            result.h1_roi_pct = h1_pnl / self.initial_capital * 100
            result.h2_roi_pct = h2_pnl / self.initial_capital * 100
            # NOTE: ratio is sign-sensitive; a negative h1_pnl inverts its
            # meaning, and h1_pnl == 0 yields 0.
            result.h2_h1_ratio = h2_pnl / h1_pnl if h1_pnl != 0 else 0

    def _compute_risk_metrics(
        self,
        result: MCTrialResult,
        trades: List[Dict],
        daily_pnls: List[float]
    ):
        """Compute M16-M32: Risk / Stability Metrics."""
        # M16: n_consecutive_losses_max
        result.n_consecutive_losses_max = self._compute_max_consecutive_losses(trades)
        # M17-M19: Exit type counts
        result.n_stop_exits = sum(1 for t in trades if t.get('exit_type') == 'stop')
        result.n_tp_exits = sum(1 for t in trades if t.get('exit_type') == 'tp')
        result.n_hold_exits = sum(1 for t in trades if t.get('exit_type') == 'hold')
        # M20-M22: Exit rates
        n_trades = len(trades)
        if n_trades > 0:
            result.stop_rate = result.n_stop_exits / n_trades
            result.tp_rate = result.n_tp_exits / n_trades
            result.hold_rate = result.n_hold_exits / n_trades
        # M23: avg_hold_bars
        hold_bars = [t.get('bars_held', 0) for t in trades]
        result.avg_hold_bars = np.mean(hold_bars) if hold_bars else 0
        # M24-M26: Daily P&L distribution stats (need >= 2 days for ddof=1)
        if len(daily_pnls) >= 2:
            result.vol_of_daily_pnl = np.std(daily_pnls, ddof=1)
            result.skew_daily_pnl = self._compute_skewness(daily_pnls)
            result.kurtosis_daily_pnl = self._compute_kurtosis(daily_pnls)
        # M27-M28: Best/worst day
        if daily_pnls:
            result.worst_day_pct = min(daily_pnls) / self.initial_capital * 100
            result.best_day_pct = max(daily_pnls) / self.initial_capital * 100
        # M29-M31: Profitable days (zero-P&L days count as losses)
        result.n_days_profitable = sum(1 for pnl in daily_pnls if pnl > 0)
        result.n_days_loss = sum(1 for pnl in daily_pnls if pnl <= 0)
        if daily_pnls:
            result.profitable_day_rate = result.n_days_profitable / len(daily_pnls)
        # M32: max_daily_drawdown_pct
        result.max_daily_drawdown_pct = self._compute_max_daily_drawdown_pct(daily_pnls)

    def _compute_signal_metrics(
        self,
        result: MCTrialResult,
        signal_stats: Dict[str, Any]
    ):
        """Compute M33-M38: Signal Quality Metrics (pass-through from the
        backtest's signal_stats dict; missing keys default to 0)."""
        result.dc_skip_rate = signal_stats.get('dc_skip_rate', 0)
        result.ob_skip_rate = signal_stats.get('ob_skip_rate', 0)
        result.dc_confirm_rate = signal_stats.get('dc_confirm_rate', 0)
        result.irp_match_rate = signal_stats.get('irp_match_rate', 0)
        result.entry_attempt_rate = signal_stats.get('entry_attempt_rate', 0)
        result.signal_to_trade_rate = signal_stats.get('signal_to_trade_rate', 0)

    def _compute_capital_metrics(
        self,
        result: MCTrialResult,
        daily_pnls: List[float]
    ):
        """Compute M39-M43: Capital Path Metrics."""
        if len(daily_pnls) < 2:
            return
        # Compute equity curve (initial capital prepended)
        equity = [self.initial_capital]
        for pnl in daily_pnls:
            equity.append(equity[-1] + pnl)
        # M39-M40: equity_curve_slope / r2 (linear regression vs day index)
        days = np.arange(len(equity))
        result.equity_curve_slope, result.equity_curve_r2 = self._linear_regression(days, equity)
        # M41: equity_curve_autocorr (lag-1 autocorrelation of daily returns)
        returns = np.diff(equity) / equity[:-1]
        if len(returns) > 1:
            result.equity_curve_autocorr = np.corrcoef(returns[:-1], returns[1:])[0, 1] if len(returns) > 2 else 0
        # M42: max_underwater_days
        result.max_underwater_days = self._compute_max_underwater_days(equity)
        # M43: recovery_factor (total return / max drawdown value)
        total_return = sum(daily_pnls)
        max_dd = self._compute_max_drawdown_value(daily_pnls)
        result.recovery_factor = total_return / max_dd if max_dd > 0 else float('inf')

    def _compute_regime_metrics(
        self,
        result: MCTrialResult,
        daily_pnls: List[float]
    ):
        """Compute M44-M48: Regime Metrics."""
        if len(daily_pnls) < 2:
            return
        # M44: date_pnl_std
        result.date_pnl_std = np.std(daily_pnls, ddof=1)
        # M45: date_pnl_range
        result.date_pnl_range = max(daily_pnls) - min(daily_pnls)
        # M46-M47: Quantiles
        result.q10_date_pnl = np.percentile(daily_pnls, 10)
        result.q90_date_pnl = np.percentile(daily_pnls, 90)
        # M48: tail_ratio (downside tail relative to upside tail)
        if result.q90_date_pnl != 0:
            result.tail_ratio = abs(result.q10_date_pnl) / abs(result.q90_date_pnl)

    # --- Helper Methods ---

    def _compute_max_drawdown_pct(self, daily_pnls: List[float]) -> float:
        """Compute maximum peak-to-trough drawdown as a percent of peak equity."""
        if not daily_pnls:
            return 0
        equity = [self.initial_capital]
        for pnl in daily_pnls:
            equity.append(equity[-1] + pnl)
        peak = equity[0]
        max_dd = 0
        for e in equity:
            if e > peak:
                peak = e
            dd = (peak - e) / peak
            max_dd = max(max_dd, dd)
        return max_dd * 100

    def _compute_max_drawdown_value(self, daily_pnls: List[float]) -> float:
        """Compute maximum peak-to-trough drawdown in currency units."""
        if not daily_pnls:
            return 0
        equity = [self.initial_capital]
        for pnl in daily_pnls:
            equity.append(equity[-1] + pnl)
        peak = equity[0]
        max_dd = 0
        for e in equity:
            if e > peak:
                peak = e
            dd = peak - e
            max_dd = max(max_dd, dd)
        return max_dd

    def _compute_sharpe_ratio(self, daily_pnls: List[float]) -> float:
        """Compute annualized Sharpe ratio (zero risk-free rate assumed)."""
        if len(daily_pnls) < 2:
            return 0
        returns = [p / self.initial_capital for p in daily_pnls]
        mean_ret = np.mean(returns)
        std_ret = np.std(returns, ddof=1)
        if std_ret == 0:
            return 0
        # Annualize (assuming 365 trading days -- crypto trades daily)
        return (mean_ret / std_ret) * np.sqrt(365)

    def _compute_sortino_ratio(self, daily_pnls: List[float]) -> float:
        """Compute annualized Sortino ratio (downside deviation only)."""
        if len(daily_pnls) < 2:
            return 0
        returns = [p / self.initial_capital for p in daily_pnls]
        mean_ret = np.mean(returns)
        # Downside deviation (only negative returns)
        downside_returns = [r for r in returns if r < 0]
        if not downside_returns:
            return float('inf')
        downside_std = np.std(downside_returns, ddof=1)
        if downside_std == 0:
            return float('inf')
        return (mean_ret / downside_std) * np.sqrt(365)

    def _compute_max_consecutive_losses(self, trades: List[Dict]) -> int:
        """Compute maximum run of consecutive losing (pnl <= 0) trades."""
        max_consec = 0
        current_consec = 0
        for trade in trades:
            if trade.get('pnl', 0) <= 0:
                current_consec += 1
                max_consec = max(max_consec, current_consec)
            else:
                current_consec = 0
        return max_consec

    def _compute_skewness(self, data: List[float]) -> float:
        """Compute sample skewness (adjusted Fisher-Pearson); 0 for n < 3."""
        if len(data) < 3:
            return 0
        n = len(data)
        mean = np.mean(data)
        std = np.std(data, ddof=1)
        if std == 0:
            return 0
        skew = sum(((x - mean) / std) ** 3 for x in data) * n / ((n - 1) * (n - 2))
        return skew

    def _compute_kurtosis(self, data: List[float]) -> float:
        """Compute sample excess kurtosis (bias-corrected); 0 for n < 4."""
        if len(data) < 4:
            return 0
        n = len(data)
        mean = np.mean(data)
        std = np.std(data, ddof=1)
        if std == 0:
            return 0
        kurt = sum(((x - mean) / std) ** 4 for x in data) * n * (n + 1) / ((n - 1) * (n - 2) * (n - 3))
        kurt -= 3 * (n - 1) ** 2 / ((n - 2) * (n - 3))
        return kurt

    def _linear_regression(self, x: np.ndarray, y: List[float]) -> Tuple[float, float]:
        """Simple least-squares linear regression. Returns (slope, r_squared)."""
        if len(x) < 2:
            return 0, 0
        x_mean = np.mean(x)
        y_mean = np.mean(y)
        numerator = sum((xi - x_mean) * (yi - y_mean) for xi, yi in zip(x, y))
        denom_x = sum((xi - x_mean) ** 2 for xi in x)
        denom_y = sum((yi - y_mean) ** 2 for yi in y)
        if denom_x == 0:
            return 0, 0
        slope = numerator / denom_x
        if denom_y == 0:
            r_squared = 0
        else:
            r_squared = (numerator ** 2) / (denom_x * denom_y)
        return slope, r_squared

    def _compute_max_underwater_days(self, equity: List[float]) -> int:
        """Compute maximum consecutive days spent below the running equity peak."""
        max_underwater = 0
        current_underwater = 0
        peak = equity[0]
        for e in equity:
            if e >= peak:
                peak = e
                current_underwater = 0
            else:
                current_underwater += 1
                max_underwater = max(max_underwater, current_underwater)
        return max_underwater

    def _compute_max_daily_drawdown_pct(self, daily_pnls: List[float]) -> float:
        """Compute worst single-day loss as a percent of the prior day's equity.

        Returned value is <= 0 (negative means a loss occurred).
        """
        if not daily_pnls:
            return 0
        equity = [self.initial_capital]
        for pnl in daily_pnls:
            equity.append(equity[-1] + pnl)
        max_dd_pct = 0
        for i in range(1, len(equity)):
            prev_equity = equity[i-1]
            if prev_equity > 0:
                # Only losing days contribute (min with 0)
                dd_pct = min(0, daily_pnls[i-1]) / prev_equity * 100
                max_dd_pct = min(max_dd_pct, dd_pct)
        return max_dd_pct
def test_metrics():
    """Quick test of metrics computation."""
    from .mc_sampler import MCSampler
    config = MCSampler().generate_champion_trial()
    # Synthetic fixture: 3 trade templates repeated 20x -> 60 trades.
    trade_templates = [
        {'pnl': 100, 'pnl_pct': 0.004, 'exit_type': 'tp', 'bars_held': 50},
        {'pnl': -50, 'pnl_pct': -0.002, 'exit_type': 'stop', 'bars_held': 20},
        {'pnl': 150, 'pnl_pct': 0.006, 'exit_type': 'tp', 'bars_held': 80},
    ]
    trades = trade_templates * 20  # 60 trades
    daily_pnls = [50, -20, 80, -10, 100, -30, 60, 40, -15, 90] * 5  # 50 days
    date_stats = [
        {'date': f'2026-01-{day+1:02d}', 'pnl': pnl}
        for day, pnl in enumerate(daily_pnls)
    ]
    signal_stats = {
        'dc_skip_rate': 0.1,
        'ob_skip_rate': 0.05,
        'dc_confirm_rate': 0.7,
        'irp_match_rate': 0.6,
        'entry_attempt_rate': 0.3,
        'signal_to_trade_rate': 0.15,
    }
    result = MCMetrics().compute(config, trades, daily_pnls, date_stats, signal_stats)
    print("Test Metrics Result:")
    print(f" ROI: {result.roi_pct:.2f}%")
    print(f" Profit Factor: {result.profit_factor:.2f}")
    print(f" Win Rate: {result.win_rate:.2%}")
    print(f" Sharpe: {result.sharpe_ratio:.2f}")
    print(f" Max DD: {result.max_drawdown_pct:.2f}%")
    print(f" Champion Region: {result.champion_region}")
    return result
if __name__ == "__main__":
    # Manual smoke test: runs MCMetrics.compute on the synthetic fixture above.
    test_metrics()

505
nautilus_dolphin/mc/mc_ml.py Executable file
View File

@@ -0,0 +1,505 @@
"""
Monte Carlo ML Envelope Learning
================================
Train ML models on MC results for envelope boundary estimation and forewarning.
Models:
- Regression models for ROI, DD, PF, WR prediction
- Classification models for champion_region, catastrophic
- One-Class SVM for envelope boundary estimation
- SHAP for feature importance
Reference: MONTE_CARLO_SYSTEM_ENVELOPE_SPEC.md Section 9, 12
"""
import json
import pickle
from typing import Dict, List, Optional, Any, Tuple
from pathlib import Path
from dataclasses import dataclass
import numpy as np
# Try to import ML libraries
try:
from sklearn.ensemble import GradientBoostingRegressor, RandomForestClassifier
from sklearn.svm import OneClassSVM
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
SKLEARN_AVAILABLE = True
except ImportError:
SKLEARN_AVAILABLE = False
print("[WARN] scikit-learn not available - ML training disabled")
try:
import xgboost as xgb
XGBOOST_AVAILABLE = True
except ImportError:
XGBOOST_AVAILABLE = False
try:
import shap
SHAP_AVAILABLE = True
except ImportError:
SHAP_AVAILABLE = False
from .mc_sampler import MCTrialConfig, MCSampler
from .mc_store import MCStore
@dataclass
class ForewarningReport:
    """Forewarning report for a configuration.

    Bundles ML predictions, the envelope score, and human-readable
    warnings produced when a candidate configuration is assessed.
    """
    config: Dict[str, Any]
    predicted_roi: float
    predicted_roi_p10: float
    predicted_roi_p90: float
    predicted_max_dd: float
    champion_probability: float
    catastrophic_probability: float
    envelope_score: float
    warnings: List[str]
    nearest_champion: Optional[Dict[str, Any]]
    parameter_risks: Dict[str, float]

    def to_dict(self) -> Dict[str, Any]:
        """Convert to dictionary."""
        keys = (
            'config',
            'predicted_roi',
            'predicted_roi_p10',
            'predicted_roi_p90',
            'predicted_max_dd',
            'champion_probability',
            'catastrophic_probability',
            'envelope_score',
            'warnings',
            'nearest_champion',
            'parameter_risks',
        )
        return {key: getattr(self, key) for key in keys}
class MCML:
    """
    Monte Carlo ML Envelope Learning.
    Trains models on MC results and provides forewarning capabilities.

    Workflow: load the corpus via MCStore, fit regression/classification/
    envelope models on the P_* parameter columns, and persist everything
    as pickles under ``models_dir`` for later use by DolphinForewarner.
    """
    def __init__(
        self,
        output_dir: str = "mc_results",
        models_dir: Optional[str] = None
    ):
        """
        Initialize ML trainer.
        Parameters
        ----------
        output_dir : str
            MC results directory
        models_dir : str, optional
            Directory to save trained models (defaults to <output_dir>/models)
        """
        self.output_dir = Path(output_dir)
        self.models_dir = Path(models_dir) if models_dir else self.output_dir / "models"
        self.models_dir.mkdir(parents=True, exist_ok=True)
        self.store = MCStore(output_dir=output_dir)
        # Trained estimators keyed by name ('model_roi', 'envelope', ...)
        self.models: Dict[str, Any] = {}
        self.scalers: Dict[str, StandardScaler] = {}
        self.feature_names: List[str] = []
        self._init_feature_names()

    def _init_feature_names(self):
        """Initialize feature names from parameter space (champion keys)."""
        sampler = MCSampler()
        self.feature_names = list(sampler.CHAMPION.keys())

    def load_corpus(self) -> Optional[Any]:
        """Load full corpus from store."""
        return self.store.load_corpus()

    def train_all_models(self, test_size: float = 0.2) -> Dict[str, Any]:
        """
        Train all ML models on the corpus.
        Parameters
        ----------
        test_size : float
            Fraction of data for testing.
            Fix: this was previously accepted but ignored (every sub-trainer
            hard-coded 0.2); it is now forwarded to each train/test split.
        Returns
        -------
        Dict[str, Any]
            Training results and metrics
        """
        if not SKLEARN_AVAILABLE:
            raise RuntimeError("scikit-learn required for training")
        print("="*70)
        print("TRAINING ML MODELS")
        print("="*70)
        # Load corpus
        print("\n[1/6] Loading corpus...")
        df = self.load_corpus()
        if df is None or len(df) == 0:
            raise ValueError("No corpus data available")
        print(f" Loaded {len(df)} trials")
        # Prepare features
        print("\n[2/6] Preparing features...")
        X = self._extract_features(df)
        # Train regression models
        print("\n[3/6] Training regression models...")
        self._train_regression_model(X, df, 'M_roi_pct', 'model_roi', test_size)
        self._train_regression_model(X, df, 'M_max_drawdown_pct', 'model_dd', test_size)
        self._train_regression_model(X, df, 'M_profit_factor', 'model_pf', test_size)
        self._train_regression_model(X, df, 'M_win_rate', 'model_wr', test_size)
        # Train classification models
        print("\n[4/6] Training classification models...")
        self._train_classification_model(X, df, 'L_champion_region', 'model_champ', test_size)
        self._train_classification_model(X, df, 'L_catastrophic', 'model_catas', test_size)
        self._train_classification_model(X, df, 'L_inert', 'model_inert', test_size)
        self._train_classification_model(X, df, 'L_h2_degradation', 'model_h2deg', test_size)
        # Train envelope model (One-Class SVM on champions)
        print("\n[5/6] Training envelope boundary model...")
        self._train_envelope_model(X, df)
        # Save models
        print("\n[6/6] Saving models...")
        self._save_models()
        print("\n[OK] All models trained and saved")
        return {'status': 'success', 'n_samples': len(df)}

    def _extract_features(self, df: Any) -> np.ndarray:
        """Extract and standardize the feature matrix from the corpus DataFrame."""
        # Get parameter columns (corpus stores parameters with a 'P_' prefix)
        param_cols = [f'P_{name}' for name in self.feature_names if f'P_{name}' in df.columns]
        # Extract and normalize
        X = df[param_cols].values
        # Standardize; keep the fitted scaler so live configs can be
        # projected into the same space at prediction time.
        scaler = StandardScaler()
        X_scaled = scaler.fit_transform(X)
        self.scalers['default'] = scaler
        return X_scaled

    def _train_regression_model(
        self,
        X: np.ndarray,
        df: Any,
        target_col: str,
        model_name: str,
        test_size: float = 0.2
    ):
        """Train a gradient-boosted regression model for one metric target.

        Parameters
        ----------
        X : np.ndarray
            Standardized feature matrix
        df : Any
            Corpus DataFrame holding the target column
        target_col : str
            Metric column to predict (e.g. 'M_roi_pct')
        model_name : str
            Key under which the fitted model is stored in self.models
        test_size : float
            Hold-out fraction for evaluation
        """
        if target_col not in df.columns:
            print(f" [SKIP] {model_name}: target column not found")
            return
        y = df[target_col].values
        # Split
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=test_size, random_state=42
        )
        # Train
        model = GradientBoostingRegressor(
            n_estimators=100,
            max_depth=5,
            learning_rate=0.1,
            random_state=42
        )
        model.fit(X_train, y_train)
        # Evaluate
        train_score = model.score(X_train, y_train)
        test_score = model.score(X_test, y_test)
        print(f" {model_name}: R² train={train_score:.3f}, test={test_score:.3f}")
        self.models[model_name] = model

    def _train_classification_model(
        self,
        X: np.ndarray,
        df: Any,
        target_col: str,
        model_name: str,
        test_size: float = 0.2
    ):
        """Train a binary classification model for one label target.

        Uses XGBoost when installed, otherwise a RandomForest fallback.
        Skips silently when the target column is missing or one-class.
        """
        if target_col not in df.columns:
            print(f" [SKIP] {model_name}: target column not found")
            return
        y = df[target_col].astype(int).values
        # Check if we have both classes
        if len(set(y)) < 2:
            print(f" [SKIP] {model_name}: only one class present")
            return
        # Split (stratified so rare positives land in both halves)
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=test_size, random_state=42, stratify=y
        )
        # Train with XGBoost if available, else RandomForest
        if XGBOOST_AVAILABLE:
            # NOTE(review): use_label_encoder is deprecated/removed in modern
            # xgboost — confirm the pinned version still accepts it.
            model = xgb.XGBClassifier(
                n_estimators=100,
                max_depth=5,
                learning_rate=0.1,
                random_state=42,
                use_label_encoder=False,
                eval_metric='logloss'
            )
        else:
            model = RandomForestClassifier(
                n_estimators=100,
                max_depth=5,
                random_state=42
            )
        model.fit(X_train, y_train)
        # Evaluate
        y_pred = model.predict(X_test)
        acc = accuracy_score(y_test, y_pred)
        print(f" {model_name}: accuracy={acc:.3f}")
        self.models[model_name] = model

    def _train_envelope_model(self, X: np.ndarray, df: Any):
        """Train One-Class SVM on champion region configurations."""
        if 'L_champion_region' not in df.columns:
            print(" [SKIP] envelope: champion_region column not found")
            return
        # Filter to champions
        champion_mask = df['L_champion_region'].astype(bool)
        X_champions = X[champion_mask]
        if len(X_champions) < 100:
            print(f" [SKIP] envelope: only {len(X_champions)} champions (need 100+)")
            return
        print(f" Training on {len(X_champions)} champion configurations")
        # Train One-Class SVM (nu=0.05: allow ~5% of champions outside boundary)
        model = OneClassSVM(kernel='rbf', nu=0.05, gamma='scale')
        model.fit(X_champions)
        self.models['envelope'] = model
        print(" Envelope model trained")

    def _save_models(self):
        """Save all trained models, scalers, and feature names to models_dir."""
        # Save models
        for name, model in self.models.items():
            path = self.models_dir / f"{name}.pkl"
            with open(path, 'wb') as f:
                pickle.dump(model, f)
        # Save scalers
        for name, scaler in self.scalers.items():
            path = self.models_dir / f"scaler_{name}.pkl"
            with open(path, 'wb') as f:
                pickle.dump(scaler, f)
        # Save feature names
        with open(self.models_dir / "feature_names.json", 'w') as f:
            json.dump(self.feature_names, f)
        print(f" Saved {len(self.models)} models to {self.models_dir}")

    def load_models(self):
        """Load trained models from disk."""
        # Load feature names
        with open(self.models_dir / "feature_names.json", 'r') as f:
            self.feature_names = json.load(f)
        # Load models — skip any that fail (e.g. XGBoost pickle when xgboost not installed)
        model_files = list(self.models_dir.glob("*.pkl"))
        for path in model_files:
            if 'scaler_' in path.name:
                continue
            try:
                with open(path, 'rb') as f:
                    self.models[path.stem] = pickle.load(f)
            except Exception as e:
                print(f" [WARN] Skipping {path.name}: {e}")
        # Load scalers
        for path in self.models_dir.glob("scaler_*.pkl"):
            name = path.stem.replace('scaler_', '')
            try:
                with open(path, 'rb') as f:
                    self.scalers[name] = pickle.load(f)
            except Exception as e:
                print(f" [WARN] Skipping scaler {path.name}: {e}")
        loaded = list(self.models.keys())
        print(f"[OK] Loaded {len(loaded)} models: {loaded}")

    def predict(self, config: MCTrialConfig) -> Dict[str, float]:
        """
        Make predictions for a configuration.
        Parameters
        ----------
        config : MCTrialConfig
            Configuration to predict
        Returns
        -------
        Dict[str, float]
            Predictions for all targets; keys are present only for models
            that were successfully loaded.
        """
        if not self.models:
            # Lazy-load on first use so callers need not call load_models().
            self.load_models()
        # Extract features
        X = self._config_to_features(config)
        predictions = {}
        # Regression predictions
        if 'model_roi' in self.models:
            predictions['roi'] = self.models['model_roi'].predict(X)[0]
        if 'model_dd' in self.models:
            predictions['max_dd'] = self.models['model_dd'].predict(X)[0]
        if 'model_pf' in self.models:
            predictions['profit_factor'] = self.models['model_pf'].predict(X)[0]
        if 'model_wr' in self.models:
            predictions['win_rate'] = self.models['model_wr'].predict(X)[0]
        # Classification predictions (probability of positive class)
        if 'model_champ' in self.models:
            if hasattr(self.models['model_champ'], 'predict_proba'):
                predictions['champion_prob'] = self.models['model_champ'].predict_proba(X)[0, 1]
            else:
                predictions['champion_prob'] = float(self.models['model_champ'].predict(X)[0])
        if 'model_catas' in self.models:
            if hasattr(self.models['model_catas'], 'predict_proba'):
                predictions['catastrophic_prob'] = self.models['model_catas'].predict_proba(X)[0, 1]
            else:
                predictions['catastrophic_prob'] = float(self.models['model_catas'].predict(X)[0])
        # Envelope score (negative => outside the learned safe envelope)
        if 'envelope' in self.models:
            predictions['envelope_score'] = self.models['envelope'].decision_function(X)[0]
        return predictions

    def _config_to_features(self, config: MCTrialConfig) -> np.ndarray:
        """Convert config to a scaled 1xN feature vector."""
        features = []
        for name in self.feature_names:
            # Missing attributes fall back to the champion baseline value.
            value = getattr(config, name, MCSampler.CHAMPION[name])
            features.append(value)
        X = np.array([features])
        # Scale with the scaler fitted during training
        if 'default' in self.scalers:
            X = self.scalers['default'].transform(X)
        return X
class DolphinForewarner:
    """
    Live forewarning system for Dolphin configurations.

    Wraps a trained MCML instance and turns its raw predictions into a
    ForewarningReport with human-readable warnings.
    """
    def __init__(self, models_dir: str = "mc_results/models"):
        """
        Initialize forewarner.
        Parameters
        ----------
        models_dir : str
            Directory with trained models
        """
        self.ml = MCML(models_dir=models_dir)
        self.ml.load_models()

    def assess(self, config: MCTrialConfig) -> ForewarningReport:
        """
        Assess a configuration and return forewarning report.
        Parameters
        ----------
        config : MCTrialConfig
            Configuration to assess
        Returns
        -------
        ForewarningReport
            Complete risk assessment
        """
        preds = self.ml.predict(config)
        # Collect human-readable warnings from predictions and raw params.
        warnings = []
        cat_prob = preds.get('catastrophic_prob', 0)
        if cat_prob > 0.10:
            warnings.append(f"Catastrophic risk: {cat_prob:.1%}")
        if preds.get('envelope_score', 0) < 0:
            warnings.append("Configuration outside safe operating envelope")
        # Parameter-boundary checks
        if config.max_leverage > 6.0:
            warnings.append(f"High leverage: {config.max_leverage:.1f}x")
        exposure = config.fraction * config.max_leverage
        if exposure > 1.5:
            warnings.append(f"High notional exposure: {exposure:.2f}x")
        roi = preds.get('roi', 0)
        return ForewarningReport(
            config=config.to_dict(),
            predicted_roi=roi,
            predicted_roi_p10=roi * 0.5,  # Simplified
            predicted_roi_p90=roi * 1.5,
            predicted_max_dd=preds.get('max_dd', 0),
            champion_probability=preds.get('champion_prob', 0),
            catastrophic_probability=preds.get('catastrophic_prob', 0),
            envelope_score=preds.get('envelope_score', 0),
            warnings=warnings,
            nearest_champion=None,  # Would require search
            parameter_risks={}
        )

    def assess_config_dict(self, config_dict: Dict[str, Any]) -> ForewarningReport:
        """Assess from a configuration dictionary."""
        return self.assess(MCTrialConfig.from_dict(config_dict))
if __name__ == "__main__":
    # Test
    # Informational entry point only; actual training runs via
    # MCML().train_all_models() or run_mc_envelope.py --mode train.
    print("MC ML module loaded")
    print("Run training with: MCML().train_all_models()")

395
nautilus_dolphin/mc/mc_runner.py Executable file
View File

@@ -0,0 +1,395 @@
"""
Monte Carlo Runner
==================
Orchestration and parallel execution for MC envelope mapping.
Features:
- Parallel execution using multiprocessing
- Checkpointing and resume capability
- Batch processing
- Progress tracking
Reference: MONTE_CARLO_SYSTEM_ENVELOPE_SPEC.md Section 1, 5.4
"""
import time
import json
from typing import Dict, List, Optional, Any, Callable
from pathlib import Path
from datetime import datetime
import multiprocessing as mp
from functools import partial
from .mc_sampler import MCSampler, MCTrialConfig
from .mc_validator import MCValidator, ValidationResult
from .mc_executor import MCExecutor
from .mc_store import MCStore
from .mc_metrics import MCTrialResult
class MCRunner:
    """
    Monte Carlo Runner.
    Orchestrates the full MC envelope mapping pipeline:
    1. Generate trial configurations
    2. Validate configurations
    3. Execute trials (parallel)
    4. Store results
    """
    def __init__(
        self,
        output_dir: str = "mc_results",
        n_workers: int = -1,
        batch_size: int = 1000,
        base_seed: int = 42,
        verbose: bool = True
    ):
        """
        Initialize the runner.
        Parameters
        ----------
        output_dir : str
            Directory for results
        n_workers : int
            Number of parallel workers (-1 for auto)
        batch_size : int
            Trials per batch
        base_seed : int
            Master RNG seed
        verbose : bool
            Print progress
        """
        self.output_dir = Path(output_dir)
        # Auto-detect worker count: leave one core free for the main process.
        self.n_workers = n_workers if n_workers > 0 else max(1, mp.cpu_count() - 1)
        self.batch_size = batch_size
        self.base_seed = base_seed
        self.verbose = verbose
        # Components
        self.sampler = MCSampler(base_seed=base_seed)
        self.store = MCStore(output_dir=output_dir, batch_size=batch_size)
        # State
        self.completed_trials: set = set()  # trial_ids already persisted (used by resume)
        self.stats: Dict[str, Any] = {}

    def generate_and_validate(
        self,
        n_samples_per_switch: int = 500,
        max_trials: Optional[int] = None
    ) -> List[MCTrialConfig]:
        """
        Generate and validate trial configurations.
        Parameters
        ----------
        n_samples_per_switch : int
            Samples per switch vector
        max_trials : int, optional
            Maximum total trials
        Returns
        -------
        List[MCTrialConfig]
            Valid trial configurations
        """
        print("="*70)
        print("PHASE 1: GENERATE & VALIDATE CONFIGURATIONS")
        print("="*70)
        # Generate trials
        print(f"\n[1/3] Generating trials (n_samples_per_switch={n_samples_per_switch})...")
        all_configs = self.sampler.generate_trials(
            n_samples_per_switch=n_samples_per_switch,
            max_trials=max_trials
        )
        # Validate
        print(f"\n[2/3] Validating {len(all_configs)} configurations...")
        validator = MCValidator(verbose=False)
        validation_results = validator.validate_batch(all_configs)
        # Filter valid configs (validate_batch returns results positionally
        # aligned with all_configs, hence the zip)
        valid_configs = [
            config for config, result in zip(all_configs, validation_results)
            if result.is_valid()
        ]
        # Save validation results
        self.store.save_validation_results(validation_results, batch_id=0)
        # Stats
        stats = validator.get_validity_stats(validation_results)
        print(f"\n[3/3] Validation complete:")
        print(f" Total: {stats['total']}")
        print(f" Valid: {stats['valid']} ({stats['validity_rate']*100:.1f}%)")
        print(f" Rejected: {stats['total'] - stats['valid']}")
        self.stats['validation'] = stats
        return valid_configs

    def run_envelope_mapping(
        self,
        n_samples_per_switch: int = 500,
        max_trials: Optional[int] = None,
        resume: bool = True
    ) -> Dict[str, Any]:
        """
        Run full envelope mapping.
        Parameters
        ----------
        n_samples_per_switch : int
            Samples per switch vector
        max_trials : int, optional
            Maximum total trials
        resume : bool
            Resume from existing results
        Returns
        -------
        Dict[str, Any]
            Run statistics
        """
        start_time = time.time()
        # Generate and validate
        valid_configs = self.generate_and_validate(
            n_samples_per_switch=n_samples_per_switch,
            max_trials=max_trials
        )
        # Check for resume: drop configs whose trial_id is already indexed
        # as completed in the store
        if resume:
            self._load_completed_trials()
            valid_configs = [c for c in valid_configs if c.trial_id not in self.completed_trials]
            print(f"\n[Resume] {len(self.completed_trials)} trials already completed")
            print(f"[Resume] {len(valid_configs)} trials remaining")
        if not valid_configs:
            print("\n[OK] All trials already completed!")
            return self._get_run_stats(start_time)
        # Execute trials
        print("\n" + "="*70)
        print("PHASE 2: EXECUTE TRIALS")
        print("="*70)
        print(f"\nRunning {len(valid_configs)} trials with {self.n_workers} workers...")
        # Split into batches
        batches = self._split_into_batches(valid_configs)
        print(f"Split into {len(batches)} batches (batch_size={self.batch_size})")
        # Process batches
        total_completed = 0
        for batch_idx, batch_configs in enumerate(batches):
            print(f"\n--- Batch {batch_idx+1}/{len(batches)} ({len(batch_configs)} trials) ---")
            batch_start = time.time()
            if self.n_workers > 1 and len(batch_configs) > 1:
                # Parallel execution
                results = self._execute_parallel(batch_configs)
            else:
                # Sequential execution
                results = self._execute_sequential(batch_configs)
            # Save results (batch_id 0 is reserved for validation results)
            self.store.save_trial_results(results, batch_id=batch_idx+1)
            batch_time = time.time() - batch_start
            total_completed += len(results)
            print(f"Batch {batch_idx+1} complete in {batch_time:.1f}s "
                  f"({len(results)/batch_time:.1f} trials/sec)")
            # Progress + linear-extrapolation ETA over the elapsed wall time
            progress = total_completed / len(valid_configs)
            eta_seconds = (time.time() - start_time) / progress * (1 - progress) if progress > 0 else 0
            print(f"Overall: {total_completed}/{len(valid_configs)} ({progress*100:.1f}%) "
                  f"ETA: {eta_seconds/60:.1f} min")
        return self._get_run_stats(start_time)

    def _split_into_batches(
        self,
        configs: List[MCTrialConfig]
    ) -> List[List[MCTrialConfig]]:
        """Split configurations into batches of at most self.batch_size."""
        batches = []
        for i in range(0, len(configs), self.batch_size):
            batches.append(configs[i:i+self.batch_size])
        return batches

    def _execute_sequential(
        self,
        configs: List[MCTrialConfig]
    ) -> List[MCTrialResult]:
        """Execute trials sequentially in-process."""
        executor = MCExecutor(verbose=self.verbose)
        return executor.execute_batch(configs, progress_interval=max(1, len(configs)//10))

    def _execute_parallel(
        self,
        configs: List[MCTrialConfig]
    ) -> List[MCTrialResult]:
        """Execute trials in parallel using multiprocessing."""
        # Create worker function; _execute_trial_worker is module-level so
        # the partial stays picklable for the pool
        worker = partial(_execute_trial_worker, initial_capital=25000.0)
        # Run in pool
        with mp.Pool(processes=self.n_workers) as pool:
            results = pool.map(worker, configs)
        return results

    def _load_completed_trials(self):
        """Load IDs of already completed trials from index."""
        entries = self.store.query_index(status='completed', limit=1000000)
        self.completed_trials = {e['trial_id'] for e in entries}

    def _get_run_stats(self, start_time: float) -> Dict[str, Any]:
        """Get final run statistics (timing plus corpus-level aggregates)."""
        total_time = time.time() - start_time
        corpus_stats = self.store.get_corpus_stats()
        stats = {
            'total_time_sec': total_time,
            'total_time_min': total_time / 60,
            'total_time_hours': total_time / 3600,
            **corpus_stats,
        }
        print("\n" + "="*70)
        print("ENVELOPE MAPPING COMPLETE")
        print("="*70)
        print(f"\nTotal time: {total_time/3600:.2f} hours")
        print(f"Total trials: {stats['total_trials']}")
        print(f"Champion region: {stats['champion_count']}")
        print(f"Catastrophic: {stats['catastrophic_count']}")
        print(f"Avg ROI: {stats['avg_roi_pct']:.2f}%")
        print(f"Avg Sharpe: {stats['avg_sharpe']:.2f}")
        return stats

    def generate_report(self, output_path: Optional[str] = None) -> str:
        """Generate a markdown summary report; optionally write it to disk."""
        stats = self.store.get_corpus_stats()
        report = f"""
# Monte Carlo Envelope Mapping Report
Generated: {datetime.now().isoformat()}
## Corpus Statistics
- Total trials: {stats['total_trials']}
- Champion region: {stats['champion_count']} ({stats['champion_count']/max(1,stats['total_trials'])*100:.1f}%)
- Catastrophic: {stats['catastrophic_count']} ({stats['catastrophic_count']/max(1,stats['total_trials'])*100:.1f}%)
## Performance Metrics
- Average ROI: {stats['avg_roi_pct']:.2f}%
- Min ROI: {stats['min_roi_pct']:.2f}%
- Max ROI: {stats['max_roi_pct']:.2f}%
- Average Sharpe: {stats['avg_sharpe']:.2f}
- Average Max DD: {stats['avg_max_dd_pct']:.2f}%
## Validation Summary
"""
        # Validation stats exist only when generate_and_validate ran in
        # this process (not on a report-only invocation)
        if 'validation' in self.stats:
            vstats = self.stats['validation']
            report += f"""
- Total configs: {vstats['total']}
- Valid configs: {vstats['valid']} ({vstats['validity_rate']*100:.1f}%)
- Rejected V1 (range): {vstats.get('rejected_v1', 0)}
- Rejected V2 (constraints): {vstats.get('rejected_v2', 0)}
- Rejected V3 (cross-group): {vstats.get('rejected_v3', 0)}
- Rejected V4 (degenerate): {vstats.get('rejected_v4', 0)}
"""
        if output_path:
            with open(output_path, 'w') as f:
                f.write(report)
            print(f"\n[OK] Report saved: {output_path}")
        return report
def _execute_trial_worker(
    config: MCTrialConfig,
    initial_capital: float = 25000.0
) -> MCTrialResult:
    """
    Worker function for parallel execution.
    Must be at module level for pickle serialization.

    Validation is skipped because configs reaching the pool were already
    validated in Phase 1.
    """
    executor = MCExecutor(initial_capital=initial_capital, verbose=False)
    result = executor.execute_trial(config, skip_validation=True)
    return result
def run_mc_envelope(
    n_samples_per_switch: int = 100,  # Reduced default for testing
    max_trials: Optional[int] = None,
    n_workers: int = -1,
    output_dir: str = "mc_results",
    resume: bool = True,
    base_seed: int = 42
) -> Dict[str, Any]:
    """
    Convenience function to run full MC envelope mapping.

    Builds an MCRunner, runs the envelope mapping, writes the markdown
    summary report into the output directory, and returns the run stats.

    Parameters
    ----------
    n_samples_per_switch : int
        Samples per switch vector
    max_trials : int, optional
        Maximum total trials
    n_workers : int
        Number of parallel workers (-1 for auto)
    output_dir : str
        Output directory
    resume : bool
        Resume from existing results
    base_seed : int
        Master RNG seed

    Returns
    -------
    Dict[str, Any]
        Run statistics
    """
    mapper = MCRunner(
        output_dir=output_dir,
        n_workers=n_workers,
        base_seed=base_seed
    )
    run_stats = mapper.run_envelope_mapping(
        n_samples_per_switch=n_samples_per_switch,
        max_trials=max_trials,
        resume=resume
    )
    # Persist the human-readable summary next to the raw results.
    mapper.generate_report(output_path=f"{output_dir}/envelope_report.md")
    return run_stats
if __name__ == "__main__":
    # Test run
    # Tiny smoke run: 10 samples per switch capped at 100 trials, single
    # worker, writing into a throwaway output directory.
    stats = run_mc_envelope(
        n_samples_per_switch=10,
        max_trials=100,
        n_workers=1,
        output_dir="mc_results_test"
    )
    print("\nTest complete!")

534
nautilus_dolphin/mc/mc_sampler.py Executable file
View File

@@ -0,0 +1,534 @@
"""
Monte Carlo Parameter Sampler
=============================
Parameter space definition and Latin Hypercube Sampling (LHS) implementation.
This module defines the complete 33-parameter space across 7 sub-systems
and implements the two-phase sampling strategy:
1. Phase A: Switch grid (boolean combinations)
2. Phase B: LHS continuous sampling per switch-vector
Reference: MONTE_CARLO_SYSTEM_ENVELOPE_SPEC.md Section 2, 3
"""
import numpy as np
from typing import Dict, List, Optional, Tuple, NamedTuple, Any, Union
from dataclasses import dataclass, field
from enum import Enum
import json
from pathlib import Path
# Try to import scipy for LHS
try:
from scipy.stats import qmc
SCIPY_AVAILABLE = True
except ImportError:
SCIPY_AVAILABLE = False
class ParamType(Enum):
    """Parameter sampling types."""
    CONTINUOUS = "continuous"    # real-valued, bounded by ParameterDef.lo/hi
    DISCRETE = "discrete"        # integer-valued, bounded by ParameterDef.lo/hi
    CATEGORICAL = "categorical"  # one of ParameterDef.categories (list required)
    BOOLEAN = "boolean"          # on/off switch, enumerated in the Phase A grid
    DERIVED = "derived"          # presumably computed from other params, not sampled — TODO confirm in sampler
    FIXED = "fixed"              # held constant (e.g. at the champion value)
@dataclass
class ParameterDef:
    """Definition of a single parameter.

    One entry per tunable parameter in MCSampler.PARAMS; carries the
    champion baseline value, sampling type, bounds, and constraint-group
    membership used by the validator.
    """
    id: str  # spec identifier, e.g. 'P1.01'
    name: str  # parameter name as used in MCTrialConfig
    champion: Any  # baseline (champion) value
    param_type: ParamType  # sampling type, see ParamType
    lo: Optional[float] = None  # lower bound for sampled types
    hi: Optional[float] = None  # upper bound; None when it depends on another param
    log_transform: bool = False  # presumably sample in log-space when True — confirm in sampler
    constraint_group: Optional[str] = None  # validator constraint group id (e.g. 'CG-VD')
    depends_on: Optional[str] = None  # For conditional parameters
    categories: Optional[List[str]] = None  # For CATEGORICAL
    def __post_init__(self):
        # Categorical parameters are unusable without an explicit category list.
        if self.param_type == ParamType.CATEGORICAL and self.categories is None:
            raise ValueError(f"Categorical parameter {self.name} must have categories")
class MCTrialConfig(NamedTuple):
    """Complete parameter vector for a Monte Carlo trial.

    One immutable record per trial: the trial id plus the 33 tunable
    parameters across the seven sub-systems (P1-P7).
    """
    trial_id: int
    # P1 Signal
    vel_div_threshold: float
    vel_div_extreme: float
    use_direction_confirm: bool
    dc_lookback_bars: int
    dc_min_magnitude_bps: float
    dc_skip_contradicts: bool
    dc_leverage_boost: float
    dc_leverage_reduce: float
    vd_trend_lookback: int
    # P2 Leverage
    min_leverage: float
    max_leverage: float
    leverage_convexity: float
    fraction: float
    use_alpha_layers: bool
    use_dynamic_leverage: bool
    # P3 Exit
    fixed_tp_pct: float
    stop_pct: float
    max_hold_bars: int
    # P4 Fees
    use_sp_fees: bool
    use_sp_slippage: bool
    sp_maker_entry_rate: float
    sp_maker_exit_rate: float
    # P5 OB
    use_ob_edge: bool
    ob_edge_bps: float
    ob_confirm_rate: float
    ob_imbalance_bias: float
    ob_depth_scale: float
    # P6 Asset Selection
    use_asset_selection: bool
    min_irp_alignment: float
    lookback: int
    # P7 ACB
    acb_beta_high: float
    acb_beta_low: float
    acb_w750_threshold_pct: int

    def to_dict(self) -> Dict[str, Any]:
        """Convert to dictionary keyed by field name, in declaration order."""
        # NamedTuple._asdict already yields all fields in declaration order,
        # which matches the previous 34-line hand-written mapping exactly.
        return dict(self._asdict())

    @classmethod
    def from_dict(cls, d: Dict[str, Any]) -> 'MCTrialConfig':
        """Create from dictionary.

        Unknown keys are silently dropped; a missing field raises TypeError
        from the NamedTuple constructor.
        """
        # Filter to only valid fields
        valid_fields = cls._fields
        filtered = {k: v for k, v in d.items() if k in valid_fields}
        return cls(**filtered)
class MCSampler:
"""
Monte Carlo Parameter Sampler.
Implements two-phase sampling:
1. Phase A: Enumerate all boolean switch combinations
2. Phase B: LHS continuous sampling per switch-vector
"""
# Champion configuration (baseline)
CHAMPION = {
'vel_div_threshold': -0.020,
'vel_div_extreme': -0.050,
'use_direction_confirm': True,
'dc_lookback_bars': 7,
'dc_min_magnitude_bps': 0.75,
'dc_skip_contradicts': True,
'dc_leverage_boost': 1.00,
'dc_leverage_reduce': 0.50,
'vd_trend_lookback': 10,
'min_leverage': 0.50,
'max_leverage': 5.00,
'leverage_convexity': 3.00,
'fraction': 0.20,
'use_alpha_layers': True,
'use_dynamic_leverage': True,
'fixed_tp_pct': 0.0099,
'stop_pct': 1.00,
'max_hold_bars': 120,
'use_sp_fees': True,
'use_sp_slippage': True,
'sp_maker_entry_rate': 0.62,
'sp_maker_exit_rate': 0.50,
'use_ob_edge': True,
'ob_edge_bps': 5.00,
'ob_confirm_rate': 0.40,
'ob_imbalance_bias': -0.09,
'ob_depth_scale': 1.00,
'use_asset_selection': True,
'min_irp_alignment': 0.45,
'lookback': 100,
'acb_beta_high': 0.80,
'acb_beta_low': 0.20,
'acb_w750_threshold_pct': 60,
}
# Parameter definitions
PARAMS = {
# P1 Signal Generator
'vel_div_threshold': ParameterDef('P1.01', 'vel_div_threshold', -0.020, ParamType.CONTINUOUS, -0.040, -0.008, False, 'CG-VD'),
'vel_div_extreme': ParameterDef('P1.02', 'vel_div_extreme', -0.050, ParamType.CONTINUOUS, -0.120, None, False, 'CG-VD'), # hi depends on threshold
'use_direction_confirm': ParameterDef('P1.03', 'use_direction_confirm', True, ParamType.BOOLEAN, constraint_group='CG-DC'),
'dc_lookback_bars': ParameterDef('P1.04', 'dc_lookback_bars', 7, ParamType.DISCRETE, 3, 25, False, 'CG-DC'),
'dc_min_magnitude_bps': ParameterDef('P1.05', 'dc_min_magnitude_bps', 0.75, ParamType.CONTINUOUS, 0.20, 3.00, False, 'CG-DC'),
'dc_skip_contradicts': ParameterDef('P1.06', 'dc_skip_contradicts', True, ParamType.BOOLEAN, constraint_group='CG-DC'),
'dc_leverage_boost': ParameterDef('P1.07', 'dc_leverage_boost', 1.00, ParamType.CONTINUOUS, 1.00, 1.50, False, 'CG-DC-LEV'),
'dc_leverage_reduce': ParameterDef('P1.08', 'dc_leverage_reduce', 0.50, ParamType.CONTINUOUS, 0.25, 0.90, False, 'CG-DC-LEV'),
'vd_trend_lookback': ParameterDef('P1.09', 'vd_trend_lookback', 10, ParamType.DISCRETE, 5, 30, False),
# P2 Leverage
'min_leverage': ParameterDef('P2.01', 'min_leverage', 0.50, ParamType.CONTINUOUS, 0.10, 1.50, False, 'CG-LEV'),
'max_leverage': ParameterDef('P2.02', 'max_leverage', 5.00, ParamType.CONTINUOUS, 1.50, 12.00, False, 'CG-LEV'),
'leverage_convexity': ParameterDef('P2.03', 'leverage_convexity', 3.00, ParamType.CONTINUOUS, 0.75, 6.00, False),
'fraction': ParameterDef('P2.04', 'fraction', 0.20, ParamType.CONTINUOUS, 0.05, 0.40, False, 'CG-RISK'),
'use_alpha_layers': ParameterDef('P2.05', 'use_alpha_layers', True, ParamType.BOOLEAN),
'use_dynamic_leverage': ParameterDef('P2.06', 'use_dynamic_leverage', True, ParamType.BOOLEAN, constraint_group='CG-DYNLEV'),
# P3 Exit
'fixed_tp_pct': ParameterDef('P3.01', 'fixed_tp_pct', 0.0099, ParamType.CONTINUOUS, 0.0030, 0.0300, True, 'CG-EXIT'),
'stop_pct': ParameterDef('P3.02', 'stop_pct', 1.00, ParamType.CONTINUOUS, 0.20, 5.00, True, 'CG-EXIT'),
'max_hold_bars': ParameterDef('P3.03', 'max_hold_bars', 120, ParamType.DISCRETE, 20, 600, False, 'CG-EXIT'),
# P4 Fees
'use_sp_fees': ParameterDef('P4.01', 'use_sp_fees', True, ParamType.BOOLEAN),
'use_sp_slippage': ParameterDef('P4.02', 'use_sp_slippage', True, ParamType.BOOLEAN, constraint_group='CG-SP'),
'sp_maker_entry_rate': ParameterDef('P4.03', 'sp_maker_entry_rate', 0.62, ParamType.CONTINUOUS, 0.20, 0.85, False, 'CG-SP'),
'sp_maker_exit_rate': ParameterDef('P4.04', 'sp_maker_exit_rate', 0.50, ParamType.CONTINUOUS, 0.20, 0.85, False, 'CG-SP'),
# P5 OB Intelligence
'use_ob_edge': ParameterDef('P5.01', 'use_ob_edge', True, ParamType.BOOLEAN, constraint_group='CG-OB'),
'ob_edge_bps': ParameterDef('P5.02', 'ob_edge_bps', 5.00, ParamType.CONTINUOUS, 1.00, 20.00, True, 'CG-OB'),
'ob_confirm_rate': ParameterDef('P5.03', 'ob_confirm_rate', 0.40, ParamType.CONTINUOUS, 0.10, 0.80, False, 'CG-OB'),
'ob_imbalance_bias': ParameterDef('P5.04', 'ob_imbalance_bias', -0.09, ParamType.CONTINUOUS, -0.25, 0.15, False, 'CG-OB-SIG'),
'ob_depth_scale': ParameterDef('P5.05', 'ob_depth_scale', 1.00, ParamType.CONTINUOUS, 0.30, 2.00, True, 'CG-OB-SIG'),
# P6 Asset Selection
'use_asset_selection': ParameterDef('P6.01', 'use_asset_selection', True, ParamType.BOOLEAN, constraint_group='CG-IRP'),
'min_irp_alignment': ParameterDef('P6.02', 'min_irp_alignment', 0.45, ParamType.CONTINUOUS, 0.10, 0.80, False, 'CG-IRP'),
'lookback': ParameterDef('P6.03', 'lookback', 100, ParamType.DISCRETE, 30, 300, False, 'CG-IRP'),
# P7 ACB
'acb_beta_high': ParameterDef('P7.01', 'acb_beta_high', 0.80, ParamType.CONTINUOUS, 0.40, 1.50, False, 'CG-ACB'),
'acb_beta_low': ParameterDef('P7.02', 'acb_beta_low', 0.20, ParamType.CONTINUOUS, 0.00, 0.60, False, 'CG-ACB'),
'acb_w750_threshold_pct': ParameterDef('P7.03', 'acb_w750_threshold_pct', 60, ParamType.DISCRETE, 20, 80, False),
}
# Boolean parameters for switch grid
BOOLEAN_PARAMS = [
'use_direction_confirm',
'dc_skip_contradicts',
'use_alpha_layers',
'use_dynamic_leverage',
'use_sp_fees',
'use_sp_slippage',
'use_ob_edge',
'use_asset_selection',
]
# Parameters that become FIXED when their parent switch is False
CONDITIONAL_PARAMS = {
'use_direction_confirm': ['dc_lookback_bars', 'dc_min_magnitude_bps', 'dc_skip_contradicts', 'dc_leverage_boost', 'dc_leverage_reduce'],
'use_sp_slippage': ['sp_maker_entry_rate', 'sp_maker_exit_rate'],
'use_ob_edge': ['ob_edge_bps', 'ob_confirm_rate'],
'use_asset_selection': ['min_irp_alignment', 'lookback'],
}
def __init__(self, base_seed: int = 42):
    """
    Create a sampler with a reproducible random state.

    Parameters
    ----------
    base_seed : int
        Master RNG seed; every derived seed and draw flows from it.
    """
    self.rng = np.random.RandomState(base_seed)
    self.base_seed = base_seed
def generate_switch_vectors(self) -> List[Dict[str, Any]]:
    """
    Phase A: enumerate every boolean switch combination, de-duplicated.

    Each integer in [0, 2**n) is decoded bit-by-bit into a switch
    assignment; configurations that are identical after
    canonicalisation are collapsed, leaving roughly 64-96 vectors.

    Returns
    -------
    List[Dict[str, Any]]
        Unique canonical switch vectors, in enumeration order.
    """
    vectors: List[Dict[str, Any]] = []
    seen_keys = set()
    for code in range(1 << len(self.BOOLEAN_PARAMS)):
        # Bit j of `code` drives the j-th boolean parameter.
        raw = {
            name: bool((code >> j) & 1)
            for j, name in enumerate(self.BOOLEAN_PARAMS)
        }
        # Canonical form fixes dead conditional params to champion values.
        canonical = self._canonicalize_switch_vector(raw)
        key = tuple(sorted(
            (k, v) for k, v in canonical.items() if isinstance(v, bool)
        ))
        if key in seen_keys:
            continue
        seen_keys.add(key)
        vectors.append(canonical)
    return vectors
def _canonicalize_switch_vector(self, switches: Dict[str, bool]) -> Dict[str, Any]:
    """
    Return the canonical form of a raw switch vector.

    Children of a disabled parent switch are pinned to their champion
    values so that configurations differing only in dead parameters
    collapse to the same canonical vector.
    """
    out: Dict[str, Any] = dict(switches)
    for parent, children in self.CONDITIONAL_PARAMS.items():
        if switches.get(parent, False):
            continue  # parent enabled: its children remain free
        out.update({child: self.CHAMPION[child] for child in children})
    return out
def get_free_continuous_params(self, switch_vector: Dict[str, Any]) -> List[str]:
    """
    List the continuous/discrete parameter names left free to sample
    (i.e. not pinned by a disabled parent switch) under *switch_vector*.
    """
    def _is_locked(pname: str) -> bool:
        # A param is locked when it is a child of a parent switch that
        # is explicitly False in the vector (missing parents count as on).
        return any(
            pname in kids and not switch_vector.get(parent, True)
            for parent, kids in self.CONDITIONAL_PARAMS.items()
        )

    return [
        name
        for name, pdef in self.PARAMS.items()
        if pdef.param_type in (ParamType.CONTINUOUS, ParamType.DISCRETE)
        and not _is_locked(name)
    ]
def sample_continuous_params(
    self,
    switch_vector: Dict[str, Any],
    n_samples: int,
    seed: int
) -> List[Dict[str, Any]]:
    """
    Phase B: Generate n LHS samples for continuous/discrete parameters.

    Draws a Latin Hypercube sample in the unit hypercube (or plain
    uniform random draws when scipy is unavailable) and scales each
    axis to its parameter range — log-space for log-transformed
    params, linear otherwise; discrete params are rounded and clamped.

    Parameters
    ----------
    switch_vector : dict
        Fixed boolean parameters
    n_samples : int
        Number of samples to generate
    seed : int
        RNG seed for this batch

    Returns
    -------
    List[Dict[str, Any]]
        List of complete parameter dicts (switch + continuous).
        NOTE: when no parameters are free, a single dict is returned
        regardless of n_samples.
    """
    free_params = self.get_free_continuous_params(switch_vector)
    n_free = len(free_params)
    if n_free == 0:
        # No free parameters - just return the switch vector
        return [dict(switch_vector)]
    # Generate LHS samples in unit hypercube
    if SCIPY_AVAILABLE:
        sampler = qmc.LatinHypercube(d=n_free, seed=seed)
        unit_samples = sampler.random(n=n_samples)
    else:
        # Fallback: random sampling with warning (loses LHS stratification)
        print(f"[WARN] scipy not available, using random sampling instead of LHS")
        rng = np.random.RandomState(seed)
        unit_samples = rng.rand(n_samples, n_free)
    # Scale to parameter ranges
    samples = []
    for i in range(n_samples):
        sample = dict(switch_vector)
        for j, param_name in enumerate(free_params):
            pdef = self.PARAMS[param_name]
            u = unit_samples[i, j]
            # Handle dependent bounds
            lo = pdef.lo
            hi = pdef.hi
            if hi is None:
                # Compute dependent bound.
                # NOTE(review): only 'vel_div_extreme' is handled; any
                # other free param with hi=None would leave hi as None
                # and fail below — confirm no such param exists.
                if param_name == 'vel_div_extreme':
                    hi = sample['vel_div_threshold'] * 1.5
            if pdef.param_type == ParamType.CONTINUOUS:
                if pdef.log_transform:
                    # Log-space sampling: value = lo * (hi/lo) ** u
                    value = lo * (hi / lo) ** u
                else:
                    # Linear sampling
                    value = lo + u * (hi - lo)
            elif pdef.param_type == ParamType.DISCRETE:
                # Discrete sampling: round then clamp to [lo, hi]
                value = int(round(lo + u * (hi - lo)))
                value = max(int(lo), min(int(hi), value))
            else:
                # Boolean/fixed param slipped through: keep champion value
                value = pdef.champion
            sample[param_name] = value
        samples.append(sample)
    return samples
def generate_trials(
    self,
    n_samples_per_switch: int = 500,
    max_trials: Optional[int] = None
) -> List[MCTrialConfig]:
    """
    Build the full set of MC trial configurations.

    Parameters
    ----------
    n_samples_per_switch : int
        LHS samples drawn per unique switch vector.
    max_trials : int, optional
        Hard cap on total trials (useful for smoke tests).

    Returns
    -------
    List[MCTrialConfig]
        All successfully constructed trial configurations.
    """
    vectors = self.generate_switch_vectors()
    print(f"[INFO] Generated {len(vectors)} unique switch vectors")
    trials: List[MCTrialConfig] = []
    next_id = 0
    for idx, vector in enumerate(vectors):
        # Deterministic per-switch seed derived from the master seed.
        switch_seed = (self.base_seed * 1000003 + idx) % 2**31
        batch = self.sample_continuous_params(
            vector, n_samples_per_switch, switch_seed
        )
        for sampled in batch:
            if max_trials and next_id >= max_trials:
                break
            # Start from champion defaults, overlay the sampled values.
            merged = dict(self.CHAMPION)
            merged.update(sampled)
            merged['trial_id'] = next_id
            try:
                trials.append(MCTrialConfig(**merged))
                next_id += 1
            except Exception as exc:
                # Failed trials keep their ID slot unadvanced.
                print(f"[WARN] Failed to create trial {next_id}: {exc}")
        if max_trials and next_id >= max_trials:
            break
    print(f"[INFO] Generated {len(trials)} total trial configurations")
    return trials
def generate_champion_trial(self) -> MCTrialConfig:
    """Return the champion configuration wrapped as a single trial.

    The champion uses the reserved trial_id -1 so it is distinguishable
    from sampled trials downstream.
    """
    champion_params = dict(self.CHAMPION, trial_id=-1)
    return MCTrialConfig(**champion_params)
def save_trials(self, trials: List[MCTrialConfig], path: Union[str, Path]):
"""Save trials to JSON."""
path = Path(path)
path.parent.mkdir(parents=True, exist_ok=True)
data = [t.to_dict() for t in trials]
with open(path, 'w') as f:
json.dump(data, f, indent=2)
print(f"[OK] Saved {len(trials)} trials to {path}")
def load_trials(self, path: Union[str, Path]) -> List[MCTrialConfig]:
    """Deserialise trials previously written by save_trials."""
    with open(path, 'r') as handle:
        raw = json.load(handle)
    loaded = [MCTrialConfig.from_dict(entry) for entry in raw]
    print(f"[OK] Loaded {len(loaded)} trials from {path}")
    return loaded
def test_sampler():
    """Smoke-test the sampler: switch grid, trial generation, spot checks."""
    sampler = MCSampler(base_seed=42)
    # Exercise Phase A enumeration.
    switches = sampler.generate_switch_vectors()
    print(f"Unique switch vectors: {len(switches)}")
    # Exercise a small end-to-end trial build.
    trials = sampler.generate_trials(n_samples_per_switch=10, max_trials=100)
    print(f"Generated trials: {len(trials)}")
    # Spot-check a handful of parameter values.
    for trial in trials[:5]:
        print(f"Trial {trial.trial_id}: vel_div_threshold={trial.vel_div_threshold:.4f}, "
              f"max_leverage={trial.max_leverage:.2f}, use_direction_confirm={trial.use_direction_confirm}")
    return trials
# Allow running this module directly for a quick smoke test.
if __name__ == "__main__":
    test_sampler()

327
nautilus_dolphin/mc/mc_store.py Executable file
View File

@@ -0,0 +1,327 @@
"""
Monte Carlo Result Store
========================
Persistence layer for MC trial results.
Supports:
- Parquet files for bulk data storage
- SQLite index for fast querying
- Incremental/resumable runs
- Batch organization
Reference: MONTE_CARLO_SYSTEM_ENVELOPE_SPEC.md Section 8
"""
import json
import sqlite3
from pathlib import Path
from typing import Dict, List, Optional, Any, Union
from datetime import datetime
import numpy as np
# Try to import pandas/pyarrow
try:
import pandas as pd
PANDAS_AVAILABLE = True
except ImportError:
PANDAS_AVAILABLE = False
print("[WARN] pandas not available - Parquet storage disabled")
from .mc_metrics import MCTrialResult
from .mc_validator import ValidationResult
class MCStore:
    """
    Monte Carlo Result Store.

    Manages persistence of trial configurations, results, and indices:
    Parquet files for bulk rows (when pandas is available) plus a
    SQLite index for fast summary queries and incremental/resumable runs.

    Note: project-type and pandas annotations below are written as
    forward-reference strings so the class can be defined even when
    pandas is missing (the module explicitly supports that case);
    an unquoted ``pd.DataFrame`` annotation would raise NameError at
    class-definition time.
    """
    def __init__(
        self,
        output_dir: Union[str, Path] = "mc_results",
        batch_size: int = 1000
    ):
        """
        Initialize the store and its on-disk layout.

        Parameters
        ----------
        output_dir : str or Path
            Directory for all MC results (created if missing)
        batch_size : int
            Number of trials per batch file
        """
        self.output_dir = Path(output_dir)
        self.batch_size = batch_size
        # Create directory structure
        self.manifests_dir = self.output_dir / "manifests"
        self.results_dir = self.output_dir / "results"
        self.models_dir = self.output_dir / "models"
        self.manifests_dir.mkdir(parents=True, exist_ok=True)
        self.results_dir.mkdir(parents=True, exist_ok=True)
        self.models_dir.mkdir(parents=True, exist_ok=True)
        # SQLite index
        self.index_path = self.output_dir / "mc_index.sqlite"
        self._init_index()
        # Resume numbering after the highest batch already indexed.
        self.current_batch = self._get_latest_batch() + 1

    def _init_index(self):
        """Initialize the SQLite index table and its query indices."""
        conn = sqlite3.connect(self.index_path)
        cursor = conn.cursor()
        cursor.execute('''
            CREATE TABLE IF NOT EXISTS mc_index (
                trial_id INTEGER PRIMARY KEY,
                batch_id INTEGER,
                status TEXT,
                roi_pct REAL,
                profit_factor REAL,
                win_rate REAL,
                max_dd_pct REAL,
                sharpe REAL,
                n_trades INTEGER,
                champion_region INTEGER,
                catastrophic INTEGER,
                created_at INTEGER
            )
        ''')
        # Create indices for the common query filters
        cursor.execute('CREATE INDEX IF NOT EXISTS idx_roi ON mc_index (roi_pct)')
        cursor.execute('CREATE INDEX IF NOT EXISTS idx_champion ON mc_index (champion_region)')
        cursor.execute('CREATE INDEX IF NOT EXISTS idx_catastrophic ON mc_index (catastrophic)')
        cursor.execute('CREATE INDEX IF NOT EXISTS idx_batch ON mc_index (batch_id)')
        conn.commit()
        conn.close()

    def _get_latest_batch(self) -> int:
        """Return the highest batch ID in the index, or 0 when empty."""
        conn = sqlite3.connect(self.index_path)
        cursor = conn.cursor()
        cursor.execute('SELECT MAX(batch_id) FROM mc_index')
        result = cursor.fetchone()
        conn.close()
        # MAX() over an empty table yields NULL -> treat as batch 0.
        return result[0] if result and result[0] else 0

    def save_validation_results(self, results: "List[ValidationResult]", batch_id: int):
        """Save validation results to a per-batch JSON manifest."""
        manifest_path = self.manifests_dir / f"batch_{batch_id:04d}_validation.json"
        data = [r.to_dict() for r in results]
        with open(manifest_path, 'w') as f:
            json.dump(data, f, indent=2)
        print(f"[OK] Saved validation manifest: {manifest_path}")

    def save_trial_results(
        self,
        results: "List[MCTrialResult]",
        batch_id: Optional[int] = None
    ):
        """
        Save trial results to Parquet and update the SQLite index.

        Parameters
        ----------
        results : List[MCTrialResult]
            Trial results to save
        batch_id : int, optional
            Batch ID (auto-incremented if not provided)
        """
        if batch_id is None:
            batch_id = self.current_batch
            self.current_batch += 1
        if not results:
            return
        # Save to Parquet (skipped when pandas is unavailable; the
        # SQLite index is still updated below)
        if PANDAS_AVAILABLE:
            self._save_parquet(results, batch_id)
        # Update SQLite index
        self._update_index(results, batch_id)
        print(f"[OK] Saved batch {batch_id}: {len(results)} trials")

    def _save_parquet(self, results: "List[MCTrialResult]", batch_id: int):
        """Save results to a zstd-compressed Parquet file."""
        parquet_path = self.results_dir / f"batch_{batch_id:04d}_results.parquet"
        # Convert to DataFrame
        data = [r.to_dict() for r in results]
        df = pd.DataFrame(data)
        # Save
        df.to_parquet(parquet_path, index=False, compression='zstd')

    def _update_index(self, results: "List[MCTrialResult]", batch_id: int):
        """Upsert result summaries into the SQLite index."""
        conn = sqlite3.connect(self.index_path)
        cursor = conn.cursor()
        timestamp = int(datetime.now().timestamp())
        for r in results:
            cursor.execute('''
                INSERT OR REPLACE INTO mc_index
                (trial_id, batch_id, status, roi_pct, profit_factor, win_rate,
                 max_dd_pct, sharpe, n_trades, champion_region, catastrophic, created_at)
                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
            ''', (
                r.trial_id,
                batch_id,
                r.status,
                r.roi_pct,
                r.profit_factor,
                r.win_rate,
                r.max_drawdown_pct,
                r.sharpe_ratio,
                r.n_trades,
                int(r.champion_region),
                int(r.catastrophic),
                timestamp
            ))
        conn.commit()
        conn.close()

    def query_index(
        self,
        status: Optional[str] = None,
        min_roi: Optional[float] = None,
        champion_only: bool = False,
        catastrophic_only: bool = False,
        limit: int = 1000
    ) -> List[Dict[str, Any]]:
        """
        Query the SQLite index with optional filters.

        Parameters
        ----------
        status : str, optional
            Filter by status
        min_roi : float, optional
            Minimum ROI percentage
        champion_only : bool
            Only champion region configs
        catastrophic_only : bool
            Only catastrophic configs
        limit : int
            Maximum results

        Returns
        -------
        List[Dict]
            Matching index entries, best ROI first
        """
        conn = sqlite3.connect(self.index_path)
        conn.row_factory = sqlite3.Row
        cursor = conn.cursor()
        # Build the query with bound parameters (never string-formatted)
        query = 'SELECT * FROM mc_index WHERE 1=1'
        params = []
        if status:
            query += ' AND status = ?'
            params.append(status)
        if min_roi is not None:
            query += ' AND roi_pct >= ?'
            params.append(min_roi)
        if champion_only:
            query += ' AND champion_region = 1'
        if catastrophic_only:
            query += ' AND catastrophic = 1'
        query += ' ORDER BY roi_pct DESC LIMIT ?'
        params.append(limit)
        cursor.execute(query, params)
        rows = cursor.fetchall()
        conn.close()
        return [dict(row) for row in rows]

    def get_corpus_stats(self) -> Dict[str, Any]:
        """Get summary statistics about the stored corpus.

        Aggregate fields (avg/min/max ROI, avg sharpe, avg max DD) fall
        back to 0 when no completed trials exist (SQL aggregates over an
        empty set yield NULL).
        """
        conn = sqlite3.connect(self.index_path)
        cursor = conn.cursor()
        # Total trials
        cursor.execute('SELECT COUNT(*) FROM mc_index')
        total = cursor.fetchone()[0]
        # By status
        cursor.execute('SELECT status, COUNT(*) FROM mc_index GROUP BY status')
        by_status = {row[0]: row[1] for row in cursor.fetchall()}
        # Champion region
        cursor.execute('SELECT COUNT(*) FROM mc_index WHERE champion_region = 1')
        champion_count = cursor.fetchone()[0]
        # Catastrophic
        cursor.execute('SELECT COUNT(*) FROM mc_index WHERE catastrophic = 1')
        catastrophic_count = cursor.fetchone()[0]
        # ROI stats (completed trials only)
        cursor.execute('''
            SELECT AVG(roi_pct), MIN(roi_pct), MAX(roi_pct),
                   AVG(sharpe), AVG(max_dd_pct)
            FROM mc_index WHERE status = 'completed'
        ''')
        roi_stats = cursor.fetchone()
        conn.close()

        def _num(idx: int):
            # NULL aggregates come back as None; normalise to 0.
            if roi_stats is None or roi_stats[idx] is None:
                return 0
            return roi_stats[idx]

        return {
            'total_trials': total,
            'by_status': by_status,
            'champion_count': champion_count,
            'catastrophic_count': catastrophic_count,
            'avg_roi_pct': _num(0),
            'min_roi_pct': _num(1),
            'max_roi_pct': _num(2),
            'avg_sharpe': _num(3),
            'avg_max_dd_pct': _num(4),
        }

    def load_batch(self, batch_id: int) -> "Optional[pd.DataFrame]":
        """Load a batch of results from Parquet (None if missing/no pandas)."""
        if not PANDAS_AVAILABLE:
            return None
        parquet_path = self.results_dir / f"batch_{batch_id:04d}_results.parquet"
        if not parquet_path.exists():
            return None
        return pd.read_parquet(parquet_path)

    def load_corpus(self) -> "Optional[pd.DataFrame]":
        """Load the entire corpus from all batch files (None if empty/no pandas)."""
        if not PANDAS_AVAILABLE:
            return None
        batches = []
        for parquet_file in sorted(self.results_dir.glob("batch_*_results.parquet")):
            df = pd.read_parquet(parquet_file)
            batches.append(df)
        if not batches:
            return None
        return pd.concat(batches, ignore_index=True)

View File

@@ -0,0 +1,547 @@
"""
Monte Carlo Configuration Validator
===================================
Internal consistency validation for all constraint groups V1-V4.
Validation Pipeline:
V1: Range check - each param within declared [lo, hi]
V2: Constraint groups - CG-VD, CG-LEV, CG-EXIT, CG-RISK, CG-ACB, etc.
V3: Cross-group check - inter-subsystem coherence
V4: Degenerate check - would produce 0 trades or infinite leverage
Reference: MONTE_CARLO_SYSTEM_ENVELOPE_SPEC.md Section 4
"""
from typing import Dict, List, Optional, Tuple, Any
from dataclasses import dataclass
from enum import Enum
import numpy as np
from .mc_sampler import MCTrialConfig, MCSampler
class ValidationStatus(Enum):
    """Validation result status for the V1-V4 pipeline."""
    VALID = "VALID"
    REJECTED_V1 = "REJECTED_V1"  # Range check failed
    REJECTED_V2 = "REJECTED_V2"  # Constraint group failed
    REJECTED_V3 = "REJECTED_V3"  # Cross-group check failed
    REJECTED_V4 = "REJECTED_V4"  # Degenerate configuration
@dataclass
class ValidationResult:
    """Outcome of validating a single MC trial configuration."""
    # Pipeline verdict (VALID or one of REJECTED_V1..V4)
    status: ValidationStatus
    # Trial this result refers to
    trial_id: int
    # Human-readable rejection reason; None when valid
    reject_reason: Optional[str] = None
    # Non-fatal warnings (e.g. from V3); None sentinel normalised below
    warnings: Optional[List[str]] = None

    def __post_init__(self):
        # dataclasses forbid mutable defaults, so None is used as a
        # sentinel and replaced with a fresh list per instance here.
        if self.warnings is None:
            self.warnings = []

    def is_valid(self) -> bool:
        """Check if configuration is valid."""
        return self.status == ValidationStatus.VALID

    def to_dict(self) -> Dict[str, Any]:
        """Convert to a JSON-serialisable dictionary."""
        return {
            'status': self.status.value,
            'trial_id': self.trial_id,
            'reject_reason': self.reject_reason,
            'warnings': self.warnings,
        }
class MCValidator:
    """
    Monte Carlo Configuration Validator.

    Implements the full V1-V4 validation pipeline:
    V1 range checks, V2 constraint-group rules, V3 cross-group
    coherence (warnings only), V4 degenerate-configuration heuristics.
    """
    def __init__(self, verbose: bool = False):
        """
        Initialize validator.

        Parameters
        ----------
        verbose : bool
            Print detailed validation messages
        """
        self.verbose = verbose
        # Sampler is used only for its parameter definitions (PARAMS).
        self.sampler = MCSampler()

    def validate(self, config: MCTrialConfig) -> ValidationResult:
        """
        Run the full validation pipeline on a configuration.

        Stages short-circuit: the first failing stage determines the
        rejection status; V3 warnings are accumulated regardless.

        Parameters
        ----------
        config : MCTrialConfig
            Configuration to validate

        Returns
        -------
        ValidationResult
            Validation result with status and details
        """
        warnings = []
        # V1: Range checks
        v1_passed, v1_reason = self._validate_v1_ranges(config)
        if not v1_passed:
            return ValidationResult(
                status=ValidationStatus.REJECTED_V1,
                trial_id=config.trial_id,
                reject_reason=v1_reason,
                warnings=warnings
            )
        # V2: Constraint group rules
        v2_passed, v2_reason = self._validate_v2_constraint_groups(config)
        if not v2_passed:
            return ValidationResult(
                status=ValidationStatus.REJECTED_V2,
                trial_id=config.trial_id,
                reject_reason=v2_reason,
                warnings=warnings
            )
        # V3: Cross-group checks (may add warnings even on pass)
        v3_passed, v3_reason, v3_warnings = self._validate_v3_cross_group(config)
        warnings.extend(v3_warnings)
        if not v3_passed:
            return ValidationResult(
                status=ValidationStatus.REJECTED_V3,
                trial_id=config.trial_id,
                reject_reason=v3_reason,
                warnings=warnings
            )
        # V4: Degenerate check (lightweight - no actual backtest)
        v4_passed, v4_reason = self._validate_v4_degenerate(config)
        if not v4_passed:
            return ValidationResult(
                status=ValidationStatus.REJECTED_V4,
                trial_id=config.trial_id,
                reject_reason=v4_reason,
                warnings=warnings
            )
        return ValidationResult(
            status=ValidationStatus.VALID,
            trial_id=config.trial_id,
            reject_reason=None,
            warnings=warnings
        )

    def _validate_v1_ranges(self, config: MCTrialConfig) -> Tuple[bool, Optional[str]]:
        """
        V1: Range checks - each param within declared [lo, hi].

        Relies on MCTrialConfig exposing ``_asdict`` (NamedTuple-style).
        """
        params = config._asdict()
        for name, pdef in self.sampler.PARAMS.items():
            # Derived/fixed params are not sampled, so not range-checked.
            if pdef.param_type.value in ('derived', 'fixed'):
                continue
            value = params.get(name)
            if value is None:
                return False, f"Missing parameter: {name}"
            # Check lower bound
            if pdef.lo is not None and value < pdef.lo:
                return False, f"{name}={value} below minimum {pdef.lo}"
            # Check upper bound (handle dependent bounds)
            hi = pdef.hi
            if hi is None and name == 'vel_div_extreme':
                # Dependent bound: extreme is capped at 1.5x the threshold.
                hi = params.get('vel_div_threshold', -0.02) * 1.5
            if hi is not None and value > hi:
                return False, f"{name}={value} above maximum {hi}"
        return True, None

    def _validate_v2_constraint_groups(self, config: MCTrialConfig) -> Tuple[bool, Optional[str]]:
        """
        V2: Constraint group rules. First failing group wins.
        """
        # CG-VD: Velocity Divergence thresholds
        if not self._check_cg_vd(config):
            return False, "CG-VD: Velocity divergence constraints violated"
        # CG-LEV: Leverage bounds
        if not self._check_cg_lev(config):
            return False, "CG-LEV: Leverage constraints violated"
        # CG-EXIT: Exit management
        if not self._check_cg_exit(config):
            return False, "CG-EXIT: Exit constraints violated"
        # CG-RISK: Combined risk
        if not self._check_cg_risk(config):
            return False, "CG-RISK: Risk cap exceeded"
        # CG-DC-LEV: DC leverage adjustments
        if not self._check_cg_dc_lev(config):
            return False, "CG-DC-LEV: DC leverage adjustment constraints violated"
        # CG-ACB: ACB beta bounds
        if not self._check_cg_acb(config):
            return False, "CG-ACB: ACB beta constraints violated"
        # CG-SP: SmartPlacer rates
        if not self._check_cg_sp(config):
            return False, "CG-SP: SmartPlacer rate constraints violated"
        # CG-OB-SIG: OB signal constraints
        if not self._check_cg_ob_sig(config):
            return False, "CG-OB-SIG: OB signal constraints violated"
        return True, None

    def _check_cg_vd(self, config: MCTrialConfig) -> bool:
        """CG-VD: Velocity Divergence constraints."""
        # extreme < threshold (both negative; extreme is more negative)
        if config.vel_div_extreme >= config.vel_div_threshold:
            if self.verbose:
                print(f" CG-VD fail: extreme={config.vel_div_extreme} >= threshold={config.vel_div_threshold}")
            return False
        # extreme >= -0.15 (below this, no bars fire at all)
        if config.vel_div_extreme < -0.15:
            if self.verbose:
                print(f" CG-VD fail: extreme={config.vel_div_extreme} < -0.15")
            return False
        # threshold <= -0.005 (above this, too many spurious entries)
        if config.vel_div_threshold > -0.005:
            if self.verbose:
                print(f" CG-VD fail: threshold={config.vel_div_threshold} > -0.005")
            return False
        # abs(extreme / threshold) >= 1.5 (meaningful separation).
        # Division is safe here: the threshold check above guarantees
        # threshold <= -0.005, i.e. non-zero.
        separation = abs(config.vel_div_extreme / config.vel_div_threshold)
        if separation < 1.5:
            if self.verbose:
                print(f" CG-VD fail: separation={separation:.2f} < 1.5")
            return False
        return True

    def _check_cg_lev(self, config: MCTrialConfig) -> bool:
        """CG-LEV: Leverage bounds."""
        # min_leverage < max_leverage
        if config.min_leverage >= config.max_leverage:
            if self.verbose:
                print(f" CG-LEV fail: min={config.min_leverage} >= max={config.max_leverage}")
            return False
        # max_leverage - min_leverage >= 1.0 (meaningful range)
        if config.max_leverage - config.min_leverage < 1.0:
            if self.verbose:
                print(f" CG-LEV fail: range={config.max_leverage - config.min_leverage:.2f} < 1.0")
            return False
        # max_leverage * fraction <= 2.0 (notional-capital safety cap)
        notional_cap = config.max_leverage * config.fraction
        if notional_cap > 2.0:
            if self.verbose:
                print(f" CG-LEV fail: notional_cap={notional_cap:.2f} > 2.0")
            return False
        return True

    def _check_cg_exit(self, config: MCTrialConfig) -> bool:
        """CG-EXIT: Exit management constraints.

        Note the unit mismatch handled here: fixed_tp_pct is already a
        decimal fraction, stop_pct is a percentage.
        """
        tp_decimal = config.fixed_tp_pct
        sl_decimal = config.stop_pct / 100.0  # Convert from percentage to decimal
        # TP must be achievable before SL
        if tp_decimal > sl_decimal * 5.0:
            if self.verbose:
                print(f" CG-EXIT fail: TP={tp_decimal:.4f} > SL*5={sl_decimal*5:.4f}")
            return False
        # minimum 30 bps TP
        if tp_decimal < 0.0030:
            if self.verbose:
                print(f" CG-EXIT fail: TP={tp_decimal:.4f} < 0.0030")
            return False
        # minimum 20 bps SL width
        if sl_decimal < 0.0020:
            if self.verbose:
                print(f" CG-EXIT fail: SL={sl_decimal:.4f} < 0.0020")
            return False
        # minimum meaningful hold period
        if config.max_hold_bars < 20:
            if self.verbose:
                print(f" CG-EXIT fail: max_hold={config.max_hold_bars} < 20")
            return False
        # TP:SL ratio >= 0.10x
        if sl_decimal > 0 and tp_decimal / sl_decimal < 0.10:
            if self.verbose:
                print(f" CG-EXIT fail: TP/SL ratio={tp_decimal/sl_decimal:.2f} < 0.10")
            return False
        return True

    def _check_cg_risk(self, config: MCTrialConfig) -> bool:
        """CG-RISK: Combined risk constraints."""
        # fraction * max_leverage <= 2.0 (mirrors CG-LEV)
        max_notional_fraction = config.fraction * config.max_leverage
        if max_notional_fraction > 2.0:
            if self.verbose:
                print(f" CG-RISK fail: max_notional={max_notional_fraction:.2f} > 2.0")
            return False
        # minimum meaningful position
        if max_notional_fraction < 0.10:
            if self.verbose:
                print(f" CG-RISK fail: max_notional={max_notional_fraction:.2f} < 0.10")
            return False
        return True

    def _check_cg_dc_lev(self, config: MCTrialConfig) -> bool:
        """CG-DC-LEV: DC leverage adjustment constraints."""
        if not config.use_direction_confirm:
            # DC not used - constraints don't apply
            return True
        # dc_leverage_boost >= 1.0 (must boost, not reduce)
        if config.dc_leverage_boost < 1.0:
            if self.verbose:
                print(f" CG-DC-LEV fail: boost={config.dc_leverage_boost:.2f} < 1.0")
            return False
        # dc_leverage_reduce < 1.0 (must reduce, not boost)
        if config.dc_leverage_reduce >= 1.0:
            if self.verbose:
                print(f" CG-DC-LEV fail: reduce={config.dc_leverage_reduce:.2f} >= 1.0")
            return False
        # DC swing bounded: boost * (1/reduce) <= 4.0
        # (reduce > 0 is implied by the range checks upstream; a reduce
        # of exactly 0 would divide by zero here)
        dc_swing = config.dc_leverage_boost * (1.0 / config.dc_leverage_reduce)
        if dc_swing > 4.0:
            if self.verbose:
                print(f" CG-DC-LEV fail: dc_swing={dc_swing:.2f} > 4.0")
            return False
        return True

    def _check_cg_acb(self, config: MCTrialConfig) -> bool:
        """CG-ACB: ACB beta bounds."""
        # acb_beta_low < acb_beta_high
        if config.acb_beta_low >= config.acb_beta_high:
            if self.verbose:
                print(f" CG-ACB fail: low={config.acb_beta_low:.2f} >= high={config.acb_beta_high:.2f}")
            return False
        # acb_beta_high - acb_beta_low >= 0.20 (meaningful dynamic range)
        if config.acb_beta_high - config.acb_beta_low < 0.20:
            if self.verbose:
                print(f" CG-ACB fail: range={config.acb_beta_high - config.acb_beta_low:.2f} < 0.20")
            return False
        # acb_beta_high <= 1.50 (cap at 150%)
        if config.acb_beta_high > 1.50:
            if self.verbose:
                print(f" CG-ACB fail: high={config.acb_beta_high:.2f} > 1.50")
            return False
        return True

    def _check_cg_sp(self, config: MCTrialConfig) -> bool:
        """CG-SP: SmartPlacer rate constraints."""
        if not config.use_sp_slippage:
            # Slippage disabled - rates don't matter
            return True
        # Rates must be in [0, 1]
        if not (0.0 <= config.sp_maker_entry_rate <= 1.0):
            if self.verbose:
                print(f" CG-SP fail: entry_rate={config.sp_maker_entry_rate:.2f} not in [0,1]")
            return False
        if not (0.0 <= config.sp_maker_exit_rate <= 1.0):
            if self.verbose:
                print(f" CG-SP fail: exit_rate={config.sp_maker_exit_rate:.2f} not in [0,1]")
            return False
        return True

    def _check_cg_ob_sig(self, config: MCTrialConfig) -> bool:
        """CG-OB-SIG: OB signal constraints.

        Note: applied unconditionally (no use_ob_edge gate), unlike CG-SP.
        """
        # ob_imbalance_bias in [-1.0, 1.0]
        if not (-1.0 <= config.ob_imbalance_bias <= 1.0):
            if self.verbose:
                print(f" CG-OB-SIG fail: bias={config.ob_imbalance_bias:.2f} not in [-1,1]")
            return False
        # ob_depth_scale > 0
        if config.ob_depth_scale <= 0:
            if self.verbose:
                print(f" CG-OB-SIG fail: depth_scale={config.ob_depth_scale:.2f} <= 0")
            return False
        return True

    def _validate_v3_cross_group(
        self, config: MCTrialConfig
    ) -> Tuple[bool, Optional[str], List[str]]:
        """
        V3: Cross-group coherence checks.

        Currently all findings are warnings; nothing rejects.
        Returns (passed, reason, warnings).
        """
        warnings = []
        # Signal threshold vs exit: TP must be achievable before max_hold_bars expires
        # Approximate: at typical vol, price moves ~0.03% per 5s bar
        # (0.0003 heuristic — assumes 5s bars; TODO confirm bar duration)
        expected_tp_bars = config.fixed_tp_pct / 0.0003
        if expected_tp_bars > config.max_hold_bars * 3:
            warnings.append(
                f"TP_TIME_RISK: expected_tp_bars={expected_tp_bars:.0f} > max_hold*3={config.max_hold_bars*3}"
            )
        # Leverage convexity vs range: extreme convexity with wide leverage range
        # produces near-binary leverage
        if config.leverage_convexity > 5.0 and (config.max_leverage - config.min_leverage) > 5.0:
            warnings.append(
                f"HIGH_CONVEXITY_WIDE_RANGE: near-binary leverage behaviour likely"
            )
        # OB skip + DC skip double-filtering: very few trades may fire
        if config.dc_skip_contradicts and config.ob_imbalance_bias > 0.15:
            warnings.append(
                f"DOUBLE_FILTER_RISK: DC skip + strong OB contradiction may starve trades"
            )
        # Reject only on critical cross-group violations
        # (none currently defined - all are warnings)
        return True, None, warnings

    def _validate_v4_degenerate(self, config: MCTrialConfig) -> Tuple[bool, Optional[str]]:
        """
        V4: Degenerate configuration check (lightweight heuristics).

        Full pre-flight with 500 bars is done in mc_executor during actual trial.
        This is just a quick sanity check.
        """
        # Check for numerical extremes that would cause issues
        # Fraction too small - would produce micro-positions
        if config.fraction < 0.02:
            return False, f"FRACTION_TOO_SMALL: fraction={config.fraction} < 0.02"
        # Leverage range too narrow for convexity to matter
        leverage_range = config.max_leverage - config.min_leverage
        if leverage_range < 0.5 and config.leverage_convexity > 2.0:
            return False, f"NARROW_RANGE_HIGH_CONVEXITY: range={leverage_range:.2f}, convexity={config.leverage_convexity:.2f}"
        # Max hold too short for vol filter to stabilize
        if config.max_hold_bars < config.vd_trend_lookback + 10:
            return False, f"HOLD_TOO_SHORT: max_hold={config.max_hold_bars} < trend_lookback+10={config.vd_trend_lookback+10}"
        # IRP lookback too short for meaningful alignment
        if config.lookback < 50:
            return False, f"LOOKBACK_TOO_SHORT: lookback={config.lookback} < 50"
        return True, None

    def validate_batch(
        self,
        configs: List[MCTrialConfig]
    ) -> List[ValidationResult]:
        """
        Validate a batch of configurations.

        Parameters
        ----------
        configs : List[MCTrialConfig]
            Configurations to validate

        Returns
        -------
        List[ValidationResult]
            Validation results (same order as input)
        """
        results = []
        for config in configs:
            result = self.validate(config)
            results.append(result)
        return results

    def get_validity_stats(self, results: List[ValidationResult]) -> Dict[str, Any]:
        """
        Get aggregate statistics about validation results, including a
        breakdown of rejection reasons (keyed by the reason prefix
        before the first colon).
        """
        total = len(results)
        if total == 0:
            return {'total': 0}
        by_status = {}
        for status in ValidationStatus:
            by_status[status.value] = sum(1 for r in results if r.status == status)
        rejection_reasons = {}
        for r in results:
            if r.reject_reason:
                # Group by the reason code before the colon, e.g. "CG-VD"
                reason = r.reject_reason.split(':')[0] if ':' in r.reject_reason else r.reject_reason
                rejection_reasons[reason] = rejection_reasons.get(reason, 0) + 1
        return {
            'total': total,
            'valid': by_status.get(ValidationStatus.VALID.value, 0),
            'rejected_v1': by_status.get(ValidationStatus.REJECTED_V1.value, 0),
            'rejected_v2': by_status.get(ValidationStatus.REJECTED_V2.value, 0),
            'rejected_v3': by_status.get(ValidationStatus.REJECTED_V3.value, 0),
            'rejected_v4': by_status.get(ValidationStatus.REJECTED_V4.value, 0),
            'validity_rate': by_status.get(ValidationStatus.VALID.value, 0) / total,
            'rejection_reasons': rejection_reasons,
        }
def test_validator():
    """Smoke-test the validator against a small sampled batch."""
    validator = MCValidator(verbose=True)
    sampler = MCSampler(base_seed=42)
    # Generate a small trial set and validate it end-to-end.
    trial_configs = sampler.generate_trials(n_samples_per_switch=10, max_trials=100)
    results = validator.validate_batch(trial_configs)
    stats = validator.get_validity_stats(results)
    print(f"\nValidation Stats:")
    print(f" Total: {stats['total']}")
    print(f" Valid: {stats['valid']} ({stats['validity_rate']*100:.1f}%)")
    print(f" Rejected V1: {stats['rejected_v1']}")
    print(f" Rejected V2: {stats['rejected_v2']}")
    print(f" Rejected V3: {stats['rejected_v3']}")
    print(f" Rejected V4: {stats['rejected_v4']}")
    print("\nSample Rejections:")
    for res in results:
        if not res.is_valid():
            print(f" Trial {res.trial_id}: {res.status.value} - {res.reject_reason}")
        # NOTE: condition is independent of loop progress, so with more
        # than 5 invalid results only the first rejection is printed
        # (preserved as-is from the original behaviour).
        if len([x for x in results if not x.is_valid()]) > 5:
            break
    return results
# Allow running this module directly for a quick smoke test.
if __name__ == "__main__":
    test_validator()