initial: import DOLPHIN baseline 2026-04-21 from dolphinng5_predict working tree
Includes core prod + GREEN/BLUE subsystems: - prod/ (BLUE harness, configs, scripts, docs) - nautilus_dolphin/ (GREEN Nautilus-native impl + dvae/ preserved) - adaptive_exit/ (AEM engine + models/bucket_assignments.pkl) - Observability/ (EsoF advisor, TUI, dashboards) - external_factors/ (EsoF producer) - mc_forewarning_qlabs_fork/ (MC regime/envelope) Excludes runtime caches, logs, backups, and reproducible artifacts per .gitignore.
This commit is contained in:
85
nautilus_dolphin/mc/__init__.py
Executable file
85
nautilus_dolphin/mc/__init__.py
Executable file
@@ -0,0 +1,85 @@
|
||||
"""
|
||||
Monte Carlo System Envelope Mapping for DOLPHIN NG
|
||||
==================================================
|
||||
|
||||
Full-system operational envelope simulation and ML forewarning integration.
|
||||
|
||||
This package implements the Monte Carlo System Envelope Specification for
|
||||
the Nautilus-Dolphin trading system. It provides:
|
||||
|
||||
1. Parameter space sampling (Latin Hypercube Sampling)
|
||||
2. Internal consistency validation (V1-V4 constraint groups)
|
||||
3. Trial execution harness (backtest runner)
|
||||
4. Metric extraction (48 metrics, 10 classification labels)
|
||||
5. Result persistence (Parquet + SQLite index)
|
||||
6. ML envelope learning (One-Class SVM, XGBoost)
|
||||
7. Live forewarning API (risk assessment for configurations)
|
||||
|
||||
Usage:
|
||||
from nautilus_dolphin.mc import MCSampler, MCValidator, MCExecutor
|
||||
|
||||
# Run envelope testing
|
||||
python run_mc_envelope.py --mode run --stage 1 --n-samples 500
|
||||
|
||||
# Train ML models on results
|
||||
python run_mc_envelope.py --mode train --output-dir mc_results/
|
||||
|
||||
# Assess a live configuration
|
||||
python run_mc_envelope.py --mode assess --assess my_config.json
|
||||
|
||||
Reference:
|
||||
MONTE_CARLO_SYSTEM_ENVELOPE_SPEC.md - Complete specification document
|
||||
"""
|
||||
|
||||
__version__ = "1.0.0"
|
||||
__author__ = "DOLPHIN NG Team"
|
||||
|
||||
# Core modules (lazy import to avoid heavy dependencies on import)
|
||||
def __getattr__(name):
|
||||
if name == "MCSampler":
|
||||
from .mc_sampler import MCSampler
|
||||
return MCSampler
|
||||
elif name == "MCValidator":
|
||||
from .mc_validator import MCValidator
|
||||
return MCValidator
|
||||
elif name == "MCExecutor":
|
||||
from .mc_executor import MCExecutor
|
||||
return MCExecutor
|
||||
elif name == "MCMetrics":
|
||||
from .mc_metrics import MCMetrics
|
||||
return MCMetrics
|
||||
elif name == "MCStore":
|
||||
from .mc_store import MCStore
|
||||
return MCStore
|
||||
elif name == "MCRunner":
|
||||
from .mc_runner import MCRunner
|
||||
return MCRunner
|
||||
elif name == "MCML":
|
||||
from .mc_ml import MCML
|
||||
return MCML
|
||||
elif name == "DolphinForewarner":
|
||||
from .mc_ml import DolphinForewarner
|
||||
return DolphinForewarner
|
||||
elif name == "MCTrialConfig":
|
||||
from .mc_sampler import MCTrialConfig
|
||||
return MCTrialConfig
|
||||
elif name == "MCTrialResult":
|
||||
from .mc_metrics import MCTrialResult
|
||||
return MCTrialResult
|
||||
raise AttributeError(f"module '{__name__}' has no attribute '{name}'")
|
||||
|
||||
# Public API of the package.  All class names are resolved lazily through
# the module-level __getattr__; __version__ is a plain module attribute.
__all__ = [
    # Core classes
    "MCSampler", "MCValidator", "MCExecutor", "MCMetrics", "MCStore",
    "MCRunner", "MCML", "DolphinForewarner", "MCTrialConfig", "MCTrialResult",
    # Version
    "__version__",
]
|
||||
387
nautilus_dolphin/mc/mc_executor.py
Executable file
387
nautilus_dolphin/mc/mc_executor.py
Executable file
@@ -0,0 +1,387 @@
|
||||
"""
|
||||
Monte Carlo Trial Executor
|
||||
==========================
|
||||
|
||||
Trial execution harness for running backtests with parameter configurations.
|
||||
|
||||
This module interfaces with the Nautilus-Dolphin system to run backtests
|
||||
with sampled parameter configurations and extract metrics.
|
||||
|
||||
Reference: MONTE_CARLO_SYSTEM_ENVELOPE_SPEC.md Section 5
|
||||
"""
|
||||
|
||||
import time
|
||||
from typing import Dict, List, Optional, Any, Tuple
|
||||
from pathlib import Path
|
||||
from datetime import datetime
|
||||
import numpy as np
|
||||
|
||||
from .mc_sampler import MCTrialConfig
|
||||
from .mc_validator import MCValidator, ValidationResult
|
||||
from .mc_metrics import MCMetrics, MCTrialResult
|
||||
|
||||
|
||||
class MCExecutor:
    """
    Monte Carlo Trial Executor.

    Runs backtests for parameter configurations and extracts metrics.

    Pipeline per trial: V1-V4 validation, a cheap preflight screen, then
    either a real Nautilus-Dolphin backtest (when the runtime is importable)
    or a statistical simulation, and finally metric extraction via MCMetrics.
    """

    def __init__(
        self,
        initial_capital: float = 25000.0,
        data_period: Tuple[str, str] = ('2025-12-31', '2026-02-18'),
        preflight_bars: int = 500,
        preflight_min_trades: int = 2,
        verbose: bool = False
    ):
        """
        Initialize the executor.

        Parameters
        ----------
        initial_capital : float
            Starting capital for backtests
        data_period : Tuple[str, str]
            (start_date, end_date) for backtest
        preflight_bars : int
            Bars for preflight check (V4)
        preflight_min_trades : int
            Minimum trades for preflight to pass
        verbose : bool
            Print detailed execution info
        """
        self.initial_capital = initial_capital
        self.data_period = data_period
        self.preflight_bars = preflight_bars
        self.preflight_min_trades = preflight_min_trades
        self.verbose = verbose

        self.validator = MCValidator(verbose=verbose)
        self.metrics = MCMetrics(initial_capital=initial_capital)

        # Detect whether the real backtest runtime is importable.
        self._init_nd_components()

    def _init_nd_components(self):
        """Initialize Nautilus-Dolphin components if available.

        Sets ``self.nd_available``.  On ImportError the executor falls back
        to ``_run_simulated_backtest`` so the MC pipeline stays usable in
        environments without the trading runtime.
        """
        self.nd_available = False

        try:
            # Import key components from Nautilus-Dolphin
            from nautilus_dolphin.nautilus.strategy_config import DolphinStrategyConfig
            from nautilus_dolphin.nautilus.backtest_runner import run_backtest

            self.DolphinStrategyConfig = DolphinStrategyConfig
            self.run_nd_backtest = run_backtest
            self.nd_available = True

            if self.verbose:
                print("[OK] Nautilus-Dolphin components loaded")

        except ImportError as e:
            if self.verbose:
                print(f"[WARN] Nautilus-Dolphin not available: {e}")
                print("[WARN] Will use simulation mode for testing")

    def execute_trial(
        self,
        config: "MCTrialConfig",
        skip_validation: bool = False
    ) -> "MCTrialResult":
        """
        Execute a single MC trial.

        Parameters
        ----------
        config : MCTrialConfig
            Trial configuration
        skip_validation : bool
            Skip validation (if already validated)

        Returns
        -------
        MCTrialResult
            Complete trial result with metrics.  On validation/preflight
            failure or backtest error, a result with only status and
            error_message populated is returned instead of raising.
        """
        start_time = time.time()

        # Step 1: Validation (V1-V4)
        if not skip_validation:
            validation = self.validator.validate(config)
            if not validation.is_valid():
                result = MCTrialResult(
                    trial_id=config.trial_id,
                    config=config,
                    status=validation.status.value,
                    error_message=validation.reject_reason
                )
                result.execution_time_sec = time.time() - start_time
                return result

        # Step 2: Preflight check (V4 lightweight)
        preflight_passed, preflight_msg = self._run_preflight(config)
        if not preflight_passed:
            result = MCTrialResult(
                trial_id=config.trial_id,
                config=config,
                status='PREFLIGHT_FAIL',
                error_message=preflight_msg
            )
            result.execution_time_sec = time.time() - start_time
            return result

        # Step 3: Full backtest (real runtime if importable, else simulation)
        try:
            if self.nd_available:
                trades, daily_pnls, date_stats, signal_stats = self._run_nd_backtest(config)
            else:
                trades, daily_pnls, date_stats, signal_stats = self._run_simulated_backtest(config)

            # Step 4: Compute metrics
            execution_time = time.time() - start_time
            result = self.metrics.compute(
                config, trades, daily_pnls, date_stats, signal_stats, execution_time
            )

            if self.verbose:
                print(f"  Trial {config.trial_id}: ROI={result.roi_pct:.2f}%, "
                      f"Trades={result.n_trades}, Sharpe={result.sharpe_ratio:.2f}")

            return result

        except Exception as e:
            # A single failed trial must not abort the whole batch; record
            # the error in the result instead.
            if self.verbose:
                print(f"  Trial {config.trial_id}: ERROR - {e}")

            result = MCTrialResult(
                trial_id=config.trial_id,
                config=config,
                status='ERROR',
                error_message=str(e)
            )
            result.execution_time_sec = time.time() - start_time
            return result

    def _run_preflight(self, config: "MCTrialConfig") -> Tuple[bool, str]:
        """
        Run lightweight preflight check (V4).

        Cheap sanity screen for parameter combinations that would waste a
        full backtest slot.  Returns (passed, message).
        """
        # Fraction too small to produce meaningful position sizes.
        if config.fraction < 0.02:
            return False, f"FRACTION_TOO_SMALL: {config.fraction}"

        # A narrow leverage range combined with high convexity collapses
        # the leverage curve.
        leverage_range = config.max_leverage - config.min_leverage
        if leverage_range < 0.5 and config.leverage_convexity > 2.0:
            return False, "NARROW_RANGE_HIGH_CONVEXITY"

        # Hold period must exceed the trend lookback with some slack.
        if config.max_hold_bars < config.vd_trend_lookback + 10:
            return False, "HOLD_TOO_SHORT"

        # TP/SL ratio check.  Guard against a zero or negative stop, which
        # previously raised ZeroDivisionError.  fixed_tp_pct is a fraction
        # (e.g. 0.005) while stop_pct is in percent (e.g. 0.5), hence /100.
        if config.stop_pct <= 0:
            return False, f"STOP_PCT_NON_POSITIVE: {config.stop_pct}"
        tp_sl_ratio = config.fixed_tp_pct / (config.stop_pct / 100)
        if tp_sl_ratio > 10:
            return False, f"TP_SL_RATIO_EXTREME: {tp_sl_ratio}"

        return True, "OK"

    def _run_nd_backtest(
        self,
        config: "MCTrialConfig"
    ) -> Tuple[List[Dict], List[float], List[Dict], Dict[str, Any]]:
        """
        Run actual Nautilus-Dolphin backtest.

        Returns (trades, daily_pnls, date_stats, signal_stats).
        """
        # Convert MC config to ND config
        nd_config = self._mc_to_nd_config(config)

        # Run backtest
        backtest_result = self.run_nd_backtest(nd_config)

        # Extract results; missing keys degrade to empty collections.
        trades = backtest_result.get('trades', [])
        daily_pnls = backtest_result.get('daily_pnls', [])
        date_stats = backtest_result.get('date_stats', [])
        signal_stats = backtest_result.get('signal_stats', {})

        return trades, daily_pnls, date_stats, signal_stats

    def _mc_to_nd_config(self, config: "MCTrialConfig") -> Dict[str, Any]:
        """Convert MC trial config to a Nautilus-Dolphin config dict."""
        return {
            'venue': 'BINANCE_FUTURES',
            'environment': 'BACKTEST',
            'trader_id': f'DOLPHIN-MC-{config.trial_id}',
            'strategy': {
                'venue': 'BINANCE_FUTURES',
                'direction': 'SHORT',
                'vel_div_threshold': config.vel_div_threshold,
                'vel_div_extreme': config.vel_div_extreme,
                'max_leverage': config.max_leverage,
                'min_leverage': config.min_leverage,
                'leverage_convexity': config.leverage_convexity,
                'capital_fraction': config.fraction,
                'max_hold_bars': config.max_hold_bars,
                # fixed_tp_pct is a fraction; tp_bps expects basis points.
                'tp_bps': int(config.fixed_tp_pct * 10000),
                'fixed_tp_pct': config.fixed_tp_pct,
                'stop_pct': config.stop_pct,
                'use_trailing': False,
                'irp_alignment_min': config.min_irp_alignment,
                'lookback': config.lookback,
                'excluded_assets': ['TUSDUSDT', 'USDCUSDT'],
                'acb_enabled': True,
                'max_concurrent_positions': 1,
                'daily_loss_limit_pct': 10.0,
                'use_sp_fees': config.use_sp_fees,
                'use_sp_slippage': config.use_sp_slippage,
                'sp_maker_fill_rate': config.sp_maker_entry_rate,
                'sp_maker_exit_rate': config.sp_maker_exit_rate,
                'use_ob_edge': config.use_ob_edge,
                'ob_edge_bps': config.ob_edge_bps,
                'ob_confirm_rate': config.ob_confirm_rate,
                'ob_imbalance_bias': config.ob_imbalance_bias,
                'ob_depth_scale': config.ob_depth_scale,
                'use_direction_confirm': config.use_direction_confirm,
                'dc_lookback_bars': config.dc_lookback_bars,
                'dc_min_magnitude_bps': config.dc_min_magnitude_bps,
                'dc_skip_contradicts': config.dc_skip_contradicts,
                'dc_leverage_boost': config.dc_leverage_boost,
                'dc_leverage_reduce': config.dc_leverage_reduce,
                'use_alpha_layers': config.use_alpha_layers,
                'use_dynamic_leverage': config.use_dynamic_leverage,
                'acb_beta_high': config.acb_beta_high,
                'acb_beta_low': config.acb_beta_low,
                'acb_w750_threshold_pct': config.acb_w750_threshold_pct,
            },
            'data_catalog': {
                'eigenvalues_dir': '../eigenvalues',
                'catalog_path': 'nautilus_dolphin/catalog',
                'start_date': self.data_period[0],
                'end_date': self.data_period[1],
                'assets': [
                    'BTCUSDT', 'ETHUSDT', 'ADAUSDT', 'SOLUSDT', 'DOTUSDT',
                    'AVAXUSDT', 'MATICUSDT', 'LINKUSDT', 'UNIUSDT', 'ATOMUSDT'
                ],
            },
        }

    def _run_simulated_backtest(
        self,
        config: "MCTrialConfig"
    ) -> Tuple[List[Dict], List[float], List[Dict], Dict[str, Any]]:
        """
        Run simulated backtest for testing without Nautilus.

        Produces realistic-looking results based on the parameter
        configuration without actually running a full backtest.  Output is
        intentionally stochastic (uses the global NumPy RNG).
        """
        # Number of trades based on vel_div_threshold (lower = more trades)
        base_trades = 500
        threshold_factor = abs(-0.02 / config.vel_div_threshold)
        n_trades = int(base_trades * threshold_factor * np.random.uniform(0.8, 1.2))
        n_trades = max(20, min(2000, n_trades))

        # Win rate based on parameters
        base_wr = 0.48
        if config.use_direction_confirm:
            base_wr += 0.05
        if config.use_ob_edge:
            base_wr += 0.02
        win_rate = np.clip(base_wr + np.random.normal(0, 0.05), 0.3, 0.7)

        # Generate trades
        trades = []
        n_wins = int(n_trades * win_rate)

        for i in range(n_trades):
            is_win = i < n_wins

            if is_win:
                pnl_pct = np.random.exponential(0.008) + 0.002
                pnl = pnl_pct * self.initial_capital * config.fraction * config.max_leverage
                exit_type = 'tp' if np.random.random() < 0.7 else 'hold'
            else:
                pnl_pct = -np.random.exponential(0.006) - 0.001
                pnl = pnl_pct * self.initial_capital * config.fraction * config.max_leverage
                exit_type = np.random.choice(['stop', 'hold'], p=[0.3, 0.7])

            trades.append({
                'pnl': pnl,
                'pnl_pct': pnl_pct,
                'exit_type': exit_type,
                'bars_held': np.random.randint(10, config.max_hold_bars),
                'asset': np.random.choice(['BTCUSDT', 'ETHUSDT', 'SOLUSDT', 'ADAUSDT']),
            })

        # Shuffle trades
        np.random.shuffle(trades)

        # Distribute ALL trades across the 48 simulated days.  np.array_split
        # spreads the remainder, fixing the previous integer-division slicing
        # which dropped the tail trades from the daily P&L (and dropped every
        # trade when n_trades < 48, since trades_per_day was then 0).
        daily_pnls = []
        date_stats = []
        for day, day_trades in enumerate(np.array_split(trades, 48)):
            day_pnl = sum(t['pnl'] for t in day_trades)
            daily_pnls.append(day_pnl)

            # Days 0-30 map to January, 31-47 to February 2026.
            date_str = f'2026-01-{day + 1:02d}' if day < 31 else f'2026-02-{day - 30:02d}'
            date_stats.append({
                'date': date_str,
                'pnl': day_pnl,
            })

        # Signal stats
        signal_stats = {
            'dc_skip_rate': 0.1 if config.use_direction_confirm else 0.0,
            'ob_skip_rate': 0.05 if config.use_ob_edge else 0.0,
            'dc_confirm_rate': 0.7 if config.use_direction_confirm else 0.0,
            'irp_match_rate': 0.6 if config.use_asset_selection else 0.0,
            'entry_attempt_rate': 0.3,
            'signal_to_trade_rate': len(trades) / (48 * 1000),  # Approximate
        }

        return trades, daily_pnls, date_stats, signal_stats

    def execute_batch(
        self,
        configs: List["MCTrialConfig"],
        progress_interval: int = 10
    ) -> List["MCTrialResult"]:
        """
        Execute a batch of trials.

        Parameters
        ----------
        configs : List[MCTrialConfig]
            Trial configurations
        progress_interval : int
            Print progress every N trials

        Returns
        -------
        List[MCTrialResult]
            Results for all trials
        """
        results = []
        total = len(configs)

        for i, config in enumerate(configs):
            result = self.execute_trial(config)
            results.append(result)

            if (i + 1) % progress_interval == 0 or i == total - 1:
                print(f"  Progress: {i+1}/{total} ({(i+1)/total*100:.1f}%)")

        return results
|
||||
737
nautilus_dolphin/mc/mc_metrics.py
Executable file
737
nautilus_dolphin/mc/mc_metrics.py
Executable file
@@ -0,0 +1,737 @@
|
||||
"""
|
||||
Monte Carlo Metrics Extractor
|
||||
=============================
|
||||
|
||||
Extract 48 metrics and 10 classification labels from trial results.
|
||||
|
||||
Metric Categories:
|
||||
M01-M15: Primary Performance Metrics
|
||||
M16-M32: Risk / Stability Metrics
|
||||
M33-M38: Signal Quality Metrics
|
||||
M39-M43: Capital Path Metrics
|
||||
M44-M48: Regime Metrics
|
||||
L01-L10: Derived Classification Labels
|
||||
|
||||
Reference: MONTE_CARLO_SYSTEM_ENVELOPE_SPEC.md Section 6
|
||||
"""
|
||||
|
||||
from typing import Dict, List, Optional, NamedTuple, Any, Tuple
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime
|
||||
import numpy as np
|
||||
|
||||
from .mc_sampler import MCTrialConfig
|
||||
|
||||
|
||||
@dataclass
class MCTrialResult:
    """Complete result from a Monte Carlo trial.

    Holds the trial configuration plus the 48 extracted metrics (M01-M48),
    the 10 derived boolean labels (L01-L10) and execution metadata.  Metric
    fields default to zero so a rejected or failed trial can still be
    persisted with only status/error_message populated.
    """
    trial_id: int
    config: MCTrialConfig

    # Primary Performance Metrics (M01-M15)
    roi_pct: float = 0.0
    profit_factor: float = 0.0
    win_rate: float = 0.0
    n_trades: int = 0
    max_drawdown_pct: float = 0.0
    sharpe_ratio: float = 0.0
    sortino_ratio: float = 0.0
    calmar_ratio: float = 0.0
    avg_win_pct: float = 0.0
    avg_loss_pct: float = 0.0
    win_loss_ratio: float = 0.0
    expectancy_pct: float = 0.0
    h1_roi_pct: float = 0.0
    h2_roi_pct: float = 0.0
    h2_h1_ratio: float = 0.0

    # Risk / Stability Metrics (M16-M32)
    n_consecutive_losses_max: int = 0
    n_stop_exits: int = 0
    n_tp_exits: int = 0
    n_hold_exits: int = 0
    stop_rate: float = 0.0
    tp_rate: float = 0.0
    hold_rate: float = 0.0
    avg_hold_bars: float = 0.0
    vol_of_daily_pnl: float = 0.0
    skew_daily_pnl: float = 0.0
    kurtosis_daily_pnl: float = 0.0
    worst_day_pct: float = 0.0
    best_day_pct: float = 0.0
    n_days_profitable: int = 0
    n_days_loss: int = 0
    profitable_day_rate: float = 0.0
    max_daily_drawdown_pct: float = 0.0

    # Signal Quality Metrics (M33-M38)
    dc_skip_rate: float = 0.0
    ob_skip_rate: float = 0.0
    dc_confirm_rate: float = 0.0
    irp_match_rate: float = 0.0
    entry_attempt_rate: float = 0.0
    signal_to_trade_rate: float = 0.0

    # Capital Path Metrics (M39-M43)
    equity_curve_slope: float = 0.0
    equity_curve_r2: float = 0.0
    equity_curve_autocorr: float = 0.0
    max_underwater_days: int = 0
    recovery_factor: float = 0.0

    # Regime Metrics (M44-M48)
    date_pnl_std: float = 0.0
    date_pnl_range: float = 0.0
    q10_date_pnl: float = 0.0
    q90_date_pnl: float = 0.0
    tail_ratio: float = 0.0

    # Classification Labels (L01-L10)
    profitable: bool = False
    strongly_profitable: bool = False
    drawdown_ok: bool = False
    sharpe_ok: bool = False
    pf_ok: bool = False
    wr_ok: bool = False
    champion_region: bool = False
    catastrophic: bool = False
    inert: bool = False
    h2_degradation: bool = False

    # Metadata
    timestamp: str = field(default_factory=lambda: datetime.now().isoformat())
    execution_time_sec: float = 0.0
    status: str = "pending"
    error_message: Optional[str] = None

    def compute_labels(self):
        """Derive the L01-L10 boolean classification labels from metrics."""
        self.profitable = self.roi_pct > 0                    # L01
        self.strongly_profitable = self.roi_pct > 30          # L02
        self.drawdown_ok = self.max_drawdown_pct < 20         # L03
        self.sharpe_ok = self.sharpe_ratio > 1.5              # L04
        self.pf_ok = self.profit_factor > 1.10                # L05
        self.wr_ok = self.win_rate > 0.45                     # L06

        # L07: champion_region — every quality gate must hold at once.
        self.champion_region = all((
            self.strongly_profitable,
            self.drawdown_ok,
            self.sharpe_ok,
            self.pf_ok,
            self.wr_ok,
        ))

        # L08: catastrophic — severe loss or severe drawdown.
        self.catastrophic = self.roi_pct < -30 or self.max_drawdown_pct > 40

        self.inert = self.n_trades < 50                       # L09
        self.h2_degradation = self.h2_h1_ratio < 0.50         # L10

    def to_dict(self) -> Dict[str, Any]:
        """Flatten into a single dict suitable for a DataFrame row.

        Config parameters get a ``P_`` prefix, metrics ``M_`` and labels
        ``L_``; metadata keys are unprefixed.
        """
        metric_names = (
            'roi_pct', 'profit_factor', 'win_rate', 'n_trades',
            'max_drawdown_pct', 'sharpe_ratio', 'sortino_ratio',
            'calmar_ratio', 'avg_win_pct', 'avg_loss_pct', 'win_loss_ratio',
            'expectancy_pct', 'h1_roi_pct', 'h2_roi_pct', 'h2_h1_ratio',
            'n_consecutive_losses_max', 'n_stop_exits', 'n_tp_exits',
            'n_hold_exits', 'stop_rate', 'tp_rate', 'hold_rate',
            'avg_hold_bars', 'vol_of_daily_pnl', 'skew_daily_pnl',
            'kurtosis_daily_pnl', 'worst_day_pct', 'best_day_pct',
            'n_days_profitable', 'n_days_loss', 'profitable_day_rate',
            'max_daily_drawdown_pct', 'dc_skip_rate', 'ob_skip_rate',
            'dc_confirm_rate', 'irp_match_rate', 'entry_attempt_rate',
            'signal_to_trade_rate', 'equity_curve_slope', 'equity_curve_r2',
            'equity_curve_autocorr', 'max_underwater_days', 'recovery_factor',
            'date_pnl_std', 'date_pnl_range', 'q10_date_pnl', 'q90_date_pnl',
            'tail_ratio',
        )
        label_names = (
            'profitable', 'strongly_profitable', 'drawdown_ok', 'sharpe_ok',
            'pf_ok', 'wr_ok', 'champion_region', 'catastrophic', 'inert',
            'h2_degradation',
        )

        out = {
            'trial_id': self.trial_id,
            'timestamp': self.timestamp,
            'execution_time_sec': self.execution_time_sec,
            'status': self.status,
            'error_message': self.error_message,
        }
        for key, value in self.config.to_dict().items():
            out[f'P_{key}'] = value
        for attr in metric_names:
            out[f'M_{attr}'] = getattr(self, attr)
        for attr in label_names:
            out[f'L_{attr}'] = getattr(self, attr)
        return out

    @classmethod
    def from_dict(cls, d: Dict[str, Any]) -> 'MCTrialResult':
        """Rebuild a result from a flat dict produced by :meth:`to_dict`."""
        # P_ keys (minus the trial id) reconstruct the configuration.
        config = MCTrialConfig.from_dict({
            k[2:]: v for k, v in d.items()
            if k.startswith('P_') and k != 'P_trial_id'
        })
        result = cls(trial_id=d.get('trial_id', 0), config=config)

        # M_/L_ prefixed keys map 1:1 onto attributes; unknown keys are
        # silently ignored for forward compatibility.
        for key, value in d.items():
            if key[:2] in ('M_', 'L_') and hasattr(result, key[2:]):
                setattr(result, key[2:], value)

        result.timestamp = d.get('timestamp', datetime.now().isoformat())
        result.execution_time_sec = d.get('execution_time_sec', 0.0)
        result.status = d.get('status', 'completed')
        result.error_message = d.get('error_message')
        return result
|
||||
|
||||
|
||||
class MCMetrics:
|
||||
"""
|
||||
Monte Carlo Metrics Extractor.
|
||||
|
||||
Computes all 48 metrics and 10 classification labels from backtest results.
|
||||
"""
|
||||
|
||||
def __init__(self, initial_capital: float = 25000.0):
|
||||
"""
|
||||
Initialize metrics extractor.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
initial_capital : float
|
||||
Initial capital for ROI calculation
|
||||
"""
|
||||
self.initial_capital = initial_capital
|
||||
|
||||
def compute(
|
||||
self,
|
||||
config: MCTrialConfig,
|
||||
trades: List[Dict],
|
||||
daily_pnls: List[float],
|
||||
date_stats: List[Dict],
|
||||
signal_stats: Dict[str, Any],
|
||||
execution_time_sec: float = 0.0
|
||||
) -> MCTrialResult:
|
||||
"""
|
||||
Compute all metrics from backtest results.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
config : MCTrialConfig
|
||||
Trial configuration
|
||||
trades : List[Dict]
|
||||
Trade records with keys: pnl, pnl_pct, exit_type, bars_held, etc.
|
||||
daily_pnls : List[float]
|
||||
Daily P&L values
|
||||
date_stats : List[Dict]
|
||||
Per-date statistics
|
||||
signal_stats : Dict[str, Any]
|
||||
Signal processing statistics
|
||||
execution_time_sec : float
|
||||
Trial execution time
|
||||
|
||||
Returns
|
||||
-------
|
||||
MCTrialResult
|
||||
Complete trial result with all metrics
|
||||
"""
|
||||
result = MCTrialResult(trial_id=config.trial_id, config=config)
|
||||
result.execution_time_sec = execution_time_sec
|
||||
|
||||
# Compute metrics
|
||||
self._compute_performance_metrics(result, trades, daily_pnls, date_stats)
|
||||
self._compute_risk_metrics(result, trades, daily_pnls)
|
||||
self._compute_signal_metrics(result, signal_stats)
|
||||
self._compute_capital_metrics(result, daily_pnls)
|
||||
self._compute_regime_metrics(result, daily_pnls)
|
||||
|
||||
# Compute labels
|
||||
result.compute_labels()
|
||||
|
||||
result.status = "completed"
|
||||
return result
|
||||
|
||||
def _compute_performance_metrics(
|
||||
self,
|
||||
result: MCTrialResult,
|
||||
trades: List[Dict],
|
||||
daily_pnls: List[float],
|
||||
date_stats: List[Dict]
|
||||
):
|
||||
"""Compute M01-M15: Primary Performance Metrics."""
|
||||
n_trades = len(trades)
|
||||
result.n_trades = n_trades
|
||||
|
||||
if n_trades == 0:
|
||||
# No trades - all metrics stay at defaults
|
||||
return
|
||||
|
||||
# Win/loss separation
|
||||
winning_trades = [t for t in trades if t.get('pnl', 0) > 0]
|
||||
losing_trades = [t for t in trades if t.get('pnl', 0) <= 0]
|
||||
|
||||
n_wins = len(winning_trades)
|
||||
n_losses = len(losing_trades)
|
||||
|
||||
# M01: roi_pct
|
||||
final_capital = self.initial_capital + sum(daily_pnls) if daily_pnls else self.initial_capital
|
||||
result.roi_pct = (final_capital - self.initial_capital) / self.initial_capital * 100
|
||||
|
||||
# M02: profit_factor
|
||||
gross_wins = sum(t.get('pnl', 0) for t in winning_trades)
|
||||
gross_losses = abs(sum(t.get('pnl', 0) for t in losing_trades))
|
||||
result.profit_factor = gross_wins / gross_losses if gross_losses > 0 else float('inf')
|
||||
|
||||
# M03: win_rate
|
||||
result.win_rate = n_wins / n_trades if n_trades > 0 else 0
|
||||
|
||||
# M05: max_drawdown_pct
|
||||
result.max_drawdown_pct = self._compute_max_drawdown_pct(daily_pnls)
|
||||
|
||||
# M06: sharpe_ratio (annualized)
|
||||
result.sharpe_ratio = self._compute_sharpe_ratio(daily_pnls)
|
||||
|
||||
# M07: sortino_ratio
|
||||
result.sortino_ratio = self._compute_sortino_ratio(daily_pnls)
|
||||
|
||||
# M08: calmar_ratio
|
||||
result.calmar_ratio = result.roi_pct / result.max_drawdown_pct if result.max_drawdown_pct > 0 else float('inf')
|
||||
|
||||
# M09: avg_win_pct
|
||||
win_pnls_pct = [t.get('pnl_pct', 0) * 100 for t in winning_trades]
|
||||
result.avg_win_pct = np.mean(win_pnls_pct) if win_pnls_pct else 0
|
||||
|
||||
# M10: avg_loss_pct
|
||||
loss_pnls_pct = [t.get('pnl_pct', 0) * 100 for t in losing_trades]
|
||||
result.avg_loss_pct = np.mean(loss_pnls_pct) if loss_pnls_pct else 0
|
||||
|
||||
# M11: win_loss_ratio
|
||||
result.win_loss_ratio = abs(result.avg_win_pct / result.avg_loss_pct) if result.avg_loss_pct != 0 else float('inf')
|
||||
|
||||
# M12: expectancy_pct
|
||||
wr = result.win_rate
|
||||
result.expectancy_pct = wr * result.avg_win_pct + (1 - wr) * result.avg_loss_pct
|
||||
|
||||
# M13-M15: H1/H2 metrics
|
||||
if len(date_stats) >= 2:
|
||||
mid = len(date_stats) // 2
|
||||
h1_pnl = sum(d.get('pnl', 0) for d in date_stats[:mid])
|
||||
h2_pnl = sum(d.get('pnl', 0) for d in date_stats[mid:])
|
||||
h1_capital = self.initial_capital + h1_pnl
|
||||
|
||||
result.h1_roi_pct = h1_pnl / self.initial_capital * 100
|
||||
result.h2_roi_pct = h2_pnl / self.initial_capital * 100
|
||||
result.h2_h1_ratio = h2_pnl / h1_pnl if h1_pnl != 0 else 0
|
||||
|
||||
def _compute_risk_metrics(
|
||||
self,
|
||||
result: MCTrialResult,
|
||||
trades: List[Dict],
|
||||
daily_pnls: List[float]
|
||||
):
|
||||
"""Compute M16-M32: Risk / Stability Metrics."""
|
||||
# M16: n_consecutive_losses_max
|
||||
result.n_consecutive_losses_max = self._compute_max_consecutive_losses(trades)
|
||||
|
||||
# M17-M19: Exit type counts
|
||||
result.n_stop_exits = sum(1 for t in trades if t.get('exit_type') == 'stop')
|
||||
result.n_tp_exits = sum(1 for t in trades if t.get('exit_type') == 'tp')
|
||||
result.n_hold_exits = sum(1 for t in trades if t.get('exit_type') == 'hold')
|
||||
|
||||
# M20-M22: Exit rates
|
||||
n_trades = len(trades)
|
||||
if n_trades > 0:
|
||||
result.stop_rate = result.n_stop_exits / n_trades
|
||||
result.tp_rate = result.n_tp_exits / n_trades
|
||||
result.hold_rate = result.n_hold_exits / n_trades
|
||||
|
||||
# M23: avg_hold_bars
|
||||
hold_bars = [t.get('bars_held', 0) for t in trades]
|
||||
result.avg_hold_bars = np.mean(hold_bars) if hold_bars else 0
|
||||
|
||||
# M24-M26: Daily P&L distribution stats
|
||||
if len(daily_pnls) >= 2:
|
||||
result.vol_of_daily_pnl = np.std(daily_pnls, ddof=1)
|
||||
result.skew_daily_pnl = self._compute_skewness(daily_pnls)
|
||||
result.kurtosis_daily_pnl = self._compute_kurtosis(daily_pnls)
|
||||
|
||||
# M27-M28: Best/worst day
|
||||
if daily_pnls:
|
||||
result.worst_day_pct = min(daily_pnls) / self.initial_capital * 100
|
||||
result.best_day_pct = max(daily_pnls) / self.initial_capital * 100
|
||||
|
||||
# M29-M31: Profitable days
|
||||
result.n_days_profitable = sum(1 for pnl in daily_pnls if pnl > 0)
|
||||
result.n_days_loss = sum(1 for pnl in daily_pnls if pnl <= 0)
|
||||
if daily_pnls:
|
||||
result.profitable_day_rate = result.n_days_profitable / len(daily_pnls)
|
||||
|
||||
# M32: max_daily_drawdown_pct
|
||||
result.max_daily_drawdown_pct = self._compute_max_daily_drawdown_pct(daily_pnls)
|
||||
|
||||
def _compute_signal_metrics(
|
||||
self,
|
||||
result: MCTrialResult,
|
||||
signal_stats: Dict[str, Any]
|
||||
):
|
||||
"""Compute M33-M38: Signal Quality Metrics."""
|
||||
result.dc_skip_rate = signal_stats.get('dc_skip_rate', 0)
|
||||
result.ob_skip_rate = signal_stats.get('ob_skip_rate', 0)
|
||||
result.dc_confirm_rate = signal_stats.get('dc_confirm_rate', 0)
|
||||
result.irp_match_rate = signal_stats.get('irp_match_rate', 0)
|
||||
result.entry_attempt_rate = signal_stats.get('entry_attempt_rate', 0)
|
||||
result.signal_to_trade_rate = signal_stats.get('signal_to_trade_rate', 0)
|
||||
|
||||
def _compute_capital_metrics(
    self,
    result: "MCTrialResult",
    daily_pnls: List[float]
):
    """Compute M39-M43: Capital Path Metrics.

    Reconstructs the equity curve from daily P&L and populates slope/R²,
    return autocorrelation, underwater streak and recovery factor on
    *result*. No-op with fewer than two daily P&L points.
    """
    if len(daily_pnls) < 2:
        return

    # Equity curve: initial capital followed by the cumulative P&L path.
    curve = [self.initial_capital]
    for day_pnl in daily_pnls:
        curve.append(curve[-1] + day_pnl)

    # M39-M40: slope and R² of a linear fit through the equity curve
    slope, r_squared = self._linear_regression(np.arange(len(curve)), curve)
    result.equity_curve_slope = slope
    result.equity_curve_r2 = r_squared

    # M41: lag-1 autocorrelation of daily returns (needs >= 3 returns;
    # exactly 2 returns is recorded as 0, fewer leaves the default)
    returns = np.diff(curve) / curve[:-1]
    if len(returns) > 2:
        result.equity_curve_autocorr = np.corrcoef(returns[:-1], returns[1:])[0, 1]
    elif len(returns) > 1:
        result.equity_curve_autocorr = 0

    # M42: max_underwater_days
    result.max_underwater_days = self._compute_max_underwater_days(curve)

    # M43: recovery_factor = total return / max drawdown (inf when the
    # curve never dips below its peak)
    total_return = sum(daily_pnls)
    max_dd = self._compute_max_drawdown_value(daily_pnls)
    result.recovery_factor = total_return / max_dd if max_dd > 0 else float('inf')
|
||||
|
||||
def _compute_regime_metrics(
    self,
    result: "MCTrialResult",
    daily_pnls: List[float]
):
    """Compute M44-M48: Regime Metrics.

    Populates dispersion and tail statistics of the daily P&L
    distribution on *result*. No-op with fewer than two data points.
    """
    if len(daily_pnls) < 2:
        return

    # M44: sample standard deviation of daily P&L
    result.date_pnl_std = np.std(daily_pnls, ddof=1)

    # M45: spread between the best and worst day
    result.date_pnl_range = max(daily_pnls) - min(daily_pnls)

    # M46-M47: 10th / 90th percentile of daily P&L
    q10, q90 = np.percentile(daily_pnls, [10, 90])
    result.q10_date_pnl = q10
    result.q90_date_pnl = q90

    # M48: tail ratio |q10| / |q90| (left at default when q90 is 0)
    if q90 != 0:
        result.tail_ratio = abs(q10) / abs(q90)
|
||||
|
||||
# --- Helper Methods ---
|
||||
|
||||
def _compute_max_drawdown_pct(self, daily_pnls: List[float]) -> float:
|
||||
"""Compute maximum drawdown as percentage."""
|
||||
if not daily_pnls:
|
||||
return 0
|
||||
|
||||
equity = [self.initial_capital]
|
||||
for pnl in daily_pnls:
|
||||
equity.append(equity[-1] + pnl)
|
||||
|
||||
peak = equity[0]
|
||||
max_dd = 0
|
||||
|
||||
for e in equity:
|
||||
if e > peak:
|
||||
peak = e
|
||||
dd = (peak - e) / peak
|
||||
max_dd = max(max_dd, dd)
|
||||
|
||||
return max_dd * 100
|
||||
|
||||
def _compute_max_drawdown_value(self, daily_pnls: List[float]) -> float:
|
||||
"""Compute maximum drawdown as value."""
|
||||
if not daily_pnls:
|
||||
return 0
|
||||
|
||||
equity = [self.initial_capital]
|
||||
for pnl in daily_pnls:
|
||||
equity.append(equity[-1] + pnl)
|
||||
|
||||
peak = equity[0]
|
||||
max_dd = 0
|
||||
|
||||
for e in equity:
|
||||
if e > peak:
|
||||
peak = e
|
||||
dd = peak - e
|
||||
max_dd = max(max_dd, dd)
|
||||
|
||||
return max_dd
|
||||
|
||||
def _compute_sharpe_ratio(self, daily_pnls: List[float]) -> float:
|
||||
"""Compute annualized Sharpe ratio."""
|
||||
if len(daily_pnls) < 2:
|
||||
return 0
|
||||
|
||||
returns = [p / self.initial_capital for p in daily_pnls]
|
||||
mean_ret = np.mean(returns)
|
||||
std_ret = np.std(returns, ddof=1)
|
||||
|
||||
if std_ret == 0:
|
||||
return 0
|
||||
|
||||
# Annualize (assuming 365 trading days)
|
||||
return (mean_ret / std_ret) * np.sqrt(365)
|
||||
|
||||
def _compute_sortino_ratio(self, daily_pnls: List[float]) -> float:
|
||||
"""Compute annualized Sortino ratio."""
|
||||
if len(daily_pnls) < 2:
|
||||
return 0
|
||||
|
||||
returns = [p / self.initial_capital for p in daily_pnls]
|
||||
mean_ret = np.mean(returns)
|
||||
|
||||
# Downside deviation (only negative returns)
|
||||
downside_returns = [r for r in returns if r < 0]
|
||||
if not downside_returns:
|
||||
return float('inf')
|
||||
|
||||
downside_std = np.std(downside_returns, ddof=1)
|
||||
|
||||
if downside_std == 0:
|
||||
return float('inf')
|
||||
|
||||
return (mean_ret / downside_std) * np.sqrt(365)
|
||||
|
||||
def _compute_max_consecutive_losses(self, trades: List[Dict]) -> int:
|
||||
"""Compute maximum consecutive losing trades."""
|
||||
max_consec = 0
|
||||
current_consec = 0
|
||||
|
||||
for trade in trades:
|
||||
if trade.get('pnl', 0) <= 0:
|
||||
current_consec += 1
|
||||
max_consec = max(max_consec, current_consec)
|
||||
else:
|
||||
current_consec = 0
|
||||
|
||||
return max_consec
|
||||
|
||||
def _compute_skewness(self, data: List[float]) -> float:
|
||||
"""Compute skewness."""
|
||||
if len(data) < 3:
|
||||
return 0
|
||||
|
||||
n = len(data)
|
||||
mean = np.mean(data)
|
||||
std = np.std(data, ddof=1)
|
||||
|
||||
if std == 0:
|
||||
return 0
|
||||
|
||||
skew = sum(((x - mean) / std) ** 3 for x in data) * n / ((n - 1) * (n - 2))
|
||||
return skew
|
||||
|
||||
def _compute_kurtosis(self, data: List[float]) -> float:
|
||||
"""Compute excess kurtosis."""
|
||||
if len(data) < 4:
|
||||
return 0
|
||||
|
||||
n = len(data)
|
||||
mean = np.mean(data)
|
||||
std = np.std(data, ddof=1)
|
||||
|
||||
if std == 0:
|
||||
return 0
|
||||
|
||||
kurt = sum(((x - mean) / std) ** 4 for x in data) * n * (n + 1) / ((n - 1) * (n - 2) * (n - 3))
|
||||
kurt -= 3 * (n - 1) ** 2 / ((n - 2) * (n - 3))
|
||||
return kurt
|
||||
|
||||
def _linear_regression(self, x: np.ndarray, y: List[float]) -> Tuple[float, float]:
|
||||
"""Simple linear regression. Returns (slope, r_squared)."""
|
||||
if len(x) < 2:
|
||||
return 0, 0
|
||||
|
||||
x_mean = np.mean(x)
|
||||
y_mean = np.mean(y)
|
||||
|
||||
numerator = sum((xi - x_mean) * (yi - y_mean) for xi, yi in zip(x, y))
|
||||
denom_x = sum((xi - x_mean) ** 2 for xi in x)
|
||||
denom_y = sum((yi - y_mean) ** 2 for yi in y)
|
||||
|
||||
if denom_x == 0:
|
||||
return 0, 0
|
||||
|
||||
slope = numerator / denom_x
|
||||
|
||||
if denom_y == 0:
|
||||
r_squared = 0
|
||||
else:
|
||||
r_squared = (numerator ** 2) / (denom_x * denom_y)
|
||||
|
||||
return slope, r_squared
|
||||
|
||||
def _compute_max_underwater_days(self, equity: List[float]) -> int:
|
||||
"""Compute maximum consecutive days in drawdown."""
|
||||
max_underwater = 0
|
||||
current_underwater = 0
|
||||
peak = equity[0]
|
||||
|
||||
for e in equity:
|
||||
if e >= peak:
|
||||
peak = e
|
||||
current_underwater = 0
|
||||
else:
|
||||
current_underwater += 1
|
||||
max_underwater = max(max_underwater, current_underwater)
|
||||
|
||||
return max_underwater
|
||||
|
||||
def _compute_max_daily_drawdown_pct(self, daily_pnls: List[float]) -> float:
|
||||
"""Compute worst single-day drawdown percentage."""
|
||||
if not daily_pnls:
|
||||
return 0
|
||||
|
||||
equity = [self.initial_capital]
|
||||
for pnl in daily_pnls:
|
||||
equity.append(equity[-1] + pnl)
|
||||
|
||||
max_dd_pct = 0
|
||||
for i in range(1, len(equity)):
|
||||
prev_equity = equity[i-1]
|
||||
if prev_equity > 0:
|
||||
dd_pct = min(0, daily_pnls[i-1]) / prev_equity * 100
|
||||
max_dd_pct = min(max_dd_pct, dd_pct)
|
||||
|
||||
return max_dd_pct
|
||||
|
||||
|
||||
def test_metrics():
    """Quick test of metrics computation."""
    from .mc_sampler import MCSampler

    sampler = MCSampler()
    config = sampler.generate_champion_trial()

    # Synthetic fixtures: a repeating 3-trade cycle and a repeating
    # 10-day P&L cycle.
    trade_cycle = [
        {'pnl': 100, 'pnl_pct': 0.004, 'exit_type': 'tp', 'bars_held': 50},
        {'pnl': -50, 'pnl_pct': -0.002, 'exit_type': 'stop', 'bars_held': 20},
        {'pnl': 150, 'pnl_pct': 0.006, 'exit_type': 'tp', 'bars_held': 80},
    ]
    trades = trade_cycle * 20  # 60 trades

    daily_pnls = [50, -20, 80, -10, 100, -30, 60, 40, -15, 90] * 5  # 50 days

    date_stats = [
        {'date': f'2026-01-{day + 1:02d}', 'pnl': daily_pnls[day]}
        for day in range(len(daily_pnls))
    ]

    signal_stats = {
        'dc_skip_rate': 0.1,
        'ob_skip_rate': 0.05,
        'dc_confirm_rate': 0.7,
        'irp_match_rate': 0.6,
        'entry_attempt_rate': 0.3,
        'signal_to_trade_rate': 0.15,
    }

    metrics = MCMetrics()
    result = metrics.compute(config, trades, daily_pnls, date_stats, signal_stats)

    print("Test Metrics Result:")
    print(f" ROI: {result.roi_pct:.2f}%")
    print(f" Profit Factor: {result.profit_factor:.2f}")
    print(f" Win Rate: {result.win_rate:.2%}")
    print(f" Sharpe: {result.sharpe_ratio:.2f}")
    print(f" Max DD: {result.max_drawdown_pct:.2f}%")
    print(f" Champion Region: {result.champion_region}")

    return result
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Manual smoke test: run the metrics computation on synthetic fixtures.
    test_metrics()
|
||||
505
nautilus_dolphin/mc/mc_ml.py
Executable file
505
nautilus_dolphin/mc/mc_ml.py
Executable file
@@ -0,0 +1,505 @@
|
||||
"""
|
||||
Monte Carlo ML Envelope Learning
|
||||
================================
|
||||
|
||||
Train ML models on MC results for envelope boundary estimation and forewarning.
|
||||
|
||||
Models:
|
||||
- Regression models for ROI, DD, PF, WR prediction
|
||||
- Classification models for champion_region, catastrophic
|
||||
- One-Class SVM for envelope boundary estimation
|
||||
- SHAP for feature importance
|
||||
|
||||
Reference: MONTE_CARLO_SYSTEM_ENVELOPE_SPEC.md Section 9, 12
|
||||
"""
|
||||
|
||||
import json
|
||||
import pickle
|
||||
from typing import Dict, List, Optional, Any, Tuple
|
||||
from pathlib import Path
|
||||
from dataclasses import dataclass
|
||||
import numpy as np
|
||||
|
||||
# Try to import ML libraries
|
||||
try:
|
||||
from sklearn.ensemble import GradientBoostingRegressor, RandomForestClassifier
|
||||
from sklearn.svm import OneClassSVM
|
||||
from sklearn.preprocessing import StandardScaler
|
||||
from sklearn.model_selection import train_test_split
|
||||
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
|
||||
SKLEARN_AVAILABLE = True
|
||||
except ImportError:
|
||||
SKLEARN_AVAILABLE = False
|
||||
print("[WARN] scikit-learn not available - ML training disabled")
|
||||
|
||||
try:
|
||||
import xgboost as xgb
|
||||
XGBOOST_AVAILABLE = True
|
||||
except ImportError:
|
||||
XGBOOST_AVAILABLE = False
|
||||
|
||||
try:
|
||||
import shap
|
||||
SHAP_AVAILABLE = True
|
||||
except ImportError:
|
||||
SHAP_AVAILABLE = False
|
||||
|
||||
from .mc_sampler import MCTrialConfig, MCSampler
|
||||
from .mc_store import MCStore
|
||||
|
||||
|
||||
@dataclass
class ForewarningReport:
    """Forewarning report for a configuration.

    Field order is part of the positional-construction interface — do not
    reorder.
    """
    config: Dict[str, Any]
    predicted_roi: float
    predicted_roi_p10: float
    predicted_roi_p90: float
    predicted_max_dd: float
    champion_probability: float
    catastrophic_probability: float
    envelope_score: float
    warnings: List[str]
    nearest_champion: Optional[Dict[str, Any]]
    parameter_risks: Dict[str, float]

    def to_dict(self) -> Dict[str, Any]:
        """Convert to a plain dictionary (keys in field-declaration order)."""
        keys = (
            'config',
            'predicted_roi',
            'predicted_roi_p10',
            'predicted_roi_p90',
            'predicted_max_dd',
            'champion_probability',
            'catastrophic_probability',
            'envelope_score',
            'warnings',
            'nearest_champion',
            'parameter_risks',
        )
        return {name: getattr(self, name) for name in keys}
|
||||
|
||||
|
||||
class MCML:
    """
    Monte Carlo ML Envelope Learning.

    Trains regression/classification models and a One-Class SVM envelope
    on the MC trial corpus, persists them under ``models_dir``, and serves
    predictions for candidate configurations.
    """

    def __init__(
        self,
        output_dir: str = "mc_results",
        models_dir: Optional[str] = None
    ):
        """
        Initialize ML trainer.

        Parameters
        ----------
        output_dir : str
            MC results directory
        models_dir : str, optional
            Directory to save trained models (default: <output_dir>/models)
        """
        self.output_dir = Path(output_dir)
        self.models_dir = Path(models_dir) if models_dir else self.output_dir / "models"
        self.models_dir.mkdir(parents=True, exist_ok=True)

        self.store = MCStore(output_dir=output_dir)

        # Trained estimators keyed by model name. Values are sklearn/xgboost
        # objects; annotated as Any because attribute annotations are
        # evaluated at runtime — annotating with StandardScaler here would
        # raise NameError when sklearn is not installed, defeating the
        # optional-import guard at module top.
        self.models: Dict[str, Any] = {}
        self.scalers: Dict[str, Any] = {}
        self.feature_names: List[str] = []

        self._init_feature_names()

    def _init_feature_names(self):
        """Initialize feature names from the sampler's parameter space."""
        sampler = MCSampler()
        self.feature_names = list(sampler.CHAMPION.keys())

    def load_corpus(self) -> Optional[Any]:
        """Load the full trial corpus (DataFrame) from the store."""
        return self.store.load_corpus()

    def train_all_models(self, test_size: float = 0.2) -> Dict[str, Any]:
        """
        Train all ML models on the corpus.

        Parameters
        ----------
        test_size : float
            Fraction of data held out for testing. Forwarded to every
            regression/classification split (previously this parameter was
            accepted but silently ignored — the splits hard-coded 0.2).

        Returns
        -------
        Dict[str, Any]
            Training results and metrics

        Raises
        ------
        RuntimeError
            If scikit-learn is not installed.
        ValueError
            If the corpus is missing or empty.
        """
        if not SKLEARN_AVAILABLE:
            raise RuntimeError("scikit-learn required for training")

        print("="*70)
        print("TRAINING ML MODELS")
        print("="*70)

        # Load corpus
        print("\n[1/6] Loading corpus...")
        df = self.load_corpus()
        if df is None or len(df) == 0:
            raise ValueError("No corpus data available")

        print(f" Loaded {len(df)} trials")

        # Prepare features
        print("\n[2/6] Preparing features...")
        X = self._extract_features(df)

        # Train regression models
        print("\n[3/6] Training regression models...")
        self._train_regression_model(X, df, 'M_roi_pct', 'model_roi', test_size=test_size)
        self._train_regression_model(X, df, 'M_max_drawdown_pct', 'model_dd', test_size=test_size)
        self._train_regression_model(X, df, 'M_profit_factor', 'model_pf', test_size=test_size)
        self._train_regression_model(X, df, 'M_win_rate', 'model_wr', test_size=test_size)

        # Train classification models
        print("\n[4/6] Training classification models...")
        self._train_classification_model(X, df, 'L_champion_region', 'model_champ', test_size=test_size)
        self._train_classification_model(X, df, 'L_catastrophic', 'model_catas', test_size=test_size)
        self._train_classification_model(X, df, 'L_inert', 'model_inert', test_size=test_size)
        self._train_classification_model(X, df, 'L_h2_degradation', 'model_h2deg', test_size=test_size)

        # Train envelope model (One-Class SVM on champions)
        print("\n[5/6] Training envelope boundary model...")
        self._train_envelope_model(X, df)

        # Save models
        print("\n[6/6] Saving models...")
        self._save_models()

        print("\n[OK] All models trained and saved")

        return {'status': 'success', 'n_samples': len(df)}

    def _extract_features(self, df: Any) -> np.ndarray:
        """Extract the standardized parameter feature matrix from *df*.

        Fits a StandardScaler on the full corpus and caches it under
        ``self.scalers['default']`` so live predictions reuse the same
        scaling.
        """
        # Parameter columns carry a 'P_' prefix; features absent from the
        # corpus are silently skipped.
        param_cols = [f'P_{name}' for name in self.feature_names if f'P_{name}' in df.columns]

        # Extract and normalize
        X = df[param_cols].values

        # Standardize
        scaler = StandardScaler()
        X_scaled = scaler.fit_transform(X)
        self.scalers['default'] = scaler

        return X_scaled

    def _train_regression_model(
        self,
        X: np.ndarray,
        df: Any,
        target_col: str,
        model_name: str,
        test_size: float = 0.2
    ):
        """Train a gradient-boosting regressor for *target_col*.

        Skips (with a message) when the target column is absent. The
        trained model is stored under ``self.models[model_name]``.
        """
        if target_col not in df.columns:
            print(f" [SKIP] {model_name}: target column not found")
            return

        y = df[target_col].values

        # Split
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=test_size, random_state=42
        )

        # Train
        model = GradientBoostingRegressor(
            n_estimators=100,
            max_depth=5,
            learning_rate=0.1,
            random_state=42
        )
        model.fit(X_train, y_train)

        # Evaluate (R² on both splits to expose overfitting)
        train_score = model.score(X_train, y_train)
        test_score = model.score(X_test, y_test)

        print(f" {model_name}: R² train={train_score:.3f}, test={test_score:.3f}")

        self.models[model_name] = model

    def _train_classification_model(
        self,
        X: np.ndarray,
        df: Any,
        target_col: str,
        model_name: str,
        test_size: float = 0.2
    ):
        """Train a binary classifier for *target_col*.

        Uses XGBoost when available, RandomForest otherwise. Skips when
        the column is missing or only one class is present.
        """
        if target_col not in df.columns:
            print(f" [SKIP] {model_name}: target column not found")
            return

        y = df[target_col].astype(int).values

        # Check if we have both classes
        if len(set(y)) < 2:
            print(f" [SKIP] {model_name}: only one class present")
            return

        # Stratified split keeps the (often rare) positive class in both sets
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=test_size, random_state=42, stratify=y
        )

        # Train with XGBoost if available, else RandomForest
        if XGBOOST_AVAILABLE:
            # NOTE(review): use_label_encoder was removed in xgboost >= 2.0 —
            # confirm the pinned xgboost version still accepts it.
            model = xgb.XGBClassifier(
                n_estimators=100,
                max_depth=5,
                learning_rate=0.1,
                random_state=42,
                use_label_encoder=False,
                eval_metric='logloss'
            )
        else:
            model = RandomForestClassifier(
                n_estimators=100,
                max_depth=5,
                random_state=42
            )

        model.fit(X_train, y_train)

        # Evaluate
        y_pred = model.predict(X_test)
        acc = accuracy_score(y_test, y_pred)

        print(f" {model_name}: accuracy={acc:.3f}")

        self.models[model_name] = model

    def _train_envelope_model(self, X: np.ndarray, df: Any):
        """Train a One-Class SVM on champion-region configurations.

        The resulting model's decision_function is positive inside the
        learned envelope and negative outside.
        """
        if 'L_champion_region' not in df.columns:
            print(" [SKIP] envelope: champion_region column not found")
            return

        # Filter to champions
        champion_mask = df['L_champion_region'].astype(bool)
        X_champions = X[champion_mask]

        if len(X_champions) < 100:
            print(f" [SKIP] envelope: only {len(X_champions)} champions (need 100+)")
            return

        print(f" Training on {len(X_champions)} champion configurations")

        # nu=0.05 allows ~5% of champions to fall outside the boundary
        model = OneClassSVM(kernel='rbf', nu=0.05, gamma='scale')
        model.fit(X_champions)

        self.models['envelope'] = model
        print(f" Envelope model trained")

    def _save_models(self):
        """Persist all trained models, scalers and feature names to disk."""
        # Save models
        for name, model in self.models.items():
            path = self.models_dir / f"{name}.pkl"
            with open(path, 'wb') as f:
                pickle.dump(model, f)

        # Save scalers
        for name, scaler in self.scalers.items():
            path = self.models_dir / f"scaler_{name}.pkl"
            with open(path, 'wb') as f:
                pickle.dump(scaler, f)

        # Save feature names
        with open(self.models_dir / "feature_names.json", 'w') as f:
            json.dump(self.feature_names, f)

        print(f" Saved {len(self.models)} models to {self.models_dir}")

    def load_models(self):
        """Load trained models and scalers from disk.

        Unloadable pickles (e.g. an XGBoost model when xgboost is not
        installed) are skipped with a warning rather than aborting.
        """
        # Load feature names
        with open(self.models_dir / "feature_names.json", 'r') as f:
            self.feature_names = json.load(f)

        # Load models — skip any that fail (e.g. XGBoost pickle when xgboost not installed)
        model_files = list(self.models_dir.glob("*.pkl"))
        for path in model_files:
            if 'scaler_' in path.name:
                continue
            try:
                with open(path, 'rb') as f:
                    self.models[path.stem] = pickle.load(f)
            except Exception as e:
                print(f" [WARN] Skipping {path.name}: {e}")

        # Load scalers
        for path in self.models_dir.glob("scaler_*.pkl"):
            name = path.stem.replace('scaler_', '')
            try:
                with open(path, 'rb') as f:
                    self.scalers[name] = pickle.load(f)
            except Exception as e:
                print(f" [WARN] Skipping scaler {path.name}: {e}")

        loaded = list(self.models.keys())
        print(f"[OK] Loaded {len(loaded)} models: {loaded}")

    def predict(self, config: MCTrialConfig) -> Dict[str, float]:
        """
        Make predictions for a configuration.

        Lazily loads models from disk on first use. Only targets whose
        model is actually loaded appear in the returned dict.

        Parameters
        ----------
        config : MCTrialConfig
            Configuration to predict

        Returns
        -------
        Dict[str, float]
            Predictions keyed by 'roi', 'max_dd', 'profit_factor',
            'win_rate', 'champion_prob', 'catastrophic_prob',
            'envelope_score'
        """
        if not self.models:
            self.load_models()

        # Extract features
        X = self._config_to_features(config)

        predictions = {}

        # Regression predictions
        if 'model_roi' in self.models:
            predictions['roi'] = self.models['model_roi'].predict(X)[0]
        if 'model_dd' in self.models:
            predictions['max_dd'] = self.models['model_dd'].predict(X)[0]
        if 'model_pf' in self.models:
            predictions['profit_factor'] = self.models['model_pf'].predict(X)[0]
        if 'model_wr' in self.models:
            predictions['win_rate'] = self.models['model_wr'].predict(X)[0]

        # Classification predictions: probability of the positive class when
        # the estimator supports it, otherwise the hard 0/1 label
        if 'model_champ' in self.models:
            if hasattr(self.models['model_champ'], 'predict_proba'):
                predictions['champion_prob'] = self.models['model_champ'].predict_proba(X)[0, 1]
            else:
                predictions['champion_prob'] = float(self.models['model_champ'].predict(X)[0])

        if 'model_catas' in self.models:
            if hasattr(self.models['model_catas'], 'predict_proba'):
                predictions['catastrophic_prob'] = self.models['model_catas'].predict_proba(X)[0, 1]
            else:
                predictions['catastrophic_prob'] = float(self.models['model_catas'].predict(X)[0])

        # Envelope score: > 0 inside the learned envelope, < 0 outside
        if 'envelope' in self.models:
            predictions['envelope_score'] = self.models['envelope'].decision_function(X)[0]

        return predictions

    def _config_to_features(self, config: MCTrialConfig) -> np.ndarray:
        """Convert *config* to a scaled 1xN feature matrix.

        Attributes missing on *config* fall back to the champion baseline.
        """
        features = []
        for name in self.feature_names:
            value = getattr(config, name, MCSampler.CHAMPION[name])
            features.append(value)

        X = np.array([features])

        # Scale with the corpus-fitted scaler when available
        if 'default' in self.scalers:
            X = self.scalers['default'].transform(X)

        return X
|
||||
|
||||
|
||||
class DolphinForewarner:
    """
    Live forewarning system for Dolphin configurations.

    Wraps a trained MCML instance and turns its predictions into a
    ForewarningReport with human-readable risk warnings.
    """

    def __init__(self, models_dir: str = "mc_results/models"):
        """
        Initialize forewarner.

        Parameters
        ----------
        models_dir : str
            Directory with trained models
        """
        self.ml = MCML(models_dir=models_dir)
        self.ml.load_models()

    def assess(self, config: MCTrialConfig) -> ForewarningReport:
        """
        Assess a configuration and return forewarning report.

        Parameters
        ----------
        config : MCTrialConfig
            Configuration to assess

        Returns
        -------
        ForewarningReport
            Complete risk assessment
        """
        predictions = self.ml.predict(config)
        roi = predictions.get('roi', 0)

        # The p10/p90 band is a fixed ±50% spread around the point
        # estimate — a simplified placeholder, not a learned quantile.
        return ForewarningReport(
            config=config.to_dict(),
            predicted_roi=roi,
            predicted_roi_p10=roi * 0.5,
            predicted_roi_p90=roi * 1.5,
            predicted_max_dd=predictions.get('max_dd', 0),
            champion_probability=predictions.get('champion_prob', 0),
            catastrophic_probability=predictions.get('catastrophic_prob', 0),
            envelope_score=predictions.get('envelope_score', 0),
            warnings=self._build_warnings(config, predictions),
            nearest_champion=None,  # would require a corpus search
            parameter_risks={}
        )

    def _build_warnings(self, config, predictions) -> List[str]:
        """Collect human-readable warnings for *config* given *predictions*."""
        warnings = []

        if predictions.get('catastrophic_prob', 0) > 0.10:
            warnings.append(f"Catastrophic risk: {predictions['catastrophic_prob']:.1%}")

        if predictions.get('envelope_score', 0) < 0:
            warnings.append("Configuration outside safe operating envelope")

        # Parameter-boundary heuristics
        if config.max_leverage > 6.0:
            warnings.append(f"High leverage: {config.max_leverage:.1f}x")

        if config.fraction * config.max_leverage > 1.5:
            warnings.append(f"High notional exposure: {config.fraction * config.max_leverage:.2f}x")

        return warnings

    def assess_config_dict(self, config_dict: Dict[str, Any]) -> ForewarningReport:
        """Assess from a configuration dictionary."""
        return self.assess(MCTrialConfig.from_dict(config_dict))
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Smoke test only: confirm the module imports; training is invoked
    # explicitly via MCML.
    print("MC ML module loaded")
    print("Run training with: MCML().train_all_models()")
|
||||
395
nautilus_dolphin/mc/mc_runner.py
Executable file
395
nautilus_dolphin/mc/mc_runner.py
Executable file
@@ -0,0 +1,395 @@
|
||||
"""
|
||||
Monte Carlo Runner
|
||||
==================
|
||||
|
||||
Orchestration and parallel execution for MC envelope mapping.
|
||||
|
||||
Features:
|
||||
- Parallel execution using multiprocessing
|
||||
- Checkpointing and resume capability
|
||||
- Batch processing
|
||||
- Progress tracking
|
||||
|
||||
Reference: MONTE_CARLO_SYSTEM_ENVELOPE_SPEC.md Section 1, 5.4
|
||||
"""
|
||||
|
||||
import time
|
||||
import json
|
||||
from typing import Dict, List, Optional, Any, Callable
|
||||
from pathlib import Path
|
||||
from datetime import datetime
|
||||
import multiprocessing as mp
|
||||
from functools import partial
|
||||
|
||||
from .mc_sampler import MCSampler, MCTrialConfig
|
||||
from .mc_validator import MCValidator, ValidationResult
|
||||
from .mc_executor import MCExecutor
|
||||
from .mc_store import MCStore
|
||||
from .mc_metrics import MCTrialResult
|
||||
|
||||
|
||||
class MCRunner:
|
||||
"""
|
||||
Monte Carlo Runner.
|
||||
|
||||
Orchestrates the full MC envelope mapping pipeline:
|
||||
1. Generate trial configurations
|
||||
2. Validate configurations
|
||||
3. Execute trials (parallel)
|
||||
4. Store results
|
||||
"""
|
||||
|
||||
def __init__(
    self,
    output_dir: str = "mc_results",
    n_workers: int = -1,
    batch_size: int = 1000,
    base_seed: int = 42,
    verbose: bool = True
):
    """
    Initialize the runner.

    Parameters
    ----------
    output_dir : str
        Directory for results
    n_workers : int
        Number of parallel workers (-1 for auto: all cores minus one)
    batch_size : int
        Trials per batch (also forwarded to the store)
    base_seed : int
        Master RNG seed (forwarded to the sampler for reproducibility)
    verbose : bool
        Print progress
    """
    self.output_dir = Path(output_dir)
    # Auto mode leaves one core free for the parent process / OS.
    self.n_workers = n_workers if n_workers > 0 else max(1, mp.cpu_count() - 1)
    self.batch_size = batch_size
    self.base_seed = base_seed
    self.verbose = verbose

    # Components
    self.sampler = MCSampler(base_seed=base_seed)
    self.store = MCStore(output_dir=output_dir, batch_size=batch_size)

    # State
    self.completed_trials: set = set()  # trial_ids already persisted as completed
    self.stats: Dict[str, Any] = {}  # accumulated run statistics (e.g. 'validation')
|
||||
|
||||
def generate_and_validate(
    self,
    n_samples_per_switch: int = 500,
    max_trials: Optional[int] = None
) -> List[MCTrialConfig]:
    """
    Generate and validate trial configurations (phase 1 of the pipeline).

    Samples configurations via the sampler, runs consistency validation,
    persists the validation results (batch_id=0) and stores the validity
    summary under ``self.stats['validation']``.

    Parameters
    ----------
    n_samples_per_switch : int
        Samples per switch vector
    max_trials : int, optional
        Maximum total trials

    Returns
    -------
    List[MCTrialConfig]
        Valid trial configurations only
    """
    print("="*70)
    print("PHASE 1: GENERATE & VALIDATE CONFIGURATIONS")
    print("="*70)

    # Generate trials
    print(f"\n[1/3] Generating trials (n_samples_per_switch={n_samples_per_switch})...")
    all_configs = self.sampler.generate_trials(
        n_samples_per_switch=n_samples_per_switch,
        max_trials=max_trials
    )

    # Validate
    print(f"\n[2/3] Validating {len(all_configs)} configurations...")
    validator = MCValidator(verbose=False)
    validation_results = validator.validate_batch(all_configs)

    # Keep only passing configs; results align 1:1 with configs by position
    valid_configs = [
        config for config, result in zip(all_configs, validation_results)
        if result.is_valid()
    ]

    # Save validation results (batch 0 is reserved for validation output)
    self.store.save_validation_results(validation_results, batch_id=0)

    # Stats
    stats = validator.get_validity_stats(validation_results)
    print(f"\n[3/3] Validation complete:")
    print(f" Total: {stats['total']}")
    print(f" Valid: {stats['valid']} ({stats['validity_rate']*100:.1f}%)")
    print(f" Rejected: {stats['total'] - stats['valid']}")

    self.stats['validation'] = stats

    return valid_configs
|
||||
|
||||
def run_envelope_mapping(
    self,
    n_samples_per_switch: int = 500,
    max_trials: Optional[int] = None,
    resume: bool = True
) -> Dict[str, Any]:
    """
    Run full envelope mapping: generate/validate, then execute in batches.

    Each batch is executed (in parallel when n_workers > 1) and persisted
    before the next starts, so an interrupted run can be resumed.

    Parameters
    ----------
    n_samples_per_switch : int
        Samples per switch vector
    max_trials : int, optional
        Maximum total trials
    resume : bool
        Skip trials already recorded as completed in the store index

    Returns
    -------
    Dict[str, Any]
        Run statistics (timings plus corpus stats)
    """
    start_time = time.time()

    # Phase 1: generate and validate configurations
    valid_configs = self.generate_and_validate(
        n_samples_per_switch=n_samples_per_switch,
        max_trials=max_trials
    )

    # Resume support: drop trials already completed in a previous run
    if resume:
        self._load_completed_trials()
        valid_configs = [c for c in valid_configs if c.trial_id not in self.completed_trials]
        print(f"\n[Resume] {len(self.completed_trials)} trials already completed")
        print(f"[Resume] {len(valid_configs)} trials remaining")

    if not valid_configs:
        print("\n[OK] All trials already completed!")
        return self._get_run_stats(start_time)

    # Phase 2: execute trials
    print("\n" + "="*70)
    print("PHASE 2: EXECUTE TRIALS")
    print("="*70)
    print(f"\nRunning {len(valid_configs)} trials with {self.n_workers} workers...")

    # Split into batches
    batches = self._split_into_batches(valid_configs)
    print(f"Split into {len(batches)} batches (batch_size={self.batch_size})")

    # Process batches; results are saved per batch so progress survives crashes
    total_completed = 0
    for batch_idx, batch_configs in enumerate(batches):
        print(f"\n--- Batch {batch_idx+1}/{len(batches)} ({len(batch_configs)} trials) ---")

        batch_start = time.time()

        if self.n_workers > 1 and len(batch_configs) > 1:
            # Parallel execution
            results = self._execute_parallel(batch_configs)
        else:
            # Sequential execution
            results = self._execute_sequential(batch_configs)

        # Save results (batch ids start at 1; 0 is the validation batch)
        self.store.save_trial_results(results, batch_id=batch_idx+1)

        batch_time = time.time() - batch_start
        total_completed += len(results)

        print(f"Batch {batch_idx+1} complete in {batch_time:.1f}s "
              f"({len(results)/batch_time:.1f} trials/sec)")

        # Progress + ETA extrapolated from average throughput so far
        progress = total_completed / len(valid_configs)
        eta_seconds = (time.time() - start_time) / progress * (1 - progress) if progress > 0 else 0
        print(f"Overall: {total_completed}/{len(valid_configs)} ({progress*100:.1f}%) "
              f"ETA: {eta_seconds/60:.1f} min")

    return self._get_run_stats(start_time)
|
||||
|
||||
def _split_into_batches(
|
||||
self,
|
||||
configs: List[MCTrialConfig]
|
||||
) -> List[List[MCTrialConfig]]:
|
||||
"""Split configurations into batches."""
|
||||
batches = []
|
||||
for i in range(0, len(configs), self.batch_size):
|
||||
batches.append(configs[i:i+self.batch_size])
|
||||
return batches
|
||||
|
||||
def _execute_sequential(
|
||||
self,
|
||||
configs: List[MCTrialConfig]
|
||||
) -> List[MCTrialResult]:
|
||||
"""Execute trials sequentially."""
|
||||
executor = MCExecutor(verbose=self.verbose)
|
||||
return executor.execute_batch(configs, progress_interval=max(1, len(configs)//10))
|
||||
|
||||
    def _execute_parallel(
        self,
        configs: List[MCTrialConfig]
    ) -> List[MCTrialResult]:
        """Execute trials in parallel using multiprocessing.

        Results come back in the same order as ``configs`` (pool.map preserves
        ordering). The worker must be a module-level function so it can be
        pickled into the child processes.
        """
        # Bind the fixed keyword argument up front; partial objects pickle
        # cleanly as long as the wrapped function is module-level.
        # NOTE(review): initial_capital is hard-coded here while the
        # sequential path uses MCExecutor's default — confirm they agree.
        worker = partial(_execute_trial_worker, initial_capital=25000.0)

        # Run in pool; the context manager terminates workers on exit.
        with mp.Pool(processes=self.n_workers) as pool:
            results = pool.map(worker, configs)

        return results
|
||||
|
||||
def _load_completed_trials(self):
|
||||
"""Load IDs of already completed trials from index."""
|
||||
entries = self.store.query_index(status='completed', limit=1000000)
|
||||
self.completed_trials = {e['trial_id'] for e in entries}
|
||||
|
||||
def _get_run_stats(self, start_time: float) -> Dict[str, Any]:
|
||||
"""Get final run statistics."""
|
||||
total_time = time.time() - start_time
|
||||
corpus_stats = self.store.get_corpus_stats()
|
||||
|
||||
stats = {
|
||||
'total_time_sec': total_time,
|
||||
'total_time_min': total_time / 60,
|
||||
'total_time_hours': total_time / 3600,
|
||||
**corpus_stats,
|
||||
}
|
||||
|
||||
print("\n" + "="*70)
|
||||
print("ENVELOPE MAPPING COMPLETE")
|
||||
print("="*70)
|
||||
print(f"\nTotal time: {total_time/3600:.2f} hours")
|
||||
print(f"Total trials: {stats['total_trials']}")
|
||||
print(f"Champion region: {stats['champion_count']}")
|
||||
print(f"Catastrophic: {stats['catastrophic_count']}")
|
||||
print(f"Avg ROI: {stats['avg_roi_pct']:.2f}%")
|
||||
print(f"Avg Sharpe: {stats['avg_sharpe']:.2f}")
|
||||
|
||||
return stats
|
||||
|
||||
    def generate_report(self, output_path: Optional[str] = None):
        """Generate a Markdown summary report of the corpus.

        Parameters
        ----------
        output_path : str, optional
            If given, the report is also written to this file.

        Returns
        -------
        str
            The rendered Markdown report.
        """
        stats = self.store.get_corpus_stats()

        # Corpus-level section; max(1, ...) guards the percentage math
        # against an empty corpus (total_trials == 0).
        report = f"""
# Monte Carlo Envelope Mapping Report

Generated: {datetime.now().isoformat()}

## Corpus Statistics

- Total trials: {stats['total_trials']}
- Champion region: {stats['champion_count']} ({stats['champion_count']/max(1,stats['total_trials'])*100:.1f}%)
- Catastrophic: {stats['catastrophic_count']} ({stats['catastrophic_count']/max(1,stats['total_trials'])*100:.1f}%)

## Performance Metrics

- Average ROI: {stats['avg_roi_pct']:.2f}%
- Min ROI: {stats['min_roi_pct']:.2f}%
- Max ROI: {stats['max_roi_pct']:.2f}%
- Average Sharpe: {stats['avg_sharpe']:.2f}
- Average Max DD: {stats['avg_max_dd_pct']:.2f}%

## Validation Summary

"""
        # Validation section is only available if a validation pass ran in
        # this process (self.stats is populated by the runner, not the store).
        if 'validation' in self.stats:
            vstats = self.stats['validation']
            report += f"""
- Total configs: {vstats['total']}
- Valid configs: {vstats['valid']} ({vstats['validity_rate']*100:.1f}%)
- Rejected V1 (range): {vstats.get('rejected_v1', 0)}
- Rejected V2 (constraints): {vstats.get('rejected_v2', 0)}
- Rejected V3 (cross-group): {vstats.get('rejected_v3', 0)}
- Rejected V4 (degenerate): {vstats.get('rejected_v4', 0)}
"""

        if output_path:
            with open(output_path, 'w') as f:
                f.write(report)
            print(f"\n[OK] Report saved: {output_path}")

        return report
|
||||
|
||||
|
||||
def _execute_trial_worker(
    config: MCTrialConfig,
    initial_capital: float = 25000.0
) -> MCTrialResult:
    """
    Worker function for parallel execution.

    Must be at module level for pickle serialization (mp.Pool pickles the
    callable into each child process).

    Parameters
    ----------
    config : MCTrialConfig
        Fully-specified trial to run.
    initial_capital : float
        Starting account equity handed to the executor.

    Returns
    -------
    MCTrialResult
        Result of the single backtest trial.
    """
    # Fresh executor per call: worker processes share no state between trials.
    executor = MCExecutor(initial_capital=initial_capital, verbose=False)
    # skip_validation=True: the runner validates and filters configs before
    # dispatching them to the pool, so re-validating here would be redundant.
    return executor.execute_trial(config, skip_validation=True)
|
||||
|
||||
|
||||
def run_mc_envelope(
    n_samples_per_switch: int = 100,  # Reduced default for testing; spec uses larger runs
    max_trials: Optional[int] = None,
    n_workers: int = -1,
    output_dir: str = "mc_results",
    resume: bool = True,
    base_seed: int = 42
) -> Dict[str, Any]:
    """
    Convenience function to run full MC envelope mapping.

    Thin wrapper: builds an MCRunner, runs the mapping, then writes a
    Markdown report into the output directory.

    Parameters
    ----------
    n_samples_per_switch : int
        Samples per switch vector
    max_trials : int, optional
        Maximum total trials
    n_workers : int
        Number of parallel workers (-1 for auto)
    output_dir : str
        Output directory
    resume : bool
        Resume from existing results
    base_seed : int
        Master RNG seed

    Returns
    -------
    Dict[str, Any]
        Run statistics
    """
    runner = MCRunner(
        output_dir=output_dir,
        n_workers=n_workers,
        base_seed=base_seed
    )

    stats = runner.run_envelope_mapping(
        n_samples_per_switch=n_samples_per_switch,
        max_trials=max_trials,
        resume=resume
    )

    # Generate report alongside the results so the run is self-describing.
    runner.generate_report(output_path=f"{output_dir}/envelope_report.md")

    return stats
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Smoke test: a tiny, single-worker envelope run into a scratch directory.
    smoke_kwargs = dict(
        n_samples_per_switch=10,
        max_trials=100,
        n_workers=1,
        output_dir="mc_results_test",
    )
    stats = run_mc_envelope(**smoke_kwargs)
    print("\nTest complete!")
|
||||
534
nautilus_dolphin/mc/mc_sampler.py
Executable file
534
nautilus_dolphin/mc/mc_sampler.py
Executable file
@@ -0,0 +1,534 @@
|
||||
"""
|
||||
Monte Carlo Parameter Sampler
|
||||
=============================
|
||||
|
||||
Parameter space definition and Latin Hypercube Sampling (LHS) implementation.
|
||||
|
||||
This module defines the complete 33-parameter space across 7 sub-systems
|
||||
and implements the two-phase sampling strategy:
|
||||
1. Phase A: Switch grid (boolean combinations)
|
||||
2. Phase B: LHS continuous sampling per switch-vector
|
||||
|
||||
Reference: MONTE_CARLO_SYSTEM_ENVELOPE_SPEC.md Section 2, 3
|
||||
"""
|
||||
|
||||
import numpy as np
|
||||
from typing import Dict, List, Optional, Tuple, NamedTuple, Any, Union
|
||||
from dataclasses import dataclass, field
|
||||
from enum import Enum
|
||||
import json
|
||||
from pathlib import Path
|
||||
|
||||
# Try to import scipy for LHS
|
||||
try:
|
||||
from scipy.stats import qmc
|
||||
SCIPY_AVAILABLE = True
|
||||
except ImportError:
|
||||
SCIPY_AVAILABLE = False
|
||||
|
||||
|
||||
class ParamType(Enum):
    """Parameter sampling types.

    Drives how MCSampler draws a value: only CONTINUOUS and DISCRETE are
    sampled from [lo, hi] (see MCSampler.get_free_continuous_params);
    BOOLEAN parameters are enumerated exhaustively in the Phase-A switch
    grid; everything else falls back to the champion value.
    """
    CONTINUOUS = "continuous"    # real-valued; linear or log-space sampling
    DISCRETE = "discrete"        # integer-valued; sampled, rounded, clamped
    CATEGORICAL = "categorical"  # one of ParameterDef.categories (validated in __post_init__)
    BOOLEAN = "boolean"          # enumerated in the switch grid, not LHS-sampled
    DERIVED = "derived"          # presumably computed from other params — not sampled here
    FIXED = "fixed"              # pinned to the champion value
|
||||
|
||||
|
||||
@dataclass
class ParameterDef:
    """Definition of a single parameter in the MC search space.

    ``lo``/``hi`` bound CONTINUOUS and DISCRETE parameters. ``hi`` may be
    None when the upper bound is derived from another parameter at sampling
    time (see MCSampler.sample_continuous_params, e.g. vel_div_extreme).
    """
    id: str                                 # spec identifier, e.g. 'P1.01'
    name: str                               # parameter key used in trial configs
    champion: Any                           # baseline (champion) value
    param_type: ParamType                   # how this parameter is sampled
    lo: Optional[float] = None              # lower bound (CONTINUOUS/DISCRETE only)
    hi: Optional[float] = None              # upper bound; None => dependent bound
    log_transform: bool = False             # sample in log-space when True
    constraint_group: Optional[str] = None  # validation group tag, e.g. 'CG-VD'
    depends_on: Optional[str] = None  # for conditional parameters gated by a switch
    categories: Optional[List[str]] = None  # required when param_type is CATEGORICAL

    def __post_init__(self):
        # A CATEGORICAL parameter without its category list is unusable;
        # fail fast at construction time rather than at sampling time.
        if self.param_type == ParamType.CATEGORICAL and self.categories is None:
            raise ValueError(f"Categorical parameter {self.name} must have categories")
|
||||
|
||||
|
||||
class MCTrialConfig(NamedTuple):
    """Complete parameter vector for a Monte Carlo trial.

    Field order mirrors the P1-P7 sub-system grouping of the spec. Keep new
    fields grouped the same way: ``to_dict`` emits declaration order, and
    downstream storage relies on a stable column order.
    """
    trial_id: int
    # P1 Signal
    vel_div_threshold: float
    vel_div_extreme: float
    use_direction_confirm: bool
    dc_lookback_bars: int
    dc_min_magnitude_bps: float
    dc_skip_contradicts: bool
    dc_leverage_boost: float
    dc_leverage_reduce: float
    vd_trend_lookback: int
    # P2 Leverage
    min_leverage: float
    max_leverage: float
    leverage_convexity: float
    fraction: float
    use_alpha_layers: bool
    use_dynamic_leverage: bool
    # P3 Exit
    fixed_tp_pct: float
    stop_pct: float
    max_hold_bars: int
    # P4 Fees
    use_sp_fees: bool
    use_sp_slippage: bool
    sp_maker_entry_rate: float
    sp_maker_exit_rate: float
    # P5 OB
    use_ob_edge: bool
    ob_edge_bps: float
    ob_confirm_rate: float
    ob_imbalance_bias: float
    ob_depth_scale: float
    # P6 Asset Selection
    use_asset_selection: bool
    min_irp_alignment: float
    lookback: int
    # P7 ACB
    acb_beta_high: float
    acb_beta_low: float
    acb_w750_threshold_pct: int

    def to_dict(self) -> Dict[str, Any]:
        """Convert to a plain dict (field name -> value, in declaration order).

        Delegates to NamedTuple._asdict() so the mapping can never drift out
        of sync with the field list — the previous hand-written version
        repeated all 33 fields and had to be edited in lockstep with them.
        """
        return dict(self._asdict())

    @classmethod
    def from_dict(cls, d: Dict[str, Any]) -> 'MCTrialConfig':
        """Create from a dictionary, silently ignoring unknown keys.

        Raises
        ------
        TypeError
            If any required field is missing from ``d``.
        """
        # Set membership is O(1) per key vs O(n) against the _fields tuple.
        valid_fields = set(cls._fields)
        filtered = {k: v for k, v in d.items() if k in valid_fields}
        return cls(**filtered)
|
||||
|
||||
|
||||
class MCSampler:
    """
    Monte Carlo Parameter Sampler.

    Implements two-phase sampling:
    1. Phase A: Enumerate all boolean switch combinations
    2. Phase B: LHS continuous sampling per switch-vector

    All class-level tables (CHAMPION, PARAMS, BOOLEAN_PARAMS,
    CONDITIONAL_PARAMS) are keyed by the same parameter names used by
    MCTrialConfig.
    """

    # Champion configuration (baseline): the known-good production values.
    # Every trial starts from this dict and overrides the sampled subset.
    CHAMPION = {
        'vel_div_threshold': -0.020,
        'vel_div_extreme': -0.050,
        'use_direction_confirm': True,
        'dc_lookback_bars': 7,
        'dc_min_magnitude_bps': 0.75,
        'dc_skip_contradicts': True,
        'dc_leverage_boost': 1.00,
        'dc_leverage_reduce': 0.50,
        'vd_trend_lookback': 10,
        'min_leverage': 0.50,
        'max_leverage': 5.00,
        'leverage_convexity': 3.00,
        'fraction': 0.20,
        'use_alpha_layers': True,
        'use_dynamic_leverage': True,
        'fixed_tp_pct': 0.0099,
        'stop_pct': 1.00,
        'max_hold_bars': 120,
        'use_sp_fees': True,
        'use_sp_slippage': True,
        'sp_maker_entry_rate': 0.62,
        'sp_maker_exit_rate': 0.50,
        'use_ob_edge': True,
        'ob_edge_bps': 5.00,
        'ob_confirm_rate': 0.40,
        'ob_imbalance_bias': -0.09,
        'ob_depth_scale': 1.00,
        'use_asset_selection': True,
        'min_irp_alignment': 0.45,
        'lookback': 100,
        'acb_beta_high': 0.80,
        'acb_beta_low': 0.20,
        'acb_w750_threshold_pct': 60,
    }

    # Parameter definitions.
    # ParameterDef positional order: (id, name, champion, param_type, lo, hi,
    # log_transform, constraint_group).
    PARAMS = {
        # P1 Signal Generator
        'vel_div_threshold': ParameterDef('P1.01', 'vel_div_threshold', -0.020, ParamType.CONTINUOUS, -0.040, -0.008, False, 'CG-VD'),
        'vel_div_extreme': ParameterDef('P1.02', 'vel_div_extreme', -0.050, ParamType.CONTINUOUS, -0.120, None, False, 'CG-VD'),  # hi depends on threshold
        'use_direction_confirm': ParameterDef('P1.03', 'use_direction_confirm', True, ParamType.BOOLEAN, constraint_group='CG-DC'),
        'dc_lookback_bars': ParameterDef('P1.04', 'dc_lookback_bars', 7, ParamType.DISCRETE, 3, 25, False, 'CG-DC'),
        'dc_min_magnitude_bps': ParameterDef('P1.05', 'dc_min_magnitude_bps', 0.75, ParamType.CONTINUOUS, 0.20, 3.00, False, 'CG-DC'),
        'dc_skip_contradicts': ParameterDef('P1.06', 'dc_skip_contradicts', True, ParamType.BOOLEAN, constraint_group='CG-DC'),
        'dc_leverage_boost': ParameterDef('P1.07', 'dc_leverage_boost', 1.00, ParamType.CONTINUOUS, 1.00, 1.50, False, 'CG-DC-LEV'),
        'dc_leverage_reduce': ParameterDef('P1.08', 'dc_leverage_reduce', 0.50, ParamType.CONTINUOUS, 0.25, 0.90, False, 'CG-DC-LEV'),
        'vd_trend_lookback': ParameterDef('P1.09', 'vd_trend_lookback', 10, ParamType.DISCRETE, 5, 30, False),

        # P2 Leverage
        'min_leverage': ParameterDef('P2.01', 'min_leverage', 0.50, ParamType.CONTINUOUS, 0.10, 1.50, False, 'CG-LEV'),
        'max_leverage': ParameterDef('P2.02', 'max_leverage', 5.00, ParamType.CONTINUOUS, 1.50, 12.00, False, 'CG-LEV'),
        'leverage_convexity': ParameterDef('P2.03', 'leverage_convexity', 3.00, ParamType.CONTINUOUS, 0.75, 6.00, False),
        'fraction': ParameterDef('P2.04', 'fraction', 0.20, ParamType.CONTINUOUS, 0.05, 0.40, False, 'CG-RISK'),
        'use_alpha_layers': ParameterDef('P2.05', 'use_alpha_layers', True, ParamType.BOOLEAN),
        'use_dynamic_leverage': ParameterDef('P2.06', 'use_dynamic_leverage', True, ParamType.BOOLEAN, constraint_group='CG-DYNLEV'),

        # P3 Exit
        'fixed_tp_pct': ParameterDef('P3.01', 'fixed_tp_pct', 0.0099, ParamType.CONTINUOUS, 0.0030, 0.0300, True, 'CG-EXIT'),
        'stop_pct': ParameterDef('P3.02', 'stop_pct', 1.00, ParamType.CONTINUOUS, 0.20, 5.00, True, 'CG-EXIT'),
        'max_hold_bars': ParameterDef('P3.03', 'max_hold_bars', 120, ParamType.DISCRETE, 20, 600, False, 'CG-EXIT'),

        # P4 Fees
        'use_sp_fees': ParameterDef('P4.01', 'use_sp_fees', True, ParamType.BOOLEAN),
        'use_sp_slippage': ParameterDef('P4.02', 'use_sp_slippage', True, ParamType.BOOLEAN, constraint_group='CG-SP'),
        'sp_maker_entry_rate': ParameterDef('P4.03', 'sp_maker_entry_rate', 0.62, ParamType.CONTINUOUS, 0.20, 0.85, False, 'CG-SP'),
        'sp_maker_exit_rate': ParameterDef('P4.04', 'sp_maker_exit_rate', 0.50, ParamType.CONTINUOUS, 0.20, 0.85, False, 'CG-SP'),

        # P5 OB Intelligence
        'use_ob_edge': ParameterDef('P5.01', 'use_ob_edge', True, ParamType.BOOLEAN, constraint_group='CG-OB'),
        'ob_edge_bps': ParameterDef('P5.02', 'ob_edge_bps', 5.00, ParamType.CONTINUOUS, 1.00, 20.00, True, 'CG-OB'),
        'ob_confirm_rate': ParameterDef('P5.03', 'ob_confirm_rate', 0.40, ParamType.CONTINUOUS, 0.10, 0.80, False, 'CG-OB'),
        'ob_imbalance_bias': ParameterDef('P5.04', 'ob_imbalance_bias', -0.09, ParamType.CONTINUOUS, -0.25, 0.15, False, 'CG-OB-SIG'),
        'ob_depth_scale': ParameterDef('P5.05', 'ob_depth_scale', 1.00, ParamType.CONTINUOUS, 0.30, 2.00, True, 'CG-OB-SIG'),

        # P6 Asset Selection
        'use_asset_selection': ParameterDef('P6.01', 'use_asset_selection', True, ParamType.BOOLEAN, constraint_group='CG-IRP'),
        'min_irp_alignment': ParameterDef('P6.02', 'min_irp_alignment', 0.45, ParamType.CONTINUOUS, 0.10, 0.80, False, 'CG-IRP'),
        'lookback': ParameterDef('P6.03', 'lookback', 100, ParamType.DISCRETE, 30, 300, False, 'CG-IRP'),

        # P7 ACB
        'acb_beta_high': ParameterDef('P7.01', 'acb_beta_high', 0.80, ParamType.CONTINUOUS, 0.40, 1.50, False, 'CG-ACB'),
        'acb_beta_low': ParameterDef('P7.02', 'acb_beta_low', 0.20, ParamType.CONTINUOUS, 0.00, 0.60, False, 'CG-ACB'),
        'acb_w750_threshold_pct': ParameterDef('P7.03', 'acb_w750_threshold_pct', 60, ParamType.DISCRETE, 20, 80, False),
    }

    # Boolean parameters enumerated in the Phase-A switch grid (2^8 = 256 raw
    # combinations before canonicalisation collapses equivalents).
    BOOLEAN_PARAMS = [
        'use_direction_confirm',
        'dc_skip_contradicts',
        'use_alpha_layers',
        'use_dynamic_leverage',
        'use_sp_fees',
        'use_sp_slippage',
        'use_ob_edge',
        'use_asset_selection',
    ]

    # Parameters that become FIXED (champion value) when their parent switch
    # is False — sampling them would only create duplicate configurations.
    CONDITIONAL_PARAMS = {
        'use_direction_confirm': ['dc_lookback_bars', 'dc_min_magnitude_bps', 'dc_skip_contradicts', 'dc_leverage_boost', 'dc_leverage_reduce'],
        'use_sp_slippage': ['sp_maker_entry_rate', 'sp_maker_exit_rate'],
        'use_ob_edge': ['ob_edge_bps', 'ob_confirm_rate'],
        'use_asset_selection': ['min_irp_alignment', 'lookback'],
    }

    def __init__(self, base_seed: int = 42):
        """
        Initialize the sampler.

        Parameters
        ----------
        base_seed : int
            Master RNG seed for reproducibility
        """
        self.base_seed = base_seed
        # Legacy RandomState (not the newer Generator API) — kept so seeded
        # runs remain reproducible across library versions.
        self.rng = np.random.RandomState(base_seed)

    def generate_switch_vectors(self) -> List[Dict[str, Any]]:
        """
        Phase A: Generate all unique boolean switch combinations.

        After canonicalisation (collapsing equivalent configs),
        returns approximately 64-96 unique switch vectors.

        Returns
        -------
        List[Dict[str, Any]]
            List of switch vectors (boolean parameter assignments)
        """
        n_bool = len(self.BOOLEAN_PARAMS)
        n_combinations = 2 ** n_bool

        switch_vectors = []
        seen_canonical = set()

        for i in range(n_combinations):
            # Decode integer to boolean switches: bit j of i drives the j-th
            # boolean parameter.
            switches = {}
            for j, param_name in enumerate(self.BOOLEAN_PARAMS):
                switches[param_name] = bool((i >> j) & 1)

            # Create canonical form (conditional params fixed to champion when parent is False).
            # Two raw vectors that differ only in a masked child (e.g.
            # dc_skip_contradicts while use_direction_confirm=False) collapse
            # to the same canonical key and are emitted once.
            canonical = self._canonicalize_switch_vector(switches)
            canonical_key = tuple(sorted((k, v) for k, v in canonical.items() if isinstance(v, bool)))

            if canonical_key not in seen_canonical:
                seen_canonical.add(canonical_key)
                switch_vectors.append(canonical)

        return switch_vectors

    def _canonicalize_switch_vector(self, switches: Dict[str, bool]) -> Dict[str, Any]:
        """
        Convert a raw switch vector to canonical form.

        When a parent switch is False, its conditional parameters
        are set to FIXED champion values.
        """
        canonical = dict(switches)

        for parent, children in self.CONDITIONAL_PARAMS.items():
            if not switches.get(parent, False):
                # Parent is disabled - fix children to champion
                for child in children:
                    canonical[child] = self.CHAMPION[child]

        return canonical

    def get_free_continuous_params(self, switch_vector: Dict[str, Any]) -> List[str]:
        """
        Get list of continuous/discrete parameters that are NOT fixed
        by the switch vector.

        Order follows PARAMS insertion order; sample_continuous_params
        depends on that (dependent bounds are resolved left-to-right).
        """
        free_params = []

        for name, pdef in self.PARAMS.items():
            if pdef.param_type in (ParamType.CONTINUOUS, ParamType.DISCRETE):
                # Check if this param is fixed by any (disabled) parent switch.
                is_fixed = False
                for parent, children in self.CONDITIONAL_PARAMS.items():
                    if name in children and not switch_vector.get(parent, True):
                        is_fixed = True
                        break

                if not is_fixed:
                    free_params.append(name)

        return free_params

    def sample_continuous_params(
        self,
        switch_vector: Dict[str, Any],
        n_samples: int,
        seed: int
    ) -> List[Dict[str, Any]]:
        """
        Phase B: Generate n LHS samples for continuous/discrete parameters.

        Parameters
        ----------
        switch_vector : dict
            Fixed boolean parameters
        n_samples : int
            Number of samples to generate
        seed : int
            RNG seed for this batch

        Returns
        -------
        List[Dict[str, Any]]
            List of complete parameter dicts (switch + continuous)
        """
        free_params = self.get_free_continuous_params(switch_vector)
        n_free = len(free_params)

        if n_free == 0:
            # No free parameters - just return the switch vector
            return [dict(switch_vector)]

        # Generate LHS samples in unit hypercube
        if SCIPY_AVAILABLE:
            sampler = qmc.LatinHypercube(d=n_free, seed=seed)
            unit_samples = sampler.random(n=n_samples)
        else:
            # Fallback: random sampling with warning (loses the LHS
            # stratification guarantee but keeps the run reproducible).
            print(f"[WARN] scipy not available, using random sampling instead of LHS")
            rng = np.random.RandomState(seed)
            unit_samples = rng.rand(n_samples, n_free)

        # Scale unit-hypercube draws to each parameter's range.
        samples = []
        for i in range(n_samples):
            sample = dict(switch_vector)

            for j, param_name in enumerate(free_params):
                pdef = self.PARAMS[param_name]
                u = unit_samples[i, j]

                # Handle dependent bounds
                lo = pdef.lo
                hi = pdef.hi
                if hi is None:
                    # Compute dependent bound.
                    # NOTE(review): this relies on 'vel_div_threshold'
                    # preceding 'vel_div_extreme' in PARAMS insertion order so
                    # the threshold is already in `sample` — true today, but
                    # fragile if PARAMS is ever reordered.
                    if param_name == 'vel_div_extreme':
                        hi = sample['vel_div_threshold'] * 1.5

                if pdef.param_type == ParamType.CONTINUOUS:
                    if pdef.log_transform:
                        # Log-space sampling: value = lo * (hi/lo) ** u
                        value = lo * (hi / lo) ** u
                    else:
                        # Linear sampling
                        value = lo + u * (hi - lo)
                elif pdef.param_type == ParamType.DISCRETE:
                    # Discrete sampling: round to nearest int, then clamp to
                    # [lo, hi] to guard against rounding past the bounds.
                    value = int(round(lo + u * (hi - lo)))
                    value = max(int(lo), min(int(hi), value))
                else:
                    value = pdef.champion

                sample[param_name] = value

            samples.append(sample)

        return samples

    def generate_trials(
        self,
        n_samples_per_switch: int = 500,
        max_trials: Optional[int] = None
    ) -> List[MCTrialConfig]:
        """
        Generate all MC trial configurations.

        Parameters
        ----------
        n_samples_per_switch : int
            Samples per unique switch vector
        max_trials : int, optional
            Maximum total trials (for testing)

        Returns
        -------
        List[MCTrialConfig]
            All trial configurations
        """
        switch_vectors = self.generate_switch_vectors()
        print(f"[INFO] Generated {len(switch_vectors)} unique switch vectors")

        trials = []
        trial_id = 0

        for switch_idx, switch_vector in enumerate(switch_vectors):
            # Derive a per-switch-vector seed from the master seed so each
            # vector gets an independent but reproducible LHS stream.
            switch_seed = (self.base_seed * 1000003 + switch_idx) % 2**31

            # Generate continuous samples
            samples = self.sample_continuous_params(
                switch_vector, n_samples_per_switch, switch_seed
            )

            for sample in samples:
                if max_trials and trial_id >= max_trials:
                    break

                # Fill in any missing parameters with champion values
                full_params = dict(self.CHAMPION)
                full_params.update(sample)
                full_params['trial_id'] = trial_id

                # Create trial config; a malformed sample is skipped (and
                # logged) rather than aborting the whole generation pass.
                try:
                    config = MCTrialConfig(**full_params)
                    trials.append(config)
                    trial_id += 1
                except Exception as e:
                    print(f"[WARN] Failed to create trial {trial_id}: {e}")

            if max_trials and trial_id >= max_trials:
                break

        print(f"[INFO] Generated {len(trials)} total trial configurations")
        return trials

    def generate_champion_trial(self) -> MCTrialConfig:
        """Generate the champion configuration as a single trial."""
        params = dict(self.CHAMPION)
        params['trial_id'] = -1  # Special ID for champion
        return MCTrialConfig(**params)

    def save_trials(self, trials: List[MCTrialConfig], path: Union[str, Path]):
        """Save trials to JSON."""
        path = Path(path)
        path.parent.mkdir(parents=True, exist_ok=True)

        data = [t.to_dict() for t in trials]
        with open(path, 'w') as f:
            json.dump(data, f, indent=2)

        print(f"[OK] Saved {len(trials)} trials to {path}")

    def load_trials(self, path: Union[str, Path]) -> List[MCTrialConfig]:
        """Load trials from JSON."""
        with open(path, 'r') as f:
            data = json.load(f)

        trials = [MCTrialConfig.from_dict(d) for d in data]
        print(f"[OK] Loaded {len(trials)} trials from {path}")
        return trials
|
||||
|
||||
|
||||
def test_sampler():
    """Quick smoke test of the sampler: switch grid, trial generation, value spot-check."""
    smoke_sampler = MCSampler(base_seed=42)

    # Phase A: switch-vector enumeration.
    vectors = smoke_sampler.generate_switch_vectors()
    print(f"Unique switch vectors: {len(vectors)}")

    # Phase B: a small trial-generation run.
    generated = smoke_sampler.generate_trials(n_samples_per_switch=10, max_trials=100)
    print(f"Generated trials: {len(generated)}")

    # Spot-check a handful of sampled values.
    for trial in generated[:5]:
        print(f"Trial {trial.trial_id}: vel_div_threshold={trial.vel_div_threshold:.4f}, "
              f"max_leverage={trial.max_leverage:.2f}, use_direction_confirm={trial.use_direction_confirm}")

    return generated
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Manual smoke test; see test_sampler() for what it exercises.
    test_sampler()
|
||||
327
nautilus_dolphin/mc/mc_store.py
Executable file
327
nautilus_dolphin/mc/mc_store.py
Executable file
@@ -0,0 +1,327 @@
|
||||
"""
|
||||
Monte Carlo Result Store
|
||||
========================
|
||||
|
||||
Persistence layer for MC trial results.
|
||||
|
||||
Supports:
|
||||
- Parquet files for bulk data storage
|
||||
- SQLite index for fast querying
|
||||
- Incremental/resumable runs
|
||||
- Batch organization
|
||||
|
||||
Reference: MONTE_CARLO_SYSTEM_ENVELOPE_SPEC.md Section 8
|
||||
"""
|
||||
|
||||
import json
|
||||
import sqlite3
|
||||
from pathlib import Path
|
||||
from typing import Dict, List, Optional, Any, Union
|
||||
from datetime import datetime
|
||||
import numpy as np
|
||||
|
||||
# Try to import pandas/pyarrow
|
||||
try:
|
||||
import pandas as pd
|
||||
PANDAS_AVAILABLE = True
|
||||
except ImportError:
|
||||
PANDAS_AVAILABLE = False
|
||||
print("[WARN] pandas not available - Parquet storage disabled")
|
||||
|
||||
from .mc_metrics import MCTrialResult
|
||||
from .mc_validator import ValidationResult
|
||||
|
||||
|
||||
class MCStore:
|
||||
"""
|
||||
Monte Carlo Result Store.
|
||||
|
||||
Manages persistence of trial configurations, results, and indices.
|
||||
"""
|
||||
|
||||
    def __init__(
        self,
        output_dir: Union[str, Path] = "mc_results",
        batch_size: int = 1000
    ):
        """
        Initialize the store.

        Creates the on-disk layout (manifests/, results/, models/) and the
        SQLite index if they do not already exist, so construction is safe
        for both fresh and resumed runs.

        Parameters
        ----------
        output_dir : str or Path
            Directory for all MC results
        batch_size : int
            Number of trials per batch file
        """
        self.output_dir = Path(output_dir)
        self.batch_size = batch_size

        # Create directory structure: JSON manifests, Parquet results,
        # and trained ML models each get their own subdirectory.
        self.manifests_dir = self.output_dir / "manifests"
        self.results_dir = self.output_dir / "results"
        self.models_dir = self.output_dir / "models"

        self.manifests_dir.mkdir(parents=True, exist_ok=True)
        self.results_dir.mkdir(parents=True, exist_ok=True)
        self.models_dir.mkdir(parents=True, exist_ok=True)

        # SQLite index (summary row per trial; bulk data lives in Parquet).
        self.index_path = self.output_dir / "mc_index.sqlite"
        self._init_index()

        # Next batch continues after whatever is already in the index, so a
        # resumed run never overwrites an existing batch's files.
        self.current_batch = self._get_latest_batch() + 1
|
||||
|
||||
def _init_index(self):
|
||||
"""Initialize SQLite index."""
|
||||
conn = sqlite3.connect(self.index_path)
|
||||
cursor = conn.cursor()
|
||||
|
||||
cursor.execute('''
|
||||
CREATE TABLE IF NOT EXISTS mc_index (
|
||||
trial_id INTEGER PRIMARY KEY,
|
||||
batch_id INTEGER,
|
||||
status TEXT,
|
||||
roi_pct REAL,
|
||||
profit_factor REAL,
|
||||
win_rate REAL,
|
||||
max_dd_pct REAL,
|
||||
sharpe REAL,
|
||||
n_trades INTEGER,
|
||||
champion_region INTEGER,
|
||||
catastrophic INTEGER,
|
||||
created_at INTEGER
|
||||
)
|
||||
''')
|
||||
|
||||
# Create indices
|
||||
cursor.execute('CREATE INDEX IF NOT EXISTS idx_roi ON mc_index (roi_pct)')
|
||||
cursor.execute('CREATE INDEX IF NOT EXISTS idx_champion ON mc_index (champion_region)')
|
||||
cursor.execute('CREATE INDEX IF NOT EXISTS idx_catastrophic ON mc_index (catastrophic)')
|
||||
cursor.execute('CREATE INDEX IF NOT EXISTS idx_batch ON mc_index (batch_id)')
|
||||
|
||||
conn.commit()
|
||||
conn.close()
|
||||
|
||||
def _get_latest_batch(self) -> int:
|
||||
"""Get the highest batch ID in the index."""
|
||||
conn = sqlite3.connect(self.index_path)
|
||||
cursor = conn.cursor()
|
||||
|
||||
cursor.execute('SELECT MAX(batch_id) FROM mc_index')
|
||||
result = cursor.fetchone()
|
||||
conn.close()
|
||||
|
||||
return result[0] if result and result[0] else 0
|
||||
|
||||
def save_validation_results(self, results: List[ValidationResult], batch_id: int):
|
||||
"""Save validation results to manifest."""
|
||||
manifest_path = self.manifests_dir / f"batch_{batch_id:04d}_validation.json"
|
||||
|
||||
data = [r.to_dict() for r in results]
|
||||
with open(manifest_path, 'w') as f:
|
||||
json.dump(data, f, indent=2)
|
||||
|
||||
print(f"[OK] Saved validation manifest: {manifest_path}")
|
||||
|
||||
    def save_trial_results(
        self,
        results: List[MCTrialResult],
        batch_id: Optional[int] = None
    ):
        """
        Save trial results to Parquet and update index.

        Parameters
        ----------
        results : List[MCTrialResult]
            Trial results to save
        batch_id : int, optional
            Batch ID (auto-incremented if not provided)
        """
        if batch_id is None:
            batch_id = self.current_batch
            self.current_batch += 1

        # NOTE(review): the empty-results early return happens AFTER the
        # auto-increment above, so an empty save still consumes a batch id —
        # confirm that gap in batch numbering is intended.
        if not results:
            return

        # Parquet carries the full records; skipped entirely when pandas is
        # missing (the SQLite index below still gets the summaries).
        if PANDAS_AVAILABLE:
            self._save_parquet(results, batch_id)

        # Update SQLite index
        self._update_index(results, batch_id)

        print(f"[OK] Saved batch {batch_id}: {len(results)} trials")
|
||||
|
||||
def _save_parquet(self, results: List[MCTrialResult], batch_id: int):
|
||||
"""Save results to Parquet file."""
|
||||
parquet_path = self.results_dir / f"batch_{batch_id:04d}_results.parquet"
|
||||
|
||||
# Convert to DataFrame
|
||||
data = [r.to_dict() for r in results]
|
||||
df = pd.DataFrame(data)
|
||||
|
||||
# Save
|
||||
df.to_parquet(parquet_path, index=False, compression='zstd')
|
||||
|
||||
def _update_index(self, results: List[MCTrialResult], batch_id: int):
    """Upsert one summary row per trial into the SQLite index."""
    # A single timestamp for the whole batch keeps its rows comparable.
    created_at = int(datetime.now().timestamp())

    sql = '''
        INSERT OR REPLACE INTO mc_index
        (trial_id, batch_id, status, roi_pct, profit_factor, win_rate,
         max_dd_pct, sharpe, n_trades, champion_region, catastrophic, created_at)
        VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
    '''

    rows = [
        (
            res.trial_id,
            batch_id,
            res.status,
            res.roi_pct,
            res.profit_factor,
            res.win_rate,
            res.max_drawdown_pct,
            res.sharpe_ratio,
            res.n_trades,
            int(res.champion_region),
            int(res.catastrophic),
            created_at,
        )
        for res in results
    ]

    conn = sqlite3.connect(self.index_path)
    cursor = conn.cursor()
    cursor.executemany(sql, rows)
    conn.commit()
    conn.close()
|
||||
|
||||
def query_index(
    self,
    status: Optional[str] = None,
    min_roi: Optional[float] = None,
    champion_only: bool = False,
    catastrophic_only: bool = False,
    limit: int = 1000
) -> List[Dict[str, Any]]:
    """
    Query the SQLite index with optional filters.

    Parameters
    ----------
    status : str, optional
        Filter by status
    min_roi : float, optional
        Minimum ROI percentage
    champion_only : bool
        Only champion region configs
    catastrophic_only : bool
        Only catastrophic configs
    limit : int
        Maximum results

    Returns
    -------
    List[Dict]
        Matching index entries, ordered by ROI descending.
    """
    # Build the WHERE clauses first, then assemble the SQL in one go.
    clauses = []
    params = []

    if status:
        clauses.append(' AND status = ?')
        params.append(status)

    if min_roi is not None:
        clauses.append(' AND roi_pct >= ?')
        params.append(min_roi)

    if champion_only:
        clauses.append(' AND champion_region = 1')

    if catastrophic_only:
        clauses.append(' AND catastrophic = 1')

    clauses.append(' ORDER BY roi_pct DESC LIMIT ?')
    params.append(limit)

    sql = 'SELECT * FROM mc_index WHERE 1=1' + ''.join(clauses)

    conn = sqlite3.connect(self.index_path)
    # Row factory lets us hand back plain dicts instead of tuples.
    conn.row_factory = sqlite3.Row
    cursor = conn.cursor()
    cursor.execute(sql, params)
    rows = cursor.fetchall()
    conn.close()

    return [dict(row) for row in rows]
|
||||
|
||||
def get_corpus_stats(self) -> Dict[str, Any]:
    """Summarize the stored corpus from the SQLite index.

    Returns
    -------
    Dict[str, Any]
        Totals, per-status counts, champion/catastrophic counts, and
        ROI/Sharpe/drawdown aggregates over completed trials (0 when
        there are no completed trials).
    """
    conn = sqlite3.connect(self.index_path)
    try:
        cursor = conn.cursor()

        # Total trials
        cursor.execute('SELECT COUNT(*) FROM mc_index')
        total = cursor.fetchone()[0]

        # Breakdown by status
        cursor.execute('SELECT status, COUNT(*) FROM mc_index GROUP BY status')
        by_status = {row[0]: row[1] for row in cursor.fetchall()}

        # Champion region
        cursor.execute('SELECT COUNT(*) FROM mc_index WHERE champion_region = 1')
        champion_count = cursor.fetchone()[0]

        # Catastrophic
        cursor.execute('SELECT COUNT(*) FROM mc_index WHERE catastrophic = 1')
        catastrophic_count = cursor.fetchone()[0]

        # ROI aggregates over completed trials only
        cursor.execute('''
            SELECT AVG(roi_pct), MIN(roi_pct), MAX(roi_pct),
                   AVG(sharpe), AVG(max_dd_pct)
            FROM mc_index WHERE status = 'completed'
        ''')
        roi_stats = cursor.fetchone()
    finally:
        conn.close()

    def _stat(i):
        # SQL aggregates return NULL (None) when no completed rows exist.
        # The original `if roi_stats` truthiness check missed this: a tuple
        # of Nones is truthy, so the stats silently came back as None.
        return roi_stats[i] if roi_stats and roi_stats[i] is not None else 0

    return {
        'total_trials': total,
        'by_status': by_status,
        'champion_count': champion_count,
        'catastrophic_count': catastrophic_count,
        'avg_roi_pct': _stat(0),
        'min_roi_pct': _stat(1),
        'max_roi_pct': _stat(2),
        'avg_sharpe': _stat(3),
        'avg_max_dd_pct': _stat(4),
    }
|
||||
|
||||
def load_batch(self, batch_id: int) -> Optional[pd.DataFrame]:
    """Load one batch's results from its Parquet file, if present."""
    if not PANDAS_AVAILABLE:
        return None

    parquet_path = self.results_dir / f"batch_{batch_id:04d}_results.parquet"

    # A missing batch is not an error - just report "no data".
    return pd.read_parquet(parquet_path) if parquet_path.exists() else None
|
||||
|
||||
def load_corpus(self) -> Optional[pd.DataFrame]:
    """Concatenate every stored batch into a single DataFrame."""
    if not PANDAS_AVAILABLE:
        return None

    # Sorted glob keeps batches in ascending batch-ID order.
    frames = [
        pd.read_parquet(path)
        for path in sorted(self.results_dir.glob("batch_*_results.parquet"))
    ]

    if not frames:
        return None

    return pd.concat(frames, ignore_index=True)
|
||||
547
nautilus_dolphin/mc/mc_validator.py
Executable file
547
nautilus_dolphin/mc/mc_validator.py
Executable file
@@ -0,0 +1,547 @@
|
||||
"""
|
||||
Monte Carlo Configuration Validator
|
||||
===================================
|
||||
|
||||
Internal consistency validation for all constraint groups V1-V4.
|
||||
|
||||
Validation Pipeline:
|
||||
V1: Range check - each param within declared [lo, hi]
|
||||
V2: Constraint groups - CG-VD, CG-LEV, CG-EXIT, CG-RISK, CG-ACB, etc.
|
||||
V3: Cross-group check - inter-subsystem coherence
|
||||
V4: Degenerate check - would produce 0 trades or infinite leverage
|
||||
|
||||
Reference: MONTE_CARLO_SYSTEM_ENVELOPE_SPEC.md Section 4
|
||||
"""
|
||||
|
||||
from typing import Dict, List, Optional, Tuple, Any
|
||||
from dataclasses import dataclass
|
||||
from enum import Enum
|
||||
import numpy as np
|
||||
|
||||
from .mc_sampler import MCTrialConfig, MCSampler
|
||||
|
||||
|
||||
class ValidationStatus(Enum):
    """Validation result status.

    VALID means the configuration passed all four stages; each REJECTED_Vx
    value names the first stage of the pipeline that failed.
    """
    VALID = "VALID"
    REJECTED_V1 = "REJECTED_V1"  # Range check failed
    REJECTED_V2 = "REJECTED_V2"  # Constraint group failed
    REJECTED_V3 = "REJECTED_V3"  # Cross-group check failed
    REJECTED_V4 = "REJECTED_V4"  # Degenerate configuration
|
||||
|
||||
|
||||
@dataclass
class ValidationResult:
    """Outcome of running the V1-V4 pipeline on one trial configuration."""
    # Overall verdict: VALID or the first stage that rejected the config.
    status: ValidationStatus
    # Trial ID copied from the validated configuration.
    trial_id: int
    # Human-readable reason for rejection; None when the config is valid.
    reject_reason: Optional[str] = None
    # Non-fatal issues (e.g. from V3); normalized to [] in __post_init__.
    warnings: Optional[List[str]] = None

    def __post_init__(self):
        # Default is None (not []) to avoid the shared-mutable-default
        # pitfall; give each instance its own fresh list here.
        if self.warnings is None:
            self.warnings = []

    def is_valid(self) -> bool:
        """Check if configuration is valid."""
        return self.status == ValidationStatus.VALID

    def to_dict(self) -> Dict[str, Any]:
        """Convert to a JSON-serializable dictionary."""
        return {
            'status': self.status.value,
            'trial_id': self.trial_id,
            'reject_reason': self.reject_reason,
            'warnings': self.warnings,
        }
|
||||
|
||||
|
||||
class MCValidator:
    """
    Monte Carlo Configuration Validator.

    Implements the full V1-V4 validation pipeline:
      V1 range checks, V2 constraint-group rules, V3 cross-group
      coherence (warnings only), V4 degenerate-configuration heuristics.
    """

    def __init__(self, verbose: bool = False):
        """
        Initialize validator.

        Parameters
        ----------
        verbose : bool
            Print detailed validation messages
        """
        self.verbose = verbose
        # Sampler is held only for its PARAMS table, used by the V1 range checks.
        self.sampler = MCSampler()

    def validate(self, config: MCTrialConfig) -> ValidationResult:
        """
        Run full validation pipeline on a configuration.

        Stages run in order V1 -> V4 and short-circuit on the first failure.

        Parameters
        ----------
        config : MCTrialConfig
            Configuration to validate

        Returns
        -------
        ValidationResult
            Validation result with status and details
        """
        warnings = []

        # V1: Range checks
        v1_passed, v1_reason = self._validate_v1_ranges(config)
        if not v1_passed:
            return ValidationResult(
                status=ValidationStatus.REJECTED_V1,
                trial_id=config.trial_id,
                reject_reason=v1_reason,
                warnings=warnings
            )

        # V2: Constraint group rules
        v2_passed, v2_reason = self._validate_v2_constraint_groups(config)
        if not v2_passed:
            return ValidationResult(
                status=ValidationStatus.REJECTED_V2,
                trial_id=config.trial_id,
                reject_reason=v2_reason,
                warnings=warnings
            )

        # V3: Cross-group checks (may add warnings even when it passes)
        v3_passed, v3_reason, v3_warnings = self._validate_v3_cross_group(config)
        warnings.extend(v3_warnings)
        if not v3_passed:
            return ValidationResult(
                status=ValidationStatus.REJECTED_V3,
                trial_id=config.trial_id,
                reject_reason=v3_reason,
                warnings=warnings
            )

        # V4: Degenerate check (lightweight - no actual backtest)
        v4_passed, v4_reason = self._validate_v4_degenerate(config)
        if not v4_passed:
            return ValidationResult(
                status=ValidationStatus.REJECTED_V4,
                trial_id=config.trial_id,
                reject_reason=v4_reason,
                warnings=warnings
            )

        return ValidationResult(
            status=ValidationStatus.VALID,
            trial_id=config.trial_id,
            reject_reason=None,
            warnings=warnings
        )

    def _validate_v1_ranges(self, config: MCTrialConfig) -> Tuple[bool, Optional[str]]:
        """
        V1: Range checks - each param within declared [lo, hi].

        Returns (passed, reject_reason).
        """
        params = config._asdict()

        for name, pdef in self.sampler.PARAMS.items():
            # Derived/fixed params are computed, not sampled - no range to check.
            if pdef.param_type.value in ('derived', 'fixed'):
                continue

            value = params.get(name)
            if value is None:
                return False, f"Missing parameter: {name}"

            # Check lower bound
            if pdef.lo is not None and value < pdef.lo:
                return False, f"{name}={value} below minimum {pdef.lo}"

            # Check upper bound (handle dependent bounds)
            hi = pdef.hi
            if hi is None and name == 'vel_div_extreme':
                # Upper bound depends on the sampled threshold (both negative).
                hi = params.get('vel_div_threshold', -0.02) * 1.5

            if hi is not None and value > hi:
                return False, f"{name}={value} above maximum {hi}"

        return True, None

    def _validate_v2_constraint_groups(self, config: MCTrialConfig) -> Tuple[bool, Optional[str]]:
        """
        V2: Constraint group rules.

        Each group is checked in turn; the first failing group rejects
        the configuration. Returns (passed, reject_reason).
        """
        # CG-VD: Velocity Divergence thresholds
        if not self._check_cg_vd(config):
            return False, "CG-VD: Velocity divergence constraints violated"

        # CG-LEV: Leverage bounds
        if not self._check_cg_lev(config):
            return False, "CG-LEV: Leverage constraints violated"

        # CG-EXIT: Exit management
        if not self._check_cg_exit(config):
            return False, "CG-EXIT: Exit constraints violated"

        # CG-RISK: Combined risk
        if not self._check_cg_risk(config):
            return False, "CG-RISK: Risk cap exceeded"

        # CG-DC-LEV: DC leverage adjustments
        if not self._check_cg_dc_lev(config):
            return False, "CG-DC-LEV: DC leverage adjustment constraints violated"

        # CG-ACB: ACB beta bounds
        if not self._check_cg_acb(config):
            return False, "CG-ACB: ACB beta constraints violated"

        # CG-SP: SmartPlacer rates
        if not self._check_cg_sp(config):
            return False, "CG-SP: SmartPlacer rate constraints violated"

        # CG-OB-SIG: OB signal constraints
        if not self._check_cg_ob_sig(config):
            return False, "CG-OB-SIG: OB signal constraints violated"

        return True, None

    def _check_cg_vd(self, config: MCTrialConfig) -> bool:
        """CG-VD: Velocity Divergence constraints."""
        # extreme < threshold (both negative; extreme is more negative)
        if config.vel_div_extreme >= config.vel_div_threshold:
            if self.verbose:
                print(f"  CG-VD fail: extreme={config.vel_div_extreme} >= threshold={config.vel_div_threshold}")
            return False

        # extreme >= -0.15 (below this, no bars fire at all)
        if config.vel_div_extreme < -0.15:
            if self.verbose:
                print(f"  CG-VD fail: extreme={config.vel_div_extreme} < -0.15")
            return False

        # threshold <= -0.005 (above this, too many spurious entries)
        if config.vel_div_threshold > -0.005:
            if self.verbose:
                print(f"  CG-VD fail: threshold={config.vel_div_threshold} > -0.005")
            return False

        # abs(extreme / threshold) >= 1.5 (meaningful separation)
        separation = abs(config.vel_div_extreme / config.vel_div_threshold)
        if separation < 1.5:
            if self.verbose:
                print(f"  CG-VD fail: separation={separation:.2f} < 1.5")
            return False

        return True

    def _check_cg_lev(self, config: MCTrialConfig) -> bool:
        """CG-LEV: Leverage bounds."""
        # min_leverage < max_leverage
        if config.min_leverage >= config.max_leverage:
            if self.verbose:
                print(f"  CG-LEV fail: min={config.min_leverage} >= max={config.max_leverage}")
            return False

        # max_leverage - min_leverage >= 1.0 (meaningful range)
        if config.max_leverage - config.min_leverage < 1.0:
            if self.verbose:
                print(f"  CG-LEV fail: range={config.max_leverage - config.min_leverage:.2f} < 1.0")
            return False

        # max_leverage * fraction <= 2.0 (notional-capital safety cap)
        notional_cap = config.max_leverage * config.fraction
        if notional_cap > 2.0:
            if self.verbose:
                print(f"  CG-LEV fail: notional_cap={notional_cap:.2f} > 2.0")
            return False

        return True

    def _check_cg_exit(self, config: MCTrialConfig) -> bool:
        """CG-EXIT: Exit management constraints."""
        tp_decimal = config.fixed_tp_pct
        sl_decimal = config.stop_pct / 100.0  # Convert from percentage to decimal

        # TP must be achievable before SL
        if tp_decimal > sl_decimal * 5.0:
            if self.verbose:
                print(f"  CG-EXIT fail: TP={tp_decimal:.4f} > SL*5={sl_decimal*5:.4f}")
            return False

        # minimum 30 bps TP
        if tp_decimal < 0.0030:
            if self.verbose:
                print(f"  CG-EXIT fail: TP={tp_decimal:.4f} < 0.0030")
            return False

        # minimum 20 bps SL width
        if sl_decimal < 0.0020:
            if self.verbose:
                print(f"  CG-EXIT fail: SL={sl_decimal:.4f} < 0.0020")
            return False

        # minimum meaningful hold period
        if config.max_hold_bars < 20:
            if self.verbose:
                print(f"  CG-EXIT fail: max_hold={config.max_hold_bars} < 20")
            return False

        # TP:SL ratio >= 0.10x
        if sl_decimal > 0 and tp_decimal / sl_decimal < 0.10:
            if self.verbose:
                print(f"  CG-EXIT fail: TP/SL ratio={tp_decimal/sl_decimal:.2f} < 0.10")
            return False

        return True

    def _check_cg_risk(self, config: MCTrialConfig) -> bool:
        """CG-RISK: Combined risk constraints."""
        # fraction * max_leverage <= 2.0 (mirrors CG-LEV)
        max_notional_fraction = config.fraction * config.max_leverage
        if max_notional_fraction > 2.0:
            if self.verbose:
                print(f"  CG-RISK fail: max_notional={max_notional_fraction:.2f} > 2.0")
            return False

        # minimum meaningful position
        if max_notional_fraction < 0.10:
            if self.verbose:
                print(f"  CG-RISK fail: max_notional={max_notional_fraction:.2f} < 0.10")
            return False

        return True

    def _check_cg_dc_lev(self, config: MCTrialConfig) -> bool:
        """CG-DC-LEV: DC leverage adjustment constraints."""
        if not config.use_direction_confirm:
            # DC not used - constraints don't apply
            return True

        # dc_leverage_boost >= 1.0 (must boost, not reduce)
        if config.dc_leverage_boost < 1.0:
            if self.verbose:
                print(f"  CG-DC-LEV fail: boost={config.dc_leverage_boost:.2f} < 1.0")
            return False

        # dc_leverage_reduce < 1.0 (must reduce, not boost)
        if config.dc_leverage_reduce >= 1.0:
            if self.verbose:
                print(f"  CG-DC-LEV fail: reduce={config.dc_leverage_reduce:.2f} >= 1.0")
            return False

        # DC swing bounded: boost * (1/reduce) <= 4.0
        dc_swing = config.dc_leverage_boost * (1.0 / config.dc_leverage_reduce)
        if dc_swing > 4.0:
            if self.verbose:
                print(f"  CG-DC-LEV fail: dc_swing={dc_swing:.2f} > 4.0")
            return False

        return True

    def _check_cg_acb(self, config: MCTrialConfig) -> bool:
        """CG-ACB: ACB beta bounds."""
        # acb_beta_low < acb_beta_high
        if config.acb_beta_low >= config.acb_beta_high:
            if self.verbose:
                print(f"  CG-ACB fail: low={config.acb_beta_low:.2f} >= high={config.acb_beta_high:.2f}")
            return False

        # acb_beta_high - acb_beta_low >= 0.20 (meaningful dynamic range)
        if config.acb_beta_high - config.acb_beta_low < 0.20:
            if self.verbose:
                print(f"  CG-ACB fail: range={config.acb_beta_high - config.acb_beta_low:.2f} < 0.20")
            return False

        # acb_beta_high <= 1.50 (cap at 150%)
        if config.acb_beta_high > 1.50:
            if self.verbose:
                print(f"  CG-ACB fail: high={config.acb_beta_high:.2f} > 1.50")
            return False

        return True

    def _check_cg_sp(self, config: MCTrialConfig) -> bool:
        """CG-SP: SmartPlacer rate constraints."""
        if not config.use_sp_slippage:
            # Slippage disabled - rates don't matter
            return True

        # Rates must be in [0, 1]
        if not (0.0 <= config.sp_maker_entry_rate <= 1.0):
            if self.verbose:
                print(f"  CG-SP fail: entry_rate={config.sp_maker_entry_rate:.2f} not in [0,1]")
            return False

        if not (0.0 <= config.sp_maker_exit_rate <= 1.0):
            if self.verbose:
                print(f"  CG-SP fail: exit_rate={config.sp_maker_exit_rate:.2f} not in [0,1]")
            return False

        return True

    def _check_cg_ob_sig(self, config: MCTrialConfig) -> bool:
        """CG-OB-SIG: OB signal constraints."""
        # ob_imbalance_bias in [-1.0, 1.0]
        if not (-1.0 <= config.ob_imbalance_bias <= 1.0):
            if self.verbose:
                print(f"  CG-OB-SIG fail: bias={config.ob_imbalance_bias:.2f} not in [-1,1]")
            return False

        # ob_depth_scale > 0
        if config.ob_depth_scale <= 0:
            if self.verbose:
                print(f"  CG-OB-SIG fail: depth_scale={config.ob_depth_scale:.2f} <= 0")
            return False

        return True

    def _validate_v3_cross_group(
        self, config: MCTrialConfig
    ) -> Tuple[bool, Optional[str], List[str]]:
        """
        V3: Cross-group coherence checks.

        Currently all V3 findings are advisory - the stage never rejects,
        it only accumulates warnings. Returns (passed, reason, warnings).
        """
        warnings = []

        # Signal threshold vs exit: TP must be achievable before max_hold_bars expires
        # Approximate: at typical vol, price moves ~0.03% per 5s bar
        # NOTE(review): the 0.0003 per-bar move is a hard-coded heuristic - confirm
        expected_tp_bars = config.fixed_tp_pct / 0.0003
        if expected_tp_bars > config.max_hold_bars * 3:
            warnings.append(
                f"TP_TIME_RISK: expected_tp_bars={expected_tp_bars:.0f} > max_hold*3={config.max_hold_bars*3}"
            )

        # Leverage convexity vs range: extreme convexity with wide leverage range
        # produces near-binary leverage
        if config.leverage_convexity > 5.0 and (config.max_leverage - config.min_leverage) > 5.0:
            warnings.append(
                f"HIGH_CONVEXITY_WIDE_RANGE: near-binary leverage behaviour likely"
            )

        # OB skip + DC skip double-filtering: very few trades may fire
        if config.dc_skip_contradicts and config.ob_imbalance_bias > 0.15:
            warnings.append(
                f"DOUBLE_FILTER_RISK: DC skip + strong OB contradiction may starve trades"
            )

        # Reject only on critical cross-group violations
        # (none currently defined - all are warnings)

        return True, None, warnings

    def _validate_v4_degenerate(self, config: MCTrialConfig) -> Tuple[bool, Optional[str]]:
        """
        V4: Degenerate configuration check (lightweight heuristics).

        Full pre-flight with 500 bars is done in mc_executor during actual trial.
        This is just a quick sanity check. Returns (passed, reject_reason).
        """
        # Check for numerical extremes that would cause issues

        # Fraction too small - would produce micro-positions
        if config.fraction < 0.02:
            return False, f"FRACTION_TOO_SMALL: fraction={config.fraction} < 0.02"

        # Leverage range too narrow for convexity to matter
        leverage_range = config.max_leverage - config.min_leverage
        if leverage_range < 0.5 and config.leverage_convexity > 2.0:
            return False, f"NARROW_RANGE_HIGH_CONVEXITY: range={leverage_range:.2f}, convexity={config.leverage_convexity:.2f}"

        # Max hold too short for vol filter to stabilize
        if config.max_hold_bars < config.vd_trend_lookback + 10:
            return False, f"HOLD_TOO_SHORT: max_hold={config.max_hold_bars} < trend_lookback+10={config.vd_trend_lookback+10}"

        # IRP lookback too short for meaningful alignment
        if config.lookback < 50:
            return False, f"LOOKBACK_TOO_SHORT: lookback={config.lookback} < 50"

        return True, None

    def validate_batch(
        self,
        configs: List[MCTrialConfig]
    ) -> List[ValidationResult]:
        """
        Validate a batch of configurations.

        Parameters
        ----------
        configs : List[MCTrialConfig]
            Configurations to validate

        Returns
        -------
        List[ValidationResult]
            Validation results (same order as input)
        """
        results = []
        for config in configs:
            result = self.validate(config)
            results.append(result)
        return results

    def get_validity_stats(self, results: List[ValidationResult]) -> Dict[str, Any]:
        """
        Get statistics about validation results.

        Returns counts per status, overall validity rate, and a histogram
        of rejection reasons keyed by the reason's prefix (text before ':').
        """
        total = len(results)
        if total == 0:
            return {'total': 0}

        by_status = {}
        for status in ValidationStatus:
            by_status[status.value] = sum(1 for r in results if r.status == status)

        rejection_reasons = {}
        for r in results:
            if r.reject_reason:
                # Group by the reason prefix (e.g. "CG-LEV") rather than the full message.
                reason = r.reject_reason.split(':')[0] if ':' in r.reject_reason else r.reject_reason
                rejection_reasons[reason] = rejection_reasons.get(reason, 0) + 1

        return {
            'total': total,
            'valid': by_status.get(ValidationStatus.VALID.value, 0),
            'rejected_v1': by_status.get(ValidationStatus.REJECTED_V1.value, 0),
            'rejected_v2': by_status.get(ValidationStatus.REJECTED_V2.value, 0),
            'rejected_v3': by_status.get(ValidationStatus.REJECTED_V3.value, 0),
            'rejected_v4': by_status.get(ValidationStatus.REJECTED_V4.value, 0),
            'validity_rate': by_status.get(ValidationStatus.VALID.value, 0) / total,
            'rejection_reasons': rejection_reasons,
        }
|
||||
|
||||
|
||||
def test_validator():
    """Quick smoke test of the validator.

    Generates a small batch of sampled configurations, validates them,
    prints summary stats plus up to five sample rejections, and returns
    the full list of ValidationResults.
    """
    validator = MCValidator(verbose=True)
    sampler = MCSampler(base_seed=42)

    # Generate some test configurations
    trials = sampler.generate_trials(n_samples_per_switch=10, max_trials=100)

    # Validate
    results = validator.validate_batch(trials)

    # Stats
    stats = validator.get_validity_stats(results)
    print(f"\nValidation Stats:")
    print(f"  Total: {stats['total']}")
    print(f"  Valid: {stats['valid']} ({stats['validity_rate']*100:.1f}%)")
    print(f"  Rejected V1: {stats['rejected_v1']}")
    print(f"  Rejected V2: {stats['rejected_v2']}")
    print(f"  Rejected V3: {stats['rejected_v3']}")
    print(f"  Rejected V4: {stats['rejected_v4']}")

    # Show up to 5 sample rejections. The original recomputed the full
    # invalid list on every iteration (O(n^2)) and, whenever more than 5
    # rejections existed, broke after printing only the first one.
    print("\nSample Rejections:")
    shown = 0
    for r in results:
        if not r.is_valid():
            print(f"  Trial {r.trial_id}: {r.status.value} - {r.reject_reason}")
            shown += 1
            if shown >= 5:
                break

    return results
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Run the smoke test when executed directly (no-op on import).
    test_validator()
|
||||
Reference in New Issue
Block a user