initial: import DOLPHIN baseline 2026-04-21 from dolphinng5_predict working tree
Includes core prod + GREEN/BLUE subsystems:
- prod/ (BLUE harness, configs, scripts, docs)
- nautilus_dolphin/ (GREEN Nautilus-native impl + dvae/ preserved)
- adaptive_exit/ (AEM engine + models/bucket_assignments.pkl)
- Observability/ (EsoF advisor, TUI, dashboards)
- external_factors/ (EsoF producer)
- mc_forewarning_qlabs_fork/ (MC regime/envelope)

Excludes runtime caches, logs, backups, and reproducible artifacts per .gitignore.
This commit is contained in:
513
nautilus_dolphin/tests/test_nd_vs_standalone_comparison.py
Executable file
513
nautilus_dolphin/tests/test_nd_vs_standalone_comparison.py
Executable file
@@ -0,0 +1,513 @@
"""
CRITICAL TEST: Nautilus-Dolphin vs Standalone DOLPHIN Comparison
================================================================

This test verifies that Nautilus-Dolphin produces IDENTICAL results
to the standalone DOLPHIN implementation (itest_v7.py).

MUST MATCH:
- Trade count
- Win rate
- Profit factor
- ROI
- Entry/exit prices
- P&L per trade
- Exit types
"""

import json
from dataclasses import dataclass
from pathlib import Path
from typing import Dict, List, Any

import pytest


# ── Configuration ────────────────────────────────────────────────────────────
# Match the itest_v7 "tight_3_3" strategy configuration
REFERENCE_STRATEGY = "tight_3_3"

# Both reference artifacts are looked up three directory levels above this
# test file (file -> tests/ -> package dir -> its parent).
REFERENCE_RESULTS_FILE = Path(__file__).parent.parent.parent / "itest_v7_results.json"
REFERENCE_TRADES_FILE = Path(__file__).parent.parent.parent / "itest_v7_trades.jsonl"

TOLERANCE_PCT = 0.001  # 0.1% tolerance for floating point differences


@dataclass
class Trade:
    """Trade record for comparison.

    One instance is built for each line of the reference trades file whose
    ``strategy`` equals ``REFERENCE_STRATEGY``; every field is copied
    verbatim from the JSON key of the same name, with no validation.
    """
    strategy: str      # strategy name the trade belongs to
    date: str          # trade date exactly as stored in the reference file
    scan_idx: int      # scan index recorded for the trade
    direction: str     # direction label as emitted by the reference run
    entry_price: float  # the structure tests require this to be > 0
    exit_price: float   # the structure tests require this to be > 0
    exit_type: str     # tests expect 'trailing_stop', 'stop_loss', 'target' or 'max_hold'
    bars_held: int     # number of bars the position was held
    leverage: float    # tests expect 2.5 to appear among the reference trades
    notional: float    # the structure tests require this to be > 0
    gross_pnl: float   # P&L before fees
    fees: float        # tests expect gross_pnl - fees to equal net_pnl within 0.01
    net_pnl: float     # P&L after fees
    is_winner: bool    # compared against ``net_pnl > 0`` by the consistency test
    trade_asset: str   # asset symbol, e.g. 'BTCUSDT'


@dataclass
class StrategyMetrics:
    """Strategy metrics for comparison.

    Holds the high-level summary numbers of one strategy as read from the
    ``strategies`` section of the reference results file.
    """
    name: str             # strategy identifier
    capital: float        # sanity-checked by the tests to be > 0
    roi_pct: float        # return on investment; printed with a '%' suffix
    trades: int           # total number of trades
    wins: int             # number of winning trades
    win_rate: float       # tests expect a value strictly between 0 and 100
    profit_factor: float  # profit factor as reported by the reference run
    avg_win: float        # average winning trade; printed as a dollar amount
    avg_loss: float       # average losing trade; printed as a dollar amount
    # The four exit counters below are expected to sum to ``trades``.
    stop_exits: int
    trailing_exits: int
    target_exits: int
    hold_exits: int


# Global storage for loaded data (module-level cache: each file is parsed at
# most once per test session, and only after it has been found on disk).
_ref_results = None
_ref_trades = None


def load_reference_data():
    """Load reference data once.

    Reads ``REFERENCE_RESULTS_FILE`` (a single JSON document) and
    ``REFERENCE_TRADES_FILE`` (one JSON object per line), keeping only the
    trades whose ``strategy`` equals ``REFERENCE_STRATEGY``. Results are
    cached in the module-level ``_ref_results`` / ``_ref_trades``.

    Returns:
        A ``(results, trades)`` tuple. ``results`` is the parsed JSON
        document and ``trades`` a list of ``Trade``; either element is
        ``None`` when its file does not exist.

    Raises:
        json.JSONDecodeError: if a file contains malformed JSON.
        KeyError: if a matching trade row lacks one of the ``Trade`` fields.
    """
    global _ref_results, _ref_trades

    if _ref_results is None and REFERENCE_RESULTS_FILE.exists():
        with open(REFERENCE_RESULTS_FILE, 'r', encoding='utf-8') as f:
            _ref_results = json.load(f)

    if _ref_trades is None and REFERENCE_TRADES_FILE.exists():
        # Build into a local list and publish it only after the whole file
        # parsed, so a failure part-way through never leaves a truncated
        # list cached for later callers.
        trades = []
        with open(REFERENCE_TRADES_FILE, 'r', encoding='utf-8') as f:
            for line in f:
                line = line.strip()
                if not line:
                    # Tolerate blank lines (e.g. a trailing newline) in JSONL.
                    continue
                data = json.loads(line)
                if data.get('strategy') != REFERENCE_STRATEGY:
                    continue
                trades.append(Trade(
                    strategy=data['strategy'],
                    date=data['date'],
                    scan_idx=data['scan_idx'],
                    direction=data['direction'],
                    entry_price=data['entry_price'],
                    exit_price=data['exit_price'],
                    exit_type=data['exit_type'],
                    bars_held=data['bars_held'],
                    leverage=data['leverage'],
                    notional=data['notional'],
                    gross_pnl=data['gross_pnl'],
                    fees=data['fees'],
                    net_pnl=data['net_pnl'],
                    is_winner=data['is_winner'],
                    trade_asset=data['trade_asset'],
                ))
        _ref_trades = trades

    return _ref_results, _ref_trades


class TestNDvsStandaloneComparison:
    """Test Nautilus-Dolphin matches standalone DOLPHIN results.

    Most tests here validate the reference artifacts produced by the
    standalone run; each one skips when its reference file is not present.
    """

    def test_reference_results_exist(self):
        """Verify reference results file exists and has expected structure."""
        reference_results, _ = load_reference_data()

        if reference_results is None:
            pytest.skip(f"Reference results not found: {REFERENCE_RESULTS_FILE}")

        assert 'strategies' in reference_results
        assert REFERENCE_STRATEGY in reference_results['strategies']
        assert 'total_scans' in reference_results
        print(f"\nReference data loaded: {reference_results['total_scans']} scans")

    def test_reference_trades_exist(self):
        """Verify reference trades exist for the strategy."""
        _, reference_trades = load_reference_data()

        if reference_trades is None:
            pytest.skip(f"Reference trades not found: {REFERENCE_TRADES_FILE}")

        assert len(reference_trades) > 0
        print(f"\nReference trades loaded: {len(reference_trades)} trades for {REFERENCE_STRATEGY}")

    def test_strategy_metrics_match(self):
        """Verify ND produces matching high-level metrics.

        This test compares:
        - Trade count
        - Win rate
        - Profit factor
        - ROI

        NOTE: at present only the reference side is loaded, logged and
        sanity-checked; no ND-produced metrics are compared here.
        """
        reference_results, _ = load_reference_data()

        if reference_results is None:
            pytest.skip("Reference results not available")

        ref_strategy = reference_results['strategies'][REFERENCE_STRATEGY]

        # Store reference metrics for comparison
        ref_metrics = StrategyMetrics(
            name=REFERENCE_STRATEGY,
            capital=ref_strategy['capital'],
            roi_pct=ref_strategy['roi_pct'],
            trades=ref_strategy['trades'],
            wins=ref_strategy['wins'],
            win_rate=ref_strategy['win_rate'],
            profit_factor=ref_strategy['profit_factor'],
            avg_win=ref_strategy['avg_win'],
            avg_loss=ref_strategy['avg_loss'],
            stop_exits=ref_strategy['stop_exits'],
            trailing_exits=ref_strategy['trailing_exits'],
            target_exits=ref_strategy['target_exits'],
            hold_exits=ref_strategy['hold_exits'],
        )

        # Log reference metrics
        print(f"\n{'='*60}")
        print(f"Reference Strategy: {REFERENCE_STRATEGY}")
        print(f"{'='*60}")
        print(f"Capital: ${ref_metrics.capital:,.2f}")
        print(f"ROI: {ref_metrics.roi_pct:.2f}%")
        print(f"Trades: {ref_metrics.trades}")
        print(f"Win Rate: {ref_metrics.win_rate:.2f}%")
        print(f"Profit Factor: {ref_metrics.profit_factor:.4f}")
        print(f"Avg Win: ${ref_metrics.avg_win:.2f}")
        print(f"Avg Loss: ${ref_metrics.avg_loss:.2f}")
        print(f"Exit Types: stop={ref_metrics.stop_exits}, trail={ref_metrics.trailing_exits}, target={ref_metrics.target_exits}, hold={ref_metrics.hold_exits}")

        # Basic sanity checks on reference data
        assert ref_metrics.trades > 100, "Expected significant number of trades"
        assert 0 < ref_metrics.win_rate < 100, "Win rate should be between 0-100%"
        assert ref_metrics.capital > 0, "Capital should be positive"

    def test_trade_details_structure(self):
        """Verify structure of reference trades (checks the first trade only)."""
        _, reference_trades = load_reference_data()

        if not reference_trades:
            pytest.skip("No reference trades loaded")

        trade = reference_trades[0]

        # Check required fields exist
        assert trade.strategy == REFERENCE_STRATEGY
        assert trade.entry_price > 0
        assert trade.exit_price > 0
        assert trade.notional > 0
        assert trade.exit_type in ['trailing_stop', 'stop_loss', 'target', 'max_hold']

        print(f"\nSample trade: {trade.trade_asset} {trade.direction}")
        print(f" Date: {trade.date}, Scan: {trade.scan_idx}")
        print(f" Entry: ${trade.entry_price:.2f} -> Exit: ${trade.exit_price:.2f}")
        print(f" P&L: ${trade.net_pnl:.4f}, Exit Type: {trade.exit_type}")
        print(f" Bars: {trade.bars_held}, Leverage: {trade.leverage}x")

    def test_exit_type_distribution(self):
        """Verify exit type distribution matches expectations.

        The four exit counters must add up to the strategy's trade count.
        """
        reference_results, _ = load_reference_data()

        if reference_results is None:
            pytest.skip("Reference results not available")

        ref_strategy = reference_results['strategies'][REFERENCE_STRATEGY]

        trade_count = ref_strategy['trades']
        stop = ref_strategy['stop_exits']
        trailing = ref_strategy['trailing_exits']
        target = ref_strategy['target_exits']
        hold = ref_strategy['hold_exits']

        total_exits = stop + trailing + target + hold

        assert total_exits == trade_count, "Exit count should match trade count"
        # Guard the percentage report below against dividing by zero.
        assert trade_count > 0, "Reference strategy has no trades"

        # Log distribution
        print("\nExit Type Distribution:")
        print(f" Trailing: {trailing} ({100*trailing/trade_count:.1f}%)")
        print(f" Stop: {stop} ({100*stop/trade_count:.1f}%)")
        print(f" Target: {target} ({100*target/trade_count:.1f}%)")
        print(f" Hold: {hold} ({100*hold/trade_count:.1f}%)")

    def test_pnl_calculation_consistency(self):
        """Verify P&L calculations in reference trades are consistent.

        Checks: gross_pnl - fees = net_pnl (within tolerance)

        Mismatches between ``is_winner`` and the sign of ``net_pnl`` are
        counted and reported but do not fail the test.
        """
        _, reference_trades = load_reference_data()

        if not reference_trades:
            pytest.skip("No reference trades loaded")

        sample = reference_trades[:100]  # Check first 100
        sample_size = len(sample)  # > 0: empty input was skipped above

        calc_errors = []
        winner_errors = []

        for i, trade in enumerate(sample):
            # Check 1: Verify gross_pnl - fees = net_pnl
            calc_net = trade.gross_pnl - trade.fees
            if abs(calc_net - trade.net_pnl) > 0.01:
                calc_errors.append(i)

            # Check 2: Verify is_winner matches net_pnl sign
            # A trade is a winner if net_pnl > 0 (strictly positive)
            expected_winner = trade.net_pnl > 0
            if expected_winner != trade.is_winner:
                winner_errors.append(i)

        # Report findings. Percentages use the real sample size so they stay
        # correct when fewer than 100 trades are available.
        calc_pct = 100 * len(calc_errors) / sample_size
        winner_pct = 100 * len(winner_errors) / sample_size
        print("\nP&L Calculation Check (first 100 trades):")
        print(f" Calculation errors: {len(calc_errors)} ({calc_pct:.0f}%)")
        print(f" Winner flag errors: {len(winner_errors)} ({winner_pct:.0f}%)")

        if calc_errors[:5]:
            print(f" Sample calc errors: {calc_errors[:5]}")

        # The key check: gross_pnl - fees should equal net_pnl
        # Some small discrepancies are acceptable due to rounding
        calc_error_rate = len(calc_errors) / sample_size
        assert calc_error_rate < 0.05, f"Too many P&L calculation errors: {calc_error_rate:.1%}"

    def test_nd_configuration_matches(self):
        """Verify ND configuration matches standalone.

        This test ensures the Nautilus-Dolphin configuration
        matches the itest_v7 tight_3_3 configuration.
        """
        from nautilus_dolphin.nautilus.strategy_registration import DolphinStrategyConfig

        # ND configuration
        nd_config = DolphinStrategyConfig(
            venue="BINANCE_FUTURES",
            max_leverage=2.5,  # From itest_v7
            capital_fraction=0.15,  # From itest_v7
            tp_bps=99,  # ~1% target (not heavily used in tight_3_3)
            max_hold_bars=120,  # From itest_v7
            acb_enabled=True,
        )

        # Key parameters that MUST match itest_v7
        assert nd_config.max_leverage == 2.5, "Leverage must match"
        assert nd_config.capital_fraction == 0.15, "Capital fraction must match"
        assert nd_config.max_hold_bars == 120, "Max hold must match"

        print("\nND Configuration validated:")
        print(f" Max Leverage: {nd_config.max_leverage}x")
        print(f" Capital Fraction: {nd_config.capital_fraction}")
        print(f" Max Hold Bars: {nd_config.max_hold_bars}")
        print(f" ACB Enabled: {nd_config.acb_enabled}")


class TestNDSignalGenerationStack:
    """Test Nautilus-Dolphin signal generation stack works correctly.

    Project modules are imported inside each test so that an import failure
    is reported against the specific test rather than at collection time.
    """

    def test_data_adapter_imports(self):
        """Verify data adapter components import correctly."""
        from nautilus_dolphin.nautilus.data_adapter import (
            JSONEigenvalueDataAdapter,
            BacktestDataLoader
        )
        assert JSONEigenvalueDataAdapter is not None
        assert BacktestDataLoader is not None

    def test_data_catalog_imports(self):
        """Verify data catalog components import correctly."""
        from nautilus_dolphin.nautilus.data_catalogue import (
            DataCatalogueConfig,
            BacktestEngineConfig,
            DataImporter
        )
        assert DataCatalogueConfig is not None
        assert BacktestEngineConfig is not None
        assert DataImporter is not None

    def test_strategy_can_calculate_position_size(self):
        """Verify strategy can calculate position sizes matching itest_v7.

        Only loose bounds are asserted (positive, and at most half of the
        account balance); the itest_v7 base figure is printed for reference.
        """
        from nautilus_dolphin.nautilus.strategy import DolphinExecutionStrategyForTesting

        strategy = DolphinExecutionStrategyForTesting({
            'venue': 'BINANCE_FUTURES',
            'max_leverage': 2.5,
            'capital_fraction': 0.15,
            'acb_enabled': False,  # Disable ACB for this test
        })

        # Test signal matching itest_v7 parameters
        signal = {
            'strength': 0.75,
            'bucket_boost': 1.0,
            'streak_mult': 1.0,
            'trend_mult': 1.0,
        }

        account_balance = 10000.0
        notional = strategy.calculate_position_size(signal, account_balance)

        # itest_v7: notional = 10000 * 0.15 * 2.5 = 3750
        expected_base = account_balance * 0.15 * 2.5

        print("\nPosition Size Calculation:")
        print(f" Account: ${account_balance:,.2f}")
        print(f" Calculated Notional: ${notional:,.2f}")
        print(f" Expected (itest_v7): ${expected_base:,.2f}")

        # Allow for minor differences due to ACB or other factors
        assert notional > 0, "Notional must be positive"
        assert notional <= account_balance * 0.5, "Notional should respect sanity cap"

    def test_strategy_filters_match(self):
        """Verify strategy filters match itest_v7 logic.

        The outcome for the valid signal is only printed; the assertion
        covers the excluded-asset rejection.
        """
        from nautilus_dolphin.nautilus.strategy import DolphinExecutionStrategyForTesting

        strategy = DolphinExecutionStrategyForTesting({
            'venue': 'BINANCE_FUTURES',
            'irp_alignment_min': 0.45,
            'momentum_magnitude_min': 0.000075,
            'excluded_assets': ['TUSDUSDT', 'USDCUSDT'],
            'max_concurrent_positions': 10,
        })

        # Test valid signal
        valid_signal = {
            'irp_alignment': 0.5,
            'direction_confirm': True,
            'lookback_momentum': 0.0001,
            'asset': 'BTCUSDT',
        }

        # Manually set volatility detector to high regime
        strategy.volatility_detector._regime = 'high'
        strategy.volatility_detector._history = [0.0001] * 150

        result = strategy._should_trade(valid_signal)
        print(f"\nValid signal check: '{result}'")

        # Test excluded asset
        excluded_signal = {
            'irp_alignment': 0.5,
            'direction_confirm': True,
            'lookback_momentum': 0.0001,
            'asset': 'USDCUSDT',  # Excluded
        }

        result_excluded = strategy._should_trade(excluded_signal)
        print(f"Excluded asset check: '{result_excluded}'")

        assert result_excluded == "asset_excluded", "Should reject excluded asset"


class TestTradeByTradeComparison:
    """Trade-by-trade comparison between ND and standalone.

    This is the MOST CRITICAL test - every trade must match.

    NOTE: the tests in this class currently inspect the reference trades
    only; each one skips when no reference trades are loaded.
    """

    def test_first_10_trades_structure(self):
        """Verify structure of first 10 reference trades (prints them; no assertions)."""
        _, reference_trades = load_reference_data()

        if not reference_trades:
            pytest.skip("No reference trades loaded")

        print(f"\n{'='*60}")
        print("First 10 Reference Trades:")
        print(f"{'='*60}")

        for i, trade in enumerate(reference_trades[:10]):
            print(f"\nTrade {i+1}: {trade.trade_asset} {trade.direction}")
            print(f" Date: {trade.date}, Scan: {trade.scan_idx}")
            print(f" Entry: ${trade.entry_price:.2f} -> Exit: ${trade.exit_price:.2f}")
            print(f" P&L: ${trade.net_pnl:.4f}, Exit: {trade.exit_type}")
            print(f" Bars: {trade.bars_held}, Leverage: {trade.leverage}x")

    def test_entry_exit_prices_are_reasonable(self):
        """Verify entry/exit prices are within reasonable ranges.

        Looks at the first 100 trades. A trade counts as unreasonable when
        either price is non-positive, or when its asset has a known range
        and the entry price falls outside it. Fails at 10% or more.
        """
        _, reference_trades = load_reference_data()

        if not reference_trades:
            pytest.skip("No reference trades loaded")

        # (min, max) entry-price bounds per known asset; others are not range-checked
        crypto_assets = {
            'BTCUSDT': (20000, 100000),
            'ETHUSDT': (1000, 5000),
            'ADAUSDT': (0.2, 2.0),
            'SOLUSDT': (10, 200),
        }

        sample = reference_trades[:100]
        unreasonable = 0
        for trade in sample:
            # Check if prices are positive
            if trade.entry_price <= 0 or trade.exit_price <= 0:
                unreasonable += 1
                continue

            # Check price range for known assets (direct lookup, no scan)
            bounds = crypto_assets.get(trade.trade_asset)
            if bounds is None:
                continue
            min_p, max_p = bounds
            if not (min_p <= trade.entry_price <= max_p):
                unreasonable += 1

        error_rate = unreasonable / len(sample)
        assert error_rate < 0.1, f"Too many unreasonable prices: {error_rate:.1%}"

    def test_leverage_is_consistent(self):
        """Verify the expected leverage appears among the reference trades."""
        _, reference_trades = load_reference_data()

        if not reference_trades:
            pytest.skip("No reference trades loaded")

        leverages = {t.leverage for t in reference_trades}
        print(f"\nLeverage values used: {leverages}")

        # itest_v7 uses 2.5x leverage for tight_3_3
        assert 2.5 in leverages, "Expected 2.5x leverage in trades"

    def test_fees_are_calculated(self):
        """Verify fees are calculated for all trades (more than 99% must have fees > 0)."""
        _, reference_trades = load_reference_data()

        if not reference_trades:
            pytest.skip("No reference trades loaded")

        trades_with_fees = sum(1 for t in reference_trades if t.fees > 0)
        fee_rate = trades_with_fees / len(reference_trades)

        print(f"\nFee coverage: {trades_with_fees}/{len(reference_trades)} ({fee_rate:.1%})")

        # All trades should have fees
        assert fee_rate > 0.99, "Expected fees on almost all trades"


# ── Main Comparison Test ─────────────────────────────────────────────────────
@pytest.mark.skip(reason="Full ND backtest comparison - run after ND backtest implementation")
class TestFullNDvsStandaloneBacktest:
    """Full backtest comparison - requires ND backtest results.

    The whole class is skipped; the test bodies are placeholders until ND
    backtest results are available to compare against the reference data.
    """

    def test_nd_backtest_produces_results(self):
        """Verify ND backtest runs and produces results."""
        # TODO: Run ND backtest and load results
        pass

    def test_trade_count_matches(self):
        """Verify ND produces same number of trades."""
        reference_results, _ = load_reference_data()
        if reference_results is None:
            pytest.skip("Reference results not available")

        # Reference trade count, kept for the comparison still to be written.
        ref_trades = reference_results['strategies'][REFERENCE_STRATEGY]['trades']
        # TODO: Compare with ND results
        pass

    def test_trade_by_trade_match(self):
        """CRITICAL: Verify every trade matches."""
        _, reference_trades = load_reference_data()
        if not reference_trades:
            pytest.skip("Reference trades not available")

        # TODO: Implement trade-by-trade comparison
        pass
Reference in New Issue
Block a user