initial: import DOLPHIN baseline 2026-04-21 from dolphinng5_predict working tree

Includes core prod + GREEN/BLUE subsystems: - prod/ (BLUE harness, configs, scripts, docs) - nautilus_dolphin/ (GREEN Nautilus-native impl + dvae/ preserved) - adaptive_exit/ (AEM engine + models/bucket_assignments.pkl) - Observability/ (EsoF advisor, TUI, dashboards) - external_factors/ (EsoF producer) - mc_forewarning_qlabs_fork/ (MC regime/envelope) Excludes runtime caches, logs, backups, and reproducible artifacts per .gitignore.
2026-04-21 16:58:38 +02:00
commit 01c19662cb
643 changed files with 260241 additions and 0 deletions
--- a/nautilus_dolphin/tests/test_nd_vs_standalone_comparison.py
+++ b/nautilus_dolphin/tests/test_nd_vs_standalone_comparison.py
@@ -0,0 +1,513 @@
+"""
+CRITICAL TEST: Nautilus-Dolphin vs Standalone DOLPHIN Comparison
+================================================================
+
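+This module verifies that Nautilus-Dolphin (ND) produces IDENTICAL results
+to the standalone DOLPHIN implementation (itest_v7.py). The end-to-end
+backtest comparison (TestFullNDvsStandaloneBacktest) is currently skipped;
+the other test classes validate the reference data and individual ND
+components.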
+
+MUST MATCH:
+- Trade count
+- Win rate  
+- Profit factor
+- ROI
+- Entry/exit prices
+- P&L per trade
+- Exit types
+"""
+
+import json
+import pytest
+from pathlib import Path
+from typing import Dict, List, Any
+from dataclasses import dataclass
+
+
+# ── Configuration ────────────────────────────────────────────────────────────
+# Match the itest_v7 "tight_3_3" strategy configuration
+# REFERENCE_STRATEGY selects which entries of the reference files are used.
+REFERENCE_STRATEGY = "tight_3_3"
+# Reference artifacts are located via three `.parent` hops from this file.
+# Tests skip (rather than fail) when a required file cannot be loaded.
+REFERENCE_RESULTS_FILE = Path(__file__).parent.parent.parent / "itest_v7_results.json"
+REFERENCE_TRADES_FILE = Path(__file__).parent.parent.parent / "itest_v7_trades.jsonl"
+
+# NOTE(review): despite the _PCT suffix the value is a fraction (0.001 == 0.1%).
+# It is not referenced anywhere else in this file - confirm the intended use.
+TOLERANCE_PCT = 0.001  # 0.1% tolerance for floating point differences
+
+
+@dataclass
+class Trade:
+    """Trade record for comparison.
+
+    One instance is built per line of REFERENCE_TRADES_FILE whose ``strategy``
+    key equals REFERENCE_STRATEGY. Every attribute is copied verbatim from the
+    JSON key of the same name, so units and value ranges are whatever the
+    reference file contains.
+    """
+    strategy: str
+    date: str
+    scan_idx: int
+    direction: str
+    entry_price: float  # tests expect a strictly positive value
+    exit_price: float  # tests expect a strictly positive value
+    exit_type: str  # tests expect: trailing_stop, stop_loss, target or max_hold
+    bars_held: int
+    leverage: float
+    notional: float  # tests expect a strictly positive value
+    gross_pnl: float
+    fees: float
+    net_pnl: float  # tests expect gross_pnl - fees, within 0.01
+    is_winner: bool  # tests compare this against (net_pnl > 0)
+    trade_asset: str  # compared against symbols such as 'BTCUSDT' in the tests
+
+
+@dataclass
+class StrategyMetrics:
+    """Strategy metrics for comparison.
+
+    Filled from ``reference_results['strategies'][REFERENCE_STRATEGY]`` using
+    the keys of the same names; values are taken as-is from the reference file.
+    """
+    name: str
+    capital: float  # tests expect a positive value
+    roi_pct: float  # printed with a '%' suffix
+    trades: int
+    wins: int
+    win_rate: float  # tests expect 0 < win_rate < 100, i.e. a percentage
+    profit_factor: float
+    avg_win: float
+    avg_loss: float
+    # The four exit counters below are expected to sum to ``trades`` in the
+    # reference results.
+    stop_exits: int
+    trailing_exits: int
+    target_exits: int
+    hold_exits: int
+
+
+# Global storage for loaded data
+_ref_results = None
+_ref_trades = None
+
+def load_reference_data():
+    """Load reference data once."""
+    global _ref_results, _ref_trades
+    
+    if _ref_results is None:
+        if REFERENCE_RESULTS_FILE.exists():
+            with open(REFERENCE_RESULTS_FILE, 'r') as f:
+                _ref_results = json.load(f)
+    
+    if _ref_trades is None:
+        if REFERENCE_TRADES_FILE.exists():
+            _ref_trades = []
+            with open(REFERENCE_TRADES_FILE, 'r') as f:
+                for line in f:
+                    data = json.loads(line.strip())
+                    if data.get('strategy') == REFERENCE_STRATEGY:
+                        _ref_trades.append(Trade(
+                            strategy=data['strategy'],
+                            date=data['date'],
+                            scan_idx=data['scan_idx'],
+                            direction=data['direction'],
+                            entry_price=data['entry_price'],
+                            exit_price=data['exit_price'],
+                            exit_type=data['exit_type'],
+                            bars_held=data['bars_held'],
+                            leverage=data['leverage'],
+                            notional=data['notional'],
+                            gross_pnl=data['gross_pnl'],
+                            fees=data['fees'],
+                            net_pnl=data['net_pnl'],
+                            is_winner=data['is_winner'],
+                            trade_asset=data['trade_asset']
+                        ))
+    
+    return _ref_results, _ref_trades
+
+
+class TestNDvsStandaloneComparison:
+    """Test Nautilus-Dolphin matches standalone DOLPHIN results."""
+    
+    def test_reference_results_exist(self):
+        """Verify reference results file exists and has expected structure."""
+        reference_results, _ = load_reference_data()
+        
+        if reference_results is None:
+            pytest.skip(f"Reference results not found: {REFERENCE_RESULTS_FILE}")
+        
+        assert 'strategies' in reference_results
+        assert REFERENCE_STRATEGY in reference_results['strategies']
+        assert 'total_scans' in reference_results
+        print(f"\nReference data loaded: {reference_results['total_scans']} scans")
+    
+    def test_reference_trades_exist(self):
+        """Verify reference trades exist for the strategy."""
+        _, reference_trades = load_reference_data()
+        
+        if reference_trades is None:
+            pytest.skip(f"Reference trades not found: {REFERENCE_TRADES_FILE}")
+        
+        assert len(reference_trades) > 0
+        print(f"\nReference trades loaded: {len(reference_trades)} trades for {REFERENCE_STRATEGY}")
+    
+    def test_strategy_metrics_match(self):
+        """Verify ND produces matching high-level metrics.
+        
+        This test compares:
+        - Trade count
+        - Win rate
+        - Profit factor
+        - ROI
+        """
+        reference_results, _ = load_reference_data()
+        
+        if reference_results is None:
+            pytest.skip("Reference results not available")
+        
+        ref_strategy = reference_results['strategies'][REFERENCE_STRATEGY]
+        
+        # Store reference metrics for comparison
+        ref_metrics = StrategyMetrics(
+            name=REFERENCE_STRATEGY,
+            capital=ref_strategy['capital'],
+            roi_pct=ref_strategy['roi_pct'],
+            trades=ref_strategy['trades'],
+            wins=ref_strategy['wins'],
+            win_rate=ref_strategy['win_rate'],
+            profit_factor=ref_strategy['profit_factor'],
+            avg_win=ref_strategy['avg_win'],
+            avg_loss=ref_strategy['avg_loss'],
+            stop_exits=ref_strategy['stop_exits'],
+            trailing_exits=ref_strategy['trailing_exits'],
+            target_exits=ref_strategy['target_exits'],
+            hold_exits=ref_strategy['hold_exits']
+        )
+        
+        # Log reference metrics
+        print(f"\n{'='*60}")
+        print(f"Reference Strategy: {REFERENCE_STRATEGY}")
+        print(f"{'='*60}")
+        print(f"Capital: ${ref_metrics.capital:,.2f}")
+        print(f"ROI: {ref_metrics.roi_pct:.2f}%")
+        print(f"Trades: {ref_metrics.trades}")
+        print(f"Win Rate: {ref_metrics.win_rate:.2f}%")
+        print(f"Profit Factor: {ref_metrics.profit_factor:.4f}")
+        print(f"Avg Win: ${ref_metrics.avg_win:.2f}")
+        print(f"Avg Loss: ${ref_metrics.avg_loss:.2f}")
+        print(f"Exit Types: stop={ref_metrics.stop_exits}, trail={ref_metrics.trailing_exits}, target={ref_metrics.target_exits}, hold={ref_metrics.hold_exits}")
+        
+        # Basic sanity checks on reference data
+        assert ref_metrics.trades > 100, "Expected significant number of trades"
+        assert 0 < ref_metrics.win_rate < 100, "Win rate should be between 0-100%"
+        assert ref_metrics.capital > 0, "Capital should be positive"
+    
+    def test_trade_details_structure(self):
+        """Verify structure of reference trades."""
+        _, reference_trades = load_reference_data()
+        
+        if not reference_trades:
+            pytest.skip("No reference trades loaded")
+        
+        trade = reference_trades[0]
+        
+        # Check required fields exist
+        assert trade.strategy == REFERENCE_STRATEGY
+        assert trade.entry_price > 0
+        assert trade.exit_price > 0
+        assert trade.notional > 0
+        assert trade.exit_type in ['trailing_stop', 'stop_loss', 'target', 'max_hold']
+        
+        print(f"\nSample trade: {trade.trade_asset} {trade.direction}")
+        print(f"  Date: {trade.date}, Scan: {trade.scan_idx}")
+        print(f"  Entry: ${trade.entry_price:.2f} -> Exit: ${trade.exit_price:.2f}")
+        print(f"  P&L: ${trade.net_pnl:.4f}, Exit Type: {trade.exit_type}")
+        print(f"  Bars: {trade.bars_held}, Leverage: {trade.leverage}x")
+    
+    def test_exit_type_distribution(self):
+        """Verify exit type distribution matches expectations."""
+        reference_results, _ = load_reference_data()
+        
+        if reference_results is None:
+            pytest.skip("Reference results not available")
+        
+        ref_strategy = reference_results['strategies'][REFERENCE_STRATEGY]
+        
+        total_exits = (
+            ref_strategy['stop_exits'] + 
+            ref_strategy['trailing_exits'] + 
+            ref_strategy['target_exits'] + 
+            ref_strategy['hold_exits']
+        )
+        
+        assert total_exits == ref_strategy['trades'], "Exit count should match trade count"
+        
+        # Log distribution
+        print(f"\nExit Type Distribution:")
+        print(f"  Trailing: {ref_strategy['trailing_exits']} ({100*ref_strategy['trailing_exits']/ref_strategy['trades']:.1f}%)")
+        print(f"  Stop: {ref_strategy['stop_exits']} ({100*ref_strategy['stop_exits']/ref_strategy['trades']:.1f}%)")
+        print(f"  Target: {ref_strategy['target_exits']} ({100*ref_strategy['target_exits']/ref_strategy['trades']:.1f}%)")
+        print(f"  Hold: {ref_strategy['hold_exits']} ({100*ref_strategy['hold_exits']/ref_strategy['trades']:.1f}%)")
+    
+    def test_pnl_calculation_consistency(self):
+        """Verify P&L calculations in reference trades are consistent.
+        
+        Checks: gross_pnl - fees = net_pnl (within tolerance)
+        """
+        _, reference_trades = load_reference_data()
+        
+        if not reference_trades:
+            pytest.skip("No reference trades loaded")
+        
+        calc_errors = []
+        winner_errors = []
+        
+        for i, trade in enumerate(reference_trades[:100]):  # Check first 100
+            # Check 1: Verify gross_pnl - fees = net_pnl
+            calc_net = trade.gross_pnl - trade.fees
+            if abs(calc_net - trade.net_pnl) > 0.01:
+                calc_errors.append(i)
+            
+            # Check 2: Verify is_winner matches net_pnl sign
+            # A trade is a winner if net_pnl > 0 (strictly positive)
+            expected_winner = trade.net_pnl > 0
+            if expected_winner != trade.is_winner:
+                winner_errors.append(i)
+        
+        # Report findings
+        print(f"\nP&L Calculation Check (first 100 trades):")
+        print(f"  Calculation errors: {len(calc_errors)} ({len(calc_errors)}%)")
+        print(f"  Winner flag errors: {len(winner_errors)} ({len(winner_errors)}%)")
+        
+        if calc_errors[:5]:
+            print(f"  Sample calc errors: {calc_errors[:5]}")
+        
+        # The key check: gross_pnl - fees should equal net_pnl
+        # Some small discrepancies are acceptable due to rounding
+        calc_error_rate = len(calc_errors) / min(100, len(reference_trades))
+        assert calc_error_rate < 0.05, f"Too many P&L calculation errors: {calc_error_rate:.1%}"
+    
+    def test_nd_configuration_matches(self):
+        """Verify ND configuration matches standalone.
+        
+        This test ensures the Nautilus-Dolphin configuration
+        matches the itest_v7 tight_3_3 configuration.
+        """
+        from nautilus_dolphin.nautilus.strategy_registration import DolphinStrategyConfig
+        
+        # ND configuration
+        nd_config = DolphinStrategyConfig(
+            venue="BINANCE_FUTURES",
+            max_leverage=2.5,  # From itest_v7
+            capital_fraction=0.15,  # From itest_v7
+            tp_bps=99,  # ~1% target (not heavily used in tight_3_3)
+            max_hold_bars=120,  # From itest_v7
+            acb_enabled=True,
+        )
+        
+        # Key parameters that MUST match itest_v7
+        assert nd_config.max_leverage == 2.5, "Leverage must match"
+        assert nd_config.capital_fraction == 0.15, "Capital fraction must match"
+        assert nd_config.max_hold_bars == 120, "Max hold must match"
+        
+        print(f"\nND Configuration validated:")
+        print(f"  Max Leverage: {nd_config.max_leverage}x")
+        print(f"  Capital Fraction: {nd_config.capital_fraction}")
+        print(f"  Max Hold Bars: {nd_config.max_hold_bars}")
+        print(f"  ACB Enabled: {nd_config.acb_enabled}")
+
+
+class TestNDSignalGenerationStack:
+    """Test Nautilus-Dolphin signal generation stack works correctly."""
+    
+    def test_data_adapter_imports(self):
+        """Verify data adapter components import correctly."""
+        from nautilus_dolphin.nautilus.data_adapter import (
+            JSONEigenvalueDataAdapter,
+            BacktestDataLoader
+        )
+        assert JSONEigenvalueDataAdapter is not None
+        assert BacktestDataLoader is not None
+    
+    def test_data_catalog_imports(self):
+        """Verify data catalog components import correctly."""
+        from nautilus_dolphin.nautilus.data_catalogue import (
+            DataCatalogueConfig,
+            BacktestEngineConfig,
+            DataImporter
+        )
+        assert DataCatalogueConfig is not None
+        assert BacktestEngineConfig is not None
+        assert DataImporter is not None
+    
+    def test_strategy_can_calculate_position_size(self):
+        """Verify strategy can calculate position sizes matching itest_v7."""
+        from nautilus_dolphin.nautilus.strategy import DolphinExecutionStrategyForTesting
+        
+        strategy = DolphinExecutionStrategyForTesting({
+            'venue': 'BINANCE_FUTURES',
+            'max_leverage': 2.5,
+            'capital_fraction': 0.15,
+            'acb_enabled': False,  # Disable ACB for this test
+        })
+        
+        # Test signal matching itest_v7 parameters
+        signal = {
+            'strength': 0.75,
+            'bucket_boost': 1.0,
+            'streak_mult': 1.0,
+            'trend_mult': 1.0,
+        }
+        
+        account_balance = 10000.0
+        notional = strategy.calculate_position_size(signal, account_balance)
+        
+        # itest_v7: notional = 10000 * 0.15 * 2.5 = 3750
+        expected_base = account_balance * 0.15 * 2.5
+        
+        print(f"\nPosition Size Calculation:")
+        print(f"  Account: ${account_balance:,.2f}")
+        print(f"  Calculated Notional: ${notional:,.2f}")
+        print(f"  Expected (itest_v7): ${expected_base:,.2f}")
+        
+        # Allow for minor differences due to ACB or other factors
+        assert notional > 0, "Notional must be positive"
+        assert notional <= account_balance * 0.5, "Notional should respect sanity cap"
+    
+    def test_strategy_filters_match(self):
+        """Verify strategy filters match itest_v7 logic."""
+        from nautilus_dolphin.nautilus.strategy import DolphinExecutionStrategyForTesting
+        
+        strategy = DolphinExecutionStrategyForTesting({
+            'venue': 'BINANCE_FUTURES',
+            'irp_alignment_min': 0.45,
+            'momentum_magnitude_min': 0.000075,
+            'excluded_assets': ['TUSDUSDT', 'USDCUSDT'],
+            'max_concurrent_positions': 10,
+        })
+        
+        # Test valid signal
+        valid_signal = {
+            'irp_alignment': 0.5,
+            'direction_confirm': True,
+            'lookback_momentum': 0.0001,
+            'asset': 'BTCUSDT',
+        }
+        
+        # Manually set volatility detector to high regime
+        strategy.volatility_detector._regime = 'high'
+        strategy.volatility_detector._history = [0.0001] * 150
+        
+        result = strategy._should_trade(valid_signal)
+        print(f"\nValid signal check: '{result}'")
+        
+        # Test excluded asset
+        excluded_signal = {
+            'irp_alignment': 0.5,
+            'direction_confirm': True,
+            'lookback_momentum': 0.0001,
+            'asset': 'USDCUSDT',  # Excluded
+        }
+        
+        result_excluded = strategy._should_trade(excluded_signal)
+        print(f"Excluded asset check: '{result_excluded}'")
+        
+        assert result_excluded == "asset_excluded", "Should reject excluded asset"
+
+
+class TestTradeByTradeComparison:
+    """Trade-by-trade comparison between ND and standalone.
+    
+    This is the MOST CRITICAL test - every trade must match.
+    """
+    
+    def test_first_10_trades_structure(self):
+        """Verify structure of first 10 reference trades."""
+        _, reference_trades = load_reference_data()
+        
+        if not reference_trades:
+            pytest.skip("No reference trades loaded")
+        
+        print(f"\n{'='*60}")
+        print("First 10 Reference Trades:")
+        print(f"{'='*60}")
+        
+        for i, trade in enumerate(reference_trades[:10]):
+            print(f"\nTrade {i+1}: {trade.trade_asset} {trade.direction}")
+            print(f"  Date: {trade.date}, Scan: {trade.scan_idx}")
+            print(f"  Entry: ${trade.entry_price:.2f} -> Exit: ${trade.exit_price:.2f}")
+            print(f"  P&L: ${trade.net_pnl:.4f}, Exit: {trade.exit_type}")
+            print(f"  Bars: {trade.bars_held}, Leverage: {trade.leverage}x")
+    
+    def test_entry_exit_prices_are_reasonable(self):
+        """Verify entry/exit prices are within reasonable ranges."""
+        _, reference_trades = load_reference_data()
+        
+        if not reference_trades:
+            pytest.skip("No reference trades loaded")
+        
+        crypto_assets = {
+            'BTCUSDT': (20000, 100000),
+            'ETHUSDT': (1000, 5000),
+            'ADAUSDT': (0.2, 2.0),
+            'SOLUSDT': (10, 200),
+        }
+        
+        unreasonable = 0
+        for trade in reference_trades[:100]:
+            # Check if prices are positive
+            if trade.entry_price <= 0 or trade.exit_price <= 0:
+                unreasonable += 1
+                continue
+            
+            # Check price range for known assets
+            for asset, (min_p, max_p) in crypto_assets.items():
+                if trade.trade_asset == asset:
+                    if not (min_p <= trade.entry_price <= max_p):
+                        unreasonable += 1
+                    break
+        
+        error_rate = unreasonable / min(100, len(reference_trades))
+        assert error_rate < 0.1, f"Too many unreasonable prices: {error_rate:.1%}"
+    
+    def test_leverage_is_consistent(self):
+        """Verify all trades use expected leverage."""
+        _, reference_trades = load_reference_data()
+        
+        if not reference_trades:
+            pytest.skip("No reference trades loaded")
+        
+        leverages = set(t.leverage for t in reference_trades)
+        print(f"\nLeverage values used: {leverages}")
+        
+        # itest_v7 uses 2.5x leverage for tight_3_3
+        assert 2.5 in leverages, "Expected 2.5x leverage in trades"
+    
+    def test_fees_are_calculated(self):
+        """Verify fees are calculated for all trades."""
+        _, reference_trades = load_reference_data()
+        
+        if not reference_trades:
+            pytest.skip("No reference trades loaded")
+        
+        trades_with_fees = sum(1 for t in reference_trades if t.fees > 0)
+        fee_rate = trades_with_fees / len(reference_trades)
+        
+        print(f"\nFee coverage: {trades_with_fees}/{len(reference_trades)} ({fee_rate:.1%})")
+        
+        # All trades should have fees
+        assert fee_rate > 0.99, "Expected fees on almost all trades"
+
+
+# ── Main Comparison Test ─────────────────────────────────────────────────────
+@pytest.mark.skip(reason="Full ND backtest comparison - run after ND backtest implementation")
+class TestFullNDvsStandaloneBacktest:
+    """Full backtest comparison - requires ND backtest results."""
+    
+    def test_nd_backtest_produces_results(self):
+        """Verify ND backtest runs and produces results."""
+        # TODO: Run ND backtest and load results
+        pass
+    
+    def test_trade_count_matches(self):
+        """Verify ND produces same number of trades."""
+        reference_results, _ = load_reference_data()
+        if reference_results is None:
+            pytest.skip("Reference results not available")
+        
+        ref_trades = reference_results['strategies'][REFERENCE_STRATEGY]['trades']
+        # TODO: Compare with ND results
+        pass
+    
+    def test_trade_by_trade_match(self):
+        """CRITICAL: Verify every trade matches."""
+        _, reference_trades = load_reference_data()
+        if not reference_trades:
+            pytest.skip("Reference trades not available")
+        
+        # TODO: Implement trade-by-trade comparison
+        pass