initial: import DOLPHIN baseline 2026-04-21 from dolphinng5_predict working tree

Includes core prod + GREEN/BLUE subsystems:
- prod/ (BLUE harness, configs, scripts, docs)
- nautilus_dolphin/ (GREEN Nautilus-native impl + dvae/ preserved)
- adaptive_exit/ (AEM engine + models/bucket_assignments.pkl)
- Observability/ (EsoF advisor, TUI, dashboards)
- external_factors/ (EsoF producer)
- mc_forewarning_qlabs_fork/ (MC regime/envelope)

Excludes runtime caches, logs, backups, and reproducible artifacts per .gitignore.
This commit is contained in:
hjnormey
2026-04-21 16:58:38 +02:00
commit 01c19662cb
643 changed files with 260241 additions and 0 deletions

View File

@@ -0,0 +1,523 @@
"""
Test Suite for QLabs-Enhanced MC Forewarning System
===================================================
Comprehensive tests for:
1. Individual QLabs ML techniques
2. End-to-end ML model training
3. E2E forewarning system performance
4. Comparison with baseline MCML
"""
import sys
import os
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
import unittest
import numpy as np
import json
from pathlib import Path
from typing import Dict, Any
# Import MC modules
from mc.mc_sampler import MCSampler, MCTrialConfig
from mc.mc_metrics import MCTrialResult, MCMetrics
from mc.mc_ml import MCML, DolphinForewarner
from mc.mc_ml_qlabs import (
MCMLQLabs, DolphinForewarnerQLabs, MuonOptimizer,
SwiGLU, UNetMLP, DeepEnsemble, QLabsHyperParams
)
class TestMuonOptimizer(unittest.TestCase):
    """Test QLabs Technique #1: Muon Optimizer"""

    def test_newton_schulz_orthogonalization(self):
        """Newton-Schulz iteration should yield a near-orthogonal matrix."""
        opt = MuonOptimizer()
        mat = np.random.randn(10, 8)
        ortho = opt.newton_schulz(mat)
        # The Gram matrix of an orthogonal matrix is the identity; take the
        # product on the smaller side so the result is square.
        gram = ortho.T @ ortho if mat.shape[0] >= mat.shape[1] else ortho @ ortho.T
        identity = np.eye(gram.shape[0])
        self.assertGreater(np.diag(gram).mean(), 0.8, "Diagonal should be close to 1")
        self.assertLess(np.abs(gram - identity).mean(), 0.3, "Off-diagonal should be close to 0")

    def test_compute_update_shape(self):
        """The Muon update must match the parameter tensor's shape."""
        opt = MuonOptimizer()
        grad = np.random.randn(10, 8)
        weights = np.random.randn(10, 8)
        self.assertEqual(opt.compute_update(grad, weights).shape, weights.shape)

    def test_momentum_accumulation(self):
        """Two consecutive updates should populate the momentum buffer."""
        opt = MuonOptimizer(momentum=0.9)
        weights = np.random.randn(5, 4)
        # Apply two updates so the momentum buffer accumulates history.
        for _ in range(2):
            opt.compute_update(np.random.randn(5, 4), weights)
        self.assertIsNotNone(opt.momentum_buffer)
        self.assertEqual(opt.step_count, 2)
class TestSwiGLU(unittest.TestCase):
    """Test QLabs Technique #4: SwiGLU Activation"""

    def test_swiglu_output_shape(self):
        """Forward pass output must be (batch, hidden_dim)."""
        n_rows, in_dim, out_dim = 32, 64, 128
        inputs = np.random.randn(n_rows, in_dim)
        w_gate = np.random.randn(in_dim, out_dim)
        w_up = np.random.randn(in_dim, out_dim)
        result = SwiGLU.forward(inputs, w_gate, w_up)
        self.assertEqual(result.shape, (n_rows, out_dim))

    def test_swiglu_gating_effect(self):
        """Gated output should be finite and not collapse to zero."""
        inputs = np.random.randn(10, 20)
        w_gate = np.random.randn(20, 30)
        w_up = np.random.randn(20, 30)
        result = SwiGLU.forward(inputs, w_gate, w_up)
        # Gating modulates, but must not zero out, the activation.
        self.assertFalse(np.allclose(result, 0))
        self.assertTrue(np.isfinite(result).all())
class TestUNetMLP(unittest.TestCase):
    """Test QLabs Technique #5: U-Net Skip Connections"""

    def test_unet_initialization(self):
        """Constructor should record dims and create encoder weights."""
        net = UNetMLP(
            input_dim=33,
            hidden_dims=[64, 32],
            output_dim=1,
            use_swiglu=True,
        )
        self.assertEqual(net.input_dim, 33)
        self.assertEqual(len(net.hidden_dims), 2)
        self.assertIn('enc_gate_0', net.weights)

    def test_unet_forward(self):
        """Forward pass should produce a finite (batch, 1) output."""
        net = UNetMLP(
            input_dim=33,
            hidden_dims=[64, 32],
            output_dim=1,
            use_swiglu=False,  # plain activation keeps the test simple
        )
        rows = 16
        out = net.forward(np.random.randn(rows, 33))
        self.assertEqual(out.shape, (rows, 1))
        self.assertTrue(np.isfinite(out).all())

    def test_unet_skip_connections(self):
        """Skip-connection weight matrices should exist after a forward pass."""
        net = UNetMLP(
            input_dim=33,
            hidden_dims=[64, 32],
            output_dim=1,
            use_swiglu=False,
        )
        net.forward(np.random.randn(8, 33))
        for key in ('skip_0', 'skip_1'):
            self.assertIn(key, net.weights)
class TestDeepEnsemble(unittest.TestCase):
    """Test QLabs Technique #6: Deep Ensembling"""

    def test_ensemble_initialization(self):
        """Ensemble should track its member count and seeds."""
        from sklearn.linear_model import LinearRegression
        bagged = DeepEnsemble(
            LinearRegression,
            n_models=5,
            seeds=[1, 2, 3, 4, 5],
        )
        self.assertEqual(bagged.n_models, 5)
        self.assertEqual(len(bagged.seeds), 5)

    def test_ensemble_fit_predict(self):
        """Fitted ensemble should return per-sample mean and non-negative std."""
        from sklearn.linear_model import Ridge
        # Synthetic linear data with a little noise.
        np.random.seed(42)
        features = np.random.randn(100, 5)
        target = features[:, 0] + 2 * features[:, 1] + np.random.randn(100) * 0.1
        bagged = DeepEnsemble(
            Ridge,
            n_models=3,
            seeds=[1, 2, 3],
        )
        bagged.fit(features, target, alpha=1.0)
        mean_pred, std_pred = bagged.predict_regression(np.random.randn(10, 5))
        self.assertEqual(mean_pred.shape, (10,))
        self.assertEqual(std_pred.shape, (10,))
        # Standard deviation is non-negative by definition.
        self.assertTrue((std_pred >= 0).all())
class TestQLabsHyperParams(unittest.TestCase):
    """Test QLabs Technique #2: Heavy Regularization"""

    def test_heavy_regularization_values(self):
        """Default hyperparameters should be stricter than sklearn's."""
        hp = QLabsHyperParams()
        # QLabs reference value for XGBoost L2 regularization.
        self.assertEqual(hp.xgb_reg_lambda, 1.6)
        # sklearn defaults are min_samples_leaf=1 / min_samples_split=2;
        # heavy regularization should exceed both.
        self.assertGreater(hp.gb_min_samples_leaf, 1)
        self.assertGreater(hp.gb_min_samples_split, 2)
        self.assertGreater(hp.dropout, 0)

    def test_epoch_shuffling_config(self):
        """Early stopping must be enabled by default."""
        hp = QLabsHyperParams()
        self.assertGreater(hp.early_stopping_rounds, 0)
class TestMCMLQLabs(unittest.TestCase):
    """Test QLabs-enhanced MCML system"""

    def setUp(self):
        """Create the scratch output directory for this test class."""
        self.output_dir = "mc_forewarning_qlabs_fork/results/test_mcml_qlabs"
        Path(self.output_dir).mkdir(parents=True, exist_ok=True)

    def test_initialization(self):
        """Constructor flags should round-trip onto the instance."""
        trainer = MCMLQLabs(
            output_dir=self.output_dir,
            use_ensemble=True,
            n_ensemble_models=4,
            use_unet=True,
            heavy_regularization=True,
        )
        self.assertTrue(trainer.use_ensemble)
        self.assertEqual(trainer.n_ensemble_models, 4)
        self.assertTrue(trainer.heavy_regularization)

    def test_epoch_shuffling(self):
        """Each epoch should see a freshly shuffled sample ordering."""
        trainer = MCMLQLabs(output_dir=self.output_dir)
        features = np.random.randn(100, 10)
        target = np.random.randn(100)
        epochs = trainer._shuffle_epochs(features, target, n_epochs=5)
        self.assertEqual(len(epochs), 5)
        # If shuffling works, the leading sample differs across epochs.
        leading = {epoch[0][0][0] for epoch in epochs}
        self.assertGreater(len(leading), 1)
class TestE2EForewarning(unittest.TestCase):
    """End-to-end tests for the forewarning system"""
    def setUp(self):
        """Set up test fixtures."""
        # NOTE(review): path is relative to the current working directory —
        # confirm the suite is always launched from the repo root.
        self.output_dir = "mc_forewarning_qlabs_fork/results/test_e2e"
        Path(self.output_dir).mkdir(parents=True, exist_ok=True)
        # Generate synthetic corpus data
        self._generate_synthetic_corpus()
    def _generate_synthetic_corpus(self) -> None:
        """Generate synthetic MC trial data for testing.

        Writes one parquet batch of 500 fake trials plus a matching SQLite
        index under self.output_dir. Metrics are derived from a simple
        linear model of the parameters so the ML pipeline has real signal
        to learn.
        """
        import pandas as pd
        np.random.seed(42)
        n_trials = 500
        # Generate parameter columns (P_* = trial parameters)
        data = {
            'trial_id': range(n_trials),
            'P_vel_div_threshold': np.random.uniform(-0.04, -0.008, n_trials),
            'P_vel_div_extreme': np.random.uniform(-0.12, -0.02, n_trials),
            'P_max_leverage': np.random.uniform(1.5, 12, n_trials),
            'P_min_leverage': np.random.uniform(0.1, 1.5, n_trials),
            'P_fraction': np.random.uniform(0.05, 0.4, n_trials),
            'P_fixed_tp_pct': np.random.uniform(0.003, 0.03, n_trials),
            'P_stop_pct': np.random.uniform(0.2, 5, n_trials),
            'P_max_hold_bars': np.random.randint(20, 600, n_trials),
            'P_leverage_convexity': np.random.uniform(0.75, 6, n_trials),
            'P_use_direction_confirm': np.random.choice([True, False], n_trials),
            'P_use_alpha_layers': np.random.choice([True, False], n_trials),
            'P_use_dynamic_leverage': np.random.choice([True, False], n_trials),
            'P_use_sp_fees': np.random.choice([True, False], n_trials),
            'P_use_sp_slippage': np.random.choice([True, False], n_trials),
            'P_use_ob_edge': np.random.choice([True, False], n_trials),
            'P_use_asset_selection': np.random.choice([True, False], n_trials),
            'P_ob_imbalance_bias': np.random.uniform(-0.25, 0.15, n_trials),
            'P_ob_depth_scale': np.random.uniform(0.3, 2, n_trials),
            'P_acb_beta_high': np.random.uniform(0.4, 1.5, n_trials),
            'P_acb_beta_low': np.random.uniform(0, 0.6, n_trials),
        }
        # Generate metrics based on parameters (simplified model):
        # tighter thresholds and higher leverage help, wide stops hurt,
        # plus Gaussian noise so the relationship is learnable but not exact.
        roi = (
            -data['P_vel_div_threshold'] * 1000 +
            data['P_max_leverage'] * 2 -
            data['P_stop_pct'] * 5 +
            np.random.randn(n_trials) * 10
        )
        # M_* = trial outcome metrics, all noisy functions of roi
        data['M_roi_pct'] = roi
        data['M_max_drawdown_pct'] = np.abs(roi) * 0.5 + np.random.randn(n_trials) * 5
        data['M_profit_factor'] = 1 + roi / 100 + np.random.randn(n_trials) * 0.2
        data['M_win_rate'] = 0.4 + roi / 500 + np.random.randn(n_trials) * 0.05
        data['M_sharpe_ratio'] = roi / 20 + np.random.randn(n_trials) * 0.5
        data['M_n_trades'] = np.random.randint(20, 200, n_trials)
        # Classification labels (L_* = boolean training targets)
        data['L_profitable'] = roi > 0
        data['L_strongly_profitable'] = roi > 30
        data['L_drawdown_ok'] = data['M_max_drawdown_pct'] < 20
        data['L_sharpe_ok'] = data['M_sharpe_ratio'] > 1.5
        data['L_pf_ok'] = data['M_profit_factor'] > 1.10
        data['L_wr_ok'] = data['M_win_rate'] > 0.45
        # Champion region = all quality gates pass simultaneously
        data['L_champion_region'] = (
            data['L_strongly_profitable'] &
            data['L_drawdown_ok'] &
            data['L_sharpe_ok'] &
            data['L_pf_ok'] &
            data['L_wr_ok']
        )
        data['L_catastrophic'] = (roi < -30) | (data['M_max_drawdown_pct'] > 40)
        data['L_inert'] = data['M_n_trades'] < 50
        data['L_h2_degradation'] = np.random.choice([True, False], n_trials)
        df = pd.DataFrame(data)
        # Save to parquet (batch file layout expected by the trainer)
        results_dir = Path(self.output_dir) / "results"
        results_dir.mkdir(parents=True, exist_ok=True)
        df.to_parquet(results_dir / "batch_0001_results.parquet", index=False)
        # Create SQLite index mirroring the key metrics per trial
        import sqlite3
        conn = sqlite3.connect(Path(self.output_dir) / "mc_index.sqlite")
        cursor = conn.cursor()
        # Drop first so reruns of setUp start from a clean table
        cursor.execute('DROP TABLE IF EXISTS mc_index')
        cursor.execute('''
            CREATE TABLE IF NOT EXISTS mc_index (
                trial_id INTEGER PRIMARY KEY,
                batch_id INTEGER,
                status TEXT,
                roi_pct REAL,
                profit_factor REAL,
                win_rate REAL,
                max_dd_pct REAL,
                sharpe REAL,
                n_trades INTEGER,
                champion_region INTEGER,
                catastrophic INTEGER,
                created_at INTEGER
            )
        ''')
        for i in range(n_trials):
            try:
                cursor.execute('''
                    INSERT INTO mc_index VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
                ''', (
                    i, 1, 'completed', float(roi[i]), float(data['M_profit_factor'][i]),
                    float(data['M_win_rate'][i]), float(data['M_max_drawdown_pct'][i]),
                    float(data['M_sharpe_ratio'][i]), int(data['M_n_trades'][i]),
                    int(data['L_champion_region'][i]), int(data['L_catastrophic'][i]), 0
                ))
            except sqlite3.IntegrityError:
                pass  # Skip duplicates
        conn.commit()
        conn.close()
    def test_training_pipeline(self):
        """Test full training pipeline."""
        ml = MCMLQLabs(
            output_dir=self.output_dir,
            models_dir=f"{self.output_dir}/models_qlabs",
            use_ensemble=False,  # Faster for testing
            n_ensemble_models=2,
            use_unet=False,  # Skip for speed
            heavy_regularization=True
        )
        try:
            result = ml.train_all_models(test_size=0.2, n_epochs=3)
            self.assertEqual(result['status'], 'success')
            self.assertIn('qlabs_techniques', result)
            # Check models were saved
            models_dir = Path(ml.models_dir)
            self.assertTrue((models_dir / "feature_names.json").exists())
            self.assertTrue((models_dir / "qlabs_config.json").exists())
        except Exception as e:
            # NOTE(review): any failure is downgraded to a skip — this can
            # mask real regressions in the training pipeline.
            self.skipTest(f"Training failed (may need real data): {e}")
    def test_forewarning_assessment(self):
        """Test forewarning assessment."""
        # Try to load existing models or skip.
        # NOTE(review): this test depends on test_training_pipeline having
        # run first in the same directory — ordering-dependent tests.
        models_dir = Path(self.output_dir) / "models_qlabs"
        if not (models_dir / "feature_names.json").exists():
            self.skipTest("No trained models available")
        try:
            forewarner = DolphinForewarnerQLabs(models_dir=str(models_dir))
        except Exception as e:
            self.skipTest(f"Could not load forewarner: {e}")
        # Create test config with only the features used during training
        # Get feature names from the scaler
        try:
            import json  # already imported at module level; redundant but harmless
            with open(models_dir / "feature_names.json", 'r') as f:
                feature_names = json.load(f)
            # Create a minimal config with just those features, defaulting
            # unknown names to 0 via the CHAMPION baseline lookup
            config_dict = {name: MCSampler.CHAMPION.get(name, 0) for name in feature_names}
            from mc.mc_sampler import MCTrialConfig  # redundant re-import (see module imports)
            config = MCTrialConfig.from_dict(config_dict)
        except Exception as e:
            self.skipTest(f"Could not create config: {e}")
        report = forewarner.assess(config)
        self.assertIsNotNone(report)
        self.assertIn('config', report.to_dict())
        self.assertIn('predicted_roi', report.to_dict())
class TestComparisonWithBaseline(unittest.TestCase):
    """Compare QLabs-enhanced vs baseline MCML"""

    def setUp(self):
        """Create the scratch output directory for this test class."""
        self.output_dir = "mc_forewarning_qlabs_fork/results/test_comparison"
        Path(self.output_dir).mkdir(parents=True, exist_ok=True)

    def test_prediction_uncertainty(self):
        """An ensemble of Ridge models should yield finite, non-negative std."""
        # Instantiated to mirror production wiring (constructor may set up
        # output directories); only the ensemble below is exercised directly.
        MCMLQLabs(
            output_dir=self.output_dir,
            use_ensemble=True,
            n_ensemble_models=4,
        )
        from sklearn.linear_model import Ridge
        bagged = DeepEnsemble(Ridge, n_models=4)
        # Synthetic linear data with a little noise.
        np.random.seed(42)
        train_x = np.random.randn(50, 10)
        train_y = train_x[:, 0] + np.random.randn(50) * 0.1
        # Member models differ via their random states, giving a spread.
        bagged.fit(train_x, train_y, alpha=1.0)
        _, std_pred = bagged.predict_regression(np.random.randn(5, 10))
        # Valid uncertainty estimates: no NaN/Inf and never negative.
        self.assertTrue(np.isfinite(std_pred).all())
        self.assertTrue((std_pred >= 0).all())
def run_tests():
    """Build a suite from every test class, run it, and report success.

    Returns:
        bool: True when every test passed.
    """
    loader = unittest.TestLoader()
    suite = unittest.TestSuite()
    # One entry per test class, in the order they appear in this module.
    for case in (
        TestMuonOptimizer,
        TestSwiGLU,
        TestUNetMLP,
        TestDeepEnsemble,
        TestQLabsHyperParams,
        TestMCMLQLabs,
        TestE2EForewarning,
        TestComparisonWithBaseline,
    ):
        suite.addTests(loader.loadTestsFromTestCase(case))
    outcome = unittest.TextTestRunner(verbosity=2).run(suite)
    return outcome.wasSuccessful()
if __name__ == "__main__":
    # CLI entry point: exit code 0 when the suite passes, 1 otherwise.
    sys.exit(0 if run_tests() else 1)