initial: import DOLPHIN baseline 2026-04-21 from dolphinng5_predict working tree

Includes core prod + GREEN/BLUE subsystems:
- prod/ (BLUE harness, configs, scripts, docs)
- nautilus_dolphin/ (GREEN Nautilus-native impl + dvae/ preserved)
- adaptive_exit/ (AEM engine + models/bucket_assignments.pkl)
- Observability/ (EsoF advisor, TUI, dashboards)
- external_factors/ (EsoF producer)
- mc_forewarning_qlabs_fork/ (MC regime/envelope)

Excludes runtime caches, logs, backups, and reproducible artifacts per .gitignore.
This commit is contained in:
hjnormey
2026-04-21 16:58:38 +02:00
commit 01c19662cb
643 changed files with 260241 additions and 0 deletions

View File

@@ -0,0 +1,523 @@
"""
Test Suite for QLabs-Enhanced MC Forewarning System
===================================================
Comprehensive tests for:
1. Individual QLabs ML techniques
2. End-to-end ML model training
3. E2E forewarning system performance
4. Comparison with baseline MCML
"""
import sys
import os
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
import unittest
import numpy as np
import json
from pathlib import Path
from typing import Dict, Any
# Import MC modules
from mc.mc_sampler import MCSampler, MCTrialConfig
from mc.mc_metrics import MCTrialResult, MCMetrics
from mc.mc_ml import MCML, DolphinForewarner
from mc.mc_ml_qlabs import (
MCMLQLabs, DolphinForewarnerQLabs, MuonOptimizer,
SwiGLU, UNetMLP, DeepEnsemble, QLabsHyperParams
)
class TestMuonOptimizer(unittest.TestCase):
    """Test QLabs Technique #1: Muon Optimizer"""

    def test_newton_schulz_orthogonalization(self):
        """Newton-Schulz iteration should yield a near-orthogonal matrix."""
        opt = MuonOptimizer()
        mat = np.random.randn(10, 8)
        ortho = opt.newton_schulz(mat)
        # The Gram matrix of an orthogonal matrix is the identity; take the
        # product on the smaller side so the result is square.
        gram = ortho.T @ ortho if mat.shape[0] >= mat.shape[1] else ortho @ ortho.T
        identity = np.eye(gram.shape[0])
        self.assertGreater(np.diag(gram).mean(), 0.8, "Diagonal should be close to 1")
        self.assertLess(np.abs(gram - identity).mean(), 0.3, "Off-diagonal should be close to 0")

    def test_compute_update_shape(self):
        """The Muon update must match the parameter tensor's shape."""
        opt = MuonOptimizer()
        grad = np.random.randn(10, 8)
        weights = np.random.randn(10, 8)
        self.assertEqual(opt.compute_update(grad, weights).shape, weights.shape)

    def test_momentum_accumulation(self):
        """Two consecutive updates should populate the momentum buffer."""
        opt = MuonOptimizer(momentum=0.9)
        weights = np.random.randn(5, 4)
        # Apply two updates so the momentum buffer accumulates history.
        for _ in range(2):
            opt.compute_update(np.random.randn(5, 4), weights)
        self.assertIsNotNone(opt.momentum_buffer)
        self.assertEqual(opt.step_count, 2)
class TestSwiGLU(unittest.TestCase):
    """Test QLabs Technique #4: SwiGLU Activation"""

    def test_swiglu_output_shape(self):
        """Forward pass output must be (batch, hidden_dim)."""
        n_rows, in_dim, out_dim = 32, 64, 128
        inputs = np.random.randn(n_rows, in_dim)
        w_gate = np.random.randn(in_dim, out_dim)
        w_up = np.random.randn(in_dim, out_dim)
        result = SwiGLU.forward(inputs, w_gate, w_up)
        self.assertEqual(result.shape, (n_rows, out_dim))

    def test_swiglu_gating_effect(self):
        """Gated output should be finite and not collapse to zero."""
        inputs = np.random.randn(10, 20)
        w_gate = np.random.randn(20, 30)
        w_up = np.random.randn(20, 30)
        result = SwiGLU.forward(inputs, w_gate, w_up)
        # Gating modulates, but must not zero out, the activation.
        self.assertFalse(np.allclose(result, 0))
        self.assertTrue(np.isfinite(result).all())
class TestUNetMLP(unittest.TestCase):
    """Test QLabs Technique #5: U-Net Skip Connections"""

    def test_unet_initialization(self):
        """Constructor should record dims and create encoder weights."""
        net = UNetMLP(
            input_dim=33,
            hidden_dims=[64, 32],
            output_dim=1,
            use_swiglu=True,
        )
        self.assertEqual(net.input_dim, 33)
        self.assertEqual(len(net.hidden_dims), 2)
        self.assertIn('enc_gate_0', net.weights)

    def test_unet_forward(self):
        """Forward pass should produce a finite (batch, 1) output."""
        net = UNetMLP(
            input_dim=33,
            hidden_dims=[64, 32],
            output_dim=1,
            use_swiglu=False,  # plain activation keeps the test simple
        )
        rows = 16
        out = net.forward(np.random.randn(rows, 33))
        self.assertEqual(out.shape, (rows, 1))
        self.assertTrue(np.isfinite(out).all())

    def test_unet_skip_connections(self):
        """Skip-connection weight matrices should exist after a forward pass."""
        net = UNetMLP(
            input_dim=33,
            hidden_dims=[64, 32],
            output_dim=1,
            use_swiglu=False,
        )
        net.forward(np.random.randn(8, 33))
        for key in ('skip_0', 'skip_1'):
            self.assertIn(key, net.weights)
class TestDeepEnsemble(unittest.TestCase):
    """Test QLabs Technique #6: Deep Ensembling"""

    def test_ensemble_initialization(self):
        """Ensemble should track its member count and seeds."""
        from sklearn.linear_model import LinearRegression
        bagged = DeepEnsemble(
            LinearRegression,
            n_models=5,
            seeds=[1, 2, 3, 4, 5],
        )
        self.assertEqual(bagged.n_models, 5)
        self.assertEqual(len(bagged.seeds), 5)

    def test_ensemble_fit_predict(self):
        """Fitted ensemble should return per-sample mean and non-negative std."""
        from sklearn.linear_model import Ridge
        # Synthetic linear data with a little noise.
        np.random.seed(42)
        features = np.random.randn(100, 5)
        target = features[:, 0] + 2 * features[:, 1] + np.random.randn(100) * 0.1
        bagged = DeepEnsemble(
            Ridge,
            n_models=3,
            seeds=[1, 2, 3],
        )
        bagged.fit(features, target, alpha=1.0)
        mean_pred, std_pred = bagged.predict_regression(np.random.randn(10, 5))
        self.assertEqual(mean_pred.shape, (10,))
        self.assertEqual(std_pred.shape, (10,))
        # Standard deviation is non-negative by definition.
        self.assertTrue((std_pred >= 0).all())
class TestQLabsHyperParams(unittest.TestCase):
    """Test QLabs Technique #2: Heavy Regularization"""

    def test_heavy_regularization_values(self):
        """Default hyperparameters should be stricter than sklearn's."""
        hp = QLabsHyperParams()
        # QLabs reference value for XGBoost L2 regularization.
        self.assertEqual(hp.xgb_reg_lambda, 1.6)
        # sklearn defaults are min_samples_leaf=1 / min_samples_split=2;
        # heavy regularization should exceed both.
        self.assertGreater(hp.gb_min_samples_leaf, 1)
        self.assertGreater(hp.gb_min_samples_split, 2)
        self.assertGreater(hp.dropout, 0)

    def test_epoch_shuffling_config(self):
        """Early stopping must be enabled by default."""
        hp = QLabsHyperParams()
        self.assertGreater(hp.early_stopping_rounds, 0)
class TestMCMLQLabs(unittest.TestCase):
    """Test QLabs-enhanced MCML system"""

    def setUp(self):
        """Create the scratch output directory for this test class."""
        self.output_dir = "mc_forewarning_qlabs_fork/results/test_mcml_qlabs"
        Path(self.output_dir).mkdir(parents=True, exist_ok=True)

    def test_initialization(self):
        """Constructor flags should round-trip onto the instance."""
        trainer = MCMLQLabs(
            output_dir=self.output_dir,
            use_ensemble=True,
            n_ensemble_models=4,
            use_unet=True,
            heavy_regularization=True,
        )
        self.assertTrue(trainer.use_ensemble)
        self.assertEqual(trainer.n_ensemble_models, 4)
        self.assertTrue(trainer.heavy_regularization)

    def test_epoch_shuffling(self):
        """Each epoch should see a freshly shuffled sample ordering."""
        trainer = MCMLQLabs(output_dir=self.output_dir)
        features = np.random.randn(100, 10)
        target = np.random.randn(100)
        epochs = trainer._shuffle_epochs(features, target, n_epochs=5)
        self.assertEqual(len(epochs), 5)
        # If shuffling works, the leading sample differs across epochs.
        leading = {epoch[0][0][0] for epoch in epochs}
        self.assertGreater(len(leading), 1)
class TestE2EForewarning(unittest.TestCase):
    """End-to-end tests for the forewarning system"""
    def setUp(self):
        """Set up test fixtures."""
        # NOTE(review): path is relative to the current working directory —
        # confirm the suite is always launched from the repo root.
        self.output_dir = "mc_forewarning_qlabs_fork/results/test_e2e"
        Path(self.output_dir).mkdir(parents=True, exist_ok=True)
        # Generate synthetic corpus data
        self._generate_synthetic_corpus()
    def _generate_synthetic_corpus(self) -> None:
        """Generate synthetic MC trial data for testing.

        Writes one parquet batch of 500 fake trials plus a matching SQLite
        index under self.output_dir. Metrics are derived from a simple
        linear model of the parameters so the ML pipeline has real signal
        to learn.
        """
        import pandas as pd
        np.random.seed(42)
        n_trials = 500
        # Generate parameter columns (P_* = trial parameters)
        data = {
            'trial_id': range(n_trials),
            'P_vel_div_threshold': np.random.uniform(-0.04, -0.008, n_trials),
            'P_vel_div_extreme': np.random.uniform(-0.12, -0.02, n_trials),
            'P_max_leverage': np.random.uniform(1.5, 12, n_trials),
            'P_min_leverage': np.random.uniform(0.1, 1.5, n_trials),
            'P_fraction': np.random.uniform(0.05, 0.4, n_trials),
            'P_fixed_tp_pct': np.random.uniform(0.003, 0.03, n_trials),
            'P_stop_pct': np.random.uniform(0.2, 5, n_trials),
            'P_max_hold_bars': np.random.randint(20, 600, n_trials),
            'P_leverage_convexity': np.random.uniform(0.75, 6, n_trials),
            'P_use_direction_confirm': np.random.choice([True, False], n_trials),
            'P_use_alpha_layers': np.random.choice([True, False], n_trials),
            'P_use_dynamic_leverage': np.random.choice([True, False], n_trials),
            'P_use_sp_fees': np.random.choice([True, False], n_trials),
            'P_use_sp_slippage': np.random.choice([True, False], n_trials),
            'P_use_ob_edge': np.random.choice([True, False], n_trials),
            'P_use_asset_selection': np.random.choice([True, False], n_trials),
            'P_ob_imbalance_bias': np.random.uniform(-0.25, 0.15, n_trials),
            'P_ob_depth_scale': np.random.uniform(0.3, 2, n_trials),
            'P_acb_beta_high': np.random.uniform(0.4, 1.5, n_trials),
            'P_acb_beta_low': np.random.uniform(0, 0.6, n_trials),
        }
        # Generate metrics based on parameters (simplified model):
        # tighter thresholds and higher leverage help, wide stops hurt,
        # plus Gaussian noise so the relationship is learnable but not exact.
        roi = (
            -data['P_vel_div_threshold'] * 1000 +
            data['P_max_leverage'] * 2 -
            data['P_stop_pct'] * 5 +
            np.random.randn(n_trials) * 10
        )
        # M_* = trial outcome metrics, all noisy functions of roi
        data['M_roi_pct'] = roi
        data['M_max_drawdown_pct'] = np.abs(roi) * 0.5 + np.random.randn(n_trials) * 5
        data['M_profit_factor'] = 1 + roi / 100 + np.random.randn(n_trials) * 0.2
        data['M_win_rate'] = 0.4 + roi / 500 + np.random.randn(n_trials) * 0.05
        data['M_sharpe_ratio'] = roi / 20 + np.random.randn(n_trials) * 0.5
        data['M_n_trades'] = np.random.randint(20, 200, n_trials)
        # Classification labels (L_* = boolean training targets)
        data['L_profitable'] = roi > 0
        data['L_strongly_profitable'] = roi > 30
        data['L_drawdown_ok'] = data['M_max_drawdown_pct'] < 20
        data['L_sharpe_ok'] = data['M_sharpe_ratio'] > 1.5
        data['L_pf_ok'] = data['M_profit_factor'] > 1.10
        data['L_wr_ok'] = data['M_win_rate'] > 0.45
        # Champion region = all quality gates pass simultaneously
        data['L_champion_region'] = (
            data['L_strongly_profitable'] &
            data['L_drawdown_ok'] &
            data['L_sharpe_ok'] &
            data['L_pf_ok'] &
            data['L_wr_ok']
        )
        data['L_catastrophic'] = (roi < -30) | (data['M_max_drawdown_pct'] > 40)
        data['L_inert'] = data['M_n_trades'] < 50
        data['L_h2_degradation'] = np.random.choice([True, False], n_trials)
        df = pd.DataFrame(data)
        # Save to parquet (batch file layout expected by the trainer)
        results_dir = Path(self.output_dir) / "results"
        results_dir.mkdir(parents=True, exist_ok=True)
        df.to_parquet(results_dir / "batch_0001_results.parquet", index=False)
        # Create SQLite index mirroring the key metrics per trial
        import sqlite3
        conn = sqlite3.connect(Path(self.output_dir) / "mc_index.sqlite")
        cursor = conn.cursor()
        # Drop first so reruns of setUp start from a clean table
        cursor.execute('DROP TABLE IF EXISTS mc_index')
        cursor.execute('''
            CREATE TABLE IF NOT EXISTS mc_index (
                trial_id INTEGER PRIMARY KEY,
                batch_id INTEGER,
                status TEXT,
                roi_pct REAL,
                profit_factor REAL,
                win_rate REAL,
                max_dd_pct REAL,
                sharpe REAL,
                n_trades INTEGER,
                champion_region INTEGER,
                catastrophic INTEGER,
                created_at INTEGER
            )
        ''')
        for i in range(n_trials):
            try:
                cursor.execute('''
                    INSERT INTO mc_index VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
                ''', (
                    i, 1, 'completed', float(roi[i]), float(data['M_profit_factor'][i]),
                    float(data['M_win_rate'][i]), float(data['M_max_drawdown_pct'][i]),
                    float(data['M_sharpe_ratio'][i]), int(data['M_n_trades'][i]),
                    int(data['L_champion_region'][i]), int(data['L_catastrophic'][i]), 0
                ))
            except sqlite3.IntegrityError:
                pass  # Skip duplicates
        conn.commit()
        conn.close()
    def test_training_pipeline(self):
        """Test full training pipeline."""
        ml = MCMLQLabs(
            output_dir=self.output_dir,
            models_dir=f"{self.output_dir}/models_qlabs",
            use_ensemble=False,  # Faster for testing
            n_ensemble_models=2,
            use_unet=False,  # Skip for speed
            heavy_regularization=True
        )
        try:
            result = ml.train_all_models(test_size=0.2, n_epochs=3)
            self.assertEqual(result['status'], 'success')
            self.assertIn('qlabs_techniques', result)
            # Check models were saved
            models_dir = Path(ml.models_dir)
            self.assertTrue((models_dir / "feature_names.json").exists())
            self.assertTrue((models_dir / "qlabs_config.json").exists())
        except Exception as e:
            # NOTE(review): any failure is downgraded to a skip — this can
            # mask real regressions in the training pipeline.
            self.skipTest(f"Training failed (may need real data): {e}")
    def test_forewarning_assessment(self):
        """Test forewarning assessment."""
        # Try to load existing models or skip.
        # NOTE(review): this test depends on test_training_pipeline having
        # run first in the same directory — ordering-dependent tests.
        models_dir = Path(self.output_dir) / "models_qlabs"
        if not (models_dir / "feature_names.json").exists():
            self.skipTest("No trained models available")
        try:
            forewarner = DolphinForewarnerQLabs(models_dir=str(models_dir))
        except Exception as e:
            self.skipTest(f"Could not load forewarner: {e}")
        # Create test config with only the features used during training
        # Get feature names from the scaler
        try:
            import json  # already imported at module level; redundant but harmless
            with open(models_dir / "feature_names.json", 'r') as f:
                feature_names = json.load(f)
            # Create a minimal config with just those features, defaulting
            # unknown names to 0 via the CHAMPION baseline lookup
            config_dict = {name: MCSampler.CHAMPION.get(name, 0) for name in feature_names}
            from mc.mc_sampler import MCTrialConfig  # redundant re-import (see module imports)
            config = MCTrialConfig.from_dict(config_dict)
        except Exception as e:
            self.skipTest(f"Could not create config: {e}")
        report = forewarner.assess(config)
        self.assertIsNotNone(report)
        self.assertIn('config', report.to_dict())
        self.assertIn('predicted_roi', report.to_dict())
class TestComparisonWithBaseline(unittest.TestCase):
    """Compare QLabs-enhanced vs baseline MCML"""

    def setUp(self):
        """Create the scratch output directory for this test class."""
        self.output_dir = "mc_forewarning_qlabs_fork/results/test_comparison"
        Path(self.output_dir).mkdir(parents=True, exist_ok=True)

    def test_prediction_uncertainty(self):
        """An ensemble of Ridge models should yield finite, non-negative std."""
        # Instantiated to mirror production wiring (constructor may set up
        # output directories); only the ensemble below is exercised directly.
        MCMLQLabs(
            output_dir=self.output_dir,
            use_ensemble=True,
            n_ensemble_models=4,
        )
        from sklearn.linear_model import Ridge
        bagged = DeepEnsemble(Ridge, n_models=4)
        # Synthetic linear data with a little noise.
        np.random.seed(42)
        train_x = np.random.randn(50, 10)
        train_y = train_x[:, 0] + np.random.randn(50) * 0.1
        # Member models differ via their random states, giving a spread.
        bagged.fit(train_x, train_y, alpha=1.0)
        _, std_pred = bagged.predict_regression(np.random.randn(5, 10))
        # Valid uncertainty estimates: no NaN/Inf and never negative.
        self.assertTrue(np.isfinite(std_pred).all())
        self.assertTrue((std_pred >= 0).all())
def run_tests():
    """Build a suite from every test class, run it, and report success.

    Returns:
        bool: True when every test passed.
    """
    loader = unittest.TestLoader()
    suite = unittest.TestSuite()
    # One entry per test class, in the order they appear in this module.
    for case in (
        TestMuonOptimizer,
        TestSwiGLU,
        TestUNetMLP,
        TestDeepEnsemble,
        TestQLabsHyperParams,
        TestMCMLQLabs,
        TestE2EForewarning,
        TestComparisonWithBaseline,
    ):
        suite.addTests(loader.loadTestsFromTestCase(case))
    outcome = unittest.TextTestRunner(verbosity=2).run(suite)
    return outcome.wasSuccessful()
if __name__ == "__main__":
    # CLI entry point: exit code 0 when the suite passes, 1 otherwise.
    sys.exit(0 if run_tests() else 1)