initial: import DOLPHIN baseline 2026-04-21 from dolphinng5_predict working tree
Includes core prod + GREEN/BLUE subsystems: - prod/ (BLUE harness, configs, scripts, docs) - nautilus_dolphin/ (GREEN Nautilus-native impl + dvae/ preserved) - adaptive_exit/ (AEM engine + models/bucket_assignments.pkl) - Observability/ (EsoF advisor, TUI, dashboards) - external_factors/ (EsoF producer) - mc_forewarning_qlabs_fork/ (MC regime/envelope) Excludes runtime caches, logs, backups, and reproducible artifacts per .gitignore.
This commit is contained in:
523
mc_forewarning_qlabs_fork/tests/test_qlabs_ml.py
Executable file
523
mc_forewarning_qlabs_fork/tests/test_qlabs_ml.py
Executable file
@@ -0,0 +1,523 @@
|
||||
"""
|
||||
Test Suite for QLabs-Enhanced MC Forewarning System
|
||||
===================================================
|
||||
|
||||
Comprehensive tests for:
|
||||
1. Individual QLabs ML techniques
|
||||
2. End-to-end ML model training
|
||||
3. E2E forewarning system performance
|
||||
4. Comparison with baseline MCML
|
||||
"""
|
||||
|
||||
import sys
|
||||
import os
|
||||
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
|
||||
|
||||
import unittest
|
||||
import numpy as np
|
||||
import json
|
||||
from pathlib import Path
|
||||
from typing import Dict, Any
|
||||
|
||||
# Import MC modules
|
||||
from mc.mc_sampler import MCSampler, MCTrialConfig
|
||||
from mc.mc_metrics import MCTrialResult, MCMetrics
|
||||
from mc.mc_ml import MCML, DolphinForewarner
|
||||
from mc.mc_ml_qlabs import (
|
||||
MCMLQLabs, DolphinForewarnerQLabs, MuonOptimizer,
|
||||
SwiGLU, UNetMLP, DeepEnsemble, QLabsHyperParams
|
||||
)
|
||||
|
||||
|
||||
class TestMuonOptimizer(unittest.TestCase):
    """Test QLabs Technique #1: Muon Optimizer."""

    def test_newton_schulz_orthogonalization(self):
        """Newton-Schulz should produce a near-orthogonal matrix.

        The iteration is approximate, so the Gram matrix is only required
        to be *close* to identity (loose tolerances below).
        """
        # Seed so the tolerance-based assertions are deterministic, not flaky.
        np.random.seed(0)
        optimizer = MuonOptimizer()

        # Random tall matrix to orthogonalize.
        X = np.random.randn(10, 8)
        X_ortho = optimizer.newton_schulz(X)

        # Use the smaller-dimension Gram matrix: X^T X for tall input,
        # X X^T for wide input — that is the one that can approach identity.
        if X.shape[0] >= X.shape[1]:
            gram = X_ortho.T @ X_ortho
        else:
            gram = X_ortho @ X_ortho.T

        # Diagonal should be close to 1, off-diagonal close to 0.
        diag_mean = np.mean(np.diag(gram))
        off_diag_mean = np.mean(np.abs(gram - np.eye(gram.shape[0])))

        self.assertGreater(diag_mean, 0.8, "Diagonal should be close to 1")
        self.assertLess(off_diag_mean, 0.3, "Off-diagonal should be close to 0")

    def test_compute_update_shape(self):
        """The Muon update must have the same shape as the parameter."""
        np.random.seed(0)
        optimizer = MuonOptimizer()

        grad = np.random.randn(10, 8)
        param = np.random.randn(10, 8)

        update = optimizer.compute_update(grad, param)

        self.assertEqual(update.shape, param.shape)

    def test_momentum_accumulation(self):
        """Momentum state must accumulate across successive steps."""
        np.random.seed(0)
        optimizer = MuonOptimizer(momentum=0.9)

        grad1 = np.random.randn(5, 4)
        grad2 = np.random.randn(5, 4)
        param = np.random.randn(5, 4)

        # Two consecutive updates; only the optimizer's internal state after
        # the calls matters here, so the return values are discarded.
        optimizer.compute_update(grad1, param)
        optimizer.compute_update(grad2, param)

        # The buffer should now hold history and the step counter advance.
        self.assertIsNotNone(optimizer.momentum_buffer)
        self.assertEqual(optimizer.step_count, 2)
|
||||
|
||||
|
||||
class TestSwiGLU(unittest.TestCase):
    """Test QLabs Technique #4: SwiGLU Activation."""

    def test_swiglu_output_shape(self):
        """Output must be (batch, hidden) for a (batch, input) activation."""
        n_rows, n_in, n_hidden = 32, 64, 128

        activations = np.random.randn(n_rows, n_in)
        gate_weights = np.random.randn(n_in, n_hidden)
        up_weights = np.random.randn(n_in, n_hidden)

        result = SwiGLU.forward(activations, gate_weights, up_weights)

        self.assertEqual(result.shape, (n_rows, n_hidden))

    def test_swiglu_gating_effect(self):
        """The gated projection should yield a finite, non-trivial output."""
        activations = np.random.randn(10, 20)
        gate_weights = np.random.randn(20, 30)
        up_weights = np.random.randn(20, 30)

        result = SwiGLU.forward(activations, gate_weights, up_weights)

        # A non-degenerate gate must not zero everything out...
        self.assertFalse(np.allclose(result, 0))
        # ...and must never produce NaN/Inf values.
        self.assertTrue(np.all(np.isfinite(result)))
|
||||
|
||||
|
||||
class TestUNetMLP(unittest.TestCase):
    """Test QLabs Technique #5: U-Net Skip Connections."""

    def test_unet_initialization(self):
        """A freshly built U-Net exposes its dims and encoder weights."""
        net = UNetMLP(
            input_dim=33,
            hidden_dims=[64, 32],
            output_dim=1,
            use_swiglu=True,
        )

        self.assertEqual(net.input_dim, 33)
        self.assertEqual(len(net.hidden_dims), 2)
        self.assertIn('enc_gate_0', net.weights)

    def test_unet_forward(self):
        """Forward pass returns finite (batch, 1) predictions."""
        net = UNetMLP(
            input_dim=33,
            hidden_dims=[64, 32],
            output_dim=1,
            use_swiglu=False,  # plain activations keep this test simple
        )

        n_rows = 16
        batch = np.random.randn(n_rows, 33)

        preds = net.forward(batch)

        self.assertEqual(preds.shape, (n_rows, 1))
        self.assertTrue(np.all(np.isfinite(preds)))

    def test_unet_skip_connections(self):
        """Skip-connection weights must be present in the network."""
        net = UNetMLP(
            input_dim=33,
            hidden_dims=[64, 32],
            output_dim=1,
            use_swiglu=False,
        )

        # Run one forward pass; only its side effects (if any) on the
        # network matter for this check, so the result is discarded.
        net.forward(np.random.randn(8, 33))

        self.assertIn('skip_0', net.weights)
        self.assertIn('skip_1', net.weights)
|
||||
|
||||
|
||||
class TestDeepEnsemble(unittest.TestCase):
    """Test QLabs Technique #6: Deep Ensembling."""

    def test_ensemble_initialization(self):
        """The ensemble records its member count and per-member seeds."""
        from sklearn.linear_model import LinearRegression

        bag = DeepEnsemble(
            LinearRegression,
            n_models=5,
            seeds=[1, 2, 3, 4, 5],
        )

        self.assertEqual(bag.n_models, 5)
        self.assertEqual(len(bag.seeds), 5)

    def test_ensemble_fit_predict(self):
        """Fit/predict yields per-point means and non-negative spreads."""
        from sklearn.linear_model import Ridge

        # Synthetic linear target with mild Gaussian noise.
        np.random.seed(42)
        features = np.random.randn(100, 5)
        target = features[:, 0] + 2*features[:, 1] + np.random.randn(100) * 0.1

        bag = DeepEnsemble(
            Ridge,
            n_models=3,
            seeds=[1, 2, 3],
        )
        bag.fit(features, target, alpha=1.0)

        # Predict on held-out points.
        holdout = np.random.randn(10, 5)
        mean_pred, std_pred = bag.predict_regression(holdout)

        self.assertEqual(mean_pred.shape, (10,))
        self.assertEqual(std_pred.shape, (10,))
        # Ensemble spread is a standard deviation, so never negative.
        self.assertTrue(np.all(std_pred >= 0))
|
||||
|
||||
|
||||
class TestQLabsHyperParams(unittest.TestCase):
    """Test QLabs Technique #2: Heavy Regularization."""

    def test_heavy_regularization_values(self):
        """Defaults must encode QLabs' aggressive regularization settings."""
        hp = QLabsHyperParams()

        # XGBoost L2 penalty matches the QLabs reference value of 1.6.
        self.assertEqual(hp.xgb_reg_lambda, 1.6)

        # Gradient-boosting minimums exceed sklearn defaults (1 and 2).
        self.assertGreater(hp.gb_min_samples_leaf, 1)
        self.assertGreater(hp.gb_min_samples_split, 2)

        # A non-zero dropout rate must be configured.
        self.assertGreater(hp.dropout, 0)

    def test_epoch_shuffling_config(self):
        """Early stopping must be enabled via a positive round count."""
        hp = QLabsHyperParams()

        self.assertGreater(hp.early_stopping_rounds, 0)
|
||||
|
||||
|
||||
class TestMCMLQLabs(unittest.TestCase):
    """Test QLabs-enhanced MCML system."""

    def setUp(self):
        """Create (or reuse) the results directory for this test class."""
        self.output_dir = "mc_forewarning_qlabs_fork/results/test_mcml_qlabs"
        Path(self.output_dir).mkdir(parents=True, exist_ok=True)

    def test_initialization(self):
        """Constructor flags must round-trip onto the trainer instance."""
        trainer = MCMLQLabs(
            output_dir=self.output_dir,
            use_ensemble=True,
            n_ensemble_models=4,
            use_unet=True,
            heavy_regularization=True,
        )

        self.assertTrue(trainer.use_ensemble)
        self.assertEqual(trainer.n_ensemble_models, 4)
        self.assertTrue(trainer.heavy_regularization)

    def test_epoch_shuffling(self):
        """Each epoch should present the samples in a different order."""
        trainer = MCMLQLabs(output_dir=self.output_dir)

        features = np.random.randn(100, 10)
        target = np.random.randn(100)

        epochs = trainer._shuffle_epochs(features, target, n_epochs=5)

        self.assertEqual(len(epochs), 5)

        # If shuffling works, the leading feature value differs across epochs.
        leading_values = [epoch[0][0][0] for epoch in epochs]
        self.assertGreater(len(set(leading_values)), 1)
|
||||
|
||||
|
||||
class TestE2EForewarning(unittest.TestCase):
    """End-to-end tests for the forewarning system"""

    def setUp(self):
        """Set up test fixtures."""
        # NOTE(review): fixed repo-relative path, so fixtures (and any trained
        # models) persist across test runs and across tests in this class.
        self.output_dir = "mc_forewarning_qlabs_fork/results/test_e2e"
        Path(self.output_dir).mkdir(parents=True, exist_ok=True)

        # Generate synthetic corpus data
        self._generate_synthetic_corpus()

    def _generate_synthetic_corpus(self):
        """Generate synthetic MC trial data for testing.

        Writes one parquet batch of 500 synthetic trials plus a SQLite index
        under ``self.output_dir``, mimicking the corpus layout the MCML
        training pipeline reads (presumably — TODO confirm against MCMLQLabs).
        """
        import pandas as pd

        # Fixed seed so the corpus is identical on every run.
        np.random.seed(42)
        n_trials = 500

        # Generate parameter columns (P_* = sampled strategy parameters).
        data = {
            'trial_id': range(n_trials),
            'P_vel_div_threshold': np.random.uniform(-0.04, -0.008, n_trials),
            'P_vel_div_extreme': np.random.uniform(-0.12, -0.02, n_trials),
            'P_max_leverage': np.random.uniform(1.5, 12, n_trials),
            'P_min_leverage': np.random.uniform(0.1, 1.5, n_trials),
            'P_fraction': np.random.uniform(0.05, 0.4, n_trials),
            'P_fixed_tp_pct': np.random.uniform(0.003, 0.03, n_trials),
            'P_stop_pct': np.random.uniform(0.2, 5, n_trials),
            'P_max_hold_bars': np.random.randint(20, 600, n_trials),
            'P_leverage_convexity': np.random.uniform(0.75, 6, n_trials),
            'P_use_direction_confirm': np.random.choice([True, False], n_trials),
            'P_use_alpha_layers': np.random.choice([True, False], n_trials),
            'P_use_dynamic_leverage': np.random.choice([True, False], n_trials),
            'P_use_sp_fees': np.random.choice([True, False], n_trials),
            'P_use_sp_slippage': np.random.choice([True, False], n_trials),
            'P_use_ob_edge': np.random.choice([True, False], n_trials),
            'P_use_asset_selection': np.random.choice([True, False], n_trials),
            'P_ob_imbalance_bias': np.random.uniform(-0.25, 0.15, n_trials),
            'P_ob_depth_scale': np.random.uniform(0.3, 2, n_trials),
            'P_acb_beta_high': np.random.uniform(0.4, 1.5, n_trials),
            'P_acb_beta_low': np.random.uniform(0, 0.6, n_trials),
        }

        # Generate metrics based on parameters (simplified model):
        # ROI rises with more-negative velocity-divergence thresholds and with
        # leverage, falls with wider stops, plus Gaussian noise.
        roi = (
            -data['P_vel_div_threshold'] * 1000 +
            data['P_max_leverage'] * 2 -
            data['P_stop_pct'] * 5 +
            np.random.randn(n_trials) * 10
        )

        # M_* = per-trial outcome metrics, each a noisy function of roi.
        data['M_roi_pct'] = roi
        data['M_max_drawdown_pct'] = np.abs(roi) * 0.5 + np.random.randn(n_trials) * 5
        data['M_profit_factor'] = 1 + roi / 100 + np.random.randn(n_trials) * 0.2
        data['M_win_rate'] = 0.4 + roi / 500 + np.random.randn(n_trials) * 0.05
        data['M_sharpe_ratio'] = roi / 20 + np.random.randn(n_trials) * 0.5
        data['M_n_trades'] = np.random.randint(20, 200, n_trials)

        # Classification labels (L_*) derived from the synthetic metrics;
        # these are boolean numpy arrays, combined element-wise below.
        data['L_profitable'] = roi > 0
        data['L_strongly_profitable'] = roi > 30
        data['L_drawdown_ok'] = data['M_max_drawdown_pct'] < 20
        data['L_sharpe_ok'] = data['M_sharpe_ratio'] > 1.5
        data['L_pf_ok'] = data['M_profit_factor'] > 1.10
        data['L_wr_ok'] = data['M_win_rate'] > 0.45
        # "Champion region" = every quality gate passes simultaneously.
        data['L_champion_region'] = (
            data['L_strongly_profitable'] &
            data['L_drawdown_ok'] &
            data['L_sharpe_ok'] &
            data['L_pf_ok'] &
            data['L_wr_ok']
        )
        data['L_catastrophic'] = (roi < -30) | (data['M_max_drawdown_pct'] > 40)
        data['L_inert'] = data['M_n_trades'] < 50
        data['L_h2_degradation'] = np.random.choice([True, False], n_trials)

        df = pd.DataFrame(data)

        # Save to parquet
        results_dir = Path(self.output_dir) / "results"
        results_dir.mkdir(parents=True, exist_ok=True)
        df.to_parquet(results_dir / "batch_0001_results.parquet", index=False)

        # Create SQLite index
        import sqlite3
        conn = sqlite3.connect(Path(self.output_dir) / "mc_index.sqlite")
        cursor = conn.cursor()

        # Rebuild the index from scratch so repeated runs stay consistent.
        cursor.execute('DROP TABLE IF EXISTS mc_index')

        cursor.execute('''
            CREATE TABLE IF NOT EXISTS mc_index (
                trial_id INTEGER PRIMARY KEY,
                batch_id INTEGER,
                status TEXT,
                roi_pct REAL,
                profit_factor REAL,
                win_rate REAL,
                max_dd_pct REAL,
                sharpe REAL,
                n_trades INTEGER,
                champion_region INTEGER,
                catastrophic INTEGER,
                created_at INTEGER
            )
        ''')

        # Insert one row per trial; numpy scalars are coerced to plain
        # Python float/int so sqlite3 can bind them.
        for i in range(n_trials):
            try:
                cursor.execute('''
                    INSERT INTO mc_index VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
                ''', (
                    i, 1, 'completed', float(roi[i]), float(data['M_profit_factor'][i]),
                    float(data['M_win_rate'][i]), float(data['M_max_drawdown_pct'][i]),
                    float(data['M_sharpe_ratio'][i]), int(data['M_n_trades'][i]),
                    int(data['L_champion_region'][i]), int(data['L_catastrophic'][i]), 0
                ))
            except sqlite3.IntegrityError:
                pass  # Skip duplicates

        conn.commit()
        conn.close()

    def test_training_pipeline(self):
        """Test full training pipeline."""
        # Ensemble and U-Net are disabled to keep the test fast.
        ml = MCMLQLabs(
            output_dir=self.output_dir,
            models_dir=f"{self.output_dir}/models_qlabs",
            use_ensemble=False,  # Faster for testing
            n_ensemble_models=2,
            use_unet=False,  # Skip for speed
            heavy_regularization=True
        )

        try:
            result = ml.train_all_models(test_size=0.2, n_epochs=3)

            self.assertEqual(result['status'], 'success')
            self.assertIn('qlabs_techniques', result)

            # Check models were saved
            models_dir = Path(ml.models_dir)
            self.assertTrue((models_dir / "feature_names.json").exists())
            self.assertTrue((models_dir / "qlabs_config.json").exists())

        except Exception as e:
            # Best-effort: the synthetic corpus may not satisfy the trainer's
            # requirements, so failures downgrade to a skip rather than a fail.
            self.skipTest(f"Training failed (may need real data): {e}")

    def test_forewarning_assessment(self):
        """Test forewarning assessment."""
        # Try to load existing models or skip
        # NOTE(review): depends on artifacts left by test_training_pipeline in
        # the shared fixed directory; otherwise this test always skips.
        models_dir = Path(self.output_dir) / "models_qlabs"

        if not (models_dir / "feature_names.json").exists():
            self.skipTest("No trained models available")

        try:
            forewarner = DolphinForewarnerQLabs(models_dir=str(models_dir))
        except Exception as e:
            self.skipTest(f"Could not load forewarner: {e}")

        # Create test config with only the features used during training
        # Get feature names from the scaler
        try:
            import json
            with open(models_dir / "feature_names.json", 'r') as f:
                feature_names = json.load(f)

            # Create a minimal config with just those features; missing
            # champion values default to 0.
            config_dict = {name: MCSampler.CHAMPION.get(name, 0) for name in feature_names}
            from mc.mc_sampler import MCTrialConfig
            config = MCTrialConfig.from_dict(config_dict)
        except Exception as e:
            self.skipTest(f"Could not create config: {e}")

        report = forewarner.assess(config)

        self.assertIsNotNone(report)
        self.assertIn('config', report.to_dict())
        self.assertIn('predicted_roi', report.to_dict())
|
||||
|
||||
|
||||
class TestComparisonWithBaseline(unittest.TestCase):
    """Compare QLabs-enhanced vs baseline MCML."""

    def setUp(self):
        """Create (or reuse) the comparison results directory."""
        self.output_dir = "mc_forewarning_qlabs_fork/results/test_comparison"
        Path(self.output_dir).mkdir(parents=True, exist_ok=True)

    def test_prediction_uncertainty(self):
        """An ensemble must report finite, non-negative predictive spread."""
        # Instantiate the QLabs trainer first (exercises its constructor).
        ml_qlabs = MCMLQLabs(
            output_dir=self.output_dir,
            use_ensemble=True,
            n_ensemble_models=4,
        )

        # Dummy ensemble of linear models stands in for the full pipeline.
        from sklearn.linear_model import Ridge

        bag = DeepEnsemble(Ridge, n_models=4)

        # Synthetic regression data; member-to-member variation comes from
        # the ensemble's differing random states.
        np.random.seed(42)
        train_x = np.random.randn(50, 10)
        train_y = train_x[:, 0] + np.random.randn(50) * 0.1
        bag.fit(train_x, train_y, alpha=1.0)

        test_x = np.random.randn(5, 10)
        mean, std = bag.predict_regression(test_x)

        # Spread estimates must be finite and non-negative.
        self.assertTrue(np.all(np.isfinite(std)))
        self.assertTrue(np.all(std >= 0))
|
||||
|
||||
|
||||
def run_tests():
    """Build a suite from every test class in this module, run it, and
    return True when all tests passed."""
    # Preserve the original class order when assembling the suite.
    test_classes = (
        TestMuonOptimizer,
        TestSwiGLU,
        TestUNetMLP,
        TestDeepEnsemble,
        TestQLabsHyperParams,
        TestMCMLQLabs,
        TestE2EForewarning,
        TestComparisonWithBaseline,
    )

    loader = unittest.TestLoader()
    suite = unittest.TestSuite()
    for cls in test_classes:
        suite.addTests(loader.loadTestsFromTestCase(cls))

    outcome = unittest.TextTestRunner(verbosity=2).run(suite)
    return outcome.wasSuccessful()
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Exit code 0 on a fully green run, 1 otherwise (CI-friendly).
    raise SystemExit(0 if run_tests() else 1)
|
||||
Reference in New Issue
Block a user