Files
DOLPHIN/nautilus_dolphin/test_noise_experiment.py
hjnormey 01c19662cb initial: import DOLPHIN baseline 2026-04-21 from dolphinng5_predict working tree
Includes core prod + GREEN/BLUE subsystems:
- prod/ (BLUE harness, configs, scripts, docs)
- nautilus_dolphin/ (GREEN Nautilus-native impl + dvae/ preserved)
- adaptive_exit/ (AEM engine + models/bucket_assignments.pkl)
- Observability/ (EsoF advisor, TUI, dashboards)
- external_factors/ (EsoF producer)
- mc_forewarning_qlabs_fork/ (MC regime/envelope)

Excludes runtime caches, logs, backups, and reproducible artifacts per .gitignore.
2026-04-21 16:58:38 +02:00

355 lines
16 KiB
Python
Executable File
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""Stochastic resonance + noise injection experiment.
Question: can controlled randomness at specific points in a near-threshold system
improve performance? Three mechanistic hypotheses tested:
H1 — STOCHASTIC RESONANCE on vel_div signal (col: 'vel_div')
Bars hovering just above threshold (-0.02) occasionally fire with noise.
SR predicts: optimal sigma ≈ mean(distance_to_threshold) for near-miss bars.
Sigmas: 0.001 / 0.003 / 0.005 / 0.010 (5% / 15% / 25% / 50% of |threshold|)
H2 — PRICE DITHER on asset execution prices
Avoid fill clustering at round price levels. Small multiplicative noise.
Sigmas: 0.0001 (1bp) / 0.0005 (5bp)
H3 — TP TARGET DITHER per-run
Sensitivity analysis: does TP=99bps sit at a local optimum?
Sigma: 0.0001 (±1bp 1-sigma band around 0.0099)
Stats: Mann-Whitney U vs baseline, Cohen's d, 95% bootstrap CI on ROI delta.
Results saved incrementally → run_logs/noise_exp_YYYYMMDD_HHMMSS.csv
"""
import sys, time, math, csv, os
sys.stdout.reconfigure(encoding='utf-8', errors='replace')
from pathlib import Path
from datetime import datetime
from scipy.stats import mannwhitneyu
import numpy as np
import pandas as pd
# ── filesystem layout ───────────────────────────────────────────────────────
# Root of the DOLPHIN working tree. The default is the path the script was
# written against; set DOLPHIN_HCM_ROOT to run from any other checkout/machine
# without editing this file.
HCM = Path(os.environ.get(
    "DOLPHIN_HCM_ROOT",
    r"C:\Users\Lenovo\Documents\- DOLPHIN NG HD HCM TSF Predict",
))
# Make the project packages imported further down resolvable.
sys.path.insert(0, str(HCM / "nautilus_dolphin"))
VBT_DIR = HCM / "vbt_cache"  # globbed below for the *.parquet input files
MC_MODELS_DIR = str(HCM / "nautilus_dolphin" / "mc_results" / "models")
LOG_DIR = HCM / "nautilus_dolphin" / "run_logs"  # result CSVs are written here
# No parents=True on purpose: a mistyped root should fail here instead of
# silently creating an empty directory tree.
LOG_DIR.mkdir(exist_ok=True)
# ── experiment parameters ───────────────────────────────────────────────────
# Label of the noise-free reference run that every other config is compared to.
BASELINE_LABEL = "baseline"

# Seeds per (noise_type, sigma) — 25 gives ~80% power to detect 3% ROI shift
N_SEEDS = 25

# One entry per experiment arm: (label, noise_type, sigma).
# noise_type is one of "none", "signal_sr", "price_dither", "tp_dither".
CONFIGS = [
    (BASELINE_LABEL, "none", 0.0),
    # H1: additive noise on the vel_div signal
    ("sr_5pct", "signal_sr", 0.001),
    ("sr_15pct", "signal_sr", 0.003),
    ("sr_25pct", "signal_sr", 0.005),
    ("sr_50pct", "signal_sr", 0.010),
    # H2: multiplicative noise on asset prices
    ("price_1bp", "price_dither", 0.0001),
    ("price_5bp", "price_dither", 0.0005),
    # H3: per-run shift of the take-profit target
    ("tp_1bp", "tp_dither", 0.0001),
]
# ── engine config (exact champion) ──────────────────────────────────────────
# Columns of each day's frame that are NOT asset price columns; everything
# else in the frame is treated as an asset column when the data is loaded.
META_COLS = {
    'timestamp',
    'scan_number',
    'v50_lambda_max_velocity',
    'v150_lambda_max_velocity',
    'v300_lambda_max_velocity',
    'v750_lambda_max_velocity',
    'vel_div',
    'instability_50',
    'instability_150',
}

# Keyword arguments handed to NDAlphaEngine for every run.
ENGINE_KWARGS = {
    # capital + signal thresholds
    'initial_capital': 25000.0,
    'vel_div_threshold': -0.02,
    'vel_div_extreme': -0.05,
    # sizing
    'min_leverage': 0.5,
    'max_leverage': 5.0,
    'leverage_convexity': 3.0,
    'fraction': 0.20,
    # exits
    'fixed_tp_pct': 0.0099,
    'stop_pct': 1.0,
    'max_hold_bars': 120,
    # direction confirmation
    'use_direction_confirm': True,
    'dc_lookback_bars': 7,
    'dc_min_magnitude_bps': 0.75,
    'dc_skip_contradicts': True,
    'dc_leverage_boost': 1.0,
    'dc_leverage_reduce': 0.5,
    # asset selection
    'use_asset_selection': True,
    'min_irp_alignment': 0.45,
    # fees / slippage
    'use_sp_fees': True,
    'use_sp_slippage': True,
    'sp_maker_entry_rate': 0.62,
    'sp_maker_exit_rate': 0.50,
    # order-book edge
    'use_ob_edge': True,
    'ob_edge_bps': 5.0,
    'ob_confirm_rate': 0.40,
    # misc
    'lookback': 100,
    'use_alpha_layers': True,
    'use_dynamic_leverage': True,
    'seed': 42,
}

# Configuration passed to the engine together with the MC forewarner.
# The values it shares with ENGINE_KWARGS are repeated by hand here, so keep
# the two in sync when changing either.
MC_BASE_CFG = dict(
    trial_id=0,
    vel_div_threshold=-0.020,
    vel_div_extreme=-0.050,
    use_direction_confirm=True,
    dc_lookback_bars=7,
    dc_min_magnitude_bps=0.75,
    dc_skip_contradicts=True,
    dc_leverage_boost=1.00,
    dc_leverage_reduce=0.50,
    vd_trend_lookback=10,
    min_leverage=0.50,
    max_leverage=5.00,
    leverage_convexity=3.00,
    fraction=0.20,
    use_alpha_layers=True,
    use_dynamic_leverage=True,
    fixed_tp_pct=0.0099,
    stop_pct=1.00,
    max_hold_bars=120,
    use_sp_fees=True,
    use_sp_slippage=True,
    sp_maker_entry_rate=0.62,
    sp_maker_exit_rate=0.50,
    use_ob_edge=True,
    ob_edge_bps=5.00,
    ob_confirm_rate=0.40,
    ob_imbalance_bias=-0.09,
    ob_depth_scale=1.00,
    use_asset_selection=True,
    min_irp_alignment=0.45,
    lookback=100,
    acb_beta_high=0.80,
    acb_beta_low=0.20,
    acb_w750_threshold_pct=60,
)
# ── JIT warmup (one-time) ────────────────────────────────────────────────────
# Each *_nb kernel is imported and invoked once on tiny dummy inputs, and the
# elapsed wall time (imports included) is printed.
# NOTE(review): presumably these are numba-compiled kernels, so the first call
# triggers compilation — confirm in the source modules.
print("JIT warmup...", end='', flush=True)
t_jit = time.time()
from nautilus_dolphin.nautilus.alpha_asset_selector import (
    compute_irp_nb,
    compute_ars_nb,
    rank_assets_irp_nb,
)
from nautilus_dolphin.nautilus.alpha_bet_sizer import compute_sizing_nb
from nautilus_dolphin.nautilus.alpha_signal_generator import check_dc_nb
from nautilus_dolphin.nautilus.ob_features import (
    OBFeatureEngine,
    compute_imbalance_nb,
    compute_depth_1pct_nb,
    compute_depth_quality_nb,
    compute_fill_probability_nb,
    compute_spread_proxy_nb,
    compute_depth_asymmetry_nb,
    compute_imbalance_persistence_nb,
    compute_withdrawal_velocity_nb,
    compute_market_agreement_nb,
    compute_cascade_signal_nb,
)
from nautilus_dolphin.nautilus.ob_provider import MockOBProvider

# Asset-selection, sizing and direction-confirmation kernels.
_p = np.array([1.0, 2.0, 3.0], dtype=np.float64)
compute_irp_nb(_p, -1)
compute_ars_nb(1.0, 0.5, 0.01)
rank_assets_irp_nb(np.ones((10, 2), dtype=np.float64), 8, -1, 5, 500.0, 20, 0.20)
compute_sizing_nb(
    -0.03, -0.02, -0.05, 3.0, 0.5, 5.0, 0.20, True, True, 0.0,
    np.zeros(4, np.int64), np.zeros(4, np.int64),
    np.zeros(5, np.float64), 0, -1, 0.01, 0.04,
)
check_dc_nb(_p, 3, 1, 0.75)

# Order-book feature kernels, fed two small dummy depth arrays.
_b = np.array([100., 200., 300., 400., 500.], dtype=np.float64)
_a = np.array([110., 190., 310., 390., 510.], dtype=np.float64)
compute_imbalance_nb(_b, _a)
compute_depth_1pct_nb(_b, _a)
compute_depth_quality_nb(210., 200.)
compute_fill_probability_nb(1.0)
compute_spread_proxy_nb(_b, _a)
compute_depth_asymmetry_nb(_b, _a)
compute_imbalance_persistence_nb(np.array([0.1, -0.1], dtype=np.float64), 2)
compute_withdrawal_velocity_nb(np.array([100., 110.], dtype=np.float64), 1)
compute_market_agreement_nb(np.array([0.1, -0.05], dtype=np.float64), 2)
compute_cascade_signal_nb(np.array([-0.05, -0.15], dtype=np.float64), 2, -0.10)
print(f" {time.time()-t_jit:.1f}s")

# Engine-level imports happen after the warmup so they are not part of its timing.
from nautilus_dolphin.nautilus.esf_alpha_orchestrator import NDAlphaEngine
from nautilus_dolphin.nautilus.adaptive_circuit_breaker import AdaptiveCircuitBreaker
from mc.mc_ml import DolphinForewarner
# ── load shared infrastructure (one-time) ───────────────────────────────────
# These objects are built once and reused by every engine run.
print("Loading MC-Forewarner...", end='', flush=True)
forewarner = DolphinForewarner(models_dir=MC_MODELS_DIR)
print(" OK")

# Input files in sorted order; any file whose full path contains 'catalog'
# is skipped. The file stem doubles as the day identifier.
parquet_files = sorted(
    pq_path
    for pq_path in VBT_DIR.glob("*.parquet")
    if 'catalog' not in str(pq_path)
)
date_strings = [pq_path.stem for pq_path in parquet_files]

print("Initializing ACB...", end='', flush=True)
acb = AdaptiveCircuitBreaker()
acb.preload_w750(date_strings)
print(f" OK (w750 p60={acb._w750_threshold:.6f})")

# Mock order-book provider: a global imbalance bias plus per-asset overrides.
OB_ASSETS = ["BTCUSDT", "ETHUSDT", "BNBUSDT", "SOLUSDT"]
_mock_ob = MockOBProvider(
    imbalance_bias=-0.09,
    depth_scale=1.0,
    assets=OB_ASSETS,
    imbalance_biases={
        "BTCUSDT": -0.086,
        "ETHUSDT": -0.092,
        "BNBUSDT": +0.05,
        "SOLUSDT": +0.05,
    },
)
ob_eng = OBFeatureEngine(_mock_ob)
ob_eng.preload_date("mock", OB_ASSETS)
# ── pre-load all parquet data ────────────────────────────────────────────────
print("Pre-loading parquet data...", end='', flush=True)

# Step 1 — calibrate the volatility gate. Only the FIRST TWO files are used:
# for each bar from index 60 on, take the std of simple returns over the
# preceding 50 BTCUSDT prices, and keep the strictly positive values.
all_vols = []
for pf in parquet_files[:2]:
    df = pd.read_parquet(pf)
    if 'BTCUSDT' not in df.columns:
        continue
    pr = df['BTCUSDT'].values
    for i in range(60, len(pr)):
        seg = pr[i - 50:i]  # i >= 60, so this is always a full 50-bar window
        v = float(np.std(np.diff(seg) / seg[:-1]))
        if v > 0:  # also drops NaN, since NaN > 0 is False
            all_vols.append(v)

# Without samples the percentile is undefined; a NaN threshold would make
# every `dvol > vol_p60` comparison False, so fail loudly instead.
if not all_vols:
    raise RuntimeError(
        f"No positive BTCUSDT volatility samples in the first two parquet files "
        f"under {VBT_DIR}; cannot calibrate vol_p60"
    )
vol_p60 = float(np.percentile(all_vols, 60))

# Step 2 — cache every day as (frame, asset_columns, rolling_vol), keyed by
# file stem. rolling_vol[i] is the same 50-bar return std as above, left as
# NaN for the first 50 bars and for days without a BTCUSDT column.
pq_data = {}
for pf in parquet_files:
    df = pd.read_parquet(pf)
    ac = [c for c in df.columns if c not in META_COLS]
    bp = df['BTCUSDT'].values if 'BTCUSDT' in df.columns else None
    dv = np.full(len(df), np.nan)
    if bp is not None:
        for i in range(50, len(bp)):
            seg = bp[i - 50:i]
            dv[i] = float(np.std(np.diff(seg) / seg[:-1]))
    pq_data[pf.stem] = (df, ac, dv)
print(f" {len(pq_data)} days")
# also log near-threshold vel_div distribution (SR calibration info)
# Pool every non-NaN vel_div value strictly inside (-0.05, 0.0) across all days.
all_vd = []
for day_frame, _day_cols, _day_vol in pq_data.values():
    if 'vel_div' not in day_frame.columns:
        continue
    day_vd = day_frame['vel_div'].dropna().values
    inside_band = (day_vd > -0.05) & (day_vd < 0.0)
    all_vd.extend(day_vd[inside_band])
all_vd = np.array(all_vd)

# Narrow to ±0.005 around the -0.02 entry threshold and report its statistics.
close_to_gate = (all_vd > -0.025) & (all_vd < -0.015)
near_thresh = all_vd[close_to_gate]
print(f" vel_div near-threshold (-0.025 to -0.015): N={len(near_thresh)}, "
      f"mean={np.mean(near_thresh):+.5f}, σ={np.std(near_thresh):.5f}")
print(f" SR optimal sigma ≈ mean distance to threshold: "
      f"{float(np.mean(np.abs(near_thresh - (-0.02)))):.4f}")
# ── engine runner ────────────────────────────────────────────────────────────
def run_engine(data_dict, engine_kw, vol_p60_val):
    """Run one backtest over every day in ``parquet_files`` and summarise it.

    Args:
        data_dict: maps a day string (parquet file stem) to a tuple
            ``(df, asset_columns, dvol)`` as built by the pre-load step.
        engine_kw: keyword arguments forwarded to ``NDAlphaEngine``. Its
            ``initial_capital`` entry (25000.0 when absent) is the reference
            for ROI, daily returns and the drawdown peak.
        vol_p60_val: volatility threshold; a bar is flagged ``vol_regime_ok``
            only when its ``dvol`` is finite and strictly above this value.

    Returns:
        dict with ``roi`` (%), ``pf`` (profit factor, 999.0 when there are no
        losses), ``dd`` (max drawdown %, measured on end-of-day capital),
        ``sharpe`` (mean/std of daily % returns × sqrt(365), 0.0 when the std
        is not positive), ``wr`` (win rate %), ``trades`` and ``capital``.
    """
    # Use the capital the engine is actually started with rather than a
    # literal, so the metrics stay correct if the kwargs are changed.
    initial_capital = float(engine_kw.get('initial_capital', 25000.0))

    eng = NDAlphaEngine(**engine_kw)
    eng.set_ob_engine(ob_eng)
    eng.set_acb(acb)
    eng.set_mc_forewarner(forewarner, MC_BASE_CFG)
    eng.set_esoteric_hazard_multiplier(0.0)

    daily_pnl = []
    daily_cap = []
    for pf in parquet_files:
        ds = pf.stem
        df, acols, dvol = data_dict[ds]
        # NaN volatility (warm-up bars) never passes the regime gate.
        vol_ok = np.where(np.isfinite(dvol), dvol > vol_p60_val, False)
        stats = eng.process_day(ds, df, acols, vol_regime_ok=vol_ok)
        daily_pnl.append(stats['pnl'])
        daily_cap.append(eng.capital)

    trades = eng.trade_history
    n_wins = 0
    gross_win = 0.0
    gross_loss = 0.0
    for t in trades:
        if t.pnl_absolute > 0:
            n_wins += 1
            gross_win += t.pnl_absolute
        else:  # break-even trades count on the losing side
            gross_loss += t.pnl_absolute
    gross_loss = abs(gross_loss)

    roi = (eng.capital - initial_capital) / initial_capital * 100
    profit_factor = gross_win / gross_loss if gross_loss > 0 else 999.0

    daily_ret = np.array([p / initial_capital * 100 for p in daily_pnl])
    ret_std = np.std(daily_ret)
    sharpe = float(np.mean(daily_ret) / ret_std * np.sqrt(365)) if ret_std > 0 else 0.0

    # Max drawdown relative to the running peak of end-of-day capital.
    peak = initial_capital
    max_dd = 0.0
    for cap in daily_cap:
        peak = max(peak, cap)
        max_dd = max(max_dd, (peak - cap) / peak * 100)

    win_rate = n_wins / len(trades) * 100 if trades else 0.0
    return dict(roi=roi, pf=profit_factor, dd=max_dd, sharpe=sharpe, wr=win_rate,
                trades=len(trades), capital=eng.capital)
# ── noise application ────────────────────────────────────────────────────────
def _rolling_return_std(prices):
    """Std of simple returns over the 50 prices preceding each bar (NaN before bar 50)."""
    out = np.full(len(prices), np.nan)
    for end in range(50, len(prices)):
        window = prices[end - 50:end]
        out[end] = float(np.std(np.diff(window) / window[:-1]))
    return out


def apply_noise(noise_type, sigma, seed_val):
    """Return (data_dict_noisy, engine_kw_noisy).

    A generator seeded with ``seed_val`` drives all randomness.

    * ``"none"``         — shared ``pq_data`` and a copy of ``ENGINE_KWARGS``.
    * ``"tp_dither"``    — shared ``pq_data``; ``fixed_tp_pct`` is shifted by one
      N(0, sigma) draw and floored at 0.003.
    * ``"signal_sr"``    — per-day frame copies with N(0, sigma) noise (cast to
      float32) added to ``vel_div`` where that column exists.
    * ``"price_dither"`` — per-day frame copies with every asset column
      multiplied by ``1 + N(0, sigma)``; the rolling volatility is recomputed
      from the dithered BTCUSDT prices when that column exists.

    Any other ``noise_type`` yields unmodified per-day copies.
    """
    gen = np.random.default_rng(seed_val)
    engine_kwargs = dict(ENGINE_KWARGS)

    if noise_type == "none":
        return pq_data, engine_kwargs

    if noise_type == "tp_dither":
        tp_shift = float(gen.normal(0, sigma))
        engine_kwargs['fixed_tp_pct'] = max(0.003, ENGINE_KWARGS['fixed_tp_pct'] + tp_shift)
        return pq_data, engine_kwargs  # data is shared, not copied

    # signal_sr or price_dither — each day gets its own frame copy
    perturbed = {}
    for day, (frame, asset_cols, day_vol) in pq_data.items():
        frame_n = frame.copy()
        if noise_type == "signal_sr":
            if 'vel_div' in frame_n.columns:
                jitter = gen.normal(0, sigma, len(frame_n)).astype(np.float32)
                frame_n['vel_div'] = frame_n['vel_div'] + jitter
        elif noise_type == "price_dither":
            # One independent draw per asset column, in column order.
            for col in asset_cols:
                if col in frame_n.columns:
                    jitter = gen.normal(0, sigma, len(frame_n))
                    frame_n[col] = frame_n[col] * (1.0 + jitter)
            # The volatility gate must see the dithered BTC prices.
            if 'BTCUSDT' in frame_n.columns:
                day_vol = _rolling_return_std(frame_n['BTCUSDT'].values)
        perturbed[day] = (frame_n, asset_cols, day_vol)
    return perturbed, engine_kwargs
# ── incremental CSV output ───────────────────────────────────────────────────
# One timestamped CSV per invocation; rows are appended as runs finish so a
# crash or interrupt keeps everything completed so far.
run_ts = datetime.now().strftime("%Y%m%d_%H%M%S")
out_path = LOG_DIR / f"noise_exp_{run_ts}.csv"
FIELDS = [
    'label', 'noise_type', 'sigma', 'seed',
    'roi', 'pf', 'dd', 'sharpe', 'wr', 'trades', 'capital', 'elapsed_s',
]

# Create (or truncate) the file with just the header line.
with open(out_path, 'w', newline='') as header_fh:
    header_writer = csv.writer(header_fh)
    header_writer.writerow(FIELDS)


def append_row(row_dict):
    """Append one result row to the CSV, in ``FIELDS`` column order.

    Raises:
        KeyError: if ``row_dict`` lacks any name listed in ``FIELDS``.
    """
    ordered_values = [row_dict[field] for field in FIELDS]
    with open(out_path, 'a', newline='') as out_fh:
        csv.writer(out_fh).writerow(ordered_values)
# ── main experiment loop ─────────────────────────────────────────────────────
print(f"\n{'='*65}")
print(f" NOISE EXPERIMENT — {len(CONFIGS)} configs × up to {N_SEEDS} seeds")
print(f" Output: {out_path.name}")
print(f"{'='*65}")
all_results = {}  # label → list of roi values (for final stats)
t_exp_start = time.time()
completed = 0
# Derived from CONFIGS (a "none" config runs once, any other N_SEEDS times)
# so the ETA stays right when configs are added, removed or reordered.
total_runs = sum(1 if cfg_noise == "none" else N_SEEDS for _, cfg_noise, _ in CONFIGS)
for label, noise_type, sigma in CONFIGS:
    # The noise-free run uses no random draws from this script, so one run suffices.
    n = 1 if noise_type == "none" else N_SEEDS
    rois = []
    print(f"\n [{label}] noise={noise_type} σ={sigma} n={n}")
    for seed_i in range(n):
        t0 = time.time()
        # NOTE: the RNG seed is seed_i + 1000, while the CSV 'seed' column
        # stores the plain index seed_i.
        data_d, eng_kw = apply_noise(noise_type, sigma, seed_val=seed_i + 1000)
        result = run_engine(data_d, eng_kw, vol_p60)
        elapsed = time.time() - t0
        rois.append(result['roi'])
        rounded = {k: round(result[k], 4) for k in result}
        row = dict(label=label, noise_type=noise_type, sigma=sigma, seed=seed_i,
                   elapsed_s=round(elapsed, 1), **rounded)
        append_row(row)
        completed += 1
        # Average time per finished run × runs still to go.
        eta_s = (time.time() - t_exp_start) / completed * (total_runs - completed)
        print(f" seed={seed_i:2d} ROI={result['roi']:+6.2f}% PF={result['pf']:.3f}"
              f" DD={result['dd']:.2f}% T={result['trades']}"
              f" [{elapsed:.0f}s | ETA {eta_s/60:.0f}min]")
    all_results[label] = rois
# ── final analysis ───────────────────────────────────────────────────────────
# Local import: only this summary section needs the one-sample test.
from scipy.stats import wilcoxon

print(f"\n{'='*65}")
print(f" RESULTS SUMMARY")
print(f"{'='*65}")
# NOTE(review): 44.89 is the fallback used when no baseline was run —
# presumably a previously measured baseline ROI; confirm before relying on it.
baseline_rois = np.asarray(all_results.get(BASELINE_LABEL, [44.89]), dtype=float)
b_roi = float(np.mean(baseline_rois))
multi_baseline = len(baseline_rois) > 1

# Read the per-run rows once instead of once per config.
df_res = pd.read_csv(out_path)

# Fixed seed so the bootstrap intervals are reproducible between invocations.
boot_rng = np.random.default_rng(12345)
n_boot = 2000

print(f" {'Label':<14} {'σ':>8} {'E[ROI]':>8} {'±std':>7} {'ΔROI':>7} "
      f"{'Cohen_d':>8} {'MW_p':>6} {'Beat%':>6} {'E[PF]':>6} {'E[T]':>6}"
      f" {'CI95(ΔROI)':>18}")
print(f" {'-'*109}")
for label, noise_type, sigma in CONFIGS:
    rois = np.asarray(all_results[label], dtype=float)
    n_runs = len(rois)
    sub = df_res[df_res['label'] == label]
    mean_roi = float(np.mean(rois))
    std_roi = float(np.std(rois)) if n_runs > 1 else 0.0
    delta = mean_roi - b_roi
    mean_pf = float(sub['pf'].mean())
    mean_t = float(sub['trades'].mean())
    if n_runs > 1:
        beat_pct = float(np.mean(rois > b_roi)) * 100
    else:
        beat_pct = 100.0 if mean_roi > b_roi else 0.0

    # Effect size and p-value. The baseline normally has a single run, so a
    # two-sample comparison is impossible; in that case the noisy runs are
    # compared against the baseline ROI as a fixed reference value.
    cohens_d = 0.0
    mw_p = 1.0
    if n_runs > 1 and multi_baseline:
        pooled_std = math.sqrt((np.var(rois) + np.var(baseline_rois)) / 2)
        cohens_d = delta / pooled_std if pooled_std > 0 else 0.0
        try:
            _, mw_p = mannwhitneyu(rois, baseline_rois, alternative='two-sided')
        except ValueError:  # e.g. all values identical on older SciPy
            mw_p = 1.0
    elif n_runs > 1:
        sample_std = float(np.std(rois, ddof=1))
        cohens_d = delta / sample_std if sample_std > 0 else 0.0
        try:
            _, mw_p = wilcoxon(rois - b_roi, alternative='two-sided')
        except ValueError:  # every difference is exactly zero
            mw_p = 1.0
    if not np.isfinite(mw_p):
        mw_p = 1.0

    # 95% percentile-bootstrap interval on ΔROI (resampling runs with replacement).
    if n_runs > 1:
        idx = boot_rng.integers(0, n_runs, size=(n_boot, n_runs))
        boot_mean = rois[idx].mean(axis=1)
        if multi_baseline:
            b_idx = boot_rng.integers(0, len(baseline_rois),
                                      size=(n_boot, len(baseline_rois)))
            boot_delta = boot_mean - baseline_rois[b_idx].mean(axis=1)
        else:
            boot_delta = boot_mean - b_roi
        ci_lo, ci_hi = (float(x) for x in np.percentile(boot_delta, [2.5, 97.5]))
    else:
        ci_lo = ci_hi = delta
    ci_txt = f"[{ci_lo:+.2f}, {ci_hi:+.2f}]"

    print(f" {label:<14} {sigma:>8.4f} {mean_roi:>+7.2f}% {std_roi:>6.2f}%"
          f" {delta:>+6.2f}% {cohens_d:>+8.3f} {mw_p:>6.3f} {beat_pct:>5.1f}%"
          f" {mean_pf:>6.3f} {mean_t:>6.0f}"
          f" {ci_txt:>18}")
print(f"{'='*65}")
print(f"\n Interpretation guide:")
print(f" ΔROI > 0 & MW_p < 0.10 & Cohen_d > 0.3 → promising signal")
print(f" MW_p / Cohen_d: two-sample (Mann-Whitney U, pooled std) when the baseline has >1 run,")
print(f" otherwise one-sample vs the single baseline ROI (Wilcoxon signed-rank, sample std)")
print(f" SR optimal sigma ≈ mean near-threshold distance (see calibration above)")
print(f"\n Full results → {out_path}")
print(f" Total time: {(time.time()-t_exp_start)/60:.1f} min")