initial: import DOLPHIN baseline 2026-04-21 from dolphinng5_predict working tree

Includes core prod + GREEN/BLUE subsystems:
- prod/ (BLUE harness, configs, scripts, docs)
- nautilus_dolphin/ (GREEN Nautilus-native impl + dvae/ preserved)
- adaptive_exit/ (AEM engine + models/bucket_assignments.pkl)
- Observability/ (EsoF advisor, TUI, dashboards)
- external_factors/ (EsoF producer)
- mc_forewarning_qlabs_fork/ (MC regime/envelope)

Excludes runtime caches, logs, backups, and reproducible artifacts per .gitignore.
This commit is contained in:
hjnormey
2026-04-21 16:58:38 +02:00
commit 01c19662cb
643 changed files with 260241 additions and 0 deletions

View File

@@ -0,0 +1,225 @@
"""Crossover Scalp — 5s Gold Standard Data
==========================================
Option 4: Port vel_div crossover to 5s resolution.
Signal: vel_div <= -ENTRY_T → LONG
Exit: vel_div >= +ENTRY_T (mean-reversion complete)
OR MAX_HOLD bars reached (safety cap)
1 bar = ~5 seconds on this dataset.
Legacy optimal hold: 120 bars × 5s = 600s = 10 min.
Sweep:
ENTRY_T = [0.020, 0.050, 0.100, 0.200]
MAX_HOLD = [10, 20, 60, 120, 240] bars (50s, 100s, 5m, 10m, 20m)
Compare PF vs 1m klines crossover result (PF=1.007 ungated).
Output:
run_logs/crossover_5s_YYYYMMDD_HHMMSS.csv
run_logs/crossover_5s_top_YYYYMMDD_HHMMSS.txt
Runtime: ~10s
"""
import sys, time, csv, gc
sys.stdout.reconfigure(encoding='utf-8', errors='replace')
from pathlib import Path
from datetime import datetime
from collections import defaultdict
import numpy as np
import pandas as pd
# ─── Configuration ────────────────────────────────────────────────────────────
# Absolute, machine-specific paths — TODO(review): parameterize for portability.
VBT_DIR_5S = Path(r"C:\Users\Lenovo\Documents\- DOLPHIN NG HD HCM TSF Predict\vbt_cache")
LOG_DIR = Path(r"C:\Users\Lenovo\Documents\- DOLPHIN NG HD HCM TSF Predict\nautilus_dolphin\run_logs")
# Sweep parameters
ENTRY_Ts = [0.020, 0.050, 0.100, 0.200]  # |vel_div| entry thresholds to sweep
MAX_HOLDS = [10, 20, 60, 120, 240]  # bars (× 5s = real seconds) — safety hold caps
# stats[(entry_t, max_hold)] = {wins, losses, gw, gl, n, total_hold}
# gw/gl accumulate gross win / gross loss as sums of fractional returns;
# total_hold accumulates exit-bar counts for the average-hold computation.
stats = defaultdict(lambda: {'wins': 0, 'losses': 0, 'gw': 0.0, 'gl': 0.0, 'n': 0, 'total_hold': 0})
# Also track per-date stats for PF per file
# NOTE(review): daily_rows is never appended to anywhere in the visible portion
# of this script — confirm it is populated further down or remove it.
daily_rows = []
parquet_files = sorted(VBT_DIR_5S.glob("*.parquet"))
parquet_files = [p for p in parquet_files if 'catalog' not in str(p)]  # skip catalog artifacts
total = len(parquet_files)
print(f"Files: {total} (5s gold standard data)")
print(f"Entry Ts: {ENTRY_Ts}")
print(f"MaxHold: {MAX_HOLDS} bars (×5s = {[h*5 for h in MAX_HOLDS]}s)")
print()
t0 = time.time()  # wall-clock start for progress/runtime reporting
# Control baseline: fraction of bars where BTCUSDT moves ±0.95% within 120 bars
# NOTE(review): ctrl_stats is neither written nor read in the visible portion
# of this script — confirm the control pass exists below or remove this.
ctrl_stats = defaultdict(lambda: {'dn': 0, 'up': 0, 'n': 0})
# ─── Main pass ────────────────────────────────────────────────────────────────
# Sweep entry thresholds × hold caps over every daily 5s parquet file.
# Signal: LONG when vel_div <= -entry_t; exit on the first bar where
# vel_div >= +entry_t (mean reversion complete), or after max_hold bars.
MAX_H = max(MAX_HOLDS)  # widest hold window — loop-invariant, hoisted out of the file loop
for i, pf in enumerate(parquet_files):
    ds = pf.stem  # date stamp, used for progress logging only
    try:
        df = pd.read_parquet(pf)
    except Exception:
        continue  # unreadable file: skip it rather than abort the whole sweep
    if 'vel_div' not in df.columns or 'BTCUSDT' not in df.columns:
        continue
    vd = df['vel_div'].values.astype(np.float64)
    btc = df['BTCUSDT'].values.astype(np.float64)
    vd = np.where(np.isfinite(vd), vd, 0.0)                    # NaN/inf velocity → neutral (no signal)
    btc = np.where(np.isfinite(btc) & (btc > 0), btc, np.nan)  # non-positive/NaN prices → NaN
    n = len(btc)
    del df  # release the DataFrame early; only the two numpy arrays are needed
    if n < MAX_H + 5:
        del vd, btc
        continue  # day too short to fit even one full-width trade window
    for entry_t in ENTRY_Ts:
        entry_mask = (vd <= -entry_t) & np.isfinite(btc)  # LONG entry bars
        cross_back = (vd >= entry_t)                      # mean-reversion exit bars
        # For each entry, find the first cross-back within the WIDEST window
        # (MAX_H) once; every smaller max_hold cap just truncates that exit,
        # so one scan serves all MAX_HOLDS values.
        for t in range(n - MAX_H):
            if not entry_mask[t]:
                continue
            ep = btc[t]  # entry price
            if not np.isfinite(ep) or ep <= 0:
                continue
            # First crossover bar offset within the widest window; defaults to
            # MAX_H (no crossover → every cap exits at its own max_hold).
            # Since t < n - MAX_H, t + k <= n - 1 always: no bounds check needed
            # (the original re-checked tb >= n here, which was unreachable).
            first_cross = MAX_H
            for k in range(1, MAX_H + 1):
                if cross_back[t + k]:
                    first_cross = k
                    break
            for max_hold in MAX_HOLDS:
                # Actual exit: crossover if it came first, else the hold cap.
                exit_bar = min(first_cross, max_hold)
                xp = btc[t + exit_bar]  # exit price (t + exit_bar < n, proven above)
                if not np.isfinite(xp) or xp <= 0:
                    continue
                ret = (xp - ep) / ep  # LONG fractional return
                s = stats[(entry_t, max_hold)]
                if ret >= 0:
                    s['wins'] += 1
                    s['gw'] += ret
                else:
                    s['losses'] += 1
                    s['gl'] += abs(ret)
                s['n'] += 1
                s['total_hold'] += exit_bar
        del entry_mask, cross_back
    del vd, btc
    if (i + 1) % 10 == 0:
        gc.collect()  # keep peak RSS down across many daily files
        print(f" [{i+1}/{total}] {ds} {time.time()-t0:.0f}s")
elapsed = time.time() - t0
print(f"\nPass complete: {elapsed:.0f}s\n")
# ─── Results Table ─────────────────────────────────────────────────────────────
# Flatten the per-(threshold, cap) aggregates into one summary row per
# combination, skipping combinations that produced no trades.
rows = []
_EMPTY = {'wins': 0, 'losses': 0, 'gw': 0.0, 'gl': 0.0, 'n': 0, 'total_hold': 0}
for entry_t in ENTRY_Ts:
    for max_hold in MAX_HOLDS:
        agg = stats.get((entry_t, max_hold), _EMPTY)
        trades = agg['wins'] + agg['losses']
        if trades == 0:
            continue
        gw, gl = agg['gw'], agg['gl']
        # Profit factor: gross win / gross loss; 999.0 sentinel when lossless,
        # NaN when there were neither wins nor losses to compare.
        if gl > 0:
            profit_factor = gw / gl
        else:
            profit_factor = 999.0 if gw > 0 else float('nan')
        win_rate = agg['wins'] / trades * 100
        mean_hold = agg['total_hold'] / trades  # in bars
        mean_win = gw / agg['wins'] if agg['wins'] else 0.0
        mean_loss = gl / agg['losses'] if agg['losses'] else 0.0
        rows.append({
            'entry_t': entry_t,
            'max_hold_bars': max_hold,
            'max_hold_sec': max_hold * 5,
            'n_trades': trades,
            'pf': round(profit_factor, 4),
            'wr': round(win_rate, 3),
            'avg_hold_bars': round(mean_hold, 2),
            'avg_hold_sec': round(mean_hold * 5, 1),  # 5 s per bar
            'avg_win_pct': round(mean_win * 100, 4),
            'avg_loss_pct': round(mean_loss * 100, 4),
            'gross_win': round(gw, 6),
            'gross_loss': round(gl, 6),
        })
# ─── Console ──────────────────────────────────────────────────────────────────
# Fixed-width sweep table, best combination, then the 1m-klines reference.
print(f"{'EntryT':>8} {'MaxH':>5} {'MaxSec':>6} {'N':>8} {'PF':>7} {'WR%':>6} {'AvgH_s':>7} {'AvgW%':>7} {'AvgL%':>7}")
print("-" * 90)
for r in rows:
    # NOTE(review): both branches yield the empty string, so `marker` is a
    # no-op — a PF > 1.01 highlight glyph was probably lost in transit
    # (encoding?); confirm intent and restore a visible marker if desired.
    marker = "" if r['pf'] > 1.01 else ""
    print(f" T={r['entry_t']:.3f} {r['max_hold_bars']:>5}b {r['max_hold_sec']:>5}s "
          f"{r['n_trades']:>8,} {r['pf']:>7.4f} {r['wr']:>6.2f}% "
          f"{r['avg_hold_sec']:>7.1f}s {r['avg_win_pct']:>7.4f}% {r['avg_loss_pct']:>7.4f}%{marker}")
# Highlight best
best = max(rows, key=lambda r: r['pf']) if rows else None  # best combination by profit factor
if best:
    print(f"\n Best: T={best['entry_t']:.3f} MaxH={best['max_hold_bars']}b ({best['max_hold_sec']}s) "
          f"PF={best['pf']:.4f} WR={best['wr']:.1f}% AvgHold={best['avg_hold_sec']:.0f}s "
          f"N={best['n_trades']:,}")
# ─── Comparison with 1m ────────────────────────────────────────────────────────
# Hard-coded reference numbers from an earlier 1-minute-klines crossover run.
print(f"\n{'='*50}")
print(f" 1m KLINES REFERENCE (ungated crossover):")
print(f" PF=1.0073 N=1,005,665 AvgHold=2.2 bars (2.2 min)")
print(f" BEST3 (9h,12h,18h): PF=1.0429 N=127,760")
print(f" 5s GOLD STANDARD ({total} days, 2025-12-31 to 2026-02-26):")
# ─── Save ──────────────────────────────────────────────────────────────────────
# Persist the full sweep as CSV plus a human-readable summary TXT, then print
# closing runtime/interpretation notes.
LOG_DIR.mkdir(parents=True, exist_ok=True)  # parents=True: robust if run_logs' parent is missing
ts_str = datetime.now().strftime("%Y%m%d_%H%M%S")
out_csv = LOG_DIR / f"crossover_5s_{ts_str}.csv"
if rows:
    # newline='' is required by the csv module; explicit utf-8 matches the TXT
    # writer below instead of relying on the platform default encoding.
    with open(out_csv, 'w', newline='', encoding='utf-8') as f:
        w = csv.DictWriter(f, fieldnames=rows[0].keys())
        w.writeheader()
        w.writerows(rows)
    print(f"\n{out_csv}")
    out_txt = LOG_DIR / f"crossover_5s_top_{ts_str}.txt"
    with open(out_txt, 'w', encoding='utf-8') as f:
        f.write(f"Crossover Scalp — 5s Gold Standard Data\n")
        # Use the actual file count instead of the previously hard-coded
        # "56 days" so the summary stays consistent with the console output.
        f.write(f"{total} days 2025-12-31 to 2026-02-26\n")
        f.write(f"Runtime: {elapsed:.0f}s\n\n")
        f.write(f"{'EntryT':>8} {'MaxH':>5} {'MaxSec':>6} {'N':>8} {'PF':>7} {'WR%':>6} "
                f"{'AvgH_s':>7} {'AvgW%':>7} {'AvgL%':>7}\n")
        f.write("-" * 90 + "\n")
        for r in rows:
            f.write(f" T={r['entry_t']:.3f} {r['max_hold_bars']:>5}b {r['max_hold_sec']:>5}s "
                    f"{r['n_trades']:>8,} {r['pf']:>7.4f} {r['wr']:>6.2f}% "
                    f"{r['avg_hold_sec']:>7.1f}s {r['avg_win_pct']:>7.4f}% {r['avg_loss_pct']:>7.4f}%\n")
        f.write(f"\n1m reference (ungated): PF=1.0073 BEST3: PF=1.0429\n")
    print(f"{out_txt}")
print(f"\n Runtime: {elapsed:.0f}s")
print(f" KEY: PF > 1.01 on 5s with decent N = potential real edge at 5s resolution.")
print(f" AvgHold short (< 30s) = micro scalp viable. AvgHold > 60s = slow mean reversion.")