initial: import DOLPHIN baseline 2026-04-21 from dolphinng5_predict working tree

Includes core prod + GREEN/BLUE subsystems:
- prod/ (BLUE harness, configs, scripts, docs)
- nautilus_dolphin/ (GREEN Nautilus-native impl + dvae/ preserved)
- adaptive_exit/ (AEM engine + models/bucket_assignments.pkl)
- Observability/ (EsoF advisor, TUI, dashboards)
- external_factors/ (EsoF producer)
- mc_forewarning_qlabs_fork/ (MC regime/envelope)

Excludes runtime caches, logs, backups, and reproducible artifacts per .gitignore.
This commit is contained in:
hjnormey
2026-04-21 16:58:38 +02:00
commit 01c19662cb
643 changed files with 260241 additions and 0 deletions

View File

@@ -0,0 +1,225 @@
"""Crossover Scalp — 5s Gold Standard Data
==========================================
Option 4: Port vel_div crossover to 5s resolution.
Signal: vel_div <= -ENTRY_T → LONG
Exit: vel_div >= +ENTRY_T (mean-reversion complete)
OR MAX_HOLD bars reached (safety cap)
1 bar = ~5 seconds on this dataset.
Legacy optimal hold: 120 bars × 5s = 600s = 10 min.
Sweep:
ENTRY_T = [0.020, 0.050, 0.100, 0.200]
MAX_HOLD = [10, 20, 60, 120, 240] bars (50s, 100s, 5m, 10m, 20m)
Compare PF vs 1m klines crossover result (PF=1.007 ungated).
Output:
run_logs/crossover_5s_YYYYMMDD_HHMMSS.csv
run_logs/crossover_5s_top_YYYYMMDD_HHMMSS.txt
Runtime: ~10s
"""
import sys, time, csv, gc
sys.stdout.reconfigure(encoding='utf-8', errors='replace')
from pathlib import Path
from datetime import datetime
from collections import defaultdict
import numpy as np
import pandas as pd
# ─── Configuration ────────────────────────────────────────────────────────────
# Absolute, machine-specific paths — TODO(review): parameterize for portability.
VBT_DIR_5S = Path(r"C:\Users\Lenovo\Documents\- DOLPHIN NG HD HCM TSF Predict\vbt_cache")
LOG_DIR = Path(r"C:\Users\Lenovo\Documents\- DOLPHIN NG HD HCM TSF Predict\nautilus_dolphin\run_logs")
# Sweep parameters
ENTRY_Ts = [0.020, 0.050, 0.100, 0.200]  # |vel_div| entry thresholds to sweep
MAX_HOLDS = [10, 20, 60, 120, 240]  # bars (× 5s = real seconds) — safety hold caps
# stats[(entry_t, max_hold)] = {wins, losses, gw, gl, n, total_hold}
# gw/gl accumulate gross win / gross loss as sums of fractional returns;
# total_hold accumulates exit-bar counts for the average-hold computation.
stats = defaultdict(lambda: {'wins': 0, 'losses': 0, 'gw': 0.0, 'gl': 0.0, 'n': 0, 'total_hold': 0})
# Also track per-date stats for PF per file
# NOTE(review): daily_rows is never appended to anywhere in the visible portion
# of this script — confirm it is populated further down or remove it.
daily_rows = []
parquet_files = sorted(VBT_DIR_5S.glob("*.parquet"))
parquet_files = [p for p in parquet_files if 'catalog' not in str(p)]  # skip catalog artifacts
total = len(parquet_files)
print(f"Files: {total} (5s gold standard data)")
print(f"Entry Ts: {ENTRY_Ts}")
print(f"MaxHold: {MAX_HOLDS} bars (×5s = {[h*5 for h in MAX_HOLDS]}s)")
print()
t0 = time.time()  # wall-clock start for progress/runtime reporting
# Control baseline: fraction of bars where BTCUSDT moves ±0.95% within 120 bars
# NOTE(review): ctrl_stats is neither written nor read in the visible portion
# of this script — confirm the control pass exists below or remove this.
ctrl_stats = defaultdict(lambda: {'dn': 0, 'up': 0, 'n': 0})
# ─── Main pass ────────────────────────────────────────────────────────────────
# Sweep entry thresholds × hold caps over every daily 5s parquet file.
# Signal: LONG when vel_div <= -entry_t; exit on the first bar where
# vel_div >= +entry_t (mean reversion complete), or after max_hold bars.
MAX_H = max(MAX_HOLDS)  # widest hold window — loop-invariant, hoisted out of the file loop
for i, pf in enumerate(parquet_files):
    ds = pf.stem  # date stamp, used for progress logging only
    try:
        df = pd.read_parquet(pf)
    except Exception:
        continue  # unreadable file: skip it rather than abort the whole sweep
    if 'vel_div' not in df.columns or 'BTCUSDT' not in df.columns:
        continue
    vd = df['vel_div'].values.astype(np.float64)
    btc = df['BTCUSDT'].values.astype(np.float64)
    vd = np.where(np.isfinite(vd), vd, 0.0)                    # NaN/inf velocity → neutral (no signal)
    btc = np.where(np.isfinite(btc) & (btc > 0), btc, np.nan)  # non-positive/NaN prices → NaN
    n = len(btc)
    del df  # release the DataFrame early; only the two numpy arrays are needed
    if n < MAX_H + 5:
        del vd, btc
        continue  # day too short to fit even one full-width trade window
    for entry_t in ENTRY_Ts:
        entry_mask = (vd <= -entry_t) & np.isfinite(btc)  # LONG entry bars
        cross_back = (vd >= entry_t)                      # mean-reversion exit bars
        # For each entry, find the first cross-back within the WIDEST window
        # (MAX_H) once; every smaller max_hold cap just truncates that exit,
        # so one scan serves all MAX_HOLDS values.
        for t in range(n - MAX_H):
            if not entry_mask[t]:
                continue
            ep = btc[t]  # entry price
            if not np.isfinite(ep) or ep <= 0:
                continue
            # First crossover bar offset within the widest window; defaults to
            # MAX_H (no crossover → every cap exits at its own max_hold).
            # Since t < n - MAX_H, t + k <= n - 1 always: no bounds check needed
            # (the original re-checked tb >= n here, which was unreachable).
            first_cross = MAX_H
            for k in range(1, MAX_H + 1):
                if cross_back[t + k]:
                    first_cross = k
                    break
            for max_hold in MAX_HOLDS:
                # Actual exit: crossover if it came first, else the hold cap.
                exit_bar = min(first_cross, max_hold)
                xp = btc[t + exit_bar]  # exit price (t + exit_bar < n, proven above)
                if not np.isfinite(xp) or xp <= 0:
                    continue
                ret = (xp - ep) / ep  # LONG fractional return
                s = stats[(entry_t, max_hold)]
                if ret >= 0:
                    s['wins'] += 1
                    s['gw'] += ret
                else:
                    s['losses'] += 1
                    s['gl'] += abs(ret)
                s['n'] += 1
                s['total_hold'] += exit_bar
        del entry_mask, cross_back
    del vd, btc
    if (i + 1) % 10 == 0:
        gc.collect()  # keep peak RSS down across many daily files
        print(f" [{i+1}/{total}] {ds} {time.time()-t0:.0f}s")
elapsed = time.time() - t0
print(f"\nPass complete: {elapsed:.0f}s\n")
# ─── Results Table ─────────────────────────────────────────────────────────────
# Flatten the per-(threshold, cap) aggregates into one summary row per
# combination, skipping combinations that produced no trades.
rows = []
_EMPTY = {'wins': 0, 'losses': 0, 'gw': 0.0, 'gl': 0.0, 'n': 0, 'total_hold': 0}
for entry_t in ENTRY_Ts:
    for max_hold in MAX_HOLDS:
        agg = stats.get((entry_t, max_hold), _EMPTY)
        trades = agg['wins'] + agg['losses']
        if trades == 0:
            continue
        gw, gl = agg['gw'], agg['gl']
        # Profit factor: gross win / gross loss; 999.0 sentinel when lossless,
        # NaN when there were neither wins nor losses to compare.
        if gl > 0:
            profit_factor = gw / gl
        else:
            profit_factor = 999.0 if gw > 0 else float('nan')
        win_rate = agg['wins'] / trades * 100
        mean_hold = agg['total_hold'] / trades  # in bars
        mean_win = gw / agg['wins'] if agg['wins'] else 0.0
        mean_loss = gl / agg['losses'] if agg['losses'] else 0.0
        rows.append({
            'entry_t': entry_t,
            'max_hold_bars': max_hold,
            'max_hold_sec': max_hold * 5,
            'n_trades': trades,
            'pf': round(profit_factor, 4),
            'wr': round(win_rate, 3),
            'avg_hold_bars': round(mean_hold, 2),
            'avg_hold_sec': round(mean_hold * 5, 1),  # 5 s per bar
            'avg_win_pct': round(mean_win * 100, 4),
            'avg_loss_pct': round(mean_loss * 100, 4),
            'gross_win': round(gw, 6),
            'gross_loss': round(gl, 6),
        })
# ─── Console ──────────────────────────────────────────────────────────────────
# Fixed-width sweep table, best combination, then the 1m-klines reference.
print(f"{'EntryT':>8} {'MaxH':>5} {'MaxSec':>6} {'N':>8} {'PF':>7} {'WR%':>6} {'AvgH_s':>7} {'AvgW%':>7} {'AvgL%':>7}")
print("-" * 90)
for r in rows:
    # NOTE(review): both branches yield the empty string, so `marker` is a
    # no-op — a PF > 1.01 highlight glyph was probably lost in transit
    # (encoding?); confirm intent and restore a visible marker if desired.
    marker = "" if r['pf'] > 1.01 else ""
    print(f" T={r['entry_t']:.3f} {r['max_hold_bars']:>5}b {r['max_hold_sec']:>5}s "
          f"{r['n_trades']:>8,} {r['pf']:>7.4f} {r['wr']:>6.2f}% "
          f"{r['avg_hold_sec']:>7.1f}s {r['avg_win_pct']:>7.4f}% {r['avg_loss_pct']:>7.4f}%{marker}")
# Highlight best
best = max(rows, key=lambda r: r['pf']) if rows else None  # best combination by profit factor
if best:
    print(f"\n Best: T={best['entry_t']:.3f} MaxH={best['max_hold_bars']}b ({best['max_hold_sec']}s) "
          f"PF={best['pf']:.4f} WR={best['wr']:.1f}% AvgHold={best['avg_hold_sec']:.0f}s "
          f"N={best['n_trades']:,}")
# ─── Comparison with 1m ────────────────────────────────────────────────────────
# Hard-coded reference numbers from an earlier 1-minute-klines crossover run.
print(f"\n{'='*50}")
print(f" 1m KLINES REFERENCE (ungated crossover):")
print(f" PF=1.0073 N=1,005,665 AvgHold=2.2 bars (2.2 min)")
print(f" BEST3 (9h,12h,18h): PF=1.0429 N=127,760")
print(f" 5s GOLD STANDARD ({total} days, 2025-12-31 to 2026-02-26):")
# ─── Save ──────────────────────────────────────────────────────────────────────
# Persist the full sweep as CSV plus a human-readable summary TXT, then print
# closing runtime/interpretation notes.
LOG_DIR.mkdir(parents=True, exist_ok=True)  # parents=True: robust if run_logs' parent is missing
ts_str = datetime.now().strftime("%Y%m%d_%H%M%S")
out_csv = LOG_DIR / f"crossover_5s_{ts_str}.csv"
if rows:
    # newline='' is required by the csv module; explicit utf-8 matches the TXT
    # writer below instead of relying on the platform default encoding.
    with open(out_csv, 'w', newline='', encoding='utf-8') as f:
        w = csv.DictWriter(f, fieldnames=rows[0].keys())
        w.writeheader()
        w.writerows(rows)
    print(f"\n{out_csv}")
    out_txt = LOG_DIR / f"crossover_5s_top_{ts_str}.txt"
    with open(out_txt, 'w', encoding='utf-8') as f:
        f.write(f"Crossover Scalp — 5s Gold Standard Data\n")
        # Use the actual file count instead of the previously hard-coded
        # "56 days" so the summary stays consistent with the console output.
        f.write(f"{total} days 2025-12-31 to 2026-02-26\n")
        f.write(f"Runtime: {elapsed:.0f}s\n\n")
        f.write(f"{'EntryT':>8} {'MaxH':>5} {'MaxSec':>6} {'N':>8} {'PF':>7} {'WR%':>6} "
                f"{'AvgH_s':>7} {'AvgW%':>7} {'AvgL%':>7}\n")
        f.write("-" * 90 + "\n")
        for r in rows:
            f.write(f" T={r['entry_t']:.3f} {r['max_hold_bars']:>5}b {r['max_hold_sec']:>5}s "
                    f"{r['n_trades']:>8,} {r['pf']:>7.4f} {r['wr']:>6.2f}% "
                    f"{r['avg_hold_sec']:>7.1f}s {r['avg_win_pct']:>7.4f}% {r['avg_loss_pct']:>7.4f}%\n")
        f.write(f"\n1m reference (ungated): PF=1.0073 BEST3: PF=1.0429\n")
    print(f"{out_txt}")
print(f"\n Runtime: {elapsed:.0f}s")
print(f" KEY: PF > 1.01 on 5s with decent N = potential real edge at 5s resolution.")
print(f" AvgHold short (< 30s) = micro scalp viable. AvgHold > 60s = slow mean reversion.")