initial: import DOLPHIN baseline 2026-04-21 from dolphinng5_predict working tree
Includes core prod + GREEN/BLUE subsystems:
- prod/ (BLUE harness, configs, scripts, docs)
- nautilus_dolphin/ (GREEN Nautilus-native impl + dvae/ preserved)
- adaptive_exit/ (AEM engine + models/bucket_assignments.pkl)
- Observability/ (EsoF advisor, TUI, dashboards)
- external_factors/ (EsoF producer)
- mc_forewarning_qlabs_fork/ (MC regime/envelope)

Excludes runtime caches, logs, backups, and reproducible artifacts per .gitignore.
This commit is contained in:
270
nautilus_dolphin/vel_div_depletion_test.py
Executable file
270
nautilus_dolphin/vel_div_depletion_test.py
Executable file
@@ -0,0 +1,270 @@
|
||||
"""vel_div Signal Freshness / Depletion Test
|
||||
============================================
|
||||
Hypothesis: "firing late into a depleted move"
|
||||
|
||||
On 1m klines, vel_div may have been below threshold for many bars already by
|
||||
the time we "see" a signal. The NG3 5s system catches it at bar 1; the 1m
|
||||
system catches it at bar N (the move is already mostly done).
|
||||
|
||||
Test: compute edge conditional on HOW MANY BARS the signal has been continuously
|
||||
active (bars_since_trigger). If edge decays with signal age → hypothesis confirmed.
|
||||
|
||||
Also tests: does tightening to FIRST FIRES ONLY (cooldown between signals)
|
||||
recover the edge?
|
||||
|
||||
Outputs:
|
||||
run_logs/depletion_SHORT_YYYYMMDD.csv — per (bars_since_trigger_bucket, year)
|
||||
run_logs/depletion_LONG_YYYYMMDD.csv — same for LONG
|
||||
Console: edge decay table
|
||||
"""
|
||||
import sys, time, csv, gc
|
||||
sys.stdout.reconfigure(encoding='utf-8', errors='replace')
|
||||
from pathlib import Path
|
||||
from datetime import datetime
|
||||
from collections import defaultdict
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
from numpy.lib.stride_tricks import sliding_window_view
|
||||
|
||||
# Input parquet cache (one file per dataset/day) and output directory for CSVs.
# NOTE(review): hard-coded Windows paths — this script is machine-specific.
VBT_DIR = Path(r"C:\Users\Lenovo\Documents\- DOLPHIN NG HD HCM TSF Predict\vbt_cache_klines")
LOG_DIR = Path(r"C:\Users\Lenovo\Documents\- DOLPHIN NG HD HCM TSF Predict\nautilus_dolphin\run_logs")

TP_BPS = 95          # take-profit distance in basis points
MAX_HOLD = 120       # maximum holding period in bars (presumably 1m bars -> 2h; confirm upstream)
tp_pct = TP_BPS / 10000.0   # take-profit as a fraction of entry price

# Focus threshold: the current system threshold
SHORT_T = -0.020
LONG_T = +0.020

# Buckets: how many bars has this signal been continuously active?
# "fresh" = 1st bar, "stale" = been below threshold for a long time
# Each entry: (bucket_name, inclusive_min_age, inclusive_max_age).
AGE_BUCKETS = [
    ('fresh_1', 1, 1),
    ('young_2_5', 2, 5),
    ('mid_6_20', 6, 20),
    ('old_21_60', 21, 60),
    ('stale_61+', 61, 9999),
]

# Also test with a cooldown filter: only fire on the FIRST bar of each trigger episode
# (simulates catching the signal fresh, like a 5s system would)
COOLDOWN_BARS = 60  # min bars between consecutive signals
|
||||
# Discover input parquet files, skipping anything from the catalog subsystem.
parquet_files = [
    p for p in sorted(VBT_DIR.glob("*.parquet"))
    if 'catalog' not in str(p)
]
total = len(parquet_files)
print(f"Files: {total} SHORT_T={SHORT_T} LONG_T={LONG_T}")
print(f"TP={TP_BPS}bps MAX_HOLD={MAX_HOLD} COOLDOWN={COOLDOWN_BARS} bars\n")


def _fresh_tally():
    # One win/loss + gross-win/gross-loss accumulator cell.
    return {'wins': 0, 'losses': 0, 'gw': 0.0, 'gl': 0.0}


# stats[(direction, age_bucket, year)] — age-bucketed edge accumulators
stats = defaultdict(_fresh_tally)
# cd_stats[(direction, year)] — cooldown-filtered (first-fire) accumulators
cd_stats = defaultdict(_fresh_tally)
# ctrl[(year,)] — unconditional baseline counts: up/down hits and samples
ctrl = defaultdict(lambda: {'up': 0, 'dn': 0, 'n': 0})

t0 = time.time()
||||
# Main pass: one iteration per parquet file; all results folded into the
# module-level accumulators (stats / cd_stats / ctrl).
for i, pf in enumerate(parquet_files):
    ds = pf.stem          # dataset name; first 4 chars assumed to be the year — confirm naming convention
    year = ds[:4]

    try:
        df = pd.read_parquet(pf)
    except Exception:
        # Best-effort: skip unreadable/corrupt files rather than abort the pass.
        continue
    if 'vel_div' not in df.columns or 'BTCUSDT' not in df.columns:
        continue

    # vel_div: signal series; BTCUSDT: price series (presumably close per bar — verify upstream).
    vd = df['vel_div'].values.astype(np.float64)
    btc = df['BTCUSDT'].values.astype(np.float64)
    # Sanitize: non-finite signal -> 0 (never triggers); non-positive/non-finite price -> NaN.
    vd = np.where(np.isfinite(vd), vd, 0.0)
    btc = np.where(np.isfinite(btc) & (btc > 0), btc, np.nan)
    n = len(btc)
    if n < MAX_HOLD + 5:
        # Too short to form even one forward window; free memory and move on.
        del df, vd, btc
        continue

    # Control: unconditional MAX_HOLD-bar returns sampled every 60 bars,
    # counting how often price moves >= tp_pct up or down.
    ck = (year,)
    for j in range(0, n - MAX_HOLD, 60):
        ep = btc[j]; ex = btc[j + MAX_HOLD]
        if np.isfinite(ep) and np.isfinite(ex) and ep > 0:
            r = (ex - ep) / ep
            ctrl[ck]['up'] += int(r >= tp_pct)
            ctrl[ck]['dn'] += int(r <= -tp_pct)
            ctrl[ck]['n'] += 1

    # Precompute rolling windows (only for bars where we can look forward MAX_HOLD)
    n_usable = n - MAX_HOLD
    windows = sliding_window_view(btc, MAX_HOLD + 1)[:n_usable]  # (n_usable, 121)
    ep_arr = windows[:, 0]                      # entry price at each bar
    fut_min = np.nanmin(windows[:, 1:], axis=1)  # lowest future price within the hold
    fut_max = np.nanmax(windows[:, 1:], axis=1)  # highest future price within the hold
    last_px = windows[:, -1]                    # price at timeout (bar j + MAX_HOLD)
    valid = np.isfinite(ep_arr) & (ep_arr > 0)

    for direction, threshold in [('S', SHORT_T), ('L', LONG_T)]:
        # Compute continuous trigger age for each bar
        # age[j] = number of consecutive bars (including j) where signal has been active
        if direction == 'S':
            active = (vd[:n_usable] <= threshold)
        else:
            active = (vd[:n_usable] >= threshold)

        # Run-length counter; sequential dependence keeps this a Python loop.
        age = np.zeros(n_usable, dtype=np.int32)
        for j in range(n_usable):
            if active[j]:
                age[j] = age[j-1] + 1 if j > 0 else 1
            else:
                age[j] = 0

        sig_idx = np.where(active & valid)[0]
        if len(sig_idx) == 0:
            continue

        # Gather per-signal arrays for the vectorized win/loss computation below.
        ep_s = ep_arr[sig_idx]
        fmin_s = fut_min[sig_idx]
        fmax_s = fut_max[sig_idx]
        last_s = last_px[sig_idx]
        age_s = age[sig_idx]

        if direction == 'S':
            # Win: price touched the TP level below entry within the hold window.
            hit = fmin_s <= ep_s * (1.0 - tp_pct)
            # Timeout return (positive = profitable short); NaN timeout -> 0.
            lret = np.where(np.isfinite(last_s), (ep_s - last_s) / ep_s, 0.0)
        else:
            hit = fmax_s >= ep_s * (1.0 + tp_pct)
            lret = np.where(np.isfinite(last_s), (last_s - ep_s) / ep_s, 0.0)

        # Age-bucketed stats
        for bucket_name, age_lo, age_hi in AGE_BUCKETS:
            mask = (age_s >= age_lo) & (age_s <= age_hi)
            if not np.any(mask):
                continue
            w = int(np.sum(hit[mask]))
            l = int(np.sum(~hit[mask]))
            gw = w * tp_pct                               # each win realizes exactly tp_pct
            gl = float(np.sum(np.abs(lret[~hit & mask]))) # losses: |timeout return| of non-hits
            k = (direction, bucket_name, year)
            stats[k]['wins'] += w
            stats[k]['losses'] += l
            stats[k]['gw'] += gw
            stats[k]['gl'] += gl

        # Cooldown filter: only fire on FIRST bar of each episode (age == 1)
        # OR any bar after COOLDOWN_BARS since last fire
        last_fire = -COOLDOWN_BARS - 1   # sentinel: first signal always eligible
        for idx_pos in range(len(sig_idx)):
            j = sig_idx[idx_pos]
            if age_s[idx_pos] == 1 or (j - last_fire) >= COOLDOWN_BARS:
                last_fire = j
                w = int(hit[idx_pos])
                l = 1 - w
                gw = w * tp_pct
                gl = float(abs(lret[idx_pos])) if not hit[idx_pos] else 0.0
                ck2 = (direction, year)
                cd_stats[ck2]['wins'] += w
                cd_stats[ck2]['losses'] += l
                cd_stats[ck2]['gw'] += gw
                cd_stats[ck2]['gl'] += gl

    # Release per-file arrays promptly to keep peak memory bounded across many files.
    del df, vd, btc, windows, ep_arr, fut_min, fut_max, last_px, valid, age

    if (i + 1) % 100 == 0:
        gc.collect()
        elapsed = time.time() - t0
        print(f" [{i+1}/{total}] {ds} {elapsed/60:.1f}m")

elapsed = time.time() - t0
print(f"\nPass complete: {elapsed:.0f}s")
|
||||
|
||||
# Control baselines: pool the unconditional up/down hit counts across all years
# to get the baseline percentages each signal bucket is compared against.
ctrl_dn = ctrl_up = ctrl_n = 0
for cell in ctrl.values():
    ctrl_dn += cell['dn']
    ctrl_up += cell['up']
    ctrl_n += cell['n']
if ctrl_n:
    ctrl_dn_pct = ctrl_dn / ctrl_n * 100
    ctrl_up_pct = ctrl_up / ctrl_n * 100
else:
    ctrl_dn_pct = 0
    ctrl_up_pct = 0
print(f"\nControl: DOWN={ctrl_dn_pct:.1f}% UP={ctrl_up_pct:.1f}% n={ctrl_n:,}")

YEARS = ['2021', '2022', '2023', '2024', '2025', '2026']
||||
def print_depletion_table(direction, ctrl_bl):
    """Print the per-age-bucket edge table for one trade direction.

    Args:
        direction: 'S' (short) or 'L' (long); first element of `stats` keys.
        ctrl_bl: unconditional control baseline hit-rate (percent) subtracted
            from each bucket's win-rate to express edge in percentage points.

    Reads the module-level `stats`, `AGE_BUCKETS` and `YEARS`; prints only,
    returns None. Cells with zero trades render as '---'.
    """
    print(f"\n{'='*90}")
    print(f" SIGNAL FRESHNESS / DEPLETION — {direction} ctrl={ctrl_bl:.1f}%")
    print(" (reading: does edge decay as vel_div has been active for longer?)")
    print(f"{'='*90}")
    hdr = f" {'Bucket':<16}" + "".join(f" {yr:>10}" for yr in YEARS) + f" {'TOTAL':>10} {'n_trades':>9}"
    print(hdr)
    print(f" {'-'*88}")
    for bucket_name, _, _ in AGE_BUCKETS:
        yr_edges = []
        tot_w = tot_l = tot_n = 0
        for yr in YEARS:
            k = (direction, bucket_name, yr)
            s = stats.get(k, {'wins': 0, 'losses': 0, 'gw': 0.0, 'gl': 0.0})
            n_t = s['wins'] + s['losses']
            wr = s['wins'] / n_t * 100 if n_t > 0 else float('nan')
            edge = wr - ctrl_bl if n_t > 0 else float('nan')
            yr_edges.append(f"{edge:>+8.1f}pp" if n_t > 0 else " ---")
            tot_w += s['wins']; tot_l += s['losses']; tot_n += n_t
        tot_wr = tot_w / tot_n * 100 if tot_n > 0 else float('nan')
        tot_edge = tot_wr - ctrl_bl if tot_n > 0 else float('nan')
        print(f" {bucket_name:<16}" + "".join(f" {e:>10}" for e in yr_edges) +
              f" {tot_edge:>+8.1f}pp {tot_n:>9,}")
    print(f" {'-'*88}")
    # Fixed: original had a no-op conditional ('freshest' if direction=='S'
    # else 'freshest') whose branches were identical; output is unchanged.
    print(" (freshest = strongest edge → confirms 'firing late' hypothesis if edge decays)")
|
||||
# Per-direction depletion tables (short vs down-baseline, long vs up-baseline).
print_depletion_table('S', ctrl_dn_pct)
print_depletion_table('L', ctrl_up_pct)

# Cooldown filter summary: per-year rows plus a pooled TOTAL per direction.
print(f"\n{'='*70}")
print(f" COOLDOWN FILTER (fire only on fresh signal OR after {COOLDOWN_BARS}-bar gap)")
print(" (simulates catching the signal at the same moment a faster system would)")
print(f"{'='*70}")
print(f" {'Dir':<5} {'Year':<6} {'n_trades':>9} {'WR':>8} {'PF':>8} {'Edge':>9}")
print(f" {'-'*50}")
_EMPTY_CELL = {'wins': 0, 'losses': 0, 'gw': 0.0, 'gl': 0.0}
for direction, ctrl_bl in (('S', ctrl_dn_pct), ('L', ctrl_up_pct)):
    wins_all = 0
    losses_all = 0
    gw_all = 0.0
    gl_all = 0.0
    for yr in YEARS:
        cell = cd_stats.get((direction, yr), _EMPTY_CELL)
        trades = cell['wins'] + cell['losses']
        if trades == 0:
            continue
        win_rate = cell['wins'] / trades * 100
        profit_factor = cell['gw'] / cell['gl'] if cell['gl'] > 0 else 999
        edge_pp = win_rate - ctrl_bl
        print(f" {direction:<5} {yr:<6} {trades:>9,} {win_rate:>7.1f}% {profit_factor:>8.3f} {edge_pp:>+8.1f}pp")
        wins_all += cell['wins']
        losses_all += cell['losses']
        gw_all += cell['gw']
        gl_all += cell['gl']
    trades_all = wins_all + losses_all
    if trades_all > 0:
        wr_all = wins_all / trades_all * 100
        pf_all = gw_all / gl_all if gl_all > 0 else 999
        edge_all = wr_all - ctrl_bl
        print(f" {direction:<5} {'TOTAL':<6} {trades_all:>9,} {wr_all:>7.1f}% {pf_all:>8.3f} {edge_all:>+8.1f}pp")
    print()
||||
# Save CSV: one row per (direction, age_bucket, year) cell of `stats`.
# parents=True so a missing run_logs parent directory does not abort the save.
LOG_DIR.mkdir(parents=True, exist_ok=True)
ts = datetime.now().strftime("%Y%m%d_%H%M%S")

# Explicit fieldnames pin the column order and make the writer safe when
# `rows` is empty (the original used rows[0].keys(), which raised IndexError
# whenever no file produced any signals).
FIELDNAMES = ['direction', 'age_bucket', 'year', 'n_trades', 'wins', 'losses',
              'wr', 'pf', 'edge_pp', 'gross_win', 'gross_loss']

rows = []
for (direction, bucket_name, yr), s in stats.items():
    n_t = s['wins'] + s['losses']
    ctrl_bl = ctrl_dn_pct if direction == 'S' else ctrl_up_pct
    wr = s['wins'] / n_t * 100 if n_t > 0 else float('nan')
    # PF is undefined with zero gross loss: 999.0 sentinel if there were wins, NaN otherwise.
    pf = s['gw'] / s['gl'] if s['gl'] > 0 else (999.0 if s['gw'] > 0 else float('nan'))
    edge = wr - ctrl_bl if n_t > 0 else float('nan')
    rows.append({'direction': direction, 'age_bucket': bucket_name, 'year': yr,
                 'n_trades': n_t, 'wins': s['wins'], 'losses': s['losses'],
                 'wr': round(wr, 3), 'pf': round(pf, 4), 'edge_pp': round(edge, 3),
                 'gross_win': round(s['gw'], 6), 'gross_loss': round(s['gl'], 6)})

out_path = LOG_DIR / f"depletion_test_{ts}.csv"
with open(out_path, 'w', newline='') as f:
    w = csv.DictWriter(f, fieldnames=FIELDNAMES)
    w.writeheader()
    w.writerows(rows)

print(f"\n → {out_path}")
print(f" Runtime: {elapsed:.0f}s")
||||
Reference in New Issue
Block a user