initial: import DOLPHIN baseline 2026-04-21 from dolphinng5_predict working tree

Includes core prod + GREEN/BLUE subsystems:
- prod/ (BLUE harness, configs, scripts, docs)
- nautilus_dolphin/ (GREEN Nautilus-native impl + dvae/ preserved)
- adaptive_exit/ (AEM engine + models/bucket_assignments.pkl)
- Observability/ (EsoF advisor, TUI, dashboards)
- external_factors/ (EsoF producer)
- mc_forewarning_qlabs_fork/ (MC regime/envelope)

Excludes runtime caches, logs, backups, and reproducible artifacts per .gitignore.
This commit is contained in:
hjnormey
2026-04-21 16:58:38 +02:00
commit 01c19662cb
643 changed files with 260241 additions and 0 deletions

View File

@@ -0,0 +1,656 @@
"""Entry-bar eigenvalue feature sweep — winner vs loser discrimination.
Loads the 55-day champion dataset, re-runs the full engine stack to collect
trade_history with entry_bar indices, then extracts a rich feature matrix
(raw + derivatives + cross-TF combos + historical context) at each entry bar.
Statistical analysis:
- Pearson r vs pnl_pct
- Point-biserial + KS stat vs winner binary
- ROC-AUC for winner / MAX_HOLD-loss / TP discrimination
Outputs:
run_logs/entry_quality_features_<ts>.csv — per-trade feature matrix
run_logs/entry_quality_sweep_<ts>.csv — per-feature analysis table
"""
import sys, time, csv
from pathlib import Path
from datetime import datetime
sys.path.insert(0, str(Path(__file__).parent))
# ── Numba JIT warmup (must run BEFORE other imports that touch numpy internals) ──
print("Compiling numba kernels...")
t0c = time.time()
from nautilus_dolphin.nautilus.alpha_asset_selector import compute_irp_nb, compute_ars_nb, rank_assets_irp_nb
from nautilus_dolphin.nautilus.alpha_bet_sizer import compute_sizing_nb
from nautilus_dolphin.nautilus.alpha_signal_generator import check_dc_nb
from nautilus_dolphin.nautilus.ob_features import (
OBFeatureEngine, compute_imbalance_nb, compute_depth_1pct_nb,
compute_depth_quality_nb, compute_fill_probability_nb, compute_spread_proxy_nb,
compute_depth_asymmetry_nb, compute_imbalance_persistence_nb,
compute_withdrawal_velocity_nb, compute_market_agreement_nb, compute_cascade_signal_nb,
)
from nautilus_dolphin.nautilus.ob_provider import MockOBProvider
import numpy as np
_p = np.array([1.0, 2.0, 3.0], dtype=np.float64)
compute_irp_nb(_p, -1); compute_ars_nb(1.0, 0.5, 0.01)
rank_assets_irp_nb(np.ones((10, 2), dtype=np.float64), 8, -1, 5, 500.0, 20, 0.20)
compute_sizing_nb(-0.03, -0.02, -0.05, 3.0, 0.5, 5.0, 0.20, True, True, 0.0,
np.zeros(4, dtype=np.int64), np.zeros(4, dtype=np.int64),
np.zeros(5, dtype=np.float64), 0, -1, 0.01, 0.04)
check_dc_nb(_p, 3, 1, 0.75)
_b = np.array([100.0, 200.0, 300.0, 400.0, 500.0], dtype=np.float64)
_a = np.array([110.0, 190.0, 310.0, 390.0, 510.0], dtype=np.float64)
compute_imbalance_nb(_b, _a); compute_depth_1pct_nb(_b, _a)
compute_depth_quality_nb(210.0, 200.0); compute_fill_probability_nb(1.0)
compute_spread_proxy_nb(_b, _a); compute_depth_asymmetry_nb(_b, _a)
compute_imbalance_persistence_nb(np.array([0.1, -0.1], dtype=np.float64), 2)
compute_withdrawal_velocity_nb(np.array([100.0, 110.0], dtype=np.float64), 1)
compute_market_agreement_nb(np.array([0.1, -0.05], dtype=np.float64), 2)
compute_cascade_signal_nb(np.array([-0.05, -0.15], dtype=np.float64), 2, -0.10)
print(f" JIT: {time.time() - t0c:.1f}s")
import pandas as pd
from nautilus_dolphin.nautilus.esf_alpha_orchestrator import NDAlphaEngine
from nautilus_dolphin.nautilus.adaptive_circuit_breaker import AdaptiveCircuitBreaker
from mc.mc_ml import DolphinForewarner
# ── Config (identical to test_pf_dynamic_beta_validate.py) ──────────────────────
# Absolute Windows paths to the champion vbt cache and trained MC models —
# assumes this user's working tree; TODO(review): parameterize if ever reused.
VBT_DIR = Path(r"C:\Users\Lenovo\Documents\- DOLPHIN NG HD HCM TSF Predict\vbt_cache")
MC_MODELS_DIR = str(Path(r"C:\Users\Lenovo\Documents\- DOLPHIN NG HD HCM TSF Predict\nautilus_dolphin\mc_results\models"))
# Non-asset columns in the per-day parquet frames; every other column is
# treated as a tradable asset price series.
META_COLS = {
    'timestamp', 'scan_number',
    'v50_lambda_max_velocity', 'v150_lambda_max_velocity',
    'v300_lambda_max_velocity', 'v750_lambda_max_velocity',
    'vel_div', 'instability_50', 'instability_150',
}
# Champion engine parameters — must stay in lockstep with the reference run so
# the collected trade_history matches it trade-for-trade.
ENGINE_KWARGS = dict(
    initial_capital=25000.0, vel_div_threshold=-0.02, vel_div_extreme=-0.05,
    min_leverage=0.5, max_leverage=5.0, leverage_convexity=3.0,
    fraction=0.20, fixed_tp_pct=0.0099, stop_pct=1.0, max_hold_bars=120,
    use_direction_confirm=True, dc_lookback_bars=7, dc_min_magnitude_bps=0.75,
    dc_skip_contradicts=True, dc_leverage_boost=1.0, dc_leverage_reduce=0.5,
    use_asset_selection=True, min_irp_alignment=0.45,
    use_sp_fees=True, use_sp_slippage=True,
    sp_maker_entry_rate=0.62, sp_maker_exit_rate=0.50,
    use_ob_edge=True, ob_edge_bps=5.0, ob_confirm_rate=0.40,
    lookback=100, use_alpha_layers=True, use_dynamic_leverage=True, seed=42,
    tf_enabled=False,
)
# Same parameter set in the dict form the MC-Forewarner expects.
MC_BASE_CFG = {
    'trial_id': 0, 'vel_div_threshold': -0.020, 'vel_div_extreme': -0.050,
    'use_direction_confirm': True, 'dc_lookback_bars': 7,
    'dc_min_magnitude_bps': 0.75, 'dc_skip_contradicts': True,
    'dc_leverage_boost': 1.00, 'dc_leverage_reduce': 0.50,
    'vd_trend_lookback': 10, 'min_leverage': 0.50, 'max_leverage': 5.00,
    'leverage_convexity': 3.00, 'fraction': 0.20,
    'use_alpha_layers': True, 'use_dynamic_leverage': True,
    'fixed_tp_pct': 0.0099, 'stop_pct': 1.00, 'max_hold_bars': 120,
    'use_sp_fees': True, 'use_sp_slippage': True,
    'sp_maker_entry_rate': 0.62, 'sp_maker_exit_rate': 0.50,
    'use_ob_edge': True, 'ob_edge_bps': 5.00, 'ob_confirm_rate': 0.40,
    'ob_imbalance_bias': -0.09, 'ob_depth_scale': 1.00,
    'use_asset_selection': True, 'min_irp_alignment': 0.45, 'lookback': 100,
    'acb_beta_high': 0.80, 'acb_beta_low': 0.20, 'acb_w750_threshold_pct': 60,
}
# Raw per-bar eigen features extracted at each entry bar. Column order matters:
# downstream code indexes velocities at positions 1-4 and instabilities at 5-6.
FEAT_COLS = ['vel_div', 'v50', 'v150', 'v300', 'v750', 'inst50', 'inst150']
N_FEAT = len(FEAT_COLS)  # 7
# ── Step 1: Load data + build global bar feature arrays ──────────────────────────
def _rolling_dvol(prices, start=50, window=50):
    """Trailing realized vol per bar: std of simple returns over `window` bars.

    Bars before `start` (and windows with fewer than 10 samples) stay NaN,
    matching the reference implementation this sweep mirrors.
    """
    out = np.full(len(prices), np.nan)
    for i in range(start, len(prices)):
        seg = prices[max(0, i - window):i]
        if len(seg) >= 10:
            out[i] = float(np.std(np.diff(seg) / seg[:-1]))
    return out

print("\nLoading parquet files...")
parquet_files = sorted(VBT_DIR.glob("*.parquet"))
parquet_files = [p for p in parquet_files if 'catalog' not in str(p)]
print(f" {len(parquet_files)} files")
pq_data = {}  # date-stem → (DataFrame, asset columns, per-bar BTC vol series)
for pf in parquet_files:
    df = pd.read_parquet(pf)
    ac = [c for c in df.columns if c not in META_COLS]
    if 'BTCUSDT' in df.columns:
        dv = _rolling_dvol(df['BTCUSDT'].values)
    else:
        dv = np.full(len(df), np.nan)
    pq_data[pf.stem] = (df, ac, dv)
# vol_p60 from first 2 files (matches reference)
all_vols = []
for pf in parquet_files[:2]:
    df, _, dv = pq_data[pf.stem]
    if 'BTCUSDT' not in df.columns:
        continue
    # Reuse the vol series computed above instead of recomputing the same
    # rolling std per bar (the original duplicated the whole loop here).
    # The reference starts at bar 60 and keeps strictly positive values only.
    seg = dv[60:]
    all_vols.extend(seg[np.isfinite(seg) & (seg > 0)].tolist())
vol_p60 = float(np.percentile(all_vols, 60))
print(f"\nBuilding global bar feature arrays...")
# gbar_features: list accumulator → converted to np.ndarray after full pass
# Layout: [vel_div, v50, v150, v300, v750, inst50, inst150]
# gbar_valid_mask: array[bool] indexed by global_bar_idx
# gbar_to_rownum: global_bar_idx → index into gbar_features (only valid bars)
# Every bar (including NaN-vd bars) increments the global counter, matching engine.
feat_accum = [] # only valid bars appended
gbar_valid_mask_list = [] # one bool per global bar
gbar_to_rownum = {} # global_bar_idx → row index in feat_accum
g = 0
for pf in parquet_files:
df, _, _ = pq_data[pf.stem]
vd_col = df['vel_div'].values
v50_col = df['v50_lambda_max_velocity'].values
v150_col = df['v150_lambda_max_velocity'].values
v300_col = df['v300_lambda_max_velocity'].values
v750_col = df['v750_lambda_max_velocity'].values
i50_col = df['instability_50'].values
i150_col = df['instability_150'].values
for ri in range(len(df)):
vd = vd_col[ri]
valid = np.isfinite(vd)
if valid:
row_idx = len(feat_accum)
feat_accum.append([
float(vd),
float(v50_col[ri]) if np.isfinite(v50_col[ri]) else np.nan,
float(v150_col[ri]) if np.isfinite(v150_col[ri]) else np.nan,
float(v300_col[ri]) if np.isfinite(v300_col[ri]) else np.nan,
float(v750_col[ri]) if np.isfinite(v750_col[ri]) else np.nan,
float(i50_col[ri]) if np.isfinite(i50_col[ri]) else np.nan,
float(i150_col[ri]) if np.isfinite(i150_col[ri]) else np.nan,
])
gbar_to_rownum[g] = row_idx
gbar_valid_mask_list.append(True)
else:
gbar_valid_mask_list.append(False)
g += 1
gbar_features = np.array(feat_accum, dtype=np.float64) # (N_valid, 7)
gbar_valid_mask = np.array(gbar_valid_mask_list, dtype=bool)
N_total_gbars = g
print(f" Total global bars: {N_total_gbars}")
print(f" Valid bars (non-NaN vel_div): {len(feat_accum)} ({len(feat_accum)/N_total_gbars*100:.1f}%)")
# ── Step 2: Run champion engine ──────────────────────────────────────────────────
print("\nLoading MC-Forewarner...")
forewarner = DolphinForewarner(models_dir=MC_MODELS_DIR)
print(" OK")
acb = AdaptiveCircuitBreaker()
# Pre-load the circuit breaker's per-day w750 series, keyed by date stem.
date_strings = [pf.stem for pf in parquet_files]
acb.preload_w750(date_strings)
OB_ASSETS = ["BTCUSDT", "ETHUSDT", "BNBUSDT", "SOLUSDT"]
# Synthetic order book with per-asset imbalance biases — no live OB data here.
_mock_ob = MockOBProvider(
    imbalance_bias=-0.09, depth_scale=1.0, assets=OB_ASSETS,
    imbalance_biases={"BTCUSDT": -0.086, "ETHUSDT": -0.092,
                      "BNBUSDT": +0.05, "SOLUSDT": +0.05},
)
ob_eng = OBFeatureEngine(_mock_ob)
ob_eng.preload_date("mock", OB_ASSETS)
print(f"\n=== Running champion engine (55 days) ===")
t0 = time.time()
engine = NDAlphaEngine(**ENGINE_KWARGS)
engine.set_ob_engine(ob_eng)
engine.set_acb(acb)
engine.set_mc_forewarner(forewarner, MC_BASE_CFG)
# Esoteric hazard disabled for this sweep.
engine.set_esoteric_hazard_multiplier(0.0)
for pf in parquet_files:
    ds = pf.stem
    df, acols, dvol = pq_data[ds]
    # Vol regime gate: a bar passes when its BTC vol exceeds the p60 cut.
    vol_ok = np.where(np.isfinite(dvol), dvol > vol_p60, False)
    engine.process_day(ds, df, acols, vol_regime_ok=vol_ok)
tr = engine.trade_history
roi = (engine.capital - 25000) / 25000 * 100
w_count = sum(1 for t in tr if t.pnl_absolute > 0)
# NOTE(review): divides by len(tr) — raises ZeroDivisionError if no trades fired.
print(f" {time.time()-t0:.1f}s | {len(tr)} trades | ROI={roi:+.2f}% | WR={w_count/len(tr)*100:.1f}%")
# ── Step 3: Extract entry-bar features ──────────────────────────────────────────
print(f"\nExtracting entry-bar features for {len(tr)} trades...")
def _get_row(gbar_idx):
"""Return feature vector for a global bar, or all-NaN if invalid."""
if gbar_idx < 0 or not gbar_valid_mask[gbar_idx]:
return np.full(N_FEAT, np.nan)
ri = gbar_to_rownum.get(gbar_idx)
if ri is None:
return np.full(N_FEAT, np.nan)
return gbar_features[ri]
# Feature name catalogue — order here defines column order in feat_matrix.
feat_names = list(FEAT_COLS)                       # 7 raw eigen features
for _prefix in ("d1", "d2", "d3"):                 # 21 finite-difference derivatives
    feat_names.extend(f"{_prefix}_{c}" for c in FEAT_COLS)
VEL_NAMES = ['v50', 'v150', 'v300', 'v750']
VEL_IDX = [1, 2, 3, 4]  # positions of the velocity columns inside FEAT_COLS
pairs = [(i, j) for i in range(4) for j in range(i + 1, 4)]
feat_names.extend(f"diff_{VEL_NAMES[i]}_{VEL_NAMES[j]}" for i, j in pairs)    # 6 diffs
feat_names.extend(f"ratio_{VEL_NAMES[i]}_{VEL_NAMES[j]}" for i, j in pairs)   # 6 ratios
feat_names.extend([
    'inter_inst_ratio',   # inst50/inst150
    'inter_inst_prod',    # inst50*inst150
    'inter_vd_inst50',    # vel_div*inst50
    'inter_vd_inst150',   # vel_div*inst150
])  # 4 instability interactions
feat_names.extend([
    'hist_vd_mean3',      # mean vel_div last 3 bars
    'hist_vd_std3',       # std vel_div last 3 bars
    'hist_vd_min5',       # min vel_div last 5 bars
    'hist_v50_mean3',     # mean v50 last 3 bars
    'hist_v750_mean3',    # mean v750 last 3 bars
])  # 5 historical context
N_FEATURES = len(feat_names)  # 7 + 21 + 6 + 6 + 4 + 5 = 49
print(f" Feature count: {N_FEATURES}")
trade_feat_rows = []  # one np.ndarray of length N_FEATURES per trade
outcome_rows = []     # one outcome dict per trade
# NaN-safe reducers, hoisted out of the per-trade loop: the originals were
# re-defined on every iteration, which is pure per-trade overhead.
def _safe_mean(arr):
    """Mean over the finite entries; NaN when none are finite."""
    valid = arr[np.isfinite(arr)]
    return float(np.mean(valid)) if len(valid) > 0 else np.nan
def _safe_std(arr):
    """Std over the finite entries; NaN unless at least two are finite."""
    valid = arr[np.isfinite(arr)]
    return float(np.std(valid)) if len(valid) > 1 else np.nan
def _safe_min(arr):
    """Min over the finite entries; NaN when none are finite."""
    valid = arr[np.isfinite(arr)]
    return float(np.min(valid)) if len(valid) > 0 else np.nan
for t in tr:
    eb = t.entry_bar
    # Raw features at the entry bar and the three bars before it
    f0 = _get_row(eb)
    f1 = _get_row(eb - 1)
    f2 = _get_row(eb - 2)
    f3 = _get_row(eb - 3)
    # Derivatives need all 4 bars valid (and the lookback fully in range)
    invalid_lookback = (
        eb - 3 < 0
        or not gbar_valid_mask[eb]
        or not gbar_valid_mask[eb - 1]
        or not gbar_valid_mask[eb - 2]
        or not gbar_valid_mask[eb - 3]
    )
    row = np.empty(N_FEATURES, dtype=np.float64)
    # --- Raw (7) ---
    row[:7] = f0
    if invalid_lookback:
        # Derivatives all NaN; raw features still set above
        row[7:] = np.nan
    else:
        # --- 1st derivatives (7) ---
        d1_0 = f0 - f1
        d1_1 = f1 - f2
        d1_2 = f2 - f3
        row[7:14] = d1_0
        # --- 2nd derivatives (7) ---
        d2_0 = d1_0 - d1_1
        d2_1 = d1_1 - d1_2
        row[14:21] = d2_0
        # --- 3rd derivatives (7) ---
        d3_0 = d2_0 - d2_1
        row[21:28] = d3_0
        # --- Cross-TF velocity at entry bar ---
        vi = VEL_IDX  # [1,2,3,4]
        offset = 28
        for pi, pj in pairs:  # 6 diffs
            row[offset] = f0[vi[pi]] - f0[vi[pj]]
            offset += 1
        for pi, pj in pairs:  # 6 ratios (NaN when denominator is 0 or NaN)
            denom = f0[vi[pj]]
            row[offset] = f0[vi[pi]] / denom if (np.isfinite(denom) and denom != 0.0) else np.nan
            offset += 1
        # --- Instability interactions ---
        inst50 = f0[5]
        inst150 = f0[6]
        vd = f0[0]
        row[offset] = inst50 / inst150 if (np.isfinite(inst150) and inst150 != 0.0) else np.nan
        row[offset + 1] = inst50 * inst150
        row[offset + 2] = vd * inst50
        row[offset + 3] = vd * inst150
        offset += 4
        # --- Historical context: collect vel_div over last 5 valid bars ---
        # Use last 5 global bars [eb-4..eb] for vd_min5; [eb-2..eb] for mean3/std3
        vd_last5 = np.array([_get_row(eb - k)[0] for k in range(4, -1, -1)])
        vd_last3 = vd_last5[2:]  # [eb-2, eb-1, eb]
        v50_last3 = np.array([_get_row(eb - k)[1] for k in range(2, -1, -1)])
        v750_last3 = np.array([_get_row(eb - k)[4] for k in range(2, -1, -1)])
        row[offset] = _safe_mean(vd_last3)
        row[offset + 1] = _safe_std(vd_last3)
        row[offset + 2] = _safe_min(vd_last5)
        row[offset + 3] = _safe_mean(v50_last3)
        row[offset + 4] = _safe_mean(v750_last3)
    trade_feat_rows.append(row)
    pnl_abs = t.pnl_absolute
    outcome_rows.append({
        'trade_id': t.trade_id,
        'asset': t.asset,
        'direction': t.direction,
        'entry_bar': t.entry_bar,
        'exit_bar': t.exit_bar,
        'bars_held': t.bars_held,
        'exit_reason': t.exit_reason,
        'leverage': t.leverage,
        'notional': t.notional,
        'pnl_pct': t.pnl_pct,
        'pnl_absolute': pnl_abs,
        'winner': int(pnl_abs > 0),
        'is_tp': int(t.exit_reason == 'FIXED_TP'),
        'is_maxhold_loss': int(t.exit_reason == 'MAX_HOLD' and pnl_abs <= 0),
        'is_maxhold_win': int(t.exit_reason == 'MAX_HOLD' and pnl_abs > 0),
        'invalid_lookback': int(invalid_lookback),
    })
feat_matrix = np.array(trade_feat_rows, dtype=np.float64)  # (N_trades, N_FEATURES)
print(f" Feature matrix: {feat_matrix.shape}")
print(f" Trades with invalid lookback: {sum(o['invalid_lookback'] for o in outcome_rows)}")
# ── Step 4: Clip extreme ratios at 99th percentile ──────────────────────────────
# Velocity ratios can explode when the denominator is tiny; winsorize them to
# the [p1, p99] range so one blown-up trade doesn't dominate the stats.
print("\nClipping ratio features at 99th percentile...")
ratio_start = 28 + 6  # ratio columns sit right after the 6 cross-TF diffs
ratio_end = ratio_start + 6
for col_i in range(ratio_start, ratio_end):
    col = feat_matrix[:, col_i]
    finite_vals = col[np.isfinite(col)]
    if len(finite_vals) > 10:
        lo, hi = np.percentile(finite_vals, [1, 99])
        feat_matrix[:, col_i] = np.clip(col, lo, hi)
# ── Step 5: Statistical analysis ────────────────────────────────────────────────
print("Running statistical analysis...")
try:
    from scipy.stats import ks_2samp, pearsonr, pointbiserialr
    HAS_SCIPY = True
except ImportError:
    HAS_SCIPY = False
    print(" WARNING: scipy not available — KS + point-biserial skipped")
try:
    from sklearn.metrics import roc_auc_score as _sklearn_auc
    def roc_auc_score(y_true, y_score):
        return _sklearn_auc(y_true, y_score)
    HAS_SKLEARN = True
except ImportError:
    HAS_SKLEARN = False
    def roc_auc_score(y_true, y_score):
        """Manual ROC-AUC via the Mann-Whitney rank-sum identity.

        Tied scores receive average (midrank) ranks, matching sklearn/scipy
        behaviour. The previous argsort-based ranking handed tied scores
        arbitrary distinct ranks, which biases the AUC whenever ties exist.
        Returns NaN when only one class is present.
        """
        y_true = np.asarray(y_true, dtype=np.float64)
        y_score = np.asarray(y_score, dtype=np.float64)
        n1 = int(np.sum(y_true == 1))
        n0 = len(y_true) - n1
        if n1 == 0 or n0 == 0:
            return np.nan
        n = len(y_score)
        order = np.argsort(y_score, kind='mergesort')
        sorted_scores = y_score[order]
        ranks = np.empty(n, dtype=np.float64)
        i = 0
        while i < n:
            # Extend j to the end of the run of scores equal to sorted_scores[i]
            j = i
            while j + 1 < n and sorted_scores[j + 1] == sorted_scores[i]:
                j += 1
            ranks[order[i:j + 1]] = 0.5 * (i + j) + 1.0  # 1-based midrank
            i = j + 1
        auc = (np.sum(ranks[y_true == 1]) - n1 * (n1 + 1) / 2) / (n1 * n0)
        return float(auc)
# Outcome vectors aligned row-for-row with feat_matrix.
pnl_pct_arr = np.array([o['pnl_pct'] for o in outcome_rows])
winner_arr = np.array([o['winner'] for o in outcome_rows])
is_tp_arr = np.array([o['is_tp'] for o in outcome_rows])
is_mhl_arr = np.array([o['is_maxhold_loss'] for o in outcome_rows])
analysis_rows = []
# One analysis row per feature: correlation / separation stats vs trade outcome.
for fi, fname in enumerate(feat_names):
    col = feat_matrix[:, fi]
    # Only rows where both the feature and the outcome are finite count.
    valid_mask = np.isfinite(col) & np.isfinite(pnl_pct_arr)
    n_valid = int(np.sum(valid_mask))
    if n_valid < 10:
        # Too few usable samples — emit an all-NaN placeholder row.
        analysis_rows.append({
            'feature': fname, 'n_valid': n_valid,
            'pearson_r': np.nan, 'pearson_p': np.nan, 'pb_corr': np.nan,
            'ks_stat_winner': np.nan, 'ks_pval_winner': np.nan,
            'roc_auc_winner': np.nan, 'roc_auc_maxhold_loss': np.nan,
            'roc_auc_tp': np.nan, 'winner_mean': np.nan, 'loser_mean': np.nan,
        })
        continue
    x = col[valid_mask]
    y_pnl = pnl_pct_arr[valid_mask]
    y_win = winner_arr[valid_mask]
    y_tp = is_tp_arr[valid_mask]
    y_mhl = is_mhl_arr[valid_mask]
    # Pearson r with pnl_pct
    if HAS_SCIPY:
        pr, pp = pearsonr(x, y_pnl)
    else:
        # Fallback: covariance / (std*std); no p-value without scipy.
        cov = np.cov(x, y_pnl)[0, 1]
        pr = cov / (np.std(x) * np.std(y_pnl) + 1e-15)
        pp = np.nan
    # Point-biserial (requires both winner classes present)
    pb = np.nan
    if HAS_SCIPY and len(np.unique(y_win)) == 2:
        pb, _ = pointbiserialr(y_win, x)
    # KS stat winner — distributional separation of winner vs loser values
    ks_stat, ks_pval = np.nan, np.nan
    if HAS_SCIPY and len(np.unique(y_win)) == 2:
        w_vals = x[y_win == 1]
        l_vals = x[y_win == 0]
        if len(w_vals) >= 2 and len(l_vals) >= 2:
            ks_stat, ks_pval = ks_2samp(w_vals, l_vals)
    # ROC-AUC: winner (take max of both directions — the sign of a feature
    # doesn't matter for discrimination strength)
    roc_w = np.nan
    if len(np.unique(y_win)) == 2:
        try:
            a1 = roc_auc_score(y_win, x)
            roc_w = max(a1, 1.0 - a1)
        except Exception:
            pass
    # ROC-AUC: MAX_HOLD loss
    roc_mhl = np.nan
    if len(np.unique(y_mhl)) == 2:
        try:
            a1 = roc_auc_score(y_mhl, x)
            roc_mhl = max(a1, 1.0 - a1)
        except Exception:
            pass
    # ROC-AUC: TP
    roc_tp = np.nan
    if len(np.unique(y_tp)) == 2:
        try:
            a1 = roc_auc_score(y_tp, x)
            roc_tp = max(a1, 1.0 - a1)
        except Exception:
            pass
    # Winner / loser mean of the feature value
    w_mean = float(np.mean(x[y_win == 1])) if np.any(y_win == 1) else np.nan
    l_mean = float(np.mean(x[y_win == 0])) if np.any(y_win == 0) else np.nan
    analysis_rows.append({
        'feature': fname, 'n_valid': n_valid,
        'pearson_r': float(pr), 'pearson_p': float(pp) if not np.isnan(pp) else np.nan,
        'pb_corr': float(pb) if not np.isnan(pb) else np.nan,
        'ks_stat_winner': float(ks_stat) if not np.isnan(ks_stat) else np.nan,
        'ks_pval_winner': float(ks_pval) if not np.isnan(ks_pval) else np.nan,
        'roc_auc_winner': float(roc_w) if not np.isnan(roc_w) else np.nan,
        'roc_auc_maxhold_loss': float(roc_mhl) if not np.isnan(roc_mhl) else np.nan,
        'roc_auc_tp': float(roc_tp) if not np.isnan(roc_tp) else np.nan,
        'winner_mean': float(w_mean) if not np.isnan(w_mean) else np.nan,
        'loser_mean': float(l_mean) if not np.isnan(l_mean) else np.nan,
    })
# ── Step 6: Print sorted tables ──────────────────────────────────────────────────
def _fmt(v, fmt=".4f"):
return f"{v:{fmt}}" if (v is not None and not np.isnan(v)) else " nan"
def _print_table(rows, sort_key, title, n=20):
def _sk(r):
v = r[sort_key]
return float(abs(v)) if (v is not None and not np.isnan(v)) else 0.0
sorted_rows = sorted(rows, key=_sk, reverse=True)[:n]
print(f"\n{''*95}")
print(f" {title}")
print(f"{''*95}")
print(f" {'feature':<30} {'n_valid':>7} {'pearson_r':>9} {'pb_corr':>7} "
f"{'ks_stat':>7} {'roc_win':>7} {'roc_mhl':>7} {'roc_tp':>7} {'win_mean':>9} {'los_mean':>9}")
print(f"{''*95}")
for r in sorted_rows:
print(f" {r['feature']:<30} {r['n_valid']:>7d} {_fmt(r['pearson_r']):>9} "
f"{_fmt(r['pb_corr']):>7} {_fmt(r['ks_stat_winner']):>7} "
f"{_fmt(r['roc_auc_winner']):>7} {_fmt(r['roc_auc_maxhold_loss']):>7} "
f"{_fmt(r['roc_auc_tp']):>7} {_fmt(r['winner_mean']):>9} {_fmt(r['loser_mean']):>9}")
# Sort guards: handle nan safely
def _key_roc_win(r):
v = r['roc_auc_winner']
return float(v) if (v is not None and not np.isnan(v)) else 0.0
def _key_roc_mhl(r):
v = r['roc_auc_maxhold_loss']
return float(v) if (v is not None and not np.isnan(v)) else 0.0
def _key_pearson(r):
v = r['pearson_r']
return abs(float(v)) if (v is not None and not np.isnan(v)) else 0.0
# NOTE(review): _print_table re-sorts its rows by |sort_key| internally, so
# these outer sorted(...) calls only influence the ordering of exact ties.
_print_table(
    sorted(analysis_rows, key=_key_roc_win, reverse=True),
    'roc_auc_winner',
    "TOP 20 BY ROC-AUC (winner discrimination)", n=20
)
_print_table(
    sorted(analysis_rows, key=_key_roc_mhl, reverse=True),
    'roc_auc_maxhold_loss',
    "TOP 20 BY ROC-AUC (MAX_HOLD loss discrimination)", n=20
)
_print_table(
    sorted(analysis_rows, key=_key_pearson, reverse=True),
    'pearson_r',
    "TOP 20 BY |Pearson r| (pnl_pct correlation)", n=20
)
# ── ASCII histograms for top 5 winner-discriminating features ───────────────────
top5_feats = [r['feature'] for r in sorted(analysis_rows, key=_key_roc_win, reverse=True)[:5]]
def ascii_hist_pair(feat_name, fi):
    """Print side-by-side winner/loser histograms for feature column `fi`."""
    col = feat_matrix[:, fi]
    w_vals = col[(winner_arr == 1) & np.isfinite(col)]
    l_vals = col[(winner_arr == 0) & np.isfinite(col)]
    if len(w_vals) < 5 or len(l_vals) < 5:
        return  # too few samples on one side for a meaningful shape
    all_valid = col[np.isfinite(col)]
    # Clip the display range to [p1, p99] so outliers don't flatten the plot.
    lo = float(np.percentile(all_valid, 1))
    hi = float(np.percentile(all_valid, 99))
    BINS = 15
    edges = np.linspace(lo, hi, BINS + 1)
    def _hist_bar(vals, edges):
        counts, _ = np.histogram(vals, bins=edges)
        return counts
    wc = _hist_bar(np.clip(w_vals, lo, hi), edges)
    lc = _hist_bar(np.clip(l_vals, lo, hi), edges)
    w_tot = max(1, len(w_vals))
    l_tot = max(1, len(l_vals))
    # NOTE(review): max(1, ...) clamps bar_max to 1 since the fractions are
    # always <= 1, so bars scale by raw bin fraction — confirm this is intended
    # (a pure div-by-zero guard would use a tiny epsilon instead).
    bar_max = max(1, max(np.max(wc) / w_tot, np.max(lc) / l_tot))
    WIDTH = 20
    # '█' bars / '─' rules restore glyphs lost to an earlier encoding mangle
    # (the previous "'' * n" drew nothing).
    print(f"\n {'─' * 60}")
    print(f" {feat_name} (winners n={w_tot} losers n={l_tot} range=[{lo:.4f}, {hi:.4f}])")
    print(f" {'─' * 60}")
    print(f" {'bin':>22} {'WINNER':>{WIDTH}} {'LOSER':>{WIDTH}}")
    for i in range(BINS):
        lbl = f"[{edges[i]:+.4f},{edges[i+1]:+.4f})"
        wbar = '█' * int(wc[i] / w_tot / bar_max * WIDTH)
        lbar = '█' * int(lc[i] / l_tot / bar_max * WIDTH)
        print(f" {lbl:>22} {wbar:<{WIDTH}} {lbar:<{WIDTH}}")
print(f"\n{'─' * 95}")
print(f" ASCII HISTOGRAMS — TOP 5 WINNER-DISCRIMINATING FEATURES")
for fname in top5_feats:
    fi = feat_names.index(fname)
    ascii_hist_pair(fname, fi)
# ── Step 7: Save outputs ─────────────────────────────────────────────────────────
LOG_DIR = Path(__file__).parent / "run_logs"
LOG_DIR.mkdir(exist_ok=True)
ts = datetime.now().strftime("%Y%m%d_%H%M%S")
# Per-trade feature matrix + outcomes
feat_csv = LOG_DIR / f"entry_quality_features_{ts}.csv"
with open(feat_csv, 'w', newline='') as f:
    cw = csv.writer(f)
    outcome_keys = ['trade_id', 'asset', 'direction', 'entry_bar', 'exit_bar',
                    'bars_held', 'exit_reason', 'leverage', 'notional',
                    'pnl_pct', 'pnl_absolute', 'winner', 'is_tp',
                    'is_maxhold_loss', 'is_maxhold_win', 'invalid_lookback']
    cw.writerow(outcome_keys + feat_names)
    for i, o in enumerate(outcome_rows):
        out_vals = [o[k] for k in outcome_keys]
        # Empty cell (not 'nan') for missing values keeps the CSV friendly
        # to spreadsheet import.
        feat_vals = [f"{v:.8g}" if np.isfinite(v) else '' for v in feat_matrix[i]]
        cw.writerow(out_vals + feat_vals)
def _fmtv(v):
    """CSV cell: 6-dp float, or an empty cell when the stat is None/NaN.

    Hoisted out of the write loop — the original re-defined it per row.
    """
    return f"{v:.6f}" if (v is not None and not np.isnan(v)) else ''
# Feature analysis table (sorted by winner ROC-AUC, best first)
sweep_csv = LOG_DIR / f"entry_quality_sweep_{ts}.csv"
with open(sweep_csv, 'w', newline='') as f:
    cw = csv.writer(f)
    cw.writerow(['feature', 'n_valid', 'pearson_r', 'pearson_p', 'pb_corr',
                 'ks_stat_winner', 'ks_pval_winner',
                 'roc_auc_winner', 'roc_auc_maxhold_loss', 'roc_auc_tp',
                 'winner_mean', 'loser_mean'])
    for r in sorted(analysis_rows, key=_key_roc_win, reverse=True):
        cw.writerow([r['feature'], r['n_valid'],
                     _fmtv(r['pearson_r']), _fmtv(r['pearson_p']), _fmtv(r['pb_corr']),
                     _fmtv(r['ks_stat_winner']), _fmtv(r['ks_pval_winner']),
                     _fmtv(r['roc_auc_winner']), _fmtv(r['roc_auc_maxhold_loss']),
                     _fmtv(r['roc_auc_tp']),
                     _fmtv(r['winner_mean']), _fmtv(r['loser_mean'])])
# '─' rules restore the separator glyph lost to an earlier encoding mangle.
print(f"\n{'─' * 95}")
print(f" per-trade features → {feat_csv} ({len(outcome_rows)} rows, {N_FEATURES} features)")
print(f" feature analysis → {sweep_csv} ({len(analysis_rows)} rows)")
print(f"{'─' * 95}")