"""Entry-bar eigenvalue feature sweep — winner vs loser discrimination. Loads the 55-day champion dataset, re-runs the full engine stack to collect trade_history with entry_bar indices, then extracts a rich feature matrix (raw + derivatives + cross-TF combos + historical context) at each entry bar. Statistical analysis: - Pearson r vs pnl_pct - Point-biserial + KS stat vs winner binary - ROC-AUC for winner / MAX_HOLD-loss / TP discrimination Outputs: run_logs/entry_quality_features_.csv — per-trade feature matrix run_logs/entry_quality_sweep_.csv — per-feature analysis table """ import sys, time, csv from pathlib import Path from datetime import datetime sys.path.insert(0, str(Path(__file__).parent)) # ── Numba JIT warmup (must run BEFORE other imports that touch numpy internals) ── print("Compiling numba kernels...") t0c = time.time() from nautilus_dolphin.nautilus.alpha_asset_selector import compute_irp_nb, compute_ars_nb, rank_assets_irp_nb from nautilus_dolphin.nautilus.alpha_bet_sizer import compute_sizing_nb from nautilus_dolphin.nautilus.alpha_signal_generator import check_dc_nb from nautilus_dolphin.nautilus.ob_features import ( OBFeatureEngine, compute_imbalance_nb, compute_depth_1pct_nb, compute_depth_quality_nb, compute_fill_probability_nb, compute_spread_proxy_nb, compute_depth_asymmetry_nb, compute_imbalance_persistence_nb, compute_withdrawal_velocity_nb, compute_market_agreement_nb, compute_cascade_signal_nb, ) from nautilus_dolphin.nautilus.ob_provider import MockOBProvider import numpy as np _p = np.array([1.0, 2.0, 3.0], dtype=np.float64) compute_irp_nb(_p, -1); compute_ars_nb(1.0, 0.5, 0.01) rank_assets_irp_nb(np.ones((10, 2), dtype=np.float64), 8, -1, 5, 500.0, 20, 0.20) compute_sizing_nb(-0.03, -0.02, -0.05, 3.0, 0.5, 5.0, 0.20, True, True, 0.0, np.zeros(4, dtype=np.int64), np.zeros(4, dtype=np.int64), np.zeros(5, dtype=np.float64), 0, -1, 0.01, 0.04) check_dc_nb(_p, 3, 1, 0.75) _b = np.array([100.0, 200.0, 300.0, 400.0, 500.0], 
dtype=np.float64) _a = np.array([110.0, 190.0, 310.0, 390.0, 510.0], dtype=np.float64) compute_imbalance_nb(_b, _a); compute_depth_1pct_nb(_b, _a) compute_depth_quality_nb(210.0, 200.0); compute_fill_probability_nb(1.0) compute_spread_proxy_nb(_b, _a); compute_depth_asymmetry_nb(_b, _a) compute_imbalance_persistence_nb(np.array([0.1, -0.1], dtype=np.float64), 2) compute_withdrawal_velocity_nb(np.array([100.0, 110.0], dtype=np.float64), 1) compute_market_agreement_nb(np.array([0.1, -0.05], dtype=np.float64), 2) compute_cascade_signal_nb(np.array([-0.05, -0.15], dtype=np.float64), 2, -0.10) print(f" JIT: {time.time() - t0c:.1f}s") import pandas as pd from nautilus_dolphin.nautilus.esf_alpha_orchestrator import NDAlphaEngine from nautilus_dolphin.nautilus.adaptive_circuit_breaker import AdaptiveCircuitBreaker from mc.mc_ml import DolphinForewarner # ── Config (identical to test_pf_dynamic_beta_validate.py) ────────────────────── VBT_DIR = Path(r"C:\Users\Lenovo\Documents\- DOLPHIN NG HD HCM TSF Predict\vbt_cache") MC_MODELS_DIR = str(Path(r"C:\Users\Lenovo\Documents\- DOLPHIN NG HD HCM TSF Predict\nautilus_dolphin\mc_results\models")) META_COLS = { 'timestamp', 'scan_number', 'v50_lambda_max_velocity', 'v150_lambda_max_velocity', 'v300_lambda_max_velocity', 'v750_lambda_max_velocity', 'vel_div', 'instability_50', 'instability_150', } ENGINE_KWARGS = dict( initial_capital=25000.0, vel_div_threshold=-0.02, vel_div_extreme=-0.05, min_leverage=0.5, max_leverage=5.0, leverage_convexity=3.0, fraction=0.20, fixed_tp_pct=0.0099, stop_pct=1.0, max_hold_bars=120, use_direction_confirm=True, dc_lookback_bars=7, dc_min_magnitude_bps=0.75, dc_skip_contradicts=True, dc_leverage_boost=1.0, dc_leverage_reduce=0.5, use_asset_selection=True, min_irp_alignment=0.45, use_sp_fees=True, use_sp_slippage=True, sp_maker_entry_rate=0.62, sp_maker_exit_rate=0.50, use_ob_edge=True, ob_edge_bps=5.0, ob_confirm_rate=0.40, lookback=100, use_alpha_layers=True, use_dynamic_leverage=True, seed=42, 
tf_enabled=False, ) MC_BASE_CFG = { 'trial_id': 0, 'vel_div_threshold': -0.020, 'vel_div_extreme': -0.050, 'use_direction_confirm': True, 'dc_lookback_bars': 7, 'dc_min_magnitude_bps': 0.75, 'dc_skip_contradicts': True, 'dc_leverage_boost': 1.00, 'dc_leverage_reduce': 0.50, 'vd_trend_lookback': 10, 'min_leverage': 0.50, 'max_leverage': 5.00, 'leverage_convexity': 3.00, 'fraction': 0.20, 'use_alpha_layers': True, 'use_dynamic_leverage': True, 'fixed_tp_pct': 0.0099, 'stop_pct': 1.00, 'max_hold_bars': 120, 'use_sp_fees': True, 'use_sp_slippage': True, 'sp_maker_entry_rate': 0.62, 'sp_maker_exit_rate': 0.50, 'use_ob_edge': True, 'ob_edge_bps': 5.00, 'ob_confirm_rate': 0.40, 'ob_imbalance_bias': -0.09, 'ob_depth_scale': 1.00, 'use_asset_selection': True, 'min_irp_alignment': 0.45, 'lookback': 100, 'acb_beta_high': 0.80, 'acb_beta_low': 0.20, 'acb_w750_threshold_pct': 60, } FEAT_COLS = ['vel_div', 'v50', 'v150', 'v300', 'v750', 'inst50', 'inst150'] N_FEAT = len(FEAT_COLS) # 7 # ── Step 1: Load data + build global bar feature arrays ────────────────────────── print("\nLoading parquet files...") parquet_files = sorted(VBT_DIR.glob("*.parquet")) parquet_files = [p for p in parquet_files if 'catalog' not in str(p)] print(f" {len(parquet_files)} files") pq_data = {} for pf in parquet_files: df = pd.read_parquet(pf) ac = [c for c in df.columns if c not in META_COLS] bp = df['BTCUSDT'].values if 'BTCUSDT' in df.columns else None dv = np.full(len(df), np.nan) if bp is not None: for i in range(50, len(bp)): seg = bp[max(0, i-50):i] if len(seg) >= 10: dv[i] = float(np.std(np.diff(seg) / seg[:-1])) pq_data[pf.stem] = (df, ac, dv) # vol_p60 from first 2 files (matches reference) all_vols = [] for pf in parquet_files[:2]: df, _, _ = pq_data[pf.stem] if 'BTCUSDT' not in df.columns: continue pr = df['BTCUSDT'].values for i in range(60, len(pr)): seg = pr[max(0, i-50):i] if len(seg) >= 10: v = float(np.std(np.diff(seg) / seg[:-1])) if v > 0: all_vols.append(v) vol_p60 = 
float(np.percentile(all_vols, 60)) print(f"\nBuilding global bar feature arrays...") # gbar_features: list accumulator → converted to np.ndarray after full pass # Layout: [vel_div, v50, v150, v300, v750, inst50, inst150] # gbar_valid_mask: array[bool] indexed by global_bar_idx # gbar_to_rownum: global_bar_idx → index into gbar_features (only valid bars) # Every bar (including NaN-vd bars) increments the global counter, matching engine. feat_accum = [] # only valid bars appended gbar_valid_mask_list = [] # one bool per global bar gbar_to_rownum = {} # global_bar_idx → row index in feat_accum g = 0 for pf in parquet_files: df, _, _ = pq_data[pf.stem] vd_col = df['vel_div'].values v50_col = df['v50_lambda_max_velocity'].values v150_col = df['v150_lambda_max_velocity'].values v300_col = df['v300_lambda_max_velocity'].values v750_col = df['v750_lambda_max_velocity'].values i50_col = df['instability_50'].values i150_col = df['instability_150'].values for ri in range(len(df)): vd = vd_col[ri] valid = np.isfinite(vd) if valid: row_idx = len(feat_accum) feat_accum.append([ float(vd), float(v50_col[ri]) if np.isfinite(v50_col[ri]) else np.nan, float(v150_col[ri]) if np.isfinite(v150_col[ri]) else np.nan, float(v300_col[ri]) if np.isfinite(v300_col[ri]) else np.nan, float(v750_col[ri]) if np.isfinite(v750_col[ri]) else np.nan, float(i50_col[ri]) if np.isfinite(i50_col[ri]) else np.nan, float(i150_col[ri]) if np.isfinite(i150_col[ri]) else np.nan, ]) gbar_to_rownum[g] = row_idx gbar_valid_mask_list.append(True) else: gbar_valid_mask_list.append(False) g += 1 gbar_features = np.array(feat_accum, dtype=np.float64) # (N_valid, 7) gbar_valid_mask = np.array(gbar_valid_mask_list, dtype=bool) N_total_gbars = g print(f" Total global bars: {N_total_gbars}") print(f" Valid bars (non-NaN vel_div): {len(feat_accum)} ({len(feat_accum)/N_total_gbars*100:.1f}%)") # ── Step 2: Run champion engine ────────────────────────────────────────────────── print("\nLoading MC-Forewarner...") 
forewarner = DolphinForewarner(models_dir=MC_MODELS_DIR)
print(" OK")

acb = AdaptiveCircuitBreaker()
date_strings = [pf.stem for pf in parquet_files]
acb.preload_w750(date_strings)

# Mock order-book provider with per-asset imbalance biases (no live OB data).
OB_ASSETS = ["BTCUSDT", "ETHUSDT", "BNBUSDT", "SOLUSDT"]
_mock_ob = MockOBProvider(
    imbalance_bias=-0.09, depth_scale=1.0, assets=OB_ASSETS,
    imbalance_biases={"BTCUSDT": -0.086, "ETHUSDT": -0.092, "BNBUSDT": +0.05, "SOLUSDT": +0.05},
)
ob_eng = OBFeatureEngine(_mock_ob)
ob_eng.preload_date("mock", OB_ASSETS)

print(f"\n=== Running champion engine (55 days) ===")
t0 = time.time()
engine = NDAlphaEngine(**ENGINE_KWARGS)
engine.set_ob_engine(ob_eng)
engine.set_acb(acb)
engine.set_mc_forewarner(forewarner, MC_BASE_CFG)
engine.set_esoteric_hazard_multiplier(0.0)
for pf in parquet_files:
    ds = pf.stem
    df, acols, dvol = pq_data[ds]
    # Volatility regime gate: bar passes only when its rolling vol is both
    # finite and above the 60th-percentile reference level.
    vol_ok = np.where(np.isfinite(dvol), dvol > vol_p60, False)
    engine.process_day(ds, df, acols, vol_regime_ok=vol_ok)
tr = engine.trade_history
roi = (engine.capital - 25000) / 25000 * 100
w_count = sum(1 for t in tr if t.pnl_absolute > 0)
print(f" {time.time()-t0:.1f}s | {len(tr)} trades | ROI={roi:+.2f}% | WR={w_count/len(tr)*100:.1f}%")

# ── Step 3: Extract entry-bar features ────────────────────────────────────────
print(f"\nExtracting entry-bar features for {len(tr)} trades...")

def _get_row(gbar_idx):
    """Return feature vector for a global bar, or all-NaN if invalid."""
    if gbar_idx < 0 or not gbar_valid_mask[gbar_idx]:
        return np.full(N_FEAT, np.nan)
    ri = gbar_to_rownum.get(gbar_idx)
    if ri is None:
        return np.full(N_FEAT, np.nan)
    return gbar_features[ri]

# Feature name catalogue — order here defines the column order of feat_matrix.
feat_names = []
feat_names += FEAT_COLS                                  # 7 raw
feat_names += [f"d1_{c}" for c in FEAT_COLS]             # 7 first-deriv
feat_names += [f"d2_{c}" for c in FEAT_COLS]             # 7 second-deriv
feat_names += [f"d3_{c}" for c in FEAT_COLS]             # 7 third-deriv
VEL_NAMES = ['v50', 'v150', 'v300', 'v750']
VEL_IDX = [1, 2, 3, 4]  # indices in FEAT_COLS
pairs = [(i, j) for i in range(4) for j in range(i+1, 4)]
for pi, pj in pairs:
    feat_names.append(f"diff_{VEL_NAMES[pi]}_{VEL_NAMES[pj]}")    # 6 diffs
for pi, pj in pairs:
    feat_names.append(f"ratio_{VEL_NAMES[pi]}_{VEL_NAMES[pj]}")   # 6 ratios
feat_names += [
    'inter_inst_ratio',    # inst50/inst150
    'inter_inst_prod',     # inst50*inst150
    'inter_vd_inst50',     # vel_div*inst50
    'inter_vd_inst150',    # vel_div*inst150
]  # 4 instability interactions
feat_names += [
    'hist_vd_mean3',       # mean vel_div last 3 bars
    'hist_vd_std3',        # std vel_div last 3 bars
    'hist_vd_min5',        # min vel_div last 5 bars
    'hist_v50_mean3',      # mean v50 last 3 bars
    'hist_v750_mean3',     # mean v750 last 3 bars
]  # 5 historical context
# 49 total = 4*7 raw/derivs + 6 diffs + 6 ratios + 4 interactions + 5 history.
# NOTE(review): an earlier comment here claimed 52, which does not match the
# catalogue above — the arithmetic gives 49.
N_FEATURES = len(feat_names)
print(f" Feature count: {N_FEATURES}")

trade_feat_rows = []   # list of np.ndarray length N_FEATURES
outcome_rows = []      # list of dicts
NAN_VEC = np.full(N_FEATURES, np.nan)  # NOTE(review): appears unused below
for t in tr:
    eb = t.entry_bar
    # Raw features at entry bar and the three preceding bars.
    f0 = _get_row(eb)
    f1 = _get_row(eb - 1)
    f2 = _get_row(eb - 2)
    f3 = _get_row(eb - 3)
    # Check if valid lookback (all 4 bars must be valid).  The `eb - 3 < 0`
    # test short-circuits first so the mask is never indexed negatively.
    invalid_lookback = (
        eb - 3 < 0
        or not gbar_valid_mask[eb]
        or not gbar_valid_mask[eb - 1]
        or not gbar_valid_mask[eb - 2]
        or not gbar_valid_mask[eb - 3]
    )
    row = np.empty(N_FEATURES, dtype=np.float64)
    # --- Raw (7) ---
    row[:7] = f0
    if invalid_lookback:
        # Derivatives, cross-TF combos and history all NaN; raw features
        # (columns 0..6) were still set above.
        row[7:] = np.nan
    else:
        # --- 1st derivatives (7): backward differences over 3 lags ---
        d1_0 = f0 - f1
        d1_1 = f1 - f2
        d1_2 = f2 - f3
        row[7:14] = d1_0
        # --- 2nd derivatives (7) ---
        d2_0 = d1_0 - d1_1
        d2_1 = d1_1 - d1_2
        row[14:21] = d2_0
        # --- 3rd derivatives (7) ---
        d3_0 = d2_0 - d2_1
        row[21:28] = d3_0
        # --- Cross-TF velocity at entry bar ---
        vi = VEL_IDX  # [1,2,3,4]
        offset = 28
        for pi, pj in pairs:  # 6 diffs → columns 28..33
            row[offset] = f0[vi[pi]] - f0[vi[pj]]
            offset += 1
        for pi, pj in pairs:  # 6 ratios → columns 34..39
            denom = f0[vi[pj]]
            row[offset] = f0[vi[pi]] / denom if (np.isfinite(denom) and denom != 0.0) else np.nan
            offset += 1
        # --- Instability interactions (columns 40..43) ---
        inst50 = f0[5]
        inst150 = f0[6]
        vd = f0[0]
        row[offset] = inst50 / inst150 if (np.isfinite(inst150) and inst150 != 0.0) else np.nan
        row[offset + 1] = inst50 * inst150
        row[offset + 2] = vd * inst50
        row[offset + 3] = vd * inst150
        offset += 4
        # --- Historical context: collect vel_div over last 5 valid bars ---
        # Use last 5 global bars [eb-4..eb] for vd_min5; [eb-2..eb] for mean3/std3
        vd_last5 = np.array([_get_row(eb - k)[0] for k in range(4, -1, -1)])
        vd_last3 = vd_last5[2:]  # [eb-2, eb-1, eb]
        v50_last3 = np.array([_get_row(eb - k)[1] for k in range(2, -1, -1)])
        v750_last3 = np.array([_get_row(eb - k)[4] for k in range(2, -1, -1)])

        # NOTE(review): these helpers are redefined on every valid trade;
        # harmless, but they could be hoisted to module level.
        def _safe_mean(arr):
            # Mean over finite entries only; NaN when none are finite.
            valid = arr[np.isfinite(arr)]
            return float(np.mean(valid)) if len(valid) > 0 else np.nan

        def _safe_std(arr):
            # Std over finite entries; requires at least 2 samples.
            valid = arr[np.isfinite(arr)]
            return float(np.std(valid)) if len(valid) > 1 else np.nan

        def _safe_min(arr):
            # Min over finite entries only; NaN when none are finite.
            valid = arr[np.isfinite(arr)]
            return float(np.min(valid)) if len(valid) > 0 else np.nan

        row[offset] = _safe_mean(vd_last3)      # columns 44..48
        row[offset + 1] = _safe_std(vd_last3)
        row[offset + 2] = _safe_min(vd_last5)
        row[offset + 3] = _safe_mean(v50_last3)
        row[offset + 4] = _safe_mean(v750_last3)

    trade_feat_rows.append(row)
    pnl_abs = t.pnl_absolute
    outcome_rows.append({
        'trade_id': t.trade_id,
        'asset': t.asset,
        'direction': t.direction,
        'entry_bar': t.entry_bar,
        'exit_bar': t.exit_bar,
        'bars_held': t.bars_held,
        'exit_reason': t.exit_reason,
        'leverage': t.leverage,
        'notional': t.notional,
        'pnl_pct': t.pnl_pct,
        'pnl_absolute': pnl_abs,
        'winner': int(pnl_abs > 0),
        'is_tp': int(t.exit_reason == 'FIXED_TP'),
        'is_maxhold_loss': int(t.exit_reason == 'MAX_HOLD' and pnl_abs <= 0),
        'is_maxhold_win': int(t.exit_reason == 'MAX_HOLD' and pnl_abs > 0),
        'invalid_lookback': int(invalid_lookback),
    })

feat_matrix = np.array(trade_feat_rows, dtype=np.float64)  # (N_trades, N_FEATURES)
print(f" Feature matrix: {feat_matrix.shape}")
print(f" Trades with invalid lookback: {sum(o['invalid_lookback'] for o in outcome_rows)}")

# ── Step 4: Clip extreme ratios at 99th percentile ──────────────────────────────
print("\nClipping ratio features at 99th percentile...") ratio_start = 28 + 6 # after diffs ratio_end = ratio_start + 6 for col_i in range(ratio_start, ratio_end): col = feat_matrix[:, col_i] valid = col[np.isfinite(col)] if len(valid) > 10: lo, hi = np.percentile(valid, 1), np.percentile(valid, 99) feat_matrix[:, col_i] = np.clip(col, lo, hi) # ── Step 5: Statistical analysis ──────────────────────────────────────────────── print("Running statistical analysis...") try: from scipy.stats import ks_2samp, pearsonr, pointbiserialr HAS_SCIPY = True except ImportError: HAS_SCIPY = False print(" WARNING: scipy not available — KS + point-biserial skipped") try: from sklearn.metrics import roc_auc_score as _sklearn_auc def roc_auc_score(y_true, y_score): return _sklearn_auc(y_true, y_score) HAS_SKLEARN = True except ImportError: HAS_SKLEARN = False def roc_auc_score(y_true, y_score): """Manual ROC-AUC via rank sum.""" y_true = np.asarray(y_true, dtype=np.float64) y_score = np.asarray(y_score, dtype=np.float64) n1 = int(np.sum(y_true == 1)) n0 = len(y_true) - n1 if n1 == 0 or n0 == 0: return np.nan order = np.argsort(y_score) ranks = np.empty(len(y_score), dtype=np.float64) ranks[order] = np.arange(1, len(y_score) + 1) auc = (np.sum(ranks[y_true == 1]) - n1 * (n1 + 1) / 2) / (n1 * n0) return float(auc) pnl_pct_arr = np.array([o['pnl_pct'] for o in outcome_rows]) winner_arr = np.array([o['winner'] for o in outcome_rows]) is_tp_arr = np.array([o['is_tp'] for o in outcome_rows]) is_mhl_arr = np.array([o['is_maxhold_loss'] for o in outcome_rows]) analysis_rows = [] for fi, fname in enumerate(feat_names): col = feat_matrix[:, fi] valid_mask = np.isfinite(col) & np.isfinite(pnl_pct_arr) n_valid = int(np.sum(valid_mask)) if n_valid < 10: analysis_rows.append({ 'feature': fname, 'n_valid': n_valid, 'pearson_r': np.nan, 'pearson_p': np.nan, 'pb_corr': np.nan, 'ks_stat_winner': np.nan, 'ks_pval_winner': np.nan, 'roc_auc_winner': np.nan, 'roc_auc_maxhold_loss': np.nan, 'roc_auc_tp': 
np.nan, 'winner_mean': np.nan, 'loser_mean': np.nan, }) continue x = col[valid_mask] y_pnl = pnl_pct_arr[valid_mask] y_win = winner_arr[valid_mask] y_tp = is_tp_arr[valid_mask] y_mhl = is_mhl_arr[valid_mask] # Pearson r with pnl_pct if HAS_SCIPY: pr, pp = pearsonr(x, y_pnl) else: cov = np.cov(x, y_pnl)[0, 1] pr = cov / (np.std(x) * np.std(y_pnl) + 1e-15) pp = np.nan # Point-biserial pb = np.nan if HAS_SCIPY and len(np.unique(y_win)) == 2: pb, _ = pointbiserialr(y_win, x) # KS stat winner ks_stat, ks_pval = np.nan, np.nan if HAS_SCIPY and len(np.unique(y_win)) == 2: w_vals = x[y_win == 1] l_vals = x[y_win == 0] if len(w_vals) >= 2 and len(l_vals) >= 2: ks_stat, ks_pval = ks_2samp(w_vals, l_vals) # ROC-AUC: winner (take max of both directions) roc_w = np.nan if len(np.unique(y_win)) == 2: try: a1 = roc_auc_score(y_win, x) roc_w = max(a1, 1.0 - a1) except Exception: pass # ROC-AUC: MAX_HOLD loss roc_mhl = np.nan if len(np.unique(y_mhl)) == 2: try: a1 = roc_auc_score(y_mhl, x) roc_mhl = max(a1, 1.0 - a1) except Exception: pass # ROC-AUC: TP roc_tp = np.nan if len(np.unique(y_tp)) == 2: try: a1 = roc_auc_score(y_tp, x) roc_tp = max(a1, 1.0 - a1) except Exception: pass # Winner / loser mean w_mean = float(np.mean(x[y_win == 1])) if np.any(y_win == 1) else np.nan l_mean = float(np.mean(x[y_win == 0])) if np.any(y_win == 0) else np.nan analysis_rows.append({ 'feature': fname, 'n_valid': n_valid, 'pearson_r': float(pr), 'pearson_p': float(pp) if not np.isnan(pp) else np.nan, 'pb_corr': float(pb) if not np.isnan(pb) else np.nan, 'ks_stat_winner': float(ks_stat) if not np.isnan(ks_stat) else np.nan, 'ks_pval_winner': float(ks_pval) if not np.isnan(ks_pval) else np.nan, 'roc_auc_winner': float(roc_w) if not np.isnan(roc_w) else np.nan, 'roc_auc_maxhold_loss': float(roc_mhl) if not np.isnan(roc_mhl) else np.nan, 'roc_auc_tp': float(roc_tp) if not np.isnan(roc_tp) else np.nan, 'winner_mean': float(w_mean) if not np.isnan(w_mean) else np.nan, 'loser_mean': float(l_mean) if not 
np.isnan(l_mean) else np.nan, }) # ── Step 6: Print sorted tables ────────────────────────────────────────────────── def _fmt(v, fmt=".4f"): return f"{v:{fmt}}" if (v is not None and not np.isnan(v)) else " nan" def _print_table(rows, sort_key, title, n=20): def _sk(r): v = r[sort_key] return float(abs(v)) if (v is not None and not np.isnan(v)) else 0.0 sorted_rows = sorted(rows, key=_sk, reverse=True)[:n] print(f"\n{'═'*95}") print(f" {title}") print(f"{'─'*95}") print(f" {'feature':<30} {'n_valid':>7} {'pearson_r':>9} {'pb_corr':>7} " f"{'ks_stat':>7} {'roc_win':>7} {'roc_mhl':>7} {'roc_tp':>7} {'win_mean':>9} {'los_mean':>9}") print(f"{'─'*95}") for r in sorted_rows: print(f" {r['feature']:<30} {r['n_valid']:>7d} {_fmt(r['pearson_r']):>9} " f"{_fmt(r['pb_corr']):>7} {_fmt(r['ks_stat_winner']):>7} " f"{_fmt(r['roc_auc_winner']):>7} {_fmt(r['roc_auc_maxhold_loss']):>7} " f"{_fmt(r['roc_auc_tp']):>7} {_fmt(r['winner_mean']):>9} {_fmt(r['loser_mean']):>9}") # Sort guards: handle nan safely def _key_roc_win(r): v = r['roc_auc_winner'] return float(v) if (v is not None and not np.isnan(v)) else 0.0 def _key_roc_mhl(r): v = r['roc_auc_maxhold_loss'] return float(v) if (v is not None and not np.isnan(v)) else 0.0 def _key_pearson(r): v = r['pearson_r'] return abs(float(v)) if (v is not None and not np.isnan(v)) else 0.0 _print_table( sorted(analysis_rows, key=_key_roc_win, reverse=True), 'roc_auc_winner', "TOP 20 BY ROC-AUC (winner discrimination)", n=20 ) _print_table( sorted(analysis_rows, key=_key_roc_mhl, reverse=True), 'roc_auc_maxhold_loss', "TOP 20 BY ROC-AUC (MAX_HOLD loss discrimination)", n=20 ) _print_table( sorted(analysis_rows, key=_key_pearson, reverse=True), 'pearson_r', "TOP 20 BY |Pearson r| (pnl_pct correlation)", n=20 ) # ── ASCII histograms for top 5 winner-discriminating features ─────────────────── top5_feats = [r['feature'] for r in sorted(analysis_rows, key=_key_roc_win, reverse=True)[:5]] def ascii_hist_pair(feat_name, fi): col = feat_matrix[:, 
fi] w_vals = col[(winner_arr == 1) & np.isfinite(col)] l_vals = col[(winner_arr == 0) & np.isfinite(col)] if len(w_vals) < 5 or len(l_vals) < 5: return all_valid = col[np.isfinite(col)] lo = float(np.percentile(all_valid, 1)) hi = float(np.percentile(all_valid, 99)) BINS = 15 edges = np.linspace(lo, hi, BINS + 1) def _hist_bar(vals, edges): counts, _ = np.histogram(vals, bins=edges) return counts wc = _hist_bar(np.clip(w_vals, lo, hi), edges) lc = _hist_bar(np.clip(l_vals, lo, hi), edges) w_tot = max(1, len(w_vals)) l_tot = max(1, len(l_vals)) bar_max = max(1, max(np.max(wc) / w_tot, np.max(lc) / l_tot)) WIDTH = 20 print(f"\n {'─'*60}") print(f" {feat_name} (winners n={w_tot} losers n={l_tot} range=[{lo:.4f}, {hi:.4f}])") print(f" {'─'*60}") print(f" {'bin':>22} {'WINNER':>{WIDTH}} {'LOSER':>{WIDTH}}") for i in range(BINS): lbl = f"[{edges[i]:+.4f},{edges[i+1]:+.4f})" wbar = '█' * int(wc[i] / w_tot / bar_max * WIDTH) lbar = '█' * int(lc[i] / l_tot / bar_max * WIDTH) print(f" {lbl:>22} {wbar:<{WIDTH}} {lbar:<{WIDTH}}") print(f"\n{'═'*95}") print(f" ASCII HISTOGRAMS — TOP 5 WINNER-DISCRIMINATING FEATURES") for fname in top5_feats: fi = feat_names.index(fname) ascii_hist_pair(fname, fi) # ── Step 7: Save outputs ───────────────────────────────────────────────────────── LOG_DIR = Path(__file__).parent / "run_logs" LOG_DIR.mkdir(exist_ok=True) ts = datetime.now().strftime("%Y%m%d_%H%M%S") # Per-trade feature matrix + outcomes feat_csv = LOG_DIR / f"entry_quality_features_{ts}.csv" with open(feat_csv, 'w', newline='') as f: cw = csv.writer(f) outcome_keys = ['trade_id', 'asset', 'direction', 'entry_bar', 'exit_bar', 'bars_held', 'exit_reason', 'leverage', 'notional', 'pnl_pct', 'pnl_absolute', 'winner', 'is_tp', 'is_maxhold_loss', 'is_maxhold_win', 'invalid_lookback'] cw.writerow(outcome_keys + feat_names) for i, o in enumerate(outcome_rows): out_vals = [o[k] for k in outcome_keys] feat_vals = [f"{v:.8g}" if np.isfinite(v) else '' for v in feat_matrix[i]] 
cw.writerow(out_vals + feat_vals) # Feature analysis table sweep_csv = LOG_DIR / f"entry_quality_sweep_{ts}.csv" with open(sweep_csv, 'w', newline='') as f: cw = csv.writer(f) cw.writerow(['feature', 'n_valid', 'pearson_r', 'pearson_p', 'pb_corr', 'ks_stat_winner', 'ks_pval_winner', 'roc_auc_winner', 'roc_auc_maxhold_loss', 'roc_auc_tp', 'winner_mean', 'loser_mean']) for r in sorted(analysis_rows, key=_key_roc_win, reverse=True): def fmtv(v): return f"{v:.6f}" if (v is not None and not np.isnan(v)) else '' cw.writerow([r['feature'], r['n_valid'], fmtv(r['pearson_r']), fmtv(r['pearson_p']), fmtv(r['pb_corr']), fmtv(r['ks_stat_winner']), fmtv(r['ks_pval_winner']), fmtv(r['roc_auc_winner']), fmtv(r['roc_auc_maxhold_loss']), fmtv(r['roc_auc_tp']), fmtv(r['winner_mean']), fmtv(r['loser_mean'])]) print(f"\n{'═'*95}") print(f" per-trade features → {feat_csv} ({len(outcome_rows)} rows, {N_FEATURES} features)") print(f" feature analysis → {sweep_csv} ({len(analysis_rows)} rows)") print(f"{'═'*95}")