initial: import DOLPHIN baseline 2026-04-21 from dolphinng5_predict working tree

Includes core prod + GREEN/BLUE subsystems:
- prod/ (BLUE harness, configs, scripts, docs)
- nautilus_dolphin/ (GREEN Nautilus-native impl + dvae/ preserved)
- adaptive_exit/ (AEM engine + models/bucket_assignments.pkl)
- Observability/ (EsoF advisor, TUI, dashboards)
- external_factors/ (EsoF producer)
- mc_forewarning_qlabs_fork/ (MC regime/envelope)

Excludes runtime caches, logs, backups, and reproducible artifacts per .gitignore.
This commit is contained in:
hjnormey
2026-04-21 16:58:38 +02:00
commit 01c19662cb
643 changed files with 260241 additions and 0 deletions

View File

@@ -0,0 +1,656 @@
"""Entry-bar eigenvalue feature sweep — winner vs loser discrimination.
Loads the 55-day champion dataset, re-runs the full engine stack to collect
trade_history with entry_bar indices, then extracts a rich feature matrix
(raw + derivatives + cross-TF combos + historical context) at each entry bar.
Statistical analysis:
- Pearson r vs pnl_pct
- Point-biserial + KS stat vs winner binary
- ROC-AUC for winner / MAX_HOLD-loss / TP discrimination
Outputs:
run_logs/entry_quality_features_<ts>.csv — per-trade feature matrix
run_logs/entry_quality_sweep_<ts>.csv — per-feature analysis table
"""
import sys, time, csv
from pathlib import Path
from datetime import datetime
sys.path.insert(0, str(Path(__file__).parent))
# ── Numba JIT warmup (must run BEFORE other imports that touch numpy internals) ──
print("Compiling numba kernels...")
t0c = time.time()
from nautilus_dolphin.nautilus.alpha_asset_selector import compute_irp_nb, compute_ars_nb, rank_assets_irp_nb
from nautilus_dolphin.nautilus.alpha_bet_sizer import compute_sizing_nb
from nautilus_dolphin.nautilus.alpha_signal_generator import check_dc_nb
from nautilus_dolphin.nautilus.ob_features import (
OBFeatureEngine, compute_imbalance_nb, compute_depth_1pct_nb,
compute_depth_quality_nb, compute_fill_probability_nb, compute_spread_proxy_nb,
compute_depth_asymmetry_nb, compute_imbalance_persistence_nb,
compute_withdrawal_velocity_nb, compute_market_agreement_nb, compute_cascade_signal_nb,
)
from nautilus_dolphin.nautilus.ob_provider import MockOBProvider
import numpy as np
_p = np.array([1.0, 2.0, 3.0], dtype=np.float64)
compute_irp_nb(_p, -1); compute_ars_nb(1.0, 0.5, 0.01)
rank_assets_irp_nb(np.ones((10, 2), dtype=np.float64), 8, -1, 5, 500.0, 20, 0.20)
compute_sizing_nb(-0.03, -0.02, -0.05, 3.0, 0.5, 5.0, 0.20, True, True, 0.0,
np.zeros(4, dtype=np.int64), np.zeros(4, dtype=np.int64),
np.zeros(5, dtype=np.float64), 0, -1, 0.01, 0.04)
check_dc_nb(_p, 3, 1, 0.75)
_b = np.array([100.0, 200.0, 300.0, 400.0, 500.0], dtype=np.float64)
_a = np.array([110.0, 190.0, 310.0, 390.0, 510.0], dtype=np.float64)
compute_imbalance_nb(_b, _a); compute_depth_1pct_nb(_b, _a)
compute_depth_quality_nb(210.0, 200.0); compute_fill_probability_nb(1.0)
compute_spread_proxy_nb(_b, _a); compute_depth_asymmetry_nb(_b, _a)
compute_imbalance_persistence_nb(np.array([0.1, -0.1], dtype=np.float64), 2)
compute_withdrawal_velocity_nb(np.array([100.0, 110.0], dtype=np.float64), 1)
compute_market_agreement_nb(np.array([0.1, -0.05], dtype=np.float64), 2)
compute_cascade_signal_nb(np.array([-0.05, -0.15], dtype=np.float64), 2, -0.10)
print(f" JIT: {time.time() - t0c:.1f}s")
import pandas as pd
from nautilus_dolphin.nautilus.esf_alpha_orchestrator import NDAlphaEngine
from nautilus_dolphin.nautilus.adaptive_circuit_breaker import AdaptiveCircuitBreaker
from mc.mc_ml import DolphinForewarner
# ── Config (identical to test_pf_dynamic_beta_validate.py) ──────────────────────
# Absolute Windows paths to the champion vbt cache and trained MC models —
# assumes this user's working tree; TODO(review): parameterize if ever reused.
VBT_DIR = Path(r"C:\Users\Lenovo\Documents\- DOLPHIN NG HD HCM TSF Predict\vbt_cache")
MC_MODELS_DIR = str(Path(r"C:\Users\Lenovo\Documents\- DOLPHIN NG HD HCM TSF Predict\nautilus_dolphin\mc_results\models"))
# Non-asset columns in the per-day parquet frames; every other column is
# treated as a tradable asset price series.
META_COLS = {
    'timestamp', 'scan_number',
    'v50_lambda_max_velocity', 'v150_lambda_max_velocity',
    'v300_lambda_max_velocity', 'v750_lambda_max_velocity',
    'vel_div', 'instability_50', 'instability_150',
}
# Champion engine parameters — must stay in lockstep with the reference run so
# the collected trade_history matches it trade-for-trade.
ENGINE_KWARGS = dict(
    initial_capital=25000.0, vel_div_threshold=-0.02, vel_div_extreme=-0.05,
    min_leverage=0.5, max_leverage=5.0, leverage_convexity=3.0,
    fraction=0.20, fixed_tp_pct=0.0099, stop_pct=1.0, max_hold_bars=120,
    use_direction_confirm=True, dc_lookback_bars=7, dc_min_magnitude_bps=0.75,
    dc_skip_contradicts=True, dc_leverage_boost=1.0, dc_leverage_reduce=0.5,
    use_asset_selection=True, min_irp_alignment=0.45,
    use_sp_fees=True, use_sp_slippage=True,
    sp_maker_entry_rate=0.62, sp_maker_exit_rate=0.50,
    use_ob_edge=True, ob_edge_bps=5.0, ob_confirm_rate=0.40,
    lookback=100, use_alpha_layers=True, use_dynamic_leverage=True, seed=42,
    tf_enabled=False,
)
# Same parameter set in the dict form the MC-Forewarner expects.
MC_BASE_CFG = {
    'trial_id': 0, 'vel_div_threshold': -0.020, 'vel_div_extreme': -0.050,
    'use_direction_confirm': True, 'dc_lookback_bars': 7,
    'dc_min_magnitude_bps': 0.75, 'dc_skip_contradicts': True,
    'dc_leverage_boost': 1.00, 'dc_leverage_reduce': 0.50,
    'vd_trend_lookback': 10, 'min_leverage': 0.50, 'max_leverage': 5.00,
    'leverage_convexity': 3.00, 'fraction': 0.20,
    'use_alpha_layers': True, 'use_dynamic_leverage': True,
    'fixed_tp_pct': 0.0099, 'stop_pct': 1.00, 'max_hold_bars': 120,
    'use_sp_fees': True, 'use_sp_slippage': True,
    'sp_maker_entry_rate': 0.62, 'sp_maker_exit_rate': 0.50,
    'use_ob_edge': True, 'ob_edge_bps': 5.00, 'ob_confirm_rate': 0.40,
    'ob_imbalance_bias': -0.09, 'ob_depth_scale': 1.00,
    'use_asset_selection': True, 'min_irp_alignment': 0.45, 'lookback': 100,
    'acb_beta_high': 0.80, 'acb_beta_low': 0.20, 'acb_w750_threshold_pct': 60,
}
# Raw per-bar eigen features extracted at each entry bar. Column order matters:
# downstream code indexes velocities at positions 1-4 and instabilities at 5-6.
FEAT_COLS = ['vel_div', 'v50', 'v150', 'v300', 'v750', 'inst50', 'inst150']
N_FEAT = len(FEAT_COLS)  # 7
# ── Step 1: Load data + build global bar feature arrays ──────────────────────────
def _rolling_dvol(prices, start=50, window=50):
    """Trailing realized vol per bar: std of simple returns over `window` bars.

    Bars before `start` (and windows with fewer than 10 samples) stay NaN,
    matching the reference implementation this sweep mirrors.
    """
    out = np.full(len(prices), np.nan)
    for i in range(start, len(prices)):
        seg = prices[max(0, i - window):i]
        if len(seg) >= 10:
            out[i] = float(np.std(np.diff(seg) / seg[:-1]))
    return out

print("\nLoading parquet files...")
parquet_files = sorted(VBT_DIR.glob("*.parquet"))
parquet_files = [p for p in parquet_files if 'catalog' not in str(p)]
print(f" {len(parquet_files)} files")
pq_data = {}  # date-stem → (DataFrame, asset columns, per-bar BTC vol series)
for pf in parquet_files:
    df = pd.read_parquet(pf)
    ac = [c for c in df.columns if c not in META_COLS]
    if 'BTCUSDT' in df.columns:
        dv = _rolling_dvol(df['BTCUSDT'].values)
    else:
        dv = np.full(len(df), np.nan)
    pq_data[pf.stem] = (df, ac, dv)
# vol_p60 from first 2 files (matches reference)
all_vols = []
for pf in parquet_files[:2]:
    df, _, dv = pq_data[pf.stem]
    if 'BTCUSDT' not in df.columns:
        continue
    # Reuse the vol series computed above instead of recomputing the same
    # rolling std per bar (the original duplicated the whole loop here).
    # The reference starts at bar 60 and keeps strictly positive values only.
    seg = dv[60:]
    all_vols.extend(seg[np.isfinite(seg) & (seg > 0)].tolist())
vol_p60 = float(np.percentile(all_vols, 60))
print(f"\nBuilding global bar feature arrays...")
# gbar_features: list accumulator → converted to np.ndarray after full pass
# Layout: [vel_div, v50, v150, v300, v750, inst50, inst150]
# gbar_valid_mask: array[bool] indexed by global_bar_idx
# gbar_to_rownum: global_bar_idx → index into gbar_features (only valid bars)
# Every bar (including NaN-vd bars) increments the global counter, matching engine.
feat_accum = [] # only valid bars appended
gbar_valid_mask_list = [] # one bool per global bar
gbar_to_rownum = {} # global_bar_idx → row index in feat_accum
g = 0
for pf in parquet_files:
df, _, _ = pq_data[pf.stem]
vd_col = df['vel_div'].values
v50_col = df['v50_lambda_max_velocity'].values
v150_col = df['v150_lambda_max_velocity'].values
v300_col = df['v300_lambda_max_velocity'].values
v750_col = df['v750_lambda_max_velocity'].values
i50_col = df['instability_50'].values
i150_col = df['instability_150'].values
for ri in range(len(df)):
vd = vd_col[ri]
valid = np.isfinite(vd)
if valid:
row_idx = len(feat_accum)
feat_accum.append([
float(vd),
float(v50_col[ri]) if np.isfinite(v50_col[ri]) else np.nan,
float(v150_col[ri]) if np.isfinite(v150_col[ri]) else np.nan,
float(v300_col[ri]) if np.isfinite(v300_col[ri]) else np.nan,
float(v750_col[ri]) if np.isfinite(v750_col[ri]) else np.nan,
float(i50_col[ri]) if np.isfinite(i50_col[ri]) else np.nan,
float(i150_col[ri]) if np.isfinite(i150_col[ri]) else np.nan,
])
gbar_to_rownum[g] = row_idx
gbar_valid_mask_list.append(True)
else:
gbar_valid_mask_list.append(False)
g += 1
gbar_features = np.array(feat_accum, dtype=np.float64) # (N_valid, 7)
gbar_valid_mask = np.array(gbar_valid_mask_list, dtype=bool)
N_total_gbars = g
print(f" Total global bars: {N_total_gbars}")
print(f" Valid bars (non-NaN vel_div): {len(feat_accum)} ({len(feat_accum)/N_total_gbars*100:.1f}%)")
# ── Step 2: Run champion engine ──────────────────────────────────────────────────
print("\nLoading MC-Forewarner...")
forewarner = DolphinForewarner(models_dir=MC_MODELS_DIR)
print(" OK")
acb = AdaptiveCircuitBreaker()
# Pre-load the circuit breaker's per-day w750 series, keyed by date stem.
date_strings = [pf.stem for pf in parquet_files]
acb.preload_w750(date_strings)
OB_ASSETS = ["BTCUSDT", "ETHUSDT", "BNBUSDT", "SOLUSDT"]
# Synthetic order book with per-asset imbalance biases — no live OB data here.
_mock_ob = MockOBProvider(
    imbalance_bias=-0.09, depth_scale=1.0, assets=OB_ASSETS,
    imbalance_biases={"BTCUSDT": -0.086, "ETHUSDT": -0.092,
                      "BNBUSDT": +0.05, "SOLUSDT": +0.05},
)
ob_eng = OBFeatureEngine(_mock_ob)
ob_eng.preload_date("mock", OB_ASSETS)
print(f"\n=== Running champion engine (55 days) ===")
t0 = time.time()
engine = NDAlphaEngine(**ENGINE_KWARGS)
engine.set_ob_engine(ob_eng)
engine.set_acb(acb)
engine.set_mc_forewarner(forewarner, MC_BASE_CFG)
# Esoteric hazard disabled for this sweep.
engine.set_esoteric_hazard_multiplier(0.0)
for pf in parquet_files:
    ds = pf.stem
    df, acols, dvol = pq_data[ds]
    # Vol regime gate: a bar passes when its BTC vol exceeds the p60 cut.
    vol_ok = np.where(np.isfinite(dvol), dvol > vol_p60, False)
    engine.process_day(ds, df, acols, vol_regime_ok=vol_ok)
tr = engine.trade_history
roi = (engine.capital - 25000) / 25000 * 100
w_count = sum(1 for t in tr if t.pnl_absolute > 0)
# NOTE(review): divides by len(tr) — raises ZeroDivisionError if no trades fired.
print(f" {time.time()-t0:.1f}s | {len(tr)} trades | ROI={roi:+.2f}% | WR={w_count/len(tr)*100:.1f}%")
# ── Step 3: Extract entry-bar features ──────────────────────────────────────────
print(f"\nExtracting entry-bar features for {len(tr)} trades...")
def _get_row(gbar_idx):
"""Return feature vector for a global bar, or all-NaN if invalid."""
if gbar_idx < 0 or not gbar_valid_mask[gbar_idx]:
return np.full(N_FEAT, np.nan)
ri = gbar_to_rownum.get(gbar_idx)
if ri is None:
return np.full(N_FEAT, np.nan)
return gbar_features[ri]
# Feature name catalogue — order here defines column order in feat_matrix.
feat_names = list(FEAT_COLS)                       # 7 raw eigen features
for _prefix in ("d1", "d2", "d3"):                 # 21 finite-difference derivatives
    feat_names.extend(f"{_prefix}_{c}" for c in FEAT_COLS)
VEL_NAMES = ['v50', 'v150', 'v300', 'v750']
VEL_IDX = [1, 2, 3, 4]  # positions of the velocity columns inside FEAT_COLS
pairs = [(i, j) for i in range(4) for j in range(i + 1, 4)]
feat_names.extend(f"diff_{VEL_NAMES[i]}_{VEL_NAMES[j]}" for i, j in pairs)    # 6 diffs
feat_names.extend(f"ratio_{VEL_NAMES[i]}_{VEL_NAMES[j]}" for i, j in pairs)   # 6 ratios
feat_names.extend([
    'inter_inst_ratio',   # inst50/inst150
    'inter_inst_prod',    # inst50*inst150
    'inter_vd_inst50',    # vel_div*inst50
    'inter_vd_inst150',   # vel_div*inst150
])  # 4 instability interactions
feat_names.extend([
    'hist_vd_mean3',      # mean vel_div last 3 bars
    'hist_vd_std3',       # std vel_div last 3 bars
    'hist_vd_min5',       # min vel_div last 5 bars
    'hist_v50_mean3',     # mean v50 last 3 bars
    'hist_v750_mean3',    # mean v750 last 3 bars
])  # 5 historical context
N_FEATURES = len(feat_names)  # 7 + 21 + 6 + 6 + 4 + 5 = 49
print(f" Feature count: {N_FEATURES}")
trade_feat_rows = []  # one np.ndarray of length N_FEATURES per trade
outcome_rows = []     # one outcome dict per trade
# NaN-safe reducers, hoisted out of the per-trade loop: the originals were
# re-defined on every iteration, which is pure per-trade overhead.
def _safe_mean(arr):
    """Mean over the finite entries; NaN when none are finite."""
    valid = arr[np.isfinite(arr)]
    return float(np.mean(valid)) if len(valid) > 0 else np.nan
def _safe_std(arr):
    """Std over the finite entries; NaN unless at least two are finite."""
    valid = arr[np.isfinite(arr)]
    return float(np.std(valid)) if len(valid) > 1 else np.nan
def _safe_min(arr):
    """Min over the finite entries; NaN when none are finite."""
    valid = arr[np.isfinite(arr)]
    return float(np.min(valid)) if len(valid) > 0 else np.nan
for t in tr:
    eb = t.entry_bar
    # Raw features at the entry bar and the three bars before it
    f0 = _get_row(eb)
    f1 = _get_row(eb - 1)
    f2 = _get_row(eb - 2)
    f3 = _get_row(eb - 3)
    # Derivatives need all 4 bars valid (and the lookback fully in range)
    invalid_lookback = (
        eb - 3 < 0
        or not gbar_valid_mask[eb]
        or not gbar_valid_mask[eb - 1]
        or not gbar_valid_mask[eb - 2]
        or not gbar_valid_mask[eb - 3]
    )
    row = np.empty(N_FEATURES, dtype=np.float64)
    # --- Raw (7) ---
    row[:7] = f0
    if invalid_lookback:
        # Derivatives all NaN; raw features still set above
        row[7:] = np.nan
    else:
        # --- 1st derivatives (7) ---
        d1_0 = f0 - f1
        d1_1 = f1 - f2
        d1_2 = f2 - f3
        row[7:14] = d1_0
        # --- 2nd derivatives (7) ---
        d2_0 = d1_0 - d1_1
        d2_1 = d1_1 - d1_2
        row[14:21] = d2_0
        # --- 3rd derivatives (7) ---
        d3_0 = d2_0 - d2_1
        row[21:28] = d3_0
        # --- Cross-TF velocity at entry bar ---
        vi = VEL_IDX  # [1,2,3,4]
        offset = 28
        for pi, pj in pairs:  # 6 diffs
            row[offset] = f0[vi[pi]] - f0[vi[pj]]
            offset += 1
        for pi, pj in pairs:  # 6 ratios (NaN when denominator is 0 or NaN)
            denom = f0[vi[pj]]
            row[offset] = f0[vi[pi]] / denom if (np.isfinite(denom) and denom != 0.0) else np.nan
            offset += 1
        # --- Instability interactions ---
        inst50 = f0[5]
        inst150 = f0[6]
        vd = f0[0]
        row[offset] = inst50 / inst150 if (np.isfinite(inst150) and inst150 != 0.0) else np.nan
        row[offset + 1] = inst50 * inst150
        row[offset + 2] = vd * inst50
        row[offset + 3] = vd * inst150
        offset += 4
        # --- Historical context: collect vel_div over last 5 valid bars ---
        # Use last 5 global bars [eb-4..eb] for vd_min5; [eb-2..eb] for mean3/std3
        vd_last5 = np.array([_get_row(eb - k)[0] for k in range(4, -1, -1)])
        vd_last3 = vd_last5[2:]  # [eb-2, eb-1, eb]
        v50_last3 = np.array([_get_row(eb - k)[1] for k in range(2, -1, -1)])
        v750_last3 = np.array([_get_row(eb - k)[4] for k in range(2, -1, -1)])
        row[offset] = _safe_mean(vd_last3)
        row[offset + 1] = _safe_std(vd_last3)
        row[offset + 2] = _safe_min(vd_last5)
        row[offset + 3] = _safe_mean(v50_last3)
        row[offset + 4] = _safe_mean(v750_last3)
    trade_feat_rows.append(row)
    pnl_abs = t.pnl_absolute
    outcome_rows.append({
        'trade_id': t.trade_id,
        'asset': t.asset,
        'direction': t.direction,
        'entry_bar': t.entry_bar,
        'exit_bar': t.exit_bar,
        'bars_held': t.bars_held,
        'exit_reason': t.exit_reason,
        'leverage': t.leverage,
        'notional': t.notional,
        'pnl_pct': t.pnl_pct,
        'pnl_absolute': pnl_abs,
        'winner': int(pnl_abs > 0),
        'is_tp': int(t.exit_reason == 'FIXED_TP'),
        'is_maxhold_loss': int(t.exit_reason == 'MAX_HOLD' and pnl_abs <= 0),
        'is_maxhold_win': int(t.exit_reason == 'MAX_HOLD' and pnl_abs > 0),
        'invalid_lookback': int(invalid_lookback),
    })
feat_matrix = np.array(trade_feat_rows, dtype=np.float64)  # (N_trades, N_FEATURES)
print(f" Feature matrix: {feat_matrix.shape}")
print(f" Trades with invalid lookback: {sum(o['invalid_lookback'] for o in outcome_rows)}")
# ── Step 4: Clip extreme ratios at 99th percentile ──────────────────────────────
# Velocity ratios can explode when the denominator is tiny; winsorize them to
# the [p1, p99] range so one blown-up trade doesn't dominate the stats.
print("\nClipping ratio features at 99th percentile...")
ratio_start = 28 + 6  # ratio columns sit right after the 6 cross-TF diffs
ratio_end = ratio_start + 6
for col_i in range(ratio_start, ratio_end):
    col = feat_matrix[:, col_i]
    finite_vals = col[np.isfinite(col)]
    if len(finite_vals) > 10:
        lo, hi = np.percentile(finite_vals, [1, 99])
        feat_matrix[:, col_i] = np.clip(col, lo, hi)
# ── Step 5: Statistical analysis ────────────────────────────────────────────────
print("Running statistical analysis...")
try:
    from scipy.stats import ks_2samp, pearsonr, pointbiserialr
    HAS_SCIPY = True
except ImportError:
    HAS_SCIPY = False
    print(" WARNING: scipy not available — KS + point-biserial skipped")
try:
    from sklearn.metrics import roc_auc_score as _sklearn_auc
    def roc_auc_score(y_true, y_score):
        return _sklearn_auc(y_true, y_score)
    HAS_SKLEARN = True
except ImportError:
    HAS_SKLEARN = False
    def roc_auc_score(y_true, y_score):
        """Manual ROC-AUC via the Mann-Whitney rank-sum identity.

        Tied scores receive average (midrank) ranks, matching sklearn/scipy
        behaviour. The previous argsort-based ranking handed tied scores
        arbitrary distinct ranks, which biases the AUC whenever ties exist.
        Returns NaN when only one class is present.
        """
        y_true = np.asarray(y_true, dtype=np.float64)
        y_score = np.asarray(y_score, dtype=np.float64)
        n1 = int(np.sum(y_true == 1))
        n0 = len(y_true) - n1
        if n1 == 0 or n0 == 0:
            return np.nan
        n = len(y_score)
        order = np.argsort(y_score, kind='mergesort')
        sorted_scores = y_score[order]
        ranks = np.empty(n, dtype=np.float64)
        i = 0
        while i < n:
            # Extend j to the end of the run of scores equal to sorted_scores[i]
            j = i
            while j + 1 < n and sorted_scores[j + 1] == sorted_scores[i]:
                j += 1
            ranks[order[i:j + 1]] = 0.5 * (i + j) + 1.0  # 1-based midrank
            i = j + 1
        auc = (np.sum(ranks[y_true == 1]) - n1 * (n1 + 1) / 2) / (n1 * n0)
        return float(auc)
# Outcome vectors aligned row-for-row with feat_matrix.
pnl_pct_arr = np.array([o['pnl_pct'] for o in outcome_rows])
winner_arr = np.array([o['winner'] for o in outcome_rows])
is_tp_arr = np.array([o['is_tp'] for o in outcome_rows])
is_mhl_arr = np.array([o['is_maxhold_loss'] for o in outcome_rows])
analysis_rows = []
# One analysis row per feature: correlation / separation stats vs trade outcome.
for fi, fname in enumerate(feat_names):
    col = feat_matrix[:, fi]
    # Only rows where both the feature and the outcome are finite count.
    valid_mask = np.isfinite(col) & np.isfinite(pnl_pct_arr)
    n_valid = int(np.sum(valid_mask))
    if n_valid < 10:
        # Too few usable samples — emit an all-NaN placeholder row.
        analysis_rows.append({
            'feature': fname, 'n_valid': n_valid,
            'pearson_r': np.nan, 'pearson_p': np.nan, 'pb_corr': np.nan,
            'ks_stat_winner': np.nan, 'ks_pval_winner': np.nan,
            'roc_auc_winner': np.nan, 'roc_auc_maxhold_loss': np.nan,
            'roc_auc_tp': np.nan, 'winner_mean': np.nan, 'loser_mean': np.nan,
        })
        continue
    x = col[valid_mask]
    y_pnl = pnl_pct_arr[valid_mask]
    y_win = winner_arr[valid_mask]
    y_tp = is_tp_arr[valid_mask]
    y_mhl = is_mhl_arr[valid_mask]
    # Pearson r with pnl_pct
    if HAS_SCIPY:
        pr, pp = pearsonr(x, y_pnl)
    else:
        # Fallback: covariance / (std*std); no p-value without scipy.
        cov = np.cov(x, y_pnl)[0, 1]
        pr = cov / (np.std(x) * np.std(y_pnl) + 1e-15)
        pp = np.nan
    # Point-biserial (requires both winner classes present)
    pb = np.nan
    if HAS_SCIPY and len(np.unique(y_win)) == 2:
        pb, _ = pointbiserialr(y_win, x)
    # KS stat winner — distributional separation of winner vs loser values
    ks_stat, ks_pval = np.nan, np.nan
    if HAS_SCIPY and len(np.unique(y_win)) == 2:
        w_vals = x[y_win == 1]
        l_vals = x[y_win == 0]
        if len(w_vals) >= 2 and len(l_vals) >= 2:
            ks_stat, ks_pval = ks_2samp(w_vals, l_vals)
    # ROC-AUC: winner (take max of both directions — the sign of a feature
    # doesn't matter for discrimination strength)
    roc_w = np.nan
    if len(np.unique(y_win)) == 2:
        try:
            a1 = roc_auc_score(y_win, x)
            roc_w = max(a1, 1.0 - a1)
        except Exception:
            pass
    # ROC-AUC: MAX_HOLD loss
    roc_mhl = np.nan
    if len(np.unique(y_mhl)) == 2:
        try:
            a1 = roc_auc_score(y_mhl, x)
            roc_mhl = max(a1, 1.0 - a1)
        except Exception:
            pass
    # ROC-AUC: TP
    roc_tp = np.nan
    if len(np.unique(y_tp)) == 2:
        try:
            a1 = roc_auc_score(y_tp, x)
            roc_tp = max(a1, 1.0 - a1)
        except Exception:
            pass
    # Winner / loser mean of the feature value
    w_mean = float(np.mean(x[y_win == 1])) if np.any(y_win == 1) else np.nan
    l_mean = float(np.mean(x[y_win == 0])) if np.any(y_win == 0) else np.nan
    analysis_rows.append({
        'feature': fname, 'n_valid': n_valid,
        'pearson_r': float(pr), 'pearson_p': float(pp) if not np.isnan(pp) else np.nan,
        'pb_corr': float(pb) if not np.isnan(pb) else np.nan,
        'ks_stat_winner': float(ks_stat) if not np.isnan(ks_stat) else np.nan,
        'ks_pval_winner': float(ks_pval) if not np.isnan(ks_pval) else np.nan,
        'roc_auc_winner': float(roc_w) if not np.isnan(roc_w) else np.nan,
        'roc_auc_maxhold_loss': float(roc_mhl) if not np.isnan(roc_mhl) else np.nan,
        'roc_auc_tp': float(roc_tp) if not np.isnan(roc_tp) else np.nan,
        'winner_mean': float(w_mean) if not np.isnan(w_mean) else np.nan,
        'loser_mean': float(l_mean) if not np.isnan(l_mean) else np.nan,
    })
# ── Step 6: Print sorted tables ──────────────────────────────────────────────────
def _fmt(v, fmt=".4f"):
return f"{v:{fmt}}" if (v is not None and not np.isnan(v)) else " nan"
def _print_table(rows, sort_key, title, n=20):
def _sk(r):
v = r[sort_key]
return float(abs(v)) if (v is not None and not np.isnan(v)) else 0.0
sorted_rows = sorted(rows, key=_sk, reverse=True)[:n]
print(f"\n{''*95}")
print(f" {title}")
print(f"{''*95}")
print(f" {'feature':<30} {'n_valid':>7} {'pearson_r':>9} {'pb_corr':>7} "
f"{'ks_stat':>7} {'roc_win':>7} {'roc_mhl':>7} {'roc_tp':>7} {'win_mean':>9} {'los_mean':>9}")
print(f"{''*95}")
for r in sorted_rows:
print(f" {r['feature']:<30} {r['n_valid']:>7d} {_fmt(r['pearson_r']):>9} "
f"{_fmt(r['pb_corr']):>7} {_fmt(r['ks_stat_winner']):>7} "
f"{_fmt(r['roc_auc_winner']):>7} {_fmt(r['roc_auc_maxhold_loss']):>7} "
f"{_fmt(r['roc_auc_tp']):>7} {_fmt(r['winner_mean']):>9} {_fmt(r['loser_mean']):>9}")
# Sort guards: handle nan safely
def _key_roc_win(r):
v = r['roc_auc_winner']
return float(v) if (v is not None and not np.isnan(v)) else 0.0
def _key_roc_mhl(r):
v = r['roc_auc_maxhold_loss']
return float(v) if (v is not None and not np.isnan(v)) else 0.0
def _key_pearson(r):
v = r['pearson_r']
return abs(float(v)) if (v is not None and not np.isnan(v)) else 0.0
# NOTE(review): _print_table re-sorts its rows by |sort_key| internally, so
# these outer sorted(...) calls only influence the ordering of exact ties.
_print_table(
    sorted(analysis_rows, key=_key_roc_win, reverse=True),
    'roc_auc_winner',
    "TOP 20 BY ROC-AUC (winner discrimination)", n=20
)
_print_table(
    sorted(analysis_rows, key=_key_roc_mhl, reverse=True),
    'roc_auc_maxhold_loss',
    "TOP 20 BY ROC-AUC (MAX_HOLD loss discrimination)", n=20
)
_print_table(
    sorted(analysis_rows, key=_key_pearson, reverse=True),
    'pearson_r',
    "TOP 20 BY |Pearson r| (pnl_pct correlation)", n=20
)
# ── ASCII histograms for top 5 winner-discriminating features ───────────────────
top5_feats = [r['feature'] for r in sorted(analysis_rows, key=_key_roc_win, reverse=True)[:5]]
def ascii_hist_pair(feat_name, fi):
    """Print side-by-side winner/loser histograms for feature column `fi`."""
    col = feat_matrix[:, fi]
    w_vals = col[(winner_arr == 1) & np.isfinite(col)]
    l_vals = col[(winner_arr == 0) & np.isfinite(col)]
    if len(w_vals) < 5 or len(l_vals) < 5:
        return  # too few samples on one side for a meaningful shape
    all_valid = col[np.isfinite(col)]
    # Clip the display range to [p1, p99] so outliers don't flatten the plot.
    lo = float(np.percentile(all_valid, 1))
    hi = float(np.percentile(all_valid, 99))
    BINS = 15
    edges = np.linspace(lo, hi, BINS + 1)
    def _hist_bar(vals, edges):
        counts, _ = np.histogram(vals, bins=edges)
        return counts
    wc = _hist_bar(np.clip(w_vals, lo, hi), edges)
    lc = _hist_bar(np.clip(l_vals, lo, hi), edges)
    w_tot = max(1, len(w_vals))
    l_tot = max(1, len(l_vals))
    # NOTE(review): max(1, ...) clamps bar_max to 1 since the fractions are
    # always <= 1, so bars scale by raw bin fraction — confirm this is intended
    # (a pure div-by-zero guard would use a tiny epsilon instead).
    bar_max = max(1, max(np.max(wc) / w_tot, np.max(lc) / l_tot))
    WIDTH = 20
    # '█' bars / '─' rules restore glyphs lost to an earlier encoding mangle
    # (the previous "'' * n" drew nothing).
    print(f"\n {'─' * 60}")
    print(f" {feat_name} (winners n={w_tot} losers n={l_tot} range=[{lo:.4f}, {hi:.4f}])")
    print(f" {'─' * 60}")
    print(f" {'bin':>22} {'WINNER':>{WIDTH}} {'LOSER':>{WIDTH}}")
    for i in range(BINS):
        lbl = f"[{edges[i]:+.4f},{edges[i+1]:+.4f})"
        wbar = '█' * int(wc[i] / w_tot / bar_max * WIDTH)
        lbar = '█' * int(lc[i] / l_tot / bar_max * WIDTH)
        print(f" {lbl:>22} {wbar:<{WIDTH}} {lbar:<{WIDTH}}")
print(f"\n{'─' * 95}")
print(f" ASCII HISTOGRAMS — TOP 5 WINNER-DISCRIMINATING FEATURES")
for fname in top5_feats:
    fi = feat_names.index(fname)
    ascii_hist_pair(fname, fi)
# ── Step 7: Save outputs ─────────────────────────────────────────────────────────
LOG_DIR = Path(__file__).parent / "run_logs"
LOG_DIR.mkdir(exist_ok=True)
ts = datetime.now().strftime("%Y%m%d_%H%M%S")
# Per-trade feature matrix + outcomes
feat_csv = LOG_DIR / f"entry_quality_features_{ts}.csv"
with open(feat_csv, 'w', newline='') as f:
    cw = csv.writer(f)
    outcome_keys = ['trade_id', 'asset', 'direction', 'entry_bar', 'exit_bar',
                    'bars_held', 'exit_reason', 'leverage', 'notional',
                    'pnl_pct', 'pnl_absolute', 'winner', 'is_tp',
                    'is_maxhold_loss', 'is_maxhold_win', 'invalid_lookback']
    cw.writerow(outcome_keys + feat_names)
    for i, o in enumerate(outcome_rows):
        out_vals = [o[k] for k in outcome_keys]
        # Empty cell (not 'nan') for missing values keeps the CSV friendly
        # to spreadsheet import.
        feat_vals = [f"{v:.8g}" if np.isfinite(v) else '' for v in feat_matrix[i]]
        cw.writerow(out_vals + feat_vals)
def _fmtv(v):
    """CSV cell: 6-dp float, or an empty cell when the stat is None/NaN.

    Hoisted out of the write loop — the original re-defined it per row.
    """
    return f"{v:.6f}" if (v is not None and not np.isnan(v)) else ''
# Feature analysis table (sorted by winner ROC-AUC, best first)
sweep_csv = LOG_DIR / f"entry_quality_sweep_{ts}.csv"
with open(sweep_csv, 'w', newline='') as f:
    cw = csv.writer(f)
    cw.writerow(['feature', 'n_valid', 'pearson_r', 'pearson_p', 'pb_corr',
                 'ks_stat_winner', 'ks_pval_winner',
                 'roc_auc_winner', 'roc_auc_maxhold_loss', 'roc_auc_tp',
                 'winner_mean', 'loser_mean'])
    for r in sorted(analysis_rows, key=_key_roc_win, reverse=True):
        cw.writerow([r['feature'], r['n_valid'],
                     _fmtv(r['pearson_r']), _fmtv(r['pearson_p']), _fmtv(r['pb_corr']),
                     _fmtv(r['ks_stat_winner']), _fmtv(r['ks_pval_winner']),
                     _fmtv(r['roc_auc_winner']), _fmtv(r['roc_auc_maxhold_loss']),
                     _fmtv(r['roc_auc_tp']),
                     _fmtv(r['winner_mean']), _fmtv(r['loser_mean'])])
# '─' rules restore the separator glyph lost to an earlier encoding mangle.
print(f"\n{'─' * 95}")
print(f" per-trade features → {feat_csv} ({len(outcome_rows)} rows, {N_FEATURES} features)")
print(f" feature analysis → {sweep_csv} ({len(analysis_rows)} rows)")
print(f"{'─' * 95}")