# DOLPHIN/nautilus_dolphin/combined_strategy_5y.py

"""Combined Two-Strategy Architecture — 5y Klines
===================================================
Tests whether OLD (directional, dvol-gated) and NEW (crossover scalp, hour-gated)
strategies are additive, complementary, or competitive.

STRATEGY A — Directional Bet (dvol macro-gated)
  HIGH dvol (>p75): SHORT on vel_div >= +ENTRY_T, exit 95bps TP or 10-bar max-hold
  LOW  dvol (<p25): LONG  on vel_div <= -ENTRY_T, exit 95bps TP or 10-bar max-hold
  (10-bar = 10min on 1m klines ≈ legacy 600s optimal hold)
  Gate: dvol_btc from NPZ

STRATEGY B — Crossover Scalp (hour-gated)
  Entry:  vel_div <= -ENTRY_T → LONG
  Exit:   vel_div >= +ENTRY_T (reversion complete, exhaustion crossover)
  InvExit: vel_div <= -INV_T  (deepened, wrong-way, cut)
  Gate:   hour_utc in {9, 12, 18} (London/US-open hours with PF=1.05-1.06)

COMBINED MODES TESTED:
  1. A_ONLY  — strategy A alone (directional, dvol-gated)
  2. B_ONLY  — strategy B alone (crossover, hour-gated)
  3. A_AND_B — both active simultaneously (independent positions, additive PnL)
  4. A_OR_B  — regime-switched: A when dvol extreme, B when dvol mid + good hours, else FLAT
  5. B_UNGATED — strategy B without any gate (baseline for gate assessment)
  6. A_UNGATED — strategy A without dvol gate (directional at all dvol levels)
  7. HOUR_SWITCH — B during good hours, FLAT otherwise (no dvol gate)

Painstaking logs (TS = run timestamp, YYYYMMDD_HHMMSS):
  combined_strategy_summary_TS.csv   — per (mode, component), all years pooled
  combined_strategy_byyear_TS.csv    — per (mode, component, year)
  combined_strategy_overlap_TS.csv   — per-day A vs hour-gated-B trade counts and gross win/loss
  combined_strategy_byhour_TS.csv    — B_UNGATED trades per entry hour
  combined_strategy_bydvol_TS.csv    — per (dvol_decile, ungated strategy A/B)
  combined_strategy_top_TS.txt       — human-readable summary
"""
import sys, time, csv, gc
sys.stdout.reconfigure(encoding='utf-8', errors='replace')
from pathlib import Path
from datetime import datetime
from collections import defaultdict
import numpy as np
import pandas as pd
from numpy.lib.stride_tricks import sliding_window_view

# Input/output locations (absolute, machine-specific Windows paths).
#   VBT_DIR — daily parquet files (globbed as *.parquet by the preload step)
#   EIG_DIR — per-date folders holding scan_000001__Indicators.npz
#   LOG_DIR — destination for the CSV / text reports
VBT_DIR  = Path(r"C:\Users\Lenovo\Documents\- DOLPHIN NG HD HCM TSF Predict\vbt_cache_klines")
EIG_DIR  = Path(r"C:\Users\Lenovo\Documents\- Dolphin NG HD (NG3)\correlation_arb512\eigenvalues")
LOG_DIR  = Path(r"C:\Users\Lenovo\Documents\- DOLPHIN NG HD HCM TSF Predict\nautilus_dolphin\run_logs")
# parents=True: without it mkdir raises FileNotFoundError when the parent
# directory does not exist yet (e.g. first run on a fresh checkout).
LOG_DIR.mkdir(parents=True, exist_ok=True)

# ── Parameters ────────────────────────────────────────────────────────────
ENTRY_T   = 0.020    # vel_div threshold (both arms)
INV_T     = 0.100    # invalidation for crossover
TP_BPS    = 95       # Strategy A take profit, in basis points
TP_PCT    = TP_BPS / 10_000.0   # same take profit as a fractional return
MH_A      = 10       # Strategy A max hold (10 bars = 10min on 1m ≈ legacy 600s)
MH_B      = 20       # Strategy B safety max hold
GOOD_HOURS = {9, 12, 18}   # hours UTC where crossover PF=1.05-1.06

# ── Load dvol_btc for all dates ───────────────────────────────────────────
# For every daily parquet file, look up the matching indicator NPZ and pull
# the single 'dvol_btc' reading for that date into dvol_map[date_stem].
print("Preloading dvol_btc...")
parquet_files = sorted(VBT_DIR.glob("*.parquet"))
parquet_files = [p for p in parquet_files if 'catalog' not in str(p)]
total = len(parquet_files)

dvol_map = {}
DVOL_IDX = None          # position of 'dvol_btc' in the most recent NPZ that had it
n_dvol_errors = 0        # NPZ files that exist but could not be read/parsed
for pf in parquet_files:
    ds = pf.stem
    npz_path = EIG_DIR / ds / "scan_000001__Indicators.npz"
    if not npz_path.exists():
        continue
    try:
        # NOTE(review): allow_pickle=True unpickles whatever is in the file;
        # only acceptable because these NPZ files are locally produced.
        # The `with` closes the underlying zip handle after each file.
        with np.load(npz_path, allow_pickle=True) as data:
            names = list(data['api_names'])
            # Resolve the index per file instead of trusting the first file's
            # layout, so a reordered api_names cannot yield a wrong indicator.
            if 'dvol_btc' not in names:
                continue
            DVOL_IDX = names.index('dvol_btc')
            if data['api_success'][DVOL_IDX]:
                dvol_map[ds] = float(data['api_indicators'][DVOL_IDX])
    except Exception:
        # Best effort: a corrupt/unexpected NPZ just leaves that date without
        # a dvol reading. KeyboardInterrupt/SystemExit still propagate.
        n_dvol_errors += 1
        continue

dvol_vals = np.array(sorted(dvol_map.values()))
if dvol_vals.size:
    dvol_p25  = np.percentile(dvol_vals, 25)
    dvol_p50  = np.percentile(dvol_vals, 50)
    dvol_p75  = np.percentile(dvol_vals, 75)
    dvol_decile_edges = np.percentile(dvol_vals, np.arange(0, 101, 10))
else:
    # No readings at all: keep the thresholds defined (NaN) instead of
    # failing inside np.percentile on an empty array.
    print("  WARNING: no dvol_btc readings found; dvol thresholds are NaN")
    dvol_p25 = dvol_p50 = dvol_p75 = float('nan')
    dvol_decile_edges = np.full(11, np.nan)
# "Mid" crossover zone as coded here: p50 to p75.
dvol_crossover_lo = dvol_p50    # lower bound for "mid" crossover zone
dvol_crossover_hi = dvol_p75    # upper bound

print(f"  dvol: p25={dvol_p25:.1f}  p50={dvol_p50:.1f}  p75={dvol_p75:.1f}")
print(f"  crossover zone: {dvol_crossover_lo:.1f}–{dvol_crossover_hi:.1f}")
if n_dvol_errors:
    print(f"  dvol NPZ files skipped (unreadable): {n_dvol_errors}")
print(f"  Files: {total}\n")

# Reporting dimensions used by the accumulators and output tables.
YEARS = [str(y) for y in range(2021, 2027)]      # '2021' .. '2026'
MODES = [
    'A_ONLY',
    'B_ONLY',
    'A_AND_B',
    'A_OR_B',
    'B_UNGATED',
    'A_UNGATED',
    'HOUR_SWITCH',
]
DVOL_BKTS = [f'D{k}' for k in range(1, 11)]      # D1=lowest, D10=highest

def make_s():
    """Return a fresh, zeroed trade-statistics accumulator.

    Keys: ``n`` (trade count), ``wins``, ``gw`` (gross win), ``gl`` (gross
    loss) and ``hold_sum`` (sum of bars held).
    """
    return dict(n=0, wins=0, gw=0.0, gl=0.0, hold_sum=0)

# Accumulators. The three stat tables create a zeroed make_s() entry the
# first time a key is touched:
#   stats      — keyed (mode, component, year); component is A / B / combined
#   hour_stats — keyed (mode, component, hour)
#   dvol_stats — keyed (dvol_bucket, component)
stats, hour_stats, dvol_stats = (defaultdict(make_s) for _ in range(3))
# overlap_log — one dict per processed date (daily overlap info)
# daily_rows  — per-date × mode rows (not filled anywhere in the code shown)
overlap_log, daily_rows = [], []

def dvol_bucket(dv):
    """Map a dvol value to its decile label from ``DVOL_BKTS``.

    Returns the label of the first decile whose upper edge is >= ``dv``.
    Values above every edge (and NaN, which compares false against all
    edges) fall through to the last label.
    """
    upper_edges = dvol_decile_edges[1:]
    return next(
        (DVOL_BKTS[pos] for pos, upper in enumerate(upper_edges) if dv <= upper),
        DVOL_BKTS[-1],
    )

def _add_into(bucket, n, wins, gw, gl, hs):
    """Add one batch of trade totals to an accumulator dict in place."""
    bucket['n'] += n
    bucket['wins'] += wins
    bucket['gw'] += gw
    bucket['gl'] += gl
    bucket['hold_sum'] += hs


def accum(key, n, wins, gw, gl, hs):
    """Fold a batch of trade totals into ``stats[key]``."""
    _add_into(stats[key], n, wins, gw, gl, hs)


def accum_h(key, n, wins, gw, gl, hs):
    """Fold a batch of trade totals into ``hour_stats[key]``."""
    _add_into(hour_stats[key], n, wins, gw, gl, hs)


def accum_d(key, n, wins, gw, gl, hs):
    """Fold a batch of trade totals into ``dvol_stats[key]``."""
    _add_into(dvol_stats[key], n, wins, gw, gl, hs)

# ── Main loop ─────────────────────────────────────────────────────────────
# One pass over the daily parquet files. For each file the script simulates
# Strategy A (dvol-gated and ungated) and Strategy B (ungated, hour-gated,
# and MID-dvol + hour-gated), then folds the per-file totals into `stats`,
# `hour_stats`, `dvol_stats` and `overlap_log`.
# Every bar that meets an entry condition opens its own trade, so trades may
# overlap in time. PnL is the fractional price return at the exit bar; no
# fee or slippage adjustment is applied in this block.

_NO_TRADES = (0, 0, 0.0, 0.0, 0.0)   # (n, wins, gross_win, gross_loss, hold_sum)


def _tally(pnl, holds, mask=None):
    """Summarise trades as ``(n, wins, gross_win, gross_loss, hold_sum)``.

    pnl   : 1-D array of per-trade returns.
    holds : 1-D array of bars held, aligned with ``pnl``.
    mask  : optional boolean selector applied to both arrays first.

    A trade counts as a win only when its return is strictly positive;
    zero-return trades fall on the losing side and add 0 to gross_loss.
    """
    if mask is not None:
        pnl = pnl[mask]
        holds = holds[mask]
    won = pnl > 0
    return (int(pnl.size), int(won.sum()),
            float(pnl[won].sum()), float((-pnl[~won]).sum()),
            int(holds.sum()))


def _exit_tp_or_maxhold(signed_ret, tp, max_hold):
    """Resolve take-profit / max-hold exits.

    signed_ret : (N, max_hold) array of direction-adjusted returns for bars
        1..max_hold after entry (positive = favourable), NaN already zeroed.
    Returns ``(pnl, holds)``: the return at the exit bar and the number of
    bars held (exit index + 1). The exit bar is the first bar whose return
    reaches ``tp``, else the last bar of the window. The booked return is
    the bar's actual return, so a take-profit exit may exceed ``tp``.
    """
    hit = signed_ret >= tp
    exit_bar = np.where(hit.any(1), np.argmax(hit, 1), max_hold - 1)
    pnl = signed_ret[np.arange(len(signed_ret)), exit_bar]
    return pnl, exit_bar + 1


t0 = time.time()
print(f"Main loop ({total} files)...")

MH_MAX = max(MH_A, MH_B)          # longest look-ahead any strategy needs
_good_hours = list(GOOD_HOURS)    # np.isin takes a sequence, not a set

for i_file, pf in enumerate(parquet_files):
    ds   = pf.stem
    year = ds[:4]
    dvol = dvol_map.get(ds, np.nan)
    # NOTE(review): dates without a dvol reading are booked under 'D5', which
    # mixes them into the 5th-decile row of the bydvol report — confirm intended.
    dvol_bkt = dvol_bucket(dvol) if np.isfinite(dvol) else 'D5'

    # Regime classification (missing dvol is treated as MID)
    if np.isnan(dvol):
        dvol_regime = 'MID'
    elif dvol > dvol_p75:
        dvol_regime = 'HIGH'
    elif dvol < dvol_p25:
        dvol_regime = 'LOW'
    else:
        dvol_regime = 'MID'

    try:
        df = pd.read_parquet(pf)
    except Exception as exc:
        # Best effort: report and skip an unreadable file instead of silently
        # swallowing everything (a bare except also hid KeyboardInterrupt).
        print(f"  [skip] {pf.name}: {type(exc).__name__}: {exc}")
        continue
    if 'vel_div' not in df.columns or 'BTCUSDT' not in df.columns:
        continue

    vd  = df['vel_div'].values.astype(np.float64)
    btc = df['BTCUSDT'].values.astype(np.float64)
    # Entry hour comes from the frame's index; presumably a UTC DatetimeIndex
    # — TODO confirm. Without an hour-capable index every bar is hour 0, so
    # the hour gate then selects nothing.
    if hasattr(df.index, 'hour'):
        bar_hours = df.index.hour.values
    elif pd.api.types.is_datetime64_any_dtype(df.index):
        bar_hours = df.index.hour
    else:
        bar_hours = np.zeros(len(btc), dtype=int)
    del df

    vd  = np.where(np.isfinite(vd), vd, 0.0)                    # bad vel_div → no signal
    btc = np.where(np.isfinite(btc) & (btc > 0), btc, np.nan)   # bad price → NaN
    n   = len(btc)
    if n < MH_MAX + 5:
        continue

    # Row i of each window holds bar i followed by the next MH_MAX bars.
    n_usable   = n - MH_MAX
    vd_win     = sliding_window_view(vd,  MH_MAX+1)[:n_usable]   # (n_usable, MH_MAX+1)
    btc_win    = sliding_window_view(btc, MH_MAX+1)[:n_usable]
    ep_arr     = btc_win[:, 0]                                   # entry price per bar
    valid      = np.isfinite(ep_arr) & (ep_arr > 0)
    bar_h_entry = bar_hours[:n_usable] if len(bar_hours) >= n_usable else np.zeros(n_usable, dtype=int)

    vd_entry = vd[:n_usable]
    # LONG signal bars; shared by LOW-dvol A, ungated A and every B variant.
    long_idx = np.where((vd_entry <= -ENTRY_T) & valid)[0]

    # ── STRATEGY A — Directional ──────────────────────────────────────────
    # HIGH dvol → SHORT on vel_div >= +ENTRY_T, TP=95bps, max-hold=MH_A
    # LOW  dvol → LONG  on vel_div <= -ENTRY_T, TP=95bps, max-hold=MH_A
    a_res = _NO_TRADES
    if dvol_regime in ('HIGH', 'LOW'):
        short_side = dvol_regime == 'HIGH'
        if short_side:
            idx_a = np.where((vd_entry >= +ENTRY_T) & valid)[0]
        else:
            idx_a = long_idx
        if len(idx_a):
            ep_col = ep_arr[idx_a][:, None]
            fut_a  = btc_win[idx_a, 1:MH_A+1]            # (N, MH_A) future prices
            if short_side:
                raw_a = (ep_col - fut_a) / ep_col        # positive = price fell
            else:
                raw_a = (fut_a - ep_col) / ep_col        # positive = price rose
            pr_a = np.where(np.isfinite(fut_a), raw_a, 0.0)
            a_res = _tally(*_exit_tp_or_maxhold(pr_a, TP_PCT, MH_A))

    # Ungated strategy A (all dvol levels, always LONG on vel_div<=-ENTRY_T)
    a_ung = _NO_TRADES
    if len(long_idx):
        ep_col   = ep_arr[long_idx][:, None]
        fut_aung = btc_win[long_idx, 1:MH_A+1]
        pr_aung  = np.where(np.isfinite(fut_aung), (fut_aung - ep_col) / ep_col, 0.0)
        a_ung = _tally(*_exit_tp_or_maxhold(pr_aung, TP_PCT, MH_A))

    # ── STRATEGY B — Crossover Scalp ─────────────────────────────────────
    # Always LONG: vel_div <= -ENTRY_T → LONG, exit vel_div >= +ENTRY_T
    b_all = b_hour = b_mid = _NO_TRADES

    if len(long_idx):
        ep_col    = ep_arr[long_idx][:, None]
        fut_vd_b  = vd_win[long_idx, 1:MH_B+1]
        fut_btc_b = btc_win[long_idx, 1:MH_B+1]
        pr_b      = np.where(np.isfinite(fut_btc_b), (fut_btc_b - ep_col) / ep_col, 0.0)
        h_entry   = bar_h_entry[long_idx]

        BIG = MH_B + 1                                   # sentinel: condition never met
        exhst = fut_vd_b >= +ENTRY_T                     # reversion complete
        inv   = fut_vd_b <= -INV_T                       # deepened the wrong way
        exhst_b = np.where(exhst.any(1), np.argmax(exhst, 1), BIG)
        inv_b   = np.where(inv.any(1),   np.argmax(inv,   1), BIG)
        # Exit at the earliest of exhaustion, invalidation or max hold.
        eb_b     = np.minimum(np.minimum(exhst_b, inv_b), MH_B-1)
        ep_b_pnl = pr_b[np.arange(len(long_idx)), eb_b]
        hb_b     = eb_b + 1

        # All trades (ungated B)
        b_all = _tally(ep_b_pnl, hb_b)

        # Hour-gated trades
        h_mask = np.isin(h_entry, _good_hours)
        if h_mask.any():
            b_hour = _tally(ep_b_pnl, hb_b, h_mask)

        # dvol-mid + hour gated (A_OR_B uses B here). Missing dvol is already
        # classified MID above, so it needs no separate NaN check.
        if dvol_regime == 'MID':
            b_mid = b_hour

        # Hour breakdown for B_UNGATED
        for h in np.unique(h_entry):
            accum_h(('B_UNGATED','B',int(h)), *_tally(ep_b_pnl, hb_b, h_entry == h))

    # ── Accumulate stats per mode ─────────────────────────────────────────
    # A_ONLY (gated by dvol HIGH/LOW)
    accum(('A_ONLY','A',year), *a_res)

    # B_ONLY (hour-gated)
    accum(('B_ONLY','B',year), *b_hour)

    # A_AND_B (both simultaneously, additive PnL)
    # A: gated by dvol; B: hour-gated. They may partially overlap in entry bars.
    # Combined: just sum both sets of trades.
    accum(('A_AND_B','combined',year), *(x + y for x, y in zip(a_res, b_hour)))
    accum(('A_AND_B','A',year), *a_res)
    accum(('A_AND_B','B',year), *b_hour)

    # A_OR_B (regime-switched: A when dvol extreme, B when mid+good-hour, else FLAT)
    accum(('A_OR_B','combined',year), *(x + y for x, y in zip(a_res, b_mid)))
    accum(('A_OR_B','A',year), *a_res)
    accum(('A_OR_B','B',year), *b_mid)

    # B_UNGATED (crossover without any gate)
    accum(('B_UNGATED','B',year), *b_all)

    # A_UNGATED (directional LONG at all dvol levels)
    accum(('A_UNGATED','A',year), *a_ung)

    # HOUR_SWITCH (B hour-gated, no dvol gate)
    accum(('HOUR_SWITCH','B',year), *b_hour)

    # dvol_stats for the ungated variants
    accum_d((dvol_bkt,'B_ung'), *b_all)
    accum_d((dvol_bkt,'A_ung'), *a_ung)

    # Daily overlap log
    a_n, _, a_gw, a_gl, _ = a_res
    bh_n, _, bh_gw, bh_gl, _ = b_hour
    overlap_log.append({
        'date': ds, 'year': year,
        'dvol': round(dvol,2) if np.isfinite(dvol) else None,
        'dvol_regime': dvol_regime,
        'A_trades': a_n, 'B_hour_trades': bh_n, 'B_all_trades': b_all[0],
        'A_gw': round(a_gw,6), 'A_gl': round(a_gl,6),
        'B_hour_gw': round(bh_gw,6), 'B_hour_gl': round(bh_gl,6),
        'combined_gw': round(a_gw+bh_gw,6), 'combined_gl': round(a_gl+bh_gl,6),
    })

    del vd, btc, vd_win, btc_win

    if (i_file+1) % 300 == 0:
        gc.collect()
        e = time.time()-t0
        print(f"  [{i_file+1}/{total}] {ds}  {e:.0f}s  eta={e/(i_file+1)*(total-i_file-1):.0f}s")

elapsed = time.time()-t0
print(f"\nPass complete: {elapsed:.0f}s\n")

# ── Build output rows ──────────────────────────────────────────────────────
def met(s):
    """Turn an accumulator dict into ``(n, win_rate_pct, profit_factor, avg_hold)``.

    Win rate and average hold are NaN when there are no trades. Profit
    factor is gross win / gross loss; with no gross loss it is the sentinel
    999.0 when there is any gross win, otherwise NaN. Win rate and average
    hold are rounded to 3 decimals, profit factor to 4.
    """
    nan = float('nan')
    count = s['n']
    gross_win = s['gw']
    gross_loss = s['gl']

    if count:
        win_rate = s['wins'] / count * 100
        avg_hold = s['hold_sum'] / count
    else:
        win_rate = avg_hold = nan

    if gross_loss > 0:
        profit_factor = gross_win / gross_loss
    elif gross_win > 0:
        profit_factor = 999.0
    else:
        profit_factor = nan

    return count, round(win_rate, 3), round(profit_factor, 4), round(avg_hold, 3)

# Summary: per (mode, component) across all years
def _components_for(mode):
    """Return the stat components recorded for *mode*."""
    if mode in ('A_AND_B', 'A_OR_B'):
        return ['A', 'B', 'combined']
    if mode in ('A_ONLY', 'A_UNGATED'):
        return ['A']
    return ['B']


_STAT_FIELDS = ('n', 'wins', 'hold_sum', 'gw', 'gl')

summary = []
for mode in MODES:
    for comp in _components_for(mode):
        # Pool the yearly accumulators; only years listed in YEARS are counted.
        pooled = make_s()
        for yr in YEARS:
            yearly = stats.get((mode, comp, yr))
            if not yearly:
                continue
            for field in _STAT_FIELDS:
                pooled[field] += yearly[field]
        n_tr, win_rate, profit_factor, avg_hold = met(pooled)
        summary.append({
            'mode': mode, 'component': comp, 'n_trades': n_tr, 'wr': win_rate,
            'pf': profit_factor, 'avg_hold': avg_hold,
            'gw': round(pooled['gw'], 2), 'gl': round(pooled['gl'], 2),
        })

# Per-year rows (a zeroed accumulator stands in for missing years)
year_rows = []
for mode in MODES:
    for comp in _components_for(mode):
        for yr in YEARS:
            n_tr, win_rate, profit_factor, avg_hold = met(stats.get((mode, comp, yr), make_s()))
            year_rows.append({
                'mode': mode, 'component': comp, 'year': yr,
                'n_trades': n_tr, 'wr': win_rate, 'pf': profit_factor, 'avg_hold': avg_hold,
            })

# Hour rows (B_UNGATED)
hour_rows = []
for hour in range(24):
    n_tr, win_rate, profit_factor, avg_hold = met(hour_stats.get(('B_UNGATED', 'B', hour), make_s()))
    hour_rows.append({
        'hour_utc': hour, 'n_trades': n_tr, 'wr': win_rate,
        'pf': profit_factor, 'avg_hold': avg_hold,
    })

# dvol rows
dvol_rows = []
for bucket in DVOL_BKTS:
    for strat in ('B_ung', 'A_ung'):
        n_tr, win_rate, profit_factor, avg_hold = met(dvol_stats.get((bucket, strat), make_s()))
        dvol_rows.append({
            'dvol_bucket': bucket, 'strategy': strat, 'n_trades': n_tr,
            'wr': win_rate, 'pf': profit_factor, 'avg_hold': avg_hold,
        })

# ── Save CSVs ──────────────────────────────────────────────────────────────
# Run timestamp shared by every output file name.
ts = datetime.now().strftime("%Y%m%d_%H%M%S")


def save_csv(rows, name):
    """Write *rows* (a list of dicts) to ``combined_strategy_<name>_<ts>.csv`` in LOG_DIR.

    Column order follows the keys of the first row. Nothing is written (and
    nothing printed) when *rows* is empty.
    """
    if rows:
        out_path = LOG_DIR / f"combined_strategy_{name}_{ts}.csv"
        with open(out_path, 'w', newline='', encoding='utf-8') as handle:
            writer = csv.DictWriter(handle, fieldnames=list(rows[0]))
            writer.writeheader()
            for row in rows:
                writer.writerow(row)
        print(f"  → {out_path}  ({len(rows)} rows)")

print("Saving CSVs...")
# (rows, file-name tag) pairs, written in this order.
for table_rows, table_tag in (
    (summary,     'summary'),
    (year_rows,   'byyear'),
    (hour_rows,   'byhour'),
    (dvol_rows,   'bydvol'),
    (overlap_log, 'overlap'),
):
    save_csv(table_rows, table_tag)

# ── Console output ─────────────────────────────────────────────────────────
def pf_str(pf):
    """Format a profit factor for the console tables.

    NaN and the >=999 "no losses" sentinel render as fixed-width ``nan`` /
    ``inf``; any other value is printed with 3 decimals plus a star marker:
    ``***`` above 1.0, ``** `` above 0.8, ``*  `` otherwise.
    """
    if np.isnan(pf):
        return '   nan'
    if pf >= 999:
        return '   inf'
    if pf > 1.0:
        marker = '***'
    elif pf > 0.8:
        marker = '** '
    else:
        marker = '*  '
    return f'{pf:6.3f}' + marker

# Banner describing the run configuration, followed by one table line per
# summary row. `hdr` is kept as a module-level name for later reuse.
for banner_line in (
    f"\n{'='*95}",
    f"  COMBINED TWO-STRATEGY ARCHITECTURE — FULL RESULTS",
    f"  Strategy A: directional (95bps TP, {MH_A}-bar max), dvol-gated (HIGH→SHORT, LOW→LONG)",
    f"  Strategy B: crossover scalp (vel_div ±{ENTRY_T} cross), hour-gated ({sorted(GOOD_HOURS)}h UTC)",
    f"  dvol: p25={dvol_p25:.1f}  p50={dvol_p50:.1f}  p75={dvol_p75:.1f}",
    f"{'='*95}",
):
    print(banner_line)

hdr = f"  {'Mode':<14} {'Comp':<10} {'N':>9} {'WR%':>7} {'PF':>10} {'AvgHold':>8}"
print(hdr)
print(f"  {'-'*63}")
for r in summary:
    left_part = f"  {r['mode']:<14} {r['component']:<10} {r['n_trades']:>9,} "
    right_part = f"{r['wr']:>7.1f}% {pf_str(r['pf']):>10} {r['avg_hold']:>8.2f}b"
    print(left_part + right_part)

# Per-year for key modes: one small table per mode, showing only years that
# had trades, followed by a TOTAL line pooled from those same years.
KEY_MODES = ['A_ONLY','B_ONLY','A_AND_B','A_OR_B','B_UNGATED','A_UNGATED']
print(f"\n{'='*95}")
print(f"  PER-YEAR BREAKDOWN")
print(f"{'='*95}")
for mode in KEY_MODES:
    # Headline component: 'combined' for two-strategy modes, else the single arm.
    if mode in ('A_AND_B','A_OR_B'):
        comp = 'combined'
    elif mode in ('A_ONLY','A_UNGATED'):
        comp = 'A'
    else:
        comp = 'B'
    by_year = {row['year']: row for row in year_rows
               if row['mode']==mode and row['component']==comp}
    print(f"\n  {mode} ({comp}):")
    print(f"  {'Year':<6} {'N':>9} {'WR%':>7} {'PF':>10} {'AvgHold':>8}")
    print(f"  {'-'*45}")
    tot = make_s()
    for yr in YEARS:
        d = by_year.get(yr)
        if not d or d['n_trades'] <= 0:
            continue
        print(f"  {yr:<6} {d['n_trades']:>9,} {d['wr']:>7.1f}% {pf_str(d['pf']):>10} {d['avg_hold']:>8.2f}b")
        yr_stats = stats.get((mode,comp,yr), make_s())
        for field in ('n','wins','hold_sum','gw','gl'):
            tot[field] += yr_stats[field]
    n_t,wr_t,pf_t,ah_t = met(tot)
    print(f"  {'TOTAL':<6} {n_t:>9,} {wr_t:>7.1f}% {pf_str(pf_t):>10} {ah_t:>8.2f}b")

# A+B overlap analysis: how often A and hour-gated B trade on the same day,
# and how each performs on overlap vs non-overlap days.
def _pct_of_days(count):
    """Share of processed days as a percentage; NaN when no day was processed."""
    return count / total_days * 100 if total_days else float('nan')


def _pf_text(gross_win, gross_loss):
    """Profit factor with 4 decimals, or 'n/a' when there is no gross loss."""
    return f"{gross_win/gross_loss:.4f}" if gross_loss > 0 else "n/a"


print(f"\n{'='*95}")
print(f"  A vs B TRADE OVERLAP ANALYSIS")
print(f"{'='*95}")
total_days = len(overlap_log)
a_active_days = sum(1 for r in overlap_log if r['A_trades']>0)
b_active_days = sum(1 for r in overlap_log if r['B_hour_trades']>0)
both_active   = sum(1 for r in overlap_log if r['A_trades']>0 and r['B_hour_trades']>0)
# Percentages go through _pct_of_days so an empty run (no usable files)
# prints 'nan%' instead of raising ZeroDivisionError.
print(f"  Total days: {total_days}")
print(f"  Days with A trades: {a_active_days}  ({_pct_of_days(a_active_days):.1f}%)")
print(f"  Days with B trades: {b_active_days}  ({_pct_of_days(b_active_days):.1f}%)")
print(f"  Days with BOTH A+B: {both_active}   ({_pct_of_days(both_active):.1f}%) ← overlap days")
print(f"  Days with A ONLY:   {a_active_days-both_active}")
print(f"  Days with B ONLY:   {b_active_days-both_active}")
print(f"  Days with NEITHER:  {total_days-a_active_days-b_active_days+both_active}")

# PnL contribution analysis (sums of the per-day, 6-decimal-rounded log values)
total_comb_gw = sum(r['combined_gw'] for r in overlap_log)
total_comb_gl = sum(r['combined_gl'] for r in overlap_log)
total_a_gw    = sum(r['A_gw'] for r in overlap_log)
total_a_gl    = sum(r['A_gl'] for r in overlap_log)
total_bh_gw   = sum(r['B_hour_gw'] for r in overlap_log)
total_bh_gl   = sum(r['B_hour_gl'] for r in overlap_log)
print(f"\n  PnL contribution (A + B_hour):")
print(f"    A:      GW={total_a_gw:.4f}  GL={total_a_gl:.4f}  PF={total_a_gw/total_a_gl:.4f}" if total_a_gl>0 else "    A: no trades")
print(f"    B_hour: GW={total_bh_gw:.4f}  GL={total_bh_gl:.4f}  PF={total_bh_gw/total_bh_gl:.4f}" if total_bh_gl>0 else "    B_hour: no trades")
print(f"    COMB:   GW={total_comb_gw:.4f}  GL={total_comb_gl:.4f}  PF={total_comb_gw/total_comb_gl:.4f}" if total_comb_gl>0 else "    COMB: no gl")

# A vs B PF on overlap days vs non-overlap days
a_overlap_gw = a_overlap_gl = b_overlap_gw = b_overlap_gl = 0.0
a_nonoverlap_gw = a_nonoverlap_gl = 0.0
for r in overlap_log:
    if r['A_trades']>0 and r['B_hour_trades']>0:
        a_overlap_gw += r['A_gw']; a_overlap_gl += r['A_gl']
        b_overlap_gw += r['B_hour_gw']; b_overlap_gl += r['B_hour_gl']
    elif r['A_trades']>0:
        a_nonoverlap_gw += r['A_gw']; a_nonoverlap_gl += r['A_gl']

# Always print a labelled line; an undefined PF shows as 'n/a' rather than
# an unexplained blank line.
print(f"\n  A PF on OVERLAP days    (both A+B active): {_pf_text(a_overlap_gw, a_overlap_gl)}")
print(f"  B PF on OVERLAP days    (both A+B active): {_pf_text(b_overlap_gw, b_overlap_gl)}")
print(f"  A PF on NON-OVERLAP days (only A active):  {_pf_text(a_nonoverlap_gw, a_nonoverlap_gl)}")

# Best combined scenario conclusion: list the headline PF of each mode and
# name the one with the highest defined PF.
print(f"\n{'='*95}")
print(f"  CONCLUSION: ARE THEY ADDITIVE?")
print(f"{'='*95}")
b_ung = next((r for r in summary if r['mode']=='B_UNGATED' and r['component']=='B'), None)
b_only = next((r for r in summary if r['mode']=='B_ONLY' and r['component']=='B'), None)
a_only = next((r for r in summary if r['mode']=='A_ONLY' and r['component']=='A'), None)
a_and_b = next((r for r in summary if r['mode']=='A_AND_B' and r['component']=='combined'), None)
a_or_b  = next((r for r in summary if r['mode']=='A_OR_B' and r['component']=='combined'), None)
a_ung   = next((r for r in summary if r['mode']=='A_UNGATED' and r['component']=='A'), None)
# a_ung is dereferenced below, so it has to be part of the presence check.
if all([b_ung, b_only, a_only, a_ung, a_and_b, a_or_b]):
    print(f"  B_UNGATED  (crossover, no gate):    PF={pf_str(b_ung['pf'])}  N={b_ung['n_trades']:,}")
    print(f"  B_ONLY     (crossover, hour-gated): PF={pf_str(b_only['pf'])}  N={b_only['n_trades']:,}")
    print(f"  A_ONLY     (directional, dvol-gated):PF={pf_str(a_only['pf'])}  N={a_only['n_trades']:,}")
    print(f"  A_UNGATED  (directional, no gate):  PF={pf_str(a_ung['pf'])}  N={a_ung['n_trades']:,}")
    print(f"  A_AND_B    (both simultaneous):     PF={pf_str(a_and_b['pf'])}  N={a_and_b['n_trades']:,}")
    print(f"  A_OR_B     (regime-switched):       PF={pf_str(a_or_b['pf'])}  N={a_or_b['n_trades']:,}")
    # Candidates for "best" (A_UNGATED is listed above but not ranked, as before).
    candidates = [
        ('B_UNGATED', b_ung['pf']),
        ('B_ONLY',    b_only['pf']),
        ('A_ONLY',    a_only['pf']),
        ('A_AND_B',   a_and_b['pf']),
        ('A_OR_B',    a_or_b['pf']),
    ]
    # NaN compares false against everything, so max() over a list containing
    # NaN can return NaN as the "best"; rank only modes with a defined PF.
    ranked = [(nm, val) for nm, val in candidates if not np.isnan(val)]
    if ranked:
        best_nm, best_pf = max(ranked, key=lambda item: item[1])
        print(f"\n  → BEST: {best_nm}  PF={pf_str(best_pf)}")
    else:
        print(f"\n  → BEST: n/a (no mode has a defined PF)")

print(f"\n  Runtime: {elapsed:.0f}s")

# Save top-summary text: run header, column header, then the same
# per-(mode, component) lines that were printed to the console.
top_path = LOG_DIR / f"combined_strategy_top_{ts}.txt"
top_header = [
    f"COMBINED TWO-STRATEGY ARCHITECTURE\n",
    f"Generated: {ts}  Runtime: {elapsed:.0f}s\n",
    f"Strategy A: directional 95bps TP {MH_A}-bar hold dvol-gated\n",
    f"Strategy B: crossover scalp {ENTRY_T} cross hour-gated {sorted(GOOD_HOURS)}h UTC\n",
    f"dvol: p25={dvol_p25:.1f} p50={dvol_p50:.1f} p75={dvol_p75:.1f}\n\n",
    hdr+"\n"+"-"*63+"\n",
]
with open(top_path,'w',encoding='utf-8') as f:
    f.writelines(top_header)
    # Row lines are formatted lazily while the file is open.
    f.writelines(
        f"  {r['mode']:<14} {r['component']:<10} {r['n_trades']:>9,} "
        f"{r['wr']:>7.1f}% {pf_str(r['pf']):>10} {r['avg_hold']:>8.2f}b\n"
        for r in summary
    )
print(f"\n  → {top_path}")