DOLPHIN/prod/tools/parity_test.py

#!/usr/bin/env python3
"""
Parity Harness: BLUE vs GREEN engine functional identity test
=============================================================
Creates two identical create_d_liq_engine instances with BLUE's gold ENGINE_KWARGS,
feeds them an identical sequence of step_bar() inputs, and reports any divergence.

NO production code is modified. Replaying real bar history from HZ DOLPHIN_STATE_BLUE is
not implemented in this script; inputs are always a reproducible synthetic scan sequence
generated from a seeded RNG (see --seed).

Usage:
    cd /mnt/dolphinng5_predict
    siloqy-env python prod/tools/parity_test.py [--n-bars 200] [--seed 7] [--verbose]
"""
import sys
import argparse
import json
import math
import random
from pathlib import Path

PROJECT_ROOT = Path(__file__).parent.parent.parent
sys.path.insert(0, str(PROJECT_ROOT / 'nautilus_dolphin'))
sys.path.insert(0, str(PROJECT_ROOT / 'prod'))
sys.path.insert(0, str(PROJECT_ROOT))

from nautilus_dolphin.nautilus.proxy_boost_engine import create_d_liq_engine

# ── Gold ENGINE_KWARGS — exactly BLUE's production config ────────────────────
# Passed verbatim (via **ENGINE_KWARGS) to create_d_liq_engine for both engines.
ENGINE_KWARGS = {
    'initial_capital': 25000.0,
    'vel_div_threshold': -0.02,
    'vel_div_extreme': -0.05,
    'min_leverage': 0.5,
    'max_leverage': 8.0,
    'leverage_convexity': 3.0,
    'fraction': 0.20,
    'fixed_tp_pct': 0.0095,
    'stop_pct': 1.0,
    'max_hold_bars': 250,
    # dc_* keys
    'use_direction_confirm': True,
    'dc_lookback_bars': 7,
    'dc_min_magnitude_bps': 0.75,
    'dc_skip_contradicts': True,
    'dc_leverage_boost': 1.0,
    'dc_leverage_reduce': 0.5,
    'use_asset_selection': True,
    'min_irp_alignment': 0.0,
    # sp_* keys
    'use_sp_fees': True,
    'use_sp_slippage': True,
    'sp_maker_entry_rate': 0.62,
    'sp_maker_exit_rate': 0.50,
    # ob_* keys
    'use_ob_edge': True,
    'ob_edge_bps': 5.0,
    'ob_confirm_rate': 0.40,
    'lookback': 100,
    'use_alpha_layers': True,
    'use_dynamic_leverage': True,
    'seed': 42,
}

# Symbols priced in every synthetic scan; order is significant because the
# seeded RNG draws one initial price per symbol in this order.
ASSETS = [
    "BTCUSDT",
    "ETHUSDT",
    "BNBUSDT",
    "SOLUSDT",
    "XRPUSDT",
    "ADAUSDT",
    "DOGEUSDT",
    "TRXUSDT",
    "DOTUSDT",
    "MATICUSDT",
    "LTCUSDT",
    "AVAXUSDT",
    "LINKUSDT",
    "UNIUSDT",
    "ATOMUSDT",
    "ETCUSDT",
    "XLMUSDT",
    "BCHUSDT",
    "NEARUSDT",
    "ALGOUSDT",
]

# NOTE(review): not referenced anywhere else in this file; the name suggests a
# volatility 60th-percentile threshold — confirm before relying on it.
VOL_P60 = 9.868e-05


def _synthetic_prices(rng, n_assets: int, base: float = 100.0):
    """Return one jittered price per symbol for the first ``n_assets`` assets.

    Each price is ``base * (1 + u)`` with ``u`` drawn independently from
    ``rng.uniform(-0.001, 0.001)``. This is a stateless snapshot around
    ``base``, not a random walk: nothing is carried over between calls.

    Args:
        rng: object exposing ``uniform(a, b)`` (e.g. ``random.Random``).
        n_assets: how many leading entries of ``ASSETS`` to price
            (plain slice semantics, so values above ``len(ASSETS)`` are clamped).
        base: centre price shared by every symbol.

    Returns:
        dict mapping symbol -> price, in ``ASSETS`` order (one RNG draw per symbol).
    """
    return {
        sym: base * (1 + rng.uniform(-0.001, 0.001))
        for sym in ASSETS[:n_assets]
    }


def _vol_ok(bar_idx: int) -> bool:
    """Synthetic stand-in for BLUE's vol_ok gate.

    Returns False for bars 0..99 and True from bar 100 onwards.
    """
    first_ok_bar = 100
    return bar_idx >= first_ok_bar


def _vel_div(rng, bar_idx: int) -> float:
    """Draw a synthetic vel_div value for ``bar_idx``.

    Bars after index 50 that are multiples of 30 draw from [-0.06, -0.022]
    (a signal-range value); every other bar draws from [-0.015, 0.05].
    Exactly one ``rng.uniform`` call is made per invocation.
    """
    is_signal_bar = bar_idx > 50 and bar_idx % 30 == 0
    low, high = (-0.06, -0.022) if is_signal_bar else (-0.015, 0.05)
    return rng.uniform(low, high)


def build_scan_sequence(n_bars: int, seed: int = 7) -> list:
    """Build a deterministic list of synthetic step_bar() inputs.

    Layout of the generated bars:
    - From bar 110 onwards, every 50-bar cycle opens with a 10-bar window in
      which all prices are pushed down (~0.12 % per bar, with small jitter) so
      that direction-confirm (dc_lookback_bars=7) sees a consistent decline.
    - Inside that window vel_div is -0.010, except on the window's last bar
      where it is -0.035 (below the -0.02 threshold).
    - Outside the window prices follow a small random walk and vel_div,
      v50_vel and v750_vel are drawn uniformly around zero.

    Args:
        n_bars: number of bars to generate (bar_idx runs 0..n_bars-1).
        seed: seed for the private ``random.Random`` driving every draw.

    Returns:
        list of dicts with keys 'bar_idx', 'vel_div', 'prices',
        'vol_regime_ok', 'v50_vel', 'v750_vel'; each 'prices' value is an
        independent snapshot dict of symbol -> price.
    """
    cycle_len = 50          # bars between signal attempts
    first_signal_bar = 110  # first trend window starts here (past lookback=100)
    trend_len = 10          # length of the forced downtrend window
    drop_per_bar = 0.0012   # per-bar fractional drop applied inside the window

    rng = random.Random(seed)
    prices = {sym: 100.0 + rng.uniform(0, 50) for sym in ASSETS}
    scans = []

    for bar_idx in range(n_bars):
        # Position of this bar within the signal cycle (no cycle before bar 110).
        if bar_idx >= first_signal_bar:
            phase = (bar_idx - first_signal_bar) % cycle_len
            trending = phase < trend_len
            firing = phase == trend_len - 1
        else:
            trending = firing = False

        # Price update: one RNG draw per symbol, in dict order.
        for sym in prices:
            if trending:
                step = 1 - drop_per_bar + rng.uniform(-0.0002, 0.0002)
            else:
                step = 1 + rng.uniform(-0.002, 0.002)
            prices[sym] *= step

        # vel_div: fixed values inside the window, random otherwise.
        if firing:
            vel_div = -0.035
        elif trending:
            vel_div = -0.010
        else:
            vel_div = rng.uniform(-0.010, 0.040)

        # v50 is drawn before v750 in both branches (keeps the RNG stream stable).
        if trending:
            v50_vel = -0.005 + rng.uniform(-0.001, 0.001)
            v750_vel = -0.002 + rng.uniform(-0.0005, 0.0005)
        else:
            v50_vel = rng.uniform(-0.01, 0.01)
            v750_vel = rng.uniform(-0.005, 0.005)

        scan = {
            'bar_idx': bar_idx,
            'vel_div': vel_div,
            'prices': dict(prices),   # snapshot; later bars keep mutating `prices`
            'vol_regime_ok': _vol_ok(bar_idx),
            'v50_vel': v50_vel,
            'v750_vel': v750_vel,
        }
        scans.append(scan)

    return scans


_SKIP_KEYS = {'trade_id'}   # UUID — differs per instance by design; not functional
_MISSING = object()         # marks a key absent on one side (distinct from a stored None)


def _fmt_side(value) -> str:
    """Render one side of a divergence; absent keys show as ``<missing>``."""
    return '<missing>' if value is _MISSING else repr(value)


def _values_match(va, vb, tol: float) -> bool:
    """Return True when two field values count as identical for parity purposes."""
    if va is _MISSING or vb is _MISSING:
        # Present on one side only: structural difference, never a match.
        return False
    if va == vb:
        return True
    try:
        fa, fb = float(va), float(vb)
    except (TypeError, ValueError):
        return False
    # NaN never compares equal to itself, but two engines that both produce
    # NaN for the same field agree with each other.
    if math.isnan(fa) and math.isnan(fb):
        return True
    return math.isclose(fa, fb, rel_tol=tol, abs_tol=tol)


def _compare_dicts(label: str, da: dict, db: dict, tol: float = 1e-9) -> list:
    """Compare two dicts (entry or exit sub-dict) and list their divergences.

    Args:
        label: prefix used in the divergence messages (e.g. 'entry', 'exit').
        da: engine A's dict, or None when A produced nothing.
        db: engine B's dict, or None when B produced nothing.
        tol: tolerance used as both ``rel_tol`` and ``abs_tol`` for values
            that can be converted with ``float()``.

    Returns:
        list of human-readable divergence strings; empty when the dicts agree.
        Keys in ``_SKIP_KEYS`` are ignored. A key present on only one side is
        reported (shown as ``<missing>``), and a NaN on both sides is treated
        as agreement.
    """
    if da is None and db is None:
        return []
    if da is None or db is None:
        return [f"  {label}: A={da!r} B={db!r}"]

    divs = []
    all_keys = (set(da) | set(db)) - _SKIP_KEYS
    for k in sorted(all_keys):
        va, vb = da.get(k, _MISSING), db.get(k, _MISSING)
        if _values_match(va, vb, tol):
            continue
        divs.append(f"  {label}[{k!r}]: A={_fmt_side(va)} B={_fmt_side(vb)}")
    return divs


def _fmt_num(value, spec: str, scale=None) -> str:
    """Format ``value`` (optionally multiplied by ``scale``) with ``spec``.

    Returns 'n/a' instead of raising when the value is None or otherwise not
    formattable, so verbose logging can never abort a parity run.
    """
    try:
        if scale is not None:
            value = value * scale
        return format(value, spec)
    except (TypeError, ValueError):
        return 'n/a'


def _compare_results(idx: int, r_a: dict, r_b: dict, verbose: bool) -> list:
    """Compare two step_bar() return dicts {'exit':..., 'entry':...}.

    Args:
        idx: bar index, used only in the verbose output.
        r_a: result dict from engine A.
        r_b: result dict from engine B.
        verbose: when True, print the divergences (plus both raw results), or
            a one-line MATCH summary for each entry/exit engine A produced.

    Returns:
        list of divergence strings for the 'entry' and 'exit' sub-dicts;
        empty when both engines agree on this bar.
    """
    divs = []
    divs += _compare_dicts('entry', r_a.get('entry'), r_b.get('entry'))
    divs += _compare_dicts('exit',  r_a.get('exit'),  r_b.get('exit'))

    if not verbose:
        return divs

    if divs:
        print(f"\n[bar {idx}] DIVERGENCE:")
        for d in divs:
            print(d)
        print(f"  A: {r_a}")
        print(f"  B: {r_b}")
        return divs

    entry_a = r_a.get('entry')
    exit_a  = r_a.get('exit')
    if entry_a is not None:
        # 'leverage' may be absent or None; formatting it directly would raise.
        lev = _fmt_num(entry_a.get('leverage'), '.3f')
        print(f"[bar {idx}] MATCH ENTRY asset={entry_a.get('asset')} lev={lev}")
    if exit_a is not None:
        # A missing 'pnl_pct' still prints as 0; an explicit None prints as n/a.
        pnl = _fmt_num(exit_a.get('pnl_pct', 0), '.4f', scale=100)
        print(f"[bar {idx}] MATCH EXIT  asset={exit_a.get('asset')} reason={exit_a.get('reason')} pnl={pnl}%")

    return divs


def run_parity(n_bars: int = 300, verbose: bool = False, seed: int = 7):
    """Drive two identically configured engines through the same bars.

    Builds two engines from ``ENGINE_KWARGS``, replays the synthetic sequence
    from ``build_scan_sequence`` through ``step_bar()`` on each (A first, then
    B), compares the per-bar results and prints a summary.

    Args:
        n_bars: number of synthetic bars to replay.
        verbose: forwarded to ``_compare_results`` for per-bar output.
        seed: RNG seed for the synthetic sequence.

    Exits the process with status 1 (via ``sys.exit``) if any bar diverged;
    otherwise returns None after printing a PASS line.
    """
    print(f"Parity harness: n_bars={n_bars} seed={seed}")
    print("Creating engine A (reference) ...")
    eng_a = create_d_liq_engine(**ENGINE_KWARGS)
    print("Creating engine B (dut) ...")
    eng_b = create_d_liq_engine(**ENGINE_KWARGS)

    sequence = build_scan_sequence(n_bars=n_bars, seed=seed)
    print(f"Scan sequence generated: {len(sequence)} bars, {len(ASSETS)} assets")
    print("Running step_bar() on both engines with identical inputs ...")
    print()

    # Exactly the keyword arguments handed to step_bar(), in call order.
    step_fields = ('bar_idx', 'vel_div', 'prices', 'vol_regime_ok', 'v50_vel', 'v750_vel')

    diverged_bars = []
    n_entries_a = n_exits_a = 0
    n_entries_b = n_exits_b = 0

    for scan in sequence:
        inputs = {field: scan[field] for field in step_fields}

        r_a = eng_a.step_bar(**inputs)
        r_b = eng_b.step_bar(**inputs)

        # bool adds as 0/1, so these stay plain int counters.
        n_entries_a += int(r_a.get('entry') is not None)
        n_exits_a   += int(r_a.get('exit') is not None)
        n_entries_b += int(r_b.get('entry') is not None)
        n_exits_b   += int(r_b.get('exit') is not None)

        if _compare_results(inputs['bar_idx'], r_a, r_b, verbose=verbose):
            diverged_bars.append(inputs['bar_idx'])

    total_divs = len(diverged_bars)

    # Summary
    print("=" * 60)
    print(f"Engine A: {n_entries_a} entries, {n_exits_a} exits")
    print(f"Engine B: {n_entries_b} entries, {n_exits_b} exits")
    print()

    if total_divs:
        print(f"RESULT: FAIL — {total_divs} diverged bars: {diverged_bars[:20]}")
        sys.exit(1)
    elif n_entries_a > 0:
        print(f"RESULT: PASS — engines are functionally identical ({n_entries_a} trades exercised).")
    else:
        # Nothing diverged, but no trade logic was exercised either.
        print("RESULT: PASS (no entries exercised — increase --n-bars or check signal injection)")


def main():
    """CLI entry point: parse --n-bars / --seed / --verbose and run the harness."""
    parser = argparse.ArgumentParser(description="BLUE/GREEN engine parity test")

    # Integer options share the same shape; registered in the original order.
    int_options = (
        ('--n-bars', 300, 'number of bars to replay'),
        ('--seed', 7, 'RNG seed for synthetic scans'),
    )
    for flag, default, help_text in int_options:
        parser.add_argument(flag, type=int, default=default, help=help_text)
    parser.add_argument('--verbose', action='store_true', help='print each bar match/divergence')

    opts = parser.parse_args()
    run_parity(n_bars=opts.n_bars, verbose=opts.verbose, seed=opts.seed)


if __name__ == '__main__':
    main()
initial: import DOLPHIN baseline 2026-04-21 from dolphinng5_predict working tree Includes core prod + GREEN/BLUE subsystems: - prod/ (BLUE harness, configs, scripts, docs) - nautilus_dolphin/ (GREEN Nautilus-native impl + dvae/ preserved) - adaptive_exit/ (AEM engine + models/bucket_assignments.pkl) - Observability/ (EsoF advisor, TUI, dashboards) - external_factors/ (EsoF producer) - mc_forewarning_qlabs_fork/ (MC regime/envelope) Excludes runtime caches, logs, backups, and reproducible artifacts per .gitignore. 2026-04-21 16:58:38 +02:00			`#!/usr/bin/env python3`
			`"""`
			`Parity Harness: BLUE vs GREEN engine functional identity test`
			`=============================================================`
			`Creates two identical create_d_liq_engine instances with BLUE's gold ENGINE_KWARGS,`
			`feeds them an identical sequence of step_bar() inputs, and reports any divergence.`

			`NO production code is modified. Reads from HZ DOLPHIN_STATE_BLUE for real bar history`
			`if available, otherwise generates synthetic scan data.`

			`Usage:`
			`cd /mnt/dolphinng5_predict`
			`siloqy-env python prod/tools/parity_test.py [--n-bars 200] [--synthetic] [--verbose]`
			`"""`
			`import sys`
			`import argparse`
			`import json`
			`import math`
			`import random`
			`from pathlib import Path`

			`PROJECT_ROOT = Path(__file__).parent.parent.parent`
			`sys.path.insert(0, str(PROJECT_ROOT / 'nautilus_dolphin'))`
			`sys.path.insert(0, str(PROJECT_ROOT / 'prod'))`
			`sys.path.insert(0, str(PROJECT_ROOT))`

			`from nautilus_dolphin.nautilus.proxy_boost_engine import create_d_liq_engine`

			`# ── Gold ENGINE_KWARGS — exactly BLUE's production config ────────────────────`
			`ENGINE_KWARGS = dict(`
			`initial_capital=25000.0, vel_div_threshold=-0.02, vel_div_extreme=-0.05,`
			`min_leverage=0.5, max_leverage=8.0,`
			`leverage_convexity=3.0,`
			`fraction=0.20, fixed_tp_pct=0.0095, stop_pct=1.0, max_hold_bars=250,`
			`use_direction_confirm=True, dc_lookback_bars=7, dc_min_magnitude_bps=0.75,`
			`dc_skip_contradicts=True, dc_leverage_boost=1.0, dc_leverage_reduce=0.5,`
			`use_asset_selection=True, min_irp_alignment=0.0,`
			`use_sp_fees=True, use_sp_slippage=True,`
			`sp_maker_entry_rate=0.62, sp_maker_exit_rate=0.50,`
			`use_ob_edge=True, ob_edge_bps=5.0, ob_confirm_rate=0.40,`
			`lookback=100, use_alpha_layers=True, use_dynamic_leverage=True, seed=42,`
			`)`

			`ASSETS = [`
			`"BTCUSDT", "ETHUSDT", "BNBUSDT", "SOLUSDT", "XRPUSDT",`
			`"ADAUSDT", "DOGEUSDT", "TRXUSDT", "DOTUSDT", "MATICUSDT",`
			`"LTCUSDT", "AVAXUSDT", "LINKUSDT", "UNIUSDT", "ATOMUSDT",`
			`"ETCUSDT", "XLMUSDT", "BCHUSDT", "NEARUSDT", "ALGOUSDT",`
			`]`

			`VOL_P60 = 0.00009868`


			`def _synthetic_prices(rng, n_assets: int, base: float = 100.0):`
			`"""Random walk prices for all assets."""`
			`prices = {}`
			`for i, sym in enumerate(ASSETS[:n_assets]):`
			`prices[sym] = base * (1 + rng.uniform(-0.001, 0.001))`
			`return prices`


			`def _vol_ok(bar_idx: int) -> bool:`
			`"""Replicate BLUE vol_ok gate: True when bar_idx >= 100."""`
			`return bar_idx >= 100`


			`def _vel_div(rng, bar_idx: int) -> float:`
			`"""Generate realistic vel_div; trigger entries occasionally."""`
			`# Every ~30 bars inject a signal below threshold`
			`if bar_idx > 50 and bar_idx % 30 == 0:`
			`return rng.uniform(-0.06, -0.022)`
			`return rng.uniform(-0.015, 0.05)`


			`def build_scan_sequence(n_bars: int, seed: int = 7) -> list:`
			`"""Generate reproducible synthetic scan sequence.`

			`Signal injection strategy:`
			`- Every 50 bars (starting bar 110), run a 10-bar downtrend on all prices`
			`so direction-confirm (dc_lookback_bars=7) sees consistent bearish movement.`
			`- On the 10th bar of the downtrend, fire vel_div = -0.035 (below threshold).`
			`- All other bars: gentle random walk + neutral vel_div.`
			`"""`
			`rng = random.Random(seed)`
			`sequence = []`
			`prices = {sym: 100.0 + rng.uniform(0, 50) for sym in ASSETS}`

			`SIGNAL_CYCLE = 50 # bars between signal attempts`
			`SIGNAL_START = 110 # first signal at bar 110 (well past lookback=100)`
			`TREND_BARS = 10 # bars of forced downtrend before vel_div fires`
			`TREND_DROP = 0.0012 # per-bar drop per asset (~0.12%, enough for dc_magnitude_bps=0.75)`

			`def _in_trend_window(bar_idx):`
			`if bar_idx < SIGNAL_START:`
			`return False, False`
			`offset = (bar_idx - SIGNAL_START) % SIGNAL_CYCLE`
			`in_trend = offset < TREND_BARS`
			`fire_bar = offset == TREND_BARS - 1`
			`return in_trend, fire_bar`

			`for bar_idx in range(n_bars):`
			`in_trend, fire_bar = _in_trend_window(bar_idx)`

			`if in_trend:`
			`# Downtrend: push all prices down slightly`
			`for sym in prices:`
			`prices[sym] *= (1 - TREND_DROP + rng.uniform(-0.0002, 0.0002))`
			`else:`
			`# Normal random walk`
			`for sym in prices:`
			`prices[sym] *= (1 + rng.uniform(-0.002, 0.002))`

			`prices_snap = dict(prices)`

			`# vel_div`
			`if fire_bar:`
			`vd = -0.035 # strong signal`
			`elif in_trend:`
			`vd = -0.010 # mild bearish, not yet threshold`
			`else:`
			`vd = rng.uniform(-0.010, 0.040)`

			`# v50/v750 — negative during trend (consistent with short signal)`
			`if in_trend:`
			`v50 = -0.005 + rng.uniform(-0.001, 0.001)`
			`v750 = -0.002 + rng.uniform(-0.0005, 0.0005)`
			`else:`
			`v50 = rng.uniform(-0.01, 0.01)`
			`v750 = rng.uniform(-0.005, 0.005)`

			`sequence.append({`
			`'bar_idx': bar_idx,`
			`'vel_div': vd,`
			`'prices': prices_snap,`
			`'vol_regime_ok': _vol_ok(bar_idx),`
			`'v50_vel': v50,`
			`'v750_vel': v750,`
			`})`
			`return sequence`


			`_SKIP_KEYS = {'trade_id'} # UUID — differs per instance by design; not functional`


			`def _compare_dicts(label: str, da: dict, db: dict, tol: float = 1e-9) -> list:`
			`"""Compare two dicts (entry or exit sub-dict). Return list of divergence strings."""`
			`divs = []`
			`if da is None and db is None:`
			`return []`
			`if da is None or db is None:`
			`return [f" {label}: A={da!r} B={db!r}"]`

			`all_keys = (set(da) \| set(db)) - _SKIP_KEYS`
			`for k in sorted(all_keys):`
			`va, vb = da.get(k), db.get(k)`
			`if va == vb:`
			`continue`
			`try:`
			`if math.isclose(float(va), float(vb), rel_tol=tol, abs_tol=tol):`
			`continue`
			`except (TypeError, ValueError):`
			`pass`
			`divs.append(f" {label}[{k!r}]: A={va!r} B={vb!r}")`
			`return divs`


			`def _compare_results(idx: int, r_a: dict, r_b: dict, verbose: bool) -> list:`
			`"""Compare two step_bar() return dicts {'exit':..., 'entry':...}."""`
			`divs = []`
			`divs += _compare_dicts('entry', r_a.get('entry'), r_b.get('entry'))`
			`divs += _compare_dicts('exit', r_a.get('exit'), r_b.get('exit'))`

			`if divs and verbose:`
			`print(f"\n[bar {idx}] DIVERGENCE:")`
			`for d in divs:`
			`print(d)`
			`print(f" A: {r_a}")`
			`print(f" B: {r_b}")`
			`elif not divs and verbose:`
			`entry_a = r_a.get('entry')`
			`exit_a = r_a.get('exit')`
			`if entry_a is not None:`
			`print(f"[bar {idx}] MATCH ENTRY asset={entry_a.get('asset')} lev={entry_a.get('leverage'):.3f}")`
			`if exit_a is not None:`
			`print(f"[bar {idx}] MATCH EXIT asset={exit_a.get('asset')} reason={exit_a.get('reason')} pnl={exit_a.get('pnl_pct',0)*100:.4f}%")`

			`return divs`


			`def run_parity(n_bars: int = 300, verbose: bool = False, seed: int = 7):`
			`print(f"Parity harness: n_bars={n_bars} seed={seed}")`
			`print("Creating engine A (reference) ...")`
			`eng_a = create_d_liq_engine(**ENGINE_KWARGS)`
			`print("Creating engine B (dut) ...")`
			`eng_b = create_d_liq_engine(**ENGINE_KWARGS)`

			`sequence = build_scan_sequence(n_bars=n_bars, seed=seed)`
			`print(f"Scan sequence generated: {len(sequence)} bars, {len(ASSETS)} assets")`
			`print("Running step_bar() on both engines with identical inputs ...")`
			`print()`

			`total_divs = 0`
			`diverged_bars = []`
			`n_entries_a = n_exits_a = 0`
			`n_entries_b = n_exits_b = 0`

			`for scan in sequence:`
			`bar_idx = scan['bar_idx']`
			`vel_div = scan['vel_div']`
			`prices = scan['prices']`
			`vol_regime_ok = scan['vol_regime_ok']`
			`v50_vel = scan['v50_vel']`
			`v750_vel = scan['v750_vel']`

			`r_a = eng_a.step_bar(`
			`bar_idx=bar_idx, vel_div=vel_div, prices=prices,`
			`vol_regime_ok=vol_regime_ok, v50_vel=v50_vel, v750_vel=v750_vel,`
			`)`
			`r_b = eng_b.step_bar(`
			`bar_idx=bar_idx, vel_div=vel_div, prices=prices,`
			`vol_regime_ok=vol_regime_ok, v50_vel=v50_vel, v750_vel=v750_vel,`
			`)`

			`if r_a.get('entry') is not None: n_entries_a += 1`
			`if r_a.get('exit') is not None: n_exits_a += 1`
			`if r_b.get('entry') is not None: n_entries_b += 1`
			`if r_b.get('exit') is not None: n_exits_b += 1`

			`divs = _compare_results(bar_idx, r_a, r_b, verbose=verbose)`
			`if divs:`
			`total_divs += 1`
			`diverged_bars.append(bar_idx)`

			`# Summary`
			`print("=" * 60)`
			`print(f"Engine A: {n_entries_a} entries, {n_exits_a} exits")`
			`print(f"Engine B: {n_entries_b} entries, {n_exits_b} exits")`
			`print()`

			`if total_divs == 0 and n_entries_a > 0:`
			`print(f"RESULT: PASS — engines are functionally identical ({n_entries_a} trades exercised).")`
			`elif total_divs == 0 and n_entries_a == 0:`
			`print("RESULT: PASS (no entries exercised — increase --n-bars or check signal injection)")`
			`else:`
			`print(f"RESULT: FAIL — {total_divs} diverged bars: {diverged_bars[:20]}")`
			`sys.exit(1)`


			`def main():`
			`ap = argparse.ArgumentParser(description="BLUE/GREEN engine parity test")`
			`ap.add_argument('--n-bars', type=int, default=300, help='number of bars to replay')`
			`ap.add_argument('--seed', type=int, default=7, help='RNG seed for synthetic scans')`
			`ap.add_argument('--verbose', action='store_true', help='print each bar match/divergence')`
			`args = ap.parse_args()`

			`run_parity(n_bars=args.n_bars, verbose=args.verbose, seed=args.seed)`


			`if __name__ == '__main__':`
			`main()`