# DOLPHIN/Observability/esof_gate.py

#!/usr/bin/env python3
"""
DOLPHIN EsoF Gate — Advisory-only (NOT wired into BLUE).

Six gating / modulation strategies derived from EsoF advisory data.
All functions are pure (no side effects, no I/O, no HZ/CH deps).
Import-safe; designed to be wired into nautilus_event_trader.py when ready.

Strategies
──────────
A  LEV_SCALE       Soft leverage reduction on negative advisory score
B  HARD_BLOCK      Block UNFAVORABLE + worst-session combo; scale Monday to 0.60×
C  DOW_BLOCK       Block Monday only (WR=27.2%, n=81, high confidence)
D  SESSION_BLOCK   Block NY_AFTERNOON only (WR=35.4%, n=127, high confidence)
E  COMBINED        C + D (Monday OR NY_AFTERNOON)
F  S6_BUCKET       EsoF-modulated S6 bucket sizing multipliers (main research target)
   S6_IRP          EsoF-modulated IRP filter thresholds (needs full backtest to evaluate)

S6 Bucket Multiplier Tables
───────────────────────────
Source: prod/docs/CRITICAL_ASSET_PICKING_BRACKETS_VS._ROI_WR_AT_TRADES.md
Base ("NEUTRAL") = Scenario S6 from that doc:
  B3 2.0×, B6 1.5×, B5 0.5×, B0 0.4×, B1 0.3×, B4 0×, B2 0×

EsoF modulates these: favorable → widen selection (higher mult on weak buckets,
allow B4 back at reduced sizing); unfavorable → concentrate (S2-like, B3+B6 only).

Theory: In high-WR periods (FAVORABLE), even weaker buckets (B0/B1/B5) contribute
gross alpha. In low-WR periods (UNFAVORABLE), concentrate on the only reliably
profitable buckets (B3, B6) and minimise drag from the rest.

IRP ARS Constitutive Coefficients (S6_IRP, for reference)
──────────────────────────────────────────────────────────
  ARS = 0.5×log1p(efficiency) + 0.35×alignment − 0.15×noise×1000
  Filter thresholds (gold spec): ALIGNMENT_MIN=0.20, NOISE_MAX=500, LATENCY_MAX=20
  Source: nautilus_dolphin/nautilus/alpha_asset_selector.py

EsoF modulates the thresholds: favorable → relax (more assets qualify);
unfavorable → tighten (only highest-quality assets pass).
This strategy CANNOT be evaluated against existing CH trades — it changes WHICH
asset is selected, requiring a full IRP replay on historical klines.

Online Calibration Protocol (no feedback loop)
──────────────────────────────────────────────
ALWAYS calibrate EsoF tables from ungated BLUE trades only.
NEVER update EsoF expectancy tables using trades that were gated by EsoF.
Running gated trades through the calibration loop creates a positive/negative
feedback that tightens advisory bands until they lose real-world validity.
The baseline BLUE system (no gate) must always run in shadow to accumulate
out-of-sample calibration data.
"""
from __future__ import annotations

import math
from dataclasses import dataclass, field
from typing import Dict, Optional


# ── Known bucket assignments (from bucket_assignments.pkl / ASSET_BUCKETS.md) ─
# Runtime: prefer loading from pkl; this map is the authoritative fallback.
# Symbols grouped by bucket id. Group order (and symbol order inside each group)
# is preserved so the flattened map keeps its insertion order.
_BUCKET_MEMBERS = (
    # B2 — Macro Anchors
    (2, ("BTCUSDT", "ETHUSDT")),
    # B4 — Blue-Chip Alts (WORST bucket — net-negative even gross)
    (4, ("LTCUSDT", "BNBUSDT", "NEOUSDT", "ETCUSDT", "LINKUSDT")),
    # B0 — Mid-Vol Established Alts (fee-drag losers, gross-positive)
    (0, (
        "ONGUSDT", "WANUSDT", "ONTUSDT", "MTLUSDT", "BANDUSDT",
        "TFUELUSDT", "ICXUSDT", "QTUMUSDT", "RVNUSDT", "XTZUSDT",
        "VETUSDT", "COSUSDT", "HOTUSDT", "STXUSDT",
    )),
    # B5 — Low-BTC-Relevance Alts (gross-positive, large fee victim)
    (5, (
        "TRXUSDT", "IOSTUSDT", "CVCUSDT", "BATUSDT", "ATOMUSDT",
        "ANKRUSDT", "IOTAUSDT", "CHZUSDT", "ALGOUSDT", "DUSKUSDT",
    )),
    # B3 — High-Vol Alts (STAR bucket — only structurally profitable)
    (3, (
        "WINUSDT", "ADAUSDT", "ENJUSDT", "ZILUSDT", "DOGEUSDT",
        "DENTUSDT", "THETAUSDT", "ONEUSDT",
    )),
    # B1 — Extreme Low-Corr (marginal, fee-drag)
    (1, (
        "DASHUSDT", "XRPUSDT", "XLMUSDT", "CELRUSDT", "ZECUSDT",
        "HBARUSDT", "FUNUSDT",
    )),
    # B6 — Extreme Vol Mid-Corr (good, small sample)
    (6, ("ZRXUSDT", "FETUSDT")),
)

# Flat symbol -> bucket_id lookup derived from the groups above.
BUCKET_MAP: Dict[str, int] = {
    symbol: bucket_id
    for bucket_id, members in _BUCKET_MEMBERS
    for symbol in members
}


def get_bucket(asset: str, pkl_assignments: Optional[Dict[str, int]] = None) -> int:
    """
    Look up the bucket id for an asset.

    Args:
        asset:            Symbol to resolve.
        pkl_assignments:  Optional symbol -> bucket_id mapping; when it contains
                          the asset it takes precedence over BUCKET_MAP.

    Returns:
        The bucket id from pkl_assignments if present there, otherwise the
        BUCKET_MAP entry, otherwise 0 (B0) for assets known to neither.
    """
    # No usable override for this asset: built-in map, B0 for unknown symbols.
    if not pkl_assignments or asset not in pkl_assignments:
        return BUCKET_MAP.get(asset, 0)
    return pkl_assignments[asset]


# ── S6 bucket multiplier tables keyed by advisory_label ───────────────────────
#
# Base (NEUTRAL) = Scenario S6 from CRITICAL_ASSET_PICKING doc:
#   B3 2.0×  B6 1.5×  B5 0.5×  B0 0.4×  B1 0.3×  B4 0×  B2 0×
#
# FAVORABLE / MILD_POSITIVE → wider selection: more assets qualify,
#   even B4 re-admitted at very low sizing (0.2×) because in high-WR periods
#   even B4's 34.8% WR is partially redeemed by signal quality uplift.
#
# MILD_NEGATIVE / UNFAVORABLE → concentrate: pull back to S2-like config
#   (B3+B6 only) to minimise drag during periods where signal quality degrades.

# One row per advisory label; tuple position == bucket id (B0..B6).
_S6_ROWS = (
    # label            B0    B1    B2   B3   B4    B5    B6
    ("FAVORABLE",     (0.65, 0.50, 0.0, 2.0, 0.20, 0.75, 1.5)),
    ("MILD_POSITIVE", (0.50, 0.35, 0.0, 2.0, 0.10, 0.60, 1.5)),
    # UNKNOWN replaces NEUTRAL (constituent signals in conflict — empirically the
    # worst-ROI state). NEUTRAL is kept with identical values so historical CH
    # replays still resolve.
    ("UNKNOWN",       (0.40, 0.30, 0.0, 2.0, 0.0,  0.50, 1.5)),
    ("NEUTRAL",       (0.40, 0.30, 0.0, 2.0, 0.0,  0.50, 1.5)),
    ("MILD_NEGATIVE", (0.20, 0.20, 0.0, 1.5, 0.0,  0.30, 1.2)),
    ("UNFAVORABLE",   (0.0,  0.0,  0.0, 1.5, 0.0,  0.0,  1.2)),
)

# advisory_label -> {bucket_id: sizing multiplier}; each label gets its own dict.
S6_MULT: Dict[str, Dict[int, float]] = {
    label: dict(enumerate(row)) for label, row in _S6_ROWS
}

# Base S6 is the UNKNOWN row (same dict object); the NEUTRAL row holds equal values.
S6_BASE: Dict[int, float] = S6_MULT["UNKNOWN"]


# ── IRP filter threshold tables keyed by advisory_label (Strategy S6_IRP) ─────
# Gold spec (NEUTRAL): ALIGNMENT_MIN=0.20, NOISE_MAX=500, LATENCY_MAX=20
# Widening during FAVORABLE: more assets pass IRP → wider selection surface
# Tightening during UNFAVORABLE: only highest-quality assets enter

# Threshold names, in the column order used by the rows below.
_IRP_FIELDS = ("alignment_min", "noise_max", "latency_max")

_IRP_ROWS = (
    # label            alignment_min  noise_max  latency_max
    ("FAVORABLE",     (0.15, 640.0, 24)),
    ("MILD_POSITIVE", (0.17, 560.0, 22)),
    ("UNKNOWN",       (0.20, 500.0, 20)),
    ("NEUTRAL",       (0.20, 500.0, 20)),  # alias
    ("MILD_NEGATIVE", (0.22, 440.0, 18)),
    ("UNFAVORABLE",   (0.25, 380.0, 15)),
)

# advisory_label -> {threshold name: value}; each label gets its own dict.
IRP_PARAMS: Dict[str, Dict[str, float]] = {
    label: dict(zip(_IRP_FIELDS, thresholds)) for label, thresholds in _IRP_ROWS
}

# Gold-spec thresholds: the UNKNOWN row (same dict object); NEUTRAL holds equal values.
IRP_GOLD: Dict[str, float] = IRP_PARAMS["UNKNOWN"]


# ── GateResult ─────────────────────────────────────────────────────────────────

@dataclass
class GateResult:
    """Outcome of evaluating one gate strategy against one advisory dict."""

    # Gate decision: 'ALLOW' | 'BLOCK' | 'SCALE'.
    action: str
    # Leverage multiplier: 1.0 = no change, 0.5 = halve, 0 = block.
    lev_mult: float
    # Human-readable label for logging.
    reason: str
    # Per-bucket sizing multipliers; defaults to a fresh copy of S6_BASE.
    s6_mult: Dict[int, float] = field(default_factory=lambda: dict(S6_BASE))
    # IRP filter thresholds; defaults to a fresh copy of IRP_GOLD.
    irp_params: Dict[str, float] = field(default_factory=lambda: dict(IRP_GOLD))

    @property
    def is_blocked(self) -> bool:
        """True when the action is 'BLOCK' or the leverage multiplier is zero."""
        if self.action == "BLOCK":
            return True
        return self.lev_mult == 0.0


# ── Strategy implementations ───────────────────────────────────────────────────

def strategy_A_lev_scale(adv: dict) -> GateResult:
    """
    Strategy A — LEV_SCALE

    Scales leverage down for negative advisory labels and leaves it untouched
    otherwise; the multiplier never exceeds 1.0.

    Args:
        adv: Advisory dict; must contain "advisory_label".

    Returns:
        GateResult with action "SCALE" (mult < 1.0) or "ALLOW" (mult == 1.0).
    """
    label = adv["advisory_label"]

    # Only the negative labels reduce leverage. UNKNOWN, NEUTRAL (historical
    # alias), MILD_POSITIVE, FAVORABLE and any unrecognised label stay at 1.0.
    reductions = {
        "UNFAVORABLE":   0.50,
        "MILD_NEGATIVE": 0.75,
    }
    mult = reductions.get(label, 1.0)

    if mult < 1.0:
        action = "SCALE"
    else:
        action = "ALLOW"

    return GateResult(
        action=action,
        lev_mult=mult,
        reason=f"A_LEV_SCALE({label},{mult:.2f}x)",
    )


def strategy_B_hard_block(adv: dict) -> GateResult:
    """
    Strategy B — HARD_BLOCK

    Blocks entry when the label is UNFAVORABLE during one of the two worst
    sessions. Otherwise, on Monday, sizing is reduced to 60% (WR=27.2%; not
    blocked entirely, to maintain diversity). Everything else is allowed.

    Args:
        adv: Advisory dict; must contain "advisory_label", "session" and "dow"
             (dow == 0 is treated as Monday).

    Returns:
        GateResult with action "BLOCK", "SCALE" (0.60x) or "ALLOW".
    """
    label, session, dow = adv["advisory_label"], adv["session"], adv["dow"]

    worst_sessions = frozenset(("NY_AFTERNOON", "LOW_LIQUIDITY"))

    # The hard block is checked before the Monday rule, so an UNFAVORABLE
    # worst-session Monday is blocked rather than scaled.
    hard_block = label == "UNFAVORABLE" and session in worst_sessions
    if hard_block:
        return GateResult(
            action="BLOCK",
            lev_mult=0.0,
            reason=f"B_HARD_BLOCK(UNFAVORABLE+{session})",
        )

    if dow == 0:  # Monday
        return GateResult(
            action="SCALE",
            lev_mult=0.60,
            reason="B_HARD_BLOCK(Monday,0.60x)",
        )

    return GateResult(action="ALLOW", lev_mult=1.0, reason="B_HARD_BLOCK(ALLOW)")


def strategy_C_dow_block(adv: dict) -> GateResult:
    """
    Strategy C — DOW_BLOCK

    Blocks every entry on Monday (WR=27.2%, n=81, most robust single signal)
    and allows all other days.

    Args:
        adv: Advisory dict; must contain "dow" (0 is treated as Monday).

    Returns:
        GateResult with action "BLOCK" on Monday, otherwise "ALLOW".
    """
    is_monday = adv["dow"] == 0
    if not is_monday:
        return GateResult(action="ALLOW", lev_mult=1.0, reason="C_DOW_BLOCK(ALLOW)")
    return GateResult(action="BLOCK", lev_mult=0.0, reason="C_DOW_BLOCK(Monday)")


def strategy_D_session_block(adv: dict) -> GateResult:
    """
    Strategy D — SESSION_BLOCK

    Blocks every entry during the NY_AFTERNOON session (WR=35.4%, n=127,
    net=-$3,857) and allows all other sessions.

    Args:
        adv: Advisory dict; must contain "session".

    Returns:
        GateResult with action "BLOCK" in NY_AFTERNOON, otherwise "ALLOW".
    """
    in_ny_afternoon = adv["session"] == "NY_AFTERNOON"
    if not in_ny_afternoon:
        return GateResult(action="ALLOW", lev_mult=1.0,
                          reason="D_SESSION_BLOCK(ALLOW)")
    return GateResult(action="BLOCK", lev_mult=0.0,
                      reason="D_SESSION_BLOCK(NY_AFTERNOON)")


def strategy_E_combined(adv: dict) -> GateResult:
    """
    Strategy E — COMBINED

    Blocks on Monday OR during NY_AFTERNOON, the two highest-confidence
    single-factor signals. Together they cover 208 of 637 trades at WR=32.2%
    (heavy combined drag).

    Args:
        adv: Advisory dict; must contain "dow". "session" is only read when
             the day is not Monday.

    Returns:
        GateResult with action "BLOCK" (reason names the rule that fired,
        Monday taking precedence) or "ALLOW".
    """
    blocked = True
    if adv["dow"] == 0:
        reason = "E_COMBINED(Monday)"
    elif adv["session"] == "NY_AFTERNOON":
        reason = "E_COMBINED(NY_AFTERNOON)"
    else:
        blocked = False
        reason = "E_COMBINED(ALLOW)"

    return GateResult(
        action="BLOCK" if blocked else "ALLOW",
        lev_mult=0.0 if blocked else 1.0,
        reason=reason,
    )


def strategy_F_s6_bucket(adv: dict) -> GateResult:
    """
    Strategy F — S6_BUCKET_MODULATION (primary research target)

    Never blocks or scales leverage: the result is always ALLOW with
    lev_mult=1.0. The payload is the EsoF-modulated tables for the advisory
    label, which the routing layer is meant to apply per bucket:

      * s6_mult    — S6 bucket sizing multipliers (from S6_MULT)
      * irp_params — IRP filter thresholds (from IRP_PARAMS)

    FAVORABLE:   widen selection (B4 back at 0.2×, B5/B0/B1 boosted)
    NEUTRAL:     base S6 (gold scenario from CRITICAL_ASSET_PICKING doc)
    UNFAVORABLE: concentrate (B3+B6 only, S2-like)

    Labels missing from a table fall back to S6_BASE / IRP_GOLD.

    IMPORTANT: this strategy cannot be evaluated against historical PnL alone,
    because it changes WHICH trades occur (more/fewer assets qualify at the
    routing layer). A counterfactual that re-sizes the SAME historical trades
    is only a lower-bound estimate of the real effect.

    Args:
        adv: Advisory dict; must contain "advisory_label".

    Returns:
        GateResult carrying independent copies of the selected tables, so
        callers can mutate them without touching the module-level tables.
    """
    label = adv["advisory_label"]

    bucket_table = S6_MULT.get(label, S6_BASE)
    irp_table = IRP_PARAMS.get(label, IRP_GOLD)

    return GateResult(
        action="ALLOW",
        lev_mult=1.0,
        reason=f"F_S6_BUCKET({label})",
        s6_mult=dict(bucket_table),
        irp_params=dict(irp_table),
    )


# ── Unified dispatcher ─────────────────────────────────────────────────────────

# Single registry: (dispatch key, display name, implementation).
# Both public lookup tables below are derived from it so they cannot drift apart.
_STRATEGY_TABLE = (
    ("A", "A_LEV_SCALE",     strategy_A_lev_scale),
    ("B", "B_HARD_BLOCK",    strategy_B_hard_block),
    ("C", "C_DOW_BLOCK",     strategy_C_dow_block),
    ("D", "D_SESSION_BLOCK", strategy_D_session_block),
    ("E", "E_COMBINED",      strategy_E_combined),
    ("F", "F_S6_BUCKET",     strategy_F_s6_bucket),
)

# Dispatch key -> display name.
STRATEGY_NAMES = {key: name for key, name, _ in _STRATEGY_TABLE}

# Dispatch key -> strategy function taking the advisory dict.
_STRATEGY_FNS = {key: fn for key, _, fn in _STRATEGY_TABLE}


def apply_gate(strategy: str, advisory: dict) -> GateResult:
    """
    Dispatch an advisory dict to the gate strategy registered under a key.

    Args:
        strategy:  Key from STRATEGY_NAMES ('A'..'F')
        advisory:  Dict returned by compute_esof() from esof_advisor.py

    Returns:
        The GateResult produced by the selected strategy
        (action, lev_mult, reason, s6_mult, irp_params).

    Raises:
        KeyError: if strategy key is unknown; the message lists the valid keys.
    """
    if strategy not in _STRATEGY_FNS:
        raise KeyError(f"Unknown strategy '{strategy}'. Valid: {list(_STRATEGY_FNS)}")
    gate_fn = _STRATEGY_FNS[strategy]
    return gate_fn(advisory)


def get_s6_mult(advisory: dict, bucket_id: int) -> float:
    """
    Convenience: return S6 bucket multiplier for a specific advisory + bucket.

    Args:
        advisory:   Advisory dict; must contain "advisory_label". Labels not
                    present in S6_MULT use the base table (S6_BASE).
        bucket_id:  Bucket id to look up in the selected table.

    Returns:
        The multiplier for bucket_id under the advisory label. A bucket id
        missing from the table is sized like B0 of that same table.

    Raises:
        KeyError: if advisory has no "advisory_label" key.
    """
    table = S6_MULT.get(advisory["advisory_label"], S6_BASE)
    mult = table.get(bucket_id)
    if mult is None:
        # Unknown bucket id: treat it as B0 under the active label, mirroring
        # get_bucket()'s B0 fallback for unknown assets. A fixed 0.4 (the base
        # B0 value) would size an unrecognised bucket at 0.4× even under
        # UNFAVORABLE, where every bucket except B3/B6 is meant to be zero.
        # The base-table result is unchanged (B0 there is 0.4).
        mult = table.get(0, 0.4)
    return mult


def get_irp_params(advisory: dict) -> Dict[str, float]:
    """
    Convenience: return IRP filter params for a specific advisory.

    Args:
        advisory: Advisory dict; must contain "advisory_label".

    Returns:
        A new dict copied from the IRP_PARAMS row for the label, or from
        IRP_GOLD when the label has no row.
    """
    label = advisory["advisory_label"]
    thresholds = IRP_PARAMS.get(label, IRP_GOLD)
    # Copy so callers cannot mutate the shared module-level table.
    return dict(thresholds)