# siloqy/prod/clean_arch/violet/parity_harness.py

"""VIOLET V3d: base-sizer parity harness vs recorded live BLUE.

Validates that the V3a ``VioletBetSizer`` reproduces BLUE's BASE conviction curve,
measured against recorded ``dolphin.trade_events`` (vel_div_entry -> leverage).

Per-trade ``leverage`` is NOT base alone — it is ``base_sizer(vel_div) +- modulation``
(SC-haircut / ACB / OB-cascade / "gold"; see
prod/docs/VIOLET_FINDING__MODULATION_LAYER_VS_UNDERUTILIZATION.md). The modulation is a
DEFERRED organ. So the parity GATE here is the BASE / MEDIAN curve: per vel_div bin, the
recorded MEDIAN leverage must track the base sizer's conviction at the bin midpoint —
proving L1 reproduces BLUE's central tendency. Per-trade exact parity is expected to be
low (~1/3) by construction and is NOT the gate.
"""

from __future__ import annotations

import math
from typing import Dict, List, Optional, Tuple

from pydantic import Field

from .alpha_wrappers import VioletBetSizer
from .domain import StrictModel


class BinParity(StrictModel):
    """Parity result for one vel_div bin, as populated by ``base_curve_parity``."""

    # Bin key: vel_div rounded to the nearest multiple of the bin width.
    vd_bin: float
    # Number of recorded samples that fell into this bin (always >= 1).
    n: int = Field(ge=1)
    # Median of the recorded leverage values in this bin.
    recorded_median_leverage: float = Field(ge=0.0, allow_inf_nan=False)
    # Conviction leverage returned by the base sizer when evaluated at ``vd_bin``.
    base_conviction: float = Field(ge=0.0, allow_inf_nan=False)
    # Absolute difference between the recorded median and the base conviction.
    abs_err: float = Field(ge=0.0, allow_inf_nan=False)


class ParityReport(StrictModel):
    """Aggregate outcome of a base-curve parity run, as built by ``base_curve_parity``."""

    # Number of input samples handed to the run (before any domain filtering).
    n_samples: int = Field(ge=0)
    # Number of bins that held enough samples to be compared.
    n_bins: int = Field(ge=0)
    # Largest per-bin absolute error; 0.0 when no bin qualified.
    max_abs_err: float = Field(ge=0.0, allow_inf_nan=False)
    # Pearson correlation between base conviction and recorded medians across bins.
    pearson_r: float = Field(allow_inf_nan=False)
    # Budgets the run was evaluated against, echoed back for traceability.
    max_abs_err_budget: float
    pearson_budget: float
    # Per-bin detail, in ascending ``vd_bin`` order.
    bins: List[BinParity]
    # True only when at least one bin qualified and both budgets were met.
    passed: bool


def _median(xs: List[float]) -> float:
    s = sorted(xs)
    k = len(s)
    if k == 0:
        return 0.0
    mid = k // 2
    return s[mid] if k % 2 else 0.5 * (s[mid - 1] + s[mid])


def _pearson(xs: List[float], ys: List[float]) -> float:
    n = len(xs)
    if n < 2:
        return 1.0
    mx, my = sum(xs) / n, sum(ys) / n
    sxy = sum((x - mx) * (y - my) for x, y in zip(xs, ys))
    sxx = sum((x - mx) ** 2 for x in xs)
    syy = sum((y - my) ** 2 for y in ys)
    if sxx <= 0 or syy <= 0:
        return 1.0
    return sxy / math.sqrt(sxx * syy)


def base_curve_parity(
    samples: List[Tuple[float, float]],          # (vel_div, recorded_leverage)
    sizer: VioletBetSizer,
    *,
    bin_width: float = 0.01,
    min_n: int = 8,
    vel_div_threshold: float = -0.02,
    max_abs_err_budget: float = 1.0,
    pearson_budget: float = 0.95,
) -> ParityReport:
    """Compare the recorded median-leverage curve against the base sizer.

    Samples are binned by vel_div (rounded to the nearest multiple of
    ``bin_width``). For every bin with at least ``min_n`` samples, the MEDIAN
    recorded leverage is compared to the sizer's conviction leverage evaluated
    at the bin value. Gate: max bin abs-err <= ``max_abs_err_budget`` AND
    recorded-vs-base Pearson r >= ``pearson_budget`` across bins, with at
    least one qualifying bin.

    Restricted to the SHORT-signal domain (vd <= vel_div_threshold): outside it
    the base short sizer floors at min_leverage and the recorded trades are
    long-side / edge cases the short base curve does not govern.

    Samples whose vel_div or leverage is NaN/inf are ignored: they cannot be
    binned and would corrupt the ordering the median relies on.

    Args:
        samples: ``(vel_div, recorded_leverage)`` pairs.
        sizer: Sizer under test; called with ``capital=1.0`` and
            ``trade_direction=-1`` at each bin value.
        bin_width: Width of a vel_div bin; must be finite and non-zero.
        min_n: Minimum samples a bin needs to take part in the comparison.
        vel_div_threshold: Upper bound (inclusive) of the evaluated domain.
        max_abs_err_budget: Largest tolerated per-bin absolute error.
        pearson_budget: Smallest tolerated Pearson r across bins.

    Returns:
        A ``ParityReport``; ``n_samples`` counts every input sample, including
        those filtered out.

    Raises:
        ValueError: If ``bin_width`` is zero, NaN or infinite.
    """
    if not math.isfinite(bin_width) or bin_width == 0:
        raise ValueError(f"bin_width must be finite and non-zero, got {bin_width!r}")

    # Bin keys are rounded to hide float noise. Four decimals is enough for the
    # default width; narrower bins need more digits or neighbours would merge.
    key_decimals = max(4, math.ceil(-math.log10(abs(bin_width))) + 2)

    buckets: Dict[float, List[float]] = {}
    for vd, lev in samples:
        lev = float(lev)
        if not (math.isfinite(vd) and math.isfinite(lev)):
            continue
        if vd > vel_div_threshold:
            continue
        b = round(round(vd / bin_width) * bin_width, key_decimals)
        buckets.setdefault(b, []).append(lev)

    bins: List[BinParity] = []
    rec_meds: List[float] = []
    base_vals: List[float] = []
    for b in sorted(buckets):
        levs = buckets[b]
        if len(levs) < min_n:
            continue
        rec_med = _median(levs)
        base = sizer.calculate(capital=1.0, vel_div=b, trade_direction=-1).conviction_leverage
        bins.append(BinParity(
            vd_bin=b, n=len(levs), recorded_median_leverage=rec_med,
            base_conviction=base, abs_err=abs(rec_med - base),
        ))
        rec_meds.append(rec_med)
        base_vals.append(base)

    max_abs_err = max((bp.abs_err for bp in bins), default=0.0)
    r = _pearson(base_vals, rec_meds)
    # An empty comparison must never pass, however good the defaults look.
    passed = bool(bins) and max_abs_err <= max_abs_err_budget and r >= pearson_budget
    return ParityReport(
        n_samples=len(samples), n_bins=len(bins),
        max_abs_err=max_abs_err, pearson_r=r,
        max_abs_err_budget=max_abs_err_budget, pearson_budget=pearson_budget,
        bins=bins, passed=passed,
    )


def load_recorded_samples_from_ch(
    *, limit: int = 5000, ch_url: str = "http://localhost:8123/",
    user: str = "dolphin", key: str = "dolphin_ch_2026",
) -> List[Tuple[float, float]]:
    """Fetch ``(vel_div_entry, leverage)`` pairs of recorded BLUE trades over HTTP.

    The query collapses ``dolphin.trade_events`` to one row per ``trade_id``
    and keeps trades whose exit reason is not ``HIBERNATE_HALT`` and whose
    ``bars_held`` and ``leverage`` are both positive. The TSV response is
    parsed into float pairs; blank lines are skipped.

    Args:
        limit: Maximum number of rows requested (coerced with ``int``).
        ch_url: HTTP endpoint the query is sent to.
        user: Value of the ``X-ClickHouse-User`` header.
        key: Value of the ``X-ClickHouse-Key`` header.

    Returns:
        The parsed ``(vel_div, leverage)`` pairs, in response order.

    Raises:
        ValueError: If a non-blank row does not hold exactly two
            tab-separated floats. Network errors from ``urllib`` propagate.
    """
    import urllib.request

    # NOTE(review): LIMIT without ORDER BY - which rows come back is presumably
    # not guaranteed to be stable between runs; confirm if reproducibility matters.
    # NOTE(review): the default ``key`` looks like a real credential - confirm it
    # is safe to keep in source.
    dedup_cte = (
        "WITH dedup AS (SELECT trade_id, any(vel_div_entry) vd, any(leverage) lev, "
        "any(exit_reason) er, any(bars_held) bh FROM dolphin.trade_events GROUP BY trade_id) "
    )
    filtered_select = "SELECT vd, lev FROM dedup WHERE er!='HIBERNATE_HALT' AND bh>0 AND lev>0 "
    query = f"{dedup_cte}{filtered_select}LIMIT {int(limit)} FORMAT TSV"

    auth_headers = {"X-ClickHouse-User": user, "X-ClickHouse-Key": key}
    request = urllib.request.Request(ch_url, data=query.encode(), headers=auth_headers)
    with urllib.request.urlopen(request, timeout=30) as response:
        payload = response.read().decode()

    rows = (row.split("\t") for row in payload.splitlines() if row.strip())
    return [(float(vd), float(lev)) for vd, lev in rows]


def live_blue_sizer() -> VioletBetSizer:
    """Build the ``VioletBetSizer`` parameterized to live BLUE's BASE curve.

    Values were pinned 2026-06-13 from the recorded median curve
    (max_leverage 9.0, thr -0.02, extreme -0.05, convexity 3).
    """
    pinned_params = {
        "base_fraction": 0.20,
        "min_leverage": 0.5,
        "max_leverage": 9.0,
        "vel_div_threshold": -0.02,
        "vel_div_extreme": -0.05,
        "leverage_convexity": 3.0,
    }
    return VioletBetSizer(**pinned_params)
# VIOLET V3d: base-sizer parity harness + gate vs recorded BLUE
#
# parity_harness.py: median-curve parity of V3a VioletBetSizer vs recorded
# dolphin.trade_events (vel_div->leverage), restricted to short-signal domain.
# GATE PASSES on prod host: pearson 0.9998, max_abs_err 0.238 (budget 1.0) over
# 23 bins -> base conviction sizer reproduces BLUE's central tendency.
# Per-trade scatter is the deferred SC/ACB/OB/gold modulation layer (separate
# finding doc). 3 unit + 1 gate green.
#
# Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
# 2026-06-13 20:26:08 +02:00
			`"""VIOLET V3d: base-sizer parity harness vs recorded live BLUE.`

			Validates that the V3a ``VioletBetSizer`` reproduces BLUE's BASE conviction curve,
			measured against recorded ``dolphin.trade_events`` (vel_div_entry -> leverage).

			Per-trade ``leverage`` is NOT base alone — it is ``base_sizer(vel_div) +- modulation``
			`(SC-haircut / ACB / OB-cascade / "gold"; see`
			`prod/docs/VIOLET_FINDING__MODULATION_LAYER_VS_UNDERUTILIZATION.md). The modulation is a`
			`DEFERRED organ. So the parity GATE here is the BASE / MEDIAN curve: per vel_div bin, the`
			`recorded MEDIAN leverage must track the base sizer's conviction at the bin midpoint —`
			`proving L1 reproduces BLUE's central tendency. Per-trade exact parity is expected to be`
			`low (~1/3) by construction and is NOT the gate.`
			`"""`

			`from __future__ import annotations`

			`import math`
			`from typing import Dict, List, Optional, Tuple`

			`from pydantic import Field`

			`from .alpha_wrappers import VioletBetSizer`
			`from .domain import StrictModel`


			`class BinParity(StrictModel):`
			`vd_bin: float`
			`n: int = Field(ge=1)`
			`recorded_median_leverage: float = Field(ge=0.0, allow_inf_nan=False)`
			`base_conviction: float = Field(ge=0.0, allow_inf_nan=False)`
			`abs_err: float = Field(ge=0.0, allow_inf_nan=False)`


			`class ParityReport(StrictModel):`
			`n_samples: int = Field(ge=0)`
			`n_bins: int = Field(ge=0)`
			`max_abs_err: float = Field(ge=0.0, allow_inf_nan=False)`
			`pearson_r: float = Field(allow_inf_nan=False)`
			`max_abs_err_budget: float`
			`pearson_budget: float`
			`bins: List[BinParity]`
			`passed: bool`


			`def _median(xs: List[float]) -> float:`
			`s = sorted(xs)`
			`k = len(s)`
			`if k == 0:`
			`return 0.0`
			`mid = k // 2`
			`return s[mid] if k % 2 else 0.5 * (s[mid - 1] + s[mid])`


			`def _pearson(xs: List[float], ys: List[float]) -> float:`
			`n = len(xs)`
			`if n < 2:`
			`return 1.0`
			`mx, my = sum(xs) / n, sum(ys) / n`
			`sxy = sum((x - mx) * (y - my) for x, y in zip(xs, ys))`
			`sxx = sum((x - mx) ** 2 for x in xs)`
			`syy = sum((y - my) ** 2 for y in ys)`
			`if sxx <= 0 or syy <= 0:`
			`return 1.0`
			`return sxy / math.sqrt(sxx * syy)`


			`def base_curve_parity(`
			`samples: List[Tuple[float, float]], # (vel_div, recorded_leverage)`
			`sizer: VioletBetSizer,`
			`*,`
			`bin_width: float = 0.01,`
			`min_n: int = 8,`
			`vel_div_threshold: float = -0.02,`
			`max_abs_err_budget: float = 1.0,`
			`pearson_budget: float = 0.95,`
			`) -> ParityReport:`
			`"""Bin by vel_div; compare recorded MEDIAN leverage to the base sizer's`
			`conviction at the bin midpoint. Gate: max bin abs-err <= budget AND`
			`recorded-vs-base Pearson r >= budget across bins.`

			`Restricted to the SHORT-signal domain (vd <= vel_div_threshold): outside it`
			`the base short sizer floors at min_leverage and the recorded trades are`
			`long-side / edge cases the short base curve does not govern.`
			`"""`
			`buckets: Dict[float, List[float]] = {}`
			`for vd, lev in samples:`
			`if vd > vel_div_threshold:`
			`continue`
			`b = round(round(vd / bin_width) * bin_width, 4)`
			`buckets.setdefault(b, []).append(float(lev))`

			`bins: List[BinParity] = []`
			`rec_meds: List[float] = []`
			`base_vals: List[float] = []`
			`for b in sorted(buckets):`
			`levs = buckets[b]`
			`if len(levs) < min_n:`
			`continue`
			`rec_med = _median(levs)`
			`base = sizer.calculate(capital=1.0, vel_div=b, trade_direction=-1).conviction_leverage`
			`bins.append(BinParity(`
			`vd_bin=b, n=len(levs), recorded_median_leverage=rec_med,`
			`base_conviction=base, abs_err=abs(rec_med - base),`
			`))`
			`rec_meds.append(rec_med)`
			`base_vals.append(base)`

			`max_abs_err = max((bp.abs_err for bp in bins), default=0.0)`
			`r = _pearson(base_vals, rec_meds)`
			`passed = bool(bins) and max_abs_err <= max_abs_err_budget and r >= pearson_budget`
			`return ParityReport(`
			`n_samples=len(samples), n_bins=len(bins),`
			`max_abs_err=max_abs_err, pearson_r=r,`
			`max_abs_err_budget=max_abs_err_budget, pearson_budget=pearson_budget,`
			`bins=bins, passed=passed,`
			`)`


			`def load_recorded_samples_from_ch(`
			`*, limit: int = 5000, ch_url: str = "http://localhost:8123/",`
			`user: str = "dolphin", key: str = "dolphin_ch_2026",`
			`) -> List[Tuple[float, float]]:`
			`"""Pull clean (vel_div_entry, leverage) pairs from recorded BLUE trades."""`
			`import urllib.request`

			`sql = (`
			`"WITH dedup AS (SELECT trade_id, any(vel_div_entry) vd, any(leverage) lev, "`
			`"any(exit_reason) er, any(bars_held) bh FROM dolphin.trade_events GROUP BY trade_id) "`
			`"SELECT vd, lev FROM dedup WHERE er!='HIBERNATE_HALT' AND bh>0 AND lev>0 "`
			`f"LIMIT {int(limit)} FORMAT TSV"`
			`)`
			`req = urllib.request.Request(`
			`ch_url, data=sql.encode(),`
			`headers={"X-ClickHouse-User": user, "X-ClickHouse-Key": key},`
			`)`
			`with urllib.request.urlopen(req, timeout=30) as resp:`
			`body = resp.read().decode()`
			`out: List[Tuple[float, float]] = []`
			`for line in body.splitlines():`
			`if not line.strip():`
			`continue`
			`a, b = line.split("\t")`
			`out.append((float(a), float(b)))`
			`return out`


			`def live_blue_sizer() -> VioletBetSizer:`
			`"""The sizer parameterized to live BLUE's BASE curve (pinned 2026-06-13 from the`
			`recorded median curve: max_leverage 9.0, thr -0.02, extreme -0.05, convexity 3)."""`
			`return VioletBetSizer(`
			`base_fraction=0.20, min_leverage=0.5, max_leverage=9.0,`
			`vel_div_threshold=-0.02, vel_div_extreme=-0.05, leverage_convexity=3.0,`
			`)`