diff --git a/prod/VIOLET_dev/reports/violet_v3_parity_20260613_181458.json b/prod/VIOLET_dev/reports/violet_v3_parity_20260613_181458.json new file mode 100644 index 0000000..d37577d --- /dev/null +++ b/prod/VIOLET_dev/reports/violet_v3_parity_20260613_181458.json @@ -0,0 +1,172 @@ +{ + "n_samples": 2111, + "n_bins": 23, + "max_abs_err": 0.23778841851851729, + "pearson_r": 0.9998403353285148, + "max_abs_err_budget": 1.0, + "pearson_budget": 0.95, + "bins": [ + { + "vd_bin": -0.35, + "n": 8, + "recorded_median_leverage": 9.0, + "base_conviction": 9.0, + "abs_err": 0.0 + }, + { + "vd_bin": -0.24, + "n": 8, + "recorded_median_leverage": 9.0, + "base_conviction": 9.0, + "abs_err": 0.0 + }, + { + "vd_bin": -0.23, + "n": 10, + "recorded_median_leverage": 9.0, + "base_conviction": 9.0, + "abs_err": 0.0 + }, + { + "vd_bin": -0.22, + "n": 9, + "recorded_median_leverage": 9.0, + "base_conviction": 9.0, + "abs_err": 0.0 + }, + { + "vd_bin": -0.21, + "n": 8, + "recorded_median_leverage": 9.0, + "base_conviction": 9.0, + "abs_err": 0.0 + }, + { + "vd_bin": -0.19, + "n": 9, + "recorded_median_leverage": 9.0, + "base_conviction": 9.0, + "abs_err": 0.0 + }, + { + "vd_bin": -0.18, + "n": 8, + "recorded_median_leverage": 9.0, + "base_conviction": 9.0, + "abs_err": 0.0 + }, + { + "vd_bin": -0.17, + "n": 8, + "recorded_median_leverage": 9.0, + "base_conviction": 9.0, + "abs_err": 0.0 + }, + { + "vd_bin": -0.16, + "n": 9, + "recorded_median_leverage": 9.0, + "base_conviction": 9.0, + "abs_err": 0.0 + }, + { + "vd_bin": -0.15, + "n": 16, + "recorded_median_leverage": 9.0, + "base_conviction": 9.0, + "abs_err": 0.0 + }, + { + "vd_bin": -0.14, + "n": 11, + "recorded_median_leverage": 9.0, + "base_conviction": 9.0, + "abs_err": 0.0 + }, + { + "vd_bin": -0.13, + "n": 19, + "recorded_median_leverage": 9.0, + "base_conviction": 9.0, + "abs_err": 0.0 + }, + { + "vd_bin": -0.12, + "n": 28, + "recorded_median_leverage": 9.0, + "base_conviction": 9.0, + "abs_err": 0.0 + }, + { + "vd_bin": -0.11, + "n": 17, + "recorded_median_leverage": 9.0, + "base_conviction": 9.0, + "abs_err": 0.0 + }, + { + "vd_bin": -0.1, + "n": 26, + "recorded_median_leverage": 9.0, + "base_conviction": 9.0, + "abs_err": 0.0 + }, + { + "vd_bin": -0.09, + "n": 34, + "recorded_median_leverage": 9.0, + "base_conviction": 9.0, + "abs_err": 0.0 + }, + { + "vd_bin": -0.08, + "n": 53, + "recorded_median_leverage": 9.0, + "base_conviction": 9.0, + "abs_err": 0.0 + }, + { + "vd_bin": -0.07, + "n": 71, + "recorded_median_leverage": 9.0, + "base_conviction": 9.0, + "abs_err": 0.0 + }, + { + "vd_bin": -0.06, + "n": 114, + "recorded_median_leverage": 9.0, + "base_conviction": 9.0, + "abs_err": 0.0 + }, + { + "vd_bin": -0.05, + "n": 162, + "recorded_median_leverage": 9.0, + "base_conviction": 9.0, + "abs_err": 0.0 + }, + { + "vd_bin": -0.04, + "n": 278, + "recorded_median_leverage": 2.7807301000000004, + "base_conviction": 3.0185185185185177, + "abs_err": 0.23778841851851729 + }, + { + "vd_bin": -0.03, + "n": 519, + "recorded_median_leverage": 0.8341213, + "base_conviction": 0.8148148148148147, + "abs_err": 0.019306485185185296 + }, + { + "vd_bin": -0.02, + "n": 396, + "recorded_median_leverage": 0.513395575, + "base_conviction": 0.5, + "abs_err": 0.013395574999999993 + } + ], + "passed": true +} \ No newline at end of file diff --git a/prod/clean_arch/violet/parity_harness.py b/prod/clean_arch/violet/parity_harness.py new file mode 100644 index 0000000..6124de4 --- /dev/null +++ b/prod/clean_arch/violet/parity_harness.py @@ -0,0 +1,153 @@ +"""VIOLET V3d: base-sizer parity harness vs recorded live BLUE. + +Validates that the V3a ``VioletBetSizer`` reproduces BLUE's BASE conviction curve, +measured against recorded ``dolphin.trade_events`` (vel_div_entry -> leverage). + +Per-trade ``leverage`` is NOT base alone — it is ``base_sizer(vel_div) +- modulation`` +(SC-haircut / ACB / OB-cascade / "gold"; see +prod/docs/VIOLET_FINDING__MODULATION_LAYER_VS_UNDERUTILIZATION.md). The modulation is a +DEFERRED organ. So the parity GATE here is the BASE / MEDIAN curve: per vel_div bin, the +recorded MEDIAN leverage must track the base sizer's conviction at the bin midpoint — +proving L1 reproduces BLUE's central tendency. Per-trade exact parity is expected to be +low (~1/3) by construction and is NOT the gate. +""" + +from __future__ import annotations + +import math +from typing import Dict, List, Optional, Tuple + +from pydantic import Field + +from .alpha_wrappers import VioletBetSizer +from .domain import StrictModel + + +class BinParity(StrictModel): + vd_bin: float + n: int = Field(ge=1) + recorded_median_leverage: float = Field(ge=0.0, allow_inf_nan=False) + base_conviction: float = Field(ge=0.0, allow_inf_nan=False) + abs_err: float = Field(ge=0.0, allow_inf_nan=False) + + +class ParityReport(StrictModel): + n_samples: int = Field(ge=0) + n_bins: int = Field(ge=0) + max_abs_err: float = Field(ge=0.0, allow_inf_nan=False) + pearson_r: float = Field(allow_inf_nan=False) + max_abs_err_budget: float + pearson_budget: float + bins: List[BinParity] + passed: bool + + +def _median(xs: List[float]) -> float: + s = sorted(xs) + k = len(s) + if k == 0: + return 0.0 + mid = k // 2 + return s[mid] if k % 2 else 0.5 * (s[mid - 1] + s[mid]) + + +def _pearson(xs: List[float], ys: List[float]) -> float: + n = len(xs) + if n < 2: + return 1.0 + mx, my = sum(xs) / n, sum(ys) / n + sxy = sum((x - mx) * (y - my) for x, y in zip(xs, ys)) + sxx = sum((x - mx) ** 2 for x in xs) + syy = sum((y - my) ** 2 for y in ys) + if sxx <= 0 or syy <= 0: + return 1.0 + return sxy / math.sqrt(sxx * syy) + + +def base_curve_parity( + samples: List[Tuple[float, float]], # (vel_div, recorded_leverage) + sizer: VioletBetSizer, + *, + bin_width: float = 0.01, + min_n: int = 8, + vel_div_threshold: float = -0.02, + max_abs_err_budget: float = 1.0, + pearson_budget: float = 0.95, +) -> ParityReport: + """Bin by vel_div; compare recorded MEDIAN leverage to the base sizer's + conviction at the bin midpoint. Gate: max bin abs-err <= budget AND + recorded-vs-base Pearson r >= budget across bins. + + Restricted to the SHORT-signal domain (vd <= vel_div_threshold): outside it + the base short sizer floors at min_leverage and the recorded trades are + long-side / edge cases the short base curve does not govern. + """ + buckets: Dict[float, List[float]] = {} + for vd, lev in samples: + if vd > vel_div_threshold: + continue + b = round(round(vd / bin_width) * bin_width, 4) + buckets.setdefault(b, []).append(float(lev)) + + bins: List[BinParity] = [] + rec_meds: List[float] = [] + base_vals: List[float] = [] + for b in sorted(buckets): + levs = buckets[b] + if len(levs) < min_n: + continue + rec_med = _median(levs) + base = sizer.calculate(capital=1.0, vel_div=b, trade_direction=-1).conviction_leverage + bins.append(BinParity( + vd_bin=b, n=len(levs), recorded_median_leverage=rec_med, + base_conviction=base, abs_err=abs(rec_med - base), + )) + rec_meds.append(rec_med) + base_vals.append(base) + + max_abs_err = max((bp.abs_err for bp in bins), default=0.0) + r = _pearson(base_vals, rec_meds) + passed = bool(bins) and max_abs_err <= max_abs_err_budget and r >= pearson_budget + return ParityReport( + n_samples=len(samples), n_bins=len(bins), + max_abs_err=max_abs_err, pearson_r=r, + max_abs_err_budget=max_abs_err_budget, pearson_budget=pearson_budget, + bins=bins, passed=passed, + ) + + +def load_recorded_samples_from_ch( + *, limit: int = 5000, ch_url: str = "http://localhost:8123/", + user: str = "dolphin", key: str = "dolphin_ch_2026", +) -> List[Tuple[float, float]]: + """Pull clean (vel_div_entry, leverage) pairs from recorded BLUE trades.""" + import urllib.request + + sql = ( + "WITH dedup AS (SELECT trade_id, any(vel_div_entry) vd, any(leverage) lev, " + "any(exit_reason) er, any(bars_held) bh FROM dolphin.trade_events GROUP BY trade_id) " + "SELECT vd, lev FROM dedup WHERE er!='HIBERNATE_HALT' AND bh>0 AND lev>0 " + f"LIMIT {int(limit)} FORMAT TSV" + ) + req = urllib.request.Request( + ch_url, data=sql.encode(), + headers={"X-ClickHouse-User": user, "X-ClickHouse-Key": key}, + ) + with urllib.request.urlopen(req, timeout=30) as resp: + body = resp.read().decode() + out: List[Tuple[float, float]] = [] + for line in body.splitlines(): + if not line.strip(): + continue + a, b = line.split("\t") + out.append((float(a), float(b))) + return out + + +def live_blue_sizer() -> VioletBetSizer: + """The sizer parameterized to live BLUE's BASE curve (pinned 2026-06-13 from the + recorded median curve: max_leverage 9.0, thr -0.02, extreme -0.05, convexity 3).""" + return VioletBetSizer( + base_fraction=0.20, min_leverage=0.5, max_leverage=9.0, + vel_div_threshold=-0.02, vel_div_extreme=-0.05, leverage_convexity=3.0, + ) diff --git a/prod/clean_arch/violet/test_violet_parity.py b/prod/clean_arch/violet/test_violet_parity.py new file mode 100644 index 0000000..b75e68d --- /dev/null +++ b/prod/clean_arch/violet/test_violet_parity.py @@ -0,0 +1,80 @@ +"""V3d: base-sizer median-curve parity vs recorded BLUE (unit + @gate).""" + +from __future__ import annotations + +import json +import sys +from datetime import datetime, timezone +from pathlib import Path + +sys.path.insert(0, "/mnt/dolphinng5_predict") + +import pytest + +from prod.clean_arch.violet.parity_harness import ( + ParityReport, base_curve_parity, live_blue_sizer, load_recorded_samples_from_ch, +) + +REPORTS_DIR = Path("/mnt/dolphinng5_predict/prod/VIOLET_dev/reports") + + +def _base_samples(per_bin: int = 12): + """Synthetic samples drawn exactly from the base curve (perfect parity).""" + s = live_blue_sizer() + out = [] + for k in range(2, 13): # vd = -0.02 .. -0.12, all on 0.01 bin centers + vd = -k / 100.0 + base = s.calculate(capital=1.0, vel_div=vd, trade_direction=-1).conviction_leverage + out.extend([(vd, base)] * per_bin) + return out + + +def test_perfect_base_samples_pass(): + rep = base_curve_parity(_base_samples(), live_blue_sizer()) + assert rep.passed + assert rep.max_abs_err == pytest.approx(0.0, abs=1e-6) + assert rep.pearson_r == pytest.approx(1.0, abs=1e-6) + + +def test_median_tracks_base_under_minority_haircuts(): + # majority at base, minority haircut down -> median still tracks base -> passes. + s = live_blue_sizer() + samples = [] + for k in range(2, 13): # vd on 0.01 bin centers + vd = -k / 100.0 + base = s.calculate(capital=1.0, vel_div=vd, trade_direction=-1).conviction_leverage + samples.extend([(vd, base)] * 8) # 8 at base + samples.extend([(vd, base * 0.3)] * 3) # 3 haircut (minority) + rep = base_curve_parity(samples, s) + assert rep.passed + + +def test_decorrelated_noise_fails_gate(): + # leverage independent of vel_div -> low correlation -> gate fails (guard works). + import random + rng = random.Random(7) + samples = [(vd / 1000.0, rng.uniform(0.5, 9.0)) + for vd in range(-60, -1) for _ in range(12)] + rep = base_curve_parity(samples, live_blue_sizer()) + assert not rep.passed + + +@pytest.mark.gate +def test_base_curve_parity_vs_recorded_blue(): + """GATE (prod host): the V3a base sizer reproduces BLUE's recorded median curve.""" + samples = load_recorded_samples_from_ch(limit=5000) + assert len(samples) >= 500, f"too few recorded samples: {len(samples)}" + rep = base_curve_parity(samples, live_blue_sizer()) + + REPORTS_DIR.mkdir(parents=True, exist_ok=True) + ts = datetime.now(timezone.utc).strftime("%Y%m%d_%H%M%S") + (REPORTS_DIR / f"violet_v3_parity_{ts}.json").write_text( + json.dumps(rep.model_dump(), indent=2, default=str)) + + assert isinstance(rep, ParityReport) + assert rep.n_bins >= 5 + assert rep.passed, ( + f"base-curve parity FAILED: max_abs_err={rep.max_abs_err:.3f} " + f"(budget {rep.max_abs_err_budget}), pearson={rep.pearson_r:.3f} " + f"(budget {rep.pearson_budget})" + ) diff --git a/prod/docs/VIOLET_FINDING__MODULATION_LAYER_VS_UNDERUTILIZATION.md b/prod/docs/VIOLET_FINDING__MODULATION_LAYER_VS_UNDERUTILIZATION.md new file mode 100644 index 0000000..24b1b6f --- /dev/null +++ b/prod/docs/VIOLET_FINDING__MODULATION_LAYER_VS_UNDERUTILIZATION.md @@ -0,0 +1,77 @@ +# VIOLET Finding — The Sizing-Modulation Layer vs. the Capital-Under-Utilization Result + +**Status:** Finding / caveat, 2026-06-13. Separate doc by operator request. Qualifies +(does NOT overturn) the [[blue_margin_envelope_study]] and constrains the +`VIOLET_STUDY_SPEC__BASE_FRACTION_SIZING.md` (#3) and any pre-V4 sizing work. + +## TL;DR + +The newly-isolated downstream **sizing-modulation layer** (SC-haircut / ACB / OB-cascade +/ "gold" — the organs that ride along ACBv6) does **not** counter the margin study: that +study used **actual recorded notionals**, which are already post-modulation. But it +**sharpens** what "under-utilized capital" means, and makes the modulation layer a +**required** pre-execution component for VIOLET — not optional. + +## 1. Why the margin study is NOT contradicted + +The margin/viability study computed `our_leverage = entry_price·quantity / capital_before` +from `dolphin.trade_events` — i.e. from the **notionals BLUE actually traded**, haircuts +and boosts already baked in. The modulation is therefore *inside* the measured numbers, +not a missed factor. Conclusions stand on the as-traded data: + +- max realized `our_leverage` ≈ **1.81** +- **100%** of trades margin-feasible at **2×** exchange leverage +- **+$47k** clean deduped edge + +## 2. How recorded leverage decomposes (the discovery) + +Recorded conviction `leverage` is **NOT** the base bet-sizer output alone: + +``` +recorded_leverage(trade) = base_sizer(vel_div) ± modulation(SC, ACB, OB, gold) +``` + +- **base_sizer(vel_div)** — cubic-convex strength³ curve; VIOLET's V3a `VioletBetSizer` + reproduces it (validated on binned MEDIANS: vd −0.03→0.83, −0.04→2.78, −0.05→9.0 with + max_leverage=9 / thr −0.02 / extreme −0.05 / convexity 3). +- **modulation** — mostly **downward haircuts** (mins far below the median per vel_div + bin), with some upward boosts to the 9 cap. This is the per-trade scatter that drops + exact per-trade parity to ~34% while the median curve matches. + +## 3. The sharpening (this is the load-bearing part) + +The study's **median wallet utilization was ~6.8%** (`our_leverage` p50 ≈ 0.068). That +low median is **largely the haircut layer at work** — BLUE deliberately sized most trades +far below the base curve. The **max 1.81** is the *un-haircut, base-saturated* tail. + +Consequences: + +1. **Margin feasibility is robust** even if the haircuts are later dropped: the study's + worst case (1.81) already equals the base-saturated value, and 2× covers it. Sizing at + full base does not break margin. +2. **"Under-utilized ⇒ free ROI" is now GUARDED.** Much of the idle ~93% is **not waste** + — it is the modulation layer **deliberately de-risking** (cutting size on + lower-conviction / higher-risk setups). It is risk knowledge encoded as size. + +## 4. Binding implications + +- **#3 base-fraction study MUST NOT read the 6.8% median as free headroom.** A large part + of it is risk-management. Any base-fraction increase must be evaluated *with* the + modulation layer modeled, conditioned on the regime-robustness work — never by naively + reclaiming the idle capital. +- **The modulation layer is REQUIRED before live execution (V4).** VIOLET running the + **base sizer alone** would size *bigger than BLUE did* on exactly the trades BLUE chose + to haircut → utilization jumps from ~7% median toward the base, and **realized + risk/return diverges from the recorded +$47k** (more variance, possibly worse + risk-adjusted). Median-curve base parity (V3d) is necessary but **not sufficient** for + faithful BLUE replication. +- **New deferred task (pre-V4):** wrap the SC-haircut / ACB / OB-cascade / "gold" + modulation organs as a VIOLET sizing-modulation layer on top of the base + `VioletBetSizer`, with its own per-trade parity gate vs recorded `leverage`. + +## 5. Related + +[[blue_margin_envelope_study]] · [[violet_v3_alpha_doctrine]] (#9 keystone, #11 parity +finding) · `VIOLET_STUDY_SPEC__BASE_FRACTION_SIZING.md` · `prod/clean_arch/violet/alpha_wrappers.py` +(base sizer) · live organs in `prod/nautilus_event_trader.py` +(`_apply_sc_entry_size_multiplier`, `_record_sc_haircut`, `AdaptiveCircuitBreaker`).