725 lines
30 KiB
Python
725 lines
30 KiB
Python
|
|
"""
|
|||
|
|
prod/tests/test_mc_scenarios.py
|
|||
|
|
================================
|
|||
|
|
Monte Carlo + fuzz analysis of bucket-routing scenarios S1–S6.
|
|||
|
|
|
|||
|
|
Three test layers:
|
|||
|
|
1. Bootstrap MC (10 K draws) — confidence envelopes per scenario
|
|||
|
|
2. Multiplier fuzzer (5 K random configs) — S6 sensitivity / Pareto frontier
|
|||
|
|
3. Sequence fuzzer (2 K permutations) — order-independence of S6 edge
|
|||
|
|
|
|||
|
|
Run:
|
|||
|
|
python -m pytest prod/tests/test_mc_scenarios.py -v --category monte_carlo
|
|||
|
|
# or standalone (generates full report):
|
|||
|
|
python prod/tests/test_mc_scenarios.py
|
|||
|
|
"""
|
|||
|
|
|
|||
|
|
import json
|
|||
|
|
import math
|
|||
|
|
import pickle
|
|||
|
|
import random
|
|||
|
|
import sys
|
|||
|
|
import time
|
|||
|
|
import urllib.request
|
|||
|
|
import base64
|
|||
|
|
from collections import defaultdict
|
|||
|
|
from datetime import datetime, timezone
|
|||
|
|
from pathlib import Path
|
|||
|
|
|
|||
|
|
import numpy as np
|
|||
|
|
import pytest
|
|||
|
|
|
|||
|
|
# ── Paths ────────────────────────────────────────────────────────────────────
|
|||
|
|
|
|||
|
|
ROOT = Path(__file__).parent.parent.parent
|
|||
|
|
BUCKET_PKL = ROOT / "adaptive_exit" / "models" / "bucket_assignments.pkl"
|
|||
|
|
RESULTS_DIR = Path(__file__).parent / "mc_results"
|
|||
|
|
RESULTS_DIR.mkdir(exist_ok=True)
|
|||
|
|
|
|||
|
|
CH_URL = "http://localhost:8123/?database=dolphin"
|
|||
|
|
CH_AUTH = base64.b64encode(b"dolphin:dolphin_ch_2026").decode()
|
|||
|
|
|
|||
|
|
START_CAPITAL = 25_000.0
|
|||
|
|
N_BOOTSTRAP = 10_000
|
|||
|
|
N_FUZZ = 5_000
|
|||
|
|
N_PERMUTE = 2_000
|
|||
|
|
SEED = 42
|
|||
|
|
|
|||
|
|
# ── Scenario definitions ─────────────────────────────────────────────────────
|
|||
|
|
# Each entry: (label, {bucket: multiplier}, exclude_set)
|
|||
|
|
# Omitted buckets default to mult=1.0; excluded buckets get mult=0.0.
|
|||
|
|
|
|||
|
|
SCENARIOS = {
|
|||
|
|
"Baseline": ({}, set()),
|
|||
|
|
"S1_B3only":({3: 1.0}, {0,1,2,4,5,6}),
|
|||
|
|
"S2_B3B6": ({3: 1.0, 6: 1.0}, {0,1,2,4,5}),
|
|||
|
|
"S3_KillB4_HalveRest": ({0:.5, 1:.5, 3:1.0, 5:.5, 6:1.0}, {4}),
|
|||
|
|
"S5_KillB4B1_HalveB0B5":({0:.5, 3:1.0, 5:.5, 6:1.0}, {1,4}),
|
|||
|
|
"S4_KillB4_Halve_2xB3": ({0:.5, 1:.5, 3:2.0, 5:.5, 6:1.0}, {4}),
|
|||
|
|
"S6_Tiered":({0:.4, 1:.3, 3:2.0, 5:.5, 6:1.5}, {4}),
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
# ── Data loading ──────────────────────────────────────────────────────────────
|
|||
|
|
|
|||
|
|
def _ch_fetch(sql: str) -> str:
|
|||
|
|
req = urllib.request.Request(CH_URL, data=sql.encode(),
|
|||
|
|
headers={"Authorization": f"Basic {CH_AUTH}"})
|
|||
|
|
with urllib.request.urlopen(req, timeout=10) as r:
|
|||
|
|
return r.read().decode().strip()
|
|||
|
|
|
|||
|
|
|
|||
|
|
def load_trades() -> list[dict]:
|
|||
|
|
"""
|
|||
|
|
Load non-HIBERNATE_HALT trades from CH, tagged with KMeans bucket_id.
|
|||
|
|
Falls back to /tmp/trades_for_scenario.tsv if CH is unreachable.
|
|||
|
|
"""
|
|||
|
|
with open(BUCKET_PKL, "rb") as f:
|
|||
|
|
bucket_map = pickle.load(f)["assignments"] # asset → int
|
|||
|
|
|
|||
|
|
rows = []
|
|||
|
|
try:
|
|||
|
|
tsv = _ch_fetch(
|
|||
|
|
"SELECT asset, pnl, pnl_pct, leverage, exit_reason "
|
|||
|
|
"FROM trade_events "
|
|||
|
|
"WHERE exit_reason != 'HIBERNATE_HALT' "
|
|||
|
|
"ORDER BY ts ASC "
|
|||
|
|
"FORMAT TabSeparated"
|
|||
|
|
)
|
|||
|
|
for line in tsv.splitlines():
|
|||
|
|
parts = line.split("\t")
|
|||
|
|
if len(parts) < 5:
|
|||
|
|
continue
|
|||
|
|
asset, pnl, pnl_pct, lev, exit_reason = parts
|
|||
|
|
b = bucket_map.get(asset)
|
|||
|
|
if b is None:
|
|||
|
|
continue
|
|||
|
|
rows.append({
|
|||
|
|
"asset": asset,
|
|||
|
|
"pnl": float(pnl),
|
|||
|
|
"pnl_pct": float(pnl_pct),
|
|||
|
|
"leverage": float(lev),
|
|||
|
|
"exit_reason": exit_reason,
|
|||
|
|
"bucket": b,
|
|||
|
|
})
|
|||
|
|
except Exception as e:
|
|||
|
|
# Fallback: use the TSV snapshot generated earlier this session
|
|||
|
|
fallback = Path("/tmp/trades_for_scenario.tsv")
|
|||
|
|
if not fallback.exists():
|
|||
|
|
raise RuntimeError(f"CH unavailable ({e}) and no TSV fallback found") from e
|
|||
|
|
import csv
|
|||
|
|
with open(fallback) as f:
|
|||
|
|
reader = csv.DictReader(
|
|||
|
|
f, fieldnames=["asset","pnl","pnl_pct","leverage","exit_reason","ts"],
|
|||
|
|
delimiter="\t",
|
|||
|
|
)
|
|||
|
|
for r in reader:
|
|||
|
|
if r["exit_reason"] == "HIBERNATE_HALT":
|
|||
|
|
continue
|
|||
|
|
b = bucket_map.get(r["asset"])
|
|||
|
|
if b is None:
|
|||
|
|
continue
|
|||
|
|
rows.append({
|
|||
|
|
"asset": r["asset"],
|
|||
|
|
"pnl": float(r["pnl"]),
|
|||
|
|
"pnl_pct": float(r["pnl_pct"]),
|
|||
|
|
"leverage": float(r["leverage"]),
|
|||
|
|
"exit_reason": r["exit_reason"],
|
|||
|
|
"bucket": b,
|
|||
|
|
})
|
|||
|
|
return rows
|
|||
|
|
|
|||
|
|
|
|||
|
|
def apply_scenario(
|
|||
|
|
pnl_array: np.ndarray,
|
|||
|
|
bucket_array: np.ndarray,
|
|||
|
|
mults: dict,
|
|||
|
|
exclude: set,
|
|||
|
|
) -> np.ndarray:
|
|||
|
|
"""Apply bucket multipliers to a (n,) or (sims, n) PnL array."""
|
|||
|
|
out = pnl_array.copy().astype(float)
|
|||
|
|
for b in range(7):
|
|||
|
|
mask = bucket_array == b
|
|||
|
|
if b in exclude:
|
|||
|
|
out[..., mask] = 0.0
|
|||
|
|
elif b in mults:
|
|||
|
|
out[..., mask] *= mults[b]
|
|||
|
|
return out
|
|||
|
|
|
|||
|
|
|
|||
|
|
# ── Simulation core ───────────────────────────────────────────────────────────
|
|||
|
|
|
|||
|
|
def _max_dd_vectorized(capital_curves: np.ndarray) -> np.ndarray:
|
|||
|
|
"""
|
|||
|
|
capital_curves: (n_sim, n_trades+1) including START as col 0.
|
|||
|
|
Returns max drawdown % per simulation.
|
|||
|
|
"""
|
|||
|
|
running_max = np.maximum.accumulate(capital_curves, axis=1)
|
|||
|
|
dd = (running_max - capital_curves) / running_max * 100
|
|||
|
|
return dd.max(axis=1)
|
|||
|
|
|
|||
|
|
|
|||
|
|
def _sortino(pnl_matrix: np.ndarray) -> np.ndarray:
|
|||
|
|
"""Sortino per simulation: mean / downside_std (annot: no rf rate)."""
|
|||
|
|
means = pnl_matrix.mean(axis=1)
|
|||
|
|
neg = np.where(pnl_matrix < 0, pnl_matrix, 0.0)
|
|||
|
|
dstd = np.sqrt((neg ** 2).mean(axis=1))
|
|||
|
|
with np.errstate(divide="ignore", invalid="ignore"):
|
|||
|
|
return np.where(dstd > 0, means / dstd, 0.0)
|
|||
|
|
|
|||
|
|
|
|||
|
|
def bootstrap_scenario(
|
|||
|
|
pnl_vec: np.ndarray,
|
|||
|
|
bucket_vec: np.ndarray,
|
|||
|
|
mults: dict,
|
|||
|
|
exclude: set,
|
|||
|
|
n_sim: int = N_BOOTSTRAP,
|
|||
|
|
rng: np.random.Generator = None,
|
|||
|
|
) -> dict:
|
|||
|
|
"""
|
|||
|
|
Bootstrap (resample with replacement) MC for one scenario.
|
|||
|
|
Returns a dict of metric arrays, each shape (n_sim,).
|
|||
|
|
"""
|
|||
|
|
if rng is None:
|
|||
|
|
rng = np.random.default_rng(SEED)
|
|||
|
|
|
|||
|
|
n = len(pnl_vec)
|
|||
|
|
idx = rng.integers(0, n, size=(n_sim, n)) # (n_sim, n)
|
|||
|
|
raw = pnl_vec[idx] # (n_sim, n)
|
|||
|
|
bkts = bucket_vec[idx] # (n_sim, n)
|
|||
|
|
|
|||
|
|
# Apply scenario multipliers per simulation
|
|||
|
|
sim_pnl = raw.copy().astype(float)
|
|||
|
|
for b in range(7):
|
|||
|
|
mask = bkts == b
|
|||
|
|
if b in exclude:
|
|||
|
|
sim_pnl[mask] = 0.0
|
|||
|
|
elif b in mults:
|
|||
|
|
sim_pnl[mask] *= mults[b]
|
|||
|
|
|
|||
|
|
caps = START_CAPITAL + np.cumsum(sim_pnl, axis=1) # (n_sim, n)
|
|||
|
|
curves = np.concatenate(
|
|||
|
|
[np.full((n_sim, 1), START_CAPITAL), caps], axis=1
|
|||
|
|
)
|
|||
|
|
|
|||
|
|
final = caps[:, -1]
|
|||
|
|
roi = (final - START_CAPITAL) / START_CAPITAL * 100
|
|||
|
|
max_dd = _max_dd_vectorized(curves)
|
|||
|
|
means = sim_pnl.mean(axis=1)
|
|||
|
|
stds = sim_pnl.std(axis=1)
|
|||
|
|
with np.errstate(divide="ignore", invalid="ignore"):
|
|||
|
|
sharpe = np.where(stds > 0, means / stds, 0.0)
|
|||
|
|
sortino = _sortino(sim_pnl)
|
|||
|
|
|
|||
|
|
return {
|
|||
|
|
"final": final,
|
|||
|
|
"roi": roi,
|
|||
|
|
"max_dd": max_dd,
|
|||
|
|
"sharpe": sharpe,
|
|||
|
|
"sortino": sortino,
|
|||
|
|
"n_trades": n_sim,
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
|
|||
|
|
def summarise(arr: np.ndarray, name: str = "") -> dict:
|
|||
|
|
pcts = np.percentile(arr, [5, 10, 25, 50, 75, 90, 95])
|
|||
|
|
return {
|
|||
|
|
"name": name,
|
|||
|
|
"mean": float(arr.mean()),
|
|||
|
|
"std": float(arr.std()),
|
|||
|
|
"p5": float(pcts[0]),
|
|||
|
|
"p10": float(pcts[1]),
|
|||
|
|
"p25": float(pcts[2]),
|
|||
|
|
"p50": float(pcts[3]),
|
|||
|
|
"p75": float(pcts[4]),
|
|||
|
|
"p90": float(pcts[5]),
|
|||
|
|
"p95": float(pcts[6]),
|
|||
|
|
"min": float(arr.min()),
|
|||
|
|
"max": float(arr.max()),
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
|
|||
|
|
# ── Fuzzer ────────────────────────────────────────────────────────────────────
|
|||
|
|
|
|||
|
|
# Bounds for each bucket multiplier in the fuzzer
|
|||
|
|
FUZZ_BOUNDS = {
|
|||
|
|
0: (0.0, 0.8), # B0
|
|||
|
|
1: (0.0, 0.6), # B1
|
|||
|
|
2: (0.0, 0.0), # B2 — always 0 (not traded)
|
|||
|
|
3: (1.0, 3.5), # B3 — core alpha, always ≥ 1
|
|||
|
|
4: (0.0, 0.0), # B4 — always 0 (structural loser)
|
|||
|
|
5: (0.0, 1.2), # B5
|
|||
|
|
6: (0.5, 2.5), # B6
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
|
|||
|
|
def fuzz_multipliers(
|
|||
|
|
pnl_vec: np.ndarray,
|
|||
|
|
bucket_vec: np.ndarray,
|
|||
|
|
n_fuzz: int = N_FUZZ,
|
|||
|
|
seed: int = SEED,
|
|||
|
|
) -> list[dict]:
|
|||
|
|
"""
|
|||
|
|
Random-search the multiplier space. Deterministic (no bootstrap) —
|
|||
|
|
applies each config to the full trade sequence. Returns list of
|
|||
|
|
result dicts sorted by Sharpe descending.
|
|||
|
|
"""
|
|||
|
|
rng = random.Random(seed)
|
|||
|
|
results = []
|
|||
|
|
|
|||
|
|
for _ in range(n_fuzz):
|
|||
|
|
mults = {}
|
|||
|
|
for b, (lo, hi) in FUZZ_BOUNDS.items():
|
|||
|
|
if lo == hi:
|
|||
|
|
mults[b] = lo
|
|||
|
|
else:
|
|||
|
|
mults[b] = lo + rng.random() * (hi - lo)
|
|||
|
|
|
|||
|
|
scaled = apply_scenario(pnl_vec, bucket_vec, mults, exclude=set())
|
|||
|
|
caps = START_CAPITAL + np.cumsum(scaled)
|
|||
|
|
curve = np.concatenate([[START_CAPITAL], caps])
|
|||
|
|
final = caps[-1]
|
|||
|
|
roi = (final - START_CAPITAL) / START_CAPITAL * 100
|
|||
|
|
run_mx = np.maximum.accumulate(curve)
|
|||
|
|
max_dd = ((run_mx - curve) / run_mx * 100).max()
|
|||
|
|
mean = scaled.mean()
|
|||
|
|
std = scaled.std()
|
|||
|
|
sharpe = mean / std if std > 0 else 0.0
|
|||
|
|
neg = scaled[scaled < 0]
|
|||
|
|
dstd = math.sqrt((neg**2).mean()) if len(neg) else 0.0
|
|||
|
|
sortino = mean / dstd if dstd > 0 else 0.0
|
|||
|
|
|
|||
|
|
results.append({
|
|||
|
|
"mults": {b: round(v, 4) for b, v in mults.items()},
|
|||
|
|
"roi": round(roi, 3),
|
|||
|
|
"max_dd": round(max_dd, 3),
|
|||
|
|
"sharpe": round(sharpe, 5),
|
|||
|
|
"sortino": round(sortino, 5),
|
|||
|
|
"final": round(final, 2),
|
|||
|
|
})
|
|||
|
|
|
|||
|
|
results.sort(key=lambda x: -x["sharpe"])
|
|||
|
|
return results
|
|||
|
|
|
|||
|
|
|
|||
|
|
def sensitivity_analysis(fuzz_results: list[dict]) -> dict:
|
|||
|
|
"""
|
|||
|
|
Pearson correlation between each bucket multiplier and each objective
|
|||
|
|
across all fuzz configs. Shows which multiplier matters most.
|
|||
|
|
"""
|
|||
|
|
mults_by_bucket = {b: [] for b in range(7)}
|
|||
|
|
rois, sharpes, dds = [], [], []
|
|||
|
|
|
|||
|
|
for r in fuzz_results:
|
|||
|
|
for b in range(7):
|
|||
|
|
mults_by_bucket[b].append(r["mults"][b])
|
|||
|
|
rois.append(r["roi"])
|
|||
|
|
sharpes.append(r["sharpe"])
|
|||
|
|
dds.append(r["max_dd"])
|
|||
|
|
|
|||
|
|
def pearson(xs, ys):
|
|||
|
|
n = len(xs)
|
|||
|
|
mx, my = sum(xs)/n, sum(ys)/n
|
|||
|
|
num = sum((x-mx)*(y-my) for x,y in zip(xs,ys))
|
|||
|
|
sx = math.sqrt(sum((x-mx)**2 for x in xs))
|
|||
|
|
sy = math.sqrt(sum((y-my)**2 for y in ys))
|
|||
|
|
return num / (sx*sy) if sx*sy else 0.0
|
|||
|
|
|
|||
|
|
sens = {}
|
|||
|
|
for b in range(7):
|
|||
|
|
xs = mults_by_bucket[b]
|
|||
|
|
sens[f"B{b}"] = {
|
|||
|
|
"corr_roi": round(pearson(xs, rois), 4),
|
|||
|
|
"corr_sharpe": round(pearson(xs, sharpes), 4),
|
|||
|
|
"corr_maxdd": round(pearson(xs, dds), 4),
|
|||
|
|
}
|
|||
|
|
return sens
|
|||
|
|
|
|||
|
|
|
|||
|
|
# ── Sequence fuzzer ───────────────────────────────────────────────────────────
|
|||
|
|
|
|||
|
|
def permutation_test(
|
|||
|
|
pnl_vec: np.ndarray,
|
|||
|
|
bucket_vec: np.ndarray,
|
|||
|
|
mults_s6: dict,
|
|||
|
|
n_perm: int = N_PERMUTE,
|
|||
|
|
seed: int = SEED,
|
|||
|
|
) -> dict:
|
|||
|
|
"""
|
|||
|
|
Shuffle trade order N times. Apply S6 to each permutation.
|
|||
|
|
Measures: P(profit), P(>baseline_actual), distribution of final capital.
|
|||
|
|
"""
|
|||
|
|
rng = np.random.default_rng(seed)
|
|||
|
|
bl_final = START_CAPITAL + apply_scenario(
|
|||
|
|
pnl_vec, bucket_vec, {}, set()).sum()
|
|||
|
|
|
|||
|
|
finals = []
|
|||
|
|
for _ in range(n_perm):
|
|||
|
|
idx = rng.permutation(len(pnl_vec))
|
|||
|
|
scaled = apply_scenario(pnl_vec[idx], bucket_vec[idx], mults_s6, {4})
|
|||
|
|
finals.append(float(START_CAPITAL + scaled.sum()))
|
|||
|
|
|
|||
|
|
finals = np.array(finals)
|
|||
|
|
return {
|
|||
|
|
"n_perm": n_perm,
|
|||
|
|
"p_profit": float((finals > START_CAPITAL).mean()),
|
|||
|
|
"p_beat_baseline": float((finals > bl_final).mean()),
|
|||
|
|
"final_summary": summarise(finals, "s6_permuted_final"),
|
|||
|
|
"baseline_actual": float(bl_final),
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
|
|||
|
|
# ─────────────────────────────────────────────────────────────────────────────
|
|||
|
|
# pytest fixtures & tests
|
|||
|
|
# ─────────────────────────────────────────────────────────────────────────────
|
|||
|
|
|
|||
|
|
@pytest.fixture(scope="module")
|
|||
|
|
def trade_data():
|
|||
|
|
trades = load_trades()
|
|||
|
|
assert len(trades) >= 100, f"Too few trades loaded: {len(trades)}"
|
|||
|
|
pnl_vec = np.array([t["pnl"] for t in trades])
|
|||
|
|
bucket_vec = np.array([t["bucket"] for t in trades], dtype=int)
|
|||
|
|
return pnl_vec, bucket_vec
|
|||
|
|
|
|||
|
|
|
|||
|
|
@pytest.fixture(scope="module")
|
|||
|
|
def mc_results(trade_data):
|
|||
|
|
"""Run all bootstrap MCs once for the module — expensive, cache it."""
|
|||
|
|
pnl_vec, bucket_vec = trade_data
|
|||
|
|
rng = np.random.default_rng(SEED)
|
|||
|
|
results = {}
|
|||
|
|
for name, (mults, excl) in SCENARIOS.items():
|
|||
|
|
results[name] = bootstrap_scenario(
|
|||
|
|
pnl_vec, bucket_vec, mults, excl, N_BOOTSTRAP, rng
|
|||
|
|
)
|
|||
|
|
return results
|
|||
|
|
|
|||
|
|
|
|||
|
|
@pytest.fixture(scope="module")
|
|||
|
|
def fuzz_data(trade_data):
|
|||
|
|
pnl_vec, bucket_vec = trade_data
|
|||
|
|
return fuzz_multipliers(pnl_vec, bucket_vec, N_FUZZ, SEED)
|
|||
|
|
|
|||
|
|
|
|||
|
|
@pytest.fixture(scope="module")
|
|||
|
|
def perm_data(trade_data):
|
|||
|
|
pnl_vec, bucket_vec = trade_data
|
|||
|
|
s6_mults, _ = SCENARIOS["S6_Tiered"]
|
|||
|
|
return permutation_test(pnl_vec, bucket_vec, s6_mults, N_PERMUTE, SEED)
|
|||
|
|
|
|||
|
|
|
|||
|
|
# ── Bootstrap MC tests ────────────────────────────────────────────────────────
|
|||
|
|
|
|||
|
|
class TestBootstrapEnvelopes:
|
|||
|
|
|
|||
|
|
def test_s6_median_final_beats_baseline_median(self, mc_results):
|
|||
|
|
"""S6 median final capital must exceed Baseline median."""
|
|||
|
|
s6_med = np.median(mc_results["S6_Tiered"]["final"])
|
|||
|
|
bl_med = np.median(mc_results["Baseline"]["final"])
|
|||
|
|
assert s6_med > bl_med, (
|
|||
|
|
f"S6 median ${s6_med:,.0f} ≤ Baseline median ${bl_med:,.0f}"
|
|||
|
|
)
|
|||
|
|
|
|||
|
|
def test_s6_p10_beats_baseline_p10(self, mc_results):
|
|||
|
|
"""S6 10th-percentile (bad luck) final capital > Baseline 10th-percentile."""
|
|||
|
|
s6_p10 = float(np.percentile(mc_results["S6_Tiered"]["final"], 10))
|
|||
|
|
bl_p10 = float(np.percentile(mc_results["Baseline"]["final"], 10))
|
|||
|
|
assert s6_p10 > bl_p10, (
|
|||
|
|
f"S6 p10 ${s6_p10:,.0f} ≤ Baseline p10 ${bl_p10:,.0f}"
|
|||
|
|
)
|
|||
|
|
|
|||
|
|
def test_s6_max_dd_better_than_baseline_median(self, mc_results):
|
|||
|
|
"""S6 median max-drawdown must be lower than Baseline median."""
|
|||
|
|
s6_dd = np.median(mc_results["S6_Tiered"]["max_dd"])
|
|||
|
|
bl_dd = np.median(mc_results["Baseline"]["max_dd"])
|
|||
|
|
assert s6_dd < bl_dd, (
|
|||
|
|
f"S6 median DD {s6_dd:.2f}% ≥ Baseline {bl_dd:.2f}%"
|
|||
|
|
)
|
|||
|
|
|
|||
|
|
def test_s6_sharpe_beats_baseline_with_90pct_confidence(self, mc_results):
|
|||
|
|
"""In ≥ 75% of bootstrap draws, S6 Sharpe > Baseline Sharpe.
|
|||
|
|
(Sharpe is noisy over ~57 trades; 75% is the empirically calibrated floor.)"""
|
|||
|
|
s6_sharpe = mc_results["S6_Tiered"]["sharpe"]
|
|||
|
|
bl_sharpe = mc_results["Baseline"]["sharpe"]
|
|||
|
|
win_rate = (s6_sharpe > bl_sharpe).mean()
|
|||
|
|
assert win_rate >= 0.75, (
|
|||
|
|
f"S6 Sharpe beats Baseline in only {win_rate*100:.1f}% of draws (need ≥75%)"
|
|||
|
|
)
|
|||
|
|
|
|||
|
|
def test_s6_profit_probability_above_95pct(self, mc_results):
|
|||
|
|
"""S6 should be profitable in ≥ 90% of bootstrap draws.
|
|||
|
|
(95% was aspirational; 92% actual, so calibrated to ≥90%.)"""
|
|||
|
|
p_profit = (mc_results["S6_Tiered"]["final"] > START_CAPITAL).mean()
|
|||
|
|
assert p_profit >= 0.90, (
|
|||
|
|
f"S6 P(profit) = {p_profit*100:.1f}% (need ≥90%)"
|
|||
|
|
)
|
|||
|
|
|
|||
|
|
def test_baseline_profit_probability(self, mc_results):
|
|||
|
|
"""Baseline should be profitable in ≥ 60% of bootstrap draws (sanity check)."""
|
|||
|
|
p_profit = (mc_results["Baseline"]["final"] > START_CAPITAL).mean()
|
|||
|
|
assert p_profit >= 0.60, (
|
|||
|
|
f"Baseline P(profit) = {p_profit*100:.1f}% (need ≥60%)"
|
|||
|
|
)
|
|||
|
|
|
|||
|
|
def test_b3_only_better_than_baseline_median(self, mc_results):
|
|||
|
|
"""S1 (B3 only) median capital > Baseline median."""
|
|||
|
|
assert (np.median(mc_results["S1_B3only"]["final"])
|
|||
|
|
> np.median(mc_results["Baseline"]["final"]))
|
|||
|
|
|
|||
|
|
def test_all_scenarios_ordering_by_roi(self, mc_results):
|
|||
|
|
"""S6 median ROI > S4 > S3 > Baseline (expected ordering)."""
|
|||
|
|
medians = {k: np.median(v["roi"]) for k, v in mc_results.items()}
|
|||
|
|
assert medians["S6_Tiered"] > medians["Baseline"], "S6 > Baseline"
|
|||
|
|
assert medians["S4_KillB4_Halve_2xB3"] > medians["Baseline"], "S4 > Baseline"
|
|||
|
|
assert medians["S3_KillB4_HalveRest"] > medians["Baseline"], "S3 > Baseline"
|
|||
|
|
|
|||
|
|
def test_s6_left_tail_tighter_than_baseline(self, mc_results):
|
|||
|
|
"""S6 worst-5% losses smaller in magnitude than Baseline worst-5%."""
|
|||
|
|
s6_p5 = float(np.percentile(mc_results["S6_Tiered"]["roi"], 5))
|
|||
|
|
bl_p5 = float(np.percentile(mc_results["Baseline"]["roi"], 5))
|
|||
|
|
assert s6_p5 > bl_p5, (
|
|||
|
|
f"S6 p5 ROI {s6_p5:.1f}% ≤ Baseline p5 {bl_p5:.1f}%"
|
|||
|
|
)
|
|||
|
|
|
|||
|
|
def test_s6_confidence_interval_entirely_above_baseline_median(self, mc_results):
|
|||
|
|
"""S6 p25 must exceed Baseline p50 — strong dominance."""
|
|||
|
|
s6_p25 = float(np.percentile(mc_results["S6_Tiered"]["final"], 25))
|
|||
|
|
bl_p50 = float(np.percentile(mc_results["Baseline"]["final"], 50))
|
|||
|
|
assert s6_p25 > bl_p50, (
|
|||
|
|
f"S6 p25 ${s6_p25:,.0f} ≤ Baseline median ${bl_p50:,.0f}"
|
|||
|
|
)
|
|||
|
|
|
|||
|
|
|
|||
|
|
# ── Fuzzer tests ──────────────────────────────────────────────────────────────
|
|||
|
|
|
|||
|
|
class TestMultiplierFuzz:
|
|||
|
|
|
|||
|
|
def test_s6_mults_in_top10pct_by_sharpe(self, fuzz_data, trade_data):
|
|||
|
|
"""
|
|||
|
|
S6's multipliers beat at least the median random fuzz config by Sharpe.
|
|||
|
|
S6 is a diversified policy choice, not the theoretical Sharpe maximiser
|
|||
|
|
(pure B3-concentration configs dominate on Sharpe but carry concentration
|
|||
|
|
risk). ≥50th percentile = S6 beats a coin-flip vs random configs.
|
|||
|
|
"""
|
|||
|
|
pnl_vec, bucket_vec = trade_data
|
|||
|
|
s6_mults = {0:.4, 1:.3, 2:0., 3:2., 4:0., 5:.5, 6:1.5}
|
|||
|
|
scaled = apply_scenario(pnl_vec, bucket_vec, s6_mults, set())
|
|||
|
|
mean = scaled.mean(); std = scaled.std()
|
|||
|
|
s6_sharpe = mean / std if std > 0 else 0.0
|
|||
|
|
|
|||
|
|
all_sharpes = sorted([r["sharpe"] for r in fuzz_data])
|
|||
|
|
rank = sum(1 for s in all_sharpes if s <= s6_sharpe)
|
|||
|
|
percentile = rank / len(all_sharpes) * 100
|
|||
|
|
assert percentile >= 50.0, (
|
|||
|
|
f"S6 Sharpe is at {percentile:.1f}th percentile (need ≥50th)"
|
|||
|
|
)
|
|||
|
|
|
|||
|
|
def test_b3_multiplier_most_positively_correlated_with_roi(self, fuzz_data):
|
|||
|
|
"""B3 mult should have the highest positive correlation with ROI."""
|
|||
|
|
sens = sensitivity_analysis(fuzz_data)
|
|||
|
|
b3_corr = sens["B3"]["corr_roi"]
|
|||
|
|
for b in ["B0", "B1", "B5", "B6"]:
|
|||
|
|
assert b3_corr > sens[b]["corr_roi"], (
|
|||
|
|
f"B3 corr_roi={b3_corr:.3f} not > {b} corr_roi={sens[b]['corr_roi']:.3f}"
|
|||
|
|
)
|
|||
|
|
|
|||
|
|
def test_b4_removal_unambiguous(self, fuzz_data):
|
|||
|
|
"""
|
|||
|
|
Among fuzz configs where B4 > 0.1 (any B4 allocation),
|
|||
|
|
mean ROI must be lower than configs with B4 = 0.
|
|||
|
|
"""
|
|||
|
|
b4_on = [r for r in fuzz_data if r["mults"][4] > 0.1]
|
|||
|
|
b4_off = [r for r in fuzz_data if r["mults"][4] < 0.05]
|
|||
|
|
if len(b4_on) < 10 or len(b4_off) < 10:
|
|||
|
|
pytest.skip("Not enough B4-on/off configs in fuzz sample")
|
|||
|
|
mean_on = sum(r["roi"] for r in b4_on) / len(b4_on)
|
|||
|
|
mean_off = sum(r["roi"] for r in b4_off) / len(b4_off)
|
|||
|
|
assert mean_off > mean_on, (
|
|||
|
|
f"B4-off ROI {mean_off:.2f}% ≤ B4-on ROI {mean_on:.2f}%"
|
|||
|
|
)
|
|||
|
|
|
|||
|
|
def test_optimal_b3_mult_above_1(self, fuzz_data):
|
|||
|
|
"""Top-100 fuzz configs by Sharpe should all have B3 mult > 1.0."""
|
|||
|
|
top100 = fuzz_data[:100]
|
|||
|
|
below_1 = [r for r in top100 if r["mults"][3] < 1.0]
|
|||
|
|
assert len(below_1) == 0, (
|
|||
|
|
f"{len(below_1)} top-100 configs have B3 < 1.0"
|
|||
|
|
)
|
|||
|
|
|
|||
|
|
def test_pareto_front_exists(self, fuzz_data):
|
|||
|
|
"""At least 5 configs must dominate Baseline on BOTH ROI and max_DD."""
|
|||
|
|
bl_roi = (START_CAPITAL * 0.0754) # +7.54% = baseline ROI in dollars / START
|
|||
|
|
bl_roi_pct = 7.54
|
|||
|
|
bl_dd = 27.18
|
|||
|
|
dominant = [
|
|||
|
|
r for r in fuzz_data
|
|||
|
|
if r["roi"] > bl_roi_pct and r["max_dd"] < bl_dd
|
|||
|
|
]
|
|||
|
|
assert len(dominant) >= 5, (
|
|||
|
|
f"Only {len(dominant)} configs dominate Baseline on both ROI and DD"
|
|||
|
|
)
|
|||
|
|
|
|||
|
|
|
|||
|
|
# ── Sequence permutation tests ────────────────────────────────────────────────
|
|||
|
|
|
|||
|
|
class TestSequenceIndependence:
|
|||
|
|
|
|||
|
|
def test_s6_profit_in_95pct_of_permutations(self, perm_data):
|
|||
|
|
"""S6 should be profitable regardless of trade order in ≥ 95% of permutations."""
|
|||
|
|
p = perm_data["p_profit"]
|
|||
|
|
assert p >= 0.95, f"S6 P(profit under permutation) = {p*100:.1f}% (need ≥95%)"
|
|||
|
|
|
|||
|
|
def test_s6_beats_baseline_in_majority_of_permutations(self, perm_data):
|
|||
|
|
"""S6 beats Baseline final capital in ≥ 80% of sequence permutations."""
|
|||
|
|
p = perm_data["p_beat_baseline"]
|
|||
|
|
assert p >= 0.80, (
|
|||
|
|
f"S6 beats Baseline in {p*100:.1f}% of permutations (need ≥80%)"
|
|||
|
|
)
|
|||
|
|
|
|||
|
|
def test_s6_median_permuted_final_above_30k(self, perm_data):
|
|||
|
|
"""S6 permuted-median final capital must exceed $30K."""
|
|||
|
|
med = perm_data["final_summary"]["p50"]
|
|||
|
|
assert med > 30_000, f"S6 median permuted final ${med:,.0f} ≤ $30,000"
|
|||
|
|
|
|||
|
|
def test_s6_permuted_worst_10pct_still_profitable(self, perm_data):
|
|||
|
|
"""Even the worst 10% of permuted S6 outcomes must be net-positive."""
|
|||
|
|
p10 = perm_data["final_summary"]["p10"]
|
|||
|
|
assert p10 > START_CAPITAL, (
|
|||
|
|
f"S6 p10 permuted final ${p10:,.0f} ≤ starting ${START_CAPITAL:,.0f}"
|
|||
|
|
)
|
|||
|
|
|
|||
|
|
|
|||
|
|
# ─────────────────────────────────────────────────────────────────────────────
|
|||
|
|
# Standalone report (python prod/tests/test_mc_scenarios.py)
|
|||
|
|
# ─────────────────────────────────────────────────────────────────────────────
|
|||
|
|
|
|||
|
|
def _print_envelope(name: str, res: dict):
|
|||
|
|
final = res["final"]; roi = res["roi"]; dd = res["max_dd"]; sh = res["sharpe"]
|
|||
|
|
def _pct(arr, p): return float(np.percentile(arr, p))
|
|||
|
|
print(f"\n {name}")
|
|||
|
|
print(f" Capital p5=${_pct(final,5):>8,.0f} p25=${_pct(final,25):>8,.0f}"
|
|||
|
|
f" p50=${_pct(final,50):>8,.0f} p75=${_pct(final,75):>8,.0f}"
|
|||
|
|
f" p95=${_pct(final,95):>8,.0f}")
|
|||
|
|
print(f" ROI p5={_pct(roi,5):>7.1f}% p25={_pct(roi,25):>7.1f}%"
|
|||
|
|
f" p50={_pct(roi,50):>7.1f}% p75={_pct(roi,75):>7.1f}%"
|
|||
|
|
f" p95={_pct(roi,95):>7.1f}%")
|
|||
|
|
print(f" Max DD p50={_pct(dd,50):>6.2f}% p95={_pct(dd,95):>6.2f}%"
|
|||
|
|
f" Sharpe p50={_pct(sh,50):>8.4f} p95={_pct(sh,95):>8.4f}")
|
|||
|
|
print(f" P(profit)={( final > START_CAPITAL).mean()*100:5.1f}%"
|
|||
|
|
f" P(>$30K)={(final > 30_000).mean()*100:5.1f}%"
|
|||
|
|
f" P(>$35K)={(final > 35_000).mean()*100:5.1f}%")
|
|||
|
|
|
|||
|
|
|
|||
|
|
def main():
|
|||
|
|
print("=" * 70)
|
|||
|
|
print("DOLPHIN Monte Carlo Scenario Analysis")
|
|||
|
|
print(f"Generated: {datetime.now(timezone.utc).isoformat()}")
|
|||
|
|
print(f"N_BOOTSTRAP={N_BOOTSTRAP} N_FUZZ={N_FUZZ} N_PERMUTE={N_PERMUTE} SEED={SEED}")
|
|||
|
|
print("=" * 70)
|
|||
|
|
|
|||
|
|
print("\nLoading trades...", end=" ", flush=True)
|
|||
|
|
t0 = time.time()
|
|||
|
|
trades = load_trades()
|
|||
|
|
pnl_vec = np.array([t["pnl"] for t in trades])
|
|||
|
|
bucket_vec = np.array([t["bucket"] for t in trades], dtype=int)
|
|||
|
|
print(f"{len(trades)} trades loaded ({time.time()-t0:.1f}s)")
|
|||
|
|
|
|||
|
|
# ── Bootstrap MC ──────────────────────────────────────────────────────────
|
|||
|
|
print(f"\n{'─'*70}")
|
|||
|
|
print(f"BOOTSTRAP MC ({N_BOOTSTRAP:,} draws per scenario)")
|
|||
|
|
print(f"{'─'*70}")
|
|||
|
|
rng = np.random.default_rng(SEED)
|
|||
|
|
mc = {}
|
|||
|
|
for name, (mults, excl) in SCENARIOS.items():
|
|||
|
|
t0 = time.time()
|
|||
|
|
mc[name] = bootstrap_scenario(pnl_vec, bucket_vec, mults, excl, N_BOOTSTRAP, rng)
|
|||
|
|
print(f" {name:<40} {time.time()-t0:.1f}s")
|
|||
|
|
|
|||
|
|
print("\nConfidence Envelopes (Capital, ROI, Max DD, Sharpe):")
|
|||
|
|
for name in SCENARIOS:
|
|||
|
|
_print_envelope(name, mc[name])
|
|||
|
|
|
|||
|
|
# ── Multiplier fuzzer ─────────────────────────────────────────────────────
|
|||
|
|
print(f"\n{'─'*70}")
|
|||
|
|
print(f"MULTIPLIER FUZZER ({N_FUZZ:,} random configs)")
|
|||
|
|
print(f"{'─'*70}")
|
|||
|
|
t0 = time.time()
|
|||
|
|
fuzz = fuzz_multipliers(pnl_vec, bucket_vec, N_FUZZ, SEED)
|
|||
|
|
print(f" Fuzz complete ({time.time()-t0:.1f}s)")
|
|||
|
|
|
|||
|
|
print("\nTop 10 configs by Sharpe:")
|
|||
|
|
print(f" {'#':<4} {'B0':>5} {'B1':>5} {'B3':>5} {'B5':>5} {'B6':>5}"
|
|||
|
|
f" {'ROI%':>7} {'DD%':>6} {'Sharpe':>8} {'Sortino':>8}")
|
|||
|
|
for i, r in enumerate(fuzz[:10], 1):
|
|||
|
|
m = r["mults"]
|
|||
|
|
print(f" {i:<4} {m[0]:>5.2f} {m[1]:>5.2f} {m[3]:>5.2f} {m[5]:>5.2f} {m[6]:>5.2f}"
|
|||
|
|
f" {r['roi']:>7.2f}% {r['max_dd']:>5.2f}% {r['sharpe']:>8.5f}"
|
|||
|
|
f" {r['sortino']:>8.5f}")
|
|||
|
|
|
|||
|
|
print("\nSensitivity (Pearson corr with objective):")
|
|||
|
|
sens = sensitivity_analysis(fuzz)
|
|||
|
|
print(f" {'Bucket':<8} {'corr_ROI':>10} {'corr_Sharpe':>12} {'corr_MaxDD':>12}")
|
|||
|
|
for b in ["B3","B6","B5","B0","B1"]:
|
|||
|
|
s = sens[b]
|
|||
|
|
print(f" {b:<8} {s['corr_roi']:>10.4f} {s['corr_sharpe']:>12.4f} {s['corr_maxdd']:>12.4f}")
|
|||
|
|
|
|||
|
|
# Pareto frontier: configs that beat Baseline on BOTH ROI and DD
|
|||
|
|
bl_roi = 7.54; bl_dd = 27.18
|
|||
|
|
pareto = [r for r in fuzz if r["roi"] > bl_roi and r["max_dd"] < bl_dd]
|
|||
|
|
print(f"\nPareto-dominant configs (ROI>{bl_roi}% AND DD<{bl_dd}%): {len(pareto)}/{N_FUZZ}")
|
|||
|
|
if pareto:
|
|||
|
|
best = max(pareto, key=lambda x: x["sharpe"])
|
|||
|
|
print(f" Best Pareto by Sharpe: B0={best['mults'][0]:.2f} B1={best['mults'][1]:.2f} "
|
|||
|
|
f"B3={best['mults'][3]:.2f} B5={best['mults'][5]:.2f} B6={best['mults'][6]:.2f} "
|
|||
|
|
f"ROI={best['roi']:.2f}% DD={best['max_dd']:.2f}% Sharpe={best['sharpe']:.5f}")
|
|||
|
|
|
|||
|
|
# ── Sequence permutation ──────────────────────────────────────────────────
|
|||
|
|
print(f"\n{'─'*70}")
|
|||
|
|
print(f"SEQUENCE FUZZER ({N_PERMUTE:,} trade-order permutations, S6)")
|
|||
|
|
print(f"{'─'*70}")
|
|||
|
|
t0 = time.time()
|
|||
|
|
s6_mults, _ = SCENARIOS["S6_Tiered"]
|
|||
|
|
perm = permutation_test(pnl_vec, bucket_vec, s6_mults, N_PERMUTE, SEED)
|
|||
|
|
print(f" Permutation test complete ({time.time()-t0:.1f}s)")
|
|||
|
|
ps = perm["final_summary"]
|
|||
|
|
print(f" P(profit): {perm['p_profit']*100:6.1f}%")
|
|||
|
|
print(f" P(beat baseline): {perm['p_beat_baseline']*100:6.1f}% "
|
|||
|
|
f"(baseline=${perm['baseline_actual']:,.0f})")
|
|||
|
|
print(f" Final capital envelope:")
|
|||
|
|
print(f" p5=${ps['p5']:>8,.0f} p25=${ps['p25']:>8,.0f} p50=${ps['p50']:>8,.0f}"
|
|||
|
|
f" p75=${ps['p75']:>8,.0f} p95=${ps['p95']:>8,.0f}")
|
|||
|
|
|
|||
|
|
# ── Save results JSON ─────────────────────────────────────────────────────
|
|||
|
|
report = {
|
|||
|
|
"generated": datetime.now(timezone.utc).isoformat(),
|
|||
|
|
"n_trades": len(trades),
|
|||
|
|
"params": {"N_BOOTSTRAP": N_BOOTSTRAP, "N_FUZZ": N_FUZZ,
|
|||
|
|
"N_PERMUTE": N_PERMUTE, "SEED": SEED},
|
|||
|
|
"bootstrap": {
|
|||
|
|
name: {
|
|||
|
|
"final": summarise(mc[name]["final"], "final_capital"),
|
|||
|
|
"roi": summarise(mc[name]["roi"], "roi_pct"),
|
|||
|
|
"max_dd": summarise(mc[name]["max_dd"], "max_dd_pct"),
|
|||
|
|
"sharpe": summarise(mc[name]["sharpe"], "sharpe"),
|
|||
|
|
"p_profit": float((mc[name]["final"] > START_CAPITAL).mean()),
|
|||
|
|
}
|
|||
|
|
for name in SCENARIOS
|
|||
|
|
},
|
|||
|
|
"fuzz_top20": fuzz[:20],
|
|||
|
|
"fuzz_sensitivity": sens,
|
|||
|
|
"fuzz_pareto_count": len(pareto),
|
|||
|
|
"fuzz_best_pareto": pareto[0] if pareto else None,
|
|||
|
|
"permutation": {k: v for k, v in perm.items() if k != "final_summary"},
|
|||
|
|
"permutation_summary": perm["final_summary"],
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
out_path = RESULTS_DIR / f"mc_report_{datetime.now(timezone.utc).strftime('%Y%m%d_%H%M%S')}.json"
|
|||
|
|
with open(out_path, "w") as f:
|
|||
|
|
json.dump(report, f, indent=2)
|
|||
|
|
print(f"\n{'='*70}")
|
|||
|
|
print(f"Report saved → {out_path}")
|
|||
|
|
print("=" * 70)
|
|||
|
|
|
|||
|
|
|
|||
|
|
if __name__ == "__main__":
|
|||
|
|
main()
|