# Source listing header (web-export residue removed; original viewer chrome
# "Files / Raw / Normal View / History" deleted so the file parses):
# DOLPHIN/nautilus_dolphin/dvae/run_trace_backtest.py — 469 lines, 18 KiB, Python

"""
run_trace_backtest.py D_LIQ_GOLD full 56-day backtest with painstaking per-tick
and per-trade state logging. Streams to CSV; never accumulates more than one day
of ticks in RAM (~1.2 MB). No PyTorch, no dvae/__init__, no ensemble warmup.
Outputs (dvae/trace/):
tick_trace.csv one row per valid bar (346k+ rows)
trade_trace.csv one row per closed trade (~2155 rows)
daily_trace.csv one row per day (56 rows)
summary.json ROI / DD / T / Calmar at end
Tick CSV columns:
date, bar_ri, global_bar, btc_price, vel_div, vol_ok,
proxy_b, base_max_lev, sizer_max_lev, regime_size_mult,
capital, pos_open, pos_dir, pos_entry_price, pos_entry_bar,
pos_entry_lev, acb_boost, day_beta, mc_status, dd_halt, event
Trade CSV columns:
trade_id, asset, date_exit, direction, entry_price, exit_price,
entry_bar, exit_bar, notional, leverage, pnl_pct, pnl_abs,
exit_reason, bars_held, proxy_b_at_entry, proxy_b_at_exit,
capital_before_exit, capital_after_exit
Daily CSV columns:
date, cap_start, cap_end, pnl, n_trades, acb_boost, day_beta,
mc_status, max_dd_intraday
Run after freeing RAM. Prints progress every 5 days.
"""
import sys, os, csv, json, gc, math, time
from pathlib import Path
import numpy as np
import pandas as pd
# ------ Path setup (MUST come before any nautilus_dolphin import) ----------
# Make the repo importable when this file is run as a script (no install).
_HERE = Path(__file__).resolve().parent   # dvae/
_ND_ROOT = _HERE.parent                   # nautilus_dolphin/
sys.path.insert(0, str(_ND_ROOT))
# Add dvae/ directly so we can import exp_shared without triggering dvae/__init__
# (which would pull in heavy dependencies such as PyTorch — see module docstring).
sys.path.insert(0, str(_HERE))
# ------ Imports (NO dvae package, NO torch) --------------------------------
from exp_shared import ENGINE_KWARGS, MC_BASE_CFG, VBT_DIR, MC_MODELS_DIR, META_COLS
from nautilus_dolphin.nautilus.proxy_boost_engine import (
create_d_liq_engine, LiquidationGuardEngine,
)
from nautilus_dolphin.nautilus.adaptive_circuit_breaker import AdaptiveCircuitBreaker
from nautilus_dolphin.nautilus.ob_features import OBFeatureEngine
from nautilus_dolphin.nautilus.ob_provider import MockOBProvider
# ------ Output directory ---------------------------------------------------
TRACE_DIR = _HERE / 'trace'
TRACE_DIR.mkdir(exist_ok=True)
TICK_CSV = TRACE_DIR / 'tick_trace.csv'    # one row per valid bar
TRADE_CSV = TRACE_DIR / 'trade_trace.csv'  # one row per closed trade
DAILY_CSV = TRACE_DIR / 'daily_trace.csv'  # one row per day
SUMMARY = TRACE_DIR / 'summary.json'       # final ROI / DD / Calmar stats
# NOTE: the column orders below must stay in sync with the row tuples built in
# TracingLiquidationEngine.step_bar (tick/trade) and main() (daily).
TICK_HEADER = [
    'date', 'bar_ri', 'global_bar', 'btc_price', 'vel_div', 'vol_ok',
    'proxy_b', 'base_max_lev', 'sizer_max_lev', 'regime_size_mult',
    'capital', 'pos_open', 'pos_dir', 'pos_entry_price', 'pos_entry_bar',
    'pos_entry_lev', 'acb_boost', 'day_beta', 'mc_status', 'dd_halt', 'event',
]
TRADE_HEADER = [
    'trade_id', 'asset', 'date_exit', 'direction', 'entry_price', 'exit_price',
    'entry_bar', 'exit_bar', 'notional', 'leverage', 'pnl_pct', 'pnl_abs',
    'exit_reason', 'bars_held',
    'proxy_b_at_entry', 'proxy_b_at_exit',
    'capital_before_exit', 'capital_after_exit',
]
DAILY_HEADER = [
    'date', 'cap_start', 'cap_end', 'pnl', 'n_trades',
    'acb_boost', 'day_beta', 'mc_status', 'max_dd_intraday',
]
# ===========================================================================
# TracingLiquidationEngine — thin subclass that instruments step_bar
# ===========================================================================
class TracingLiquidationEngine(LiquidationGuardEngine):
    """
    Wraps LiquidationGuardEngine with per-bar state capture.

    Overrides step_bar() to record tick + trade events by diffing engine
    state immediately before and after the parent's bar logic runs.
    All trace data is buffered per day and streamed to CSV via the external
    csv.writer objects passed to __init__ (no unbounded RAM accumulation).
    """

    def __init__(self, tick_writer, trade_writer, **kwargs):
        # tick_writer / trade_writer: csv.writer-like objects; only their
        # writerows() method is used, at the end_day flush.
        # Remaining kwargs are forwarded untouched to LiquidationGuardEngine.
        super().__init__(**kwargs)
        self._tw = tick_writer
        self._trd = trade_writer
        self._cur_date = ''                # date string of the day in progress
        self._entry_pb_cache: dict = {}    # entry_bar → proxy_b at entry
        self._tick_buf: list = []          # per-day buffer, flushed at end_day
        self._trade_buf: list = []         # per-day buffer, flushed at end_day
        self._day_cap_hi = 0.0             # intraday capital high-water mark
        self._day_cap_lo = float('inf')    # intraday capital low-water mark

    # ── Override begin_day to track date and intraday extremes ───────────────
    def begin_day(self, date_str, posture='APEX', direction=None):
        """Reset per-day buffers and capital extremes, then defer to parent."""
        super().begin_day(date_str, posture=posture, direction=direction)
        self._cur_date = date_str
        self._tick_buf = []
        self._trade_buf = []
        # Seed both extremes with the day's opening capital.
        self._day_cap_hi = self.capital
        self._day_cap_lo = self.capital

    # ── Override step_bar to capture tick state ──────────────────────────────
    def step_bar(self, bar_idx, vel_div, prices, vol_regime_ok=True,
                 price_histories=None, v50_vel=0.0, v750_vel=0.0):
        """Run the parent's bar logic, then append one tick row (and any
        completed-trade rows) built from before/after state snapshots.

        Returns whatever the parent's step_bar returns, unchanged.
        """
        # --- Pre-call state snapshot ---
        proxy_b_now = float(self._current_proxy_b)
        n_before = len(self.trade_history)
        pos_before = self.position  # None or NDPosition
        cap_before = self.capital
        # --- Actual engine work ---
        result = super().step_bar(
            bar_idx=bar_idx, vel_div=vel_div, prices=prices,
            vol_regime_ok=vol_regime_ok, price_histories=price_histories,
            v50_vel=v50_vel, v750_vel=v750_vel,
        )
        # --- Post-call state snapshot ---
        n_after = len(self.trade_history)
        pos_after = self.position
        cap_after = self.capital
        # Track intraday capital range
        if cap_after > self._day_cap_hi: self._day_cap_hi = cap_after
        if cap_after < self._day_cap_lo: self._day_cap_lo = cap_after
        # Determine event: entry = a position appeared; exit = trade_history grew.
        new_entry = (pos_before is None and pos_after is not None)
        new_exit = (n_after > n_before)
        event = 'NONE'
        if new_entry and new_exit:
            event = 'ENTRY+EXIT'  # same-bar close+reopen (rare)
        elif new_entry:
            event = 'ENTRY'
        elif new_exit:
            event = 'EXIT'
        # Cache proxy_B at entry time so the trade row can report it at exit.
        if new_entry and pos_after is not None:
            self._entry_pb_cache[getattr(pos_after, 'entry_bar', bar_idx)] = proxy_b_now
        # Capture trade rows for completed trades (column order = TRADE_HEADER).
        if new_exit:
            for t in self.trade_history[n_before:]:
                # Fall back to the current proxy_b if this entry was never cached.
                entry_pb = self._entry_pb_cache.pop(getattr(t, 'entry_bar', bar_idx), proxy_b_now)
                self._trade_buf.append((
                    t.trade_id,
                    t.asset,
                    self._cur_date,
                    t.direction,
                    round(t.entry_price, 4),
                    round(t.exit_price, 4),
                    t.entry_bar,
                    t.exit_bar,
                    round(t.notional, 2),
                    round(t.leverage, 4),
                    round(t.pnl_pct, 6),
                    round(t.pnl_absolute, 4),
                    t.exit_reason,
                    t.bars_held,
                    round(entry_pb, 6),
                    round(proxy_b_now, 6),
                    round(cap_before, 4),
                    round(cap_after, 4),
                ))
        # BTC close price (primary signal asset)
        btc_price = prices.get('BTCUSDT', 0.0)
        # Position fields — zeros / -1 sentinels when flat.
        pos_dir = pos_entry_price = pos_entry_lev = 0.0
        pos_entry_bar_i = -1
        pos_open = 0
        if pos_after is not None:
            pos_open = 1
            pos_dir = int(getattr(pos_after, 'direction', 0))
            pos_entry_price = round(float(getattr(pos_after, 'entry_price', 0.0)), 4)
            pos_entry_bar_i = int(getattr(pos_after, 'entry_bar', -1))
            pos_entry_lev = round(float(getattr(pos_after, 'leverage', 0.0)), 4)
        # global_bar was incremented INSIDE super().step_bar, so -1 gives current
        global_bar = self._global_bar_idx - 1
        # One tick row per bar (column order = TICK_HEADER).
        self._tick_buf.append((
            self._cur_date,
            bar_idx,
            global_bar,
            round(btc_price, 4),
            round(vel_div, 6),
            int(vol_regime_ok),
            round(proxy_b_now, 6),
            round(self.base_max_leverage, 2),
            round(self.bet_sizer.max_leverage, 2),
            round(getattr(self, 'regime_size_mult', 1.0), 4),
            round(cap_after, 4),
            pos_open,
            pos_dir,
            pos_entry_price,
            pos_entry_bar_i,
            pos_entry_lev,
            round(getattr(self, '_day_base_boost', 1.0), 4),
            round(getattr(self, '_day_beta', 0.0), 4),
            getattr(self, '_day_mc_status', 'OK'),
            int(getattr(self, 'regime_dd_halt', False)),
            event,
        ))
        return result

    # ── Override end_day to flush buffers ────────────────────────────────────
    def end_day(self):
        """Flush buffered tick/trade rows to CSV after the parent closes the day."""
        result = super().end_day()
        # Flush tick buffer
        if self._tick_buf:
            self._tw.writerows(self._tick_buf)
        # Flush trade buffer
        if self._trade_buf:
            self._trd.writerows(self._trade_buf)
        self._tick_buf = []
        self._trade_buf = []
        return result
# ===========================================================================
# Main
# ===========================================================================
def main():
    """Run the full 56-day D_LIQ_GOLD trace backtest.

    Loads per-day parquet price files lazily (cast to float32, gc per day),
    drives a TracingLiquidationEngine over each day, streams tick/trade/daily
    rows to the CSVs in TRACE_DIR, and writes summary.json with final stats.
    Prints progress every 5 days and pass/fail checks against the recorded
    "gold" run (ROI 181.81%, DD 17.65%, 2155 trades).

    Fix vs. original: output CSV handles are now closed via try/finally, so
    an exception anywhere in the backtest loop no longer leaks the three file
    handles or silently drops buffered rows.
    """
    t_start = time.time()
    print("=== D_LIQ_GOLD Trace Backtest ===")
    print(f" Outputs → {TRACE_DIR}")
    # -- Load data (GOLD method: float64 pq_data, seg-based vol_p60) -----------
    print(" Loading data (gold method)...")
    parquet_files = sorted(VBT_DIR.glob("*.parquet"))
    parquet_files = [p for p in parquet_files if 'catalog' not in str(p)]
    date_strings = [p.stem for p in parquet_files]
    print(f" {len(parquet_files)} parquet files")
    # Gold vol_p60: first 2 files, 50-bar trailing segments, keep only v > 0.
    all_vols = []
    for pf in parquet_files[:2]:
        df = pd.read_parquet(pf)
        if 'BTCUSDT' in df.columns:
            pr = df['BTCUSDT'].values
            for i in range(60, len(pr)):
                seg = pr[max(0, i - 50):i]
                if len(seg) < 10:
                    continue
                v = float(np.std(np.diff(seg) / seg[:-1]))
                if v > 0:
                    all_vols.append(v)
        del df
    vol_p60 = float(np.percentile(all_vols, 60)) if all_vols else 0.0002
    print(f" vol_p60 (gold) = {vol_p60:.8f}")
    # Asset universe = first file's columns minus metadata columns.
    df0 = pd.read_parquet(parquet_files[0])
    OB_ASSETS = sorted([c for c in df0.columns if c not in META_COLS])
    del df0
    print(f" OB assets: {len(OB_ASSETS)}")
    mock_ob = MockOBProvider(
        imbalance_bias=-0.09, depth_scale=1.0, assets=OB_ASSETS,
        imbalance_biases={"BTCUSDT": -0.086, "ETHUSDT": -0.092,
                          "BNBUSDT": +0.05, "SOLUSDT": +0.05},
    )
    ob_eng = OBFeatureEngine(mock_ob)
    ob_eng.preload_date("mock", OB_ASSETS)
    # -- Load MC-Forewarner (optional; backtest runs without it) ---------------
    fw = None
    try:
        from mc.mc_ml import DolphinForewarner
        fw = DolphinForewarner(models_dir=MC_MODELS_DIR)
        print(" MC-Forewarner: loaded")
    except Exception as e:
        print(f" MC-Forewarner: unavailable ({e})")
    # -- Open output CSVs (streaming); try/finally guarantees close on error --
    tick_fh = trade_fh = daily_fh = None
    try:
        tick_fh = open(TICK_CSV, 'w', newline='', encoding='utf-8')
        trade_fh = open(TRADE_CSV, 'w', newline='', encoding='utf-8')
        daily_fh = open(DAILY_CSV, 'w', newline='', encoding='utf-8')
        tick_w = csv.writer(tick_fh)
        trade_w = csv.writer(trade_fh)
        daily_w = csv.writer(daily_fh)
        tick_w.writerow(TICK_HEADER)
        trade_w.writerow(TRADE_HEADER)
        daily_w.writerow(DAILY_HEADER)
        # -- Build engine -----------------------------------------------------
        print(" Building TracingLiquidationEngine...")
        acb = AdaptiveCircuitBreaker()
        acb.preload_w750(date_strings)
        eng = TracingLiquidationEngine(
            tick_writer=tick_w, trade_writer=trade_w,
            **ENGINE_KWARGS,
        )
        eng.set_ob_engine(ob_eng)
        eng.set_acb(acb)
        if fw is not None:
            eng.set_mc_forewarner(fw, MC_BASE_CFG)
        eng.set_esoteric_hazard_multiplier(0.0)  # fixed function: respects _extended_soft_cap=8.0
        print(f" base_max_lev={eng.base_max_leverage} abs_max_lev={eng.abs_max_leverage} "
              f"sizer_max_lev={eng.bet_sizer.max_leverage}")
        # -- Run 56-day backtest (lazy loading, float32, gc per day) ----------
        print(" Running backtest (lazy load, float32 per day)...")
        print()
        daily_caps = []
        running_vols = []
        peak_cap = 25000.0
        max_dd = 0.0
        for i, pf in enumerate(parquet_files):
            ds = pf.stem
            # Lazy load + cast to float32 to save RAM
            df = pd.read_parquet(pf)
            for c in df.columns:
                if df[c].dtype == 'float64':
                    df[c] = df[c].astype('float32')
            acols = [c for c in df.columns if c not in META_COLS]
            # Per-day OB preload
            ob_eng.preload_date(ds, OB_ASSETS)
            # Compute day volatility series (50-bar rolling std of BTC returns).
            bp = df['BTCUSDT'].values if 'BTCUSDT' in df.columns else None
            dvol = np.zeros(len(df), dtype=np.float32)
            if bp is not None:
                rets = np.diff(bp.astype('float64')) / (bp[:-1].astype('float64') + 1e-9)
                for j in range(50, len(rets)):
                    v = np.std(rets[j - 50:j])
                    dvol[j + 1] = v
                    if v > 0:
                        running_vols.append(v)
            # Use the running-percentile threshold once enough samples exist.
            vp60 = np.percentile(running_vols, 60) if len(running_vols) > 1000 else vol_p60
            vol_ok = np.where(dvol > 0, dvol > vp60, False)
            cap_before = eng.capital
            n_before = len(eng.trade_history)
            # process_day internally calls begin_day → step_bar loop → end_day.
            eng.process_day(ds, df, acols, vol_regime_ok=vol_ok)
            cap_after = eng.capital
            n_after = len(eng.trade_history)
            daily_caps.append(cap_after)
            # Close-to-close drawdown from the running peak (rough).
            peak_cap = max(peak_cap, cap_after)
            max_dd = max(max_dd, (peak_cap - cap_after) / peak_cap * 100.0)
            intra_dd_pct = (eng._day_cap_hi - eng._day_cap_lo) / max(eng._day_cap_hi, 1e-9) * 100.0
            # Daily row (column order = DAILY_HEADER).
            daily_w.writerow((
                ds,
                round(cap_before, 4),
                round(cap_after, 4),
                round(cap_after - cap_before, 4),
                n_after - n_before,
                round(getattr(eng, '_day_base_boost', 1.0), 4),
                round(getattr(eng, '_day_beta', 0.0), 4),
                getattr(eng, '_day_mc_status', 'OK'),
                round(intra_dd_pct, 4),
            ))
            # Progress every 5 days (and on the final day).
            if (i + 1) % 5 == 0 or i == len(parquet_files) - 1:
                roi_now = (cap_after - 25000.0) / 25000.0 * 100.0
                elapsed = time.time() - t_start
                print(f" Day {i+1:3d}/{len(parquet_files)} {ds} "
                      f"cap={cap_after:.0f} ROI={roi_now:+.2f}% T={n_after} "
                      f"DD={max_dd:.2f}% ({elapsed:.0f}s)")
            # Clear OB cache + GC (best-effort: attrs may not all exist).
            if eng.ob_engine is not None:
                for attr in ('_preloaded_placement', '_preloaded_signal',
                             '_preloaded_market', '_ts_to_idx'):
                    try:
                        getattr(eng.ob_engine, attr).clear()
                    except Exception:
                        pass
            del df
            gc.collect()
    finally:
        # Close whatever was opened, even on an exception mid-backtest.
        for fh in (tick_fh, trade_fh, daily_fh):
            if fh is not None:
                fh.close()
    # -- Final stats -----------------------------------------------------------
    tr = eng.trade_history
    n = len(tr)
    roi = (eng.capital - 25000.0) / 25000.0 * 100.0

    def _abs(t):
        # Absolute PnL; older trade objects carry only pnl_pct (scaled by 250).
        return t.pnl_absolute if hasattr(t, 'pnl_absolute') else t.pnl_pct * 250.0

    wins = [t for t in tr if _abs(t) > 0]
    losses = [t for t in tr if _abs(t) <= 0]
    wr = len(wins) / n * 100.0 if n > 0 else 0.0
    profit_factor = sum(_abs(t) for t in wins) / max(abs(sum(_abs(t) for t in losses)), 1e-9)
    # Daily returns as % of starting capital (first day measured from 25000).
    dr = np.array([(c - 25000.0 if i == 0 else daily_caps[i] - daily_caps[i-1]) / 25000.0 * 100.0
                   for i, c in enumerate(daily_caps)])
    sharpe = float(dr.mean() / (dr.std() + 1e-9) * math.sqrt(365)) if len(dr) > 1 else 0.0
    calmar = roi / max(max_dd, 1e-9)
    liq_stops = getattr(eng, 'liquidation_stops', 0)
    summary = dict(
        roi=round(roi, 4),
        dd=round(max_dd, 4),
        calmar=round(calmar, 4),
        pf=round(profit_factor, 4),
        wr=round(wr, 4),
        sharpe=round(sharpe, 4),
        trades=n,
        liq_stops=liq_stops,
        capital_final=round(eng.capital, 4),
        elapsed_s=round(time.time() - t_start, 1),
        tick_csv=str(TICK_CSV),
        trade_csv=str(TRADE_CSV),
        daily_csv=str(DAILY_CSV),
        gold_roi=181.81,
        gold_dd=17.65,
        gold_trades=2155,
    )
    with open(SUMMARY, 'w', encoding='utf-8') as f:
        json.dump(summary, f, indent=2)
    print()
    print("=== RESULTS ===")
    print(f" ROI = {roi:+.2f}% (gold 181.81%)")
    print(f" DD = {max_dd:.2f}% (gold 17.65%)")
    print(f" Calmar = {calmar:.2f} (gold 10.30)")
    print(f" PF = {profit_factor:.4f} (gold ~1.55)")
    print(f" WR = {wr:.2f}%")
    print(f" T = {n} (gold 2155)")
    print(f" liq_stops = {liq_stops}")
    print(f" Time = {time.time()-t_start:.0f}s")
    print()
    # Tolerance checks against the recorded gold run.
    roi_ok = abs(roi - 181.81) < 1.0
    dd_ok = abs(max_dd - 17.65) < 0.5
    t_ok = n == 2155
    print(f" ROI match: {'✓ PASS' if roi_ok else '✗ FAIL'} (diff={roi-181.81:+.2f}pp)")
    print(f" DD match: {'✓ PASS' if dd_ok else '✗ FAIL'} (diff={max_dd-17.65:+.2f}pp)")
    print(f" T match: {'✓ PASS' if t_ok else '✗ FAIL'} (got {n})")
    print()
    print(f" Tick trace → {TICK_CSV} ({TICK_CSV.stat().st_size//1024}KB)")
    print(f" Trade trace → {TRADE_CSV}")
    print(f" Daily trace → {DAILY_CSV}")
    print(f" Summary → {SUMMARY}")
# Script entry point — run the full trace backtest when executed directly.
if __name__ == '__main__':
    main()