initial: import DOLPHIN baseline 2026-04-21 from dolphinng5_predict working tree

Includes core prod + GREEN/BLUE subsystems:
- prod/ (BLUE harness, configs, scripts, docs)
- nautilus_dolphin/ (GREEN Nautilus-native impl + dvae/ preserved)
- adaptive_exit/ (AEM engine + models/bucket_assignments.pkl)
- Observability/ (EsoF advisor, TUI, dashboards)
- external_factors/ (EsoF producer)
- mc_forewarning_qlabs_fork/ (MC regime/envelope)

Excludes runtime caches, logs, backups, and reproducible artifacts per .gitignore.
This commit is contained in:
hjnormey
2026-04-21 16:58:38 +02:00
commit 01c19662cb
643 changed files with 260241 additions and 0 deletions

File diff suppressed because it is too large — Load Diff

32
prod/ops/SESSION_INFO.txt Executable file
View File

@@ -0,0 +1,32 @@
=== KIMI SESSION BACKUP ===
Session ID: c23a69c5-ba4a-41c4-8624-05114e8fd9ea
Description: 4 Services + Prefect Architecture (Pre-Reboot)
Status: Stuck system, kernel deadlock, required reboot
=== FILES ===
- KIMI_Session_Rearch_Services-Prefect.md (684K) - Full human-readable transcript
- kimi_session_backup/ - Raw session files (wire.jsonl, context.jsonl)
=== LOCATIONS ===
1. /mnt/dolphinng5_predict/prod/ops/KIMI_Session_Rearch_Services-Prefect.md
2. /mnt/vids/KIMI_Session_Rearch_Services-Prefect.md (mirror)
=== RESUME SESSION ===
Option 1: Use helper script
./resume_session.sh
Option 2: Manual
cd /mnt/dolphinng5_predict/prod/ops
kimi --session c23a69c5-ba4a-41c4-8624-05114e8fd9ea
Option 3: Auto-continue (uses last_session_id from kimi.json)
cd /mnt/dolphinng5_predict/prod/ops
kimi --continue
=== SESSION STATS ===
- Total timeouts: 905
- "check" commands: 1244
- "again" attempts: 52
- Final state: Kernel deadlock, forced reboot

20
prod/ops/go_trade.sh Executable file
View File

@@ -0,0 +1,20 @@
#!/bin/bash
# ==============================================================================
# DOLPHIN NATIVE EXECUTION CERTIFICATION LAUNCHER
# Executing 100% compliant Nautilus Native Engine logic.
# ==============================================================================
echo "================================================================"
echo " Launching DOLPHIN NATIVE 56-Day Backtest Certification "
echo "================================================================"
echo "[INFO] Running with full 48-asset native Event Injection"
echo "[INFO] Simulated Events: ~22,000,000 ticks"
echo "[INFO] Expected Runtime: ~20 Minutes (Native Rust Event Loop)"

# Resolve the project root from this script's own location instead of relying
# on the caller's cwd — a bare `cd ../..` only worked when launched from
# prod/ops/ (same technique as go_trade_continuous.sh, for consistency).
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(cd "${SCRIPT_DIR}/../.." && pwd)"
cd "${PROJECT_ROOT}" || { echo "[ERROR] Cannot cd to ${PROJECT_ROOT}"; exit 1; }

# Switch to Siloqy environment Python and execute the native harness
# (hard-coded Git Bash / Windows interpreter path — update if the env moves).
"C:/Users/Lenovo/Documents/- Siloqy/Scripts/python.exe" prod/nautilus_native_backtest.py
EXIT_CODE=$?

echo "================================================================"
echo " CERTIFICATION COMPLETE "
echo "================================================================"
# Propagate the harness result so CI / callers can detect failure.
exit ${EXIT_CODE}

View File

@@ -0,0 +1,22 @@
@echo off
REM ==============================================================================
REM DOLPHIN CONTINUOUS NATIVE BACKTEST LAUNCHER
REM Requires 32GB+ RAM. Runs single continuous BacktestEngine state over 56 Days.
REM ==============================================================================
echo ================================================================
echo Launching CONTINUOUS Native 56-Day Backtest
echo ================================================================
echo [INFO] Running with full 48-asset native Event Injection
echo [INFO] Simulated Events: ~22,000,000 ticks in ONE continuous batch!
echo [INFO] Memory footprint: ~15GB expected
REM %% escapes a literal percent sign inside a batch file.
echo [INFO] This solves the -42%% state loss by preventing Engine re-initialization
REM Execute the native harness directly from the parent dir
REM (assumes this script lives two levels below the project root, e.g. prod\ops\).
cd ..\..
REM Hard-coded Siloqy venv interpreter; update this path if the env moves.
"C:\Users\Lenovo\Documents\- Siloqy\Scripts\python.exe" prod\nautilus_native_continuous.py
echo ================================================================
echo CONTINUOUS CERTIFICATION COMPLETE
echo ================================================================
REM Keep the console window open so the operator can read the final output.
pause

52
prod/ops/go_trade_continuous.sh Executable file
View File

@@ -0,0 +1,52 @@
#!/bin/bash
# ==============================================================================
# DOLPHIN CONTINUOUS NATIVE BACKTEST LAUNCHER
# Cross-platform: detects Windows (Git Bash / WSL) vs Linux automatically.
# Requires: siloqy-env Python, 32GB+ RAM.
# Runs single continuous BacktestEngine state over 56 Days.
# ==============================================================================
echo "================================================================"
echo " Launching CONTINUOUS Native 56-Day Backtest "
echo "================================================================"
echo "[INFO] Running with full 48-asset native Event Injection"
echo "[INFO] Simulated Events: ~22,000,000 ticks in ONE continuous batch!"
echo "[INFO] Memory footprint: ~15GB expected"
echo "[INFO] vol threshold : 0.00026414 (gold standard)"
echo "[INFO] min_irp_align : 0.0 (gold standard)"

# Resolve project root (two levels up from prod/ops/)
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(cd "${SCRIPT_DIR}/../.." && pwd)"
echo "[INFO] Project root: ${PROJECT_ROOT}"

# Detect Python interpreter
# Priority: DOLPHIN_PYTHON env var > siloqy venv > system python3
if [ -n "${DOLPHIN_PYTHON}" ]; then
    PYTHON="${DOLPHIN_PYTHON}"
elif [ -f "${PROJECT_ROOT}/../siloqy-env/bin/python" ]; then
    PYTHON="${PROJECT_ROOT}/../siloqy-env/bin/python"
elif [ -f "/mnt/dolphinng5_predict/../siloqy-env/bin/python" ]; then
    PYTHON="/mnt/dolphinng5_predict/../siloqy-env/bin/python"
elif command -v python3 &>/dev/null; then
    PYTHON="python3"
else
    echo "[ERROR] No Python interpreter found. Set DOLPHIN_PYTHON env var."
    exit 1
fi

echo "[INFO] Python: ${PYTHON}"
"${PYTHON}" --version 2>&1

# Abort if the project root is unreachable — otherwise the harness would be
# launched from whatever directory the caller happened to be in.
cd "${PROJECT_ROOT}" || { echo "[ERROR] Cannot cd to ${PROJECT_ROOT}"; exit 1; }
"${PYTHON}" prod/nautilus_native_continuous.py
EXIT_CODE=$?

echo "================================================================"
if [ ${EXIT_CODE} -eq 0 ]; then
    echo " CONTINUOUS CERTIFICATION COMPLETE (OK) "
else
    echo " CONTINUOUS CERTIFICATION FAILED (exit ${EXIT_CODE})"
fi
echo "================================================================"
exit ${EXIT_CODE}

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1,11 @@
#!/usr/bin/env python3
"""Wrapper for launch_paper_portfolio.py - now located in prod/ directory.

Re-executes the relocated entry point with corrected ``__file__`` and cwd so
the target script's own relative-path logic keeps working.
"""
import os
import sys
from pathlib import Path

# The real entry point lives one directory above this wrapper.
main_file = Path(__file__).parent.parent / "launch_paper_portfolio.py"
# Run the main file with proper __file__ context; it resolves data paths
# relative to its own directory.
os.chdir(main_file.parent)
sys.argv[0] = str(main_file)
# read_text() closes the file (the previous bare open() leaked the handle),
# and compile() stamps the real filename so tracebacks point at the source.
source = main_file.read_text(encoding="utf-8")
exec(compile(source, str(main_file), "exec"),
     {'__file__': str(main_file), '__name__': '__main__'})

7
prod/ops/resume_session.sh Executable file
View File

@@ -0,0 +1,7 @@
#!/bin/bash
# Resume the 4 Services + Prefect Architecture Session
# (full session backup details are listed in prod/ops/SESSION_INFO.txt).
echo "Resuming session: c23a69c5-ba4a-41c4-8624-05114e8fd9ea"
echo "Description: 4 Services + Prefect Architecture (Pre-Reboot)"
echo ""
# Abort if the ops directory is missing rather than resuming from the wrong cwd.
cd /mnt/dolphinng5_predict/prod/ops || exit 1
kimi --session c23a69c5-ba4a-41c4-8624-05114e8fd9ea

257
prod/ops/supervisord_restart.py Executable file
View File

@@ -0,0 +1,257 @@
#!/usr/bin/env python3
"""
Dolphin Supervisord Full Restart
=================================
Performs a clean, full supervisord restart — the only way to fix broken
stdout log pipes after individual process restarts.
Usage:
python3 prod/ops/supervisord_restart.py [--stop-only] [--start-only]
What it does:
1. Snapshot current HZ state (capital, posture) for safety
2. Gracefully stop all supervised programs (SIGTERM → wait)
3. Shutdown supervisord itself
4. Wait for PID file to disappear (confirms clean exit)
5. Relaunch supervisord as daemon
6. Wait for all expected programs to reach RUNNING
7. Verify HZ state is intact (capital preserved)
8. Print final status report
Safety:
- Never touches HZ data, CH data, or trade logs
- Verifies capital checkpoint survives restart
- Aborts if supervisord doesn't come up within timeout
"""
import sys
import os
import time
import subprocess
import json
import argparse
from pathlib import Path
# ── Config ────────────────────────────────────────────────────────────────────
# Absolute paths for the supervisor binaries and this deployment's
# config / PID / log locations. Edit here if the install layout changes.
SUPERVISORD_BIN = "/usr/local/bin/supervisord"
SUPERVISORCTL = "/usr/local/bin/supervisorctl"  # or same as above via args
CONFIG = "/mnt/dolphinng5_predict/prod/supervisor/dolphin-supervisord.conf"
PIDFILE = "/mnt/dolphinng5_predict/prod/supervisor/run/supervisord.pid"
LOGDIR = Path("/mnt/dolphinng5_predict/prod/supervisor/logs")
# Programs that must reach RUNNING state before we declare success.
# Names are supervisord "group:program" identifiers as shown by
# `supervisorctl status`.
EXPECTED_RUNNING = [
    "dolphin:nautilus_trader",
    "dolphin:scan_bridge",
    "dolphin_data:acb_processor",
    "dolphin_data:exf_fetcher",
    "dolphin_data:meta_health",
    "dolphin_data:obf_universe",
    "dolphin_data:system_stats",
]
STOP_TIMEOUT_S = 30   # max seconds to wait for clean stop
START_TIMEOUT_S = 60  # max seconds to wait for all programs RUNNING
# ── Helpers ───────────────────────────────────────────────────────────────────
def log(msg: str):
    """Print *msg* to stdout with an HH:MM:SS timestamp prefix, unbuffered."""
    print("[{}] {}".format(time.strftime("%H:%M:%S"), msg), flush=True)
def ctl(*args) -> tuple[int, str, str]:
    """Invoke supervisorctl against our CONFIG; return (rc, stdout, stderr)."""
    proc = subprocess.run(
        [SUPERVISORCTL, "-c", CONFIG, *args],
        capture_output=True,
        text=True,
    )
    return proc.returncode, proc.stdout.strip(), proc.stderr.strip()
def supervisord_pid() -> int | None:
    """Return the live supervisord PID from PIDFILE, or None if absent/stale.

    None means: no PID file, unreadable/garbled file, or the recorded
    process is no longer alive.
    """
    try:
        candidate = int(Path(PIDFILE).read_text().strip())
    except (FileNotFoundError, ValueError, PermissionError):
        return None
    try:
        os.kill(candidate, 0)  # signal 0: liveness probe, delivers nothing
    except (ProcessLookupError, PermissionError):
        return None
    return candidate
def parse_status(output: str) -> dict[str, str]:
    """Parse `supervisorctl status` output into {program_name: state}.

    Lines with fewer than two whitespace-separated fields are skipped;
    extra fields (pid, uptime) are ignored.
    """
    return {
        fields[0]: fields[1]
        for fields in (line.split() for line in output.splitlines())
        if len(fields) >= 2
    }
def hz_capital() -> float | None:
"""Read capital_checkpoint from HZ. Returns None on any failure."""
try:
sys.path.insert(0, "/mnt/dolphinng5_predict")
import hazelcast
hz = hazelcast.HazelcastClient(
cluster_name="dolphin",
cluster_members=["localhost:5701"],
connection_timeout=3.0,
)
raw = hz.get_map("DOLPHIN_STATE_BLUE").blocking().get("capital_checkpoint")
hz.shutdown()
return json.loads(raw)["capital"] if raw else None
except Exception as e:
return None
# ── Main phases ───────────────────────────────────────────────────────────────
def phase_snapshot():
    """Phase 0: record the pre-restart capital checkpoint (best-effort).

    Returns the capital read from HZ, or None when it could not be read —
    phase_verify later compares against this value.
    """
    log("=== Phase 0: Pre-restart HZ snapshot ===")
    capital = hz_capital()
    if capital is None:
        log(" WARNING: Could not read HZ capital (will verify post-start)")
    else:
        log(f" Capital checkpoint: ${capital:,.2f}")
    return capital
def phase_stop():
    """Phases 1-2: gracefully stop all programs, then shut supervisord down.

    Each wait is bounded by STOP_TIMEOUT_S; a timeout is logged as a warning
    rather than aborting, so the operator can intervene manually.
    """
    log("=== Phase 1: Stopping all programs ===")
    daemon_pid = supervisord_pid()
    if daemon_pid is None:
        log(" Supervisord not running — nothing to stop")
        return

    # Ask supervisorctl to stop every supervised program gracefully.
    log(f" supervisorctl stop all (supervisord PID={daemon_pid})")
    rc, out, err = ctl("stop", "all")
    log(f" {out or err or 'ok'}")

    # Poll until nothing reports RUNNING, or the stop timeout elapses.
    stop_deadline = time.time() + STOP_TIMEOUT_S
    while time.time() < stop_deadline:
        _, status_out, _ = ctl("status")
        still_up = [name for name, state in parse_status(status_out).items()
                    if state == "RUNNING"]
        if not still_up:
            break
        log(f" Still running: {still_up}")
        time.sleep(2)

    log("=== Phase 2: Shutting down supervisord ===")
    rc, out, err = ctl("shutdown")
    log(f" {out or err or 'ok'}")

    # Wait for the PID file / process to vanish — confirms a clean exit.
    gone_deadline = time.time() + STOP_TIMEOUT_S
    while time.time() < gone_deadline and supervisord_pid() is not None:
        time.sleep(1)

    if supervisord_pid() is None:
        log(" Supervisord stopped cleanly.")
    else:
        log(" WARNING: Supervisord PID still alive — may need manual kill")
def phase_start():
    """Phase 3: relaunch supervisord as a daemon and wait for its PID file.

    Exits the process with status 1 if the launch command fails or the
    daemon's PID file does not appear within 10 seconds.
    """
    log("=== Phase 3: Launching supervisord ===")
    if supervisord_pid() is not None:
        log(f" Already running (PID={supervisord_pid()}) — skipping launch")
        return

    launch = [SUPERVISORD_BIN, "-c", CONFIG]
    log(f" {' '.join(launch)}")
    result = subprocess.run(launch, capture_output=True, text=True)
    if result.returncode != 0:
        log(f" ERROR launching supervisord: {result.stderr}")
        sys.exit(1)

    # Give the daemon up to 10s to write its PID file.
    deadline = time.time() + 10
    while time.time() < deadline and supervisord_pid() is None:
        time.sleep(0.5)

    pid = supervisord_pid()
    if not pid:
        log(" ERROR: supervisord did not start")
        sys.exit(1)
    log(f" Supervisord started (PID={pid})")
def phase_wait_running():
    """Phase 4: poll supervisorctl until every EXPECTED_RUNNING program is up.

    Logs a status snapshot whenever the observed states change; on timeout,
    prints the final raw status and continues (phase_verify reports severity).
    """
    log(f"=== Phase 4: Waiting for programs to reach RUNNING (timeout={START_TIMEOUT_S}s) ===")
    deadline = time.time() + START_TIMEOUT_S
    last_states = {}
    while time.time() < deadline:
        rc, out, _ = ctl("status")
        states = parse_status(out)
        not_running = [n for n in EXPECTED_RUNNING if states.get(n) != "RUNNING"]
        if not not_running:
            log(" All expected programs RUNNING.")
            break
        if states != last_states:
            for name, state in sorted(states.items()):
                # Per-state status glyph. The original expression had all three
                # branches as "" (glyphs evidently lost in an encoding pass,
                # making it dead code) — restored: ✓ running, ⏳ coming up,
                # ✗ anything else.
                marker = "✓" if state == "RUNNING" else "⏳" if state in ("STARTING", "BACKOFF") else "✗"
                log(f" {marker} {name:<40} {state}")
            last_states = states
        time.sleep(3)
    else:
        # while/else: the loop exhausted its deadline without a break.
        log(" WARNING: Timeout waiting for programs. Final state:")
        rc, out, _ = ctl("status")
        print(out)
def phase_verify(pre_capital: float | None):
    """Phase 5: confirm services are healthy and capital survived the restart.

    Args:
        pre_capital: capital read before the restart (from phase_snapshot),
            or None if it could not be read — then the comparison is skipped.
    """
    log("=== Phase 5: Post-restart verification ===")
    # Status: everything must be RUNNING or deliberately STOPPED.
    rc, out, _ = ctl("status")
    states = parse_status(out)
    all_ok = True
    for name, state in sorted(states.items()):
        if name in ("dolphin:clean_arch_trader", "dolphin:paper_portfolio"):
            continue  # expected STOPPED
        # Status glyph — the original ternary had both branches as "" (glyphs
        # lost in an encoding pass; "⚠" below survived). Restored: ✓ / ✗.
        marker = "✓" if state == "RUNNING" else "✗"
        log(f" {marker} {name:<40} {state}")
        if state not in ("RUNNING", "STOPPED"):
            all_ok = False
    # HZ capital: must match the pre-restart snapshot within one cent.
    cap = hz_capital()
    if cap is not None:
        match = "✓" if (pre_capital is None or abs(cap - pre_capital) < 0.01) else "⚠ CHANGED"
        log(f" Capital: ${cap:,.2f} {match}")
    else:
        log(" WARNING: Could not verify HZ capital post-start")
    if all_ok:
        log("=== Restart COMPLETE — all services nominal ===")
    else:
        log("=== Restart done — some services need attention (see above) ===")
# ── Entry point ───────────────────────────────────────────────────────────────
def main():
    """CLI entry point: snapshot → stop → start → wait → verify.

    Flags:
        --stop-only   stop everything and exit (no relaunch)
        --start-only  skip the stop phase and go straight to launch
    """
    parser = argparse.ArgumentParser(description="Dolphin supervisord full restart")
    parser.add_argument("--stop-only", action="store_true", help="Only stop, don't relaunch")
    parser.add_argument("--start-only", action="store_true", help="Only start, don't stop first")
    opts = parser.parse_args()

    log("Dolphin Supervisord Restart")
    log(f" Config : {CONFIG}")
    log(f" PID now: {supervisord_pid()}")

    pre_cap = phase_snapshot()
    if not opts.start_only:
        phase_stop()
    if opts.stop_only:
        return
    time.sleep(1)  # brief pause before relaunch
    phase_start()
    phase_wait_running()
    phase_verify(pre_cap)


if __name__ == "__main__":
    main()