initial: import DOLPHIN baseline 2026-04-21 from dolphinng5_predict working tree
Includes core prod + GREEN/BLUE subsystems: - prod/ (BLUE harness, configs, scripts, docs) - nautilus_dolphin/ (GREEN Nautilus-native impl + dvae/ preserved) - adaptive_exit/ (AEM engine + models/bucket_assignments.pkl) - Observability/ (EsoF advisor, TUI, dashboards) - external_factors/ (EsoF producer) - mc_forewarning_qlabs_fork/ (MC regime/envelope) Excludes runtime caches, logs, backups, and reproducible artifacts per .gitignore.
This commit is contained in:
19023
prod/ops/KIMI_Session_Rearch_Services-Prefect.md
Executable file
19023
prod/ops/KIMI_Session_Rearch_Services-Prefect.md
Executable file
File diff suppressed because it is too large
Load Diff
32
prod/ops/SESSION_INFO.txt
Executable file
32
prod/ops/SESSION_INFO.txt
Executable file
@@ -0,0 +1,32 @@
|
||||
=== KIMI SESSION BACKUP ===
|
||||
|
||||
Session ID: c23a69c5-ba4a-41c4-8624-05114e8fd9ea
|
||||
Description: 4 Services + Prefect Architecture (Pre-Reboot)
|
||||
Status: Stuck system, kernel deadlock, required reboot
|
||||
|
||||
=== FILES ===
|
||||
- KIMI_Session_Rearch_Services-Prefect.md (684K) - Full human-readable transcript
|
||||
- kimi_session_backup/ - Raw session files (wire.jsonl, context.jsonl)
|
||||
|
||||
=== LOCATIONS ===
|
||||
1. /mnt/dolphinng5_predict/prod/ops/KIMI_Session_Rearch_Services-Prefect.md
|
||||
2. /mnt/vids/KIMI_Session_Rearch_Services-Prefect.md (mirror)
|
||||
|
||||
=== RESUME SESSION ===
|
||||
|
||||
Option 1: Use helper script
|
||||
./resume_session.sh
|
||||
|
||||
Option 2: Manual
|
||||
cd /mnt/dolphinng5_predict/prod/ops
|
||||
kimi --session c23a69c5-ba4a-41c4-8624-05114e8fd9ea
|
||||
|
||||
Option 3: Auto-continue (uses last_session_id from kimi.json)
|
||||
cd /mnt/dolphinng5_predict/prod/ops
|
||||
kimi --continue
|
||||
|
||||
=== SESSION STATS ===
|
||||
- Total timeouts: 905
|
||||
- "check" commands: 1244
|
||||
- "again" attempts: 52
|
||||
- Final state: Kernel deadlock, forced reboot
|
||||
20
prod/ops/go_trade.sh
Executable file
20
prod/ops/go_trade.sh
Executable file
@@ -0,0 +1,20 @@
|
||||
#!/bin/bash
# ==============================================================================
# DOLPHIN NATIVE EXECUTION CERTIFICATION LAUNCHER
# Executing 100% compliant Nautilus Native Engine logic.
# ==============================================================================

echo "================================================================"
echo " Launching DOLPHIN NATIVE 56-Day Backtest Certification "
echo "================================================================"
echo "[INFO] Running with full 48-asset native Event Injection"
echo "[INFO] Simulated Events: ~22,000,000 ticks"
echo "[INFO] Expected Runtime: ~20 Minutes (Native Rust Event Loop)"

# Switch to Siloqy environment Python and execute the native harness.
# Resolve the project root from the script's own location (two levels up from
# prod/ops/), matching go_trade_continuous.sh — the previous bare `cd ../..`
# was relative to the caller's CWD and broke when the script was invoked from
# any directory other than prod/ops/. Abort if the cd fails so we never run
# the harness from the wrong directory.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
cd "${SCRIPT_DIR}/../.." || exit 1
"C:/Users/Lenovo/Documents/- Siloqy/Scripts/python.exe" prod/nautilus_native_backtest.py

echo "================================================================"
echo " CERTIFICATION COMPLETE "
echo "================================================================"
|
||||
22
prod/ops/go_trade_continuous.bat
Executable file
22
prod/ops/go_trade_continuous.bat
Executable file
@@ -0,0 +1,22 @@
|
||||
@echo off
REM ==============================================================================
REM DOLPHIN CONTINUOUS NATIVE BACKTEST LAUNCHER
REM Requires 32GB+ RAM. Runs single continuous BacktestEngine state over 56 Days.
REM ==============================================================================

echo ================================================================
echo Launching CONTINUOUS Native 56-Day Backtest
echo ================================================================
echo [INFO] Running with full 48-asset native Event Injection
echo [INFO] Simulated Events: ~22,000,000 ticks in ONE continuous batch!
echo [INFO] Memory footprint: ~15GB expected
REM %% below is the batch escape for a literal percent sign.
echo [INFO] This solves the -42%% state loss by preventing Engine re-initialization

REM Execute the native harness directly from the parent dir.
REM NOTE(review): cd is relative to the CALLER's working directory, not the
REM script location — assumes this is launched from prod\ops\; confirm callers.
cd ..\..
"C:\Users\Lenovo\Documents\- Siloqy\Scripts\python.exe" prod\nautilus_native_continuous.py

echo ================================================================
echo CONTINUOUS CERTIFICATION COMPLETE
echo ================================================================
REM Keep the console window open so the operator can read the final output.
pause
|
||||
52
prod/ops/go_trade_continuous.sh
Executable file
52
prod/ops/go_trade_continuous.sh
Executable file
@@ -0,0 +1,52 @@
|
||||
#!/bin/bash
# ==============================================================================
# DOLPHIN CONTINUOUS NATIVE BACKTEST LAUNCHER
# Portable launcher: probes a priority list of candidate Python interpreters
# (DOLPHIN_PYTHON > siloqy venv next to the project > canonical deployment
# path > system python3) rather than hard-coding one path.
# Requires: siloqy-env Python, 32GB+ RAM.
# Runs single continuous BacktestEngine state over 56 Days.
# ==============================================================================

echo "================================================================"
echo " Launching CONTINUOUS Native 56-Day Backtest "
echo "================================================================"
echo "[INFO] Running with full 48-asset native Event Injection"
echo "[INFO] Simulated Events: ~22,000,000 ticks in ONE continuous batch!"
echo "[INFO] Memory footprint: ~15GB expected"
echo "[INFO] vol threshold : 0.00026414 (gold standard)"
echo "[INFO] min_irp_align : 0.0 (gold standard)"

# Resolve project root (two levels up from prod/ops/), independent of the
# caller's working directory.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(cd "${SCRIPT_DIR}/../.." && pwd)"
echo "[INFO] Project root: ${PROJECT_ROOT}"

# Detect Python interpreter
# Priority: DOLPHIN_PYTHON env var > siloqy venv > system python3
if [ -n "${DOLPHIN_PYTHON}" ]; then
    PYTHON="${DOLPHIN_PYTHON}"
elif [ -f "${PROJECT_ROOT}/../siloqy-env/bin/python" ]; then
    PYTHON="${PROJECT_ROOT}/../siloqy-env/bin/python"
elif [ -f "/mnt/dolphinng5_predict/../siloqy-env/bin/python" ]; then
    # Fallback: canonical deployment path, for when PROJECT_ROOT is a copy.
    PYTHON="/mnt/dolphinng5_predict/../siloqy-env/bin/python"
elif command -v python3 &>/dev/null; then
    PYTHON="python3"
else
    echo "[ERROR] No Python interpreter found. Set DOLPHIN_PYTHON env var."
    exit 1
fi

echo "[INFO] Python: ${PYTHON}"
"${PYTHON}" --version 2>&1

# Run the harness from the project root so its relative paths resolve.
cd "${PROJECT_ROOT}"
"${PYTHON}" prod/nautilus_native_continuous.py
EXIT_CODE=$?

echo "================================================================"
if [ ${EXIT_CODE} -eq 0 ]; then
    echo " CONTINUOUS CERTIFICATION COMPLETE (OK) "
else
    echo " CONTINUOUS CERTIFICATION FAILED (exit ${EXIT_CODE})"
fi
echo "================================================================"
# Propagate the harness exit status to the caller (CI / supervisor).
exit ${EXIT_CODE}
|
||||
558
prod/ops/kimi_session_backup/context.jsonl
Executable file
558
prod/ops/kimi_session_backup/context.jsonl
Executable file
File diff suppressed because one or more lines are too long
1423
prod/ops/kimi_session_backup/context_1.jsonl
Executable file
1423
prod/ops/kimi_session_backup/context_1.jsonl
Executable file
File diff suppressed because one or more lines are too long
1593
prod/ops/kimi_session_backup/context_2.jsonl
Executable file
1593
prod/ops/kimi_session_backup/context_2.jsonl
Executable file
File diff suppressed because one or more lines are too long
3812
prod/ops/kimi_session_backup/wire.jsonl
Executable file
3812
prod/ops/kimi_session_backup/wire.jsonl
Executable file
File diff suppressed because one or more lines are too long
11
prod/ops/launch_paper_portfolio.py
Executable file
11
prod/ops/launch_paper_portfolio.py
Executable file
@@ -0,0 +1,11 @@
|
||||
#!/usr/bin/env python3
"""Wrapper for launch_paper_portfolio.py - now located in prod/ directory."""
import os
import sys
from pathlib import Path

# Run the main file with proper __file__ context:
# chdir next to the real script and make argv[0] point at it so anything it
# derives from its own location/invocation behaves as if run directly.
main_file = Path(__file__).parent.parent / "launch_paper_portfolio.py"
os.chdir(main_file.parent)
sys.argv[0] = str(main_file)
# read_text() reads and closes the file (the old exec(open(...).read()) leaked
# the handle); compile() with the real filename keeps tracebacks readable.
exec(compile(main_file.read_text(), str(main_file), "exec"),
     {'__file__': str(main_file), '__name__': '__main__'})
|
||||
7
prod/ops/resume_session.sh
Executable file
7
prod/ops/resume_session.sh
Executable file
@@ -0,0 +1,7 @@
|
||||
#!/bin/bash
# Resume the "4 Services + Prefect Architecture" kimi session (pre-reboot).
SESSION_ID="c23a69c5-ba4a-41c4-8624-05114e8fd9ea"

echo "Resuming session: ${SESSION_ID}"
echo "Description: 4 Services + Prefect Architecture (Pre-Reboot)"
echo ""

cd /mnt/dolphinng5_predict/prod/ops
kimi --session "${SESSION_ID}"
|
||||
257
prod/ops/supervisord_restart.py
Executable file
257
prod/ops/supervisord_restart.py
Executable file
@@ -0,0 +1,257 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Dolphin Supervisord Full Restart
|
||||
=================================
|
||||
Performs a clean, full supervisord restart — the only way to fix broken
|
||||
stdout log pipes after individual process restarts.
|
||||
|
||||
Usage:
|
||||
python3 prod/ops/supervisord_restart.py [--stop-only] [--start-only]
|
||||
|
||||
What it does:
|
||||
1. Snapshot current HZ state (capital, posture) for safety
|
||||
2. Gracefully stop all supervised programs (SIGTERM → wait)
|
||||
3. Shutdown supervisord itself
|
||||
4. Wait for PID file to disappear (confirms clean exit)
|
||||
5. Relaunch supervisord as daemon
|
||||
6. Wait for all expected programs to reach RUNNING
|
||||
7. Verify HZ state is intact (capital preserved)
|
||||
8. Print final status report
|
||||
|
||||
Safety:
|
||||
- Never touches HZ data, CH data, or trade logs
|
||||
- Verifies capital checkpoint survives restart
|
||||
- Aborts if supervisord doesn't come up within timeout
|
||||
"""
|
||||
|
||||
import sys
|
||||
import os
|
||||
import time
|
||||
import subprocess
|
||||
import json
|
||||
import argparse
|
||||
from pathlib import Path
|
||||
|
||||
# ── Config ────────────────────────────────────────────────────────────────────

# Absolute paths to the supervisor toolchain and the Dolphin deployment.
SUPERVISORD_BIN = "/usr/local/bin/supervisord"
SUPERVISORCTL = "/usr/local/bin/supervisorctl"  # or same as above via args
CONFIG = "/mnt/dolphinng5_predict/prod/supervisor/dolphin-supervisord.conf"
PIDFILE = "/mnt/dolphinng5_predict/prod/supervisor/run/supervisord.pid"
LOGDIR = Path("/mnt/dolphinng5_predict/prod/supervisor/logs")

# Programs that must reach RUNNING state before we declare success.
# Names are "group:program" as reported by `supervisorctl status`.
EXPECTED_RUNNING = [
    "dolphin:nautilus_trader",
    "dolphin:scan_bridge",
    "dolphin_data:acb_processor",
    "dolphin_data:exf_fetcher",
    "dolphin_data:meta_health",
    "dolphin_data:obf_universe",
    "dolphin_data:system_stats",
]

STOP_TIMEOUT_S = 30  # max seconds to wait for clean stop
START_TIMEOUT_S = 60  # max seconds to wait for all programs RUNNING
|
||||
|
||||
# ── Helpers ───────────────────────────────────────────────────────────────────
|
||||
|
||||
def log(msg: str):
    """Print *msg* prefixed with a [HH:MM:SS] timestamp, flushing immediately."""
    print(f"[{time.strftime('%H:%M:%S')}] {msg}", flush=True)
|
||||
|
||||
def ctl(*args) -> tuple[int, str, str]:
    """Run supervisorctl with our config. Returns (rc, stdout, stderr)."""
    completed = subprocess.run(
        [SUPERVISORCTL, "-c", CONFIG, *args],
        capture_output=True,
        text=True,
    )
    return completed.returncode, completed.stdout.strip(), completed.stderr.strip()
|
||||
|
||||
def supervisord_pid() -> int | None:
    """Return the live supervisord PID from PIDFILE, or None.

    None covers: pidfile missing/unreadable, garbage contents, process gone,
    or no permission to signal it. Liveness is probed with signal 0.
    """
    try:
        candidate = int(Path(PIDFILE).read_text().strip())
    except (FileNotFoundError, ValueError, PermissionError):
        return None
    try:
        os.kill(candidate, 0)  # signal 0: existence probe, delivers nothing
    except (ProcessLookupError, PermissionError):
        return None
    return candidate
|
||||
|
||||
def parse_status(output: str) -> dict[str, str]:
    """Parse supervisorctl status output → {name: state}.

    Each status line is "name STATE ...extra"; lines with fewer than two
    whitespace-separated fields (blank lines, stray text) are skipped.
    """
    result: dict[str, str] = {}
    for row in output.splitlines():
        fields = row.split()
        if len(fields) >= 2:
            result[fields[0]] = fields[1]
    return result
|
||||
|
||||
def hz_capital() -> float | None:
|
||||
"""Read capital_checkpoint from HZ. Returns None on any failure."""
|
||||
try:
|
||||
sys.path.insert(0, "/mnt/dolphinng5_predict")
|
||||
import hazelcast
|
||||
hz = hazelcast.HazelcastClient(
|
||||
cluster_name="dolphin",
|
||||
cluster_members=["localhost:5701"],
|
||||
connection_timeout=3.0,
|
||||
)
|
||||
raw = hz.get_map("DOLPHIN_STATE_BLUE").blocking().get("capital_checkpoint")
|
||||
hz.shutdown()
|
||||
return json.loads(raw)["capital"] if raw else None
|
||||
except Exception as e:
|
||||
return None
|
||||
|
||||
# ── Main phases ───────────────────────────────────────────────────────────────
|
||||
|
||||
def phase_snapshot():
    """Phase 0: record the pre-restart HZ capital; return it (or None)."""
    log("=== Phase 0: Pre-restart HZ snapshot ===")
    capital = hz_capital()
    if capital is None:
        log(" WARNING: Could not read HZ capital (will verify post-start)")
    else:
        log(f" Capital checkpoint: ${capital:,.2f}")
    return capital
|
||||
|
||||
def phase_stop():
    """Phases 1-2: gracefully stop all programs, then supervisord itself."""
    log("=== Phase 1: Stopping all programs ===")
    pid = supervisord_pid()
    if pid is None:
        log(" Supervisord not running — nothing to stop")
        return

    # Ask supervisord to stop every supervised program gracefully.
    log(f" supervisorctl stop all (supervisord PID={pid})")
    rc, out, err = ctl("stop", "all")
    log(f" {out or err or 'ok'}")

    # Poll until nothing reports RUNNING, or the stop timeout elapses.
    stop_deadline = time.time() + STOP_TIMEOUT_S
    while time.time() < stop_deadline:
        _, status_out, _ = ctl("status")
        still_up = [name for name, state in parse_status(status_out).items()
                    if state == "RUNNING"]
        if not still_up:
            break
        log(f" Still running: {still_up}")
        time.sleep(2)

    log("=== Phase 2: Shutting down supervisord ===")
    rc, out, err = ctl("shutdown")
    log(f" {out or err or 'ok'}")

    # Confirm a clean exit by waiting for the PID (file + process) to vanish.
    stop_deadline = time.time() + STOP_TIMEOUT_S
    while time.time() < stop_deadline and supervisord_pid() is not None:
        time.sleep(1)

    if supervisord_pid() is None:
        log(" Supervisord stopped cleanly.")
    else:
        log(" WARNING: Supervisord PID still alive — may need manual kill")
|
||||
|
||||
def phase_start():
    """Phase 3: launch supervisord as a daemon and wait for its PID file."""
    log("=== Phase 3: Launching supervisord ===")
    existing = supervisord_pid()
    if existing is not None:
        log(f" Already running (PID={existing}) — skipping launch")
        return

    launch_cmd = [SUPERVISORD_BIN, "-c", CONFIG]
    log(f" {' '.join(launch_cmd)}")
    result = subprocess.run(launch_cmd, capture_output=True, text=True)
    if result.returncode != 0:
        log(f" ERROR launching supervisord: {result.stderr}")
        sys.exit(1)

    # supervisord daemonizes, so poll briefly for its PID file to appear.
    deadline = time.time() + 10
    while time.time() < deadline and supervisord_pid() is None:
        time.sleep(0.5)

    pid = supervisord_pid()
    if pid:
        log(f" Supervisord started (PID={pid})")
    else:
        log(" ERROR: supervisord did not start")
        sys.exit(1)
|
||||
|
||||
def phase_wait_running():
    """Phase 4: poll until every EXPECTED_RUNNING program reports RUNNING.

    Logs a status table whenever the observed states change between polls;
    warns (but does not abort) if START_TIMEOUT_S elapses first.
    """
    log(f"=== Phase 4: Waiting for programs to reach RUNNING (timeout={START_TIMEOUT_S}s) ===")
    deadline = time.time() + START_TIMEOUT_S
    last_states = {}  # previous poll's {name: state}, to avoid re-logging unchanged tables

    while time.time() < deadline:
        rc, out, _ = ctl("status")
        states = parse_status(out)

        # Programs we still need; absent names count as not-RUNNING too.
        not_running = [n for n in EXPECTED_RUNNING if states.get(n) != "RUNNING"]
        if not not_running:
            log(" All expected programs RUNNING.")
            break

        # Only print the table when something actually changed.
        if states != last_states:
            for name, state in sorted(states.items()):
                marker = "✓" if state == "RUNNING" else "⏳" if state in ("STARTING", "BACKOFF") else "✗"
                log(f" {marker} {name:<40} {state}")
            last_states = states

        time.sleep(3)
    else:
        # while/else: this branch runs only when the loop exhausted WITHOUT
        # break — i.e. the deadline passed before everything reached RUNNING.
        log(" WARNING: Timeout waiting for programs. Final state:")
        rc, out, _ = ctl("status")
        print(out)
|
||||
|
||||
def phase_verify(pre_capital: float | None):
    """Phase 5: verify service states and that HZ capital survived restart."""
    log("=== Phase 5: Post-restart verification ===")

    # Programs intentionally left STOPPED — excluded from the report.
    skipped = ("dolphin:clean_arch_trader", "dolphin:paper_portfolio")

    _, status_out, _ = ctl("status")
    healthy = True
    for prog, state in sorted(parse_status(status_out).items()):
        if prog in skipped:
            continue  # expected STOPPED
        log(f" {'✓' if state == 'RUNNING' else '✗'} {prog:<40} {state}")
        if state not in ("RUNNING", "STOPPED"):
            healthy = False

    # Confirm the capital checkpoint is intact (tolerance: one cent).
    post_cap = hz_capital()
    if post_cap is None:
        log(" WARNING: Could not verify HZ capital post-start")
    else:
        unchanged = pre_capital is None or abs(post_cap - pre_capital) < 0.01
        log(f" Capital: ${post_cap:,.2f} {'✓' if unchanged else '⚠ CHANGED'}")

    if healthy:
        log("=== Restart COMPLETE — all services nominal ===")
    else:
        log("=== Restart done — some services need attention (see above) ===")
|
||||
|
||||
# ── Entry point ───────────────────────────────────────────────────────────────
|
||||
|
||||
def main():
    """CLI entry point: full restart, or just the stop/start half via flags."""
    cli = argparse.ArgumentParser(description="Dolphin supervisord full restart")
    cli.add_argument("--stop-only", action="store_true", help="Only stop, don't relaunch")
    cli.add_argument("--start-only", action="store_true", help="Only start, don't stop first")
    opts = cli.parse_args()

    log("Dolphin Supervisord Restart")
    log(f" Config : {CONFIG}")
    log(f" PID now: {supervisord_pid()}")

    # Snapshot capital before touching anything, for post-restart comparison.
    pre_cap = phase_snapshot()

    if not opts.start_only:
        phase_stop()

    if not opts.stop_only:
        time.sleep(1)  # brief pause before relaunch
        phase_start()
        phase_wait_running()
        phase_verify(pre_cap)


if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user