# Includes core prod + GREEN/BLUE subsystems:
#   - prod/ (BLUE harness, configs, scripts, docs)
#   - nautilus_dolphin/ (GREEN Nautilus-native impl + dvae/ preserved)
#   - adaptive_exit/ (AEM engine + models/bucket_assignments.pkl)
#   - Observability/ (EsoF advisor, TUI, dashboards)
#   - external_factors/ (EsoF producer)
#   - mc_forewarning_qlabs_fork/ (MC regime/envelope)
# Excludes runtime caches, logs, backups, and reproducible artifacts per .gitignore.
# (File metadata: 425 lines, 17 KiB, Python, executable.)
#!/usr/bin/env python3
"""
ESOF PERSISTENCE SERVICE v1.0
==============================
Off-hot-path persistence for Esoteric Factors snapshots.

Design mirrors ExFPersistenceService (prod/exf_persistence.py) for symmetry:
- Background thread, non-blocking to hot path
- Rolling deque of HISTORY_MAXLEN snapshots (default 60 × 5s = 5-min window)
- Flushes the full rolling block to NPZ every FLUSH_INTERVAL_S seconds
- Writing *N rolling prior data points* (not a single overwritten flat file)
- Same directory as production scan files: /mnt/ng6_data/eigenvalues/{YYYY-MM-DD}/
- Same __Indicators.npz naming convention as scan and extf files

DOLPHIN format:
    /mnt/ng6_data/eigenvalues/{YYYY-MM-DD}/esof_snapshot_{YYYY-MM-DD_HH-MM-SS}__Indicators.npz

NPZ schema (mirrors api_names / api_indicators / api_success pattern):
    esof_names  : (N_IND,)        U32     — indicator names (fixed, see ESOF_NAMES)
    esof_values : (N_PTS, N_IND)  float64 — rolling time-series rows
    timestamps  : (N_PTS,)        U64     — ISO UTC per row
    unix_times  : (N_PTS,)        int64   — unix epoch per row
    moon_phases : (N_PTS,)        U32     — categorical: moon phase name per row
    sessions    : (N_PTS,)        U32     — categorical: liquidity session per row
    _metadata   : scalar str              — JSON: version, service, flush_time, n_points

Numeric indicators (24 total):
    moon_illumination, mercury_retrograde,
    population_weighted_hour, liquidity_weighted_hour, market_cycle_position,
    fib_closest_minute, fib_harmonic_strength,
    cal_year, cal_month, cal_day, cal_hour, cal_minute, cal_dow, cal_woy,
    rt_Americas_hour, rt_Americas_tradfi,
    rt_EMEA_hour, rt_EMEA_tradfi,
    rt_South_Asia_hour, rt_South_Asia_tradfi,
    rt_East_Asia_hour, rt_East_Asia_tradfi,
    rt_Oceania_SEA_hour, rt_Oceania_SEA_tradfi,
"""
# Standard library
import hashlib
import json
import logging
import threading
import time
from collections import deque
from dataclasses import dataclass, field
from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple

# Third-party
import numpy as np

logger = logging.getLogger(__name__)

# ── Configuration ─────────────────────────────────────────────────────────────
DATA_DIR = Path("/mnt/ng6_data/eigenvalues")
FLUSH_INTERVAL_S = 300   # 5 minutes — off hot path (mirrors ExF)
MAX_FILE_AGE_DAYS = 7    # keep 7 days of history (mirrors ExF)
HISTORY_MAXLEN = 60      # 60 × 5s = 5-min rolling window per flush block
|
||
|
||
# ── Indicator definition ──────────────────────────────────────────────────────
# Ordered list of numeric indicator names stored in esof_values columns.
# Order is FROZEN — do not reorder without a schema-version bump.
ESOF_NAMES: Tuple[str, ...] = (
    "moon_illumination",
    "mercury_retrograde",
    "population_weighted_hour",
    "liquidity_weighted_hour",
    "market_cycle_position",
    "fib_closest_minute",
    "fib_harmonic_strength",
    "cal_year",
    "cal_month",
    "cal_day",
    "cal_hour",
    "cal_minute",
    "cal_dow",
    "cal_woy",
    "rt_Americas_hour",
    "rt_Americas_tradfi",
    "rt_EMEA_hour",
    "rt_EMEA_tradfi",
    "rt_South_Asia_hour",
    "rt_South_Asia_tradfi",
    "rt_East_Asia_hour",
    "rt_East_Asia_tradfi",
    "rt_Oceania_SEA_hour",
    "rt_Oceania_SEA_tradfi",
)
N_IND = len(ESOF_NAMES)
_NAME_TO_IDX = {n: i for i, n in enumerate(ESOF_NAMES)}

# Regions read from snapshot["regional_times"]; order mirrors the rt_* columns.
_REGIONS: Tuple[str, ...] = ("Americas", "EMEA", "South_Asia", "East_Asia", "Oceania_SEA")


def _extract_row(snapshot: Dict[str, Any]) -> Tuple[np.ndarray, str, str]:
    """
    Flatten a raw EsoF snapshot dict into:
        (values: float64 array len=N_IND, moon_phase: str, session: str)

    Missing/non-numeric values become NaN.

    FIX vs. original: regional `is_tradfi_open` values were eagerly wrapped
    in int(...) while *building* the candidate list — outside the try/except —
    so an explicit None raised TypeError, and a region key present-but-None
    raised AttributeError on `.get`, crashing the hot-path caller. Conversion
    now happens inside the guarded loop and `or {}` shields None sub-dicts.
    Output for well-formed snapshots is unchanged (float(False) == 0.0).
    """
    row = np.full(N_IND, np.nan, dtype=np.float64)
    # `or {}` also covers keys explicitly present with value None.
    cal = snapshot.get("calendar") or {}
    fib = snapshot.get("fibonacci_time") or {}
    rt = snapshot.get("regional_times") or {}

    candidates: List[Tuple[str, Any]] = [
        # Direct scalars
        ("moon_illumination", snapshot.get("moon_illumination")),
        ("mercury_retrograde", snapshot.get("mercury_retrograde")),
        ("population_weighted_hour", snapshot.get("population_weighted_hour")),
        ("liquidity_weighted_hour", snapshot.get("liquidity_weighted_hour")),
        ("market_cycle_position", snapshot.get("market_cycle_position")),
        # fibonacci
        ("fib_closest_minute", fib.get("closest_fib_minute")),
        ("fib_harmonic_strength", fib.get("harmonic_strength")),
        # calendar
        ("cal_year", cal.get("year")),
        ("cal_month", cal.get("month")),
        ("cal_day", cal.get("day_of_month")),
        ("cal_hour", cal.get("hour")),
        ("cal_minute", cal.get("minute")),
        ("cal_dow", cal.get("day_of_week")),
        ("cal_woy", cal.get("week_of_year")),
    ]
    # Regional hour + tradfi-open flag, per region.
    # Missing flag defaults to False (→ 0.0); explicit None stays NaN.
    for region in _REGIONS:
        rdata = rt.get(region) or {}
        candidates.append((f"rt_{region}_hour", rdata.get("hour")))
        candidates.append((f"rt_{region}_tradfi", rdata.get("is_tradfi_open", False)))

    for name, val in candidates:
        if val is None:
            continue  # stays NaN
        try:
            # float() handles bool (True → 1.0), int and float alike.
            row[_NAME_TO_IDX[name]] = float(val)
        except (TypeError, ValueError):
            pass  # non-numeric — stays NaN

    moon_phase = str(snapshot.get("moon_phase_name", ""))
    session = str(snapshot.get("liquidity_session", ""))
    return row, moon_phase, session
|
||
|
||
|
||
# ── Stats dataclass ───────────────────────────────────────────────────────────

@dataclass
class EsoFPersistenceStats:
    """Running counters for EsoFPersistenceService write activity.

    One instance lives on the service; it is exposed (as plain values)
    via EsoFPersistenceService.get_stats().
    """

    files_written: int = 0          # successful NPZ flushes
    files_failed: int = 0           # failed writes / flush-loop errors
    bytes_written: int = 0          # cumulative size of written NPZ files
    points_written: int = 0         # cumulative snapshot rows flushed
    last_write_time: float = 0.0    # time.time() of most recent successful flush
    last_write_path: Optional[Path] = None  # most recent NPZ path, if any
    # Bounded log of recent flush events; the service appends dicts with
    # keys flush_time / path / n_points / size. maxlen=100 keeps it bounded.
    history: deque = field(default_factory=lambda: deque(maxlen=100))
|
||
|
||
|
||
# ── Service ───────────────────────────────────────────────────────────────────

class EsoFPersistenceService:
    """
    Off-hot-path persistence service for Esoteric Factors.

    Maintains a rolling deque of HISTORY_MAXLEN snapshots in memory.
    Every FLUSH_INTERVAL_S seconds the full rolling block is written as a
    single NPZ time-series to DATA_DIR/{YYYY-MM-DD}/.

    Thread model: update_snapshot() is called from the producer's hot path;
    a daemon thread runs _flush_loop(). self._lock guards the rolling deque
    and the stats object; NPZ writes always happen OUTSIDE the lock.

    Usage (mirrors ExFPersistenceService):
        svc = EsoFPersistenceService()
        svc.start()
        # in hot path (called from esof_prefect_flow main loop):
        svc.update_snapshot(data)
        svc.stop()
    """

    def __init__(
        self,
        data_dir: Path = DATA_DIR,
        flush_interval_s: float = FLUSH_INTERVAL_S,
        history_maxlen: int = HISTORY_MAXLEN,
    ):
        """
        Args:
            data_dir: root output directory; per-day subdirs are created under it.
            flush_interval_s: seconds between background flushes.
            history_maxlen: rolling-deque capacity (points per flushed block).
        """
        self.data_dir = Path(data_dir)
        self.flush_interval_s = flush_interval_s
        self._stats = EsoFPersistenceStats()

        # Rolling deque: each entry = (timestamp_iso, unix, row_array, moon_phase, session)
        self._history: deque = deque(maxlen=history_maxlen)
        self._lock = threading.Lock()
        self._running = False
        self._thread: Optional[threading.Thread] = None
        self._stop_event = threading.Event()

        self.data_dir.mkdir(parents=True, exist_ok=True)

    # ── Public API (non-blocking, thread-safe) ────────────────────────────────

    def update_snapshot(self, snapshot: Dict[str, Any]) -> None:
        """
        Called from the hot path every 5s. Extracts and appends one row to
        the rolling deque — O(1), non-blocking.
        """
        ts = snapshot.get("timestamp", datetime.now(timezone.utc).isoformat())
        unix = snapshot.get("unix", int(time.time()))
        row, moon_phase, session = _extract_row(snapshot)
        with self._lock:
            self._history.append((ts, unix, row, moon_phase, session))

    def get_stats(self) -> Dict[str, Any]:
        """Return a point-in-time, JSON-friendly snapshot of write stats."""
        with self._lock:
            return {
                "files_written": self._stats.files_written,
                "files_failed": self._stats.files_failed,
                "bytes_written": self._stats.bytes_written,
                "points_written": self._stats.points_written,
                "history_depth": len(self._history),
                "last_write_time": self._stats.last_write_time,
                "last_write_path": str(self._stats.last_write_path)
                    if self._stats.last_write_path else None,
            }

    def force_flush(self) -> Optional[Path]:
        """
        Immediate flush — for testing/debugging.

        FIX: the original performed the (slow, compressed) NPZ write while
        still HOLDING self._lock, stalling hot-path update_snapshot() calls
        for the whole write. Copy under the lock, write outside — the same
        pattern _flush_loop uses.
        """
        with self._lock:
            history_copy = list(self._history)
        if history_copy:
            return self._write_npz(history_copy)
        return None

    # ── Internal write ────────────────────────────────────────────────────────

    def _write_npz(self, history: list) -> Optional[Path]:
        """
        Write a rolling block of EsoF snapshots to NPZ (plus a .md5 sidecar).

        Returns the written path, or None on failure. Failures are counted
        and logged, never propagated — persistence must not kill the caller.

        NPZ arrays:
            esof_names   (N_IND,)        str — fixed indicator name list
            esof_values  (N_PTS, N_IND)  f64 — time-series rows
            timestamps   (N_PTS,)        str — ISO UTC per point
            unix_times   (N_PTS,)        i64 — unix epoch per point
            moon_phases  (N_PTS,)        str — categorical
            sessions     (N_PTS,)        str — categorical
            _metadata    scalar str          — JSON: version, flush_time, n_points
        """
        try:
            n = len(history)
            now = datetime.now(timezone.utc)
            date_str = now.strftime("%Y-%m-%d")
            ts_str = now.strftime("%Y-%m-%d_%H-%M-%S")

            date_dir = self.data_dir / date_str
            date_dir.mkdir(parents=True, exist_ok=True)

            filename = f"esof_snapshot_{ts_str}__Indicators.npz"
            filepath = date_dir / filename

            # Unzip history tuples
            timestamps = np.array([e[0] for e in history], dtype="U64")
            unix_times = np.array([e[1] for e in history], dtype=np.int64)
            esof_values = np.vstack([e[2] for e in history]).astype(np.float64)  # (N, N_IND)
            moon_phases = np.array([e[3] for e in history], dtype="U32")
            sessions = np.array([e[4] for e in history], dtype="U32")

            metadata = json.dumps({
                "_version": "1.0",
                "_service": "EsoFPersistence",
                "_flush_time": now.isoformat(),
                "_n_points": n,
                "_n_indicators": N_IND,
                "_history_maxlen": self._history.maxlen,
                "_flush_interval_s": self.flush_interval_s,
            })

            np.savez_compressed(
                filepath,
                esof_names=np.array(ESOF_NAMES, dtype="U32"),
                esof_values=esof_values,
                timestamps=timestamps,
                unix_times=unix_times,
                moon_phases=moon_phases,
                sessions=sessions,
                _metadata=np.array(metadata),
            )

            # Checksum sidecar (mirrors ExF pattern).
            # FIX: the sidecar previously wrote a literal "(unknown)" where the
            # filename belongs. Use the standard `md5sum` layout —
            # "<hash>  <filename>" — so it verifies with `md5sum -c`.
            file_bytes = filepath.read_bytes()
            checksum = hashlib.md5(file_bytes).hexdigest()
            (filepath.parent / (filename + ".md5")).write_text(
                f"{checksum}  {filename}\n"
            )

            size = len(file_bytes)
            logger.info(
                "EsoF persisted: %s (%d points × %d indicators, %d bytes)",
                filename, n, N_IND, size,
            )
            return filepath

        except Exception as exc:
            logger.error("EsoF NPZ write failed: %s", exc)
            # FIX: stats are read under the lock in get_stats(); mutate under
            # the same lock (never held here — callers copy-then-release).
            with self._lock:
                self._stats.files_failed += 1
            return None

    def _cleanup_old_files(self) -> int:
        """Remove date directories older than MAX_FILE_AGE_DAYS; return count removed."""
        import shutil  # stdlib; local import keeps module import light

        removed = 0
        cutoff = time.time() - (MAX_FILE_AGE_DAYS * 86400)
        try:
            for date_dir in self.data_dir.iterdir():
                if not date_dir.is_dir():
                    continue
                try:
                    if date_dir.stat().st_mtime < cutoff:
                        shutil.rmtree(date_dir)
                        removed += 1
                        logger.info("EsoF cleanup: removed %s", date_dir.name)
                except Exception as exc:
                    # Best-effort: one bad directory must not abort the sweep.
                    logger.warning("EsoF cleanup error for %s: %s", date_dir, exc)
        except Exception as exc:
            logger.warning("EsoF cleanup scan error: %s", exc)
        return removed

    # ── Background flush loop ─────────────────────────────────────────────────

    def _flush_loop(self) -> None:
        """Daemon loop: copy history under the lock, write outside it, repeat."""
        logger.info(
            "EsoFPersistenceService flush loop started "
            "(interval=%ds, history_maxlen=%d)",
            self.flush_interval_s, self._history.maxlen,
        )
        while not self._stop_event.is_set():
            try:
                with self._lock:
                    history_copy = list(self._history)

                if history_copy:
                    filepath = self._write_npz(history_copy)
                    if filepath:
                        size = filepath.stat().st_size
                        # FIX: stats mutations now happen under the same lock
                        # get_stats() reads with, so readers never see torn updates.
                        with self._lock:
                            self._stats.files_written += 1
                            self._stats.bytes_written += size
                            self._stats.points_written += len(history_copy)
                            self._stats.last_write_time = time.time()
                            self._stats.last_write_path = filepath
                            self._stats.history.append({
                                "flush_time": datetime.now(timezone.utc).isoformat(),
                                "path": str(filepath),
                                "n_points": len(history_copy),
                                "size": size,
                            })

                # Periodic cleanup every ~20 flushes (~100 min)
                if self._stats.files_written > 0 and self._stats.files_written % 20 == 0:
                    self._cleanup_old_files()

            except Exception as exc:
                logger.error("EsoF flush loop error: %s", exc)
                with self._lock:
                    self._stats.files_failed += 1

            self._stop_event.wait(timeout=self.flush_interval_s)

        logger.info("EsoFPersistenceService flush loop stopped")

    # ── Lifecycle ─────────────────────────────────────────────────────────────

    def start(self) -> None:
        """Start the background flush thread (idempotent)."""
        if self._running:
            return
        self._running = True
        self._stop_event.clear()
        self._thread = threading.Thread(target=self._flush_loop, daemon=True)
        self._thread.start()
        logger.info("EsoFPersistenceService started (data_dir=%s)", self.data_dir)

    def stop(self) -> None:
        """Signal the flush thread to stop and join it (bounded 10s wait)."""
        if not self._running:
            return
        self._running = False
        self._stop_event.set()
        if self._thread:
            self._thread.join(timeout=10)
        logger.info("EsoFPersistenceService stopped")
|
||
|
||
|
||
# ── Standalone test ───────────────────────────────────────────────────────────

if __name__ == "__main__":
    # Smoke test: feed real MarketIndicators snapshots into a throwaway
    # service instance, force one flush, then read the NPZ back and dump it.
    import sys
    import tempfile

    logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")

    # The EsoF producer lives in the sibling external_factors/ package.
    sys.path.insert(0, str(Path(__file__).parent.parent / "external_factors"))
    from esoteric_factors_service import MarketIndicators

    indicators = MarketIndicators()
    with tempfile.TemporaryDirectory() as tmpdir:
        svc = EsoFPersistenceService(data_dir=Path(tmpdir), flush_interval_s=5)
        svc.start()

        print("Feeding 12 synthetic snapshots …")
        for _ in range(12):
            svc.update_snapshot(indicators.get_indicators())
            time.sleep(0.1)

        print("Forcing flush …")
        path = svc.force_flush()
        print(f"Written: {path}")

        if path:
            arrays = np.load(path, allow_pickle=True)
            print(f"Keys: {list(arrays.keys())}")
            print(f"esof_names: {arrays['esof_names']}")
            print(f"esof_values shape: {arrays['esof_values'].shape}")
            print(f"timestamps[:3]: {arrays['timestamps'][:3]}")
            print(f"sessions[:3]: {arrays['sessions'][:3]}")
            print(f"moon_phases[:3]: {arrays['moon_phases'][:3]}")
            print(f"metadata: {arrays['_metadata']}")

        svc.stop()
        print("Done.")