# NOTE: web-viewer chrome removed from scraped copy.
# Source: DOLPHIN/prod/esof_persistence.py (425 lines, 17 KiB, Python)
#!/usr/bin/env python3
"""
ESOF PERSISTENCE SERVICE v1.0
==============================
Off-hot-path persistence for Esoteric Factors snapshots.
Design mirrors ExFPersistenceService (prod/exf_persistence.py) for symmetry:
- Background thread, non-blocking to hot path
- Rolling deque of HISTORY_MAXLEN snapshots (default 60 × 5s = 5-min window)
- Flushes the full rolling block to NPZ every FLUSH_INTERVAL_S seconds
- Writing *N rolling prior data points* (not a single overwritten flat file)
- Same directory as production scan files: /mnt/ng6_data/eigenvalues/{YYYY-MM-DD}/
- Same __Indicators.npz naming convention as scan and extf files
DOLPHIN format:
/mnt/ng6_data/eigenvalues/{YYYY-MM-DD}/esof_snapshot_{YYYY-MM-DD_HH-MM-SS}__Indicators.npz
NPZ schema (mirrors api_names / api_indicators / api_success pattern):
esof_names : (N_IND,) U32 indicator names (fixed, see ESOF_NAMES)
esof_values : (N_PTS, N_IND) float64 rolling time-series rows
timestamps : (N_PTS,) U64 ISO UTC per row
unix_times : (N_PTS,) int64 unix epoch per row
moon_phases : (N_PTS,) U32 categorical: moon phase name per row
sessions : (N_PTS,) U32 categorical: liquidity session per row
_metadata : scalar str JSON: version, service, flush_time, n_points
Numeric indicators (24 total):
moon_illumination, mercury_retrograde,
population_weighted_hour, liquidity_weighted_hour, market_cycle_position,
fib_closest_minute, fib_harmonic_strength,
cal_year, cal_month, cal_day, cal_hour, cal_minute, cal_dow, cal_woy,
rt_Americas_hour, rt_Americas_tradfi,
rt_EMEA_hour, rt_EMEA_tradfi,
rt_South_Asia_hour, rt_South_Asia_tradfi,
rt_East_Asia_hour, rt_East_Asia_tradfi,
rt_Oceania_SEA_hour, rt_Oceania_SEA_tradfi,
"""
import json
import logging
import threading
import time
import hashlib
import numpy as np
from collections import deque
from dataclasses import dataclass, field
from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple
# Module-level logger; handlers/level are configured by the host application.
logger = logging.getLogger(__name__)
# ── Configuration ─────────────────────────────────────────────────────────────
DATA_DIR = Path("/mnt/ng6_data/eigenvalues")  # same root as production scan files
FLUSH_INTERVAL_S = 300 # 5 minutes — off hot path (mirrors ExF)
MAX_FILE_AGE_DAYS = 7 # keep 7 days of history (mirrors ExF)
HISTORY_MAXLEN = 60 # 60 × 5s = 5-min rolling window per flush block
# ── Indicator definition ──────────────────────────────────────────────────────
# Ordered list of numeric indicator names stored in esof_values columns.
# Order is FROZEN — do not reorder without a schema-version bump.
# Ordered list of numeric indicator names stored in esof_values columns.
# Order is FROZEN — do not reorder without a schema-version bump.
ESOF_NAMES: Tuple[str, ...] = (
    "moon_illumination",
    "mercury_retrograde",
    "population_weighted_hour",
    "liquidity_weighted_hour",
    "market_cycle_position",
    "fib_closest_minute",
    "fib_harmonic_strength",
    "cal_year",
    "cal_month",
    "cal_day",
    "cal_hour",
    "cal_minute",
    "cal_dow",
    "cal_woy",
    "rt_Americas_hour",
    "rt_Americas_tradfi",
    "rt_EMEA_hour",
    "rt_EMEA_tradfi",
    "rt_South_Asia_hour",
    "rt_South_Asia_tradfi",
    "rt_East_Asia_hour",
    "rt_East_Asia_tradfi",
    "rt_Oceania_SEA_hour",
    "rt_Oceania_SEA_tradfi",
)
N_IND = len(ESOF_NAMES)
_NAME_TO_IDX = {n: i for i, n in enumerate(ESOF_NAMES)}

# Regions are serialized as rt_{region}_hour / rt_{region}_tradfi column pairs.
_REGIONS = ("Americas", "EMEA", "South_Asia", "East_Asia", "Oceania_SEA")


def _extract_row(snapshot: Dict[str, Any]) -> Tuple[np.ndarray, str, str]:
    """
    Flatten a raw EsoF snapshot dict into:
        (values: float64 array len=N_IND, moon_phase: str, session: str)

    Missing/non-numeric values become NaN.

    Fix vs. v1.0: the five ``is_tradfi_open`` flags were previously wrapped
    in an eager ``int(...)`` while the candidate tuple was being built —
    i.e. OUTSIDE the try/except — so an explicit ``is_tradfi_open: None``
    (or any non-numeric value) raised TypeError/ValueError out of the hot
    path. The raw value is now passed through and coerced by ``float()``
    inside the guarded loop (``float(True) == 1.0``, ``float(False) == 0.0``,
    identical results for the normal bool case); bad values degrade to NaN.
    """
    row = np.full(N_IND, np.nan, dtype=np.float64)
    cal = snapshot.get("calendar", {})
    fib = snapshot.get("fibonacci_time", {})
    rt = snapshot.get("regional_times", {})
    candidates: List[Tuple[str, Any]] = [
        # Direct scalars
        ("moon_illumination", snapshot.get("moon_illumination")),
        ("mercury_retrograde", snapshot.get("mercury_retrograde")),
        ("population_weighted_hour", snapshot.get("population_weighted_hour")),
        ("liquidity_weighted_hour", snapshot.get("liquidity_weighted_hour")),
        ("market_cycle_position", snapshot.get("market_cycle_position")),
        # fibonacci
        ("fib_closest_minute", fib.get("closest_fib_minute")),
        ("fib_harmonic_strength", fib.get("harmonic_strength")),
        # calendar
        ("cal_year", cal.get("year")),
        ("cal_month", cal.get("month")),
        ("cal_day", cal.get("day_of_month")),
        ("cal_hour", cal.get("hour")),
        ("cal_minute", cal.get("minute")),
        ("cal_dow", cal.get("day_of_week")),
        ("cal_woy", cal.get("week_of_year")),
    ]
    for region in _REGIONS:
        rd = rt.get(region, {})
        candidates.append((f"rt_{region}_hour", rd.get("hour")))
        # Default False (-> 0.0) when the key is absent; raw value otherwise so
        # the guarded float() below absorbs None / garbage instead of crashing.
        candidates.append((f"rt_{region}_tradfi", rd.get("is_tradfi_open", False)))
    for name, val in candidates:
        if val is not None:
            try:
                row[_NAME_TO_IDX[name]] = float(val)
            except (TypeError, ValueError):
                pass  # stays NaN
    moon_phase = str(snapshot.get("moon_phase_name", ""))
    session = str(snapshot.get("liquidity_session", ""))
    return row, moon_phase, session
# ── Stats dataclass ───────────────────────────────────────────────────────────
@dataclass
class EsoFPersistenceStats:
    """Mutable counters describing persistence activity since service start."""
    files_written: int = 0    # successful NPZ flushes
    files_failed: int = 0     # write attempts that raised
    bytes_written: int = 0    # cumulative size of all written NPZ files
    points_written: int = 0   # cumulative rows flushed (rolling blocks overlap)
    last_write_time: float = 0.0             # unix time of last successful flush
    last_write_path: Optional[Path] = None   # path of last successful flush
    # Bounded log of recent flush records (dicts); maxlen keeps memory flat.
    history: deque = field(default_factory=lambda: deque(maxlen=100))
# ── Service ───────────────────────────────────────────────────────────────────
class EsoFPersistenceService:
    """
    Off-hot-path persistence service for Esoteric Factors.

    Maintains a rolling deque of ``history_maxlen`` snapshots in memory.
    Every ``flush_interval_s`` seconds the full rolling block is written as a
    single NPZ time-series to ``data_dir/{YYYY-MM-DD}/``.

    Usage (mirrors ExFPersistenceService):
        svc = EsoFPersistenceService()
        svc.start()
        # in hot path (called from esof_prefect_flow main loop):
        svc.update_snapshot(data)
        svc.stop()
    """

    def __init__(
        self,
        data_dir: Path = DATA_DIR,
        flush_interval_s: float = FLUSH_INTERVAL_S,
        history_maxlen: int = HISTORY_MAXLEN,
    ):
        """
        Args:
            data_dir: Root directory for date-partitioned NPZ output.
            flush_interval_s: Seconds between background flushes.
            history_maxlen: Max snapshots retained in the rolling window.
        """
        self.data_dir = Path(data_dir)
        self.flush_interval_s = flush_interval_s
        self._stats = EsoFPersistenceStats()
        # Rolling deque: each entry = (timestamp_iso, unix, row_array, moon_phase, session)
        self._history: deque = deque(maxlen=history_maxlen)
        self._lock = threading.Lock()
        self._running = False
        self._thread: Optional[threading.Thread] = None
        self._stop_event = threading.Event()
        self.data_dir.mkdir(parents=True, exist_ok=True)

    # ── Public API (non-blocking, thread-safe) ────────────────────────────────
    def update_snapshot(self, snapshot: Dict[str, Any]) -> None:
        """
        Called from the hot path every 5s. Extracts and appends one row to
        the rolling deque O(1), non-blocking.
        """
        ts = snapshot.get("timestamp", datetime.now(timezone.utc).isoformat())
        unix = snapshot.get("unix", int(time.time()))
        row, moon_phase, session = _extract_row(snapshot)
        with self._lock:
            self._history.append((ts, unix, row, moon_phase, session))

    def get_stats(self) -> Dict[str, Any]:
        """Return a point-in-time stats dict (thread-safe)."""
        with self._lock:
            last_path = self._stats.last_write_path
            return {
                "files_written": self._stats.files_written,
                "files_failed": self._stats.files_failed,
                "bytes_written": self._stats.bytes_written,
                "points_written": self._stats.points_written,
                "history_depth": len(self._history),
                "last_write_time": self._stats.last_write_time,
                "last_write_path": str(last_path) if last_path else None,
            }

    def force_flush(self) -> Optional[Path]:
        """Immediate flush — for testing/debugging."""
        with self._lock:
            history_copy = list(self._history)
        # Write OUTSIDE the lock: the NPZ write is slow disk I/O and must not
        # block update_snapshot() on the hot path.
        if history_copy:
            return self._write_npz(history_copy)
        return None

    # ── Internal write ────────────────────────────────────────────────────────
    def _write_npz(self, history: list) -> Optional[Path]:
        """
        Write a rolling block of EsoF snapshots to NPZ.

        NPZ arrays:
            esof_names   (N_IND,)        str  fixed indicator name list
            esof_values  (N_PTS, N_IND)  f64  time-series rows
            timestamps   (N_PTS,)        str  ISO UTC per point
            unix_times   (N_PTS,)        i64  unix epoch per point
            moon_phases  (N_PTS,)        str  categorical
            sessions     (N_PTS,)        str  categorical
            _metadata    scalar str JSON: version, flush_time, n_points

        Returns the written path, or None on failure. Failures are counted
        and logged, never raised — persistence must not kill the flush thread.
        """
        try:
            n = len(history)
            now = datetime.now(timezone.utc)
            date_dir = self.data_dir / now.strftime("%Y-%m-%d")
            date_dir.mkdir(parents=True, exist_ok=True)
            ts_str = now.strftime("%Y-%m-%d_%H-%M-%S")
            filename = f"esof_snapshot_{ts_str}__Indicators.npz"
            filepath = date_dir / filename
            # Unzip history tuples into column arrays
            timestamps = np.array([e[0] for e in history], dtype="U64")
            unix_times = np.array([e[1] for e in history], dtype=np.int64)
            esof_values = np.vstack([e[2] for e in history]).astype(np.float64)  # (N, N_IND)
            moon_phases = np.array([e[3] for e in history], dtype="U32")
            sessions = np.array([e[4] for e in history], dtype="U32")
            metadata = json.dumps({
                "_version": "1.0",
                "_service": "EsoFPersistence",
                "_flush_time": now.isoformat(),
                "_n_points": n,
                "_n_indicators": N_IND,
                "_history_maxlen": self._history.maxlen,
                "_flush_interval_s": self.flush_interval_s,
            })
            np.savez_compressed(
                filepath,
                esof_names=np.array(ESOF_NAMES, dtype="U32"),
                esof_values=esof_values,
                timestamps=timestamps,
                unix_times=unix_times,
                moon_phases=moon_phases,
                sessions=sessions,
                _metadata=np.array(metadata),
            )
            # Checksum sidecar (mirrors ExF pattern).
            # FIX: record the actual filename next to the digest, per md5sum
            # convention, instead of a "(unknown)" placeholder.
            file_bytes = filepath.read_bytes()
            checksum = hashlib.md5(file_bytes).hexdigest()
            (filepath.parent / (filename + ".md5")).write_text(
                f"{checksum}  {filename}\n"
            )
            logger.info(
                "EsoF persisted: %s (%d points × %d indicators, %d bytes)",
                filename, n, N_IND, len(file_bytes),
            )
            return filepath
        except Exception as exc:
            logger.error("EsoF NPZ write failed: %s", exc)
            self._stats.files_failed += 1
            return None

    def _cleanup_old_files(self) -> int:
        """Remove date directories older than MAX_FILE_AGE_DAYS; return count."""
        import shutil  # hoisted out of the per-directory loop
        removed = 0
        cutoff = time.time() - (MAX_FILE_AGE_DAYS * 86400)
        try:
            for date_dir in self.data_dir.iterdir():
                if not date_dir.is_dir():
                    continue
                try:
                    if date_dir.stat().st_mtime < cutoff:
                        shutil.rmtree(date_dir)
                        removed += 1
                        logger.info("EsoF cleanup: removed %s", date_dir.name)
                except Exception as exc:
                    # Best-effort: one bad directory must not stop the sweep.
                    logger.warning("EsoF cleanup error for %s: %s", date_dir, exc)
        except Exception as exc:
            logger.warning("EsoF cleanup scan error: %s", exc)
        return removed

    # ── Background flush loop ─────────────────────────────────────────────────
    def _flush_loop(self) -> None:
        """Background thread body: flush every flush_interval_s until stopped."""
        logger.info(
            "EsoFPersistenceService flush loop started "
            "(interval=%ds, history_maxlen=%d)",
            self.flush_interval_s, self._history.maxlen,
        )
        while not self._stop_event.is_set():
            try:
                # Snapshot the window under the lock, then release it before
                # the slow NPZ write so the hot path is never blocked on disk.
                with self._lock:
                    history_copy = list(self._history)
                if history_copy:
                    filepath = self._write_npz(history_copy)
                    if filepath:
                        size = filepath.stat().st_size
                        # Mutate stats under the lock — get_stats() reads them
                        # under the same lock from other threads.
                        with self._lock:
                            self._stats.files_written += 1
                            self._stats.bytes_written += size
                            self._stats.points_written += len(history_copy)
                            self._stats.last_write_time = time.time()
                            self._stats.last_write_path = filepath
                            self._stats.history.append({
                                "flush_time": datetime.now(timezone.utc).isoformat(),
                                "path": str(filepath),
                                "n_points": len(history_copy),
                                "size": size,
                            })
                # Periodic cleanup every ~20 flushes (~100 min)
                if self._stats.files_written > 0 and self._stats.files_written % 20 == 0:
                    self._cleanup_old_files()
            except Exception as exc:
                logger.error("EsoF flush loop error: %s", exc)
                self._stats.files_failed += 1
            self._stop_event.wait(timeout=self.flush_interval_s)
        logger.info("EsoFPersistenceService flush loop stopped")

    # ── Lifecycle ─────────────────────────────────────────────────────────────
    def start(self) -> None:
        """Start the daemon flush thread (idempotent)."""
        if self._running:
            return
        self._running = True
        self._stop_event.clear()
        self._thread = threading.Thread(target=self._flush_loop, daemon=True)
        self._thread.start()
        logger.info("EsoFPersistenceService started (data_dir=%s)", self.data_dir)

    def stop(self) -> None:
        """Signal the flush thread to exit and join it (idempotent)."""
        if not self._running:
            return
        self._running = False
        self._stop_event.set()
        if self._thread:
            self._thread.join(timeout=10)
        logger.info("EsoFPersistenceService stopped")
# ── Standalone test ───────────────────────────────────────────────────────────
if __name__ == "__main__":
    # Standalone smoke test: feed live indicator snapshots into the service
    # and verify the NPZ round-trip inside a throwaway directory.
    import sys
    import tempfile

    logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")
    # Make the sibling external_factors package importable for the live feed.
    sys.path.insert(0, str(Path(__file__).parent.parent / "external_factors"))
    from esoteric_factors_service import MarketIndicators

    indicators = MarketIndicators()
    with tempfile.TemporaryDirectory() as tmpdir:
        service = EsoFPersistenceService(data_dir=Path(tmpdir), flush_interval_s=5)
        service.start()
        print("Feeding 12 synthetic snapshots …")
        for _ in range(12):
            service.update_snapshot(indicators.get_indicators())
            time.sleep(0.1)
        print("Forcing flush …")
        written = service.force_flush()
        print(f"Written: {written}")
        if written:
            archive = np.load(written, allow_pickle=True)
            print(f"Keys: {list(archive.keys())}")
            print(f"esof_names: {archive['esof_names']}")
            print(f"esof_values shape: {archive['esof_values'].shape}")
            print(f"timestamps[:3]: {archive['timestamps'][:3]}")
            print(f"sessions[:3]: {archive['sessions'][:3]}")
            print(f"moon_phases[:3]: {archive['moon_phases'][:3]}")
            print(f"metadata: {archive['_metadata']}")
        service.stop()
        print("Done.")