#!/usr/bin/env python3
"""
ESOF PERSISTENCE SERVICE v1.0
==============================

Off-hot-path persistence for Esoteric Factors snapshots.

Design mirrors ExFPersistenceService (prod/exf_persistence.py) for symmetry:
- Background thread, non-blocking to hot path
- Rolling deque of HISTORY_MAXLEN snapshots (default 60 × 5s = 5-min window)
- Flushes the full rolling block to NPZ every FLUSH_INTERVAL_S seconds
- Writing *N rolling prior data points* (not a single overwritten flat file)
- Same directory as production scan files: /mnt/ng6_data/eigenvalues/{YYYY-MM-DD}/
- Same __Indicators.npz naming convention as scan and extf files

DOLPHIN format:
    /mnt/ng6_data/eigenvalues/{YYYY-MM-DD}/esof_snapshot_{YYYY-MM-DD_HH-MM-SS}__Indicators.npz

NPZ schema (mirrors api_names / api_indicators / api_success pattern):
    esof_names  : (N_IND,)        U32     — indicator names (fixed, see ESOF_NAMES)
    esof_values : (N_PTS, N_IND)  float64 — rolling time-series rows
    timestamps  : (N_PTS,)        U64     — ISO UTC per row
    unix_times  : (N_PTS,)        int64   — unix epoch per row
    moon_phases : (N_PTS,)        U32     — categorical: moon phase name per row
    sessions    : (N_PTS,)        U32     — categorical: liquidity session per row
    _metadata   : scalar str              — JSON: version, service, flush_time, n_points

Numeric indicators (24 total):
    moon_illumination, mercury_retrograde,
    population_weighted_hour, liquidity_weighted_hour, market_cycle_position,
    fib_closest_minute, fib_harmonic_strength,
    cal_year, cal_month, cal_day, cal_hour, cal_minute, cal_dow, cal_woy,
    rt_Americas_hour, rt_Americas_tradfi,
    rt_EMEA_hour, rt_EMEA_tradfi,
    rt_South_Asia_hour, rt_South_Asia_tradfi,
    rt_East_Asia_hour, rt_East_Asia_tradfi,
    rt_Oceania_SEA_hour, rt_Oceania_SEA_tradfi,
"""
import hashlib
import json
import logging
import shutil
import threading
import time
from collections import deque
from dataclasses import dataclass, field
from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple

import numpy as np
logger = logging.getLogger(__name__)

# ── Configuration ─────────────────────────────────────────────────────────────
# NOTE: these defaults are overridable per-instance via EsoFPersistenceService's
# constructor arguments; module constants are only the shared defaults.
DATA_DIR = Path("/mnt/ng6_data/eigenvalues")  # shared with production scan files
FLUSH_INTERVAL_S = 300   # 5 minutes — off hot path (mirrors ExF)
MAX_FILE_AGE_DAYS = 7    # keep 7 days of history (mirrors ExF)
HISTORY_MAXLEN = 60      # 60 × 5s = 5-min rolling window per flush block
|
|||
|
|
|
|||
|
|
# ── Indicator definition ──────────────────────────────────────────────────────
|
|||
|
|
# Ordered list of numeric indicator names stored in esof_values columns.
|
|||
|
|
# Order is FROZEN — do not reorder without a schema-version bump.
|
|||
|
|
ESOF_NAMES: Tuple[str, ...] = (
|
|||
|
|
"moon_illumination",
|
|||
|
|
"mercury_retrograde",
|
|||
|
|
"population_weighted_hour",
|
|||
|
|
"liquidity_weighted_hour",
|
|||
|
|
"market_cycle_position",
|
|||
|
|
"fib_closest_minute",
|
|||
|
|
"fib_harmonic_strength",
|
|||
|
|
"cal_year",
|
|||
|
|
"cal_month",
|
|||
|
|
"cal_day",
|
|||
|
|
"cal_hour",
|
|||
|
|
"cal_minute",
|
|||
|
|
"cal_dow",
|
|||
|
|
"cal_woy",
|
|||
|
|
"rt_Americas_hour",
|
|||
|
|
"rt_Americas_tradfi",
|
|||
|
|
"rt_EMEA_hour",
|
|||
|
|
"rt_EMEA_tradfi",
|
|||
|
|
"rt_South_Asia_hour",
|
|||
|
|
"rt_South_Asia_tradfi",
|
|||
|
|
"rt_East_Asia_hour",
|
|||
|
|
"rt_East_Asia_tradfi",
|
|||
|
|
"rt_Oceania_SEA_hour",
|
|||
|
|
"rt_Oceania_SEA_tradfi",
|
|||
|
|
)
|
|||
|
|
N_IND = len(ESOF_NAMES)
|
|||
|
|
_NAME_TO_IDX = {n: i for i, n in enumerate(ESOF_NAMES)}
|
|||
|
|
|
|||
|
|
|
|||
|
|
def _extract_row(snapshot: Dict[str, Any]) -> Tuple[np.ndarray, str, str]:
|
|||
|
|
"""
|
|||
|
|
Flatten a raw EsoF snapshot dict into:
|
|||
|
|
(values: float64 array len=N_IND, moon_phase: str, session: str)
|
|||
|
|
|
|||
|
|
Missing/non-numeric values become NaN.
|
|||
|
|
"""
|
|||
|
|
row = np.full(N_IND, np.nan, dtype=np.float64)
|
|||
|
|
cal = snapshot.get("calendar", {})
|
|||
|
|
fib = snapshot.get("fibonacci_time", {})
|
|||
|
|
rt = snapshot.get("regional_times", {})
|
|||
|
|
|
|||
|
|
# Direct scalars
|
|||
|
|
for name, val in (
|
|||
|
|
("moon_illumination", snapshot.get("moon_illumination")),
|
|||
|
|
("mercury_retrograde", snapshot.get("mercury_retrograde")),
|
|||
|
|
("population_weighted_hour",snapshot.get("population_weighted_hour")),
|
|||
|
|
("liquidity_weighted_hour", snapshot.get("liquidity_weighted_hour")),
|
|||
|
|
("market_cycle_position", snapshot.get("market_cycle_position")),
|
|||
|
|
# fibonacci
|
|||
|
|
("fib_closest_minute", fib.get("closest_fib_minute")),
|
|||
|
|
("fib_harmonic_strength", fib.get("harmonic_strength")),
|
|||
|
|
# calendar
|
|||
|
|
("cal_year", cal.get("year")),
|
|||
|
|
("cal_month", cal.get("month")),
|
|||
|
|
("cal_day", cal.get("day_of_month")),
|
|||
|
|
("cal_hour", cal.get("hour")),
|
|||
|
|
("cal_minute", cal.get("minute")),
|
|||
|
|
("cal_dow", cal.get("day_of_week")),
|
|||
|
|
("cal_woy", cal.get("week_of_year")),
|
|||
|
|
# regional Americas
|
|||
|
|
("rt_Americas_hour", rt.get("Americas", {}).get("hour")),
|
|||
|
|
("rt_Americas_tradfi", int(rt.get("Americas", {}).get("is_tradfi_open", False))),
|
|||
|
|
# EMEA
|
|||
|
|
("rt_EMEA_hour", rt.get("EMEA", {}).get("hour")),
|
|||
|
|
("rt_EMEA_tradfi", int(rt.get("EMEA", {}).get("is_tradfi_open", False))),
|
|||
|
|
# South_Asia
|
|||
|
|
("rt_South_Asia_hour", rt.get("South_Asia", {}).get("hour")),
|
|||
|
|
("rt_South_Asia_tradfi",int(rt.get("South_Asia", {}).get("is_tradfi_open", False))),
|
|||
|
|
# East_Asia
|
|||
|
|
("rt_East_Asia_hour", rt.get("East_Asia", {}).get("hour")),
|
|||
|
|
("rt_East_Asia_tradfi", int(rt.get("East_Asia", {}).get("is_tradfi_open", False))),
|
|||
|
|
# Oceania_SEA
|
|||
|
|
("rt_Oceania_SEA_hour", rt.get("Oceania_SEA", {}).get("hour")),
|
|||
|
|
("rt_Oceania_SEA_tradfi", int(rt.get("Oceania_SEA", {}).get("is_tradfi_open", False))),
|
|||
|
|
):
|
|||
|
|
if val is not None:
|
|||
|
|
try:
|
|||
|
|
row[_NAME_TO_IDX[name]] = float(val)
|
|||
|
|
except (TypeError, ValueError):
|
|||
|
|
pass # stays NaN
|
|||
|
|
|
|||
|
|
moon_phase = str(snapshot.get("moon_phase_name", ""))
|
|||
|
|
session = str(snapshot.get("liquidity_session", ""))
|
|||
|
|
return row, moon_phase, session
|
|||
|
|
|
|||
|
|
|
|||
|
|
# ── Stats dataclass ───────────────────────────────────────────────────────────

@dataclass
class EsoFPersistenceStats:
    """Mutable counters describing persistence activity.

    Field order and defaults form the generated __init__ signature — do not
    reorder. Instances are shared across threads; callers are expected to
    guard access with the service's lock.
    """
    files_written: int = 0    # successful NPZ flushes
    files_failed: int = 0    # failed writes / flush-loop errors
    bytes_written: int = 0   # cumulative NPZ bytes written to disk
    points_written: int = 0  # cumulative snapshot rows persisted
    last_write_time: float = 0.0  # unix time of the most recent successful flush
    last_write_path: Optional[Path] = None  # path of the most recent NPZ file
    # Bounded ring buffer of per-flush summary dicts (last 100 flushes).
    history: deque = field(default_factory=lambda: deque(maxlen=100))
|
|||
|
|
|
|||
|
|
|
|||
|
|
# ── Service ───────────────────────────────────────────────────────────────────

class EsoFPersistenceService:
    """
    Off-hot-path persistence service for Esoteric Factors.

    Maintains a rolling deque of HISTORY_MAXLEN snapshots in memory.
    Every FLUSH_INTERVAL_S seconds the full rolling block is written as a
    single NPZ time-series to DATA_DIR/{YYYY-MM-DD}/.

    Usage (mirrors ExFPersistenceService):
        svc = EsoFPersistenceService()
        svc.start()
        # in hot path (called from esof_prefect_flow main loop):
        svc.update_snapshot(data)
        svc.stop()

    Thread-safety: ``self._lock`` guards both the rolling deque AND the
    stats object. NPZ writes always happen OUTSIDE the lock so the hot
    path is never blocked on disk I/O.
    """

    def __init__(
        self,
        data_dir: Path = DATA_DIR,
        flush_interval_s: float = FLUSH_INTERVAL_S,
        history_maxlen: int = HISTORY_MAXLEN,
    ):
        """Create the service and ensure ``data_dir`` exists.

        Args:
            data_dir: Root directory for dated NPZ output subdirectories.
            flush_interval_s: Seconds between background flushes.
            history_maxlen: Rolling-deque capacity (points per flush block).
        """
        self.data_dir = Path(data_dir)
        self.flush_interval_s = flush_interval_s
        self._stats = EsoFPersistenceStats()

        # Rolling deque: each entry = (timestamp_iso, unix, row_array, moon_phase, session)
        self._history: deque = deque(maxlen=history_maxlen)
        self._lock = threading.Lock()
        self._running = False
        self._thread: Optional[threading.Thread] = None
        self._stop_event = threading.Event()

        self.data_dir.mkdir(parents=True, exist_ok=True)

    # ── Public API (non-blocking, thread-safe) ────────────────────────────────

    def update_snapshot(self, snapshot: Dict[str, Any]) -> None:
        """
        Called from the hot path every 5s. Extracts and appends one row to
        the rolling deque — O(1), non-blocking.
        """
        ts = snapshot.get("timestamp", datetime.now(timezone.utc).isoformat())
        unix = snapshot.get("unix", int(time.time()))
        row, moon_phase, session = _extract_row(snapshot)
        with self._lock:
            self._history.append((ts, unix, row, moon_phase, session))

    def get_stats(self) -> Dict[str, Any]:
        """Return a point-in-time dict of persistence counters (thread-safe)."""
        with self._lock:
            return {
                "files_written": self._stats.files_written,
                "files_failed": self._stats.files_failed,
                "bytes_written": self._stats.bytes_written,
                "points_written": self._stats.points_written,
                "history_depth": len(self._history),
                "last_write_time": self._stats.last_write_time,
                "last_write_path": str(self._stats.last_write_path)
                if self._stats.last_write_path else None,
            }

    def force_flush(self) -> Optional[Path]:
        """Immediate flush — for testing/debugging.

        BUG FIX: the history copy is taken under the lock but the (slow)
        NPZ write runs outside it, so a forced flush no longer blocks the
        hot path's update_snapshot() for the duration of disk I/O. Also
        now records stats on success, consistent with the flush loop.
        """
        with self._lock:
            history_copy = list(self._history)
        if not history_copy:
            return None
        filepath = self._write_npz(history_copy)
        if filepath:
            self._record_flush(filepath, len(history_copy))
        return filepath

    # ── Internal write ────────────────────────────────────────────────────────

    def _record_flush(self, filepath: Path, n_points: int) -> None:
        """Update success counters after an NPZ write (lock-guarded)."""
        size = filepath.stat().st_size
        with self._lock:
            self._stats.files_written += 1
            self._stats.bytes_written += size
            self._stats.points_written += n_points
            self._stats.last_write_time = time.time()
            self._stats.last_write_path = filepath
            self._stats.history.append({
                "flush_time": datetime.now(timezone.utc).isoformat(),
                "path": str(filepath),
                "n_points": n_points,
                "size": size,
            })

    def _write_npz(self, history: list) -> Optional[Path]:
        """
        Write a rolling block of EsoF snapshots to NPZ.

        NPZ arrays:
            esof_names   (N_IND,)        str — fixed indicator name list
            esof_values  (N_PTS, N_IND)  f64 — time-series rows
            timestamps   (N_PTS,)        str — ISO UTC per point
            unix_times   (N_PTS,)        i64 — unix epoch per point
            moon_phases  (N_PTS,)        str — categorical
            sessions     (N_PTS,)        str — categorical
            _metadata    scalar str          — JSON: version, flush_time, n_points

        Returns the written path, or None on failure (error is logged and
        files_failed is incremented; never raises).
        """
        try:
            n = len(history)
            now = datetime.now(timezone.utc)
            date_str = now.strftime("%Y-%m-%d")
            ts_str = now.strftime("%Y-%m-%d_%H-%M-%S")

            date_dir = self.data_dir / date_str
            date_dir.mkdir(parents=True, exist_ok=True)

            filename = f"esof_snapshot_{ts_str}__Indicators.npz"
            filepath = date_dir / filename

            # Unzip history tuples into column arrays.
            timestamps = np.array([e[0] for e in history], dtype="U64")
            unix_times = np.array([e[1] for e in history], dtype=np.int64)
            esof_values = np.vstack([e[2] for e in history]).astype(np.float64)  # (N, N_IND)
            moon_phases = np.array([e[3] for e in history], dtype="U32")
            sessions = np.array([e[4] for e in history], dtype="U32")

            metadata = json.dumps({
                "_version": "1.0",
                "_service": "EsoFPersistence",
                "_flush_time": now.isoformat(),
                "_n_points": n,
                "_n_indicators": N_IND,
                "_history_maxlen": self._history.maxlen,
                "_flush_interval_s": self.flush_interval_s,
            })

            np.savez_compressed(
                filepath,
                esof_names=np.array(ESOF_NAMES, dtype="U32"),
                esof_values=esof_values,
                timestamps=timestamps,
                unix_times=unix_times,
                moon_phases=moon_phases,
                sessions=sessions,
                _metadata=np.array(metadata),
            )

            # Checksum sidecar (mirrors ExF pattern), in md5sum-compatible
            # "<digest>  <filename>" format so `md5sum -c` works in the dir.
            file_bytes = filepath.read_bytes()
            checksum = hashlib.md5(file_bytes).hexdigest()
            (filepath.parent / (filename + ".md5")).write_text(
                f"{checksum}  {filename}\n"
            )

            logger.info(
                "EsoF persisted: %s (%d points × %d indicators, %d bytes)",
                filename, n, N_IND, len(file_bytes),
            )
            return filepath

        except Exception as exc:
            logger.error("EsoF NPZ write failed: %s", exc)
            with self._lock:
                self._stats.files_failed += 1
            return None

    def _cleanup_old_files(self) -> int:
        """Remove date directories older than MAX_FILE_AGE_DAYS.

        Best-effort: per-directory failures are logged and skipped.
        Returns the number of directories removed.
        """
        removed = 0
        cutoff = time.time() - (MAX_FILE_AGE_DAYS * 86400)
        try:
            for date_dir in self.data_dir.iterdir():
                if not date_dir.is_dir():
                    continue
                try:
                    # mtime, not the name, decides age — cheap and robust
                    # against non-date directory names.
                    if date_dir.stat().st_mtime < cutoff:
                        shutil.rmtree(date_dir)
                        removed += 1
                        logger.info("EsoF cleanup: removed %s", date_dir.name)
                except Exception as exc:
                    logger.warning("EsoF cleanup error for %s: %s", date_dir, exc)
        except Exception as exc:
            logger.warning("EsoF cleanup scan error: %s", exc)
        return removed

    # ── Background flush loop ─────────────────────────────────────────────────

    def _flush_loop(self) -> None:
        """Background thread body: snapshot the deque under the lock, write
        the NPZ outside it, then sleep until the next interval or stop()."""
        logger.info(
            "EsoFPersistenceService flush loop started "
            "(interval=%ds, history_maxlen=%d)",
            self.flush_interval_s, self._history.maxlen,
        )
        while not self._stop_event.is_set():
            try:
                with self._lock:
                    history_copy = list(self._history)

                if history_copy:
                    filepath = self._write_npz(history_copy)
                    if filepath:
                        self._record_flush(filepath, len(history_copy))

                # Periodic cleanup every ~20 flushes (~100 min)
                with self._lock:
                    files_written = self._stats.files_written
                if files_written > 0 and files_written % 20 == 0:
                    self._cleanup_old_files()

            except Exception as exc:
                logger.error("EsoF flush loop error: %s", exc)
                with self._lock:
                    self._stats.files_failed += 1

            self._stop_event.wait(timeout=self.flush_interval_s)

        logger.info("EsoFPersistenceService flush loop stopped")

    # ── Lifecycle ─────────────────────────────────────────────────────────────

    def start(self) -> None:
        """Start the background flush thread. Idempotent."""
        if self._running:
            return
        self._running = True
        self._stop_event.clear()
        self._thread = threading.Thread(target=self._flush_loop, daemon=True)
        self._thread.start()
        logger.info("EsoFPersistenceService started (data_dir=%s)", self.data_dir)

    def stop(self) -> None:
        """Signal the flush thread to stop and join it. Idempotent.

        NOTE(review): points accumulated since the last flush are NOT
        written on shutdown — call force_flush() first if that matters.
        """
        if not self._running:
            return
        self._running = False
        self._stop_event.set()
        if self._thread:
            self._thread.join(timeout=10)
        logger.info("EsoFPersistenceService stopped")
|
|||
|
|
|
|||
|
|
|
|||
|
|
# ── Standalone test ───────────────────────────────────────────────────────────

if __name__ == "__main__":
    import sys
    import tempfile

    logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")

    # Make the live indicator source importable for this smoke test.
    sys.path.insert(0, str(Path(__file__).parent.parent / "external_factors"))
    from esoteric_factors_service import MarketIndicators

    market_indicators = MarketIndicators()
    with tempfile.TemporaryDirectory() as tmpdir:
        svc = EsoFPersistenceService(data_dir=Path(tmpdir), flush_interval_s=5)
        svc.start()

        print("Feeding 12 synthetic snapshots …")
        for _ in range(12):
            svc.update_snapshot(market_indicators.get_indicators())
            time.sleep(0.1)

        print("Forcing flush …")
        path = svc.force_flush()
        print(f"Written: {path}")

        # Read the file back and echo its schema for eyeball verification.
        if path is not None:
            d = np.load(path, allow_pickle=True)
            print(f"Keys: {list(d.keys())}")
            print(f"esof_names: {d['esof_names']}")
            print(f"esof_values shape: {d['esof_values'].shape}")
            print(f"timestamps[:3]: {d['timestamps'][:3]}")
            print(f"sessions[:3]: {d['sessions'][:3]}")
            print(f"moon_phases[:3]: {d['moon_phases'][:3]}")
            print(f"metadata: {d['_metadata']}")

        svc.stop()
        print("Done.")
|