#!/usr/bin/env python3
"""
ESOF PERSISTENCE SERVICE v1.0
==============================

Off-hot-path persistence for Esoteric Factors snapshots.

Design mirrors ExFPersistenceService (prod/exf_persistence.py) for symmetry:
- Background thread, non-blocking to hot path
- Rolling deque of HISTORY_MAXLEN snapshots (default 60 × 5s = 5-min window)
- Flushes the full rolling block to NPZ every FLUSH_INTERVAL_S seconds
- Writing *N rolling prior data points* (not a single overwritten flat file)
- Same directory as production scan files: /mnt/ng6_data/eigenvalues/{YYYY-MM-DD}/
- Same __Indicators.npz naming convention as scan and extf files

DOLPHIN format:
    /mnt/ng6_data/eigenvalues/{YYYY-MM-DD}/esof_snapshot_{YYYY-MM-DD_HH-MM-SS}__Indicators.npz

NPZ schema (mirrors api_names / api_indicators / api_success pattern):
    esof_names  : (N_IND,)       U32     — indicator names (fixed, see ESOF_NAMES)
    esof_values : (N_PTS, N_IND) float64 — rolling time-series rows
    timestamps  : (N_PTS,)       U64     — ISO UTC per row
    unix_times  : (N_PTS,)       int64   — unix epoch per row
    moon_phases : (N_PTS,)       U32     — categorical: moon phase name per row
    sessions    : (N_PTS,)       U32     — categorical: liquidity session per row
    _metadata   : scalar str             — JSON: version, service, flush_time, n_points

Numeric indicators (24 total):
    moon_illumination, mercury_retrograde, population_weighted_hour,
    liquidity_weighted_hour, market_cycle_position,
    fib_closest_minute, fib_harmonic_strength,
    cal_year, cal_month, cal_day, cal_hour, cal_minute, cal_dow, cal_woy,
    rt_Americas_hour, rt_Americas_tradfi, rt_EMEA_hour, rt_EMEA_tradfi,
    rt_South_Asia_hour, rt_South_Asia_tradfi, rt_East_Asia_hour,
    rt_East_Asia_tradfi, rt_Oceania_SEA_hour, rt_Oceania_SEA_tradfi,
"""

import json
import logging
import threading
import time
import hashlib
import numpy as np
from collections import deque
from dataclasses import dataclass, field
from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple

logger = logging.getLogger(__name__)

# ── Configuration ─────────────────────────────────────────────────────────────
DATA_DIR = Path("/mnt/ng6_data/eigenvalues")
FLUSH_INTERVAL_S = 300    # 5 minutes — off hot path (mirrors ExF)
MAX_FILE_AGE_DAYS = 7     # keep 7 days of history (mirrors ExF)
HISTORY_MAXLEN = 60       # 60 × 5s = 5-min rolling window per flush block

# ── Indicator definition ──────────────────────────────────────────────────────
# Ordered list of numeric indicator names stored in esof_values columns.
# Order is FROZEN — do not reorder without a schema-version bump.
ESOF_NAMES: Tuple[str, ...] = (
    "moon_illumination",
    "mercury_retrograde",
    "population_weighted_hour",
    "liquidity_weighted_hour",
    "market_cycle_position",
    "fib_closest_minute",
    "fib_harmonic_strength",
    "cal_year",
    "cal_month",
    "cal_day",
    "cal_hour",
    "cal_minute",
    "cal_dow",
    "cal_woy",
    "rt_Americas_hour",
    "rt_Americas_tradfi",
    "rt_EMEA_hour",
    "rt_EMEA_tradfi",
    "rt_South_Asia_hour",
    "rt_South_Asia_tradfi",
    "rt_East_Asia_hour",
    "rt_East_Asia_tradfi",
    "rt_Oceania_SEA_hour",
    "rt_Oceania_SEA_tradfi",
)
N_IND = len(ESOF_NAMES)
_NAME_TO_IDX = {n: i for i, n in enumerate(ESOF_NAMES)}

# Region keys mirrored in the rt_{region}_hour / rt_{region}_tradfi columns.
_REGIONS = ("Americas", "EMEA", "South_Asia", "East_Asia", "Oceania_SEA")


def _extract_row(snapshot: Dict[str, Any]) -> Tuple[np.ndarray, str, str]:
    """
    Flatten a raw EsoF snapshot dict into:
        (values: float64 array len=N_IND, moon_phase: str, session: str)
    Missing/non-numeric values become NaN.

    FIX: the previous version called int(...) on is_tradfi_open eagerly while
    building the (name, value) tuples — an explicit None (or non-numeric
    string) raised TypeError/ValueError *outside* the guarded conversion and
    crashed the caller. Raw values are now passed through and the single
    guarded float() conversion handles booleans (float(True) == 1.0), so bad
    values degrade to NaN instead of raising.
    """
    row = np.full(N_IND, np.nan, dtype=np.float64)
    cal = snapshot.get("calendar", {})
    fib = snapshot.get("fibonacci_time", {})
    rt = snapshot.get("regional_times", {})

    pairs: List[Tuple[str, Any]] = [
        # Direct scalars
        ("moon_illumination",       snapshot.get("moon_illumination")),
        ("mercury_retrograde",      snapshot.get("mercury_retrograde")),
        ("population_weighted_hour", snapshot.get("population_weighted_hour")),
        ("liquidity_weighted_hour", snapshot.get("liquidity_weighted_hour")),
        ("market_cycle_position",   snapshot.get("market_cycle_position")),
        # fibonacci
        ("fib_closest_minute",      fib.get("closest_fib_minute")),
        ("fib_harmonic_strength",   fib.get("harmonic_strength")),
        # calendar
        ("cal_year",                cal.get("year")),
        ("cal_month",               cal.get("month")),
        ("cal_day",                 cal.get("day_of_month")),
        ("cal_hour",                cal.get("hour")),
        ("cal_minute",              cal.get("minute")),
        ("cal_dow",                 cal.get("day_of_week")),
        ("cal_woy",                 cal.get("week_of_year")),
    ]
    # Regional hour + tradfi-open flag per region (replaces five copy-pasted
    # stanzas). Missing regions: hour → NaN, tradfi → 0.0 (default False),
    # matching the original behavior.
    for region in _REGIONS:
        rinfo = rt.get(region, {})
        pairs.append((f"rt_{region}_hour", rinfo.get("hour")))
        pairs.append((f"rt_{region}_tradfi", rinfo.get("is_tradfi_open", False)))

    for name, val in pairs:
        if val is not None:
            try:
                row[_NAME_TO_IDX[name]] = float(val)
            except (TypeError, ValueError):
                pass  # stays NaN

    moon_phase = str(snapshot.get("moon_phase_name", ""))
    session = str(snapshot.get("liquidity_session", ""))
    return row, moon_phase, session
# ── Stats dataclass ───────────────────────────────────────────────────────────

@dataclass
class EsoFPersistenceStats:
    """Mutable counters describing flush activity, surfaced by get_stats()."""
    files_written: int = 0
    files_failed: int = 0
    bytes_written: int = 0
    points_written: int = 0
    last_write_time: float = 0.0
    last_write_path: Optional[Path] = None
    # Bounded log of recent flush records (flush_time / path / n_points / size).
    history: deque = field(default_factory=lambda: deque(maxlen=100))


# ── Service ───────────────────────────────────────────────────────────────────

class EsoFPersistenceService:
    """
    Off-hot-path persistence service for Esoteric Factors.

    Maintains a rolling deque of HISTORY_MAXLEN snapshots in memory.
    Every FLUSH_INTERVAL_S seconds the full rolling block is written as a
    single NPZ time-series to DATA_DIR/{YYYY-MM-DD}/.

    Usage (mirrors ExFPersistenceService):
        svc = EsoFPersistenceService()
        svc.start()
        # in hot path (called from esof_prefect_flow main loop):
        svc.update_snapshot(data)
        svc.stop()
    """

    def __init__(
        self,
        data_dir: Path = DATA_DIR,
        flush_interval_s: float = FLUSH_INTERVAL_S,
        history_maxlen: int = HISTORY_MAXLEN,
    ):
        self.data_dir = Path(data_dir)
        self.flush_interval_s = flush_interval_s
        self._stats = EsoFPersistenceStats()
        # Rolling deque: each entry = (timestamp_iso, unix, row_array, moon_phase, session)
        self._history: deque = deque(maxlen=history_maxlen)
        self._lock = threading.Lock()
        self._running = False
        self._thread: Optional[threading.Thread] = None
        self._stop_event = threading.Event()
        self.data_dir.mkdir(parents=True, exist_ok=True)

    # ── Public API (non-blocking, thread-safe) ────────────────────────────────

    def update_snapshot(self, snapshot: Dict[str, Any]) -> None:
        """
        Called from the hot path every 5s.
        Extracts and appends one row to the rolling deque — O(1), non-blocking.
        """
        ts = snapshot.get("timestamp", datetime.now(timezone.utc).isoformat())
        unix = snapshot.get("unix", int(time.time()))
        row, moon_phase, session = _extract_row(snapshot)
        with self._lock:
            self._history.append((ts, unix, row, moon_phase, session))

    def get_stats(self) -> Dict[str, Any]:
        """Return a plain-dict view of flush counters plus current history depth."""
        with self._lock:
            return {
                "files_written": self._stats.files_written,
                "files_failed": self._stats.files_failed,
                "bytes_written": self._stats.bytes_written,
                "points_written": self._stats.points_written,
                "history_depth": len(self._history),
                "last_write_time": self._stats.last_write_time,
                "last_write_path": str(self._stats.last_write_path)
                                   if self._stats.last_write_path else None,
            }

    def force_flush(self) -> Optional[Path]:
        """
        Immediate flush — for testing/debugging. Returns the written path,
        or None when the history is empty or the write failed.

        FIX: previously the NPZ disk write ran while holding self._lock,
        stalling hot-path update_snapshot() for the I/O duration, and success
        stats were never recorded. Both paths now share _flush_once().
        """
        return self._flush_once()

    # ── Internal flush/write ──────────────────────────────────────────────────

    def _flush_once(self) -> Optional[Path]:
        """
        Snapshot the rolling history under the lock, write it OUTSIDE the lock,
        and record success stats. Shared by force_flush() and _flush_loop().
        """
        with self._lock:
            history_copy = list(self._history)
        if not history_copy:
            return None

        filepath = self._write_npz(history_copy)  # disk I/O — lock NOT held
        if filepath is None:
            return None

        size = filepath.stat().st_size
        with self._lock:
            self._stats.files_written += 1
            self._stats.bytes_written += size
            self._stats.points_written += len(history_copy)
            self._stats.last_write_time = time.time()
            self._stats.last_write_path = filepath
            self._stats.history.append({
                "flush_time": datetime.now(timezone.utc).isoformat(),
                "path": str(filepath),
                "n_points": len(history_copy),
                "size": size,
            })
        return filepath

    def _write_npz(self, history: list) -> Optional[Path]:
        """
        Write a rolling block of EsoF snapshots to NPZ.

        NPZ arrays:
            esof_names   (N_IND,)        str — fixed indicator name list
            esof_values  (N_PTS, N_IND)  f64 — time-series rows
            timestamps   (N_PTS,)        str — ISO UTC per point
            unix_times   (N_PTS,)        i64 — unix epoch per point
            moon_phases  (N_PTS,)        str — categorical
            sessions     (N_PTS,)        str — categorical
            _metadata    scalar str      — JSON: version, flush_time, n_points

        Returns the written path, or None on failure (files_failed bumped).
        """
        try:
            n = len(history)
            now = datetime.now(timezone.utc)
            date_str = now.strftime("%Y-%m-%d")
            ts_str = now.strftime("%Y-%m-%d_%H-%M-%S")
            date_dir = self.data_dir / date_str
            date_dir.mkdir(parents=True, exist_ok=True)
            filename = f"esof_snapshot_{ts_str}__Indicators.npz"
            filepath = date_dir / filename

            # Unzip history tuples into column arrays
            timestamps = np.array([e[0] for e in history], dtype="U64")
            unix_times = np.array([e[1] for e in history], dtype=np.int64)
            esof_values = np.vstack([e[2] for e in history]).astype(np.float64)  # (N, N_IND)
            moon_phases = np.array([e[3] for e in history], dtype="U32")
            sessions = np.array([e[4] for e in history], dtype="U32")

            metadata = json.dumps({
                "_version": "1.0",
                "_service": "EsoFPersistence",
                "_flush_time": now.isoformat(),
                "_n_points": n,
                "_n_indicators": N_IND,
                "_history_maxlen": self._history.maxlen,
                "_flush_interval_s": self.flush_interval_s,
            })

            np.savez_compressed(
                filepath,
                esof_names=np.array(ESOF_NAMES, dtype="U32"),
                esof_values=esof_values,
                timestamps=timestamps,
                unix_times=unix_times,
                moon_phases=moon_phases,
                sessions=sessions,
                _metadata=np.array(metadata),
            )

            # Checksum sidecar (mirrors ExF pattern).
            # FIX: md5sum-compatible "<hash>  <filename>" line instead of a
            # hard-coded placeholder, so `md5sum -c` can verify the NPZ.
            file_bytes = filepath.read_bytes()
            checksum = hashlib.md5(file_bytes).hexdigest()
            (filepath.parent / (filename + ".md5")).write_text(
                f"{checksum}  {filename}\n"
            )

            size = len(file_bytes)
            logger.info(
                "EsoF persisted: %s (%d points × %d indicators, %d bytes)",
                filename, n, N_IND, size,
            )
            return filepath

        except Exception as exc:
            logger.error("EsoF NPZ write failed: %s", exc)
            with self._lock:  # keep counter mutation consistent with get_stats()
                self._stats.files_failed += 1
            return None

    def _cleanup_old_files(self) -> int:
        """Remove date directories older than MAX_FILE_AGE_DAYS; return count removed."""
        import shutil  # local import: only needed on the periodic cleanup path
        removed = 0
        cutoff = time.time() - (MAX_FILE_AGE_DAYS * 86400)
        try:
            for date_dir in self.data_dir.iterdir():
                if not date_dir.is_dir():
                    continue
                try:
                    if date_dir.stat().st_mtime < cutoff:
                        shutil.rmtree(date_dir)
                        removed += 1
                        logger.info("EsoF cleanup: removed %s", date_dir.name)
                except Exception as exc:
                    # Best-effort: one bad directory must not abort the sweep.
                    logger.warning("EsoF cleanup error for %s: %s", date_dir, exc)
        except Exception as exc:
            logger.warning("EsoF cleanup scan error: %s", exc)
        return removed

    # ── Background flush loop ─────────────────────────────────────────────────

    def _flush_loop(self) -> None:
        """Thread body: flush every flush_interval_s until the stop event fires."""
        logger.info(
            "EsoFPersistenceService flush loop started "
            "(interval=%ds, history_maxlen=%d)",
            self.flush_interval_s, self._history.maxlen,
        )
        while not self._stop_event.is_set():
            try:
                self._flush_once()
                # Periodic cleanup every ~20 flushes (~100 min)
                if self._stats.files_written > 0 and self._stats.files_written % 20 == 0:
                    self._cleanup_old_files()
            except Exception as exc:
                logger.error("EsoF flush loop error: %s", exc)
                self._stats.files_failed += 1
            # Event.wait doubles as an interruptible sleep: stop() wakes it early.
            self._stop_event.wait(timeout=self.flush_interval_s)
        logger.info("EsoFPersistenceService flush loop stopped")

    # ── Lifecycle ─────────────────────────────────────────────────────────────

    def start(self) -> None:
        """Start the background flush thread (idempotent)."""
        if self._running:
            return
        self._running = True
        self._stop_event.clear()
        self._thread = threading.Thread(target=self._flush_loop, daemon=True)
        self._thread.start()
        logger.info("EsoFPersistenceService started (data_dir=%s)", self.data_dir)

    def stop(self) -> None:
        """Signal the flush thread to stop and join it (bounded wait)."""
        if not self._running:
            return
        self._running = False
        self._stop_event.set()
        if self._thread:
            self._thread.join(timeout=10)
        logger.info("EsoFPersistenceService stopped")


# ── Standalone test ───────────────────────────────────────────────────────────

if __name__ == "__main__":
    import sys, tempfile
    logging.basicConfig(level=logging.INFO,
                        format="%(asctime)s %(levelname)s %(message)s")
    sys.path.insert(0, str(Path(__file__).parent.parent / "external_factors"))
    from esoteric_factors_service import MarketIndicators

    mi = MarketIndicators()
    with tempfile.TemporaryDirectory() as tmpdir:
        svc = EsoFPersistenceService(data_dir=Path(tmpdir), flush_interval_s=5)
        svc.start()

        print("Feeding 12 synthetic snapshots …")
        for i in range(12):
            snap = mi.get_indicators()
            svc.update_snapshot(snap)
            time.sleep(0.1)

        print("Forcing flush …")
        path = svc.force_flush()
        print(f"Written: {path}")

        if path:
            d = np.load(path, allow_pickle=True)
            print(f"Keys: {list(d.keys())}")
            print(f"esof_names: {d['esof_names']}")
            print(f"esof_values shape: {d['esof_values'].shape}")
            print(f"timestamps[:3]: {d['timestamps'][:3]}")
            print(f"sessions[:3]: {d['sessions'][:3]}")
            print(f"moon_phases[:3]: {d['moon_phases'][:3]}")
            print(f"metadata: {d['_metadata']}")

        svc.stop()
    print("Done.")