initial: import DOLPHIN baseline 2026-04-21 from dolphinng5_predict working tree
Includes core prod + GREEN/BLUE subsystems: - prod/ (BLUE harness, configs, scripts, docs) - nautilus_dolphin/ (GREEN Nautilus-native impl + dvae/ preserved) - adaptive_exit/ (AEM engine + models/bucket_assignments.pkl) - Observability/ (EsoF advisor, TUI, dashboards) - external_factors/ (EsoF producer) - mc_forewarning_qlabs_fork/ (MC regime/envelope) Excludes runtime caches, logs, backups, and reproducible artifacts per .gitignore.
This commit is contained in:
166
prod/scan_bridge_service.py
Executable file
166
prod/scan_bridge_service.py
Executable file
@@ -0,0 +1,166 @@
|
||||
#!/usr/bin/env python3
"""
DOLPHIN Scan Bridge Service (Linux)
====================================
Watches Arrow scan files and pushes to Hazelcast.
Handles DolphinNG6 restart/scan_number resets by using file timestamps.
"""

import os
import sys
import time
import json
import logging
from pathlib import Path
from datetime import datetime, timezone
import pyarrow as pa
import pyarrow.ipc as ipc
import numpy as np

# Root logger configuration for the whole service.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s [%(levelname)s] %(name)s: %(message)s'
)
logger = logging.getLogger("ScanBridge")

# Make project-local packages importable; must run BEFORE the project
# imports below.
sys.path.insert(0, '/mnt/dolphinng5_predict')
sys.path.insert(0, '/mnt/dolphinng5_predict/nautilus_dolphin')

import hazelcast
from watchdog.observers import Observer
from watchdog.events import FileSystemEventHandler

sys.path.insert(0, '/mnt/dolphinng5_predict/prod')
from dolphin_exit_handler import install_exit_handler
# Registers this process's exit hooks under the service name "scan_bridge"
# (opaque project helper — exact semantics live in dolphin_exit_handler).
install_exit_handler("scan_bridge")
|
||||
|
||||
|
||||
class NumpyEncoder(json.JSONEncoder):
    """JSON encoder that converts NumPy scalars and arrays to plain Python types."""

    # (numpy abstract type, converter) pairs, tried in order.
    _CONVERTERS = (
        (np.integer, int),
        (np.floating, float),
        (np.ndarray, lambda arr: arr.tolist()),
    )

    def default(self, obj):
        """Return a JSON-serializable value for *obj*, or defer to the base class."""
        for np_type, convert in self._CONVERTERS:
            if isinstance(obj, np_type):
                return convert(obj)
        return super().default(obj)
|
||||
|
||||
|
||||
class ArrowScanReader:
    """Reads a single-row Arrow IPC scan file into a plain dict."""

    # Columns whose name ends with this marker hold JSON-encoded payloads.
    _JSON_SUFFIX = '_json'

    def load_scan(self, filepath):
        """Load the scan at *filepath* and return it as a dict.

        Each column is assumed to hold exactly one row (the first row is
        read). Columns named '*_json' with a non-empty value are decoded
        from JSON and stored under the name without the suffix; all other
        columns pass through unchanged.

        Raises whatever pyarrow/json raise on a corrupt or empty file;
        the caller is expected to handle those.
        """
        with pa.memory_map(str(filepath), 'r') as source:
            table = ipc.open_file(source).read_all()

        result = {}
        for col in table.column_names:
            val = table.column(col)[0].as_py()
            if col.endswith(self._JSON_SUFFIX) and val:
                # BUG FIX: str.replace('_json', '') removed EVERY occurrence
                # of '_json' in the column name (e.g. 'a_json_b_json' ->
                # 'a_b'); slicing strips only the trailing marker.
                result[col[:-len(self._JSON_SUFFIX)]] = json.loads(val)
            else:
                result[col] = val

        return result
|
||||
|
||||
|
||||
class ScanHandler(FileSystemEventHandler):
    """Watchdog handler that pushes newly created Arrow scan files to Hazelcast.

    Deduplicates by file mtime rather than scan_number, so a DolphinNG6
    restart (which resets scan numbering) does not stall the bridge.
    """

    def __init__(self, reader, hz_map):
        self.reader = reader       # ArrowScanReader used to load scan files
        self.hz_map = hz_map       # blocking Hazelcast map proxy
        self.last_mtime = 0        # mtime of the newest scan pushed so far
        self.scans_pushed = 0      # total scans pushed (drives periodic logging)

    def on_created(self, event):
        """React to newly created *.arrow files; ignore directories and other files."""
        if event.is_directory or not event.src_path.endswith('.arrow'):
            return
        self._process(event.src_path)

    def _process(self, filepath):
        """Load the scan at *filepath* and push it to Hazelcast if newer than
        the last one pushed.

        Errors are logged and swallowed deliberately so a single bad file
        cannot kill the watcher thread.
        """
        try:
            # Brief pause so the writer can finish flushing the file after
            # the create event fires.
            time.sleep(0.02)
            mtime = Path(filepath).stat().st_mtime
            if mtime <= self.last_mtime:
                return  # already pushed this (or a newer) scan

            scan = self.reader.load_scan(filepath)
            scan['bridge_ts'] = datetime.now(timezone.utc).isoformat()
            scan['file_mtime'] = mtime

            self.hz_map.put("latest_eigen_scan", json.dumps(scan, cls=NumpyEncoder))
            self.last_mtime = mtime
            self.scans_pushed += 1

            if self.scans_pushed % 100 == 0:
                logger.info(f"Pushed {self.scans_pushed} | #{scan.get('scan_number')} | "
                            f"{len(scan.get('assets', []))} assets | {len(scan.get('asset_prices', []))} prices")

        except Exception:
            # BUG FIX: logger.error(f"Error: {e}") dropped the traceback and
            # the offending path; logger.exception preserves both.
            logger.exception(f"Failed to process scan file {filepath}")
|
||||
|
||||
|
||||
def get_latest_file(arrow_dir):
    """Return (path, mtime) of the newest *.arrow file in *arrow_dir*.

    Returns (None, 0) when the directory is missing (or vanishes while
    being scanned) or contains no .arrow files.
    """
    newest_path = None
    newest_ts = 0

    try:
        with os.scandir(arrow_dir) as entries:
            for item in entries:
                if not (item.name.endswith('.arrow') and item.is_file()):
                    continue
                ts = item.stat().st_mtime
                if ts > newest_ts:
                    newest_path, newest_ts = Path(item.path), ts
    except FileNotFoundError:
        return None, 0

    return newest_path, newest_ts
|
||||
|
||||
|
||||
ARROW_BASE = Path('/mnt/dolphinng6_data/arrow_scans')


def main():
    """Bridge loop: mirror the newest Arrow scan into Hazelcast.

    Connects to the local Hazelcast cluster, watches today's dated scan
    directory with watchdog, and additionally polls once per second as a
    fallback for missed filesystem events. When the (local) date rolls
    over, the observer is torn down and re-pointed at the new directory.

    Runs until KeyboardInterrupt; always stops the observer and shuts the
    Hazelcast client down on exit.
    """
    hz = hazelcast.HazelcastClient(cluster_name="dolphin", cluster_members=["127.0.0.1:5701"])
    hz_map = hz.get_map("DOLPHIN_FEATURES").blocking()
    logger.info("Connected to Hazelcast")

    reader = ArrowScanReader()
    handler = ScanHandler(reader, hz_map)
    observer = None
    current_date = None

    try:
        while True:
            today = datetime.now().strftime('%Y-%m-%d')
            arrow_dir = ARROW_BASE / today

            # Day rolled over or first start — rewire observer
            if today != current_date:
                if observer is not None:
                    observer.stop()
                    observer.join()
                arrow_dir.mkdir(parents=True, exist_ok=True)
                observer = Observer()
                observer.schedule(handler, str(arrow_dir), recursive=False)
                observer.start()
                current_date = today
                logger.info(f"Watching: {arrow_dir}")

            # Poll fallback: pick up the newest file in case an event was
            # missed. _process() dedupes by mtime, so this is a no-op when
            # nothing new has landed.
            latest_file, _ = get_latest_file(arrow_dir)
            if latest_file:
                pushed_before = handler.scans_pushed
                handler._process(str(latest_file))
                # BUG FIX: this used to log unconditionally every second
                # whenever any file existed; log only when the poll
                # actually pushed a new scan.
                if handler.scans_pushed != pushed_before:
                    logger.info(f"Caught up to scan #{handler.scans_pushed} pushed so far")

            time.sleep(1)
    except KeyboardInterrupt:
        pass
    finally:
        if observer is not None:
            observer.stop()
            observer.join()
        hz.shutdown()


if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user