#!/usr/bin/env python3
"""
DOLPHIN Scan Bridge Service (Linux)
====================================
Watches Arrow scan files and pushes to Hazelcast.
Handles DolphinNG6 restart/scan_number resets by using file timestamps.
"""
# --- Standard library ---
import os
import sys
import time
import json
import logging
from pathlib import Path
from datetime import datetime, timezone

# --- Third-party: Arrow IPC file reading and NumPy scalar handling ---
import pyarrow as pa
import pyarrow.ipc as ipc
import numpy as np

# Process-wide logging: timestamped, level-tagged lines on stderr.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s [%(levelname)s] %(name)s: %(message)s'
)
logger = logging.getLogger("ScanBridge")
# Make project packages importable. NOTE: these inserts must run before the
# project-local imports below.
sys.path.insert(0, '/mnt/dolphinng5_predict')
sys.path.insert(0, '/mnt/dolphinng5_predict/nautilus_dolphin')

import hazelcast
from watchdog.observers import Observer
from watchdog.events import FileSystemEventHandler

# The shared exit handler lives under prod/; register this service by name so
# its shutdown hooks run when the process terminates.
sys.path.insert(0, '/mnt/dolphinng5_predict/prod')
from dolphin_exit_handler import install_exit_handler
install_exit_handler("scan_bridge")
class NumpyEncoder(json.JSONEncoder):
    """JSON encoder that converts NumPy scalars and arrays to native Python.

    ``np.integer`` -> ``int``, ``np.floating`` -> ``float``,
    ``np.ndarray`` -> nested ``list``; everything else defers to the base
    encoder (which raises ``TypeError`` for unsupported types).
    """

    # (numpy abstract type, converter) pairs, checked in order.
    _CONVERTERS = (
        (np.integer, int),
        (np.floating, float),
        (np.ndarray, lambda arr: arr.tolist()),
    )

    def default(self, obj):
        for np_type, convert in self._CONVERTERS:
            if isinstance(obj, np_type):
                return convert(obj)
        return super().default(obj)
class ArrowScanReader:
    """Reads a single-row Arrow IPC file into a plain Python dict."""

    def load_scan(self, filepath):
        """Load the first row of the Arrow file at *filepath* as a dict.

        Columns whose names end in ``_json`` hold JSON-encoded strings; these
        are decoded and stored under the column name without the ``_json``
        suffix (e.g. ``assets_json`` becomes ``assets``). All other columns
        are copied through as-is.
        """
        with pa.memory_map(str(filepath), 'r') as source:
            table = ipc.open_file(source).read_all()

        scan = {}
        for name in table.column_names:
            cell = table.column(name)[0].as_py()
            if name.endswith('_json') and cell:
                # JSON-packed column: decode and drop the suffix.
                scan[name.replace('_json', '')] = json.loads(cell)
            else:
                scan[name] = cell
        return scan
class ScanHandler(FileSystemEventHandler):
    """Watchdog handler that pushes new Arrow scan files into Hazelcast.

    Deduplication uses only the file's mtime (not scan_number), so it keeps
    working across DolphinNG6 restarts that reset the scan counter.
    """

    def __init__(self, reader, hz_map):
        self.reader = reader        # ArrowScanReader (or compatible) used to parse files
        self.hz_map = hz_map        # blocking Hazelcast map proxy
        self.last_mtime = 0         # mtime of the newest scan pushed so far
        self.scans_pushed = 0       # total scans pushed in this process's lifetime

    def on_created(self, event):
        """Filesystem callback: process newly created .arrow files only."""
        if event.is_directory or not event.src_path.endswith('.arrow'):
            return
        self._process(event.src_path)

    def _process(self, filepath):
        """Load *filepath* and push it to Hazelcast if newer than the last push."""
        try:
            # Brief pause so the writer can finish flushing the file before we read it.
            time.sleep(0.02)
            mtime = Path(filepath).stat().st_mtime
            if mtime <= self.last_mtime:
                return  # this scan (or a newer one) was already pushed

            scan = self.reader.load_scan(filepath)
            scan['bridge_ts'] = datetime.now(timezone.utc).isoformat()
            scan['file_mtime'] = mtime

            self.hz_map.put("latest_eigen_scan", json.dumps(scan, cls=NumpyEncoder))
            self.last_mtime = mtime
            self.scans_pushed += 1

            if self.scans_pushed % 100 == 0:
                logger.info(f"Pushed {self.scans_pushed} | #{scan.get('scan_number')} | "
                            f"{len(scan.get('assets', []))} assets | {len(scan.get('asset_prices', []))} prices")

        except Exception:
            # Fix: the old handler logged only str(e), dropping both the
            # traceback and the failing path. logger.exception records the
            # full traceback; swallowing keeps the watcher alive for
            # subsequent scans.
            logger.exception("Failed to process scan file %s", filepath)
def get_latest_file(arrow_dir):
    """Find the most recently modified ``.arrow`` file in *arrow_dir*.

    Returns a ``(Path, mtime)`` tuple, or ``(None, 0)`` when the directory
    does not exist or contains no ``.arrow`` files.
    """
    newest_path = None
    newest_ts = 0

    try:
        with os.scandir(arrow_dir) as entries:
            for entry in entries:
                # Skip directories and anything that is not an .arrow file.
                if not (entry.name.endswith('.arrow') and entry.is_file()):
                    continue
                ts = entry.stat().st_mtime
                if ts > newest_ts:
                    newest_ts = ts
                    newest_path = Path(entry.path)
    except FileNotFoundError:
        return None, 0

    return newest_path, newest_ts
# Root directory of Arrow scan files; main() watches one per-day
# sub-directory under it, named YYYY-MM-DD.
ARROW_BASE = Path('/mnt/dolphinng6_data/arrow_scans')
def main():
    """Run the bridge: watch today's scan directory and push scans to Hazelcast.

    Re-wires the watchdog observer whenever the calendar date rolls over, and
    polls once a second as a catch-up path in case a filesystem event was
    missed. Shuts the observer and the Hazelcast client down on exit.
    """
    hz = hazelcast.HazelcastClient(cluster_name="dolphin", cluster_members=["127.0.0.1:5701"])
    hz_map = hz.get_map("DOLPHIN_FEATURES").blocking()
    logger.info("Connected to Hazelcast")

    reader = ArrowScanReader()
    handler = ScanHandler(reader, hz_map)
    observer = None
    current_date = None

    try:
        while True:
            today = datetime.now().strftime('%Y-%m-%d')
            arrow_dir = ARROW_BASE / today

            # Day rolled over or first start — rewire observer
            if today != current_date:
                if observer is not None:
                    observer.stop()
                    observer.join()
                arrow_dir.mkdir(parents=True, exist_ok=True)
                observer = Observer()
                observer.schedule(handler, str(arrow_dir), recursive=False)
                observer.start()
                current_date = today
                logger.info(f"Watching: {arrow_dir}")

            # Catch up on any files already present (also covers missed events).
            # _process() early-returns on an unchanged mtime, so this is cheap.
            latest_file, _ = get_latest_file(arrow_dir)
            if latest_file:
                pushed_before = handler.scans_pushed
                handler._process(str(latest_file))
                # Fix: previously this logged once per second whenever any
                # .arrow file existed; only log when a scan was actually pushed.
                if handler.scans_pushed != pushed_before:
                    logger.info(f"Caught up to scan #{handler.scans_pushed} pushed so far")

            time.sleep(1)
    except KeyboardInterrupt:
        pass
    finally:
        if observer is not None:
            observer.stop()
            observer.join()
        hz.shutdown()
# Script entry point: run the bridge loop until interrupted.
if __name__ == "__main__":
    main()