Files
DOLPHIN/prod/prefect_services/watchdog_service_prefect.py
hjnormey 01c19662cb initial: import DOLPHIN baseline 2026-04-21 from dolphinng5_predict working tree
Includes core prod + GREEN/BLUE subsystems:
- prod/ (BLUE harness, configs, scripts, docs)
- nautilus_dolphin/ (GREEN Nautilus-native impl + dvae/ preserved)
- adaptive_exit/ (AEM engine + models/bucket_assignments.pkl)
- Observability/ (EsoF advisor, TUI, dashboards)
- external_factors/ (EsoF producer)
- mc_forewarning_qlabs_fork/ (MC regime/envelope)

Excludes runtime caches, logs, backups, and reproducible artifacts per .gitignore.
2026-04-21 16:58:38 +02:00

57 lines
1.6 KiB
Python
Executable File

#!/usr/bin/env python3
"""System Watchdog Service - Prefect Managed"""
import subprocess
import sys
import time
from prefect import flow, task, get_run_logger
# Path of the script that watchdog_service_flow launches as a child process.
# NOTE(review): hard-coded absolute path -- confirm it matches the location
# this checkout is actually deployed to.
SERVICE_SCRIPT = "/mnt/dolphinng5_predict/prod/system_watchdog_service.py"
@task
def check_safety() -> bool:
    """Report whether the Hazelcast safety map holds a ``'latest'`` entry.

    Connects to the ``dolphin`` cluster at ``127.0.0.1:5701``, reads the key
    ``'latest'`` from the ``DOLPHIN_SAFETY`` map and shuts the client down
    again.

    Returns:
        True if a non-None value is stored under ``'latest'``; False if the
        value is missing or if any error occurs (hazelcast not importable,
        connection failure, read failure).
    """
    client = None
    try:
        # Imported inside the try so a missing hazelcast package is reported
        # as "unhealthy" rather than raising.
        import hazelcast

        client = hazelcast.HazelcastClient(
            cluster_name="dolphin",
            cluster_members=["127.0.0.1:5701"],
        )
        safety = client.get_map('DOLPHIN_SAFETY').blocking()
        return safety.get('latest') is not None
    except Exception:
        # Best-effort probe: any failure means "not healthy".  Deliberately
        # not a bare ``except`` so KeyboardInterrupt/SystemExit still reach
        # the caller and can stop the service.
        return False
    finally:
        # Always release the client, including when the map read raised.
        if client is not None:
            try:
                client.shutdown()
            except Exception:
                # A failed shutdown must not mask the probe result.
                pass
def _spawn_watchdog() -> subprocess.Popen:
    """Start SERVICE_SCRIPT with the current interpreter and return its handle.

    The child inherits this process's stdout/stderr.  Its output is never
    read by this module, so routing it into a pipe would stall the child as
    soon as the OS pipe buffer filled up.
    """
    return subprocess.Popen([sys.executable, SERVICE_SCRIPT])


@flow(name="watchdog-service")
def watchdog_service_flow():
    """Run the system watchdog script as a child process and keep it alive.

    Every 60 seconds the child is polled: if it has exited it is started
    again, otherwise ``check_safety`` is run and its result is logged.  The
    loop runs until it is interrupted.  On the way out -- Ctrl-C or any other
    exception -- a still-running child is terminated and reaped so it is not
    left orphaned.
    """
    logger = get_run_logger()
    logger.info("Starting Watchdog Service...")
    proc = _spawn_watchdog()
    logger.info(f"Watchdog started (PID: {proc.pid})")
    try:
        while True:
            time.sleep(60)
            exit_code = proc.poll()
            if exit_code is not None:
                logger.error(
                    f"Watchdog died (exit code {exit_code}), restarting..."
                )
                proc = _spawn_watchdog()
                logger.info(f"Watchdog restarted (PID: {proc.pid})")
            else:
                healthy = check_safety()
                logger.info(f"Safety data: {'OK' if healthy else 'MISSING'}")
    except KeyboardInterrupt:
        logger.info("Stopping Watchdog...")
    finally:
        # Runs for KeyboardInterrupt and for unexpected errors alike, so the
        # child never outlives the flow.
        if proc.poll() is None:
            proc.terminate()
            proc.wait()
# Script entry point: invoke the flow directly when this file is executed.
if __name__ == "__main__":
    watchdog_service_flow()