#!/usr/bin/env python3
"""System Watchdog Service - Prefect Managed.

Runs SERVICE_SCRIPT as a child process, restarts it whenever it exits, and
periodically reports whether the Hazelcast map ``DOLPHIN_SAFETY`` holds a
``latest`` entry.
"""

import subprocess
import sys
import time

from prefect import flow, task, get_run_logger

SERVICE_SCRIPT = "/mnt/dolphinng5_predict/prod/system_watchdog_service.py"

# Seconds between liveness checks of the watchdog child process.
CHECK_INTERVAL_SECONDS = 60
# Seconds to wait after terminate() before escalating to kill().
STOP_TIMEOUT_SECONDS = 10


@task
def check_safety() -> bool:
    """Report whether the Hazelcast safety map currently holds data.

    Connects to the ``dolphin`` cluster at 127.0.0.1:5701 and reads the
    ``latest`` key of the ``DOLPHIN_SAFETY`` map.

    Returns:
        True when the key maps to a non-None value. False when the value is
        missing or when anything goes wrong (hazelcast not installed,
        cluster unreachable, read error); the probe is best-effort and
        never raises for those cases.
    """
    logger = get_run_logger()
    try:
        # Imported lazily so a missing hazelcast package only fails the probe.
        import hazelcast

        client = hazelcast.HazelcastClient(
            cluster_name="dolphin",
            cluster_members=["127.0.0.1:5701"],
        )
        try:
            safety = client.get_map('DOLPHIN_SAFETY').blocking()
            return safety.get('latest') is not None
        finally:
            # Always release the connection, even when the read fails.
            client.shutdown()
    except Exception as exc:
        # Exception (not a bare except) so KeyboardInterrupt/SystemExit
        # still propagate and the flow can be stopped during a probe.
        logger.warning(f"Safety check failed: {exc!r}")
        return False


def _spawn_watchdog() -> subprocess.Popen:
    """Start SERVICE_SCRIPT with the current interpreter and return its handle."""
    # stdout/stderr are inherited rather than piped: nothing in this module
    # reads from a pipe, and an undrained pipe blocks the child once the OS
    # pipe buffer is full.
    return subprocess.Popen([sys.executable, SERVICE_SCRIPT])


def _stop_watchdog(proc: subprocess.Popen) -> None:
    """Terminate *proc* if it is still running, killing it if it will not exit."""
    if proc.poll() is not None:
        return
    proc.terminate()
    try:
        proc.wait(timeout=STOP_TIMEOUT_SECONDS)
    except subprocess.TimeoutExpired:
        proc.kill()
        proc.wait()


@flow(name="watchdog-service")
def watchdog_service_flow():
    """Manage System Watchdog Service.

    Starts the watchdog script, then loops forever: every
    CHECK_INTERVAL_SECONDS it restarts the script if it has exited,
    otherwise it runs ``check_safety`` and logs the result. A
    KeyboardInterrupt ends the loop quietly; on any exit path the child
    process is stopped before the flow returns or re-raises.
    """
    logger = get_run_logger()
    logger.info("Starting Watchdog Service...")

    proc = _spawn_watchdog()
    logger.info(f"Watchdog started (PID: {proc.pid})")

    try:
        while True:
            time.sleep(CHECK_INTERVAL_SECONDS)
            if proc.poll() is not None:
                logger.error("Watchdog died, restarting...")
                proc = _spawn_watchdog()
                logger.info(f"Watchdog started (PID: {proc.pid})")
            else:
                healthy = check_safety()
                logger.info(f"Safety data: {'✅' if healthy else '⏳'}")
    except KeyboardInterrupt:
        logger.info("Stopping Watchdog...")
    finally:
        # Runs for every exit path so the child is never left orphaned.
        _stop_watchdog(proc)


if __name__ == "__main__":
    watchdog_service_flow()