Files
DOLPHIN/prod/prefect_services/watchdog_service_prefect.py

57 lines
1.6 KiB
Python
Raw Normal View History

#!/usr/bin/env python3
"""System Watchdog Service - Prefect Managed"""
import subprocess
import sys
import time
from prefect import flow, task, get_run_logger
# Absolute path of the watchdog script that watchdog_service_flow launches
# as a child process using the current Python interpreter.
SERVICE_SCRIPT = "/mnt/dolphinng5_predict/prod/system_watchdog_service.py"
@task
def check_safety() -> bool:
    """Check whether Safety data is present in Hazelcast (Hz).

    Connects to the "dolphin" cluster at 127.0.0.1:5701 and reads the
    'latest' key of the 'DOLPHIN_SAFETY' map.

    Returns:
        True when the 'latest' entry exists (is not None); False when it is
        missing or when any error occurs (hazelcast not importable, cluster
        unreachable, read failure, shutdown failure).
    """
    try:
        # Imported inside the try so that a missing hazelcast package is
        # reported as "not healthy" rather than raising.
        import hazelcast

        client = hazelcast.HazelcastClient(
            cluster_name="dolphin",
            cluster_members=["127.0.0.1:5701"],
        )
        try:
            safety = client.get_map('DOLPHIN_SAFETY').blocking()
            data = safety.get('latest')
        finally:
            # Always release the client, even when the read fails; this task
            # is polled repeatedly and must not leak connections.
            client.shutdown()
        return data is not None
    except Exception as exc:
        # Best-effort check: report failure as False, but do not swallow
        # KeyboardInterrupt/SystemExit (a bare ``except:`` would).
        get_run_logger().warning(f"Safety check failed: {exc!r}")
        return False
def _spawn_watchdog() -> subprocess.Popen:
    """Launch the watchdog script as a child process and return its handle."""
    # stdout/stderr are inherited rather than piped: nothing in this module
    # reads from a pipe, so a PIPE would eventually fill up and block the
    # child on write.
    return subprocess.Popen([sys.executable, SERVICE_SCRIPT])


@flow(name="watchdog-service")
def watchdog_service_flow():
    """Manage System Watchdog Service.

    Starts the watchdog script as a child process, then every 60 seconds
    either restarts it (if it has exited) or runs the check_safety task and
    logs the result. Loops until interrupted; the child process is terminated
    on every exit path, not only on KeyboardInterrupt.
    """
    logger = get_run_logger()
    logger.info("Starting Watchdog Service...")
    proc = _spawn_watchdog()
    logger.info(f"Watchdog started (PID: {proc.pid})")
    try:
        while True:
            time.sleep(60)
            exit_code = proc.poll()
            if exit_code is not None:
                logger.error("Watchdog died, restarting...")
                proc = _spawn_watchdog()
                logger.info(
                    f"Watchdog restarted (PID: {proc.pid}, "
                    f"previous exit code: {exit_code})"
                )
            else:
                healthy = check_safety()
                logger.info(f"Safety data: {'OK' if healthy else 'MISSING'}")
    except KeyboardInterrupt:
        logger.info("Stopping Watchdog...")
    finally:
        # Clean up the child however the loop ends (Ctrl-C, cancellation,
        # unexpected error) so it is never orphaned.
        if proc.poll() is None:
            proc.terminate()
            try:
                proc.wait(timeout=10)
            except subprocess.TimeoutExpired:
                # The child ignored SIGTERM; force it down so shutdown
                # cannot hang indefinitely.
                proc.kill()
                proc.wait()
# Script entry point: run the flow when this file is executed directly.
if __name__ == "__main__":
    watchdog_service_flow()