57 lines
1.6 KiB
Python
57 lines
1.6 KiB
Python
|
|
#!/usr/bin/env python3
|
||
|
|
"""System Watchdog Service - Prefect Managed"""
|
||
|
|
import subprocess
|
||
|
|
import sys
|
||
|
|
import time
|
||
|
|
from prefect import flow, task, get_run_logger
|
||
|
|
|
||
|
|
# Absolute path of the watchdog service script that watchdog_service_flow
# launches as a child process with the current Python interpreter.
SERVICE_SCRIPT = "/mnt/dolphinng5_predict/prod/system_watchdog_service.py"
|
||
|
|
|
||
|
|
@task
def check_safety() -> bool:
    """Report whether the Hazelcast ``DOLPHIN_SAFETY`` map has a ``latest`` entry.

    Opens a client to cluster ``dolphin`` at ``127.0.0.1:5701``, reads the
    ``latest`` key from the ``DOLPHIN_SAFETY`` map, and always shuts the
    client down again.

    Returns:
        ``True`` if the entry is present (not ``None``). ``False`` if it is
        missing or if any ordinary error occurs (hazelcast not importable,
        connection failure, read failure).
    """
    try:
        # The import lives inside the try, so an ImportError is reported as
        # "no safety data" instead of propagating.
        import hazelcast

        client = hazelcast.HazelcastClient(
            cluster_name="dolphin",
            cluster_members=["127.0.0.1:5701"],
        )
        try:
            safety = client.get_map("DOLPHIN_SAFETY").blocking()
            return safety.get("latest") is not None
        finally:
            # Release the connection even when the lookup raises; previously
            # a failed read leaked the client.
            client.shutdown()
    except Exception:
        # Best-effort probe: ordinary failures mean "not available yet".
        # Deliberately not a bare ``except`` so KeyboardInterrupt/SystemExit
        # still reach the caller and can stop the service.
        return False
|
||
|
|
|
||
|
|
def _spawn_watchdog() -> subprocess.Popen:
    """Start the watchdog service script with the current interpreter."""
    # stdout/stderr are inherited rather than piped: nothing in this module
    # reads from a pipe, so a PIPE would fill up and block the child.
    return subprocess.Popen([sys.executable, SERVICE_SCRIPT])


def _stop_watchdog(proc: subprocess.Popen, timeout: float = 10.0) -> None:
    """Terminate *proc* if it is still running; kill it if it ignores that."""
    if proc.poll() is not None:
        return  # already exited, nothing to clean up
    proc.terminate()
    try:
        proc.wait(timeout=timeout)
    except subprocess.TimeoutExpired:
        proc.kill()
        proc.wait()


@flow(name="watchdog-service")
def watchdog_service_flow():
    """Run the System Watchdog Service as a supervised child process.

    Starts ``SERVICE_SCRIPT`` and then loops forever, waking every 60
    seconds to either restart the child if it has exited or log the result
    of ``check_safety()``. The loop ends on ``KeyboardInterrupt`` (handled
    and logged) or on any other exception (propagated); in both cases the
    child process is stopped before the flow returns.
    """
    logger = get_run_logger()
    logger.info("Starting Watchdog Service...")

    proc = _spawn_watchdog()
    logger.info(f"Watchdog started (PID: {proc.pid})")

    try:
        while True:
            time.sleep(60)
            if proc.poll() is not None:
                logger.error("Watchdog died, restarting...")
                # Same launch path as the initial start so both children
                # get identical stdio handling.
                proc = _spawn_watchdog()
                logger.info(f"Watchdog started (PID: {proc.pid})")
            else:
                healthy = check_safety()
                logger.info(f"Safety data: {'✅' if healthy else '⏳'}")
    except KeyboardInterrupt:
        logger.info("Stopping Watchdog...")
    finally:
        # Runs on every exit path so the child is never left orphaned.
        _stop_watchdog(proc)
|
||
|
|
|
||
|
|
# Script entry point: start the watchdog flow only when this file is
# executed directly, not when it is imported.
if __name__ == "__main__":
    watchdog_service_flow()
|