142 lines
3.9 KiB
YAML
142 lines
3.9 KiB
YAML
|
|
services:
  # Watchdog: restarts containers labelled `autoheal=true` once Docker reports
  # them as unhealthy (label name set via AUTOHEAL_CONTAINER_LABEL below).
  autoheal:
    # NOTE(review): floating `latest` tag; consider pinning a release so
    # redeploys are reproducible.
    image: willfarrell/autoheal:latest
    container_name: dolphin-autoheal
    restart: always
    volumes:
      # Host Docker socket: autoheal talks to the Docker API through it.
      - /var/run/docker.sock:/var/run/docker.sock
    environment:
      - AUTOHEAL_CONTAINER_LABEL=autoheal
      - AUTOHEAL_INTERVAL=10  # poll every 10s (MHS fires first at ~1s)
      - AUTOHEAL_START_PERIOD=30  # grace on container cold start
      - AUTOHEAL_DEFAULT_STOP_TIMEOUT=10

  hazelcast:
    image: hazelcast/hazelcast:5.3
    container_name: dolphin-hazelcast
    # NOTE: autoheal REMOVED (2026-04-07). HZ is RAM-only volatile — restarting
    # wipes ALL state and causes cascading failures. Better to leave it running
    # even if temporarily unhealthy than to restart and lose everything.
    # (Hence: deliberately no `autoheal=true` label on this service.)
    ports:
      - "5701:5701"
    environment:
      - JAVA_OPTS=-Xmx2g
      - HZ_CLUSTERNAME=dolphin
    volumes:
      - hz_data:/opt/hazelcast/data
    restart: unless-stopped
    init: true
    # Healthy when a TCP connection to the member port 5701 can be opened.
    # The status gates hazelcast-mc start-up via depends_on; it does not
    # trigger restarts because this service is not labelled for autoheal.
    healthcheck:
      test: ["CMD-SHELL", "timeout 5 bash -c '</dev/tcp/localhost/5701' || exit 1"]
      interval: 15s
      timeout: 5s
      retries: 5
      start_period: 90s

  hazelcast-mc:
    image: hazelcast/management-center:5.3
    container_name: dolphin-hazelcast-mc
    labels:
      - autoheal=true
    ports:
      - "8080:8080"
    environment:
      - MC_DEFAULT_CLUSTER=dolphin
      - MC_DEFAULT_CLUSTER_MEMBERS=hazelcast:5701
    # Start only after the Hazelcast member passes its healthcheck.
    depends_on:
      hazelcast:
        condition: service_healthy
    restart: unless-stopped
    # Healthy when the web UI on port 8080 answers with a success status.
    healthcheck:
      test: ["CMD-SHELL", "curl -sf http://localhost:8080/ > /dev/null || exit 1"]
      interval: 30s
      timeout: 5s
      retries: 3
      start_period: 60s

  prefect-server:
    # NOTE(review): `3-latest` is a floating tag; consider pinning an exact
    # Prefect release so redeploys are reproducible.
    image: prefecthq/prefect:3-latest
    container_name: dolphin-prefect
    labels:
      - autoheal=true
    ports:
      - "4200:4200"
    command: prefect server start --host 0.0.0.0
    environment:
      # CRITICAL: These must match the Tailscale FQDN for external access
      - PREFECT_UI_URL=http://dolphin.taile8ad92.ts.net:4200
      - PREFECT_API_URL=http://dolphin.taile8ad92.ts.net:4200/api
      - PREFECT_SERVER_API_HOST=0.0.0.0
      - PREFECT_SERVER_CORS_ALLOWED_ORIGINS=*
      - PYTHONUNBUFFERED=1
      - PREFECT_LOGGING_TO_API_BATCH_INTERVAL=0.3
    volumes:
      - prefect_data:/root/.prefect
    restart: unless-stopped
    # Healthy when /api/health responds and the body contains `true`.
    healthcheck:
      test: ["CMD-SHELL", "curl -sf http://localhost:4200/api/health | grep -q true || exit 1"]
      interval: 10s
      timeout: 5s
      retries: 3
      start_period: 45s

  clickhouse:
    image: clickhouse/clickhouse-server:24.3-alpine
    container_name: dolphin-clickhouse
    restart: unless-stopped
    labels:
      - autoheal=true
    ports:
      - "8123:8123"
      - "9000:9000"
    volumes:
      - prod_ch_data:/var/lib/clickhouse
      # Site-specific overrides, mounted read-only into the drop-in directories.
      - ./clickhouse/config.xml:/etc/clickhouse-server/config.d/dolphin.xml:ro
      - ./clickhouse/users.xml:/etc/clickhouse-server/users.d/dolphin.xml:ro
    networks:
      - prod_default
    ulimits:
      nofile:
        soft: 262144
        hard: 262144
    # Healthy when the HTTP interface answers /ping on port 8123.
    healthcheck:
      test: ["CMD-SHELL", "wget -q -O /dev/null http://127.0.0.1:8123/ping || exit 1"]
      interval: 10s
      timeout: 5s
      retries: 5
      start_period: 30s

  otelcol:
    image: otel/opentelemetry-collector-contrib:0.121.0
    container_name: dolphin-otelcol
    restart: unless-stopped
    # Deliberately NOT labelled `autoheal=true` and no in-container healthcheck:
    # the upstream collector image is built FROM scratch (no /bin/sh, wget or
    # grep), so a CMD-SHELL probe can never succeed. With the autoheal label
    # the permanently "unhealthy" status would make autoheal restart the
    # collector in a loop. Crashes are still covered by `restart` above.
    # NOTE(review): if liveness monitoring is wanted, probe the collector's
    # health endpoint (port 13133 in the previous probe) from another
    # container on prod_default instead.
    ports:
      - "4317:4317"
      - "4318:4318"
    volumes:
      - ./otelcol/config.yaml:/etc/otelcol-contrib/config.yaml:ro
    networks:
      - prod_default
    # Start only after ClickHouse passes its healthcheck.
    depends_on:
      clickhouse:
        condition: service_healthy
    healthcheck:
      disable: true
|
||
|
|
|
||
|
|
networks:
  # Pre-existing network, created and managed outside this Compose project;
  # Compose attaches to it and never creates or removes it.
  prod_default:
    external: true
|
||
|
|
|
||
|
|
volumes:
  # Project-managed named volumes with default settings.
  hz_data: {}
  prefect_data: {}
  # Created outside this Compose project; Compose only attaches to it.
  prod_ch_data:
    external: true
|