# DOLPHIN/prod/ch_writer.py
"""
ch_writer.py — Dolphin ClickHouse fire-and-forget writer.
All inserts are async (CH async_insert=1, wait_for_async_insert=0).
Uses HTTP INSERT with JSONEachRow — zero external dependencies.
OTel transport note:
This file is the single integration point. To switch to OTel transport
(e.g., when Uptrace is the primary sink), replace _flush() internals only.
All caller code (ch_put calls across services) stays unchanged.
Usage:
from ch_writer import ch_put
ch_put("eigen_scans", {"ts": int(time.time() * 1e6), "scan_number": n, ...})
Environment overrides (optional):
CH_URL default: http://localhost:8123
CH_USER default: dolphin
CH_PASS default: dolphin_ch_2026
CH_DB default: dolphin
"""
import json
import logging
import os
import random
import struct
import threading
import time
import urllib.request
from collections import defaultdict
from queue import Empty, Full, Queue
log = logging.getLogger("ch_writer")

# Connection settings — each overridable via environment variable of the
# same name (see module docstring for defaults and intent).
CH_URL = os.environ.get("CH_URL", "http://localhost:8123")
CH_USER = os.environ.get("CH_USER", "dolphin")
CH_PASS = os.environ.get("CH_PASS", "dolphin_ch_2026")
CH_DB = os.environ.get("CH_DB", "dolphin")
# ─── Timestamp helpers ────────────────────────────────────────────────────────
def ts_us() -> int:
    """Current UTC time in microseconds, for DateTime64(6) columns."""
    microseconds_per_second = 1_000_000
    return int(time.time() * microseconds_per_second)
def ts_ms() -> int:
    """Current UTC time in milliseconds, for DateTime64(3) columns."""
    milliseconds_per_second = 1_000
    return int(time.time() * milliseconds_per_second)
# ─── UUIDv7 — time-ordered distributed trace ID ───────────────────────────────
def uuid7() -> str:
    """
    Generate an RFC 9562 UUIDv7 — a time-ordered UUID.

    Bit layout (128 bits):
        [0:48]    Unix timestamp in milliseconds — sortable, embeds timing
        [48:52]   version = 0b0111 (7)
        [52:64]   rand_a (12 bits) — sub-millisecond uniqueness
        [64:66]   variant = 0b10
        [66:128]  rand_b (62 bits) — entropy

    Properties:
        - Lexicographically sortable by time (no JOIN to recover timestamp)
        - CH can use as ORDER BY component alongside ts columns
        - Drop-in for UUIDv4 (same string format, same String column type)
        - Pure stdlib — no dependencies

    Usage:
        scan_uuid = uuid7()  # NG7: one per scan
        # Pass downstream to trade_events, obf_fast_intrade, posture_events
        # This IS the distributed trace ID across the causal chain.
    """
    millis = int(time.time() * 1_000)
    # Assemble all 128 bits into one integer, then slice the 32-digit hex
    # string into the canonical 8-4-4-4-12 grouping.
    value = (
        (millis << 80)
        | (0x7 << 76)
        | (random.getrandbits(12) << 64)
        | (0b10 << 62)
        | random.getrandbits(62)
    )
    digits = f"{value:032x}"
    return (
        f"{digits[0:8]}-{digits[8:12]}-{digits[12:16]}-"
        f"{digits[16:20]}-{digits[20:32]}"
    )
# ─── Internal writer ──────────────────────────────────────────────────────────
class _CHWriter:
    """
    Thread-safe, non-blocking ClickHouse writer.

    Rows are enqueued by any number of caller threads and drained by a
    single daemon thread, which groups them per table and flushes every
    ``flush_interval_s`` seconds via HTTP INSERT (JSONEachRow, CH-side
    async_insert).

    The caller's thread is NEVER blocked — ``put`` uses put_nowait()
    and drops silently if the queue is full (observability is
    best-effort by design).
    """

    def __init__(self, flush_interval_s: float = 1.0, maxqueue: int = 50_000, db: str = CH_DB):
        self._q: Queue = Queue(maxsize=maxqueue)
        self._interval = flush_interval_s
        self._db = db
        self._dropped = 0  # running count of rows lost to a full queue
        self._t = threading.Thread(
            target=self._run, daemon=True, name=f"ch-writer-{db}"
        )
        self._t.start()

    def put(self, table: str, row: dict) -> None:
        """Non-blocking enqueue. Silently drops on full queue."""
        try:
            self._q.put_nowait((table, row))
        except Full:
            self._dropped += 1
            # Warn on the 1st, 1001st, 2001st, ... drop to bound log spam.
            if self._dropped % 1000 == 1:
                log.warning("ch_writer: %d rows dropped (queue full)", self._dropped)

    def _run(self) -> None:
        """Daemon loop: drain the queue into per-table batches, flush on deadline."""
        batch: dict[str, list] = defaultdict(list)
        deadline = time.monotonic() + self._interval
        while True:
            remaining = max(0.005, deadline - time.monotonic())
            try:
                table, row = self._q.get(timeout=remaining)
                batch[table].append(row)
            except Empty:
                # Fix: catch only the get() timeout. The previous broad
                # `except Exception` would also have silently swallowed
                # genuine bugs (e.g. a malformed queue item).
                pass  # timeout — fall through to flush check
            if time.monotonic() >= deadline:
                if batch:
                    self._flush(batch)
                    batch = defaultdict(list)
                deadline = time.monotonic() + self._interval

    def _flush(self, batch: dict[str, list]) -> None:
        """POST each table's rows as one JSONEachRow HTTP INSERT.

        Errors are logged at debug level and never propagated:
        observability writes must never surface to callers.
        """
        for table, rows in batch.items():
            if not rows:
                continue
            body = "\n".join(json.dumps(r) for r in rows).encode()
            # async_insert=1 / wait_for_async_insert=0: CH acks immediately
            # and batches server-side — true fire-and-forget.
            url = (
                f"{CH_URL}/?database={self._db}"
                f"&query=INSERT+INTO+{table}+FORMAT+JSONEachRow"
                f"&async_insert=1&wait_for_async_insert=0"
            )
            req = urllib.request.Request(url, data=body, method="POST")
            req.add_header("X-ClickHouse-User", CH_USER)
            req.add_header("X-ClickHouse-Key", CH_PASS)
            req.add_header("Content-Type", "application/octet-stream")
            try:
                with urllib.request.urlopen(req, timeout=5) as resp:
                    # urlopen raises HTTPError for >=400; this guards the
                    # remaining non-2xx statuses.
                    if resp.status not in (200, 201):
                        log.debug(
                            "CH flush [%s]: HTTP %s", table, resp.status
                        )
            except Exception as e:
                log.debug("CH flush error [%s]: %s", table, e)
# ─── Module-level singletons ─────────────────────────────────────────────────
# Shared writer singletons — one background thread each. Instantiated at
# import time so ch_put()/ch_put_green() are ready immediately.
_writer = _CHWriter(db="dolphin")  # BLUE environment (ch_put)
_writer_green = _CHWriter(db="dolphin_green")  # GREEN environment (ch_put_green)
def ch_put(table: str, row: dict) -> None:
    """
    Fire-and-forget insert into dolphin.<table> (BLUE environment).

    Args:
        table: ClickHouse table name without the database prefix,
            e.g. "eigen_scans".
        row: Mapping of column name to value. Timestamp conventions:
            - DateTime64(6) fields: int microseconds (use ts_us())
            - DateTime64(3) fields: int milliseconds (use ts_ms())
            - Date fields: "YYYY-MM-DD" string
    """
    _writer.put(table, row)
def ch_put_green(table: str, row: dict) -> None:
    """
    Fire-and-forget insert into dolphin_green.<table> (GREEN / NT
    TradingNode environment).

    Same signature as ch_put — a drop-in for GREEN services.
    """
    _writer_green.put(table, row)