""" ch_writer.py — Dolphin ClickHouse fire-and-forget writer. All inserts are async (CH async_insert=1, wait_for_async_insert=0). Uses HTTP INSERT with JSONEachRow — zero external dependencies. OTel transport note: This file is the single integration point. To switch to OTel transport (e.g., when Uptrace is the primary sink), replace _flush() internals only. All caller code (ch_put calls across services) stays unchanged. Usage: from ch_writer import ch_put ch_put("eigen_scans", {"ts": int(time.time() * 1e6), "scan_number": n, ...}) Environment overrides (optional): CH_URL — default: http://localhost:8123 CH_USER — default: dolphin CH_PASS — default: dolphin_ch_2026 CH_DB — default: dolphin """ import json import logging import os import random import struct import threading import time import urllib.request from collections import defaultdict from queue import Full, Queue log = logging.getLogger("ch_writer") CH_URL = os.environ.get("CH_URL", "http://localhost:8123") CH_USER = os.environ.get("CH_USER", "dolphin") CH_PASS = os.environ.get("CH_PASS", "dolphin_ch_2026") CH_DB = os.environ.get("CH_DB", "dolphin") # ─── Timestamp helpers ──────────────────────────────────────────────────────── def ts_us() -> int: """Current UTC time as microseconds — for DateTime64(6) fields.""" return int(time.time() * 1_000_000) def ts_ms() -> int: """Current UTC time as milliseconds — for DateTime64(3) fields.""" return int(time.time() * 1_000) # ─── UUIDv7 — time-ordered distributed trace ID ─────────────────────────────── def uuid7() -> str: """ Generate a UUIDv7 — RFC 9562 time-ordered UUID. Layout (128 bits): [0:48] Unix timestamp milliseconds — sortable, embeds timing [48:52] Version = 0b0111 (7) [52:64] rand_a (12 bits) — sub-ms uniqueness [64:66] Variant = 0b10 [66:128] rand_b (62 bits) — entropy Properties: - Lexicographically sortable by time (no JOIN to recover timestamp) - CH can use as ORDER BY component alongside ts columns - Drop-in for UUIDv4 (same string format, same String column type) - Pure stdlib — no dependencies Usage: scan_uuid = uuid7() # NG7: one per scan # Pass downstream to trade_events, obf_fast_intrade, posture_events # This IS the distributed trace ID across the causal chain. """ ts_ms_val = int(time.time() * 1_000) rand_a = random.getrandbits(12) rand_b = random.getrandbits(62) hi = (ts_ms_val << 16) | 0x7000 | rand_a lo = (0b10 << 62) | rand_b b = struct.pack(">QQ", hi, lo) return ( f"{b[0:4].hex()}-{b[4:6].hex()}-" f"{b[6:8].hex()}-{b[8:10].hex()}-{b[10:16].hex()}" ) # ─── Internal writer ────────────────────────────────────────────────────────── class _CHWriter: """ Thread-safe, non-blocking ClickHouse writer. Batches rows per table and flushes every flush_interval_s. The caller's thread is NEVER blocked — queue.put_nowait() drops silently if the queue is full (observability is best-effort). """ def __init__(self, flush_interval_s: float = 1.0, maxqueue: int = 50_000, db: str = CH_DB): self._q: Queue = Queue(maxsize=maxqueue) self._interval = flush_interval_s self._db = db self._dropped = 0 self._t = threading.Thread( target=self._run, daemon=True, name=f"ch-writer-{db}" ) self._t.start() def put(self, table: str, row: dict) -> None: """Non-blocking enqueue. Silently drops on full queue.""" try: self._q.put_nowait((table, row)) except Full: self._dropped += 1 if self._dropped % 1000 == 1: log.warning("ch_writer: %d rows dropped (queue full)", self._dropped) def _run(self): batch: dict[str, list] = defaultdict(list) deadline = time.monotonic() + self._interval while True: remaining = max(0.005, deadline - time.monotonic()) try: table, row = self._q.get(timeout=remaining) batch[table].append(row) except Exception: pass # timeout — fall through to flush check if time.monotonic() >= deadline: if batch: self._flush(batch) batch = defaultdict(list) deadline = time.monotonic() + self._interval def _flush(self, batch: dict[str, list]): for table, rows in batch.items(): if not rows: continue body = "\n".join(json.dumps(r) for r in rows).encode() url = ( f"{CH_URL}/?database={self._db}" f"&query=INSERT+INTO+{table}+FORMAT+JSONEachRow" f"&async_insert=1&wait_for_async_insert=0" ) req = urllib.request.Request(url, data=body, method="POST") req.add_header("X-ClickHouse-User", CH_USER) req.add_header("X-ClickHouse-Key", CH_PASS) req.add_header("Content-Type", "application/octet-stream") try: with urllib.request.urlopen(req, timeout=5) as resp: if resp.status not in (200, 201): log.debug( "CH flush [%s]: HTTP %s", table, resp.status ) except Exception as e: # Observability writes must never surface to callers log.debug("CH flush error [%s]: %s", table, e) # ─── Module-level singletons ───────────────────────────────────────────────── _writer = _CHWriter(db="dolphin") _writer_green = _CHWriter(db="dolphin_green") def ch_put(table: str, row: dict) -> None: """ Fire-and-forget insert into dolphin.