clock.py: mono_ns single timebase; LatencyHistogram (raw reservoir, exact nearest-rank percentiles); PlaneClock per-plane seq clocks with strict staleness budgets (age==budget not stale — MHS FIX-9 lesson); DeadlineScheduler — single-driver timer heap with EARLY-WAKE on earlier-than-head insert (the jitter-budget mechanism), isolated callbacks. harness.py: seeded deterministic event storms (sequence-hash asserted) driving the REAL Rust ExecutionKernel via the MOCK bundle; reaction latency measured producer-stamp→post-fold across the queue hop exactly as the production account stream consumes; ACCOUNT_UPDATE wallet sentinel tracks kernel k_capital so synthetic storms never trip capital_frozen; sustained throughput reported alongside the gate. V0 GATE (prod host, 50k events, 5k concurrent deadlines, burst 8/12ms ≈ 667 ev/s offered): venue_event_reaction p99 7.19ms (<10ms budget), deadline_jitter p99 4.86ms (<25ms), zero early fires. Capacity artifacts: the 32/1ms and 16/12ms storms (archived reports) show intra-burst queueing dominating beyond ~1.3k ev/s offered at ~0.33ms/fold. 17 unit tests. Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
322 lines
12 KiB
Python
322 lines
12 KiB
Python
"""VIOLET reactor clock primitives (Stage V0).

NOT a metronome. The system "clock" is:

- ONE monotonic timebase (``mono_ns``) stamped on every event;
- per-plane sequence clocks (``PlaneClock``) with staleness budgets —
  each data plane (scan ~5-6 s, venue push ~ms, account events) advances
  at its own rate and is judged stale against its own budget;
- a deadline scheduler (``DeadlineScheduler``) for genuinely time-based
  work (router TTLs, SL guards): a timer heap driven by ONE coroutine,
  woken EARLY via asyncio.Event when an earlier deadline is inserted —
  the early-wake, not the tick, is what keeps jitter inside budget.

Latency accounting uses ``LatencyHistogram``: raw reservoir samples with
exact order-statistic percentiles (no bucket interpolation) so the V0 gate
numbers are not artifacts of bucketing.
"""
|
|
|
|
from __future__ import annotations

import asyncio
import heapq
import json
import math
import time
from dataclasses import dataclass, field
from typing import Any, Callable, Dict, List, Optional
|
|
|
|
|
|
def mono_ns() -> int:
    """Read the single VIOLET timebase, in nanoseconds.

    This is the monotonic clock; wall-clock time is for report metadata
    only and is never returned from here.
    """
    stamp_ns = time.monotonic_ns()
    return stamp_ns
|
|
|
|
|
|
# ── Latency accounting ───────────────────────────────────────────────────────
|
|
|
|
|
|
class LatencyHistogram:
    """Raw-reservoir latency recorder with exact nearest-rank percentiles.

    Keeps up to ``reservoir`` raw samples (first-N retention: V0 storms are
    sized below the reservoir so retention is total; the cap only guards
    memory if a harness is misconfigured). Samples that arrive once the
    reservoir is full are counted as overflow but not stored. ``min`` and
    ``max`` are tracked over every recorded sample, retained or not;
    percentiles are exact order statistics over the retained samples only.
    """

    def __init__(self, name: str, *, reservoir: int = 200_000) -> None:
        """Create an empty histogram.

        Args:
            name: Label used in ``to_dict``/``report`` output.
            reservoir: Maximum number of raw samples retained.
        """
        self.name = name
        self.reservoir = int(reservoir)
        self._samples: List[int] = []
        self._overflow = 0
        self._max_ns = 0
        # None until the first sample; doubles as the "no samples yet" flag.
        self._min_ns: Optional[int] = None

    def record(self, dt_ns: int) -> None:
        """Record one latency sample (nanoseconds, may be negative)."""
        dt = int(dt_ns)
        if self._min_ns is None:
            # The first sample seeds BOTH extremes. Seeding max from the 0
            # placeholder would report max == 0 for all-negative samples.
            self._min_ns = dt
            self._max_ns = dt
        elif dt < self._min_ns:
            self._min_ns = dt
        elif dt > self._max_ns:
            self._max_ns = dt
        if len(self._samples) < self.reservoir:
            self._samples.append(dt)
        else:
            self._overflow += 1

    @property
    def count(self) -> int:
        """Total samples recorded: retained plus overflow-dropped."""
        return len(self._samples) + self._overflow

    @staticmethod
    def _nearest_rank(ordered: List[int], p: float) -> int:
        """Return the nearest-rank order statistic of an ascending list.

        Returns 0 for an empty list. The rank is clamped to ``[1, n]`` so
        ``p <= 0`` yields the minimum and ``p >= 1`` the maximum.
        """
        n = len(ordered)
        if n == 0:
            return 0
        # round() strips float-product noise before the ceiling: without it
        # 0.07 * 100 == 7.000000000000001 would ceil to rank 8, not 7.
        rank = math.ceil(round(p * n, 9))
        return ordered[max(1, min(n, rank)) - 1]

    def percentile_ns(self, p: float) -> int:
        """Exact order statistic (nearest-rank) over retained samples.

        nearest-rank: the smallest value whose cumulative fraction >= p,
        i.e. rank = ceil(p * n). (round(p*n + 0.5) is WRONG: banker's
        rounding makes round(5.5) == 6, shifting p50 of 1..10 to 6.)

        Args:
            p: Fraction in [0, 1]; values outside are clamped by rank.

        Returns:
            The sample at that rank in nanoseconds, or 0 when empty.
        """
        if not self._samples:
            return 0
        return self._nearest_rank(sorted(self._samples), p)

    def to_dict(self) -> Dict[str, Any]:
        """Summarise the histogram as a JSON-friendly dict (times in ms)."""
        ms = 1e-6
        # Sort once and reuse it for every percentile in the summary.
        ordered = sorted(self._samples)
        rank_of = self._nearest_rank
        return {
            "name": self.name,
            "count": self.count,
            "retained": len(self._samples),
            "overflow_dropped": self._overflow,
            "min_ms": (self._min_ns or 0) * ms,
            "p50_ms": rank_of(ordered, 0.50) * ms,
            "p90_ms": rank_of(ordered, 0.90) * ms,
            "p99_ms": rank_of(ordered, 0.99) * ms,
            "p999_ms": rank_of(ordered, 0.999) * ms,
            "max_ms": self._max_ns * ms,
        }

    def report(self) -> str:
        """Render the summary as one fixed-width text line."""
        d = self.to_dict()
        return (
            f"{d['name']:<24} n={d['count']:<8} "
            f"p50={d['p50_ms']:8.3f}ms p90={d['p90_ms']:8.3f}ms "
            f"p99={d['p99_ms']:8.3f}ms p99.9={d['p999_ms']:8.3f}ms "
            f"max={d['max_ms']:8.3f}ms"
        )
|
|
|
|
|
|
# ── Plane clocks ─────────────────────────────────────────────────────────────
|
|
|
|
|
|
# Default staleness budgets, in nanoseconds:
#   scan    — two missed 6 s scans;
#   venue   — 2 s without any push while connected;
#   account — 5 s.
# These are LABELS for health logic — never data cadences (operator rule:
# cadences are sacred; staleness thresholds are tunable).
SCAN_STALENESS_NS = 12 * 10**9
VENUE_STALENESS_NS = 2 * 10**9
ACCOUNT_STALENESS_NS = 5 * 10**9
|
|
|
|
|
|
@dataclass
class PlaneClock:
    """Sequence clock for one data plane.

    ``seq`` increments on every observed plane event; ``last_mono_ns`` stamps
    it on the shared timebase. ``is_stale`` is strict (> budget): an age
    exactly equal to the budget is NOT stale, so a budget chosen as an exact
    multiple of the plane's natural cadence does not flap on-cycle (the MHS
    FIX-9 lesson: a threshold equal to the writer's refresh period is stale
    by construction).
    """

    name: str
    staleness_budget_ns: int
    seq: int = 0
    # -1 means "never ticked"; any value >= 0 is a timebase stamp.
    last_mono_ns: int = -1

    def tick(self, now_ns: Optional[int] = None) -> int:
        """Record one plane event and return the new sequence number.

        Args:
            now_ns: Timestamp to stamp with; defaults to ``mono_ns()``.
        """
        self.seq += 1
        self.last_mono_ns = mono_ns() if now_ns is None else int(now_ns)
        return self.seq

    def age_ns(self, now_ns: Optional[int] = None) -> int:
        """Nanoseconds since last tick; -1 if the plane has never ticked.

        The age is clamped at 0 when ``now_ns`` precedes the last tick.
        """
        if self.last_mono_ns < 0:
            return -1
        now = mono_ns() if now_ns is None else int(now_ns)
        return max(0, now - self.last_mono_ns)

    def is_stale(self, now_ns: Optional[int] = None) -> bool:
        """True when the plane has ticked at least once AND age > budget.

        A never-ticked plane is 'not yet alive', a different condition from
        'went stale' — callers gate startup on seq > 0 separately.
        """
        age = self.age_ns(now_ns)
        return age >= 0 and age > self.staleness_budget_ns

    def snapshot(self, now_ns: Optional[int] = None) -> Dict[str, Any]:
        """Return the clock state as a dict, evaluated at one instant.

        Args:
            now_ns: Timestamp to evaluate age and staleness at; defaults to
                ``mono_ns()``.

        The clock is read once and shared by ``age_ns`` and ``stale``, so
        the two fields can never disagree at the budget boundary.
        """
        now = mono_ns() if now_ns is None else int(now_ns)
        return {
            "name": self.name,
            "seq": self.seq,
            "last_mono_ns": self.last_mono_ns,
            "age_ns": self.age_ns(now),
            "stale": self.is_stale(now),
            "budget_ns": self.staleness_budget_ns,
        }
|
|
|
|
|
|
# ── Deadline scheduler ───────────────────────────────────────────────────────
|
|
|
|
|
|
class Deadline:
    """Caller-side handle for one scheduled deadline.

    ``cancel()`` only sets a flag, so calling it repeatedly is harmless.
    """

    __slots__ = ("due_mono_ns", "name", "_cancelled", "_fired")

    def __init__(self, due_mono_ns: int, name: str = "") -> None:
        # Both flags start clear; ``_fired`` is never set inside this class.
        self._fired = False
        self._cancelled = False
        self.name = name
        self.due_mono_ns = int(due_mono_ns)

    def cancel(self) -> None:
        """Mark the deadline cancelled (idempotent)."""
        self._cancelled = True

    @property
    def fired(self) -> bool:
        """Whether the fired flag has been set."""
        return self._fired

    @property
    def cancelled(self) -> bool:
        """Whether ``cancel()`` has been called."""
        return self._cancelled
|
|
|
|
|
|
class DeadlineScheduler:
    """Single-driver asyncio deadline scheduler.

    One coroutine owns a heapq of ``(due_ns, n, Deadline, callback)``. It
    sleeps ``min(next_due - now, max_sleep_ms)`` and is woken EARLY through
    an asyncio.Event whenever a deadline earlier than the current head is
    inserted — early-wake is the jitter-budget mechanism; the max_sleep tick
    is only a liveness backstop.

    Callbacks are synchronous and MUST be non-blocking; async work is
    dispatched by the callback via ``loop.create_task``. A callback
    exception is isolated (passed to the optional ``on_error`` hook) and
    never kills the driver.
    """

    def __init__(
        self,
        *,
        max_sleep_ms: int = 50,
        jitter_hist: Optional[LatencyHistogram] = None,
        on_error: Optional[Callable[[Deadline, BaseException], None]] = None,
    ) -> None:
        """Build an idle scheduler; call ``start()`` to launch the driver.

        Args:
            max_sleep_ms: Liveness tick; floored at 1 ms.
            jitter_hist: Optional histogram receiving ``fire_time - due``
                in nanoseconds for every fired deadline.
            on_error: Optional hook called with ``(deadline, exception)``
                when a callback raises.
        """
        requested_sleep_s = float(max_sleep_ms) / 1000.0
        self._max_sleep_s = max(0.001, requested_sleep_s)
        self.jitter_hist = jitter_hist
        self._on_error = on_error
        self._heap: List[tuple] = []
        self._n = 0  # insertion counter: unique tie-breaker for equal due times
        self._wake = asyncio.Event()
        self._task: Optional[asyncio.Task] = None
        self._stopping = False
        self.fired_count = 0
        self.cancelled_count = 0

    # -- scheduling --------------------------------------------------------

    def schedule_at(
        self, due_mono_ns: int, cb: Callable[[], None], *, name: str = ""
    ) -> Deadline:
        """Schedule ``cb`` at an absolute ``mono_ns`` time; return its handle."""
        handle = Deadline(due_mono_ns, name)
        self._n += 1
        # Decide BEFORE pushing whether this entry becomes the new head.
        becomes_head = True
        if self._heap:
            becomes_head = due_mono_ns < self._heap[0][0]
        heapq.heappush(self._heap, (int(due_mono_ns), self._n, handle, cb))
        if becomes_head:
            self._wake.set()  # early-wake the driver for the new head
        return handle

    def schedule_in(
        self, delay_ms: float, cb: Callable[[], None], *, name: str = ""
    ) -> Deadline:
        """Schedule ``cb`` to fire ``delay_ms`` milliseconds from now."""
        base_ns = mono_ns()
        offset_ns = int(float(delay_ms) * 1e6)
        return self.schedule_at(base_ns + offset_ns, cb, name=name)

    @property
    def pending(self) -> int:
        """Number of heap entries that are neither cancelled nor fired."""
        live = 0
        for _due, _seq, handle, _cb in self._heap:
            if not handle.cancelled and not handle.fired:
                live += 1
        return live

    # -- driver ------------------------------------------------------------

    def start(self) -> None:
        """Launch the driver task unless one is already running."""
        if self._task is not None and not self._task.done():
            return
        self._stopping = False
        loop = asyncio.get_running_loop()
        self._task = loop.create_task(self._run(), name="violet_deadline_driver")

    async def stop(self) -> None:
        """Ask the driver to exit and wait for it to finish."""
        self._stopping = True
        self._wake.set()
        if self._task is None:
            return
        try:
            await self._task
        except asyncio.CancelledError:
            pass
        self._task = None

    def _notify_error(self, handle: Deadline, exc: BaseException) -> None:
        """Pass a callback failure to ``on_error``; hook failures are dropped."""
        if self._on_error is None:
            return
        try:
            self._on_error(handle, exc)
        except Exception:
            pass

    def _fire(self, handle: Deadline, callback: Callable[[], None]) -> None:
        """Mark one deadline fired, record its jitter, run its callback."""
        handle._fired = True
        self.fired_count += 1
        if self.jitter_hist is not None:
            self.jitter_hist.record(mono_ns() - handle.due_mono_ns)
        try:
            callback()
        except BaseException as exc:  # noqa: BLE001 — isolate callbacks
            self._notify_error(handle, exc)

    def _drain_due(self, now: int) -> int:
        """Pop and fire every entry due at ``now``; return the latest clock read.

        NEVER fires early: an entry is popped only when ``due <= now``.
        Cancelled entries are counted and dropped without re-reading the
        clock; after each fired callback the clock is read again.
        """
        while self._heap and self._heap[0][0] <= now:
            _due, _seq, handle, callback = heapq.heappop(self._heap)
            if handle.cancelled:
                self.cancelled_count += 1
            else:
                self._fire(handle, callback)
                now = mono_ns()
        return now

    async def _run(self) -> None:
        """Driver loop: fire what is due, then sleep until head, tick or wake."""
        while not self._stopping:
            now = self._drain_due(mono_ns())
            # Sleep until next head or the liveness tick, whichever first;
            # an earlier insert sets the event and we re-evaluate immediately.
            timeout_s = self._max_sleep_s
            if self._heap:
                until_head_s = (self._heap[0][0] - now) / 1e9
                timeout_s = min(self._max_sleep_s, max(0.0, until_head_s))
            try:
                await asyncio.wait_for(self._wake.wait(), timeout=timeout_s)
            except asyncio.TimeoutError:
                pass
            self._wake.clear()
|
|
|
|
|
|
# ── Report bundling helper ───────────────────────────────────────────────────
|
|
|
|
|
|
def histograms_report(hists: List[LatencyHistogram]) -> str:
    """Join each histogram's ``report()`` line into one newline-separated text."""
    lines = [hist.report() for hist in hists]
    return "\n".join(lines)
|
|
|
|
|
|
def histograms_json(hists: List[LatencyHistogram], **meta: Any) -> str:
    """Serialise histograms plus free-form metadata to indented JSON.

    The result has two top-level keys: ``meta`` (the keyword arguments) and
    ``histograms`` (each histogram's ``to_dict()`` keyed by its ``name``).
    Values JSON cannot encode natively are rendered with ``str``.
    """
    payload = {
        "meta": meta,
        "histograms": {hist.name: hist.to_dict() for hist in hists},
    }
    return json.dumps(payload, indent=2, default=str)
|