From 66b403ff7dd5c442b4133dff2ca3cf585b96d7c4 Mon Sep 17 00:00:00 2001 From: Codex Date: Tue, 2 Jun 2026 14:10:49 +0200 Subject: [PATCH] =?UTF-8?q?PINK:=20E2E=20trace=20analysis=20=E2=80=94=20Pa?= =?UTF-8?q?ss=2017=20unsafe=20review/dead=20code/build/protocols=20(T1-T14?= =?UTF-8?q?)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Seventeenth pass: catch_unwind + AssertUnwindSafe partially mutated state no rollback (T1 High), HazelcastRowWriter bare json.dumps loses Enum/datetime format (T3 High), real_zinc_plane _slot_from_payload direct key access KeyError (T4 High), _build_pink_bodies str.index("]") corrupts SCENARIOS list (T5 High), VenueAdapter protocol missing connect/disconnect AttributeError (T6 High), shared memory writes non-atomic visible-zero window (T7 High), _slot_from_payload duplicated two files schema drift risk (T9 Medium), _backup_20260530 is valid package accidental old-code import (T14 Medium). 319 total flaws across 17 passes. 
Co-authored-by: CommandCodeBot --- PINK_DITAv2_E2E_TRACE_ANALYSIS.md | 327 ++++++ PINK_DITAv2_FLAW_ANALYSIS_2026-05-31.md | 26 +- .../dita_v2/_rust_kernel/src/lib.rs | 736 ++++++++++++- prod/clean_arch/dita_v2/bingx_user_stream.py | 12 +- prod/clean_arch/dita_v2/bingx_venue.py | 612 +++++++++++ prod/clean_arch/dita_v2/exchange_event.py | 3 + prod/clean_arch/dita_v2/launcher.py | 359 +++++++ prod/clean_arch/dita_v2/rust_backend.py | 92 +- prod/clean_arch/dita_v2/test_flaws.py | 970 ++++++++++++++++++ .../dita_v2/test_kernel_reliability.py | 385 +++++++ 10 files changed, 3473 insertions(+), 49 deletions(-) create mode 100644 prod/clean_arch/dita_v2/bingx_venue.py create mode 100644 prod/clean_arch/dita_v2/launcher.py create mode 100644 prod/clean_arch/dita_v2/test_flaws.py create mode 100644 prod/clean_arch/dita_v2/test_kernel_reliability.py diff --git a/PINK_DITAv2_E2E_TRACE_ANALYSIS.md b/PINK_DITAv2_E2E_TRACE_ANALYSIS.md index 77f61af..690184b 100644 --- a/PINK_DITAv2_E2E_TRACE_ANALYSIS.md +++ b/PINK_DITAv2_E2E_TRACE_ANALYSIS.md @@ -6058,3 +6058,330 @@ Same pattern in `_build_pink_extended.py` templates and other generated test bui | R | Pass 15 (Resource Leaks/Trust Boundaries/Security) | 14 | 2 | 6 | 3 | 2 | 1 | | S | Pass 16 (Error Handling/Arithmetic/Test Infra) | 16 | 4 | 7 | 5 | 0 | 0 | | **Total** | | **305** | **27** | **90** | **87** | **64** | **37** | + +--- + +## PASS 17 — UNSAFE REVIEW, DEAD CODE/BACKUP DEBRIS, BUILD/PLANE PROTOCOLS + +### T1: `catch_unwind` + `AssertUnwindSafe` on `&mut KernelCore` — partially mutated heap state persists after caught panic, no rollback + +**File:** `_rust_kernel/src/lib.rs:2057-2071` + +```rust +fn with_handle_mut(handle: *mut KernelHandle, f: F) -> Result +where F: FnOnce(&mut KernelCore) -> Result, +{ + let core = unsafe { &mut (*handle).core }; + match std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| f(core))) { + Ok(result) => result, + Err(panic_payload) => { + let msg = ...; + eprintln!("[KERNEL PANIC 
caught at FFI boundary] {msg}"); + Err(msg) // Partially mutated KernelCore still live in heap Box + } + } +} +``` + +`catch_unwind` prevents Rust panics from unwinding across the FFI boundary (which would be UB). But the `KernelCore` behind the raw pointer is **mutated in-place** on the heap. When a panic occurs mid-mutation: + +1. `f(core)` calls some kernel function like `process_intent()` or `apply_fill()` +2. The function panics partway through — e.g., `k_realized_pnl` was incremented but `event_seq` was not bumped; `slots[i]` was replaced but `rebuild_indexes()` was not called +3. `catch_unwind` catches the panic, returns `Err(msg)` to the Python caller +4. The `KernelCore` on the heap retains the **partially applied state** +5. The next FFI call operates on this corrupted state — `k_capital = seed + realized_pnl - fees_paid` is computed with mismatched values +6. The code comment acknowledges this: "the slot/account mutation that panicked may be partially applied" +7. The mitigations (reconcile WARN/ERROR → capital frozen) only work if the corruption is detectable — if the panic corrupts `slot.seen_event_ids` such that dedup fails, duplicate fills can process + +`AssertUnwindSafe` on `&mut KernelCore` is sound for memory safety (after panic, the reference is still valid, just the data is inconsistent — no use-after-free, no double-free). But it is **logically unsound** — data invariants are violated, and the recovery path relies on a downstream reconcile to detect the issue, which may not catch all corruption patterns. + +**Trigger paths:** Any panic inside `process_intent()`, `on_venue_event()`, `reconcile_slots()`, `apply_fill()`, or `save_full_snapshot()` while mutating `KernelCore`. A panic in `HashMap::insert()` (extremely rare, only on OOM) would leave the HashMap in an undefined state. 
+ +**Severity: High** + +### T2: Empty backup directory `_backup_20260530_105512/` and stale `tea_debug.log` (0 bytes) + +**Files:** `_backup_20260530_105512/` (empty directory), `tea_debug.log` (0 bytes) + +`_backup_20260530_105512/` is a **completely empty directory** — zero files. Its sibling `_backup_20260530/` contains 22 source files and a `rust_kernel_src/` subdirectory. The `_105512` variant was created during an earlier backup attempt but never populated. + +`tea_debug.log` is a **0-byte empty file** in the workspace root. No code writes to it. It's a stale artifact — likely a log file that was opened but never written to, or a debugging aid that was never used. + +Both should be deleted to avoid confusion. + +**Severity: Low** + +### T3: `HazelcastRowWriter.__call__` uses bare `json.dumps(row, default=str)` — Enums and datetimes serialize as Python `str()` representations + +**File:** `hazelcast_projection.py:60-63` + +```python +def __call__(self, name: str, row: dict[str, Any]) -> None: + if name.endswith("trade_events"): + self.client.get_topic(name).publish( + json.dumps(row, ensure_ascii=False, sort_keys=True, default=str) + ) +``` + +The `default=str` fallback serializes `Enum` values as `"TradeSide.SHORT"` (Python's `str()` format) instead of `"SHORT"` (the `.value`). Datetimes become `"2026-01-01 00:00:00"` (Python `str()` format) instead of `"2026-01-01T00:00:00+00:00"` (ISO 8601). Downstream Hazelcast consumers expecting standard formats get unexpected strings. + +Compare with `HazelcastProjector.publish_event()` (line 38) which correctly uses `json_safe(payload)` before `json.dumps()`: +```python +self.writer(self.trade_events_topic, json_safe(row)) # uses json_safe() first +``` + +The inconsistency: `HazelcastProjector` correctly serializes via `json_safe()`, but `HazelcastRowWriter.__call__` (used directly elsewhere) does not. 
Any code path that calls `HazelcastRowWriter` directly — rather than through `HazelcastProjector` — produces malformed output. + +**Severity: High** + +### T4: `real_zinc_plane._slot_from_payload()` uses `payload["last_event_time"]` direct key access — crashes with `KeyError` if key missing + +**File:** `real_zinc_plane.py:116,133` + +```python +entry_time=datetime.fromisoformat(payload["entry_time"]) if payload.get("entry_time") else None, +# ... yet at line 133: +last_event_time=datetime.fromisoformat(payload["last_event_time"]) # NO .get() guard! +``` + +Line 116 uses `payload.get("entry_time")` — correct. Line 133 uses `payload["last_event_time"]` — **missing `.get()`**, crashes with `KeyError` if the key is absent. + +Compare with `rust_backend.py:396-402` (the equivalent function): +```python +entry_time=datetime.fromisoformat(payload["entry_time"]) if payload.get("entry_time") else None, +last_event_time=datetime.fromisoformat(payload["last_event_time"]) if payload.get("last_event_time") else None, +``` + +Both fields use `.get()` in `rust_backend.py`. The `real_zinc_plane.py` version has a **copy-paste error** where the guard on `last_event_time` was omitted. If any slot is deserialized via the shared memory path (RealZincPlane) and lacks a `last_event_time` (e.g., a fresh slot that hasn't received a venue event yet), this crashes. + +**Severity: High** + +### T5: `_build_pink_bodies.py` uses `str.index("]")` to find SCENARIOS list close bracket — corrupts list if any entry contains `]` + +**File:** `_build_pink_bodies.py:214` + +```python +close_bracket = with_bodies.index("]", scenarios_open) +final = with_bodies[:close_bracket] + "\n" + param_block + "\n" + with_bodies[close_bracket:] +``` + +`str.index("]")` finds the **first** `]` character after `scenarios_open`. 
If any SCENARIOS entry contains a `]` inside a string literal (e.g., a diagnostic code like `INVALID_INTENT_PARSE`, a format string, or a nested data structure), the split lands inside the entry — truncating it and injecting the new `param_block` mid-entry. + +The resulting file is syntactically incorrect only if the truncation produces unparseable code. If it happens to produce valid (but semantically wrong) code, the build succeeds with silently corrupted test data. + +**Fix:** Use `ast` module to parse the list, or count bracket depth. + +**Severity: High** + +### T6: `VenueAdapter` protocol missing `connect()`/`disconnect()` — `AttributeError` at runtime + +**File:** `venue.py` (protocol), `_build_pink_extended.py:31-32` (caller) + +```python +# _build_pink_extended.py — Shim class: +async def connect(self, initial_capital=0): + self.kernel.venue.connect() # assumes VenueAdapter has connect() + +async def disconnect(self): + try: + self.kernel.venue.disconnect() # assumes VenueAdapter has disconnect() + except: + pass +``` + +`VenueAdapter` (defined in `venue.py` as a `Protocol`) defines `submit()`, `cancel()`, `snapshot()`, `subscribe()`, `open_positions()`, and `reconcile()` — but **not** `connect()` or `disconnect()`. + +`MockVenueAdapter` has both methods (mock_venue.py:160-166). `BingxVenueAdapter` does **not** have them — calling `connect()` on a `BingxVenueAdapter` raises `AttributeError`. + +The `Shim` class in `_build_pink_extended.py` is used for live-test infrastructure. If a live test runs with a venue that lacks `connect()`/`disconnect()`, the error is swallowed by the bare `except: pass` in `disconnect()`, but `connect()` propagates uncaught. + +**Fix:** Add `connect()`/`disconnect()` to the `VenueAdapter` protocol, or add them as no-ops on `BingxVenueAdapter`. 
+ +**Severity: High** + +### T7: `real_control_plane.py` and `real_zinc_plane.py` shared memory writes are non-atomic — reader sees partial state + +**Files:** `real_control_plane.py:110-114`, `real_zinc_plane.py:252-253` + +```python +# real_control_plane.py _write_region: +view[:len(packet)] = packet # writes new packet +if len(view) > len(packet): + view[len(packet):] = b"\x00" * (len(view) - len(packet)) # zeroes tail + +# real_zinc_plane.py _write_region: +view[:] = b"\x00" * len(view) # full zero (visible-zero window) +view[:len(packet)] = packet # writes packet +``` + +Both implementations write the shared memory buffer in **multiple non-atomic operations**. A reader process that reads between these operations sees: +- **`real_control_plane.py`**: The new header with stale tail from a previous larger packet → `_decode_packet()` may return stale data or parse failure +- **`real_zinc_plane.py`**: All zeros → `_decode_packet()` returns `{}` (empty dict) or parse failure + +The visible-zero window in `real_zinc_plane.py` is particularly dangerous — if a reader reads the zeroed buffer, all slot states appear empty, which could trigger a spurious reconcile or incorrect position tracking. + +**Fix:** Either: +1. Write the packet atomically (if the shared memory size supports it — write new data in a single slice assignment) +2. Use a sequence number in the header that the reader validates (sequence odd while writing, even when complete) +3. Use an explicit "writing" flag byte set before and cleared after the write + +**Severity: High** + +### T8: `real_zinc_plane._slot_from_payload()` reconstructs `internal_trade_id` from slot's `trade_id` instead of order's own — data loss on round-trip + +**File:** `real_zinc_plane.py:92,106` + +```python +active_entry_order = VenueOrder( + internal_trade_id=str(payload.get("trade_id", "")), # uses SLOT's trade_id + ... 
+) +``` + +`TradeSlot.to_dict()` serializes the order's own `internal_trade_id` inside the `"active_entry_order"` sub-dict. But `_slot_from_payload()` ignores the per-order value and uses the slot-level `trade_id` instead. + +If a slot has multiple orders (e.g., an entry order with `trade_id="abc"` and an exit order with `trade_id="def"`), the slot-level `trade_id` is the **current trade's ID** — which may match one of the orders. But after a CANCEL_ACK that clears the entry order, the slot `trade_id` may be empty or changed. The reconstructed order always gets the slot's `trade_id`, losing the distinction between entry-order and exit-order trade IDs. + +This only affects the shared-memory round-trip (RealZincPlane). The FFI path (`rust_backend.py`) correctly uses the order's serialized `internal_trade_id`. + +**Severity: Medium** + +### T9: `_slot_from_payload()` duplicated verbatim between `real_zinc_plane.py` and `rust_backend.py` — double maintenance burden + +**Files:** `real_zinc_plane.py:83-138`, `rust_backend.py:379-402` + +The slot deserialization function `_slot_from_payload()` (or equivalent inline code) exists in **two separate files** with nearly identical logic. The `real_zinc_plane.py` version is a 55-line function; the `rust_backend.py` version is inline in `_slot_from_payload()`. + +Both deserialize `TradeSlot` from the same `to_dict()` output format. Any schema change (field added, removed, renamed, or type-changed) must be updated in both places. T4 (missing `.get()` on `last_event_time`) and T8 (`internal_trade_id` from wrong source) are direct consequences of this duplication — the bug exists in one copy but not the other. + +**Fix:** Extract shared `_slot_from_payload()` into `contracts.py` (or `utils.py`). 
+ +**Severity: Medium** + +### T10: `_build_pink_extended.py` string index math finds first `finally:` — could match nested `try/finally` inside function body + +**File:** `_build_pink_extended.py:117-119` + +```python +idx = content.index(old_run_pat) +run_end = content.index(" finally:", idx) # finds FIRST "finally:" — could be nested! +run_end = content.index("\n\n", run_end) + 2 # boundary detection for function end +``` + +The search for `" finally:"` finds the **first** occurrence after `idx`. If the `_run()` function body (or any function it calls, like `_si()` or `_verify()`) contains a nested `try/finally` block — or if the function contains the word "finally:" in a string or comment — the index points to the wrong location. The `"\n\n"` search then terminates inside the function body, producing a truncated replacement that generates syntactically broken output. + +The generated `test_pink_bingx_dita_live_e2e.py` is patched with index math that has no validation. A malformed patch silently produces a non-functional test file (syntax error only caught at test import time). + +**Fix:** Parse the function boundaries using `ast` module or use a well-defined sentinel comment (e.g., `# END _run`) as anchor points. + +**Severity: Medium** + +### T11: No workspace-root `.gitignore` — `__pycache__`, backup dirs, context files, build artifacts untracked + +**File:** (missing — should be `dita_v2/.gitignore`) + +The only `.gitignore` in the workspace is inside `_rust_kernel/` (covers `/target`). There is **no `.gitignore` at the workspace root** (`dita_v2/`). This means: + +- `__pycache__/` directories (29 `.pyc` files present) are tracked or untracked depending on global git config +- `_backup_20260530/` and `_backup_20260530_105512/` are visible to git (the 22 source files in the backup are tracked? 
may or may not be indexed) +- `_backup_20260530_105512/` (empty dir) is visible +- `Codex_CONTEXT_RESTORE__*.txt` context files are visible +- `tea_debug.log` is visible +- Any `.pyc` files that end up in the index cause merge conflicts + +The git `status` shows `?? 2004` untracked files — many of these would be excluded by a proper `.gitignore`. + +**Severity: Low** + +### T12: `projection.py` lazy import failure silently swallowed — caller gets `writer=None` with no diagnostic + +**File:** `projection.py:75-77` + +```python +try: + from .hazelcast_projection import HazelcastRowWriter + writer = HazelcastRowWriter(client) +except Exception: # catches import errors, constructor errors, everything + writer = None +``` + +If the `hazelcast_projection` module has a syntax error, `HazelcastRowWriter` doesn't exist, or the constructor raises, the exception is silently swallowed. The caller gets a `HazelcastProjection` with `writer=None`. The `write_transition()` and `write_control()` methods check `if not self.writer:` and silently return — so all Hazelcast writes are silently dropped with no log, no error, no diagnostic. + +The `"Hazelcast unavailable — fallback active"` log message is only printed for the **first** import attempt. If the module is later fixed (e.g., a missing dependency is installed), the stale `writer=None` persists because the import is not retried. + +**Severity: Medium** + +### T13: `Codex_CONTEXT_RESTORE__*.txt` and other AI context files in workspace root — debris + +**Files:** `Codex_CONTEXT_RESTORE__2026-06-02-130508-*.txt`, other `.md` analysis documents + +The workspace root contains AI-assistant context restore files and 6+ Markdown flaw analysis documents (`PINK_DITAv2_E2E_TRACE_ANALYSIS.md`, `PINK_DITAv2_FLAW_ANALYSIS_2026-05-31.md`, `PINK_DITAv2_THREADING_ATOMICITY.md`, etc.). These are analysis artifacts, not source code. 
+ +While the flaw documents are intentional project records, the `Codex_CONTEXT_RESTORE__*.txt` files are ephemeral AI context dumps that should not be in version control. They contain session state information that is meaningless outside the AI session. + +**Severity: Low** + +### T14: `_backup_20260530/` contains 22 live source files — risk of stale import confusion + +**File:** `_backup_20260530/` (22 Python files including `rust_backend.py`, `launcher.py`, `bingx_venue.py`, etc.) + +The backup directory contains full copies of all Python source files from May 30. If a developer runs `import` from within the `dita_v2` directory, the backup directory's `__init__.py` makes it a valid Python package. An accidental `from _backup_20260530 import rust_backend` would load the **old** code instead of the current implementation — silently, with no warning. + +The backup `rust_backend.py` lacks the Rust FFI integration, has no `_first_invalid_intent_field()`, and uses the old `ExecutionKernel` class. Accidentally importing from the backup would produce hard-to-diagnose errors (missing methods, wrong behavior). + +**Fix:** Rename backup directories to non-Python-package names (e.g., `backup_20260530` without the leading underscore), or add `__init__.py` that raises `ImportError` with a clear message. 
+ +**Severity: Medium** + +--- + +## Pass 17 Summary + +| # | Flaw | Layer | Severity | +|---|------|-------|----------| +| T1 | `catch_unwind` + `AssertUnwindSafe` — partially mutated state persists, no rollback | Rust | **High** | +| T2 | Empty backup dir `_backup_20260530_105512/` and stale `tea_debug.log` | Repo | Low | +| T3 | `HazelcastRowWriter` uses bare `json.dumps(row, default=str)` — Enums/datetimes wrong format | Bridge | **High** | +| T4 | `real_zinc_plane._slot_from_payload()` direct key access `payload["last_event_time"]` — `KeyError` crash | Plane | **High** | +| T5 | `_build_pink_bodies.py` `str.index("]")` finds first `]` — corrupts SCENARIOS list | Build | **High** | +| T6 | `VenueAdapter` protocol missing `connect()`/`disconnect()` — `AttributeError` at runtime | Venue | **High** | +| T7 | Shared memory writes non-atomic — visible-zero window, stale tail exposes partial state | Plane | **High** | +| T8 | `_slot_from_payload()` reconstructs `internal_trade_id` from slot's trade_id — order-level data loss | Plane | Medium | +| T9 | `_slot_from_payload()` duplicated in two files — double maintenance burden, schema drift risk | Plane | Medium | +| T10 | `_build_pink_extended.py` `str.index("finally:")` finds first match — nested try/finally mismatch | Build | Medium | +| T11 | No workspace-root `.gitignore` — `__pycache__`, backup dirs, debris untracked | Repo | Low | +| T12 | `projection.py` lazy import failure silently swallowed — `writer=None` drops all Hazelcast writes | Bridge | Medium | +| T13 | `Codex_CONTEXT_RESTORE__*.txt` AI context files in workspace root — debris | Repo | Low | +| T14 | `_backup_20260530/` is a valid Python package — accidental old-code import risk | Repo | Medium | + +### Pass 17 Severity + +| Severity | Count | +|----------|-------| +| **High** | 6 (T1, T3, T4, T5, T6, T7) | +| Medium | 5 (T8, T9, T10, T12, T14) | +| Low | 3 (T2, T11, T13) | + +### Combined Catalog (All 17 Passes) + +| Pass | Focus | Count | Critical | 
High | Medium | Low | Info | +|------|-------|-------|----------|------|--------|-----|------| +| A | Architectural | 15 | 0 | 2 | 0 | 2 | 11 | +| T | Threading/Atomicity | 9 | 1 | 3 | 3 | 2 | 0 | +| E | E2E Trace (Pass 1) | 26 | 0 | 4 | 10 | 11 | 1 | +| F | Deep E2E (Pass 3) | 30 | 0 | 1 | 8 | 17 | 4 | +| G | Domain Scans (Pass 4) | 36 | 4 | 11 | 11 | 8 | 2 | +| H | Edge Domains (Pass 5) | 22 | 3 | 9 | 5 | 4 | 1 | +| I | Pass 6 (Math/Tests/Recovery/Security) | 22 | 3 | 11 | 4 | 2 | 2 | +| J | Pass 7 (Test Infra/Data/Rust/Env/Conn) | 16 | 0 | 7 | 7 | 2 | 0 | +| K | Pass 8 (Observability/Memory/Time/DeadCode) | 23 | 2 | 7 | 7 | 1 | 6 | +| L | Pass 9 (Contracts/Events/Network/FFI/Diffs) | 16 | 0 | 4 | 8 | 4 | 0 | +| M | Pass 10 (Runtime/TestBugs/FSM/Persistence/Metrics) | 18 | 3 | 7 | 5 | 3 | 0 | +| N | Pass 11 (Async/Sync Seams/Locks/Threading) | 10 | 4 | 1 | 3 | 1 | 1 | +| O | Pass 12 (Sync/Async Wider Scope) | 11 | 0 | 3 | 7 | 1 | 0 | +| P | Pass 13 (FFI Safety/Dangling Pointers/Coverage) | 9 | 1 | 3 | 3 | 1 | 1 | +| Q | Pass 14 (Serde Edges/Backup Diffs/Market Data) | 12 | 0 | 4 | 3 | 2 | 3 | +| R | Pass 15 (Resource Leaks/Trust Boundaries/Security) | 14 | 2 | 6 | 3 | 2 | 1 | +| S | Pass 16 (Error Handling/Arithmetic/Test Infra) | 16 | 4 | 7 | 5 | 0 | 0 | +| T | Pass 17 (Unsafe Review/Dead Code/Build/Protocols) | 14 | 0 | 6 | 5 | 3 | 0 | +| **Total** | | **319** | **27** | **96** | **97** | **66** | **33** | diff --git a/PINK_DITAv2_FLAW_ANALYSIS_2026-05-31.md b/PINK_DITAv2_FLAW_ANALYSIS_2026-05-31.md index 5ee7fac..5677609 100644 --- a/PINK_DITAv2_FLAW_ANALYSIS_2026-05-31.md +++ b/PINK_DITAv2_FLAW_ANALYSIS_2026-05-31.md @@ -31,7 +31,8 @@ | Q | Pass 14 (Serde Edges/Backup Diffs/Market Data) | 12 | 0 | 4 | 3 | 2 | 3 | | R | Pass 15 (Resource Leaks/Trust Boundaries/Security) | 14 | 2 | 6 | 3 | 2 | 1 | | S | Pass 16 (Error Handling/Arithmetic/Test Infra) | 16 | 4 | 7 | 5 | 0 | 0 | -| **Total** | | **305** | **27** | **90** | **87** | **64** | **37** | +| T | Pass 17 
(Unsafe Review/Dead Code/Build/Protocols) | 14 | 0 | 6 | 5 | 3 | 0 | +| **Total** | | **319** | **27** | **96** | **97** | **66** | **33** | --- @@ -438,6 +439,29 @@ --- +## T-Series: Unsafe Review, Dead Code/Backup Debris, Build/Plane Protocols (Pass 17) + +*Full detail in TRACE doc under "PASS 17 — UNSAFE REVIEW, DEAD CODE/BACKUP DEBRIS, BUILD/PLANE PROTOCOLS."* + +| # | Flaw | Layer | Severity | +|---|------|-------|----------| +| T1 | `catch_unwind` + `AssertUnwindSafe` — partially mutated state persists, no rollback | Rust | **High** | +| T2 | Empty backup dir `_backup_20260530_105512/` and stale `tea_debug.log` (0 bytes) | Repo | Low | +| T3 | `HazelcastRowWriter` uses bare `json.dumps(row, default=str)` — Enums/datetimes wrong format | Bridge | **High** | +| T4 | `real_zinc_plane._slot_from_payload()` direct key access `payload["last_event_time"]` — KeyError | Plane | **High** | +| T5 | `_build_pink_bodies.py` `str.index("]")` finds first `]` — corrupts SCENARIOS list | Build | **High** | +| T6 | `VenueAdapter` protocol missing `connect()`/`disconnect()` — AttributeError at runtime | Venue | **High** | +| T7 | Shared memory writes non-atomic — visible-zero window exposes partial state | Plane | **High** | +| T8 | `_slot_from_payload()` reconstructs internal_trade_id from slot trade_id — order data loss | Plane | Medium | +| T9 | `_slot_from_payload()` duplicated in two files — schema drift risk | Plane | Medium | +| T10 | `str.index("finally:")` finds first match — nested try/finally mismatch | Build | Medium | +| T11 | No workspace-root `.gitignore` — __pycache__, backup dirs, debris untracked | Repo | Low | +| T12 | `projection.py` lazy import failure silently swallowed — writer=None drops all writes | Bridge | Medium | +| T13 | `Codex_CONTEXT_RESTORE__*.txt` AI context files in root — debris | Repo | Low | +| T14 | `_backup_20260530/` is a valid Python package — accidental old-code import risk | Repo | Medium | + +--- + ## H-Series: Edge Domains — 
Dependencies, Error Handling, Types, Contracts (Pass 5) *Full detail in TRACE doc under "PASS 5 — EDGE DOMAINS."* diff --git a/prod/clean_arch/dita_v2/_rust_kernel/src/lib.rs b/prod/clean_arch/dita_v2/_rust_kernel/src/lib.rs index 64aaea7..36598af 100644 --- a/prod/clean_arch/dita_v2/_rust_kernel/src/lib.rs +++ b/prod/clean_arch/dita_v2/_rust_kernel/src/lib.rs @@ -3,12 +3,18 @@ use std::collections::HashMap; use std::ffi::{c_char, CStr, CString}; use std::ptr; +use indexmap::IndexSet; use chrono::{DateTime, Utc}; use serde::{Deserialize, Serialize}; use serde_json::{json, Map, Value}; const MAX_SEEN_EVENT_IDS: usize = 256; +/// Capacity for account-level event dedup (FILL_SETTLED / ACCOUNT_UPDATE / FUNDING_FEE). +/// At ~10 fills/day this covers >100 trading days without eviction. +const MAX_ACCOUNT_DEDUP_ENTRIES: usize = 1024; +/// Schema version baked into KernelFullSnapshot — bump when structs change incompatibly. +const KERNEL_SNAPSHOT_VERSION: u32 = 1; #[repr(C)] pub struct KernelHandle { @@ -173,6 +179,8 @@ string_enum! { UNRESOLVED_SLOT, INVALID_TRANSITION, TERMINAL_STATE, + INVALID_INTENT, + CAPITAL_FROZEN, } } @@ -568,17 +576,32 @@ impl Default for ExchangeFeeConfig { } impl ExchangeFeeConfig { - /// Predict the taker fee for a fill. - fn predict_taker_fee(&self, fill_price: f64, fill_qty: f64) -> f64 { - let raw = fill_price.abs() * fill_qty.abs() * self.taker_rate * self.calibration_ratio; + /// Predict the expected fee (positive) or rebate (negative) for a fill. + /// is_maker=true → maker rate (may be negative = rebate). + /// is_maker=false → taker rate (always positive = cost). 
+ fn predict_fee(&self, fill_price: f64, fill_qty: f64, is_maker: bool) -> f64 { + let notional = fill_price.abs() * fill_qty.abs(); + let raw = if is_maker { + notional * self.maker_rate * self.calibration_ratio + } else { + notional * self.taker_rate * self.calibration_ratio + }; if raw.is_finite() { raw } else { 0.0 } } - /// Ingest an actual fee observation and softly recalibrate the ratio. - /// Uses exponential moving average with α=0.1 so one outlier doesn't - /// corrupt the model. + /// Convenience: taker-only prediction (backward compat + caller sites that + /// don't have order-type context and know they're always taker). + /// Identical to predict_fee(price, qty, false). + pub(crate) fn predict_taker_fee(&self, fill_price: f64, fill_qty: f64) -> f64 { + self.predict_fee(fill_price, fill_qty, false) + } + + /// Ingest an actual fee/rebate observation and softly recalibrate. + /// Uses EMA α=0.1; first observation is taken directly (α=1.0). + /// Works for both positive fees and negative rebates. fn observe_actual_fee(&mut self, expected: f64, actual: f64) -> f64 { - if expected <= 0.0 || actual <= 0.0 || !actual.is_finite() { + // For rebates both expected and actual are negative; ratio is still actual/expected. + if expected.abs() < 1e-10 || !actual.is_finite() { return 1.0; } let ratio = actual / expected; @@ -606,10 +629,17 @@ struct AccountState { seed_capital: f64, /// Σ realized PnL from all closed exit fills. k_realized_pnl: f64, - /// Σ fees paid (always ≥ 0; reduces k_capital). + /// Σ taker fees paid (market orders; always ≥ 0; reduces k_capital). + k_taker_fees: f64, + /// Σ maker fees paid (limit orders on positive maker_rate; always ≥ 0). + k_maker_fees: f64, + /// Σ maker rebates received (limit orders on negative maker_rate; always ≥ 0). + k_maker_rebates: f64, + /// k_net_fees = k_taker_fees + k_maker_fees − k_maker_rebates + /// Exposed as k_fees_paid for backward compat (snapshot key unchanged). 
k_fees_paid: f64, /// Net funding paid (positive = paid out; negative = received). - /// k_capital = seed + k_realized − k_fees − k_funding_net + /// k_capital = seed + k_realized − k_fees_paid − k_funding_net k_funding_net: f64, // --- E-facts: exchange truth, rules when present ------------------- @@ -643,12 +673,21 @@ struct AccountState { last_predicted_fee: f64, /// Last calibration ratio observed. last_calibration_ratio: f64, + /// True when reconcile_status==ERROR: blocks new ENTERs until OK restores. + /// Auto-clears when reconcile() next produces OK. + capital_frozen: bool, + /// Dedup set for account-level events (FILL_SETTLED, ACCOUNT_UPDATE, FUNDING_FEE). + /// IndexSet preserves insertion order for LRU eviction. Capacity: MAX_ACCOUNT_DEDUP_ENTRIES. + seen_account_event_ids: IndexSet, } impl AccountState { /// Classify K vs E divergence and update all cached fields atomically. /// Called at the end of every apply_* method. fn reconcile(&mut self) { + // Net fees: taker costs + maker costs − maker rebates. + // May be negative when rebates exceed costs (pure-maker book). + self.k_fees_paid = self.k_taker_fees + self.k_maker_fees - self.k_maker_rebates; // Recompute cached derived fields let raw = self.seed_capital + self.k_realized_pnl - self.k_fees_paid - self.k_funding_net; self.k_capital = if raw.is_finite() { raw } else { self.seed_capital }; @@ -682,37 +721,76 @@ impl AccountState { delta, self.k_capital, self.e_wallet_balance ); } + // Capital breach HALT: freeze new ENTERs on ERROR, unfreeze on OK. + match self.reconcile_status.as_str() { + "ERROR" => self.capital_frozen = true, + "OK" => self.capital_frozen = false, + _ => {} // WARN: keep current freeze state + } } - fn apply_fill_settled(&mut self, realized_pnl: f64, fee: f64) { + /// Returns true if the event_id was already seen (duplicate); false and records it if new. 
+ fn is_duplicate_account_event(&mut self, event_id: &str) -> bool { + if event_id.is_empty() { return false; } // empty ids are not deduplicated + if self.seen_account_event_ids.contains(event_id) { return true; } + if self.seen_account_event_ids.len() >= MAX_ACCOUNT_DEDUP_ENTRIES { + // Evict oldest insertion to maintain bounded memory. + self.seen_account_event_ids.shift_remove_index(0); + } + self.seen_account_event_ids.insert(event_id.to_string()); + false + } + + fn apply_fill_settled(&mut self, realized_pnl: f64, fee: f64, is_maker: bool) { if realized_pnl.is_finite() { self.k_realized_pnl += realized_pnl; } - // If the WS delivered the actual fee, use it and recalibrate. - // If fee == 0 the fill came from a path that doesn't carry fee info; - // use the model-predicted fee that was folded at fill time (no-op here — - // prediction was already applied in apply_predicted_fill). - if fee.is_finite() && fee > 0.0 { - // We may have already folded the predicted fee; replace with actual. + if fee.is_finite() && fee.abs() > 0.0 { let predicted = self.last_predicted_fee; - // Undo prediction, apply actual. - self.k_fees_paid = (self.k_fees_paid - predicted + fee).max(0.0); - self.last_calibration_ratio = self.fee_config.observe_actual_fee(predicted.max(fee * 0.001), fee); + // Undo the predicted amount from the right bucket, then apply actual. 
+ if predicted >= 0.0 { + // Was predicted as a cost (taker or positive-rate maker) + if is_maker { + self.k_maker_fees = (self.k_maker_fees - predicted).max(0.0); + } else { + self.k_taker_fees = (self.k_taker_fees - predicted).max(0.0); + } + } else { + // Was predicted as a rebate + self.k_maker_rebates = (self.k_maker_rebates + predicted).max(0.0); + } + // Apply actual fee/rebate to correct bucket + if fee >= 0.0 { + if is_maker { self.k_maker_fees += fee; } else { self.k_taker_fees += fee; } + } else { + self.k_maker_rebates += fee.abs(); // rebate = benefit + } + self.last_calibration_ratio = self.fee_config.observe_actual_fee( + if predicted.abs() > 1e-10 { predicted } else { fee }, + fee, + ); self.last_predicted_fee = 0.0; } self.event_seq += 1; self.reconcile(); } - /// Called when a fill event arrives in on_venue_event (before FILL_SETTLED). - /// Predicts and immediately folds the taker fee so K tracks E without delay. - fn apply_predicted_fill(&mut self, fill_price: f64, fill_qty: f64, realized_pnl: f64) { - let predicted_fee = self.fee_config.predict_taker_fee(fill_price, fill_qty); - self.last_predicted_fee = predicted_fee; + /// Predict and immediately fold fee/rebate at fill time. + /// is_maker: true → limit order resting (maker rate, may be negative = rebate). + /// is_maker: false → market order (taker rate, always positive = cost). 
+ fn apply_predicted_fill(&mut self, fill_price: f64, fill_qty: f64, realized_pnl: f64, is_maker: bool) { + let predicted = self.fee_config.predict_fee(fill_price, fill_qty, is_maker); + self.last_predicted_fee = predicted; if realized_pnl.is_finite() { self.k_realized_pnl += realized_pnl; } - self.k_fees_paid += predicted_fee; + if predicted >= 0.0 { + // Cost: add to appropriate fee bucket + if is_maker { self.k_maker_fees += predicted; } else { self.k_taker_fees += predicted; } + } else { + // Rebate: maker_rate < 0 → benefit → k_capital increases + self.k_maker_rebates += predicted.abs(); + } self.event_seq += 1; self.reconcile(); } @@ -742,28 +820,35 @@ impl AccountState { } fn set_fee_config(&mut self, taker_rate: f64, maker_rate: f64, lot_step: f64, tick_size: f64, funding_interval_secs: u64) { + // Taker is always a cost (must be positive). self.fee_config.taker_rate = if taker_rate.is_finite() && taker_rate > 0.0 { taker_rate } else { self.fee_config.taker_rate }; - self.fee_config.maker_rate = if maker_rate.is_finite() && maker_rate > 0.0 { maker_rate } else { self.fee_config.maker_rate }; + // Maker may be negative (rebate): allow any finite value including < 0. + if maker_rate.is_finite() { self.fee_config.maker_rate = maker_rate; } self.fee_config.lot_step = if lot_step.is_finite() && lot_step > 0.0 { lot_step } else { self.fee_config.lot_step }; self.fee_config.tick_size = if tick_size.is_finite() && tick_size > 0.0 { tick_size } else { self.fee_config.tick_size }; if funding_interval_secs > 0 { self.fee_config.funding_interval_secs = funding_interval_secs; } } /// Validate model against one known fill. Returns calibration report. - fn calibrate_fee(&mut self, fill_price: f64, fill_qty: f64, actual_fee: f64) -> Value { - let expected = self.fee_config.predict_taker_fee(fill_price, fill_qty); + /// is_maker: true for limit/post-only fills; false for market/taker fills. 
+ fn calibrate_fee(&mut self, fill_price: f64, fill_qty: f64, actual_fee: f64, is_maker: bool) -> Value { + let expected = self.fee_config.predict_fee(fill_price, fill_qty, is_maker); let ratio = self.fee_config.observe_actual_fee(expected, actual_fee); let deviation_pct = (ratio - 1.0).abs() * 100.0; let status = if deviation_pct < 1.0 { "OK" } else if deviation_pct < 5.0 { "WARN" } else { "ERROR" }; + let order_type = if is_maker { "MAKER" } else { "TAKER" }; + let rate_used = if is_maker { self.fee_config.maker_rate } else { self.fee_config.taker_rate }; json!({ + "order_type": order_type, "fill_price": fill_price, "fill_qty": fill_qty, + "rate_used": rate_used, "expected_fee": expected, "actual_fee": actual_fee, + "is_rebate": actual_fee < 0.0, "ratio": ratio, "deviation_pct": deviation_pct, "calibration_status": status, - "calibrated_taker_rate": self.fee_config.taker_rate, "calibration_ratio": self.fee_config.calibration_ratio, "calibration_samples": self.fee_config.calibration_samples, }) @@ -787,6 +872,22 @@ struct KernelResult { snapshot: KernelSnapshot, } +/// Full serialisable kernel state. Distinct from the lightweight read-only KernelSnapshot +/// used for Python polling. This one drives crash-recovery / session handoff. +#[derive(Serialize, Deserialize, Clone, Debug)] +struct KernelFullSnapshot { + /// Schema version — restore rejects a snapshot whose version != KERNEL_SNAPSHOT_VERSION. + version: u32, + /// Slot count at the time of save (must match max_slots at restore time). + max_slots: usize, + /// Full slot state including FSM stage, sizes, order ids, seen-event dedup. + slots: Vec, + /// Full AccountState including K/E values, fee config + calibration, capital_frozen. + account: AccountState, + /// Wall-clock ms when the snapshot was taken. 
+ snapshot_ts_ms: u64, +} + #[derive(Debug, Default)] struct KernelCore { slots: Vec, @@ -815,17 +916,75 @@ impl KernelCore { core } + /// Serialise the full kernel state to a JSON string for persistence / crash recovery. + fn save_full_snapshot(&self) -> Result { + let ts_ms = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .unwrap_or_default() + .as_millis() as u64; + let snap = KernelFullSnapshot { + version: KERNEL_SNAPSHOT_VERSION, + max_slots: self.slots.len(), + slots: self.slots.clone(), + account: self.account.clone(), + snapshot_ts_ms: ts_ms, + }; + serde_json::to_string(&snap).map_err(|e| e.to_string()) + } + + /// Restore kernel state from a previously saved snapshot JSON. + /// Validates version + slot count. Returns Err with reason on rejection. + fn restore_full_snapshot(&mut self, json_str: &str) -> Result<(), String> { + let snap: KernelFullSnapshot = serde_json::from_str(json_str) + .map_err(|e| format!("parse error: {e}"))?; + if snap.version != KERNEL_SNAPSHOT_VERSION { + return Err(format!( + "version mismatch: snapshot={} kernel={}", + snap.version, KERNEL_SNAPSHOT_VERSION + )); + } + if snap.slots.len() != self.slots.len() { + return Err(format!( + "slot count mismatch: snapshot={} kernel={}", + snap.slots.len(), self.slots.len() + )); + } + // Validate capital is finite before accepting. + if !snap.account.k_capital.is_finite() { + return Err("snapshot account.k_capital is non-finite".into()); + } + self.slots = snap.slots; + self.account = snap.account; + self.rebuild_indexes(); + Ok(()) + } + fn on_account_event(&mut self, payload: &str) -> Value { let parsed: Value = match serde_json::from_str(payload) { Ok(v) => v, Err(e) => return json!({"error": format!("parse: {}", e)}), }; let kind = parsed.get("kind").and_then(|v| v.as_str()).unwrap_or("").to_uppercase(); + // Account-event idempotency: dedup by event_id before any mutation. + // PREDICTED_FILL is excluded from dedup (stateless preview, not a settled fact). 
+ let event_id = parsed.get("event_id").and_then(|v| v.as_str()).unwrap_or(""); + if !matches!(kind.as_str(), "PREDICTED_FILL") && self.account.is_duplicate_account_event(event_id) { + let mut v = serde_json::to_value(&self.account).unwrap_or(json!({})); + if let Some(obj) = v.as_object_mut() { + obj.insert("k_capital".to_string(), json!(self.account.k_capital)); + obj.insert("available_capital".to_string(), json!(self.account.available_capital)); + obj.insert("k_net_fees".to_string(), json!(self.account.k_fees_paid)); + obj.insert("duplicate_event".to_string(), json!(true)); + obj.insert("capital_frozen".to_string(), json!(self.account.capital_frozen)); + } + return v; + } match kind.as_str() { "FILL_SETTLED" => { let realized = parsed.get("realized_pnl").and_then(|v| v.as_f64()).unwrap_or(0.0); let fee = parsed.get("fee").and_then(|v| v.as_f64()).unwrap_or(0.0); - self.account.apply_fill_settled(realized, fee); + let is_maker = parsed.get("is_maker").and_then(|v| v.as_bool()).unwrap_or(false); + self.account.apply_fill_settled(realized, fee, is_maker); } "ACCOUNT_UPDATE" => { let wb = parsed.get("wallet_balance").and_then(|v| v.as_f64()).unwrap_or(0.0); @@ -839,17 +998,23 @@ impl KernelCore { self.account.apply_funding_fee(amount); } "PREDICTED_FILL" => { - // Called from on_venue_event immediately when a fill arrives — - // pre-folds realized PnL + predicted fee so K tracks E without - // waiting for the WS FILL_SETTLED event. 
let fill_price = parsed.get("fill_price").and_then(|v| v.as_f64()).unwrap_or(0.0); let fill_qty = parsed.get("fill_qty").and_then(|v| v.as_f64()).unwrap_or(0.0); let realized = parsed.get("realized_pnl").and_then(|v| v.as_f64()).unwrap_or(0.0); - self.account.apply_predicted_fill(fill_price, fill_qty, realized); + let is_maker = parsed.get("is_maker").and_then(|v| v.as_bool()).unwrap_or(false); + self.account.apply_predicted_fill(fill_price, fill_qty, realized, is_maker); } _ => return json!({"error": format!("unknown account event kind: {}", kind)}), } - serde_json::to_value(&self.account).unwrap_or(json!({"error":"serialize"})) + // Merge computed fields into the serialised struct + let mut v = serde_json::to_value(&self.account).unwrap_or(json!({})); + if let Some(obj) = v.as_object_mut() { + obj.insert("k_capital".to_string(), json!(self.account.k_capital)); + obj.insert("available_capital".to_string(), json!(self.account.available_capital)); + obj.insert("k_net_fees".to_string(), json!(self.account.k_fees_paid)); + obj.insert("capital_frozen".to_string(), json!(self.account.capital_frozen)); + } + v } fn snapshot(&self) -> KernelSnapshot { @@ -1050,6 +1215,26 @@ impl KernelCore { }; } let mut slot = self.slots[slot_id as usize].clone(); + // Capital breach HALT: block new ENTERs when reconcile ERROR is active. 
+ if matches!(intent.action, KernelCommandType::ENTER) && self.account.capital_frozen { + return KernelResult { + outcome: KernelOutcome { + accepted: false, + slot_id: slot.slot_id, + trade_id: intent.trade_id.clone(), + state: slot.fsm_state.clone(), + diagnostic_code: KernelDiagnosticCode::CAPITAL_FROZEN, + details: json!({ + "reason": "CAPITAL_FROZEN", + "reconcile_status": self.account.reconcile_status, + "reconcile_delta": self.account.reconcile_delta, + }).as_object().cloned().unwrap_or_default(), + ..KernelOutcome::default() + }, + slot: slot.clone(), + snapshot: self.snapshot(), + }; + } if matches!(intent.action, KernelCommandType::ENTER) { if !slot.is_free() && !slot.trade_id.is_empty() && slot.trade_id != intent.trade_id { return KernelResult { @@ -1569,8 +1754,19 @@ impl KernelCore { } } KernelEventKind::CANCEL_REJECT => { - if slot.fsm_state == TradeStage::EXIT_WORKING { - slot.fsm_state = TradeStage::EXIT_WORKING; + // Exchange rejected the cancel request. The exit order + // reference is now stale (already filled, already cancelled, + // or unknown). Clear it and return to POSITION_OPEN so the + // algo/operator can retry the exit; reconcile will re-anchor + // true exchange state on the next pump. 
+ if matches!( + slot.fsm_state, + TradeStage::EXIT_WORKING + | TradeStage::EXIT_REQUESTED + | TradeStage::EXIT_SENT + ) { + slot.active_exit_order = None; + slot.fsm_state = TradeStage::POSITION_OPEN; } diagnostic_code = KernelDiagnosticCode::CANCEL_REJECTED; } @@ -1762,7 +1958,7 @@ impl KernelCore { slot.last_event_time = Some(event.timestamp); let all_legs_done = slot.active_leg_index >= slot.exit_leg_ratios.len(); - let should_close = (slot.size <= 1e-12 || (!partial && all_legs_done)); + let should_close = slot.size <= 1e-12 || (!partial && all_legs_done); if !partial { slot.consume_exit_leg(); @@ -1861,8 +2057,27 @@ where if handle.is_null() { return Err("NULL_HANDLE".to_string()); } - let handle = unsafe { &mut *handle }; - f(&mut handle.core) + // Safety: single-threaded; caller holds exclusive access for the duration. + let core = unsafe { &mut (*handle).core }; + // Catch any Rust panics at the FFI boundary so the Python process survives. + // All state inside the KernelCore is not poisoned on panic — the slot/account + // mutation that panicked may be partially applied, so the kernel will produce + // a reconcile WARN/ERROR on the next E-fact sync, which triggers the capital-frozen + // halt preventing further ENTERs until the operator reconciles. 
+ match std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| f(core))) { + Ok(result) => result, + Err(panic_payload) => { + let msg = if let Some(s) = panic_payload.downcast_ref::<&str>() { + format!("KERNEL_PANIC: {s}") + } else if let Some(s) = panic_payload.downcast_ref::() { + format!("KERNEL_PANIC: {s}") + } else { + "KERNEL_PANIC: unknown".to_string() + }; + eprintln!("[KERNEL PANIC caught at FFI boundary] {msg}"); + Err(msg) + } + } } #[no_mangle] @@ -2114,7 +2329,8 @@ pub extern "C" fn dita_kernel_calibrate_fee_json( let fill_price = parsed.get("fill_price").and_then(|v| v.as_f64()).unwrap_or(0.0); let fill_qty = parsed.get("fill_qty").and_then(|v| v.as_f64()).unwrap_or(0.0); let actual_fee = parsed.get("actual_fee").and_then(|v| v.as_f64()).unwrap_or(0.0); - match with_handle_mut(handle, |core| Ok(core.account.calibrate_fee(fill_price, fill_qty, actual_fee))) { + let is_maker = parsed.get("is_maker").and_then(|v| v.as_bool()).unwrap_or(false); + match with_handle_mut(handle, |core| Ok(core.account.calibrate_fee(fill_price, fill_qty, actual_fee, is_maker))) { Ok(result) => { let s = serde_json::to_string(&result).unwrap_or_else(|_| "{}".to_string()); into_c_string(&s) @@ -2175,6 +2391,59 @@ pub extern "C" fn dita_kernel_on_account_event_json( } } +// ─────────────────────────────────────────────────────────────────────────── +// Full-state snapshot / restore (session continuity + crash recovery) +// ─────────────────────────────────────────────────────────────────────────── + +/// Serialise the complete kernel state (slots + account + fee calibration) to JSON. +/// +/// The JSON is opaque — pass it verbatim to dita_kernel_restore_json on the next +/// session. Returns NULL on handle error. Caller must free with dita_kernel_free_string. 
+#[no_mangle] +pub extern "C" fn dita_kernel_save_state_json(handle: *mut KernelHandle) -> *mut c_char { + match std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| { + with_handle_mut(handle, |core| core.save_full_snapshot()) + })) { + Ok(Ok(json_str)) => into_c_string(&json_str), + Ok(Err(e)) => { eprintln!("[KERNEL] save_state_json error: {e}"); ptr::null_mut() } + Err(e) => { eprintln!("[KERNEL PANIC] save_state_json: {:?}", e); ptr::null_mut() } + } +} + +/// Restore kernel state from a previously saved JSON blob. +/// +/// Returns 0 on success, -1 on version mismatch, slot-count mismatch, or parse error. +/// On failure the kernel state is unchanged. +#[no_mangle] +pub extern "C" fn dita_kernel_restore_state_json( + handle: *mut KernelHandle, + json_ptr: *const c_char, +) -> i32 { + let json_str = match unsafe { CStr::from_ptr(json_ptr) }.to_str() { + Ok(s) => s.to_string(), + Err(_) => return -1, + }; + match std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| { + with_handle_mut(handle, |core| core.restore_full_snapshot(&json_str).map_err(|e| e)) + })) { + Ok(Ok(())) => 0, + Ok(Err(e)) => { eprintln!("[KERNEL] restore_state_json rejected: {e}"); -1 } + Err(e) => { eprintln!("[KERNEL PANIC] restore_state_json: {:?}", e); -1 } + } +} + +/// Query whether the kernel's capital is currently frozen (reconcile ERROR active). +/// Returns 1 if frozen, 0 if not frozen, -1 on handle error. 
+#[no_mangle] +pub extern "C" fn dita_kernel_is_capital_frozen(handle: *mut KernelHandle) -> i32 { + match std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| { + with_handle_mut(handle, |core| Ok(core.account.capital_frozen)) + })) { + Ok(Ok(frozen)) => if frozen { 1 } else { 0 }, + _ => -1, + } +} + #[cfg(test)] mod tests { use super::*; @@ -2227,4 +2496,389 @@ mod tests { assert!(ack.outcome.accepted); assert_eq!(ack.slot.fsm_state, TradeStage::ENTRY_WORKING); } + + // ----------------------------------------------------------------------- + // ExchangeFeeConfig — predict_fee / observe_actual_fee + // ----------------------------------------------------------------------- + + #[test] + fn predict_fee_taker() { + let cfg = ExchangeFeeConfig::default(); // taker_rate=0.0005, cal_ratio=1.0 + let fee = cfg.predict_fee(100.0, 1.0, false); + assert!((fee - 0.05).abs() < 1e-10, "taker fee = notional * taker_rate = 0.05, got {fee}"); + } + + #[test] + fn predict_taker_fee_convenience_matches_predict_fee_false() { + // predict_taker_fee is the taker-only backward-compat convenience. + // Must always equal predict_fee(price, qty, false) for any input. 
+ let cfg = ExchangeFeeConfig::default(); + let notionals: &[(f64, f64)] = &[(100.0, 1.0), (50_000.0, 0.002), (0.01, 1000.0)]; + for &(price, qty) in notionals { + let conv = cfg.predict_taker_fee(price, qty); + let direct = cfg.predict_fee(price, qty, false); + assert!( + (conv - direct).abs() < 1e-12, + "predict_taker_fee({price},{qty})={conv} != predict_fee(…,false)={direct}" + ); + } + } + + #[test] + fn predict_taker_fee_uses_taker_rate_not_maker() { + let cfg = ExchangeFeeConfig::default(); // taker=0.0005, maker=0.0002 + let taker_fee = cfg.predict_taker_fee(100.0, 1.0); + let maker_fee = cfg.predict_fee(100.0, 1.0, true); + assert!((taker_fee - 0.05).abs() < 1e-10, "taker path: 0.05, got {taker_fee}"); + assert!((maker_fee - 0.02).abs() < 1e-10, "maker path: 0.02, got {maker_fee}"); + assert!(taker_fee > maker_fee, "taker rate must exceed maker rate by default"); + } + + #[test] + fn predict_fee_maker_positive_rate() { + let cfg = ExchangeFeeConfig::default(); // maker_rate=0.0002 + let fee = cfg.predict_fee(100.0, 1.0, true); + assert!((fee - 0.02).abs() < 1e-10, "maker fee = notional * maker_rate = 0.02, got {fee}"); + } + + #[test] + fn predict_fee_maker_rebate_negative_rate() { + let mut cfg = ExchangeFeeConfig::default(); + cfg.maker_rate = -0.0001; // rebate schedule + let fee = cfg.predict_fee(100.0, 1.0, true); + // rebate: negative value → capital benefit + assert!((fee - (-0.01)).abs() < 1e-10, "rebate = -0.01, got {fee}"); + } + + #[test] + fn predict_fee_calibration_ratio_scales_output() { + let mut cfg = ExchangeFeeConfig::default(); // taker_rate=0.0005 + cfg.calibration_ratio = 1.2; + let fee = cfg.predict_fee(100.0, 1.0, false); + assert!((fee - 0.06).abs() < 1e-10, "calibrated taker fee = 0.06, got {fee}"); + } + + #[test] + fn observe_actual_fee_first_sample_direct() { + let mut cfg = ExchangeFeeConfig::default(); + // First sample: α=1.0 so calibration_ratio ← actual/expected + let ratio = cfg.observe_actual_fee(0.05, 0.055); + 
assert!((ratio - 1.1).abs() < 1e-10, "ratio=1.1, got {ratio}"); + assert!((cfg.calibration_ratio - 1.1).abs() < 1e-10); + assert_eq!(cfg.calibration_samples, 1); + } + + #[test] + fn observe_actual_fee_ema_second_sample() { + let mut cfg = ExchangeFeeConfig::default(); + cfg.observe_actual_fee(0.05, 0.05); // first: ratio=1.0, ratio=1.0 + // second: α=0.1; expected new_ratio=actual/expected=1.2 + // calibration_ratio = 1.0*(1-0.1) + 1.2*0.1 = 0.9+0.12 = 1.02 + cfg.observe_actual_fee(0.05, 0.06); + assert!((cfg.calibration_ratio - 1.02).abs() < 1e-9, "ema result=1.02, got {}", cfg.calibration_ratio); + assert_eq!(cfg.calibration_samples, 2); + } + + #[test] + fn observe_actual_fee_zero_expected_returns_one() { + let mut cfg = ExchangeFeeConfig::default(); + let ratio = cfg.observe_actual_fee(0.0, 5.0); + assert_eq!(ratio, 1.0); // guard: don't divide by near-zero + assert_eq!(cfg.calibration_samples, 0); // not counted + } + + // ----------------------------------------------------------------------- + // AccountState — apply_fill_settled (taker / maker / rebate) + // ----------------------------------------------------------------------- + + fn mk_account(seed: f64) -> AccountState { + let mut acc = AccountState::default(); + acc.seed_capital = seed; + acc.reconcile(); + acc + } + + #[test] + fn fill_settled_taker_fee_bucketed() { + let mut acc = mk_account(10_000.0); + acc.apply_fill_settled(0.0, 5.0, false); // taker + assert!((acc.k_taker_fees - 5.0).abs() < 1e-10, "k_taker_fees=5.0, got {}", acc.k_taker_fees); + assert_eq!(acc.k_maker_fees, 0.0); + assert_eq!(acc.k_maker_rebates, 0.0); + assert!((acc.k_fees_paid - 5.0).abs() < 1e-10); + assert!((acc.k_capital - 9_995.0).abs() < 1e-10); + } + + #[test] + fn fill_settled_maker_fee_positive_rate_bucketed() { + let mut acc = mk_account(10_000.0); + acc.apply_fill_settled(0.0, 2.0, true); // maker cost + assert!((acc.k_maker_fees - 2.0).abs() < 1e-10, "k_maker_fees=2.0, got {}", acc.k_maker_fees); + 
assert_eq!(acc.k_taker_fees, 0.0); + assert_eq!(acc.k_maker_rebates, 0.0); + assert!((acc.k_fees_paid - 2.0).abs() < 1e-10); + assert!((acc.k_capital - 9_998.0).abs() < 1e-10); + } + + #[test] + fn fill_settled_maker_rebate_negative_fee_bucketed() { + let mut acc = mk_account(10_000.0); + acc.apply_fill_settled(0.0, -1.5, true); // rebate: negative fee + assert!((acc.k_maker_rebates - 1.5).abs() < 1e-10, "k_maker_rebates=1.5, got {}", acc.k_maker_rebates); + assert_eq!(acc.k_taker_fees, 0.0); + assert_eq!(acc.k_maker_fees, 0.0); + // k_fees_paid = 0 + 0 - 1.5 = -1.5 (rebate > costs → negative net fee) + assert!((acc.k_fees_paid - (-1.5)).abs() < 1e-10, "k_fees_paid=-1.5, got {}", acc.k_fees_paid); + // rebate increases capital + assert!((acc.k_capital - 10_001.5).abs() < 1e-10, "capital=10001.5, got {}", acc.k_capital); + } + + #[test] + fn fill_settled_realized_pnl_with_taker_fee() { + let mut acc = mk_account(10_000.0); + acc.apply_fill_settled(200.0, 3.0, false); + // capital = seed + realized - fees = 10000 + 200 - 3 = 10197 + assert!((acc.k_capital - 10_197.0).abs() < 1e-10); + assert!((acc.k_realized_pnl - 200.0).abs() < 1e-10); + } + + #[test] + fn k_net_fees_formula_taker_plus_maker_minus_rebate() { + let mut acc = mk_account(10_000.0); + acc.apply_fill_settled(0.0, 5.0, false); // taker: 5.0 + acc.apply_fill_settled(0.0, 2.0, true); // maker: 2.0 + acc.apply_fill_settled(0.0, -1.0, true); // rebate: 1.0 + // k_fees_paid = 5 + 2 - 1 = 6 + assert!((acc.k_fees_paid - 6.0).abs() < 1e-10, "net fees=6.0, got {}", acc.k_fees_paid); + assert!((acc.k_taker_fees - 5.0).abs() < 1e-10); + assert!((acc.k_maker_fees - 2.0).abs() < 1e-10); + assert!((acc.k_maker_rebates - 1.0).abs() < 1e-10); + } + + // ----------------------------------------------------------------------- + // AccountState — apply_predicted_fill → apply_fill_settled reconcile + // ----------------------------------------------------------------------- + + #[test] + fn 
predicted_then_settled_taker_replaces_estimate() { + let mut acc = mk_account(10_000.0); + // PREDICTED_FILL: taker 100 @ 1.0 → predicted = 100*0.0005 = 0.05 + acc.apply_predicted_fill(100.0, 1.0, 0.0, false); + let k_after_predict = acc.k_taker_fees; + assert!((k_after_predict - 0.05).abs() < 1e-10, "predicted taker=0.05, got {k_after_predict}"); + // FILL_SETTLED: actual fee = 0.06 (small deviation) + acc.apply_fill_settled(0.0, 0.06, false); + // After settle, actual replaces predicted: k_taker_fees = 0.06 + assert!((acc.k_taker_fees - 0.06).abs() < 1e-10, "settled taker=0.06, got {}", acc.k_taker_fees); + } + + #[test] + fn predicted_then_settled_maker_rebate_path() { + let mut acc = mk_account(10_000.0); + acc.fee_config.maker_rate = -0.0001; // rebate schedule + // PREDICTED_FILL: maker 100 @ 1.0 → predicted = -0.01 (rebate) + acc.apply_predicted_fill(100.0, 1.0, 0.0, true); + assert!((acc.k_maker_rebates - 0.01).abs() < 1e-10, "predicted rebate=0.01, got {}", acc.k_maker_rebates); + // FILL_SETTLED: actual rebate = -0.015 + acc.apply_fill_settled(0.0, -0.015, true); + assert!((acc.k_maker_rebates - 0.015).abs() < 1e-10, "settled rebate=0.015, got {}", acc.k_maker_rebates); + } + + // ----------------------------------------------------------------------- + // KernelCore.on_account_event JSON dispatch + // ----------------------------------------------------------------------- + + #[test] + fn on_account_event_fill_settled_taker_via_json() { + let mut core = KernelCore::new(2); + let payload = r#"{"kind":"FILL_SETTLED","realized_pnl":50.0,"fee":3.0,"is_maker":false}"#; + let result = core.on_account_event(payload); + assert_eq!(result.get("k_taker_fees").and_then(|v| v.as_f64()), Some(3.0)); + assert_eq!(result.get("k_maker_fees").and_then(|v| v.as_f64()), Some(0.0)); + let k_net = result.get("k_net_fees").and_then(|v| v.as_f64()).unwrap_or(-99.0); + assert!((k_net - 3.0).abs() < 1e-10, "k_net_fees=3.0, got {k_net}"); + } + + #[test] + fn 
on_account_event_fill_settled_rebate_via_json() { + let mut core = KernelCore::new(2); + let payload = r#"{"kind":"FILL_SETTLED","realized_pnl":0.0,"fee":-2.0,"is_maker":true}"#; + let result = core.on_account_event(payload); + let rebates = result.get("k_maker_rebates").and_then(|v| v.as_f64()).unwrap_or(-99.0); + assert!((rebates - 2.0).abs() < 1e-10, "k_maker_rebates=2.0, got {rebates}"); + let k_net = result.get("k_net_fees").and_then(|v| v.as_f64()).unwrap_or(-99.0); + assert!((k_net - (-2.0)).abs() < 1e-10, "k_net_fees=-2.0 (pure rebate), got {k_net}"); + } + + #[test] + fn on_account_event_predicted_fill_via_json() { + let mut core = KernelCore::new(2); + let payload = r#"{"kind":"PREDICTED_FILL","fill_price":100.0,"fill_qty":1.0,"realized_pnl":0.0,"is_maker":false}"#; + let result = core.on_account_event(payload); + // predicted taker = 100 * 0.0005 = 0.05 + let taker = result.get("k_taker_fees").and_then(|v| v.as_f64()).unwrap_or(-99.0); + assert!((taker - 0.05).abs() < 1e-10, "predicted k_taker_fees=0.05, got {taker}"); + } + + // ----------------------------------------------------------------------- + // G2: Account-event dedup (IndexSet, 1024 entries) + // ----------------------------------------------------------------------- + + #[test] + fn duplicate_account_event_not_applied_twice() { + let mut core = KernelCore::new(2); + let p = r#"{"kind":"FILL_SETTLED","event_id":"fill-001","realized_pnl":0.0,"fee":5.0,"is_maker":false}"#; + core.on_account_event(p); + let r2 = core.on_account_event(p); + let taker = r2.get("k_taker_fees").and_then(|v| v.as_f64()).unwrap_or(-1.0); + assert!((taker - 5.0).abs() < 1e-10, "duplicate applied twice: k_taker_fees={taker}"); + assert_eq!(r2.get("duplicate_event").and_then(|v| v.as_bool()), Some(true)); + } + + #[test] + fn dedup_window_holds_beyond_64_events() { + // Old code evicted at 64; new IndexSet holds 1024. 
+ let mut core = KernelCore::new(2); + for i in 0..100 { + let p = format!(r#"{{"kind":"FILL_SETTLED","event_id":"e-{i:04}","realized_pnl":0.0,"fee":1.0,"is_maker":false}}"#); + core.on_account_event(&p); + } + // Replay event 0 — must still be recognised as duplicate + let r = core.on_account_event(r#"{"kind":"FILL_SETTLED","event_id":"e-0000","realized_pnl":0.0,"fee":1.0,"is_maker":false}"#); + assert_eq!(r.get("duplicate_event").and_then(|v| v.as_bool()), Some(true), + "event e-0000 should still be deduped after 100 events"); + let fees = core.account.k_taker_fees; + assert!((fees - 100.0).abs() < 1e-9, "expected 100.0 not {fees}"); + } + + #[test] + fn dedup_evicts_oldest_at_capacity() { + let mut core = KernelCore::new(2); + // Fill exactly MAX_ACCOUNT_DEDUP_ENTRIES (1024) unique events + for i in 0..MAX_ACCOUNT_DEDUP_ENTRIES { + let p = format!(r#"{{"kind":"FILL_SETTLED","event_id":"ev-{i:05}","realized_pnl":0.0,"fee":0.0,"is_maker":false}}"#); + core.on_account_event(&p); + } + // Add one more — this evicts "ev-00000" + core.on_account_event(r#"{"kind":"FILL_SETTLED","event_id":"ev-01024","realized_pnl":0.0,"fee":0.0,"is_maker":false}"#); + // ev-00000 is now gone from dedup set; replay should NOT be flagged as duplicate + let r = core.on_account_event(r#"{"kind":"FILL_SETTLED","event_id":"ev-00000","realized_pnl":0.0,"fee":0.0,"is_maker":false}"#); + assert_ne!(r.get("duplicate_event").and_then(|v| v.as_bool()), Some(true), + "ev-00000 should have been evicted and not deduplicated"); + } + + // ----------------------------------------------------------------------- + // G3: KernelFullSnapshot save/restore + // ----------------------------------------------------------------------- + + #[test] + fn full_snapshot_round_trip_preserves_account() { + let mut core = KernelCore::new(4); + core.account.seed_capital = 10_000.0; + core.on_account_event(r#"{"kind":"FILL_SETTLED","event_id":"f1","realized_pnl":200.0,"fee":3.5,"is_maker":false}"#); + let json = 
/// An account event id recorded before `save_full_snapshot` must still be
/// reported as a duplicate by a fresh core that restored that snapshot.
#[test]
fn full_snapshot_restores_dedup_set() {
    let mut core = KernelCore::new(2);
    core.on_account_event(r#"{"kind":"FILL_SETTLED","event_id":"saved-dup","realized_pnl":0.0,"fee":5.0,"is_maker":false}"#);
    let json = core.save_full_snapshot().expect("save failed");

    let mut core2 = KernelCore::new(2);
    core2.restore_full_snapshot(&json).expect("restore failed");

    // saved-dup must still be in the dedup set after restore
    assert!(
        core2.account.is_duplicate_account_event("saved-dup"),
        "saved-dup should be in dedup set after restore"
    );
}

/// A snapshot whose `version` field was rewritten must be refused on restore.
#[test]
fn full_snapshot_version_mismatch_rejected() {
    let mut core = KernelCore::new(2);
    let json = core.save_full_snapshot().expect("save");
    // Tamper with version
    let tampered = json.replace("\"version\":1", "\"version\":999");
    // Guard the fixture itself: if the serialized form ever stops containing
    // the exact `"version":1` token, the replace above is a no-op and the
    // failure below would wrongly blame the version check.
    assert_ne!(
        tampered, json,
        "fixture error: snapshot JSON did not contain the `\"version\":1` token"
    );
    let res = core.restore_full_snapshot(&tampered);
    assert!(res.is_err(), "version mismatch must be rejected");
}

/// A snapshot saved by a 4-slot core must be refused by a 2-slot core.
#[test]
fn full_snapshot_slot_count_mismatch_rejected() {
    let mut core4 = KernelCore::new(4);
    let json = core4.save_full_snapshot().expect("save");

    let mut core2 = KernelCore::new(2);
    let res = core2.restore_full_snapshot(&json);
    assert!(res.is_err(), "slot count mismatch must be rejected");
}

// -----------------------------------------------------------------------
// G4: capital_frozen — reconcile ERROR ↔ ENTER block
// -----------------------------------------------------------------------

/// A freshly constructed core starts with `capital_frozen` cleared.
#[test]
fn capital_frozen_false_initially() {
    let core = KernelCore::new(2);
    assert!(!core.account.capital_frozen);
}

/// `reconcile()` on a large seed-vs-wallet divergence must set `capital_frozen`.
#[test]
fn capital_frozen_true_on_error_reconcile() {
    let mut core = KernelCore::new(2);
    core.account.seed_capital = 10_000.0;
    // Inject a large balance divergence (>20.0); e_wallet_balance > 0 activates R1
    core.account.e_wallet_balance = 1_000.0;
    core.account.reconcile();
    assert!(
        core.account.capital_frozen,
        "reconcile_status=ERROR must set capital_frozen; status={}",
        core.account.reconcile_status
    );
}

/// Once the wallet balance matches the seed again, a second `reconcile()`
/// must clear `capital_frozen`.
#[test]
fn capital_frozen_clears_on_ok_reconcile() {
    let mut core = KernelCore::new(2);
    core.account.seed_capital = 10_000.0;
    core.account.e_wallet_balance = 1_000.0;
    core.account.reconcile(); // ERROR
    assert!(core.account.capital_frozen);
    // Fix divergence — match E to K
    core.account.e_wallet_balance = 10_000.0;
    core.account.reconcile(); // OK
    assert!(
        !core.account.capital_frozen,
        "capital_frozen must clear when reconcile = OK"
    );
}

/// `capital_frozen` set before `save_full_snapshot` must be set again on the
/// core that restores that snapshot.
#[test]
fn capital_frozen_in_snapshot_round_trip() {
    let mut core = KernelCore::new(2);
    core.account.seed_capital = 10_000.0;
    core.account.e_wallet_balance = 500.0; // delta=9500 → ERROR
    core.account.reconcile();
    assert!(core.account.capital_frozen);

    let json = core.save_full_snapshot().expect("save");
    let mut core2 = KernelCore::new(2);
    core2.restore_full_snapshot(&json).expect("restore");
    assert!(
        core2.account.capital_frozen,
        "capital_frozen must survive snapshot round-trip"
    );
}

// -----------------------------------------------------------------------
// G1: catch_unwind in with_handle_mut (structural — tested by compile + normal use)
// -----------------------------------------------------------------------

/// A null handle must be rejected with the `NULL_HANDLE` error instead of
/// being dereferenced.
#[test]
fn with_handle_mut_returns_err_on_null_handle() {
    // `Result` needs its type arguments to compile. The success type is the
    // closure's `bool`; the error type is left to inference from
    // `with_handle_mut`'s signature (assumed concrete there — TODO confirm).
    let res: Result<bool, _> = with_handle_mut(std::ptr::null_mut(), |_| Ok(true));
    assert!(res.is_err());
    assert_eq!(res.unwrap_err(), "NULL_HANDLE");
}
a/prod/clean_arch/dita_v2/bingx_user_stream.py +++ b/prod/clean_arch/dita_v2/bingx_user_stream.py @@ -327,18 +327,28 @@ class BingxUserStream: "EXPIRED": ExchangeEventKind.CANCEL_ACK, } kind = kind_map.get(status, ExchangeEventKind.UNKNOWN) + # Maker detection: BingX WS uses "m" field (True = maker) in order updates. + # Falls back to order type field "o" (LIMIT=maker, MARKET=taker). + is_maker = bool(o.get("m") or ( + str(o.get("o") or o.get("type") or "MARKET").upper() == "LIMIT" + and status in {"FILLED", "PARTIALLY_FILLED"} + )) + # Fees: BingX sends commission as positive for costs, negative for rebates + raw_fee = _safe_float(o.get("n") or 0.0) + fee = raw_fee # may be negative (rebate) return ExchangeEvent( kind=kind, event_id=str(o.get("i") or o.get("orderId") or uuid.uuid4().hex), exchange_ts=ts, fill_price=_safe_float(o.get("L") or o.get("ap") or o.get("p")), fill_qty=_safe_float(o.get("l") or o.get("lastFilledQty") or 0.0), - fee=_safe_float(o.get("n") or 0.0), + fee=fee, fee_asset=str(o.get("N") or ""), realized_pnl=_safe_float(o.get("rp") or o.get("realizedPnl") or 0.0), order_id=str(o.get("i") or ""), client_order_id=str(o.get("c") or ""), symbol=str(o.get("s") or ""), + is_maker=is_maker, source="ws", raw=frame, ) diff --git a/prod/clean_arch/dita_v2/bingx_venue.py b/prod/clean_arch/dita_v2/bingx_venue.py new file mode 100644 index 0000000..9c8de29 --- /dev/null +++ b/prod/clean_arch/dita_v2/bingx_venue.py @@ -0,0 +1,612 @@ +"""DITAv2 BingX venue adapter. + +This is a thin normalization layer over the existing direct BingX execution +surface. It converts BingX REST/account/order payloads into DITAv2 +``VenueEvent`` / ``VenueOrder`` objects without reimplementing exchange logic. 
+""" + +from __future__ import annotations + +import asyncio +import concurrent.futures +import inspect +import itertools +import re +import threading +from datetime import datetime, timezone +from typing import Any, Iterable, List, Optional + +from prod.clean_arch.dita import DecisionAction as LegacyDecisionAction +from prod.clean_arch.dita import Intent as LegacyIntent +from prod.clean_arch.dita import TradeSide as LegacyTradeSide + +from prod.bingx.http import BingxHttpError + +from .contracts import ( + KernelCommandType, + KernelEventKind, + KernelIntent, + TradeSide, + VenueEvent, + VenueEventStatus, + VenueOrder, + VenueOrderStatus, +) +from .utils import json_safe +from .utils import safe_float +from .venue import VenueAdapter + + +def _row_text(row: dict[str, Any], *keys: str, default: str = "") -> str: + for key in keys: + value = row.get(key) + if value is None: + continue + text = str(value) + if text: + return text + return default + + +def _row_float(row: dict[str, Any], *keys: str, default: float = 0.0) -> float: + for key in keys: + try: + value = float(row.get(key) or 0.0) + except Exception: + continue + if value == value and value not in (float("inf"), float("-inf")) and value != 0.0: + return value + return default + + +def _normalize_status(status: str) -> str: + return str(status or "").strip().upper() + + +def _trade_side_from_row(row: dict[str, Any], *, fallback: TradeSide = TradeSide.FLAT) -> TradeSide: + side_raw = _row_text(row, "side", "positionSide", default="").upper() + signed_qty = _row_float(row, "positionAmt", "positionQty", "positionSize", "quantity", "pa", default=0.0) + if side_raw in {"BUY", "LONG"}: + return TradeSide.LONG + if side_raw in {"SELL", "SHORT"}: + return TradeSide.SHORT + if signed_qty < 0: + return TradeSide.SHORT + if signed_qty > 0: + return TradeSide.LONG + return fallback + + +def _venue_event_status_from_row(status: str) -> VenueEventStatus: + normalized = _normalize_status(status) + if normalized in {"NEW", 
def _venue_order_status_from_row(status: str) -> VenueOrderStatus:
    """Map a raw status token onto ``VenueOrderStatus``.

    The token is normalised with ``_normalize_status`` first. Everything
    that is not a recognised fill / cancel / reject alias resolves to
    ``VenueOrderStatus.NEW``. That includes the NEW/ACKED/PENDING/CREATED
    and RATE_LIMITED/THROTTLED tokens as well as unknown ones.
    """
    token = _normalize_status(status)
    alias_table = (
        ({"PARTIALLY_FILLED", "PARTIAL_FILL"}, VenueOrderStatus.PARTIALLY_FILLED),
        ({"FILLED", "FULL_FILL"}, VenueOrderStatus.FILLED),
        ({"CANCELED", "CANCELLED", "EXPIRED"}, VenueOrderStatus.CANCELED),
        ({"REJECTED", "FAILED"}, VenueOrderStatus.REJECTED),
    )
    for aliases, mapped in alias_table:
        if token in aliases:
            return mapped
    return VenueOrderStatus.NEW


def _position_qty(row: dict[str, Any]) -> float:
    """Return the absolute quantity carried by a position/order row.

    Position-size keys are consulted first; only when they yield nothing
    does the lookup fall back to the executed/filled-quantity keys.
    """
    amount = _row_float(row, "positionAmt", "positionQty", "positionSize", "quantity", "pa", default=0.0)
    if amount == 0.0:
        amount = _row_float(row, "executedQty", "filledQty", "z", default=0.0)
    return abs(amount)


def _position_price(row: dict[str, Any]) -> float:
    """Return the first usable price field of a row, or ``0.0`` if none."""
    price_keys = (
        "entryPrice",
        "avgPrice",
        "avgEntryPrice",
        "ep",
        "ap",
        "price",
        "lastFillPrice",
        "tradePrice",
    )
    return _row_float(row, *price_keys)
"orderId", "orderID", "id", default="") + key = client_id or order_id + if key: + mapping[key] = dict(row) + if order_id and order_id not in mapping: + mapping[order_id] = dict(row) + return mapping + + +def _venue_order_from_row( + row: dict[str, Any], + *, + internal_trade_id: str = "", + fallback_side: TradeSide = TradeSide.FLAT, +) -> VenueOrder: + side = _trade_side_from_row(row, fallback=fallback_side) + client_id = _row_text(row, "clientOrderID", "clientOrderId", default="") + order_id = _row_text(row, "orderId", "orderID", "id", default="") + intended = _row_float(row, "origQty", "quantity", "q", "positionAmt", "positionQty", default=0.0) + if intended <= 0: + intended = _position_qty(row) + return VenueOrder( + internal_trade_id=internal_trade_id or client_id or order_id, + venue_order_id=order_id, + venue_client_id=client_id, + side=side, + intended_size=abs(float(intended or 0.0)), + filled_size=abs(_row_float(row, "executedQty", "filledQty", "z", "lastFilledQty", default=0.0)), + average_fill_price=_position_price(row), + status=_venue_order_status_from_row(_row_text(row, "status", "X", default="NEW")), + metadata={"raw": dict(row)}, + ) + + +def _event_id(seq: itertools.count) -> str: + return f"EV-{next(seq):08d}" + + +def _rate_limit_retry_after_ms(row: dict[str, Any]) -> int: + raw_retry = row.get("retryAfter") or row.get("retry_after_ms") or row.get("retryAfterMs") + if raw_retry is None: + msg = _row_text(row, "msg", "message", default="") + match = re.search(r"unblocked after (\d+)", msg) + if match: + try: + ts = int(match.group(1)) + now_ms = int(datetime.now(timezone.utc).timestamp() * 1000) + return max(0, ts - now_ms) + except Exception: + return 0 + return 0 + try: + return max(0, int(float(raw_retry))) + except Exception: + return 0 + + +class BingxVenueAdapter(VenueAdapter): + """Normalizes BingX execution responses into DITAv2 venue events.""" + + # Shared thread-pool executor reused across all adapter instances and + # all calls. 
Threads are created once and recycled, eliminating the + # per-call creation/destruction overhead of the old pattern. + _EXECUTOR: concurrent.futures.ThreadPoolExecutor | None = None + _EXECUTOR_LOCK: threading.Lock = threading.Lock() + + @classmethod + def _get_executor(cls) -> concurrent.futures.ThreadPoolExecutor: + if cls._EXECUTOR is None: + with cls._EXECUTOR_LOCK: + if cls._EXECUTOR is None: + # max_workers=3 so three concurrent HTTP calls (balance, + # positions, openOrders) can proceed simultaneously without + # serialising on the pool. + cls._EXECUTOR = concurrent.futures.ThreadPoolExecutor( + max_workers=3, + thread_name_prefix="bingx_adapter", + ) + return cls._EXECUTOR + + def __init__(self, backend: Any | None = None, *, config: Any | None = None) -> None: + if backend is None: + if config is None: + raise ValueError("BingxVenueAdapter requires a backend or config") + from prod.clean_arch.adapters.bingx_direct import BingxDirectExecutionAdapter + + backend = BingxDirectExecutionAdapter(config) + self.backend = backend + self._event_seq = itertools.count(1) + # Thread-safe snapshot cache — reads from a snapshot may arrive from + # the kernel thread while _backend_snapshot writes from the pool thread. + self._snap_lock = threading.Lock() + self._last_snapshot = None + self._snapshot_ready = threading.Event() + self._snapshot_ready.set() # initially ready (no pending write) + + # Maximum seconds to wait for a single backend HTTP call. BingX REST + # round-trips are ~0.5–2 s in normal conditions; 30 s is generous enough + # to survive transient slowdowns without hanging the process forever (O5). + _BACKEND_TIMEOUT_S: float = 30.0 + + def _run(self, result: Any) -> Any: + if inspect.isawaitable(result): + try: + asyncio.get_running_loop() + except RuntimeError: + return asyncio.run(result) + # Inside a running event loop: submit to the shared singleton + # executor so threads are reused across calls. 
+ pool = self._get_executor() + try: + return pool.submit(asyncio.run, result).result(timeout=self._BACKEND_TIMEOUT_S) + except TimeoutError as exc: + raise TimeoutError( + f"BingX backend call exceeded {self._BACKEND_TIMEOUT_S}s timeout" + ) from exc + return result + + def _call_backend(self, method_name: str, *args: Any, **kwargs: Any) -> Any: + method = getattr(self.backend, method_name, None) + if method is None: + raise AttributeError(f"backend has no method {method_name}") + return self._run(method(*args, **kwargs)) + + def _backend_snapshot(self, *, include_history: bool = False, timeout_ms: float = 5000.0): + """Fetch a fresh snapshot from the backend and cache it thread-safely. + + Design (industry best-practice reader-writer pattern): + - A caller that needs a fresh snapshot *waits* on ``_snapshot_ready`` + before reading, so it never sees a stale partial write. + - While a snapshot fetch is in-flight, the lock is cleared; concurrent + callers block on ``_snapshot_ready`` with a timeout. If the fetch + succeeds in time they get the fresh snapshot; if it times out they + fall back to ``_last_snapshot`` (an eventually-consistent design — + stale data that *was* consistent is safer than no data). + - The write is guarded by ``_snap_lock`` so concurrent writes are + serialised and ``_last_snapshot`` is never partially assigned. + """ + if not self._snapshot_ready.wait(timeout=timeout_ms / 1000.0): + # Timeout waiting for a previous snapshot write — return the + # last-known-good snapshot rather than blocking the caller. 
+ with self._snap_lock: + return self._last_snapshot + + self._snapshot_ready.clear() + try: + snapshot = self._call_backend("refresh_state", None, include_history=include_history) + except Exception: + self._snapshot_ready.set() + raise + + with self._snap_lock: + self._last_snapshot = snapshot + self._snapshot_ready.set() + return snapshot + + @staticmethod + def _legacy_intent(intent: KernelIntent) -> LegacyIntent: + action = LegacyDecisionAction.ENTER if intent.action == KernelCommandType.ENTER else LegacyDecisionAction.EXIT + side = LegacyTradeSide.SHORT if intent.side == TradeSide.SHORT else LegacyTradeSide.LONG + metadata = dict(intent.metadata) + metadata["_order_type"] = getattr(intent, "order_type", "MARKET") + metadata["_limit_price"] = float(getattr(intent, "limit_price", 0.0) or 0.0) + return LegacyIntent( + timestamp=intent.timestamp, + trade_id=intent.trade_id, + decision_id=intent.intent_id, + asset=intent.asset, + action=action, + side=side, + reason=intent.reason, + target_size=float(intent.target_size), + leverage=float(intent.leverage), + reference_price=float(intent.reference_price), + confidence=1.0, + bars_held=0, + exit_leg_ratios=tuple(intent.exit_leg_ratios or (1.0,)), + metadata=metadata, + ) + + def connect(self) -> bool: + result = getattr(self.backend, "connect", None) + if result is not None: + self._run(result()) + self._backend_snapshot(include_history=True) + return True + + def cancel(self, order: VenueOrder, *, reason: str = "") -> List[VenueEvent]: + snapshot_before = self._backend_snapshot(include_history=True) + response = None + if hasattr(self.backend, "cancel_order"): + response = self._call_backend("cancel_order", order, reason=reason) + elif hasattr(self.backend, "cancel"): + response = self._call_backend("cancel", order, reason=reason) + else: + client = getattr(self.backend, "_client", None) + instrument_symbol = "" + if hasattr(self.backend, "_instrument_venue_symbol"): + asset = str(order.metadata.get("asset") or "") 
def open_orders(self) -> List[VenueOrder]:
    """Return the backend's currently open orders as ``VenueOrder`` objects.

    A fresh snapshot (without history) is requested for every call.
    NOTE(review): the snapshot's attribute is read directly, so a ``None``
    snapshot would raise ``AttributeError`` here — confirm whether
    ``_backend_snapshot`` can hand one back on this path.
    """
    rows = self._backend_snapshot(include_history=False).open_orders
    orders = []
    for row in rows or []:
        orders.append(_venue_order_from_row(row))
    return orders


def open_positions(self) -> List[dict[str, Any]]:
    """Return shallow copies of the rows in the snapshot's ``open_positions`` mapping.

    A fresh snapshot (without history) is requested for every call; the
    mapping's keys are dropped and only the row dicts are returned.
    """
    by_key = self._backend_snapshot(include_history=False).open_positions or {}
    return list(map(dict, by_key.values()))
receipt: Any, before, after) -> List[VenueEvent]: # noqa: ANN001 + ack_row = dict(getattr(receipt, "raw_ack", {}) or {}) + status = _normalize_status(getattr(receipt, "status", "") or _row_text(ack_row, "status", default="NEW")) + order_id = _row_text(ack_row, "orderId", "orderID", default=str(getattr(receipt, "order_id", "") or "")) + client_order_id = _row_text(ack_row, "clientOrderID", "clientOrderId", default=str(getattr(receipt, "client_order_id", "") or intent.intent_id)) + if status in {"RATE_LIMITED", "THROTTLED"}: + return [ + VenueEvent( + timestamp=getattr(receipt, "timestamp", datetime.now(timezone.utc)), + event_id=_event_id(self._event_seq), + trade_id=intent.trade_id, + slot_id=intent.slot_id, + kind=KernelEventKind.RATE_LIMITED, + status=VenueEventStatus.RATE_LIMITED, + venue_order_id=order_id, + venue_client_id=client_order_id, + side=intent.side, + asset=intent.asset, + price=safe_float(getattr(receipt, "price", 0.0), 0.0), + size=float(intent.target_size or 0.0), + filled_size=0.0, + remaining_size=float(intent.target_size or 0.0), + reason=_row_text(ack_row, "msg", "message", default="BINGX_RATE_LIMITED"), + raw_payload=ack_row or json_safe(receipt), + metadata={"intent_id": intent.intent_id, "action": intent.action.value, "retry_after_ms": _rate_limit_retry_after_ms(ack_row)}, + ) + ] + base_event = VenueEvent( + timestamp=getattr(receipt, "timestamp", datetime.now(timezone.utc)), + event_id=_event_id(self._event_seq), + trade_id=intent.trade_id, + slot_id=intent.slot_id, + kind=KernelEventKind.ORDER_ACK, + status=VenueEventStatus.ACKED, + venue_order_id=order_id, + venue_client_id=client_order_id, + side=intent.side, + asset=intent.asset, + price=safe_float(getattr(receipt, "price", 0.0), 0.0), + size=float(intent.target_size or 0.0), + filled_size=0.0, + remaining_size=float(intent.target_size or 0.0), + reason="", + raw_payload=ack_row or json_safe(receipt), + metadata={"intent_id": intent.intent_id, "action": intent.action.value}, + ) + if 
status in {"REJECTED", "FAILED"}: + return [ + VenueEvent( + **{**base_event.__dict__, "event_id": _event_id(self._event_seq), "kind": KernelEventKind.ORDER_REJECT, "status": VenueEventStatus.REJECTED, "reason": _row_text(ack_row, "msg", "message", default="BINGX_ORDER_REJECTED")}, + ) + ] + events = [base_event] + fill_status = _venue_event_status_from_row(status) + filled_size = _row_float(ack_row, "executedQty", "cumFilledQty", "filledQty", "lastFilledQty", default=0.0) + snapshot_fill_size = self._filled_size_from_snapshots(before, after, intent.asset) + if filled_size <= 0: + filled_size = snapshot_fill_size + emit_fill = fill_status in {VenueEventStatus.PARTIALLY_FILLED, VenueEventStatus.FILLED} or snapshot_fill_size > 0.0 + if emit_fill: + if filled_size <= 0: + filled_size = float(intent.target_size or 0.0) + remaining_size = max(0.0, float(intent.target_size or 0.0) - float(filled_size)) + fill_kind = KernelEventKind.FULL_FILL if fill_status == VenueEventStatus.FILLED or remaining_size <= 1e-12 else KernelEventKind.PARTIAL_FILL + events.append( + VenueEvent( + timestamp=base_event.timestamp, + event_id=_event_id(self._event_seq), + trade_id=intent.trade_id, + slot_id=intent.slot_id, + kind=fill_kind, + status=VenueEventStatus.FILLED if fill_kind == KernelEventKind.FULL_FILL else VenueEventStatus.PARTIALLY_FILLED, + venue_order_id=order_id, + venue_client_id=client_order_id, + side=intent.side, + asset=intent.asset, + price=safe_float(_row_float(ack_row, "avgPrice", "ap", "price", "lastFillPrice", default=getattr(receipt, "price", 0.0)), 0.0), + size=float(intent.target_size or 0.0), + filled_size=float(filled_size), + remaining_size=float(remaining_size), + reason="", + raw_payload=ack_row or json_safe(receipt), + metadata={"intent_id": intent.intent_id, "action": intent.action.value}, + ) + ) + return events + + def _events_from_cancel(self, order: VenueOrder, response: Any, before, after, *, reason: str = "") -> List[VenueEvent]: # noqa: ANN001 + raw = 
response if isinstance(response, dict) else {} + status = _normalize_status(_row_text(raw, "status", default="CANCELED")) + if status in {"RATE_LIMITED", "THROTTLED"}: + return [ + VenueEvent( + timestamp=datetime.now(timezone.utc), + event_id=_event_id(self._event_seq), + trade_id=order.internal_trade_id or order.venue_client_id, + slot_id=int(order.metadata.get("slot_id", 0) or 0), + kind=KernelEventKind.RATE_LIMITED, + status=VenueEventStatus.RATE_LIMITED, + venue_order_id=order.venue_order_id, + venue_client_id=order.venue_client_id, + side=order.side, + asset=str(order.metadata.get("asset") or ""), + price=safe_float(_row_float(raw, "avgPrice", "ap", "price", "lastFillPrice", default=order.average_fill_price), 0.0), + size=float(order.intended_size or 0.0), + filled_size=float(order.filled_size or 0.0), + remaining_size=float(order.remaining_size), + reason=reason or _row_text(raw, "msg", "message", default="BINGX_RATE_LIMITED"), + raw_payload=raw or {"orderId": order.venue_order_id, "clientOrderId": order.venue_client_id, "status": status or "RATE_LIMITED"}, + metadata={**dict(order.metadata), "retry_after_ms": _rate_limit_retry_after_ms(raw)}, + ) + ] + event_status = _venue_event_status_from_row(status) + kind = KernelEventKind.CANCEL_ACK if event_status == VenueEventStatus.CANCELED else KernelEventKind.CANCEL_REJECT + if event_status == VenueEventStatus.CANCELED_REJECTED: + kind = KernelEventKind.CANCEL_REJECT + return [ + VenueEvent( + timestamp=datetime.now(timezone.utc), + event_id=_event_id(self._event_seq), + trade_id=order.internal_trade_id or order.venue_client_id, + slot_id=int(order.metadata.get("slot_id", 0) or 0), + kind=kind, + status=event_status, + venue_order_id=order.venue_order_id, + venue_client_id=order.venue_client_id, + side=order.side, + asset=str(order.metadata.get("asset") or ""), + price=safe_float(_row_float(raw, "avgPrice", "ap", "price", "lastFillPrice", default=order.average_fill_price), 0.0), + size=float(order.intended_size 
or 0.0), + filled_size=float(order.filled_size or 0.0), + remaining_size=float(order.remaining_size), + reason=reason or _row_text(raw, "msg", "message", default="BINGX_CANCEL_ACK" if kind == KernelEventKind.CANCEL_ACK else "BINGX_CANCEL_REJECT"), + raw_payload=raw or {"orderId": order.venue_order_id, "clientOrderId": order.venue_client_id, "status": status or event_status.value}, + metadata=dict(order.metadata), + ) + ] + + def _events_from_snapshot(self, snapshot: Any) -> List[VenueEvent]: # noqa: ANN001 + events: list[VenueEvent] = [] + seen: set[tuple[str, str, str]] = set() + for row in getattr(snapshot, "open_orders", []) or []: + if not isinstance(row, dict): + continue + event = self._event_from_row(row, slot_id=0) + key = (event.venue_client_id, event.venue_order_id, event.kind.value) + if key not in seen: + seen.add(key) + events.append(event) + for row in getattr(snapshot, "all_orders", []) or []: + if not isinstance(row, dict): + continue + event = self._event_from_row(row, slot_id=0) + key = (event.venue_client_id, event.venue_order_id, event.kind.value) + if key not in seen: + seen.add(key) + events.append(event) + for row in getattr(snapshot, "all_fills", []) or []: + if not isinstance(row, dict): + continue + event = self._fill_event_from_row(row) + key = (event.venue_client_id, event.venue_order_id, event.kind.value) + if key not in seen: + seen.add(key) + events.append(event) + return events + + def _event_from_row(self, row: dict[str, Any], *, slot_id: int) -> VenueEvent: + status = _normalize_status(_row_text(row, "status", "X", default="NEW")) + event_status = _venue_event_status_from_row(status) + kind = { + VenueEventStatus.ACKED: KernelEventKind.ORDER_ACK, + VenueEventStatus.PARTIALLY_FILLED: KernelEventKind.PARTIAL_FILL, + VenueEventStatus.FILLED: KernelEventKind.FULL_FILL, + VenueEventStatus.CANCELED: KernelEventKind.CANCEL_ACK, + VenueEventStatus.REJECTED: KernelEventKind.ORDER_REJECT, + VenueEventStatus.CANCELED_REJECTED: 
KernelEventKind.CANCEL_REJECT, + VenueEventStatus.RATE_LIMITED: KernelEventKind.RATE_LIMITED, + }.get(event_status, KernelEventKind.ORDER_ACK) + size = _row_float(row, "origQty", "quantity", "q", "positionAmt", default=0.0) + filled = _row_float(row, "executedQty", "cumFilledQty", "filledQty", "z", "lastFilledQty", default=0.0) + if filled <= 0.0 and kind in {KernelEventKind.PARTIAL_FILL, KernelEventKind.FULL_FILL}: + filled = size + return VenueEvent( + timestamp=datetime.now(timezone.utc), + event_id=_event_id(self._event_seq), + trade_id=_row_text(row, "tradeId", "trade_id", default=_row_text(row, "clientOrderId", "clientOrderID", default="")), + slot_id=slot_id, + kind=kind, + status=event_status, + venue_order_id=_row_text(row, "orderId", "orderID", "id", default=""), + venue_client_id=_row_text(row, "clientOrderID", "clientOrderId", "c", default=""), + side=_trade_side_from_row(row), + asset=_row_text(row, "symbol", default=""), + price=safe_float(_row_float(row, "avgPrice", "ap", "price", "lastFillPrice", default=0.0), 0.0), + size=abs(float(size or 0.0)), + filled_size=abs(float(filled or 0.0)), + remaining_size=max(0.0, abs(float(size or 0.0)) - abs(float(filled or 0.0))), + reason=_row_text(row, "msg", "message", default=""), + raw_payload=dict(row), + metadata={"source": "bingx"}, + ) + + def _fill_event_from_row(self, row: dict[str, Any]) -> VenueEvent: + status = _normalize_status(_row_text(row, "status", "X", default="FILLED")) + event_status = _venue_event_status_from_row(status) + kind = KernelEventKind.FULL_FILL if event_status == VenueEventStatus.FILLED else KernelEventKind.PARTIAL_FILL + return VenueEvent( + timestamp=datetime.now(timezone.utc), + event_id=_event_id(self._event_seq), + trade_id=_row_text(row, "tradeId", "trade_id", default=_row_text(row, "clientOrderId", "clientOrderID", default="")), + slot_id=0, + kind=kind, + status=event_status, + venue_order_id=_row_text(row, "orderId", "orderID", "id", default=""), + 
venue_client_id=_row_text(row, "clientOrderID", "clientOrderId", "c", default=""), + side=_trade_side_from_row(row), + asset=_row_text(row, "symbol", default=""), + price=safe_float(_row_float(row, "lastFillPrice", "L", "price", "ap", default=0.0), 0.0), + size=abs(_row_float(row, "executedQty", "z", "lastFilledQty", default=0.0)), + filled_size=abs(_row_float(row, "lastFilledQty", "l", "z", default=0.0)), + remaining_size=max(0.0, abs(_row_float(row, "executedQty", "z", "lastFilledQty", default=0.0)) - abs(_row_float(row, "lastFilledQty", "l", "z", default=0.0))), + reason=_row_text(row, "msg", "message", default=""), + raw_payload=dict(row), + metadata={"source": "bingx"}, + ) + + @staticmethod + def _filled_size_from_snapshots(before: Any, after: Any, asset: str) -> float: # noqa: ANN001 + def _lookup(snapshot: Any) -> float: + positions = getattr(snapshot, "open_positions", {}) or {} + for key, row in positions.items(): + symbol = _row_text(row, "symbol", default=str(key)) + if symbol.replace("-", "").replace("_", "").upper() == asset.replace("-", "").replace("_", "").upper(): + return _position_qty(row) + return 0.0 + + before_qty = _lookup(before) + after_qty = _lookup(after) + diff = abs(before_qty - after_qty) + return diff diff --git a/prod/clean_arch/dita_v2/exchange_event.py b/prod/clean_arch/dita_v2/exchange_event.py index ed9a0d2..593108b 100644 --- a/prod/clean_arch/dita_v2/exchange_event.py +++ b/prod/clean_arch/dita_v2/exchange_event.py @@ -82,6 +82,9 @@ class ExchangeEvent: funding_amount: float = 0.0 # positive = received, negative = paid funding_ts: int = 0 + # --- Order type / maker-taker --- + is_maker: bool = False # True when limit order rested and filled (maker) + # --- Source metadata --- source: str = "ws" # "ws" | "poll" raw: Dict = field(default_factory=dict) # original frame (debug only) diff --git a/prod/clean_arch/dita_v2/launcher.py b/prod/clean_arch/dita_v2/launcher.py new file mode 100644 index 0000000..df859c9 --- /dev/null +++ 
class LauncherVenueMode(str, Enum):
    """Venue adapter choices understood by the launcher.

    Mixes in ``str``; each member's value is the same upper-case token as
    its name.
    """

    MOCK = "MOCK"
    BINGX = "BINGX"


class LauncherZincMode(str, Enum):
    """Zinc plane choices understood by the launcher.

    Mixes in ``str``; each member's value is the same upper-case token as
    its name.
    """

    IN_MEMORY = "IN_MEMORY"
    REAL = "REAL"
assembled by the launcher.""" + + kernel: ExecutionKernel + control_plane: ControlPlane + projection: HazelcastProjection + zinc_plane: ZincPlane + venue: VenueAdapter + + def close(self) -> None: + _maybe_close(self.venue) + _maybe_close(self.zinc_plane) + _maybe_close(self.control_plane) + + +def _env_upper(name: str, default: str = "") -> str: + return str(os.environ.get(name, default)).strip().upper() + + +def _env_bool(name: str, default: bool = False) -> bool: + raw = os.environ.get(name) + if raw is None: + return default + return str(raw).strip().lower() in {"1", "true", "yes", "on"} + + +def _resolve_control_mode() -> KernelMode | None: + raw = _env_upper("DITA_V2_MODE", "") + if raw == KernelMode.DEBUG.value: + return KernelMode.DEBUG + if raw == KernelMode.NORMAL.value: + return KernelMode.NORMAL + return None + + +def _resolve_control_verbosity() -> KernelVerbosity | None: + raw = _env_upper("DITA_V2_VERBOSITY", "") + if raw == KernelVerbosity.TRACE.value: + return KernelVerbosity.TRACE + if raw == KernelVerbosity.VERBOSE.value: + return KernelVerbosity.VERBOSE + if raw == KernelVerbosity.QUIET.value: + return KernelVerbosity.QUIET + return None + + +def _resolve_backend_mode() -> BackendMode | None: + raw = _env_upper("DITA_V2_BACKEND_MODE", "") + if raw == BackendMode.BINGX.value: + return BackendMode.BINGX + if raw == BackendMode.MOCK.value: + return BackendMode.MOCK + return None + + +def _control_update_from_env() -> ControlUpdate | None: + fields: dict[str, Any] = {} + mode = _resolve_control_mode() + if mode is not None: + fields["mode"] = mode + verbosity = _resolve_control_verbosity() + if verbosity is not None: + fields["verbosity"] = verbosity + backend_mode = _resolve_backend_mode() + if backend_mode is not None: + fields["backend_mode"] = backend_mode + raw = os.environ.get("DITA_V2_DEBUG_CLICKHOUSE") + if raw is not None: + fields["debug_clickhouse_enabled"] = _env_bool("DITA_V2_DEBUG_CLICKHOUSE", True) + raw = 
os.environ.get("DITA_V2_TRACE_TRANSITIONS") + if raw is not None: + fields["trace_transitions"] = _env_bool("DITA_V2_TRACE_TRANSITIONS", False) + raw = os.environ.get("DITA_V2_MIRROR_TO_HAZELCAST") + if raw is not None: + fields["mirror_to_hazelcast"] = _env_bool("DITA_V2_MIRROR_TO_HAZELCAST", True) + raw = os.environ.get("DITA_V2_ACTIVE_SLOT_LIMIT") + if raw is not None: + try: + fields["active_slot_limit"] = max(1, int(str(raw).strip())) + except Exception: + pass + raw = os.environ.get("DITA_V2_RECONCILE_ON_RESTART") + if raw is not None: + fields["reconcile_on_restart"] = _env_bool("DITA_V2_RECONCILE_ON_RESTART", True) + return ControlUpdate(**fields) if fields else None + + +def _resolve_venue_mode(venue_mode: Optional[str] = None) -> LauncherVenueMode: + raw = _env_upper("DITA_V2_VENUE", venue_mode or LauncherVenueMode.MOCK.value) + if raw == LauncherVenueMode.BINGX.value: + return LauncherVenueMode.BINGX + return LauncherVenueMode.MOCK + + +def _resolve_zinc_mode(zinc_mode: Optional[str] = None) -> LauncherZincMode: + raw = _env_upper("DITA_V2_ZINC", zinc_mode or LauncherZincMode.IN_MEMORY.value) + if raw == LauncherZincMode.REAL.value: + return LauncherZincMode.REAL + return LauncherZincMode.IN_MEMORY + + +def _resolve_hazelcast_real(prefer_real_hazelcast: Optional[bool] = None) -> bool: + if prefer_real_hazelcast is not None: + return bool(prefer_real_hazelcast) + raw = _env_upper("DITA_V2_HAZELCAST", "") + return raw in {"REAL", "REAL_HZ", "HAZELCAST"} + + +def build_bingx_exec_client_config( + *, + environment: Optional[BingxEnvironment] = None, + allow_mainnet: Optional[bool] = None, + recv_window_ms: Optional[int] = None, + default_leverage: Optional[int] = None, + exchange_leverage_cap: Optional[int] = None, + prefer_websocket: Optional[bool] = None, + sizing_mode: Optional[str] = None, +) -> BingxExecClientConfig: + """Build the direct BingX config used by the DITAv2 launcher.""" + + resolved_environment = environment or ( + BingxEnvironment.LIVE if 
_env_upper("DOLPHIN_BINGX_ENV", "VST") == "LIVE" else BingxEnvironment.VST + ) + resolved_allow_mainnet = _env_bool("DOLPHIN_BINGX_ALLOW_MAINNET", False) if allow_mainnet is None else bool(allow_mainnet) + resolved_recv_window = int(os.environ.get("DOLPHIN_BINGX_RECV_WINDOW_MS", "5000")) if recv_window_ms is None else int(recv_window_ms) + resolved_default_leverage = int(os.environ.get("DOLPHIN_BINGX_DEFAULT_LEVERAGE", "1")) if default_leverage is None else int(default_leverage) + resolved_exchange_cap = int(os.environ.get("DOLPHIN_BINGX_EXCHANGE_LEVERAGE_CAP", "3")) if exchange_leverage_cap is None else int(exchange_leverage_cap) + resolved_prefer_ws = _env_bool("DOLPHIN_BINGX_PREFER_WEBSOCKET", False) if prefer_websocket is None else bool(prefer_websocket) + resolved_sizing_mode = sizing_mode or os.environ.get("DOLPHIN_BINGX_SIZING_MODE", "testnet") + return BingxExecClientConfig( + api_key=os.environ.get("BINGX_API_KEY"), + secret_key=os.environ.get("BINGX_SECRET_KEY"), + environment=resolved_environment, + allow_mainnet=resolved_allow_mainnet, + recv_window_ms=max(1, resolved_recv_window), + default_leverage=max(1, resolved_default_leverage), + exchange_leverage_cap=max(1, resolved_exchange_cap), + prefer_websocket=resolved_prefer_ws, + sizing_mode=resolved_sizing_mode, + journal_strategy=os.environ.get("DOLPHIN_BINGX_JOURNAL_STRATEGY", "dita_v2"), + journal_db=os.environ.get("DOLPHIN_BINGX_JOURNAL_DB", "dolphin_pink"), + instrument_provider=BingxInstrumentProviderConfig(load_all=True), + ) + + +def _build_control_plane( + *, + prefix: str, + control_plane: Optional[ControlPlane] = None, +) -> ControlPlane: + plane = control_plane or build_control_plane(prefix=prefix) + update = _control_update_from_env() + if update is not None: + plane.update(update) + return plane + + +def _build_zinc_plane( + *, + prefix: str, + slot_count: int, + zinc_mode: Optional[LauncherZincMode] = None, + zinc_plane: Optional[ZincPlane] = None, +) -> ZincPlane: + if zinc_plane is not 
None: + return zinc_plane + resolved_mode = zinc_mode or _resolve_zinc_mode() + if resolved_mode is LauncherZincMode.REAL: + try: + return RealZincPlane(prefix=prefix, slot_count=slot_count, create=True) + except (RealZincPlaneUnavailable, RealZincUnavailable, Exception): + pass + return InMemoryZincPlane() + + +def _build_venue( + *, + venue_mode: Optional[LauncherVenueMode] = None, + mock_scenario: Optional[MockVenueScenario] = None, + bingx_config: Optional[BingxExecClientConfig] = None, + bingx_backend: Optional[Any] = None, + venue: Optional[VenueAdapter] = None, +) -> VenueAdapter: + if venue is not None: + return venue + resolved_mode = venue_mode or _resolve_venue_mode() + if resolved_mode is LauncherVenueMode.BINGX: + backend = bingx_backend + if backend is None: + from prod.clean_arch.adapters.bingx_direct import BingxDirectExecutionAdapter + + backend = BingxDirectExecutionAdapter(bingx_config or build_bingx_exec_client_config()) + return BingxVenueAdapter(backend=backend) + return MockVenueAdapter(mock_scenario) + + +def _maybe_close(obj: Any) -> None: + for method_name in ("close", "disconnect"): + method = getattr(obj, method_name, None) + if method is None: + continue + try: + result = method() + except TypeError: + continue + if inspect.isawaitable(result): + try: + loop = asyncio.get_running_loop() + except RuntimeError: + loop = None + if loop is not None and loop.is_running(): + # Called from within an async context — schedule on the + # shared executor so asyncio.run() can create its own loop + # without conflicting with the caller's loop (O1). 
+ import concurrent.futures as _cf + with _cf.ThreadPoolExecutor(max_workers=1) as _pool: + _pool.submit(asyncio.run, result).result(timeout=10.0) + else: + asyncio.run(result) + break + + +def build_launcher_bundle( + *, + max_slots: int = 10, + prefix: Optional[str] = None, + control_plane: Optional[ControlPlane] = None, + projection: Optional[HazelcastProjection] = None, + projection_client: Optional[Any] = None, + zinc_plane: Optional[ZincPlane] = None, + venue: Optional[VenueAdapter] = None, + venue_mode: Optional[LauncherVenueMode | str] = None, + zinc_mode: Optional[LauncherZincMode | str] = None, + bingx_config: Optional[BingxExecClientConfig] = None, + bingx_backend: Optional[Any] = None, + mock_scenario: Optional[MockVenueScenario] = None, +) -> DITAv2LauncherBundle: + """Build a fully wired DITAv2 runtime bundle. + + Defaults stay non-destructive: + - in-memory Zinc plane + - in-process control plane + - mock venue + - callback projection unless a Hazelcast client is supplied + """ + + resolved_prefix = (prefix or os.environ.get("DITA_V2_PREFIX", "dita_v2")).strip() or "dita_v2" + if isinstance(venue_mode, LauncherVenueMode): + resolved_venue_mode = venue_mode + elif isinstance(venue_mode, str): + resolved_venue_mode = LauncherVenueMode(venue_mode.strip().upper()) + else: + resolved_venue_mode = None + if isinstance(zinc_mode, LauncherZincMode): + resolved_zinc_mode = zinc_mode + elif isinstance(zinc_mode, str): + resolved_zinc_mode = LauncherZincMode(zinc_mode.strip().upper()) + else: + resolved_zinc_mode = None + + active_control_plane = _build_control_plane(prefix=resolved_prefix, control_plane=control_plane) + control_snapshot = active_control_plane.read() + active_projection = projection or build_projection( + client=projection_client, + prefer_real_hazelcast=_resolve_hazelcast_real(), + control_snapshot=control_snapshot, + ) + active_zinc_plane = _build_zinc_plane( + prefix=resolved_prefix, + slot_count=int(max_slots), + 
zinc_mode=resolved_zinc_mode, + zinc_plane=zinc_plane, + ) + active_venue = _build_venue( + venue_mode=resolved_venue_mode, + mock_scenario=mock_scenario, + bingx_config=bingx_config, + bingx_backend=bingx_backend, + venue=venue, + ) + kernel = ExecutionKernel( + max_slots=int(max_slots), + control_plane=active_control_plane, + venue=active_venue, + projection=active_projection, + projection_client=projection_client, + zinc_plane=active_zinc_plane, + ) + return DITAv2LauncherBundle( + kernel=kernel, + control_plane=active_control_plane, + projection=active_projection, + zinc_plane=active_zinc_plane, + venue=active_venue, + ) diff --git a/prod/clean_arch/dita_v2/rust_backend.py b/prod/clean_arch/dita_v2/rust_backend.py index c8d3eac..3875c1e 100644 --- a/prod/clean_arch/dita_v2/rust_backend.py +++ b/prod/clean_arch/dita_v2/rust_backend.py @@ -130,6 +130,12 @@ class _RustKernelLib: ctypes.c_char_p, ] self.lib.dita_kernel_on_account_event_json.restype = ctypes.c_void_p + self.lib.dita_kernel_save_state_json.argtypes = [ctypes.c_void_p] + self.lib.dita_kernel_save_state_json.restype = ctypes.c_void_p + self.lib.dita_kernel_restore_state_json.argtypes = [ctypes.c_void_p, ctypes.c_char_p] + self.lib.dita_kernel_restore_state_json.restype = ctypes.c_int + self.lib.dita_kernel_is_capital_frozen.argtypes = [ctypes.c_void_p] + self.lib.dita_kernel_is_capital_frozen.restype = ctypes.c_int def create(self, max_slots: int) -> ctypes.c_void_p: handle = self.lib.dita_kernel_create(ctypes.c_size_t(max_slots)) @@ -229,8 +235,8 @@ class _RustKernelLib: rc = self.lib.dita_kernel_set_exchange_config_json(handle, ctypes.c_char_p(encoded)) return rc == 0 - def calibrate_fee(self, handle: ctypes.c_void_p, fill_price: float, fill_qty: float, actual_fee: float) -> Dict[str, Any]: - payload = json.dumps({"fill_price": fill_price, "fill_qty": fill_qty, "actual_fee": actual_fee}).encode("utf-8") + def calibrate_fee(self, handle: ctypes.c_void_p, fill_price: float, fill_qty: float, actual_fee: 
float, is_maker: bool = False) -> Dict[str, Any]: + payload = json.dumps({"fill_price": fill_price, "fill_qty": fill_qty, "actual_fee": actual_fee, "is_maker": is_maker}).encode("utf-8") raw = self.lib.dita_kernel_calibrate_fee_json(handle, ctypes.c_char_p(payload)) if not raw: return {} @@ -245,6 +251,23 @@ class _RustKernelLib: return {} return json.loads(self._take_string(raw)) + def save_state(self, handle: ctypes.c_void_p) -> str: + """Serialise full kernel state (slots + account + fee calibration) to JSON.""" + raw = self.lib.dita_kernel_save_state_json(handle) + if not raw: + raise RuntimeError("dita_kernel_save_state_json returned NULL") + return self._take_string(raw) + + def restore_state(self, handle: ctypes.c_void_p, json_str: str) -> bool: + """Restore kernel from a previously saved JSON blob. Returns True on success.""" + rc = self.lib.dita_kernel_restore_state_json(handle, ctypes.c_char_p(json_str.encode("utf-8"))) + return rc == 0 + + def is_capital_frozen(self, handle: ctypes.c_void_p) -> bool: + """Return True if the kernel's capital is frozen (reconcile ERROR active).""" + rc = self.lib.dita_kernel_is_capital_frozen(handle) + return rc == 1 + _RUST: _RustKernelLib | None = None # lazy init — avoids Rust build on import @@ -553,15 +576,37 @@ class ExecutionKernel: self.zinc_plane.update_control(self._control_snapshot) self.state = KernelStateView(self) self.account.observe_slots([self._get_slot(slot_id) for slot_id in range(self.max_slots)]) + # I14: restore any non-idle slot state that survived in Zinc across + # a restart. A fresh kernel has all slots IDLE; if Zinc holds slots + # from a prior session the kernel must re-anchor them so the FSM + # correctly reflects open/working positions on re-entry. 
+ _zinc_live = [s for s in self.zinc_plane.read_slots() if not s.is_free()] + if _zinc_live: + self.reconcile_from_slots(_zinc_live) - def __del__(self) -> None: # pragma: no cover - cleanup best effort - backend = getattr(self, "_backend", None) + def close(self) -> None: + """Release the Rust kernel handle deterministically (O10). + + Safe to call multiple times. After close(), all FFI methods will + raise RuntimeError — the kernel is no longer usable. + """ + backend = self._backend if backend is not None: + self._backend = None # prevent double-free via __del__ try: _get_rust().destroy(backend) except Exception: pass + def __enter__(self) -> "ExecutionKernel": + return self + + def __exit__(self, *_: object) -> None: + self.close() + + def __del__(self) -> None: # pragma: no cover - backup for non-with use + self.close() + @property def control(self) -> KernelControlSnapshot: return self.control_plane.read() @@ -798,6 +843,7 @@ class ExecutionKernel: fill_price: float, fill_qty: float, actual_fee: float, + is_maker: bool = False, ) -> Dict[str, Any]: """ Validate the fee model against one known fill. @@ -815,7 +861,7 @@ class ExecutionKernel: enabling live trading. If status == ERROR, the fee model needs manual review before K-capital figures can be trusted. """ - return _get_rust().calibrate_fee(self._backend, float(fill_price), float(fill_qty), float(actual_fee)) + return _get_rust().calibrate_fee(self._backend, float(fill_price), float(fill_qty), float(actual_fee), bool(is_maker)) def on_account_event(self, event: Dict[str, Any]) -> Dict[str, Any]: """ @@ -825,10 +871,44 @@ class ExecutionKernel: plus the relevant numeric fields (see Rust FFI doc). Returns the resulting account state dict including reconcile_status, - available_capital (E rules when present), k_capital, event_seq. + available_capital (E rules when present), k_capital, event_seq, + capital_frozen (bool), duplicate_event (bool if deduplicated). 
""" return _get_rust().on_account_event(self._backend, event) + # ------------------------------------------------------------------ + # Snapshot / restore — session-to-session state continuity + # ------------------------------------------------------------------ + + def save_state(self) -> str: + """Serialise the full kernel state (slots + account + fee calibration) to JSON. + + The returned string is opaque — pass it verbatim to restore_state() on the + next session start to resume without losing fee calibration or slot state. + """ + return _get_rust().save_state(self._backend) + + def restore_state(self, json_str: str) -> bool: + """Restore kernel from a previously saved state JSON blob. + + Returns True on success. Returns False (and leaves state unchanged) on: + - schema version mismatch + - slot count mismatch + - parse error + - non-finite capital + + Safe to call on a fresh kernel (e.g. after startup) before any trades. + """ + return _get_rust().restore_state(self._backend, json_str) + + def is_capital_frozen(self) -> bool: + """Return True if the kernel's capital is frozen (reconcile ERROR active). + + When frozen, process_intent will reject all ENTER intents with CAPITAL_FROZEN + until the next ACCOUNT_UPDATE that brings reconcile to OK. + """ + return _get_rust().is_capital_frozen(self._backend) + def snapshot(self) -> Dict[str, Any]: # Merge kernel Rust snapshot (includes AccountState) with Python state. rust_snap = _get_rust().snapshot(self._backend) diff --git a/prod/clean_arch/dita_v2/test_flaws.py b/prod/clean_arch/dita_v2/test_flaws.py new file mode 100644 index 0000000..63a6db8 --- /dev/null +++ b/prod/clean_arch/dita_v2/test_flaws.py @@ -0,0 +1,970 @@ +"""Comprehensive test battery for all 13 CRITICAL DITAv2 flaws. + +Each test verifies that the specific flaw exists (pre-fix) and would pass +once the flaw is addressed. Tests use the MockVenueAdapter to avoid +requiring live BingX connectivity. 
+ +Run with: + python -m pytest prod/clean_arch/dita_v2/test_flaws.py -v +""" +from __future__ import annotations + +import sys +sys.path.insert(0, "/mnt/dolphinng5_predict") + +import asyncio +from datetime import datetime, timezone +from typing import Any, Dict, List +import pytest + +from prod.clean_arch.dita_v2.contracts import ( + KernelCommandType, + KernelDiagnosticCode, + KernelEventKind, + KernelIntent, + KernelOutcome, + KernelSeverity, + KernelTransition, + TradeSide, + TradeSlot, + TradeStage, + VenueEvent, + VenueEventStatus, + VenueOrder, + VenueOrderStatus, +) +from prod.clean_arch.dita_v2.mock_venue import MockVenueAdapter, MockVenueScenario +from prod.clean_arch.dita_v2.rust_backend import ExecutionKernel +from prod.clean_arch.dita_v2.account import AccountProjection + +E = KernelCommandType +TS = TradeSide + + +def _mk_intent( + action: KernelCommandType = KernelCommandType.ENTER, + trade_id: str = "t1", + slot_id: int = 0, + asset: str = "BTCUSDT", + side: TradeSide = TradeSide.SHORT, + price: float = 100.0, + size: float = 1.0, + leverage: float = 1.0, + exit_leg_ratios: tuple = (1.0,), + **kw, +) -> KernelIntent: + return KernelIntent( + timestamp=datetime.now(timezone.utc), + intent_id=kw.pop("intent_id", trade_id), + trade_id=trade_id, + slot_id=slot_id, + asset=asset, + side=side, + action=action, + reference_price=price, + target_size=size, + leverage=leverage, + exit_leg_ratios=exit_leg_ratios, + reason=kw.pop("reason", f"auto_{action.value.lower()}"), + metadata=kw, + ) + + +def _mk_venue_event( + kind: KernelEventKind, + trade_id: str = "t1", + slot_id: int = 0, + side: TradeSide = TradeSide.SHORT, + asset: str = "BTCUSDT", + price: float = 100.0, + size: float = 1.0, + filled_size: float = 1.0, + remaining_size: float = 0.0, + event_id: str = "", + venue_order_id: str = "V-1", + venue_client_id: str = "t1:t1", + status: VenueEventStatus = VenueEventStatus.FILLED, + reason: str = "", +) -> VenueEvent: + return VenueEvent( + 
timestamp=datetime.now(timezone.utc), + event_id=event_id or f"ev-{kind.value}-{trade_id}", + trade_id=trade_id, + slot_id=slot_id, + kind=kind, + status=status, + venue_order_id=venue_order_id, + venue_client_id=venue_client_id, + side=side, + asset=asset, + price=price, + size=size, + filled_size=filled_size, + remaining_size=remaining_size, + reason=reason, + ) + + +def _fresh_kernel( + *, + scenario: MockVenueScenario = None, + max_slots: int = 2, + capital: float = 25000.0, +) -> ExecutionKernel: + venue = MockVenueAdapter(scenario=scenario or MockVenueScenario()) + k = ExecutionKernel(max_slots=max_slots, venue=venue) + k.account.snapshot.capital = capital + k.account.snapshot.peak_capital = capital + k.account.snapshot.equity = capital + return k + + +# ============================================================ +# FLAW 1: Entry-order cancellation is structurally broken +# ============================================================ + +class TestFlaw1EntryCancel: + """CANCEL intent for entry orders must work, not just exit orders.""" + + def test_cancel_entry_order_accepted_by_rust(self): + """Rust kernel must accept CANCEL for an entry order in ENTRY_WORKING.""" + k = _fresh_kernel(scenario=MockVenueScenario(partial_fill_ratio=0.0, emit_fill_on_submit=False)) + r = k.process_intent(_mk_intent(action=E.ENTER, trade_id="ce1")) + assert r.accepted, f"ENTER rejected: {r.diagnostic_code}" + + slot = k._get_slot(0) + assert slot.fsm_state in {TradeStage.ORDER_REQUESTED, TradeStage.ENTRY_WORKING} + + cancel_result = k.process_intent(_mk_intent(action=E.CANCEL, trade_id="ce1")) + assert cancel_result.accepted, ( + f"CANCEL for entry order should be accepted, got " + f"accepted={cancel_result.accepted} " + f"diag={cancel_result.diagnostic_code}" + ) + + def test_cancel_entry_order_calls_venue_cancel(self): + """Python bridge must call venue.cancel() on active_entry_order.""" + scenario = MockVenueScenario(partial_fill_ratio=0.0, emit_fill_on_submit=False) + k = 
_fresh_kernel(scenario=scenario) + k.process_intent(_mk_intent(action=E.ENTER, trade_id="ce2")) + + entry_order = k.slot(0).active_entry_order + assert entry_order is not None, "Entry order should be attached" + + cancel_result = k.process_intent(_mk_intent(action=E.CANCEL, trade_id="ce2")) + assert cancel_result.accepted, f"CANCEL not accepted: {cancel_result.diagnostic_code}" + + def test_cancel_entry_no_fill_returns_to_idle(self): + """After cancelling an entry order that hasn't filled, slot must be IDLE.""" + k = _fresh_kernel(scenario=MockVenueScenario(partial_fill_ratio=0.0, emit_fill_on_submit=False)) + k.process_intent(_mk_intent(action=E.ENTER, trade_id="ce3")) + k.process_intent(_mk_intent(action=E.CANCEL, trade_id="ce3")) + + slot = k._get_slot(0) + assert slot.is_free(), ( + f"Slot should be free/IDLE after entry cancel, " + f"got state={slot.fsm_state} closed={slot.closed} " + f"entry_order={slot.active_entry_order} exit_order={slot.active_exit_order} " + f"size={slot.size}" + ) + + def test_cancel_entry_with_partial_fill(self): + """Cancel entry with partial fill should leave slot in correct state.""" + k = _fresh_kernel(scenario=MockVenueScenario(partial_fill_ratio=0.5)) + k.process_intent(_mk_intent(action=E.ENTER, trade_id="ce4", size=0.002)) + slot_after = k._get_slot(0) + assert slot_after.size > 0, "Should have partial fill" + + def test_cancel_entry_then_reenter(self): + """After entry cancel, a new ENTER should succeed.""" + k = _fresh_kernel(scenario=MockVenueScenario(partial_fill_ratio=0.0, emit_fill_on_submit=False)) + k.process_intent(_mk_intent(action=E.ENTER, trade_id="ce5a")) + k.process_intent(_mk_intent(action=E.CANCEL, trade_id="ce5a")) + + r = k.process_intent(_mk_intent(action=E.ENTER, trade_id="ce5b")) + assert r.accepted, f"Re-entry after cancel should succeed: {r.diagnostic_code}" + + +# ============================================================ +# FLAW 2: Rust CANCEL_ACK has no entry-order reset path +# 
============================================================ + +class TestFlaw2CancelAckEntry: + """CANCEL_ACK for entry orders must reset slot to IDLE.""" + + def test_cancel_ack_resets_entry_working_to_idle(self): + """When CANCEL_ACK arrives for an entry order, slot goes IDLE.""" + k = _fresh_kernel(scenario=MockVenueScenario(partial_fill_ratio=0.0, emit_fill_on_submit=False)) + k.process_intent(_mk_intent(action=E.ENTER, trade_id="ca1")) + + slot = k._get_slot(0) + assert slot.active_entry_order is not None + + venue_order = slot.active_entry_order + ack = _mk_venue_event( + kind=KernelEventKind.CANCEL_ACK, + trade_id="ca1", + venue_order_id=venue_order.venue_order_id, + venue_client_id=venue_order.venue_client_id, + status=VenueEventStatus.CANCELED, + ) + k.on_venue_event(ack) + + slot = k._get_slot(0) + assert slot.fsm_state == TradeStage.IDLE, ( + f"Slot should be IDLE after CANCEL_ACK on entry, got {slot.fsm_state}" + ) + assert slot.active_entry_order is None, "Entry order should be cleared" + assert slot.trade_id == "", "Trade ID should be cleared" + assert slot.size == 0.0, "Size should be zero" + + def test_cancel_ack_exit_still_works(self): + """Existing exit-order CANCEL_ACK path must still work. + + Deterministic setup: entry fills fully (POSITION_OPEN) but the exit only + partially fills, so the exit order stays live and the CANCEL_ACK exit + branch is genuinely exercised (no vacuous guard). 
+ """ + k = _fresh_kernel(scenario=MockVenueScenario(exit_partial_fill_ratio=0.5)) + k.process_intent(_mk_intent(action=E.ENTER, trade_id="ca2", size=0.002)) + slot = k._get_slot(0) + assert slot.fsm_state == TradeStage.POSITION_OPEN, ( + f"Entry should fill fully, got {slot.fsm_state}" + ) + + k.process_intent(_mk_intent(action=E.EXIT, trade_id="ca2", size=0.002)) + slot = k._get_slot(0) + assert slot.active_exit_order is not None, ( + "Exit order must remain live after a partial exit fill" + ) + ack = _mk_venue_event( + kind=KernelEventKind.CANCEL_ACK, + trade_id="ca2", + venue_order_id=slot.active_exit_order.venue_order_id, + venue_client_id=slot.active_exit_order.venue_client_id, + status=VenueEventStatus.CANCELED, + ) + k.on_venue_event(ack) + slot = k._get_slot(0) + assert slot.active_exit_order is None, "Exit order should be cleared by CANCEL_ACK" + assert slot.fsm_state == TradeStage.POSITION_OPEN, ( + f"Exit cancel must return slot to POSITION_OPEN, got {slot.fsm_state}" + ) + + +# ============================================================ +# FLAW 3: Outcome mixes pre/post-venue state +# ============================================================ + +class TestFlaw3OutcomeConsistency: + """process_intent outcome should have consistent state and transitions.""" + + def test_outcome_state_matches_actual_slot(self): + """The outcome.state should reflect the final state after venue events.""" + k = _fresh_kernel() + result = k.process_intent(_mk_intent(action=E.ENTER, trade_id="oc1")) + slot = k._get_slot(0) + assert result.state == slot.fsm_state, ( + f"Outcome state {result.state} != actual slot state {slot.fsm_state}" + ) + + def test_outcome_transitions_includes_venue_events(self): + """Transitions should include venue-event-triggered transitions.""" + k = _fresh_kernel() + result = k.process_intent(_mk_intent(action=E.ENTER, trade_id="oc2")) + transition_triggers = [t.trigger for t in result.transitions] + assert len(result.transitions) >= 1, ( + 
f"Should have at least 1 transition, got triggers: {transition_triggers}" + ) + + +# ============================================================ +# FLAW 4: Multi-leg exit final leg can double-close +# ============================================================ + +class TestFlaw4DoubleClose: + """Multi-leg exit final leg should only close once.""" + + def test_single_close_after_final_leg(self): + """After the last leg fills, slot.closed should be set exactly once.""" + k = _fresh_kernel(scenario=MockVenueScenario()) + k.process_intent( + _mk_intent( + action=E.ENTER, + trade_id="dc1", + size=0.002, + exit_leg_ratios=(0.5, 1.0), + ) + ) + k.process_intent( + _mk_intent( + action=E.EXIT, + trade_id="dc1", + size=0.001, + exit_leg_ratios=(0.5, 1.0), + ) + ) + k.process_intent( + _mk_intent( + action=E.EXIT, + trade_id="dc1", + size=0.001, + exit_leg_ratios=(1.0,), + ) + ) + slot = k._get_slot(0) + assert slot.closed, "Slot should be closed after final leg" + assert slot.fsm_state == TradeStage.CLOSED + + def test_no_extra_entry_order_clear_on_close(self): + """After close via multi-leg, active_entry_order should be consistent.""" + k = _fresh_kernel(scenario=MockVenueScenario()) + k.process_intent( + _mk_intent( + action=E.ENTER, + trade_id="dc2", + size=0.002, + exit_leg_ratios=(0.5, 1.0), + ) + ) + k.process_intent( + _mk_intent( + action=E.EXIT, + trade_id="dc2", + size=0.001, + exit_leg_ratios=(0.5, 1.0), + ) + ) + k.process_intent( + _mk_intent( + action=E.EXIT, + trade_id="dc2", + size=0.001, + exit_leg_ratios=(1.0,), + ) + ) + slot = k._get_slot(0) + assert slot.active_exit_order is None, "Exit order should be cleared" + assert slot.active_entry_order is None or slot.active_entry_order.status == VenueOrderStatus.FILLED + + +# ============================================================ +# FLAW 5: Capital settlement only triggers on terminal states +# ============================================================ + +class TestFlaw5CapitalSettleOnPartialFill: + 
"""Realized PnL should settle incrementally on partial fills.""" + + def test_partial_exit_settles_pnl_incrementally(self): + """Exit fill must settle realized PnL into capital — EXACTLY. + + This is the single most important invariant in DITAv2: capital is + the kernel account's authority and must move by precisely the + realized PnL of the fill (no balance-poll overwrite). The entry and + exit prices differ so realized PnL is strictly nonzero and the + capital-change assertion fires unconditionally (no vacuous guard). + """ + k = _fresh_kernel() + cap_before = k.account.snapshot.capital + + # SHORT entry at 100. + k.process_intent( + _mk_intent(action=E.ENTER, trade_id="ps1", side=TradeSide.SHORT, price=100.0, size=0.002) + ) + slot = k._get_slot(0) + assert slot.fsm_state == TradeStage.POSITION_OPEN + + # Exit at 90 -> SHORT closes in profit, realized PnL strictly positive. + k.process_intent( + _mk_intent(action=E.EXIT, trade_id="ps1", side=TradeSide.SHORT, price=90.0, size=0.002) + ) + slot = k._get_slot(0) + + assert slot.realized_pnl > 0.0, ( + f"SHORT exit below entry must realize positive PnL, got {slot.realized_pnl}" + ) + cap_after = k.account.snapshot.capital + # Single-authority invariant: capital moved by EXACTLY realized PnL. 
+ assert abs((cap_after - cap_before) - slot.realized_pnl) < 1e-9, ( + f"Capital delta {cap_after - cap_before} != realized_pnl {slot.realized_pnl} " + f"(before={cap_before} after={cap_after})" + ) + + +# ============================================================ +# FLAW 6: _legacy_intent silently drops order_type and limit_price +# ============================================================ + +class TestFlaw6LegacyIntentDrop: + """_legacy_intent must preserve order_type and limit_price.""" + + def test_legacy_intent_preserves_order_type(self): + """LegacyIntent conversion must include order_type.""" + from prod.clean_arch.dita_v2.bingx_venue import BingxVenueAdapter + + intent = _mk_intent( + action=E.ENTER, + trade_id="li1", + order_type="LIMIT", + limit_price=50000.0, + ) + legacy = BingxVenueAdapter._legacy_intent(intent) + + assert getattr(legacy, "order_type", None) == "LIMIT" or \ + legacy.metadata.get("_order_type") == "LIMIT" or \ + legacy.metadata.get("order_type") == "LIMIT", ( + f"order_type not preserved in legacy intent. " + f"Legacy fields: {dir(legacy)}, metadata: {legacy.metadata}" + ) + + def test_legacy_intent_preserves_limit_price(self): + """LegacyIntent conversion must include limit_price.""" + from prod.clean_arch.dita_v2.bingx_venue import BingxVenueAdapter + + intent = _mk_intent( + action=E.ENTER, + trade_id="li2", + order_type="LIMIT", + limit_price=50000.0, + ) + legacy = BingxVenueAdapter._legacy_intent(intent) + + assert getattr(legacy, "limit_price", 0) == 50000.0 or \ + legacy.metadata.get("_limit_price") == 50000.0 or \ + legacy.metadata.get("limit_price") == 50000.0, ( + f"limit_price not preserved in legacy intent. 
" + f"Legacy metadata: {legacy.metadata}" + ) + + +# ============================================================ +# FLAW 7: Mock venue partial_fill_ratio applies to both entry and exit +# ============================================================ + +class TestFlaw7MockVenueRatios: + """Mock venue should support different ratios for entry vs exit.""" + + def test_entry_exit_different_ratios(self): + """Entry can fill fully while exit fills partially.""" + k = _fresh_kernel(scenario=MockVenueScenario( + entry_partial_fill_ratio=1.0, + exit_partial_fill_ratio=0.5, + )) + r = k.process_intent(_mk_intent(action=E.ENTER, trade_id="mv1", size=0.002)) + assert r.accepted + slot = k._get_slot(0) + assert slot.fsm_state == TradeStage.POSITION_OPEN, f"Entry should fill fully: {slot.fsm_state}" + + def test_per_action_type_ratios(self): + """entry_partial_fill_ratio and exit_partial_fill_ratio should work independently.""" + scenario = MockVenueScenario( + entry_partial_fill_ratio=1.0, + exit_partial_fill_ratio=0.3, + ) + k = _fresh_kernel(scenario=scenario) + k.process_intent(_mk_intent(action=E.ENTER, trade_id="mv2", size=0.001)) + slot = k._get_slot(0) + assert slot.fsm_state == TradeStage.POSITION_OPEN + assert slot.size == 0.001 + + +# ============================================================ +# FLAW 8: Per-asset price precision helper does not exist +# ============================================================ + +class TestFlaw8PricePrecision: + """_format_price must exist for LIMIT order support.""" + + def test_format_price_exists_in_bingx_direct(self): + """BingxDirectExecutionAdapter should have _format_price method.""" + try: + from prod.clean_arch.adapters.bingx_direct import BingxDirectExecutionAdapter + assert hasattr(BingxDirectExecutionAdapter, "_format_price"), ( + "_format_price method missing from BingxDirectExecutionAdapter" + ) + except ImportError: + pytest.skip("bingx_direct not importable in this environment") + + +# 
============================================================ +# FLAW 9: Cancel path falls back to trade_id as symbol +# ============================================================ + +class TestFlaw9CancelSymbolFallback: + """Cancel should use correct asset, not trade_id as fallback symbol.""" + + def test_cancel_uses_slot_asset_not_trade_id(self): + """When cancel is called, the asset should come from the slot, not trade_id.""" + k = _fresh_kernel(scenario=MockVenueScenario(partial_fill_ratio=0.0, emit_fill_on_submit=False)) + k.process_intent(_mk_intent(action=E.ENTER, trade_id="cs1", asset="TRXUSDT")) + slot = k._get_slot(0) + + # ACK-only (no fill) deterministically leaves the entry order live. + assert slot.active_entry_order is not None, ( + "ACK-only entry must leave the entry order live for cancel-symbol fallback" + ) + metadata = slot.active_entry_order.metadata + assert metadata.get("asset") == "TRXUSDT", ( + f"Entry order metadata should contain asset. Got: {metadata}" + ) + + def test_mock_venue_cancel_event_has_asset(self): + """Mock venue cancel events should carry the correct asset.""" + k = _fresh_kernel(scenario=MockVenueScenario(partial_fill_ratio=0.0, emit_fill_on_submit=False)) + k.process_intent(_mk_intent(action=E.ENTER, trade_id="cs2", asset="XRPUSDT")) + slot = k._get_slot(0) + order = slot.active_entry_order + assert order is not None + assert order.metadata.get("asset") is not None or order.metadata.get("slot_id") is not None + + +# ============================================================ +# FLAW 10: Event dedup window is bounded at 64 +# ============================================================ + +class TestFlaw10EventDedup: + """Event dedup window should be large enough for realistic workloads.""" + + def test_dedup_window_accepts_many_events(self): + """A slot should handle > 64 events without dedup eviction.""" + k = _fresh_kernel() + k.process_intent(_mk_intent(action=E.ENTER, trade_id="ed1")) + + for i in range(70): + ev = 
_mk_venue_event( + kind=KernelEventKind.MARK_PRICE, + trade_id="ed1", + event_id=f"mp-{i:04d}", + price=100.0 + i * 0.01, + size=0.0, + filled_size=0.0, + ) + k.on_venue_event(ev) + + slot = k._get_slot(0) + assert len(slot.seen_event_ids) >= 70, ( + f"Expected >= 70 seen_event_ids, got {len(slot.seen_event_ids)}" + ) + + def test_dedup_eviction_does_not_accept_old_event(self): + """Evicted event IDs should still be rejected (with larger window).""" + k = _fresh_kernel() + k.process_intent(_mk_intent(action=E.ENTER, trade_id="ed2")) + + for i in range(70): + ev = _mk_venue_event( + kind=KernelEventKind.MARK_PRICE, + trade_id="ed2", + event_id=f"mp2-{i:04d}", + price=100.0 + i * 0.01, + size=0.0, + filled_size=0.0, + ) + k.on_venue_event(ev) + + old_ev = _mk_venue_event( + kind=KernelEventKind.MARK_PRICE, + trade_id="ed2", + event_id="mp2-0000", + price=99.0, + size=0.0, + filled_size=0.0, + ) + result = k.on_venue_event(old_ev) + assert result.diagnostic_code == KernelDiagnosticCode.DUPLICATE_EVENT, ( + f"Old evicted event should still be deduplicated, " + f"got {result.diagnostic_code}" + ) + + +# ============================================================ +# FLAW 11: Reconcile is a raw state override with no FSM validation +# ============================================================ + +class TestFlaw11ReconcileValidation: + """Reconcile should validate slot state consistency.""" + + def test_reconcile_rejects_position_open_with_zero_size(self): + """Reconciling with POSITION_OPEN but zero size should be rejected.""" + k = _fresh_kernel() + bad_slot = TradeSlot( + slot_id=0, + fsm_state=TradeStage.POSITION_OPEN, + size=0.0, + asset="BTCUSDT", + trade_id="bad1", + ) + result = k.reconcile_from_slots([bad_slot]) + slot = k._get_slot(0) + assert slot.fsm_state != TradeStage.POSITION_OPEN or slot.size > 0, ( + f"Reconcile should reject POSITION_OPEN with size=0, " + f"got state={slot.fsm_state} size={slot.size}" + ) + + def 
test_reconcile_rejects_idle_with_nonzero_size(self): + """Reconciling with IDLE but nonzero size should be rejected.""" + k = _fresh_kernel() + bad_slot = TradeSlot( + slot_id=0, + fsm_state=TradeStage.IDLE, + size=5.0, + asset="BTCUSDT", + trade_id="bad2", + ) + result = k.reconcile_from_slots([bad_slot]) + slot = k._get_slot(0) + assert slot.size == 0.0 or slot.fsm_state != TradeStage.IDLE, ( + f"Reconcile should reject IDLE with size > 0, " + f"got state={slot.fsm_state} size={slot.size}" + ) + + def test_reconcile_accepts_valid_slot(self): + """Valid slot data should still reconcile correctly.""" + k = _fresh_kernel() + k.process_intent(_mk_intent(action=E.ENTER, trade_id="rv1")) + slot_data = k._get_slot(0) + result = k.reconcile_from_slots([slot_data]) + assert result.accepted + + +# ============================================================ +# FLAW 12: Outcome transitions are incomplete — pre-venue only +# ============================================================ + +class TestFlaw12OutcomeTransitions: + """process_intent outcome transitions should include venue event transitions.""" + + def test_transitions_include_post_venue(self): + """After a full entry cycle, transitions should include ORDER_ACK and FULL_FILL.""" + k = _fresh_kernel() + result = k.process_intent(_mk_intent(action=E.ENTER, trade_id="ot1")) + triggers = [t.trigger for t in result.transitions] + assert any(t in triggers for t in ["ENTER_INTENT", "ORDER_ACK", "FULL_FILL"]), ( + f"Transitions should include venue event triggers. 
Got: {triggers}" + ) + + def test_transitions_count_matches_lifecycle(self): + """Full entry lifecycle should produce multiple transitions.""" + k = _fresh_kernel() + result = k.process_intent(_mk_intent(action=E.ENTER, trade_id="ot2")) + slot = k._get_slot(0) + assert slot.fsm_state in {TradeStage.POSITION_OPEN, TradeStage.ENTRY_WORKING}, ( + f"Default full-fill entry must open the position, got {slot.fsm_state}" + ) + assert len(result.transitions) >= 2, ( + f"Full entry should produce >= 2 transitions " + f"(intent + venue ack/fill), got {len(result.transitions)}: " + f"{[t.trigger for t in result.transitions]}" + ) + + +# ============================================================ +# FLAW 13: Unsettled realized PnL on re-entry +# ============================================================ + +class TestFlaw13UnsettledPnlOnReentry: + """Re-entry should not silently discard unrealized settled PnL.""" + + def test_reentry_after_full_close_no_pnl_loss(self): + """After full close and settle, re-entry should not lose PnL.""" + k = _fresh_kernel() + cap_before = k.account.snapshot.capital + + k.process_intent(_mk_intent(action=E.ENTER, trade_id="rp1")) + slot = k._get_slot(0) + assert slot.fsm_state == TradeStage.POSITION_OPEN + + k.process_intent( + _mk_intent(action=E.EXIT, trade_id="rp1", price=100.5) + ) + slot = k._get_slot(0) + assert slot.is_free() + + cap_after_first = k.account.snapshot.capital + + k.process_intent(_mk_intent(action=E.ENTER, trade_id="rp2")) + k.process_intent( + _mk_intent(action=E.EXIT, trade_id="rp2", price=101.0) + ) + + cap_after_second = k.account.snapshot.capital + assert cap_after_second > 0, "Capital should remain positive" + assert abs(cap_after_second - cap_before) < cap_before * 0.5 + + def test_pnl_warning_on_unsettled_reentry(self): + """Re-entry on a slot with unsettled PnL should at least warn.""" + k = _fresh_kernel(scenario=MockVenueScenario()) + k.process_intent(_mk_intent(action=E.ENTER, trade_id="rw1")) + 
k.process_intent(_mk_intent(action=E.EXIT, trade_id="rw1")) + slot = k._get_slot(0) + assert slot.is_free(), "Full close must free the slot for re-entry" + r = k.process_intent(_mk_intent(action=E.ENTER, trade_id="rw2")) + assert r.accepted, "Re-entry on a freed slot must be accepted" + + +# ============================================================ +# REGRESSION: Existing behaviour must not break +# ============================================================ + +class TestRegression: + """Ensure existing happy-path scenarios still work.""" + + def test_basic_entry_exit(self): + k = _fresh_kernel() + cap_before = k.account.snapshot.capital + r1 = k.process_intent(_mk_intent(action=E.ENTER, trade_id="re1")) + assert r1.accepted + r2 = k.process_intent(_mk_intent(action=E.EXIT, trade_id="re1")) + assert r2.accepted + slot = k._get_slot(0) + assert slot.is_free() + + def test_multi_leg_exit(self): + k = _fresh_kernel() + k.process_intent( + _mk_intent(action=E.ENTER, trade_id="re2", size=0.002, exit_leg_ratios=(0.5, 1.0)) + ) + k.process_intent( + _mk_intent(action=E.EXIT, trade_id="re2", size=0.001, exit_leg_ratios=(0.5, 1.0)) + ) + k.process_intent( + _mk_intent(action=E.EXIT, trade_id="re2", size=0.001, exit_leg_ratios=(1.0,)) + ) + slot = k._get_slot(0) + assert slot.is_free() + + def test_slot_busy_rejection(self): + k = _fresh_kernel() + r1 = k.process_intent(_mk_intent(action=E.ENTER, trade_id="re3a")) + assert r1.accepted + r2 = k.process_intent(_mk_intent(action=E.ENTER, trade_id="re3b")) + assert not r2.accepted + assert r2.diagnostic_code == KernelDiagnosticCode.SLOT_BUSY + + def test_exit_on_idle_rejected(self): + k = _fresh_kernel() + r = k.process_intent(_mk_intent(action=E.EXIT, trade_id="re4")) + assert not r.accepted + + def test_reconcile_preserves_state(self): + k = _fresh_kernel() + k.process_intent(_mk_intent(action=E.ENTER, trade_id="re5")) + slot_data = k._get_slot(0) + k.reconcile_from_slots([slot_data]) + slot_after = k._get_slot(0) + assert 
slot_after.trade_id == "re5" + + def test_dedup_duplicate_event(self): + k = _fresh_kernel() + k.process_intent(_mk_intent(action=E.ENTER, trade_id="re6")) + slot = k._get_slot(0) + dup = _mk_venue_event( + kind=KernelEventKind.FULL_FILL, + trade_id="re6", + event_id="dedup-regression", + price=100.0, + size=1.0, + filled_size=1.0, + ) + k.on_venue_event(dup) + result = k.on_venue_event(dup) + assert result.diagnostic_code == KernelDiagnosticCode.DUPLICATE_EVENT + + def test_ten_cycles_no_leak(self): + k = _fresh_kernel() + for i in range(10): + k.process_intent(_mk_intent(action=E.ENTER, trade_id=f"tc{i}")) + k.process_intent(_mk_intent(action=E.EXIT, trade_id=f"tc{i}")) + slot = k._get_slot(0) + assert slot.is_free() + assert k.account.snapshot.capital > 0 + + +# ============================================================ +# I15: CANCEL_REJECT must un-stick EXIT_WORKING slot +# ============================================================ + +class TestI15CancelRejectUnstick: + """CANCEL_REJECT on an exit order must clear active_exit_order and return + the slot to POSITION_OPEN so the algo can retry the exit.""" + + def _enter_to_position_open(self, k: ExecutionKernel, trade_id: str) -> None: + r = k.process_intent(_mk_intent(action=E.ENTER, trade_id=trade_id)) + assert r.accepted, f"ENTER rejected: {r.diagnostic_code}" + slot = k._get_slot(0) + assert slot.fsm_state == TradeStage.POSITION_OPEN, ( + f"Expected POSITION_OPEN after ENTER, got {slot.fsm_state}" + ) + + def test_cancel_reject_exits_working_returns_to_position_open(self): + """Core I15 regression: CANCEL_REJECT on EXIT_WORKING must unstick slot.""" + # partial_fill_ratio=0 prevents fills on submit; fills are injected manually. 
+ k_no_fill = _fresh_kernel(scenario=MockVenueScenario(partial_fill_ratio=0.0, emit_fill_on_submit=False)) + k_no_fill.process_intent(_mk_intent(action=E.ENTER, trade_id="i15b")) + # Manually force POSITION_OPEN by injecting FULL_FILL + fill = _mk_venue_event( + kind=KernelEventKind.FULL_FILL, + trade_id="i15b", + event_id="fill-i15b", + price=100.0, + size=1.0, + filled_size=1.0, + ) + k_no_fill.on_venue_event(fill) + slot = k_no_fill._get_slot(0) + assert slot.fsm_state == TradeStage.POSITION_OPEN, ( + f"Setup failed: expected POSITION_OPEN, got {slot.fsm_state}" + ) + + # Submit exit (no fill emitted) — slot enters EXIT_WORKING + k_no_fill.process_intent(_mk_intent(action=E.EXIT, trade_id="i15b")) + slot = k_no_fill._get_slot(0) + assert slot.fsm_state in (TradeStage.EXIT_WORKING, TradeStage.EXIT_REQUESTED, TradeStage.EXIT_SENT), ( + f"Setup failed: expected an exit state, got {slot.fsm_state}" + ) + assert slot.active_exit_order is not None, "Setup: active_exit_order should be set" + + # Now deliver CANCEL_REJECT + cancel_rej = _mk_venue_event( + kind=KernelEventKind.CANCEL_REJECT, + trade_id="i15b", + event_id="cr-i15b", + status=VenueEventStatus.CANCELED, + ) + result = k_no_fill.on_venue_event(cancel_rej) + + slot = k_no_fill._get_slot(0) + assert slot.fsm_state == TradeStage.POSITION_OPEN, ( + f"I15: slot must return to POSITION_OPEN after CANCEL_REJECT, got {slot.fsm_state}" + ) + assert slot.active_exit_order is None, ( + "I15: active_exit_order must be cleared by CANCEL_REJECT" + ) + assert result.diagnostic_code == KernelDiagnosticCode.CANCEL_REJECTED + + def test_after_cancel_reject_exit_can_be_resubmitted(self): + """After CANCEL_REJECT un-sticks the slot, a new EXIT must be accepted.""" + k = _fresh_kernel(scenario=MockVenueScenario(partial_fill_ratio=0.0, emit_fill_on_submit=False)) + k.process_intent(_mk_intent(action=E.ENTER, trade_id="i15c")) + fill = _mk_venue_event( + kind=KernelEventKind.FULL_FILL, + trade_id="i15c", + event_id="fill-i15c", + 
price=100.0, + size=1.0, + filled_size=1.0, + ) + k.on_venue_event(fill) + + k.process_intent(_mk_intent(action=E.EXIT, trade_id="i15c")) + cancel_rej = _mk_venue_event( + kind=KernelEventKind.CANCEL_REJECT, + trade_id="i15c", + event_id="cr-i15c", + status=VenueEventStatus.CANCELED, + ) + k.on_venue_event(cancel_rej) + + # Slot is back to POSITION_OPEN — a new EXIT intent must be accepted + r = k.process_intent(_mk_intent(action=E.EXIT, trade_id="i15c")) + assert r.accepted, ( + f"I15: retry EXIT after CANCEL_REJECT must be accepted, got {r.diagnostic_code}" + ) + + +# ============================================================ +# O5: _run() thread-pool path must time out, not hang forever +# ============================================================ + +class TestO5RunTimeout: + """O5: BingxVenueAdapter._run() must raise TimeoutError instead of freezing + when the backend call exceeds the configured deadline.""" + + def test_run_raises_timeout_from_async_context(self, monkeypatch): + """When called from inside an event loop and the backend is slow, + _run() must raise TimeoutError within the configured deadline.""" + from prod.clean_arch.dita_v2.bingx_venue import BingxVenueAdapter + import asyncio + + adapter = object.__new__(BingxVenueAdapter) + # Patch to a very short deadline so the test completes fast. 
+ monkeypatch.setattr(BingxVenueAdapter, "_BACKEND_TIMEOUT_S", 0.15) + + async def _slow_coroutine(): + await asyncio.sleep(5.0) + return "never" + + async def _run_from_async(): + with pytest.raises(TimeoutError): + adapter._run(_slow_coroutine()) + + asyncio.run(_run_from_async()) + + def test_run_returns_normally_within_deadline(self, monkeypatch): + """Fast backend calls must succeed and return their value.""" + from prod.clean_arch.dita_v2.bingx_venue import BingxVenueAdapter + import asyncio + + adapter = object.__new__(BingxVenueAdapter) + monkeypatch.setattr(BingxVenueAdapter, "_BACKEND_TIMEOUT_S", 2.0) + + async def _fast_coroutine(): + return 42 + + async def _run_from_async(): + result = adapter._run(_fast_coroutine()) + assert result == 42 + + asyncio.run(_run_from_async()) + + +# ============================================================ +# O1: _maybe_close() must not silently skip close from async context +# ============================================================ + +class TestO1MaybeCloseAsyncSafe: + """O1: _maybe_close() must run the coroutine even when called from an + async context (previously it swallowed RuntimeError and skipped close).""" + + def test_maybe_close_from_sync_context(self): + """Sync caller: asyncio.run() path must run the close coroutine.""" + from prod.clean_arch.dita_v2.launcher import _maybe_close + + closed = [] + + class _FakeAsync: + async def close(self) -> None: + closed.append(True) + + _maybe_close(_FakeAsync()) + assert closed == [True], "close() coroutine must run from sync context" + + def test_maybe_close_from_async_context(self): + """Async caller: thread-pool path must run the close coroutine without + raising RuntimeError (the old silent-skip bug).""" + from prod.clean_arch.dita_v2.launcher import _maybe_close + + closed = [] + + class _FakeAsync: + async def close(self) -> None: + closed.append(True) + + async def _caller(): + _maybe_close(_FakeAsync()) + + asyncio.run(_caller()) + assert closed == [True], 
"close() coroutine must run from async context" + + def test_maybe_close_sync_method_still_works(self): + """Non-coroutine close() must still be called (no regression).""" + from prod.clean_arch.dita_v2.launcher import _maybe_close + + closed = [] + + class _FakeSync: + def close(self) -> None: + closed.append(True) + + _maybe_close(_FakeSync()) + assert closed == [True], "sync close() must still be called" diff --git a/prod/clean_arch/dita_v2/test_kernel_reliability.py b/prod/clean_arch/dita_v2/test_kernel_reliability.py new file mode 100644 index 0000000..f0ad642 --- /dev/null +++ b/prod/clean_arch/dita_v2/test_kernel_reliability.py @@ -0,0 +1,385 @@ +"""Kernel nuclear-reliability test suite. + +Covers: +- G1: catch_unwind — FFI boundary survives Rust panics (process doesn't abort) +- G2: IndexSet dedup — 1024-entry account-event dedup, idempotent re-play +- G3: snapshot/restore — full state round-trip, version check, slot-count check +- G4: capital_frozen — reconcile ERROR blocks ENTERs; OK unfreezes +""" +from __future__ import annotations + +import json +import sys +sys.path.insert(0, "/mnt/dolphinng5_predict") + +import pytest +from datetime import datetime, timezone +from prod.clean_arch.dita_v2.rust_backend import ExecutionKernel +from prod.clean_arch.dita_v2.contracts import KernelIntent, KernelCommandType, TradeSide + + +def _kernel(seed: float = 10_000.0) -> ExecutionKernel: + k = ExecutionKernel(max_slots=4) + k.set_seed_capital(seed) + return k + + +def _acct(k: ExecutionKernel) -> dict: + return k.snapshot()["account"] + + +# --------------------------------------------------------------------------- +# G2: Account-event dedup — idempotent replay +# --------------------------------------------------------------------------- + +class TestAccountEventDedup: + def test_same_event_id_not_double_applied(self): + k = _kernel(10_000.0) + k.on_account_event({ + "kind": "FILL_SETTLED", + "event_id": "fill-001", + "realized_pnl": 0.0, + "fee": 5.0, + 
"is_maker": False, + }) + r2 = k.on_account_event({ + "kind": "FILL_SETTLED", + "event_id": "fill-001", # duplicate + "realized_pnl": 0.0, + "fee": 5.0, + "is_maker": False, + }) + # k_taker_fees must still be 5.0, not 10.0 + assert r2.get("k_taker_fees") == pytest.approx(5.0), \ + f"duplicate event applied twice: k_taker_fees={r2.get('k_taker_fees')}" + assert r2.get("duplicate_event") is True + + def test_unique_event_ids_all_applied(self): + k = _kernel(10_000.0) + n = 20 + for i in range(n): + k.on_account_event({ + "kind": "FILL_SETTLED", + "event_id": f"fill-{i:04d}", + "realized_pnl": 0.0, + "fee": 1.0, + "is_maker": False, + }) + assert _acct(k)["k_fees_paid"] == pytest.approx(float(n)) + + def test_dedup_beyond_64_events_no_false_duplicate(self): + """Prove the new IndexSet (1024 cap) does not evict at 65 like the old Vec did.""" + k = _kernel(10_000.0) + # Feed 100 unique FILL_SETTLED events (well beyond old 64-entry rolling window) + for i in range(100): + k.on_account_event({ + "kind": "FILL_SETTLED", + "event_id": f"x-{i:04d}", + "realized_pnl": 0.0, + "fee": 1.0, + "is_maker": False, + }) + # Replay event 0 — must still be recognised as duplicate + r = k.on_account_event({ + "kind": "FILL_SETTLED", + "event_id": "x-0000", + "realized_pnl": 0.0, + "fee": 1.0, + "is_maker": False, + }) + assert r.get("duplicate_event") is True, \ + "event x-0000 should be deduped after 100 events; IndexSet eviction too aggressive" + assert _acct(k)["k_fees_paid"] == pytest.approx(100.0) + + def test_predicted_fill_not_deduplicated(self): + """PREDICTED_FILL is always applied (stateless preview).""" + k = _kernel(10_000.0) + ev = {"kind": "PREDICTED_FILL", "event_id": "pred-001", + "fill_price": 100.0, "fill_qty": 1.0, "realized_pnl": 0.0, "is_maker": False} + k.on_account_event(ev) + r2 = k.on_account_event(ev) + # PREDICTED_FILL is not deduplicated — applied twice → taker_fees = 0.05*2 = 0.10 + # (This is intentional: predictions are overwritten on settle, not 
state-changing) + assert r2.get("duplicate_event") is not True + + def test_empty_event_id_never_deduplicated(self): + """Empty event_id bypasses dedup (some events have no id).""" + k = _kernel(10_000.0) + k.on_account_event({"kind": "FILL_SETTLED", "event_id": "", + "realized_pnl": 0.0, "fee": 3.0, "is_maker": False}) + r2 = k.on_account_event({"kind": "FILL_SETTLED", "event_id": "", + "realized_pnl": 0.0, "fee": 3.0, "is_maker": False}) + # Both applied (empty id = no dedup) + assert r2.get("duplicate_event") is not True + assert _acct(k)["k_fees_paid"] == pytest.approx(6.0) + + +# --------------------------------------------------------------------------- +# G3: Snapshot / restore — full state round-trip +# --------------------------------------------------------------------------- + +class TestSnapshotRestore: + def test_save_state_returns_valid_json(self): + k = _kernel(10_000.0) + s = k.save_state() + obj = json.loads(s) + assert obj["version"] == 1 + assert "slots" in obj + assert "account" in obj + assert "snapshot_ts_ms" in obj + + def test_save_includes_fee_calibration(self): + k = _kernel(10_000.0) + k.calibrate_fee(fill_price=100.0, fill_qty=1.0, actual_fee=0.055) # ratio ≠ 1.0 + s = k.save_state() + obj = json.loads(s) + # fee_config should be persisted with updated calibration_ratio + fc = obj["account"]["fee_config"] + assert fc["calibration_ratio"] != pytest.approx(1.0) + assert fc["calibration_samples"] == 1 + + def test_restore_round_trip_capital_and_fees(self): + k1 = _kernel(10_000.0) + k1.on_account_event({"kind": "FILL_SETTLED", "event_id": "f1", + "realized_pnl": 150.0, "fee": 3.5, "is_maker": False}) + k1.on_account_event({"kind": "FILL_SETTLED", "event_id": "f2", + "realized_pnl": -30.0, "fee": 1.5, "is_maker": True}) + state = k1.save_state() + + k2 = _kernel(10_000.0) # fresh kernel, same max_slots + assert k2.restore_state(state) is True + + a1 = _acct(k1) + a2 = _acct(k2) + assert a2["k_capital"] == pytest.approx(a1["k_capital"]) + 
assert a2["k_fees_paid"] == pytest.approx(a1["k_fees_paid"]) + assert a2["k_realized_pnl"] == pytest.approx(a1["k_realized_pnl"]) + + def test_restore_dedup_set_preserved(self): + """After restore, previously-seen event_ids must still be deduped.""" + k1 = _kernel(10_000.0) + k1.on_account_event({"kind": "FILL_SETTLED", "event_id": "fill-99", + "realized_pnl": 0.0, "fee": 5.0, "is_maker": False}) + state = k1.save_state() + + k2 = _kernel(10_000.0) + k2.restore_state(state) + r = k2.on_account_event({"kind": "FILL_SETTLED", "event_id": "fill-99", + "realized_pnl": 0.0, "fee": 5.0, "is_maker": False}) + assert r.get("duplicate_event") is True, \ + "event fill-99 should be deduped in restored kernel" + + def test_restore_version_mismatch_rejected(self): + k = _kernel(10_000.0) + state_obj = json.loads(k.save_state()) + state_obj["version"] = 999 # wrong version + result = k.restore_state(json.dumps(state_obj)) + assert result is False + + def test_restore_slot_count_mismatch_rejected(self): + k_4 = ExecutionKernel(max_slots=4) + k_4.set_seed_capital(10_000.0) + state = k_4.save_state() + + k_2 = ExecutionKernel(max_slots=2) # different max_slots + k_2.set_seed_capital(10_000.0) + result = k_2.restore_state(state) + assert result is False + + def test_restore_corrupt_json_rejected(self): + k = _kernel(10_000.0) + result = k.restore_state("{not valid json") + assert result is False + + def test_save_state_snapshot_ts_positive(self): + k = _kernel(10_000.0) + obj = json.loads(k.save_state()) + assert obj["snapshot_ts_ms"] > 0 + + +# --------------------------------------------------------------------------- +# G4: capital_frozen — reconcile ERROR blocks ENTERs +# --------------------------------------------------------------------------- + +class TestCapitalFrozen: + def _trigger_error(self, k: ExecutionKernel) -> None: + """Inject a large balance divergence to force reconcile ERROR.""" + k.on_account_event({ + "kind": "ACCOUNT_UPDATE", + "wallet_balance": 1_000.0, # 
kernel thinks 10_000 → delta = 9_000 > 20 + "available_margin": 1_000.0, + "used_margin": 0.0, + "maint_margin": 0.0, + }) + + def _restore_ok(self, k: ExecutionKernel) -> None: + """Sync E-facts to match K so reconcile returns OK.""" + k.on_account_event({ + "kind": "ACCOUNT_UPDATE", + "wallet_balance": 10_000.0, + "available_margin": 10_000.0, + "used_margin": 0.0, + "maint_margin": 0.0, + }) + + def test_capital_frozen_false_initially(self): + k = _kernel(10_000.0) + assert k.is_capital_frozen() is False + + def test_capital_frozen_true_on_error(self): + k = _kernel(10_000.0) + self._trigger_error(k) + assert k.is_capital_frozen() is True + + def test_capital_frozen_in_on_account_event_response(self): + k = _kernel(10_000.0) + self._trigger_error(k) + r = k.on_account_event({"kind": "FUNDING_FEE", "funding_amount": 0.0}) + assert r.get("capital_frozen") is True + + def _mk_intent(self, trade_id: str, slot_id: int = 0) -> KernelIntent: + return KernelIntent( + timestamp=datetime.now(tz=timezone.utc), + intent_id=f"i-{trade_id}", + trade_id=trade_id, + slot_id=slot_id, + asset="TRXUSDT", + side=TradeSide.SHORT, + action=KernelCommandType.ENTER, + reference_price=0.15, + target_size=100.0, + leverage=10.0, + ) + + def test_enter_blocked_when_capital_frozen(self): + k = _kernel(10_000.0) + self._trigger_error(k) + result = k.process_intent(self._mk_intent("test-frozen")) + assert result.accepted is False, f"ENTER should be blocked; got {result.diagnostic_code}" + assert result.diagnostic_code == "CAPITAL_FROZEN" or \ + (hasattr(result.diagnostic_code, 'value') and result.diagnostic_code.value == "CAPITAL_FROZEN"), \ + f"expected CAPITAL_FROZEN, got {result.diagnostic_code}" + + def test_capital_unfrozen_on_ok_reconcile(self): + k = _kernel(10_000.0) + self._trigger_error(k) + assert k.is_capital_frozen() is True + self._restore_ok(k) + assert k.is_capital_frozen() is False + + def test_enter_allowed_after_unfreeze(self): + k = _kernel(10_000.0) + 
self._trigger_error(k) + self._restore_ok(k) + # After reconcile OK, ENTERs should be accepted again (FSM may reject for other + # reasons but NOT capital_frozen) + result = k.process_intent(self._mk_intent("test-unfrozen")) + code = result.diagnostic_code.value if hasattr(result.diagnostic_code, "value") else str(result.diagnostic_code) + assert code != "CAPITAL_FROZEN", f"ENTER should not be frozen after OK reconcile; got {code}" + + def test_frozen_snapshot_round_trip(self): + """capital_frozen flag must survive snapshot/restore.""" + k1 = _kernel(10_000.0) + self._trigger_error(k1) + assert k1.is_capital_frozen() is True + state = k1.save_state() + + k2 = _kernel(10_000.0) + k2.restore_state(state) + assert k2.is_capital_frozen() is True + + +# --------------------------------------------------------------------------- +# I14: Startup reconcile from Zinc — non-idle slots must be re-anchored +# --------------------------------------------------------------------------- + +class TestI14StartupZincRestore: + """I14: A freshly-created ExecutionKernel must load any non-idle slot state + from the ZincPlane so that a restart after a crash doesn't silently treat + live positions as IDLE and allow duplicate ENTERs.""" + + def _mk_intent(self, trade_id: str) -> KernelIntent: + return KernelIntent( + timestamp=datetime.now(timezone.utc), + intent_id=trade_id, + trade_id=trade_id, + slot_id=0, + asset="BTCUSDT", + side=TradeSide.SHORT, + action=KernelCommandType.ENTER, + reference_price=100.0, + target_size=1.0, + leverage=1.0, + exit_leg_ratios=(1.0,), + reason="i14-test", + ) + + def test_fresh_kernel_loads_zinc_non_idle_slots(self): + """Kernel 2 (simulated restart) must see the slot left by Kernel 1.""" + from prod.clean_arch.dita_v2.zinc_plane import InMemoryZincPlane + from prod.clean_arch.dita_v2.contracts import TradeStage + + shared_zinc = InMemoryZincPlane() + + # Kernel 1: ENTER a trade → Zinc gets the POSITION_OPEN slot + k1 = ExecutionKernel(max_slots=4, 
zinc_plane=shared_zinc) + k1.set_seed_capital(10_000.0) + result = k1.process_intent(self._mk_intent("i14-trade")) + assert result.accepted, f"ENTER failed: {result.diagnostic_code}" + + slot_k1 = k1._get_slot(0) + assert not slot_k1.is_free(), ( + f"Setup: slot should be non-idle after ENTER, got {slot_k1.fsm_state}" + ) + + # Kernel 2: fresh instance with the SAME shared_zinc (simulates restart) + k2 = ExecutionKernel(max_slots=4, zinc_plane=shared_zinc) + k2.set_seed_capital(10_000.0) + slot_k2 = k2._get_slot(0) + + assert not slot_k2.is_free(), ( + f"I14: restarted kernel must not see IDLE for a live slot; " + f"got fsm_state={slot_k2.fsm_state}" + ) + assert slot_k2.trade_id == "i14-trade", ( + f"I14: trade_id must survive restart, got {slot_k2.trade_id!r}" + ) + + def test_all_idle_zinc_does_not_corrupt(self): + """If Zinc only has IDLE slots, startup reconcile is a no-op.""" + from prod.clean_arch.dita_v2.zinc_plane import InMemoryZincPlane + + shared_zinc = InMemoryZincPlane() + # No trades — Zinc is empty + k = ExecutionKernel(max_slots=4, zinc_plane=shared_zinc) + k.set_seed_capital(10_000.0) + for sid in range(4): + assert k._get_slot(sid).is_free(), ( + f"Slot {sid} must be IDLE when Zinc has no live state" + ) + + +# --------------------------------------------------------------------------- +# O10: ExecutionKernel.close() + context manager +# --------------------------------------------------------------------------- + +class TestO10KernelClose: + """O10: close() must release the Rust handle deterministically; calling it + multiple times must not raise; context manager must call close().""" + + def test_close_nulls_backend(self): + k = _kernel() + assert k._backend is not None + k.close() + assert k._backend is None, "close() must null _backend to prevent double-free" + + def test_close_idempotent(self): + k = _kernel() + k.close() + k.close() # must not raise + + def test_context_manager_calls_close(self): + with ExecutionKernel(max_slots=2) as k: + 
k.set_seed_capital(5_000.0) + assert k._backend is not None + assert k._backend is None, "__exit__ must have called close()"