From a89e766da1bc07c1c7f9526de1cf90b27ccd5023 Mon Sep 17 00:00:00 2001 From: Codex Date: Wed, 3 Jun 2026 18:30:10 +0200 Subject: [PATCH] PINK: fix ctypes c_char_p null-byte truncation (INVALID_INTENT_PARSE) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit _to_rust_bytes() centralises all Python→Rust JSON serialisation: - _json_null_clean() strips U+0000 from all string values recursively - ensure_ascii=True guarantees no 0x00 in output bytes - All _json() call sites migrated; mode/verbosity now .encode("ascii") - 9 null-safety unit tests added to TestRustBytesNullSafety Root cause: ctypes.c_char_p silently truncates at first 0x00 byte, causing serde_json "premature end of input at column 41" on EXIT intents with BNB-USDT leverage values. Long-term fix: Rust FFI (ptr, len) pairs. Co-Authored-By: Claude Sonnet 4.6 --- prod/clean_arch/dita_v2/rust_backend.py | 89 +++++++++++++++++++------ prod/clean_arch/dita_v2/test_flaws.py | 77 +++++++++++++++++++++ 2 files changed, 147 insertions(+), 19 deletions(-) diff --git a/prod/clean_arch/dita_v2/rust_backend.py b/prod/clean_arch/dita_v2/rust_backend.py index 390c26a..add817f 100644 --- a/prod/clean_arch/dita_v2/rust_backend.py +++ b/prod/clean_arch/dita_v2/rust_backend.py @@ -50,6 +50,59 @@ def _repo_root() -> Path: return Path(__file__).resolve().parents[3] +# ── Rust FFI JSON encoding ──────────────────────────────────────────────────── +# +# All JSON that crosses the Python→Rust boundary via ctypes.c_char_p MUST be +# null-byte-free. ctypes.c_char_p is a C null-terminated string: the first +# 0x00 byte silently terminates the string, so Rust's serde_json parser only +# sees a truncated payload → "premature end of input at column N". +# +# Root cause: json.dumps(..., ensure_ascii=False).encode("utf-8") can produce +# 0x00 bytes when any string value contains the Unicode null character U+0000 +# (e.g. from BingX event IDs, venue order IDs, or metadata fields). +# +# Two-layer fix: +# 1. _json_null_clean() — strip null chars from all string values before JSON +# 2. ensure_ascii=True — encode to ASCII; guarantees no 0x00 in output +# (UTF-8 encodes U+0000 as 0x00; ASCII refuses non-ASCII entirely) +# +# Long-term path: change the Rust FFI to accept (ptr, len) pairs instead of +# null-terminated strings, removing the c_char_p truncation class entirely. +# That requires Rust changes; this fix is safe and sufficient until then. + +def _json_null_clean(obj: Any) -> Any: + """Recursively replace U+0000 null chars in string values with U+FFFD. + + Null chars are invisible in logs and repr() but produce 0x00 bytes in + UTF-8 encoding that silently truncate ctypes c_char_p payloads. + """ + if isinstance(obj, str): + return obj.replace("\x00", "�") if "\x00" in obj else obj + if isinstance(obj, dict): + return {_json_null_clean(k): _json_null_clean(v) for k, v in obj.items()} + if isinstance(obj, (list, tuple)): + return [_json_null_clean(v) for v in obj] + return obj + + +def _to_rust_bytes(obj: Any) -> bytes: + """Serialize *obj* to ASCII JSON bytes safe for ctypes.c_char_p. + + Never raises; any remaining null byte after sanitization is replaced and + the anomaly is logged — so a future regression is visible immediately. + """ + cleaned = _json_null_clean(json_safe(obj)) + encoded = json.dumps(cleaned, separators=(",", ":"), ensure_ascii=True).encode("ascii") + if b"\x00" in encoded: + import logging as _log + _log.getLogger(__name__).error( + "BUG: null byte in Rust-bound JSON after sanitization — replacing. " + "Inspect _json_null_clean() for uncovered type." + ) + encoded = encoded.replace(b"\x00", b"?") + return encoded + + def _crate_dir() -> Path: return Path(__file__).resolve().with_name("_rust_kernel") @@ -166,8 +219,7 @@ class _RustKernelLib: return json.loads(self._take_string(raw)) def set_slot_json(self, handle: ctypes.c_void_p, slot_id: int, payload: Dict[str, Any]) -> None: - encoded = json.dumps(json_safe(payload), separators=(",", ":"), ensure_ascii=False).encode("utf-8") - rc = self.lib.dita_kernel_set_slot_json(handle, ctypes.c_size_t(slot_id), ctypes.c_char_p(encoded)) + rc = self.lib.dita_kernel_set_slot_json(handle, ctypes.c_size_t(slot_id), ctypes.c_char_p(_to_rust_bytes(payload))) if rc != 0: raise RuntimeError(f"dita_kernel_set_slot_json failed rc={rc}") @@ -179,12 +231,11 @@ class _RustKernelLib: mode: str, verbosity: str, ) -> Dict[str, Any]: - encoded = json.dumps(json_safe(payload), separators=(",", ":"), ensure_ascii=False).encode("utf-8") raw = self.lib.dita_kernel_process_intent_json( handle, - ctypes.c_char_p(encoded), - ctypes.c_char_p(mode.encode("utf-8")), - ctypes.c_char_p(verbosity.encode("utf-8")), + ctypes.c_char_p(_to_rust_bytes(payload)), + ctypes.c_char_p(mode.encode("ascii")), + ctypes.c_char_p(verbosity.encode("ascii")), ) return json.loads(self._take_string(raw)) @@ -196,12 +247,11 @@ class _RustKernelLib: mode: str, verbosity: str, ) -> Dict[str, Any]: - encoded = json.dumps(json_safe(payload), separators=(",", ":"), ensure_ascii=False).encode("utf-8") raw = self.lib.dita_kernel_on_venue_event_json( handle, - ctypes.c_char_p(encoded), - ctypes.c_char_p(mode.encode("utf-8")), - ctypes.c_char_p(verbosity.encode("utf-8")), + ctypes.c_char_p(_to_rust_bytes(payload)), + ctypes.c_char_p(mode.encode("ascii")), + ctypes.c_char_p(verbosity.encode("ascii")), ) return json.loads(self._take_string(raw)) @@ -213,12 +263,11 @@ class _RustKernelLib: mode: str, verbosity: str, ) -> Dict[str, Any]: - encoded = json.dumps(json_safe(list(payload)), separators=(",", ":"), ensure_ascii=False).encode("utf-8") raw = self.lib.dita_kernel_reconcile_slots_json( handle, - ctypes.c_char_p(encoded), - ctypes.c_char_p(mode.encode("utf-8")), - ctypes.c_char_p(verbosity.encode("utf-8")), + ctypes.c_char_p(_to_rust_bytes(list(payload))), + ctypes.c_char_p(mode.encode("ascii")), + ctypes.c_char_p(verbosity.encode("ascii")), ) return json.loads(self._take_string(raw)) @@ -231,12 +280,12 @@ class _RustKernelLib: return rc == 0 def set_exchange_config(self, handle: ctypes.c_void_p, config: Dict[str, Any]) -> bool: - encoded = json.dumps(config, separators=(",", ":")).encode("utf-8") + encoded = _to_rust_bytes(config) rc = self.lib.dita_kernel_set_exchange_config_json(handle, ctypes.c_char_p(encoded)) return rc == 0 def calibrate_fee(self, handle: ctypes.c_void_p, fill_price: float, fill_qty: float, actual_fee: float, is_maker: bool = False) -> Dict[str, Any]: - payload = json.dumps({"fill_price": fill_price, "fill_qty": fill_qty, "actual_fee": actual_fee, "is_maker": is_maker}).encode("utf-8") + payload = _to_rust_bytes({"fill_price": fill_price, "fill_qty": fill_qty, "actual_fee": actual_fee, "is_maker": is_maker}) raw = self.lib.dita_kernel_calibrate_fee_json(handle, ctypes.c_char_p(payload)) if not raw: return {} @@ -245,8 +294,7 @@ class _RustKernelLib: def on_account_event( self, handle: ctypes.c_void_p, event: Dict[str, Any] ) -> Dict[str, Any]: - encoded = json.dumps(json_safe(event), separators=(",", ":"), ensure_ascii=False).encode("utf-8") - raw = self.lib.dita_kernel_on_account_event_json(handle, ctypes.c_char_p(encoded)) + raw = self.lib.dita_kernel_on_account_event_json(handle, ctypes.c_char_p(_to_rust_bytes(event))) if not raw: return {} return json.loads(self._take_string(raw)) @@ -260,7 +308,10 @@ class _RustKernelLib: def restore_state(self, handle: ctypes.c_void_p, json_str: str) -> bool: """Restore kernel from a previously saved JSON blob. Returns True on success.""" - rc = self.lib.dita_kernel_restore_state_json(handle, ctypes.c_char_p(json_str.encode("utf-8"))) + # json_str comes from Rust's save_state_json — re-encode via _to_rust_bytes so any + # embedded null chars are sanitized rather than silently truncating the restore. + encoded = _to_rust_bytes(json.loads(json_str)) + rc = self.lib.dita_kernel_restore_state_json(handle, ctypes.c_char_p(encoded)) return rc == 0 def is_capital_frozen(self, handle: ctypes.c_void_p) -> bool: diff --git a/prod/clean_arch/dita_v2/test_flaws.py b/prod/clean_arch/dita_v2/test_flaws.py index bebc8cc..0426449 100644 --- a/prod/clean_arch/dita_v2/test_flaws.py +++ b/prod/clean_arch/dita_v2/test_flaws.py @@ -1479,3 +1479,80 @@ class TestNormalizeEngForTui: eng = {"open_positions": 0, "slot": {}} out = self._norm(eng) assert out["open_positions"] == [], "zero open_positions must become empty list" + + +# ============================================================ +# _to_rust_bytes / _json_null_clean — null-byte safety +# ============================================================ + +class TestRustBytesNullSafety: + """_to_rust_bytes must never produce a 0x00 byte in its output. + + Root cause: ctypes.c_char_p treats the first 0x00 as a C null terminator, + silently truncating the JSON before Rust's serde_json sees the full payload. + Reproduces the INVALID_INTENT_PARSE bug seen during BingX VST smoke test. + """ + + def _encode(self, payload): + from prod.clean_arch.dita_v2.rust_backend import _to_rust_bytes + return _to_rust_bytes(payload) + + def _clean(self, obj): + from prod.clean_arch.dita_v2.rust_backend import _json_null_clean + return _json_null_clean(obj) + + def test_no_null_bytes_in_normal_exit_intent(self): + payload = { + "action": "EXIT", + "asset": "BNB-USDT", + "leverage": 1.3465735902799727, + "target_size": 1.76, + "reference_price": 66337.09, + "limit_price": 0.0, + "trade_id": "t1", + "metadata": {}, + } + encoded = self._encode(payload) + assert b"\x00" not in encoded, "EXIT intent must have no null bytes" + + def test_no_null_bytes_when_string_contains_u0000(self): + """A string value containing \\u0000 must not produce a null byte in output.""" + payload = {"event_id": "BX\x00data", "price": 100.0} + encoded = self._encode(payload) + assert b"\x00" not in encoded, "Null char in string must not produce null byte" + + def test_no_null_bytes_in_seen_event_ids(self): + """seen_event_ids list is serialized with all other slot fields.""" + payload = {"seen_event_ids": ["123", "456\x00789", "999"], "size": 1.76} + encoded = self._encode(payload) + assert b"\x00" not in encoded, "seen_event_ids with null chars must be clean" + + def test_no_null_bytes_in_nested_metadata(self): + payload = {"metadata": {"venue_note": "order\x00ok", "id": 42}, "asset": "ENJ-USDT"} + encoded = self._encode(payload) + assert b"\x00" not in encoded, "Nested metadata null chars must be sanitized" + + def test_output_is_valid_json(self): + import json + payload = {"action": "ENTER", "asset": "BNB-USDT", "leverage": 2.7, "seen_event_ids": ["e1"]} + encoded = self._encode(payload) + parsed = json.loads(encoded) + assert parsed["action"] == "ENTER" + + def test_json_null_clean_replaces_null_in_string(self): + result = self._clean({"key": "val\x00ue"}) + assert "\x00" not in result["key"] + assert "val" in result["key"] + + def test_json_null_clean_recursion(self): + obj = {"nested": {"list": ["a\x00b", 1, {"deep": "x\x00y"}]}} + cleaned = self._clean(obj) + assert "\x00" not in cleaned["nested"]["list"][0] + assert "\x00" not in cleaned["nested"]["list"][2]["deep"] + + def test_normal_ascii_payload_roundtrips_intact(self): + import json + payload = {"action": "EXIT", "asset": "BTC-USDT", "leverage": 1.5, "size": 0.001} + encoded = self._encode(payload) + assert json.loads(encoded)["asset"] == "BTC-USDT" + assert json.loads(encoded)["leverage"] == 1.5