PINK: fix ctypes c_char_p null-byte truncation (INVALID_INTENT_PARSE)

_to_rust_bytes() centralises all Python→Rust JSON serialisation:
- _json_null_clean() replaces U+0000 with U+FFFD in all string values (and dict keys) recursively
- ensure_ascii=True guarantees no 0x00 in output bytes
- All _json() call sites migrated; mode/verbosity now .encode("ascii")
- 9 null-safety unit tests added to TestRustBytesNullSafety

Root cause: ctypes.c_char_p silently truncates at first 0x00 byte,
causing serde_json "premature end of input at column 41" on EXIT intents
with BNB-USDT leverage values. Long-term fix: Rust FFI (ptr, len) pairs.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Codex
2026-06-03 18:30:10 +02:00
parent beef39eaf5
commit a89e766da1
2 changed files with 147 additions and 19 deletions

View File

@@ -50,6 +50,59 @@ def _repo_root() -> Path:
return Path(__file__).resolve().parents[3]
# ── Rust FFI JSON encoding ────────────────────────────────────────────────────
#
# All JSON that crosses the Python→Rust boundary via ctypes.c_char_p MUST be
# null-byte-free. ctypes.c_char_p is a C null-terminated string: the first
# 0x00 byte silently terminates the string, so Rust's serde_json parser only
# sees a truncated payload → "premature end of input at column N".
#
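# Suspected root cause: a string value containing the Unicode null character
# U+0000 (e.g. from BingX event IDs, venue order IDs, or metadata fields)
# reaching the encoded payload as a raw 0x00 byte.
# NOTE(review): CPython's json.dumps escapes U+0000 as the six ASCII characters
# \u0000 under both ensure_ascii settings, so the previous
# json.dumps(..., ensure_ascii=False).encode("utf-8") path should not emit 0x00
# by itself — confirm where the raw null byte actually entered the payload.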
#
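# Two-layer fix:
# 1. _json_null_clean() — replace null chars in all string values (and dict
#    keys) with U+FFFD before JSON encoding
# 2. ensure_ascii=True — emit pure-ASCII JSON, escaping every non-ASCII char as
#    \uXXXX (NUL is itself a valid ASCII byte, so it is layer 1 plus JSON's
#    control-character escaping that keep 0x00 out of the output)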
#
# Long-term path: change the Rust FFI to accept (ptr, len) pairs instead of
# null-terminated strings, removing the c_char_p truncation class entirely.
# That requires Rust changes; this fix is safe and sufficient until then.
def _json_null_clean(obj: Any) -> Any:
"""Recursively replace U+0000 null chars in string values with U+FFFD.
Null chars are invisible in logs and repr() but produce 0x00 bytes in
UTF-8 encoding that silently truncate ctypes c_char_p payloads.
"""
if isinstance(obj, str):
return obj.replace("\x00", "<EFBFBD>") if "\x00" in obj else obj
if isinstance(obj, dict):
return {_json_null_clean(k): _json_null_clean(v) for k, v in obj.items()}
if isinstance(obj, (list, tuple)):
return [_json_null_clean(v) for v in obj]
return obj
def _to_rust_bytes(obj: Any) -> bytes:
    """Encode *obj* as compact, ASCII-only JSON bytes for a ctypes.c_char_p argument.

    The value goes through ``json_safe`` and then ``_json_null_clean`` before
    being dumped with ``ensure_ascii=True`` and compact separators.  As a last
    line of defence, any 0x00 byte still found in the encoded output is logged
    as an error and replaced with ``?`` so that a regression is visible in the
    logs instead of truncating the payload.
    """
    sanitized = _json_null_clean(json_safe(obj))
    text = json.dumps(sanitized, separators=(",", ":"), ensure_ascii=True)
    payload = text.encode("ascii")
    if b"\x00" not in payload:
        return payload

    # Should be unreachable after sanitisation; report loudly and patch the bytes.
    import logging as _log

    _log.getLogger(__name__).error(
        "BUG: null byte in Rust-bound JSON after sanitization — replacing. "
        "Inspect _json_null_clean() for uncovered type."
    )
    return payload.replace(b"\x00", b"?")
def _crate_dir() -> Path:
    """Return the ``_rust_kernel`` path located beside this module's resolved file."""
    module_file = Path(__file__).resolve()
    return module_file.with_name("_rust_kernel")
@@ -166,8 +219,7 @@ class _RustKernelLib:
return json.loads(self._take_string(raw))
def set_slot_json(self, handle: ctypes.c_void_p, slot_id: int, payload: Dict[str, Any]) -> None:
    """Send *payload* to kernel slot *slot_id* as JSON.

    The payload is encoded once via ``_to_rust_bytes`` and passed to
    ``dita_kernel_set_slot_json`` exactly once; the earlier
    ``json.dumps(...).encode("utf-8")`` call path is not used.

    Raises:
        RuntimeError: if the native call returns a non-zero status code.
    """
    encoded = _to_rust_bytes(payload)
    rc = self.lib.dita_kernel_set_slot_json(handle, ctypes.c_size_t(slot_id), ctypes.c_char_p(encoded))
    if rc != 0:
        raise RuntimeError(f"dita_kernel_set_slot_json failed rc={rc}")
@@ -179,12 +231,11 @@ class _RustKernelLib:
mode: str,
verbosity: str,
) -> Dict[str, Any]:
encoded = json.dumps(json_safe(payload), separators=(",", ":"), ensure_ascii=False).encode("utf-8")
raw = self.lib.dita_kernel_process_intent_json(
handle,
ctypes.c_char_p(encoded),
ctypes.c_char_p(mode.encode("utf-8")),
ctypes.c_char_p(verbosity.encode("utf-8")),
ctypes.c_char_p(_to_rust_bytes(payload)),
ctypes.c_char_p(mode.encode("ascii")),
ctypes.c_char_p(verbosity.encode("ascii")),
)
return json.loads(self._take_string(raw))
@@ -196,12 +247,11 @@ class _RustKernelLib:
mode: str,
verbosity: str,
) -> Dict[str, Any]:
encoded = json.dumps(json_safe(payload), separators=(",", ":"), ensure_ascii=False).encode("utf-8")
raw = self.lib.dita_kernel_on_venue_event_json(
handle,
ctypes.c_char_p(encoded),
ctypes.c_char_p(mode.encode("utf-8")),
ctypes.c_char_p(verbosity.encode("utf-8")),
ctypes.c_char_p(_to_rust_bytes(payload)),
ctypes.c_char_p(mode.encode("ascii")),
ctypes.c_char_p(verbosity.encode("ascii")),
)
return json.loads(self._take_string(raw))
@@ -213,12 +263,11 @@ class _RustKernelLib:
mode: str,
verbosity: str,
) -> Dict[str, Any]:
encoded = json.dumps(json_safe(list(payload)), separators=(",", ":"), ensure_ascii=False).encode("utf-8")
raw = self.lib.dita_kernel_reconcile_slots_json(
handle,
ctypes.c_char_p(encoded),
ctypes.c_char_p(mode.encode("utf-8")),
ctypes.c_char_p(verbosity.encode("utf-8")),
ctypes.c_char_p(_to_rust_bytes(list(payload))),
ctypes.c_char_p(mode.encode("ascii")),
ctypes.c_char_p(verbosity.encode("ascii")),
)
return json.loads(self._take_string(raw))
@@ -231,12 +280,12 @@ class _RustKernelLib:
return rc == 0
def set_exchange_config(self, handle: ctypes.c_void_p, config: Dict[str, Any]) -> bool:
    """Send *config* to the kernel as JSON.

    The config is encoded with ``_to_rust_bytes`` only; the superseded plain
    ``json.dumps`` assignment, whose result was immediately overwritten, is gone.

    Returns:
        True when ``dita_kernel_set_exchange_config_json`` returns 0,
        False otherwise.
    """
    encoded = _to_rust_bytes(config)
    rc = self.lib.dita_kernel_set_exchange_config_json(handle, ctypes.c_char_p(encoded))
    return rc == 0
def calibrate_fee(self, handle: ctypes.c_void_p, fill_price: float, fill_qty: float, actual_fee: float, is_maker: bool = False) -> Dict[str, Any]:
payload = json.dumps({"fill_price": fill_price, "fill_qty": fill_qty, "actual_fee": actual_fee, "is_maker": is_maker}).encode("utf-8")
payload = _to_rust_bytes({"fill_price": fill_price, "fill_qty": fill_qty, "actual_fee": actual_fee, "is_maker": is_maker})
raw = self.lib.dita_kernel_calibrate_fee_json(handle, ctypes.c_char_p(payload))
if not raw:
return {}
@@ -245,8 +294,7 @@ class _RustKernelLib:
def on_account_event(
    self, handle: ctypes.c_void_p, event: Dict[str, Any]
) -> Dict[str, Any]:
    """Forward an account *event* to the kernel and return its decoded JSON reply.

    The event is encoded once via ``_to_rust_bytes`` and the native function is
    invoked a single time, so the kernel does not see the same event twice.

    Returns:
        The parsed JSON reply, or an empty dict when the native call returns
        a falsy (null) result.
    """
    encoded = _to_rust_bytes(event)
    raw = self.lib.dita_kernel_on_account_event_json(handle, ctypes.c_char_p(encoded))
    if not raw:
        return {}
    return json.loads(self._take_string(raw))
@@ -260,7 +308,10 @@ class _RustKernelLib:
def restore_state(self, handle: ctypes.c_void_p, json_str: str) -> bool:
    """Restore kernel from a previously saved JSON blob. Returns True on success.

    The blob is parsed and re-encoded through ``_to_rust_bytes`` so that any
    embedded null chars are sanitised instead of silently truncating the
    restore, and the native restore function is called exactly once.

    Returns:
        True when ``dita_kernel_restore_state_json`` returns 0; False when it
        returns non-zero or when *json_str* is not valid JSON.
    """
    try:
        state = json.loads(json_str)
    except ValueError:
        # json.JSONDecodeError subclasses ValueError.  A malformed blob is a
        # failed restore, reported through the boolean result like any other.
        return False
    encoded = _to_rust_bytes(state)
    rc = self.lib.dita_kernel_restore_state_json(handle, ctypes.c_char_p(encoded))
    return rc == 0
def is_capital_frozen(self, handle: ctypes.c_void_p) -> bool:

View File

@@ -1479,3 +1479,80 @@ class TestNormalizeEngForTui:
eng = {"open_positions": 0, "slot": {}}
out = self._norm(eng)
assert out["open_positions"] == [], "zero open_positions must become empty list"
# ============================================================
# _to_rust_bytes / _json_null_clean — null-byte safety
# ============================================================
class TestRustBytesNullSafety:
    """Checks that ``_to_rust_bytes`` output never contains a 0x00 byte.

    ctypes.c_char_p treats the first 0x00 as the C string terminator, which
    would cut the JSON short before Rust's serde_json parses it.  These tests
    target the INVALID_INTENT_PARSE failure seen during the BingX VST smoke test.
    """

    def _encode(self, payload):
        # Imported inside the helper, at call time.
        from prod.clean_arch.dita_v2.rust_backend import _to_rust_bytes as to_bytes

        return to_bytes(payload)

    def _clean(self, obj):
        # Imported inside the helper, at call time.
        from prod.clean_arch.dita_v2.rust_backend import _json_null_clean as scrub

        return scrub(obj)

    def test_no_null_bytes_in_normal_exit_intent(self):
        """A realistic EXIT intent encodes without any null byte."""
        exit_intent = dict(
            action="EXIT",
            asset="BNB-USDT",
            leverage=1.3465735902799727,
            target_size=1.76,
            reference_price=66337.09,
            limit_price=0.0,
            trade_id="t1",
            metadata={},
        )
        blob = self._encode(exit_intent)
        assert b"\x00" not in blob, "EXIT intent must have no null bytes"

    def test_no_null_bytes_when_string_contains_u0000(self):
        """A top-level string value holding U+0000 yields no null byte."""
        blob = self._encode({"event_id": "BX\x00data", "price": 100.0})
        assert b"\x00" not in blob, "Null char in string must not produce null byte"

    def test_no_null_bytes_in_seen_event_ids(self):
        """Null chars inside a list of event ids are sanitised too."""
        slot = {"seen_event_ids": ["123", "456\x00789", "999"], "size": 1.76}
        blob = self._encode(slot)
        assert b"\x00" not in blob, "seen_event_ids with null chars must be clean"

    def test_no_null_bytes_in_nested_metadata(self):
        """Null chars in a nested dict value are sanitised."""
        intent = {"metadata": {"venue_note": "order\x00ok", "id": 42}, "asset": "ENJ-USDT"}
        blob = self._encode(intent)
        assert b"\x00" not in blob, "Nested metadata null chars must be sanitized"

    def test_output_is_valid_json(self):
        """The encoded bytes parse back as JSON with the action intact."""
        import json

        blob = self._encode(
            {"action": "ENTER", "asset": "BNB-USDT", "leverage": 2.7, "seen_event_ids": ["e1"]}
        )
        decoded = json.loads(blob)
        assert decoded["action"] == "ENTER"

    def test_json_null_clean_replaces_null_in_string(self):
        """The cleaner removes the null char but keeps the surrounding text."""
        cleaned_value = self._clean({"key": "val\x00ue"})["key"]
        assert "\x00" not in cleaned_value
        assert "val" in cleaned_value

    def test_json_null_clean_recursion(self):
        """The cleaner descends through dict -> list -> dict nesting."""
        tree = {"nested": {"list": ["a\x00b", 1, {"deep": "x\x00y"}]}}
        items = self._clean(tree)["nested"]["list"]
        assert "\x00" not in items[0]
        assert "\x00" not in items[2]["deep"]

    def test_normal_ascii_payload_roundtrips_intact(self):
        """A plain ASCII payload decodes back to the same field values."""
        import json

        blob = self._encode({"action": "EXIT", "asset": "BTC-USDT", "leverage": 1.5, "size": 0.001})
        decoded = json.loads(blob)
        assert decoded["asset"] == "BTC-USDT"
        assert decoded["leverage"] == 1.5