PINK: fix ctypes c_char_p null-byte truncation (INVALID_INTENT_PARSE)
_to_rust_bytes() centralises all Python→Rust JSON serialisation:
- _json_null_clean() replaces U+0000 with U+FFFD in all string keys and values recursively
- ensure_ascii=True guarantees no 0x00 in output bytes
- All _json() call sites migrated; mode/verbosity now .encode("ascii")
- 9 null-safety unit tests added to TestRustBytesNullSafety
Root cause: ctypes.c_char_p silently truncates at first 0x00 byte,
causing serde_json "premature end of input at column 41" on EXIT intents
with BNB-USDT leverage values. Long-term fix: Rust FFI (ptr, len) pairs.
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -50,6 +50,59 @@ def _repo_root() -> Path:
|
|||||||
return Path(__file__).resolve().parents[3]
|
return Path(__file__).resolve().parents[3]
|
||||||
|
|
||||||
|
|
||||||
|
# ── Rust FFI JSON encoding ────────────────────────────────────────────────────
|
||||||
|
#
|
||||||
|
# All JSON that crosses the Python→Rust boundary via ctypes.c_char_p MUST be
|
||||||
|
# null-byte-free. ctypes.c_char_p is a C null-terminated string: the first
|
||||||
|
# 0x00 byte silently terminates the string, so Rust's serde_json parser only
|
||||||
|
# sees a truncated payload → "premature end of input at column N".
|
||||||
|
#
|
||||||
|
# Root cause: json.dumps(..., ensure_ascii=False).encode("utf-8") can produce
|
||||||
|
# 0x00 bytes when any string value contains the Unicode null character U+0000
|
||||||
|
# (e.g. from BingX event IDs, venue order IDs, or metadata fields).
|
||||||
|
#
|
||||||
|
# Two-layer fix:
|
||||||
|
# 1. _json_null_clean() — replace null chars (U+0000) with U+FFFD in all strings before JSON
|
||||||
|
# 2. ensure_ascii=True — encode to ASCII; guarantees no 0x00 in output
|
||||||
|
# (json.dumps writes U+0000 as the six-character escape \u0000, never as a raw 0x00 byte)
|
||||||
|
#
|
||||||
|
# Long-term path: change the Rust FFI to accept (ptr, len) pairs instead of
|
||||||
|
# null-terminated strings, removing the c_char_p truncation class entirely.
|
||||||
|
# That requires Rust changes; this fix is safe and sufficient until then.
|
||||||
|
|
||||||
|
def _json_null_clean(obj: Any) -> Any:
|
||||||
|
"""Recursively replace U+0000 null chars in string values with U+FFFD.
|
||||||
|
|
||||||
|
Null chars are invisible in logs and repr() but produce 0x00 bytes in
|
||||||
|
UTF-8 encoding that silently truncate ctypes c_char_p payloads.
|
||||||
|
"""
|
||||||
|
if isinstance(obj, str):
|
||||||
|
return obj.replace("\x00", "<EFBFBD>") if "\x00" in obj else obj
|
||||||
|
if isinstance(obj, dict):
|
||||||
|
return {_json_null_clean(k): _json_null_clean(v) for k, v in obj.items()}
|
||||||
|
if isinstance(obj, (list, tuple)):
|
||||||
|
return [_json_null_clean(v) for v in obj]
|
||||||
|
return obj
|
||||||
|
|
||||||
|
|
||||||
|
def _to_rust_bytes(obj: Any) -> bytes:
    """Serialize *obj* to compact ASCII JSON bytes safe for ctypes.c_char_p.

    The value is passed through ``json_safe`` and ``_json_null_clean`` and is
    then dumped with ``ensure_ascii=True`` and compact separators, so the
    result encodes cleanly as ASCII.

    As a last line of defence, any 0x00 byte still present in the encoded
    output is replaced with ``?`` and the anomaly is logged as an error, so a
    future regression is visible immediately instead of truncating the
    payload on the C side.

    Args:
        obj: The payload to serialize.

    Returns:
        The JSON document as ASCII bytes containing no 0x00 byte.

    Note:
        Errors raised by ``json.dumps`` are not caught here. Presumably
        ``json_safe`` makes the value serializable first — verify against its
        definition before relying on this function never raising.
    """
    cleaned = _json_null_clean(json_safe(obj))
    encoded = json.dumps(cleaned, separators=(",", ":"), ensure_ascii=True).encode("ascii")
    if b"\x00" in encoded:
        # Imported lazily: this branch is only reached on a sanitization bug.
        import logging as _log

        _log.getLogger(__name__).error(
            "BUG: %d null byte(s) in Rust-bound JSON after sanitization — replacing. "
            "Inspect _json_null_clean() for uncovered type.",
            encoded.count(b"\x00"),
        )
        encoded = encoded.replace(b"\x00", b"?")
    return encoded
|
||||||
|
|
||||||
|
|
||||||
def _crate_dir() -> Path:
|
def _crate_dir() -> Path:
|
||||||
return Path(__file__).resolve().with_name("_rust_kernel")
|
return Path(__file__).resolve().with_name("_rust_kernel")
|
||||||
|
|
||||||
@@ -166,8 +219,7 @@ class _RustKernelLib:
|
|||||||
return json.loads(self._take_string(raw))
|
return json.loads(self._take_string(raw))
|
||||||
|
|
||||||
def set_slot_json(self, handle: ctypes.c_void_p, slot_id: int, payload: Dict[str, Any]) -> None:
|
def set_slot_json(self, handle: ctypes.c_void_p, slot_id: int, payload: Dict[str, Any]) -> None:
|
||||||
encoded = json.dumps(json_safe(payload), separators=(",", ":"), ensure_ascii=False).encode("utf-8")
|
rc = self.lib.dita_kernel_set_slot_json(handle, ctypes.c_size_t(slot_id), ctypes.c_char_p(_to_rust_bytes(payload)))
|
||||||
rc = self.lib.dita_kernel_set_slot_json(handle, ctypes.c_size_t(slot_id), ctypes.c_char_p(encoded))
|
|
||||||
if rc != 0:
|
if rc != 0:
|
||||||
raise RuntimeError(f"dita_kernel_set_slot_json failed rc={rc}")
|
raise RuntimeError(f"dita_kernel_set_slot_json failed rc={rc}")
|
||||||
|
|
||||||
@@ -179,12 +231,11 @@ class _RustKernelLib:
|
|||||||
mode: str,
|
mode: str,
|
||||||
verbosity: str,
|
verbosity: str,
|
||||||
) -> Dict[str, Any]:
|
) -> Dict[str, Any]:
|
||||||
encoded = json.dumps(json_safe(payload), separators=(",", ":"), ensure_ascii=False).encode("utf-8")
|
|
||||||
raw = self.lib.dita_kernel_process_intent_json(
|
raw = self.lib.dita_kernel_process_intent_json(
|
||||||
handle,
|
handle,
|
||||||
ctypes.c_char_p(encoded),
|
ctypes.c_char_p(_to_rust_bytes(payload)),
|
||||||
ctypes.c_char_p(mode.encode("utf-8")),
|
ctypes.c_char_p(mode.encode("ascii")),
|
||||||
ctypes.c_char_p(verbosity.encode("utf-8")),
|
ctypes.c_char_p(verbosity.encode("ascii")),
|
||||||
)
|
)
|
||||||
return json.loads(self._take_string(raw))
|
return json.loads(self._take_string(raw))
|
||||||
|
|
||||||
@@ -196,12 +247,11 @@ class _RustKernelLib:
|
|||||||
mode: str,
|
mode: str,
|
||||||
verbosity: str,
|
verbosity: str,
|
||||||
) -> Dict[str, Any]:
|
) -> Dict[str, Any]:
|
||||||
encoded = json.dumps(json_safe(payload), separators=(",", ":"), ensure_ascii=False).encode("utf-8")
|
|
||||||
raw = self.lib.dita_kernel_on_venue_event_json(
|
raw = self.lib.dita_kernel_on_venue_event_json(
|
||||||
handle,
|
handle,
|
||||||
ctypes.c_char_p(encoded),
|
ctypes.c_char_p(_to_rust_bytes(payload)),
|
||||||
ctypes.c_char_p(mode.encode("utf-8")),
|
ctypes.c_char_p(mode.encode("ascii")),
|
||||||
ctypes.c_char_p(verbosity.encode("utf-8")),
|
ctypes.c_char_p(verbosity.encode("ascii")),
|
||||||
)
|
)
|
||||||
return json.loads(self._take_string(raw))
|
return json.loads(self._take_string(raw))
|
||||||
|
|
||||||
@@ -213,12 +263,11 @@ class _RustKernelLib:
|
|||||||
mode: str,
|
mode: str,
|
||||||
verbosity: str,
|
verbosity: str,
|
||||||
) -> Dict[str, Any]:
|
) -> Dict[str, Any]:
|
||||||
encoded = json.dumps(json_safe(list(payload)), separators=(",", ":"), ensure_ascii=False).encode("utf-8")
|
|
||||||
raw = self.lib.dita_kernel_reconcile_slots_json(
|
raw = self.lib.dita_kernel_reconcile_slots_json(
|
||||||
handle,
|
handle,
|
||||||
ctypes.c_char_p(encoded),
|
ctypes.c_char_p(_to_rust_bytes(list(payload))),
|
||||||
ctypes.c_char_p(mode.encode("utf-8")),
|
ctypes.c_char_p(mode.encode("ascii")),
|
||||||
ctypes.c_char_p(verbosity.encode("utf-8")),
|
ctypes.c_char_p(verbosity.encode("ascii")),
|
||||||
)
|
)
|
||||||
return json.loads(self._take_string(raw))
|
return json.loads(self._take_string(raw))
|
||||||
|
|
||||||
@@ -231,12 +280,12 @@ class _RustKernelLib:
|
|||||||
return rc == 0
|
return rc == 0
|
||||||
|
|
||||||
def set_exchange_config(self, handle: ctypes.c_void_p, config: Dict[str, Any]) -> bool:
|
def set_exchange_config(self, handle: ctypes.c_void_p, config: Dict[str, Any]) -> bool:
|
||||||
encoded = json.dumps(config, separators=(",", ":")).encode("utf-8")
|
encoded = _to_rust_bytes(config)
|
||||||
rc = self.lib.dita_kernel_set_exchange_config_json(handle, ctypes.c_char_p(encoded))
|
rc = self.lib.dita_kernel_set_exchange_config_json(handle, ctypes.c_char_p(encoded))
|
||||||
return rc == 0
|
return rc == 0
|
||||||
|
|
||||||
def calibrate_fee(self, handle: ctypes.c_void_p, fill_price: float, fill_qty: float, actual_fee: float, is_maker: bool = False) -> Dict[str, Any]:
|
def calibrate_fee(self, handle: ctypes.c_void_p, fill_price: float, fill_qty: float, actual_fee: float, is_maker: bool = False) -> Dict[str, Any]:
|
||||||
payload = json.dumps({"fill_price": fill_price, "fill_qty": fill_qty, "actual_fee": actual_fee, "is_maker": is_maker}).encode("utf-8")
|
payload = _to_rust_bytes({"fill_price": fill_price, "fill_qty": fill_qty, "actual_fee": actual_fee, "is_maker": is_maker})
|
||||||
raw = self.lib.dita_kernel_calibrate_fee_json(handle, ctypes.c_char_p(payload))
|
raw = self.lib.dita_kernel_calibrate_fee_json(handle, ctypes.c_char_p(payload))
|
||||||
if not raw:
|
if not raw:
|
||||||
return {}
|
return {}
|
||||||
@@ -245,8 +294,7 @@ class _RustKernelLib:
|
|||||||
def on_account_event(
|
def on_account_event(
|
||||||
self, handle: ctypes.c_void_p, event: Dict[str, Any]
|
self, handle: ctypes.c_void_p, event: Dict[str, Any]
|
||||||
) -> Dict[str, Any]:
|
) -> Dict[str, Any]:
|
||||||
encoded = json.dumps(json_safe(event), separators=(",", ":"), ensure_ascii=False).encode("utf-8")
|
raw = self.lib.dita_kernel_on_account_event_json(handle, ctypes.c_char_p(_to_rust_bytes(event)))
|
||||||
raw = self.lib.dita_kernel_on_account_event_json(handle, ctypes.c_char_p(encoded))
|
|
||||||
if not raw:
|
if not raw:
|
||||||
return {}
|
return {}
|
||||||
return json.loads(self._take_string(raw))
|
return json.loads(self._take_string(raw))
|
||||||
@@ -260,7 +308,10 @@ class _RustKernelLib:
|
|||||||
|
|
||||||
def restore_state(self, handle: ctypes.c_void_p, json_str: str) -> bool:
|
def restore_state(self, handle: ctypes.c_void_p, json_str: str) -> bool:
|
||||||
"""Restore kernel from a previously saved JSON blob. Returns True on success."""
|
"""Restore kernel from a previously saved JSON blob. Returns True on success."""
|
||||||
rc = self.lib.dita_kernel_restore_state_json(handle, ctypes.c_char_p(json_str.encode("utf-8")))
|
# json_str comes from Rust's save_state_json — re-encode via _to_rust_bytes so any
|
||||||
|
# embedded null chars are sanitized rather than silently truncating the restore.
|
||||||
|
encoded = _to_rust_bytes(json.loads(json_str))
|
||||||
|
rc = self.lib.dita_kernel_restore_state_json(handle, ctypes.c_char_p(encoded))
|
||||||
return rc == 0
|
return rc == 0
|
||||||
|
|
||||||
def is_capital_frozen(self, handle: ctypes.c_void_p) -> bool:
|
def is_capital_frozen(self, handle: ctypes.c_void_p) -> bool:
|
||||||
|
|||||||
@@ -1479,3 +1479,80 @@ class TestNormalizeEngForTui:
|
|||||||
eng = {"open_positions": 0, "slot": {}}
|
eng = {"open_positions": 0, "slot": {}}
|
||||||
out = self._norm(eng)
|
out = self._norm(eng)
|
||||||
assert out["open_positions"] == [], "zero open_positions must become empty list"
|
assert out["open_positions"] == [], "zero open_positions must become empty list"
|
||||||
|
|
||||||
|
|
||||||
|
# ============================================================
|
||||||
|
# _to_rust_bytes / _json_null_clean — null-byte safety
|
||||||
|
# ============================================================
|
||||||
|
|
||||||
|
class TestRustBytesNullSafety:
    """_to_rust_bytes must never produce a 0x00 byte in its output.

    Root cause: ctypes.c_char_p treats the first 0x00 as a C null terminator,
    silently truncating the JSON before Rust's serde_json sees the full payload.
    Reproduces the INVALID_INTENT_PARSE bug seen during BingX VST smoke test.
    """

    def _encode(self, payload):
        """Serialize *payload* with the production Rust-bound encoder."""
        from prod.clean_arch.dita_v2.rust_backend import _to_rust_bytes
        return _to_rust_bytes(payload)

    def _clean(self, obj):
        """Run *obj* through the production null-char sanitizer."""
        from prod.clean_arch.dita_v2.rust_backend import _json_null_clean
        return _json_null_clean(obj)

    def _assert_null_free_json(self, encoded, message):
        """Assert *encoded* has no 0x00 byte and still parses as JSON."""
        import json
        assert b"\x00" not in encoded, message
        # A sanitized payload is useless if it no longer parses on the Rust side.
        json.loads(encoded)

    def test_no_null_bytes_in_normal_exit_intent(self):
        payload = {
            "action": "EXIT",
            "asset": "BNB-USDT",
            "leverage": 1.3465735902799727,
            "target_size": 1.76,
            "reference_price": 66337.09,
            "limit_price": 0.0,
            "trade_id": "t1",
            "metadata": {},
        }
        encoded = self._encode(payload)
        self._assert_null_free_json(encoded, "EXIT intent must have no null bytes")

    def test_no_null_bytes_when_string_contains_u0000(self):
        """A string value containing \\u0000 must not produce a null byte in output."""
        payload = {"event_id": "BX\x00data", "price": 100.0}
        encoded = self._encode(payload)
        self._assert_null_free_json(encoded, "Null char in string must not produce null byte")

    def test_no_null_bytes_in_seen_event_ids(self):
        """seen_event_ids list is serialized with all other slot fields."""
        payload = {"seen_event_ids": ["123", "456\x00789", "999"], "size": 1.76}
        encoded = self._encode(payload)
        self._assert_null_free_json(encoded, "seen_event_ids with null chars must be clean")

    def test_no_null_bytes_in_nested_metadata(self):
        payload = {"metadata": {"venue_note": "order\x00ok", "id": 42}, "asset": "ENJ-USDT"}
        encoded = self._encode(payload)
        self._assert_null_free_json(encoded, "Nested metadata null chars must be sanitized")

    def test_output_is_valid_json(self):
        import json
        payload = {"action": "ENTER", "asset": "BNB-USDT", "leverage": 2.7, "seen_event_ids": ["e1"]}
        encoded = self._encode(payload)
        parsed = json.loads(encoded)
        assert parsed["action"] == "ENTER"

    def test_json_null_clean_replaces_null_in_string(self):
        result = self._clean({"key": "val\x00ue"})
        assert "\x00" not in result["key"]
        # The text on both sides of the null char must survive sanitization.
        assert result["key"].startswith("val")
        assert result["key"].endswith("ue")

    def test_json_null_clean_recursion(self):
        obj = {"nested": {"list": ["a\x00b", 1, {"deep": "x\x00y"}]}}
        cleaned = self._clean(obj)
        assert "\x00" not in cleaned["nested"]["list"][0]
        assert "\x00" not in cleaned["nested"]["list"][2]["deep"]
        # Non-string items pass through untouched.
        assert cleaned["nested"]["list"][1] == 1

    def test_json_null_clean_sanitizes_dict_keys(self):
        """Dict keys are serialized as JSON strings too, so they must be cleaned."""
        cleaned = self._clean({"bad\x00key": "v", "ok": {"in\x00ner": 1}})
        assert all("\x00" not in key for key in cleaned)
        assert all("\x00" not in key for key in cleaned["ok"])
        assert len(cleaned) == 2

    def test_normal_ascii_payload_roundtrips_intact(self):
        import json
        payload = {"action": "EXIT", "asset": "BTC-USDT", "leverage": 1.5, "size": 0.001}
        encoded = self._encode(payload)
        decoded = json.loads(encoded)
        assert decoded["asset"] == "BTC-USDT"
        assert decoded["leverage"] == 1.5
|
||||||
|
|||||||
Reference in New Issue
Block a user