"""Quick smoke test: Arrow adapter + Arrow->Parquet converter.

Exercises two components end-to-end against a local day of scan data:
  1. ArrowEigenvalueDataAdapter — enumerate and read raw Arrow scan files.
  2. ArrowToParquetBatchConverter — convert one date of scans to Parquet
     and sanity-check the resulting DataFrame.

Run directly; it prints progress and raises AssertionError on any failure.
NOTE(review): asserts are acceptable here because this is a smoke test,
not production validation (they vanish under `python -O`).
"""

import logging
import sys
from datetime import datetime

import pandas as pd

# Make the project package importable when run from the repo root.
# This must happen BEFORE the nautilus_dolphin import below.
sys.path.insert(0, r'.')

logging.basicConfig(level=logging.INFO, format='%(levelname)s %(message)s')

from nautilus_dolphin.nautilus.arrow_data_adapter import (
    ArrowEigenvalueDataAdapter,
    ArrowToParquetBatchConverter,
)

# Input directory of raw Arrow scan files, output dir for Parquet, and the
# single trading date under test (must match the datetime range below).
ARROW_DIR = r'c:\Users\Lenovo\Documents\- Dolphin NG5\correlation_arb512\arrow_scans'
PARQUET_OUT = r'vbt_cache_ng5'
DATE = '2026-02-25'

# ── 1: Arrow adapter ──────────────────────────────────────────────────────────
print("\n=== ARROW ADAPTER ===")
adapter = ArrowEigenvalueDataAdapter(ARROW_DIR, venue='BINANCE_FUTURES')
files = adapter.load_date_range(datetime(2026, 2, 25), datetime(2026, 2, 25))
print(f"Files found: {len(files)}")
assert len(files) > 0, "No files found!"

# Read at most 5 scans; count how many parse successfully.
ok = 0
for f in files[:5]:
    scan = adapter.load_scan_file(f)
    if scan is None:
        # Adapter signals an unreadable/corrupt scan with None — skip it.
        print(f" SKIP: {f.name}")
        continue
    # Velocity divergence: lambda_max velocity of the 50-window minus the
    # 150-window (missing keys default to 0, so partial scans still print).
    w50 = scan['windows'].get('50', {}).get('tracking_data', {})
    w150 = scan['windows'].get('150', {}).get('tracking_data', {})
    vd = w50.get('lambda_max_velocity', 0) - w150.get('lambda_max_velocity', 0)
    prices = scan.get('pricing_data', {}).get('current_prices', {})
    print(f" {f.name}: vel_div={vd:.6f} prices_count={len(prices)}")
    ok += 1

assert ok > 0, "No scans could be read!"
print(f"[PASS] Arrow adapter: {ok}/5 scans read")

# ── 2: Arrow -> Parquet ───────────────────────────────────────────────────────
print("\n=== ARROW → PARQUET ===")
conv = ArrowToParquetBatchConverter(ARROW_DIR, PARQUET_OUT)
out = conv.convert_date(DATE)
assert out is not None, "Parquet conversion returned None!"

# Re-read the converter's output and verify it is non-empty and carries the
# expected derived column from the 50-window tracking data.
df = pd.read_parquet(out)
print(f"Parquet rows: {len(df)}")
print(f"Columns: {list(df.columns[:10])}")
assert len(df) > 0, "Empty Parquet!"
assert 'v50_lambda_max_velocity' in df.columns, "Missing velocity column!"
print(df[['timestamp','scan_number','v50_lambda_max_velocity','vel_div']].head(3).to_string())
print(f"[PASS] Arrow→Parquet conversion: {len(df)} rows, {len(df.columns)} columns")

print("\n===== ALL TESTS PASSED =====")