Files
DOLPHIN/nautilus_dolphin/test_arrow_adapter_quick.py

54 lines
2.4 KiB
Python
Raw Normal View History

"""Quick smoke test: Arrow adapter + Arrow->Parquet converter."""
import sys, logging
# Put the current working directory first on the module search path.
# NOTE(review): presumably so `nautilus_dolphin` resolves when the script is
# launched from the project root — confirm. Must run before the project import.
sys.path.insert(0, r'.')
# Emit INFO-level log records as "<LEVEL> <message>" while the checks run.
logging.basicConfig(level=logging.INFO, format='%(levelname)s %(message)s')
from datetime import datetime
from nautilus_dolphin.nautilus.arrow_data_adapter import (
ArrowEigenvalueDataAdapter,
ArrowToParquetBatchConverter,
)
import pandas as pd
# Directory passed as the first argument to both ArrowEigenvalueDataAdapter and
# ArrowToParquetBatchConverter. Absolute, machine-specific Windows path: edit
# it before running this script on another machine.
ARROW_DIR = r'c:\Users\Lenovo\Documents\- Dolphin NG5\correlation_arb512\arrow_scans'
# Second argument to ArrowToParquetBatchConverter (a relative path).
# NOTE(review): presumably the directory that receives the generated Parquet
# files — confirm against the converter.
PARQUET_OUT = r'vbt_cache_ng5'
# Day under test as a YYYY-MM-DD string; passed to convert_date(), and the
# adapter section targets the same calendar day.
DATE = '2026-02-25'
# ── 1: Arrow adapter ──────────────────────────────────────────────────────────
# Smoke-check the adapter: list the scan files for DATE, load a small sample of
# them, and print each scan's velocity divergence (window 50 minus window 150)
# together with the number of prices it carries.
print("\n=== ARROW ADAPTER ===")
adapter = ArrowEigenvalueDataAdapter(ARROW_DIR, venue='BINANCE_FUTURES')
# Derive the range from DATE so this section and the Parquet section always
# target the same day, instead of repeating the date as a literal.
scan_day = datetime.strptime(DATE, '%Y-%m-%d')
files = adapter.load_date_range(scan_day, scan_day)
print(f"Files found: {len(files)}")
assert len(files) > 0, "No files found!"
# Only the first few files are read; a smoke test does not need the full day.
sample = files[:5]
ok = 0
for f in sample:
    scan = adapter.load_scan_file(f)
    if scan is None:
        # The adapter returned no scan for this file; skip it, not fatal.
        print(f" SKIP: {f.name}")
        continue
    # A missing window or tracking block falls back to an empty dict, and a
    # missing velocity to 0, so a sparse scan still produces a printable line.
    w50 = scan['windows'].get('50', {}).get('tracking_data', {})
    w150 = scan['windows'].get('150', {}).get('tracking_data', {})
    vd = w50.get('lambda_max_velocity', 0) - w150.get('lambda_max_velocity', 0)
    prices = scan.get('pricing_data', {}).get('current_prices', {})
    print(f" {f.name}: vel_div={vd:.6f} prices_count={len(prices)}")
    ok += 1
assert ok > 0, "No scans could be read!"
# Report against the number of files actually attempted; the day may hold
# fewer than five files, in which case a fixed "/5" would be misleading.
print(f"[PASS] Arrow adapter: {ok}/{len(sample)} scans read")
# ── 2: Arrow -> Parquet ───────────────────────────────────────────────────────
# Smoke-check the batch converter: run the conversion for DATE, read the result
# back with pandas, and verify it has rows and the columns previewed below.
print("\n=== ARROW → PARQUET ===")
conv = ArrowToParquetBatchConverter(ARROW_DIR, PARQUET_OUT)
# The return value is handed straight to pandas; None is treated as failure.
out = conv.convert_date(DATE)
assert out is not None, "Parquet conversion returned None!"
df = pd.read_parquet(out)
print(f"Parquet rows: {len(df)}")
print(f"Columns: {list(df.columns[:10])}")
assert len(df) > 0, "Empty Parquet!"
assert 'v50_lambda_max_velocity' in df.columns, "Missing velocity column!"
# Validate every column the preview selects, so a schema gap fails with an
# assertion naming the missing columns rather than a KeyError from indexing.
preview_cols = ['timestamp', 'scan_number', 'v50_lambda_max_velocity', 'vel_div']
missing = [col for col in preview_cols if col not in df.columns]
assert not missing, f"Missing expected columns: {missing}"
print(df[preview_cols].head(3).to_string())
print(f"[PASS] Arrow→Parquet conversion: {len(df)} rows, {len(df.columns)} columns")
print("\n===== ALL TESTS PASSED =====")