Files
DOLPHIN/prod/tests/test_esof_advisor.py

493 lines
24 KiB
Python
Raw Normal View History

"""
EsoF Advisory unit + integration tests
=========================================
Tests:
1. compute_esof() output schema and deterministic values for known datetimes
2. Session classification boundary conditions
3. Weighted hours: real vs fallback consistency
4. Advisory score: scoring logic, clamping, labels
5. Expectancy tables: internal consistency
6. Moon / Mercury-retrograde approximation
7. get_advisory() public API
8. HZ round-trip (integration, skipped if HZ down)
9. CH write (integration, skipped if CH down)
Run:
source /home/dolphin/siloqy_env/bin/activate
cd /mnt/dolphinng5_predict/prod && pytest tests/test_esof_advisor.py -v
"""
import sys
import json
import math
import pytest
from datetime import datetime, timezone, timedelta
from pathlib import Path
from unittest.mock import patch
# ── Path setup ────────────────────────────────────────────────────────────────
sys.path.insert(0, str(Path(__file__).parent.parent.parent / "Observability"))
sys.path.insert(0, str(Path(__file__).parent.parent.parent / "external_factors"))
from esof_advisor import (
compute_esof,
get_session,
get_advisory,
SESSION_STATS,
DOW_STATS,
LIQ_HOUR_STATS,
SLOT_STATS,
BASELINE_WR,
DOW_NAMES,
_get_weighted_hours,
_WEIGHTED_HOURS_AVAILABLE,
)
# ── Fixtures ──────────────────────────────────────────────────────────────────
# (label, (year, month, day, hour, minute)) — every instant is UTC.
# Insertion order is kept, so loops over KNOWN_TIMES see the same sequence.
_KNOWN_TIME_SPECS = (
    ("sun_london", (2026, 4, 19, 10, 0)),   # Sun LDN — best cell
    ("thu_ovlp", (2026, 4, 16, 15, 0)),     # Thu OVLP — worst cell
    ("sun_ny", (2026, 4, 19, 19, 0)),       # Sun NY — near 0% WR
    ("mon_asia", (2026, 4, 20, 3, 0)),      # Mon ASIA — bad
    ("tue_asia", (2026, 4, 21, 3, 0)),      # Tue ASIA — best day
    ("midday_win", (2026, 4, 15, 11, 30)),  # 11:30 — 87.5% WR slot
)

# Named, timezone-aware reference instants shared by the tests below.
KNOWN_TIMES = {
    label: datetime(*fields, tzinfo=timezone.utc)
    for label, fields in _KNOWN_TIME_SPECS
}
# ══════════════════════════════════════════════════════════════════════════════
# 1. compute_esof() — output schema
# ══════════════════════════════════════════════════════════════════════════════
class TestComputeEsofSchema:
    """Shape and basic-field checks for the dict returned by ``compute_esof``."""

    # Keys every ``compute_esof`` result is expected to carry.
    REQUIRED_KEYS = [
        "ts", "_ts", "dow", "dow_name", "hour_utc", "slot_15m", "session",
        "pop_weighted_hour", "liq_weighted_hour", "liq_bucket_3h",
        "moon_illumination", "moon_phase", "mercury_retrograde",
        "market_cycle_pos", "fib_strength",
        "liq_wr_pct", "liq_net_pnl",
        "slot_wr_pct", "slot_net_pnl",
        "session_wr_pct", "session_net_pnl",
        "dow_wr_pct", "dow_net_pnl",
        "advisory_score", "advisory_label",
    ]

    def test_all_keys_present(self):
        """The Sunday-London sample exposes every required key."""
        result = compute_esof(KNOWN_TIMES["sun_london"])
        absent = [key for key in self.REQUIRED_KEYS if key not in result]
        # Report the first missing key, in REQUIRED_KEYS order.
        assert not absent, f"Missing key: {absent[0]}"

    def test_ts_matches_input(self):
        """Hour and day-of-week fields reflect the input datetime."""
        result = compute_esof(KNOWN_TIMES["sun_london"])
        assert result["hour_utc"] == 10
        assert result["dow"] == 6  # Sunday
        assert result["dow_name"] == "Sun"

    def test_slot_15m_format(self):
        """11:37 UTC falls into the 11:30 quarter-hour slot."""
        stamp = datetime(2026, 4, 15, 11, 37, tzinfo=timezone.utc)
        assert compute_esof(stamp)["slot_15m"] == "11:30"

    def test_slot_15m_boundaries(self):
        """Slot label flips exactly at minutes 0, 15, 30 and 45."""
        day = datetime(2026, 4, 15, tzinfo=timezone.utc)
        expectations = [
            (0, 0, "0:00"), (0, 14, "0:00"), (0, 15, "0:15"),
            (0, 29, "0:15"), (0, 30, "0:30"), (0, 44, "0:30"),
            (0, 45, "0:45"), (0, 59, "0:45"),
            (23, 59, "23:45"),
        ]
        for hour, minute, expected in expectations:
            stamp = day.replace(hour=hour, minute=minute)
            slot = compute_esof(stamp)["slot_15m"]
            assert slot == expected, f"{hour}:{minute} → expected {expected}"

    def test_advisory_score_clamped(self):
        """advisory_score stays within [-1, 1] for every known time."""
        for name in KNOWN_TIMES:
            score = compute_esof(KNOWN_TIMES[name])["advisory_score"]
            assert -1.0 <= score <= 1.0, f"{name}: advisory_score {score} out of [-1,1]"

    def test_advisory_label_valid(self):
        """advisory_label is one of the five recognised labels."""
        allowed = {"FAVORABLE", "MILD_POSITIVE", "NEUTRAL", "MILD_NEGATIVE", "UNFAVORABLE"}
        for name, stamp in KNOWN_TIMES.items():
            label = compute_esof(stamp)["advisory_label"]
            assert label in allowed, f"{name}: bad label {label}"
# ══════════════════════════════════════════════════════════════════════════════
# 2. Session classification
# ══════════════════════════════════════════════════════════════════════════════
class TestSessionClassification:
    """Checks on ``get_session`` and on the session label ``compute_esof`` reports."""

    def test_all_sessions_reachable(self):
        """Whole hours 0-23 together yield exactly the five session names."""
        sessions = {get_session(h) for h in range(24)}
        assert sessions == {
            "ASIA_PACIFIC", "LONDON_MORNING", "LN_NY_OVERLAP",
            "NY_AFTERNOON", "LOW_LIQUIDITY"
        }

    @pytest.mark.parametrize("hour,expected", [
        (0, "ASIA_PACIFIC"),
        (7, "ASIA_PACIFIC"),
        (7.99, "ASIA_PACIFIC"),
        (8, "LONDON_MORNING"),
        (12, "LONDON_MORNING"),
        (12.99, "LONDON_MORNING"),
        (13, "LN_NY_OVERLAP"),
        (16.99, "LN_NY_OVERLAP"),
        (17, "NY_AFTERNOON"),
        (20.99, "NY_AFTERNOON"),
        (21, "LOW_LIQUIDITY"),
        (23.99, "LOW_LIQUIDITY"),
    ])
    def test_session_boundaries(self, hour, expected):
        """Each session starts at its opening hour and runs to just before the next."""
        assert get_session(hour) == expected

    def test_known_times_sessions(self):
        """compute_esof reports the expected session for the named fixtures."""
        assert compute_esof(KNOWN_TIMES["sun_london"])["session"] == "LONDON_MORNING"
        assert compute_esof(KNOWN_TIMES["thu_ovlp"])["session"] == "LN_NY_OVERLAP"
        assert compute_esof(KNOWN_TIMES["sun_ny"])["session"] == "NY_AFTERNOON"
        assert compute_esof(KNOWN_TIMES["mon_asia"])["session"] == "ASIA_PACIFIC"

    def test_session_stats_coverage(self):
        """Every reachable session must have an expectancy entry."""
        for h in range(24):
            sess = get_session(h)
            assert sess in SESSION_STATS, f"Session {sess} missing from SESSION_STATS"
# ══════════════════════════════════════════════════════════════════════════════
# 3. Weighted hours
# ══════════════════════════════════════════════════════════════════════════════
class TestWeightedHours:
    """Checks on ``_get_weighted_hours`` and the liquidity bucket derived from it."""

    def test_pop_hour_range(self):
        """Both weighted hours lie in [0, 24) for every UTC hour of one day."""
        midnight = datetime(2026, 4, 15, 0, 0, tzinfo=timezone.utc)
        for h in range(24):
            pop_hour, liq_hour = _get_weighted_hours(midnight + timedelta(hours=h))
            assert 0 <= pop_hour < 24, f"pop_hour {pop_hour} at {h}h out of range"
            assert 0 <= liq_hour < 24, f"liq_hour {liq_hour} at {h}h out of range"

    def test_liq_hour_monotone_utc(self):
        """liq_hour increases monotonically with UTC (within the same calendar day)."""
        midnight = datetime(2026, 4, 15, 0, 0, tzinfo=timezone.utc)
        _, previous = _get_weighted_hours(midnight)
        # Hours 1..22 are each compared against the hour before.
        for h in range(1, 23):
            _, current = _get_weighted_hours(midnight + timedelta(hours=h))
            assert current > previous, f"liq_hour not monotone at {h}h: {current} <= {previous}"
            previous = current

    def test_fallback_consistency(self):
        """Fallback approximation should be within ±1h of real computation."""
        if not _WEIGHTED_HOURS_AVAILABLE:
            pytest.skip("MarketIndicators not available")
        utc_hour = 12.0
        noon = datetime(2026, 4, 15, 12, 0, tzinfo=timezone.utc)
        real_ph, real_lh = _get_weighted_hours(noon)
        # Fixed offsets used by the approximation.
        approx_ph = (utc_hour + 4.21) % 24
        approx_lh = (utc_hour + 0.98) % 24
        assert abs(real_ph - approx_ph) < 1.0, f"pop_hour fallback error: {real_ph} vs {approx_ph}"
        assert abs(real_lh - approx_lh) < 1.0, f"liq_hour fallback error: {real_lh} vs {approx_lh}"

    def test_liq_bucket_aligns(self):
        """liq_bucket_3h must match floor(liq_weighted_hour / 3) * 3."""
        for name, stamp in KNOWN_TIMES.items():
            result = compute_esof(stamp)
            expected_bkt = int(result["liq_weighted_hour"] // 3) * 3
            assert result["liq_bucket_3h"] == expected_bkt, (
                f"{name}: liq_bucket {result['liq_bucket_3h']} != expected {expected_bkt}"
            )

    def test_liq_bucket_in_stats(self):
        """Every computed liq_bucket_3h must have a stats entry (0-21 in steps of 3)."""
        for name, stamp in KNOWN_TIMES.items():
            bkt = compute_esof(stamp)["liq_bucket_3h"]
            assert bkt in LIQ_HOUR_STATS, f"{name}: liq_bucket {bkt} not in LIQ_HOUR_STATS"
# ══════════════════════════════════════════════════════════════════════════════
# 4. Advisory scoring logic
# ══════════════════════════════════════════════════════════════════════════════
class TestAdvisoryScoring:
    """Relative-ordering and labelling checks on ``advisory_score`` / ``advisory_label``."""

    # Score cut-offs separating the five labels, highest first; a score must be
    # strictly greater than the cut-off to earn the label.
    # NOTE(review): these mirror the thresholds the previous version of
    # test_label_thresholds encoded — confirm against esof_advisor.
    _LABEL_CUTOFFS = (
        (0.25, "FAVORABLE"),
        (0.05, "MILD_POSITIVE"),
        (-0.05, "NEUTRAL"),
        (-0.25, "MILD_NEGATIVE"),
    )
    # Scores this close to a cut-off are not checked: whether the boundary is
    # inclusive, and whether the reported score is rounded, is not visible here.
    _CUTOFF_MARGIN = 1e-3

    @classmethod
    def _expected_label(cls, score):
        """Return the label the cut-off table assigns to *score*."""
        for cutoff, label in cls._LABEL_CUTOFFS:
            if score > cutoff:
                return label
        return "UNFAVORABLE"

    def test_best_known_cell_is_positive(self):
        """Sun 10h UTC (LONDON_MORNING, best DoW cell) → positive score."""
        d = compute_esof(KNOWN_TIMES["sun_london"])
        assert d["advisory_score"] > 0, f"Sun LDN score={d['advisory_score']} expected positive"

    def test_worst_known_cell_is_worse_than_best(self):
        """Thu OVLP score must be worse than Sun LDN score (best known cell)."""
        d_best = compute_esof(KNOWN_TIMES["sun_london"])
        d_worst = compute_esof(KNOWN_TIMES["thu_ovlp"])
        assert d_best["advisory_score"] > d_worst["advisory_score"], (
            f"Sun LDN {d_best['advisory_score']} not > Thu OVLP {d_worst['advisory_score']}"
        )

    def test_mon_worse_than_tue(self):
        """Monday score < Tuesday score (same time) — Mon WR 27% vs Tue WR 54%."""
        t_mon = datetime(2026, 4, 20, 10, 0, tzinfo=timezone.utc)  # Monday
        t_tue = datetime(2026, 4, 21, 10, 0, tzinfo=timezone.utc)  # Tuesday
        d_mon = compute_esof(t_mon)
        d_tue = compute_esof(t_tue)
        assert d_mon["advisory_score"] < d_tue["advisory_score"], (
            f"Mon {d_mon['advisory_score']} not < Tue {d_tue['advisory_score']}"
        )

    def test_sun_ny_negative(self):
        """Sun NY_AFTERNOON (6% WR) → negative or at most mild positive (DoW boost limited)."""
        d = compute_esof(KNOWN_TIMES["sun_ny"])
        # Session/liq drag should keep it from being FAVORABLE
        assert d["advisory_label"] not in {"FAVORABLE"}, \
            f"Sun NY labeled {d['advisory_label']} — expected not FAVORABLE"

    def test_score_monotone_session_ordering(self):
        """LONDON_MORNING score > NY_AFTERNOON score for same DoW."""
        base = datetime(2026, 4, 15, tzinfo=timezone.utc)  # Wednesday
        d_ldn = compute_esof(base.replace(hour=10))
        d_ny = compute_esof(base.replace(hour=19))
        assert d_ldn["advisory_score"] > d_ny["advisory_score"], \
            f"LDN {d_ldn['advisory_score']} not > NY {d_ny['advisory_score']}"

    def test_mercury_retrograde_penalty(self):
        """Mercury retrograde should reduce score by ~0.05."""
        t = datetime(2026, 3, 15, 10, 0, tzinfo=timezone.utc)  # known retro period
        d = compute_esof(t)
        assert d["mercury_retrograde"] is True, "Expected mercury retrograde on 2026-03-15"
        # Score would be ~0.05 lower than without retrograde
        assert d["advisory_score"] <= 0.95, "Score should not be at ceiling during retrograde"

    def test_label_thresholds(self):
        """Labels must correspond to score ranges.

        The label actually returned by ``compute_esof`` is compared with the
        label the cut-off table implies for the returned score, over every
        whole UTC hour of one week plus the named fixtures.
        """
        # Sanity-check the cut-off table itself with one score per band.
        cases = [
            (0.30, "FAVORABLE"),
            (0.10, "MILD_POSITIVE"),
            (0.00, "NEUTRAL"),
            (-0.10, "MILD_NEGATIVE"),
            (-0.30, "UNFAVORABLE"),
        ]
        for score, expected_label in cases:
            got = self._expected_label(score)
            assert got == expected_label, \
                f"score={score}: got {got} expected {expected_label}"

        week_start = datetime(2026, 4, 13, tzinfo=timezone.utc)  # Monday
        samples = [week_start + timedelta(hours=h) for h in range(7 * 24)]
        samples.extend(KNOWN_TIMES.values())
        for t in samples:
            d = compute_esof(t)
            score = d["advisory_score"]
            near_cutoff = any(
                abs(score - cutoff) < self._CUTOFF_MARGIN
                for cutoff, _ in self._LABEL_CUTOFFS
            )
            if near_cutoff:
                continue
            expected_label = self._expected_label(score)
            assert d["advisory_label"] == expected_label, \
                f"{t.isoformat()} score={score}: got {d['advisory_label']} expected {expected_label}"
# ══════════════════════════════════════════════════════════════════════════════
# 5. Expectancy table internal consistency
# ══════════════════════════════════════════════════════════════════════════════
class TestExpectancyTables:
    """Sanity checks on the static expectancy tables and on bounded output fields."""

    def test_session_stats_wr_range(self):
        """Each session row is a 4-tuple with WR in [0, 100] and a positive count."""
        for sess, (count, win_rate, _net, _avg) in SESSION_STATS.items():
            assert 0 <= win_rate <= 100, f"{sess}: WR {win_rate} out of range"
            assert count > 0, f"{sess}: n={count}"

    def test_dow_stats_completeness(self):
        """DOW_STATS is keyed by exactly the integers 0..6."""
        assert set(DOW_STATS.keys()) == set(range(7)), "DOW_STATS must cover Mon-Sun (0-6)"

    def test_dow_names_alignment(self):
        """DOW_NAMES has seven entries running from Mon to Sun."""
        assert len(DOW_NAMES) == 7
        assert DOW_NAMES[0] == "Mon" and DOW_NAMES[6] == "Sun"

    def test_liq_hour_stats_completeness(self):
        """LIQ_HOUR_STATS has one entry per 3-hour bucket start."""
        expected_buckets = {0, 3, 6, 9, 12, 15, 18, 21}
        assert set(LIQ_HOUR_STATS.keys()) == expected_buckets

    def test_liq_hour_best_bucket_is_12(self):
        """liq 12-15h should have highest WR and most positive net PnL."""
        def win_rate(bucket):
            return LIQ_HOUR_STATS[bucket][1]

        def net_pnl(bucket):
            return LIQ_HOUR_STATS[bucket][2]

        best_wr_bkt = max(LIQ_HOUR_STATS, key=win_rate)
        best_net_bkt = max(LIQ_HOUR_STATS, key=net_pnl)
        assert best_wr_bkt == 12, f"Expected liq 12h best WR, got {best_wr_bkt}"
        assert best_net_bkt == 12, f"Expected liq 12h best net, got {best_net_bkt}"

    def test_liq_hour_worst_bucket_is_18(self):
        """liq 18-21h (NY afternoon) should have lowest WR and worst net PnL."""
        def win_rate(bucket):
            return LIQ_HOUR_STATS[bucket][1]

        def net_pnl(bucket):
            return LIQ_HOUR_STATS[bucket][2]

        worst_wr_bkt = min(LIQ_HOUR_STATS, key=win_rate)
        worst_net_bkt = min(LIQ_HOUR_STATS, key=net_pnl)
        assert worst_wr_bkt == 18, f"Expected liq 18h worst WR, got {worst_wr_bkt}"
        assert worst_net_bkt == 18, f"Expected liq 18h worst net, got {worst_net_bkt}"

    def test_baseline_wr_is_reasonable(self):
        """BASELINE_WR sits in a narrow band around the overall win rate."""
        # Overall WR from 637 trades was 278/637 = 43.6%
        assert 42.0 < BASELINE_WR < 45.0, f"BASELINE_WR {BASELINE_WR} looks wrong"

    def test_slot_stats_wr_range(self):
        """Each slot row has WR in [0, 100] and at least three trades."""
        for slot, row in SLOT_STATS.items():
            count = row[0]
            win_rate = row[1]
            assert 0 <= win_rate <= 100, f"slot {slot}: WR {win_rate} out of range"
            assert count >= 3, f"slot {slot}: n={count} below minimum threshold"

    def test_moon_illumination_range(self):
        """moon_illumination is a fraction in [0, 1] for every known time."""
        for name, stamp in KNOWN_TIMES.items():
            illum = compute_esof(stamp)["moon_illumination"]
            assert 0.0 <= illum <= 1.0, f"{name}: moon_illumination {illum} out of [0,1]"

    def test_fib_strength_range(self):
        """fib_strength is in [0, 1] for every known time."""
        for name, stamp in KNOWN_TIMES.items():
            fs = compute_esof(stamp)["fib_strength"]
            assert 0.0 <= fs <= 1.0, f"{name}: fib_strength {fs} out of [0,1]"

    def test_market_cycle_pos_range(self):
        """market_cycle_pos is in the half-open interval [0, 1) for every known time."""
        for name, stamp in KNOWN_TIMES.items():
            cp = compute_esof(stamp)["market_cycle_pos"]
            assert 0.0 <= cp < 1.0, f"{name}: market_cycle_pos {cp} out of [0,1)"
# ══════════════════════════════════════════════════════════════════════════════
# 6. Moon approximation correctness
# ══════════════════════════════════════════════════════════════════════════════
class TestMoonApproximation:
    """Checks on the moon-illumination and Mercury-retrograde fields of ``compute_esof``."""

    # Approximate moon phases for April 2026, kept as reference data only — no
    # test in this class reads it. Dates follow the synodic-cycle arithmetic
    # used by the tests below: new moon ~Apr 17, full moon ~Apr 2, and the
    # quarters about 7.4 days either side of the new moon.
    # NOTE(review): the phase-name strings are carried over unchanged; confirm
    # they match the values esof_advisor emits before asserting on them.
    KNOWN_MOONS = [
        (datetime(2026, 4, 17, tzinfo=timezone.utc), "NEW_MOON", 0.03),
        (datetime(2026, 4, 2, tzinfo=timezone.utc), "FULL_MOON", 0.97),
        (datetime(2026, 4, 24, tzinfo=timezone.utc), "WAXING", 0.45),  # first quarter ≈
        (datetime(2026, 4, 10, tzinfo=timezone.utc), "WANING", 0.50),  # last quarter ≈
    ]

    def test_new_moon_illumination_low(self):
        """Illumination is below 10% at the computed April 2026 new moon."""
        # 28th new moon after ref Jan 11 2024: ~Apr 17 2026 (computed from synodic cycle)
        # 28 * 29.53059 = 826.856 days → Jan 11 2024 + 826d = Apr 17 2026
        t = datetime(2026, 4, 17, 12, 0, tzinfo=timezone.utc)
        d = compute_esof(t)
        assert d["moon_illumination"] < 0.10, \
            f"Expected near-new-moon illumination ~0, got {d['moon_illumination']}"

    def test_full_moon_illumination_high(self):
        """Illumination is above 90% half a cycle before that new moon."""
        # Halfway between 27th (Mar 18) and 28th (Apr 17) new moon = ~Apr 2 2026
        t = datetime(2026, 4, 2, 12, 0, tzinfo=timezone.utc)
        d = compute_esof(t)
        assert d["moon_illumination"] > 0.90, \
            f"Expected near-full-moon illumination, got {d['moon_illumination']}"

    def test_mercury_retrograde_period(self):
        """2026-03-07 to 2026-03-30 is Mercury retrograde."""
        in_retro = datetime(2026, 3, 15, 12, 0, tzinfo=timezone.utc)
        post_retro = datetime(2026, 4, 5, 12, 0, tzinfo=timezone.utc)
        assert compute_esof(in_retro)["mercury_retrograde"] is True
        assert compute_esof(post_retro)["mercury_retrograde"] is False
# ══════════════════════════════════════════════════════════════════════════════
# 7. get_advisory() public API
# ══════════════════════════════════════════════════════════════════════════════
class TestPublicAPI:
    """Checks on the ``get_advisory`` entry point and on repeatability."""

    def test_get_advisory_no_args(self):
        """get_advisory() with no args should use current time."""
        advisory = get_advisory()
        assert "advisory_score" in advisory
        assert "advisory_label" in advisory

    def test_get_advisory_with_time(self):
        """An explicit datetime is honoured by get_advisory."""
        advisory = get_advisory(KNOWN_TIMES["sun_london"])
        assert advisory["dow_name"] == "Sun"
        assert advisory["session"] == "LONDON_MORNING"

    def test_deterministic(self):
        """Same input → same output."""
        stamp = KNOWN_TIMES["midday_win"]
        first = compute_esof(stamp)
        second = compute_esof(stamp)
        # Compared in the same order as the individual assertions they replace.
        for field in ("advisory_score", "advisory_label", "session", "liq_weighted_hour"):
            assert first[field] == second[field]
# ══════════════════════════════════════════════════════════════════════════════
# 8. Integration — HZ round-trip (skipped if HZ unavailable)
# ══════════════════════════════════════════════════════════════════════════════
class TestHZIntegration:
    """Hazelcast round-trip tests; skipped when no client can be created."""

    @pytest.fixture(scope="class")
    def hz_client(self):
        """Yield a Hazelcast client for the class, or skip if none can be created.

        Only client construction is guarded: an import or connection failure
        skips the tests. The client is always shut down afterwards, and a
        shutdown error is reported as an error, not as a skip.
        """
        try:
            import hazelcast
            client = hazelcast.HazelcastClient(
                cluster_name="dolphin",
                cluster_members=["localhost:5701"],
                connection_timeout=2.0,
            )
        except Exception:
            pytest.skip("Hazelcast not available")
        try:
            yield client
        finally:
            client.shutdown()

    def test_hz_write_and_read(self, hz_client):
        """A payload written by ``_hz_write`` can be read back and parsed as JSON."""
        import time
        from esof_advisor import _hz_write
        d = compute_esof(KNOWN_TIMES["sun_london"])
        _hz_write(d)
        # Short pause before reading the key back.
        time.sleep(0.3)
        raw = hz_client.get_map("DOLPHIN_FEATURES").blocking().get("esof_advisor_latest")
        assert raw is not None, "esof_advisor_latest not found in HZ after write"
        parsed = json.loads(raw)
        assert parsed["advisory_label"] == d["advisory_label"]
        assert parsed["session"] == "LONDON_MORNING"

    def test_hz_value_is_json(self, hz_client):
        """Whatever is stored under the key parses as JSON with an advisory_score."""
        raw = hz_client.get_map("DOLPHIN_FEATURES").blocking().get("esof_advisor_latest")
        if raw is None:
            pytest.skip("No esof_advisor_latest in HZ yet")
        parsed = json.loads(raw)
        assert "advisory_score" in parsed
# ══════════════════════════════════════════════════════════════════════════════
# 9. Integration — CH write (skipped if CH unavailable)
# ══════════════════════════════════════════════════════════════════════════════
class TestCHIntegration:
    """ClickHouse write/schema tests; skipped when the HTTP ping fails."""

    # NOTE(review): endpoint and credentials are hard-coded in test source;
    # consider reading them from the environment.
    _CH_BASE_URL = "http://localhost:8123"
    _CH_USER = "dolphin"
    _CH_KEY = "dolphin_ch_2026"

    @classmethod
    def _ch_request(cls, path, data=None, method=None):
        """Build an authenticated request for *path* on the ClickHouse HTTP port."""
        import urllib.request
        req = urllib.request.Request(cls._CH_BASE_URL + path, data=data, method=method)
        req.add_header("X-ClickHouse-User", cls._CH_USER)
        req.add_header("X-ClickHouse-Key", cls._CH_KEY)
        return req

    @classmethod
    def _ch_query(cls, sql):
        """POST *sql* against the ``dolphin`` database and return the stripped body."""
        import urllib.request
        req = cls._ch_request("/?database=dolphin", data=sql.encode(), method="POST")
        with urllib.request.urlopen(req, timeout=5) as resp:
            return resp.read().decode().strip()

    @pytest.fixture(scope="class")
    def ch_available(self):
        """Skip the class unless ``/ping`` answers within two seconds."""
        import urllib.request
        try:
            # Context manager closes the ping response instead of leaking it.
            with urllib.request.urlopen(self._ch_request("/ping"), timeout=2):
                pass
        except Exception:
            pytest.skip("ClickHouse not available")

    def test_ch_write_no_exception(self, ch_available):
        """``_ch_write`` completes without raising for a known payload."""
        from esof_advisor import _ch_write
        d = compute_esof(KNOWN_TIMES["sun_london"])
        # Should complete without raising
        _ch_write(d)

    def test_ch_table_has_data(self, ch_available):
        """The ``esof_advisory`` table can be queried."""
        count = int(self._ch_query("SELECT count() FROM esof_advisory"))
        # The query succeeding is the real check: the table exists
        # (count may be 0 if never written via daemon).
        assert count >= 0

    def test_ch_schema_correct(self, ch_available):
        """The table exposes the columns the advisor relies on."""
        raw = self._ch_query(
            "SELECT name FROM system.columns WHERE table='esof_advisory' AND database='dolphin' FORMAT CSV"
        )
        # One column name per CSV line; strip the quoting so names are compared
        # exactly and "session" cannot be satisfied by a longer name containing it.
        columns = {line.strip().strip('"') for line in raw.splitlines() if line.strip()}
        for required in ("advisory_score", "liq_weighted_hour", "session"):
            assert required in columns, f"column {required} missing from esof_advisory"