Files
DOLPHIN/prod/tests/test_esof_advisor.py
hjnormey 01c19662cb initial: import DOLPHIN baseline 2026-04-21 from dolphinng5_predict working tree
Includes core prod + GREEN/BLUE subsystems:
- prod/ (BLUE harness, configs, scripts, docs)
- nautilus_dolphin/ (GREEN Nautilus-native impl + dvae/ preserved)
- adaptive_exit/ (AEM engine + models/bucket_assignments.pkl)
- Observability/ (EsoF advisor, TUI, dashboards)
- external_factors/ (EsoF producer)
- mc_forewarning_qlabs_fork/ (MC regime/envelope)

Excludes runtime caches, logs, backups, and reproducible artifacts per .gitignore.
2026-04-21 16:58:38 +02:00

493 lines
24 KiB
Python
Executable File

"""
EsoF Advisory — unit + integration tests
=========================================
Tests:
1. compute_esof() — output schema and deterministic outputs for known datetimes
2. Session classification — boundary conditions
3. Weighted hours — real vs fallback consistency
4. Advisory score — scoring logic, clamping, labels
5. Expectancy tables — internal consistency
6. Moon approximation — illumination near new/full moon, Mercury retrograde flag
7. get_advisory() public API — default time, explicit time, determinism
8. HZ round-trip (integration, skipped if HZ down)
9. CH write (integration, skipped if CH down)
Run:
source /home/dolphin/siloqy_env/bin/activate
cd /mnt/dolphinng5_predict/prod && pytest tests/test_esof_advisor.py -v
"""
import sys
import json
import math
import pytest
from datetime import datetime, timezone, timedelta
from pathlib import Path
from unittest.mock import patch
# ── Path setup ────────────────────────────────────────────────────────────────
# Prepend two directories, resolved relative to
# Path(__file__).parent.parent.parent, to sys.path so the `esof_advisor`
# import below can be resolved when the tests run from a source checkout.
# Each insert goes to index 0, so `external_factors` ends up ahead of
# `Observability` in the search order.
# NOTE(review): presumably esof_advisor lives in Observability/ and its optional
# weighted-hours dependency in external_factors/ — confirm.
sys.path.insert(0, str(Path(__file__).parent.parent.parent / "Observability"))
sys.path.insert(0, str(Path(__file__).parent.parent.parent / "external_factors"))
from esof_advisor import (
compute_esof,
get_session,
get_advisory,
SESSION_STATS,
DOW_STATS,
LIQ_HOUR_STATS,
SLOT_STATS,
BASELINE_WR,
DOW_NAMES,
_get_weighted_hours,
_WEIGHTED_HOURS_AVAILABLE,
)
# ── Fixtures ──────────────────────────────────────────────────────────────────
# Named UTC timestamps shared by the tests below; the trailing notes record
# which expectancy cell each one is meant to hit.
KNOWN_TIMES = dict(
    sun_london=datetime(2026, 4, 19, 10, 0, tzinfo=timezone.utc),   # Sun LDN — best cell
    thu_ovlp=datetime(2026, 4, 16, 15, 0, tzinfo=timezone.utc),     # Thu OVLP — worst cell
    sun_ny=datetime(2026, 4, 19, 19, 0, tzinfo=timezone.utc),       # Sun NY — near 0% WR
    mon_asia=datetime(2026, 4, 20, 3, 0, tzinfo=timezone.utc),      # Mon ASIA — bad
    tue_asia=datetime(2026, 4, 21, 3, 0, tzinfo=timezone.utc),      # Tue ASIA — best day
    midday_win=datetime(2026, 4, 15, 11, 30, tzinfo=timezone.utc),  # 11:30 — 87.5% WR slot
)
# ══════════════════════════════════════════════════════════════════════════════
# 1. compute_esof() — output schema
# ══════════════════════════════════════════════════════════════════════════════
class TestComputeEsofSchema:
    """Schema-level checks on the dict produced by ``compute_esof``."""

    # Keys that every compute_esof() result is required to expose.
    REQUIRED_KEYS = [
        "ts", "_ts", "dow", "dow_name", "hour_utc", "slot_15m", "session",
        "pop_weighted_hour", "liq_weighted_hour", "liq_bucket_3h",
        "moon_illumination", "moon_phase", "mercury_retrograde",
        "market_cycle_pos", "fib_strength",
        "liq_wr_pct", "liq_net_pnl",
        "slot_wr_pct", "slot_net_pnl",
        "session_wr_pct", "session_net_pnl",
        "dow_wr_pct", "dow_net_pnl",
        "advisory_score", "advisory_label",
    ]

    def test_all_keys_present(self):
        """Each entry of REQUIRED_KEYS is a key of the result."""
        result = compute_esof(KNOWN_TIMES["sun_london"])
        for required in self.REQUIRED_KEYS:
            assert required in result, f"Missing key: {required}"

    def test_ts_matches_input(self):
        """Hour and day-of-week fields reflect the input timestamp."""
        result = compute_esof(KNOWN_TIMES["sun_london"])
        assert result["hour_utc"] == 10
        assert result["dow"] == 6  # Sunday
        assert result["dow_name"] == "Sun"

    def test_slot_15m_format(self):
        """A time inside a quarter-hour maps to that quarter's start label."""
        # At 11:37 UTC → slot should be 11:30
        stamp = datetime(2026, 4, 15, 11, 37, tzinfo=timezone.utc)
        assert compute_esof(stamp)["slot_15m"] == "11:30"

    def test_slot_15m_boundaries(self):
        """First and last minute of each quarter land in the same slot."""
        boundary_cases = (
            (0, 0, "0:00"), (0, 14, "0:00"),
            (0, 15, "0:15"), (0, 29, "0:15"),
            (0, 30, "0:30"), (0, 44, "0:30"),
            (0, 45, "0:45"), (0, 59, "0:45"),
            (23, 59, "23:45"),
        )
        for h, m, expected in boundary_cases:
            stamp = datetime(2026, 4, 15, h, m, tzinfo=timezone.utc)
            slot = compute_esof(stamp)["slot_15m"]
            assert slot == expected, f"{h}:{m} → expected {expected}"

    def test_advisory_score_clamped(self):
        """advisory_score stays inside [-1, 1] for every known time."""
        for name, stamp in KNOWN_TIMES.items():
            sc = compute_esof(stamp)["advisory_score"]
            assert -1.0 <= sc <= 1.0, f"{name}: advisory_score {sc} out of [-1,1]"

    def test_advisory_label_valid(self):
        """advisory_label is one of the five recognised labels."""
        allowed = {"FAVORABLE", "MILD_POSITIVE", "NEUTRAL", "MILD_NEGATIVE", "UNFAVORABLE"}
        for name, stamp in KNOWN_TIMES.items():
            d = compute_esof(stamp)
            assert d["advisory_label"] in allowed, f"{name}: bad label {d['advisory_label']}"
# ══════════════════════════════════════════════════════════════════════════════
# 2. Session classification
# ══════════════════════════════════════════════════════════════════════════════
class TestSessionClassification:
    """Checks on ``get_session`` and the ``session`` field of ``compute_esof``."""

    def test_all_sessions_reachable(self):
        """The 24 integer hours together produce exactly the five session names."""
        sessions = {get_session(h) for h in range(24)}
        assert sessions == {
            "ASIA_PACIFIC", "LONDON_MORNING", "LN_NY_OVERLAP",
            "NY_AFTERNOON", "LOW_LIQUIDITY"
        }

    @pytest.mark.parametrize("hour,expected", [
        (0, "ASIA_PACIFIC"),
        (7, "ASIA_PACIFIC"),
        (7.99, "ASIA_PACIFIC"),
        (8, "LONDON_MORNING"),
        (12, "LONDON_MORNING"),
        (12.99, "LONDON_MORNING"),
        (13, "LN_NY_OVERLAP"),
        (16.99, "LN_NY_OVERLAP"),
        (17, "NY_AFTERNOON"),
        (20.99, "NY_AFTERNOON"),
        (21, "LOW_LIQUIDITY"),
        (23.99, "LOW_LIQUIDITY"),
    ])
    def test_session_boundaries(self, hour, expected):
        """Each session starts at its opening hour and runs up to the next one.

        Fractional hours just below a boundary (e.g. 7.99) must still belong
        to the earlier session.
        """
        assert get_session(hour) == expected

    def test_known_times_sessions(self):
        """compute_esof() reports the expected session for the shared fixtures."""
        assert compute_esof(KNOWN_TIMES["sun_london"])["session"] == "LONDON_MORNING"
        assert compute_esof(KNOWN_TIMES["thu_ovlp"])["session"] == "LN_NY_OVERLAP"
        assert compute_esof(KNOWN_TIMES["sun_ny"])["session"] == "NY_AFTERNOON"
        assert compute_esof(KNOWN_TIMES["mon_asia"])["session"] == "ASIA_PACIFIC"

    def test_session_stats_coverage(self):
        """Every reachable session must have an expectancy entry."""
        for h in range(24):
            sess = get_session(h)
            assert sess in SESSION_STATS, f"Session {sess} missing from SESSION_STATS"
# ══════════════════════════════════════════════════════════════════════════════
# 3. Weighted hours
# ══════════════════════════════════════════════════════════════════════════════
class TestWeightedHours:
    """Checks on ``_get_weighted_hours`` and the liquidity-bucket fields."""

    def test_pop_hour_range(self):
        """Both weighted hours fall in [0, 24) for every UTC hour of one day."""
        midnight = datetime(2026, 4, 15, 0, 0, tzinfo=timezone.utc)
        for h in range(24):
            ph, lh = _get_weighted_hours(midnight + timedelta(hours=h))
            assert 0 <= ph < 24, f"pop_hour {ph} at {h}h out of range"
            assert 0 <= lh < 24, f"liq_hour {lh} at {h}h out of range"

    def test_liq_hour_monotone_utc(self):
        """liq_hour increases monotonically with UTC (within the same calendar day)."""
        midnight = datetime(2026, 4, 15, 0, 0, tzinfo=timezone.utc)
        prev_lh = None
        for h in range(23):
            _, lh = _get_weighted_hours(midnight + timedelta(hours=h))
            # The first sample has nothing to compare against.
            assert prev_lh is None or lh > prev_lh, \
                f"liq_hour not monotone at {h}h: {lh} <= {prev_lh}"
            prev_lh = lh

    def test_fallback_consistency(self):
        """Fallback approximation should be within ±1h of real computation."""
        if not _WEIGHTED_HOURS_AVAILABLE:
            pytest.skip("MarketIndicators not available")
        noon = datetime(2026, 4, 15, 12, 0, tzinfo=timezone.utc)
        real_ph, real_lh = _get_weighted_hours(noon)
        # Approximation offsets applied to the UTC hour of `noon`.
        h = 12.0
        approx_ph, approx_lh = ((h + offset) % 24 for offset in (4.21, 0.98))
        assert abs(real_ph - approx_ph) < 1.0, f"pop_hour fallback error: {real_ph} vs {approx_ph}"
        assert abs(real_lh - approx_lh) < 1.0, f"liq_hour fallback error: {real_lh} vs {approx_lh}"

    def test_liq_bucket_aligns(self):
        """liq_bucket_3h must match floor(liq_weighted_hour / 3) * 3."""
        for name, stamp in KNOWN_TIMES.items():
            d = compute_esof(stamp)
            expected_bkt = 3 * int(d["liq_weighted_hour"] // 3)
            assert d["liq_bucket_3h"] == expected_bkt, (
                f"{name}: liq_bucket {d['liq_bucket_3h']} != expected {expected_bkt}"
            )

    def test_liq_bucket_in_stats(self):
        """Every computed liq_bucket_3h must have a stats entry (0-21 in steps of 3)."""
        for name, stamp in KNOWN_TIMES.items():
            bkt = compute_esof(stamp)["liq_bucket_3h"]
            assert bkt in LIQ_HOUR_STATS, f"{name}: liq_bucket {bkt} not in LIQ_HOUR_STATS"
# ══════════════════════════════════════════════════════════════════════════════
# 4. Advisory scoring logic
# ══════════════════════════════════════════════════════════════════════════════
class TestAdvisoryScoring:
def test_best_known_cell_is_positive(self):
"""Sun 10h UTC (LONDON_MORNING, best DoW cell) → positive score."""
d = compute_esof(KNOWN_TIMES["sun_london"])
assert d["advisory_score"] > 0, f"Sun LDN score={d['advisory_score']} expected positive"
def test_worst_known_cell_is_worse_than_best(self):
"""Thu OVLP score must be worse than Sun LDN score (best known cell)."""
d_best = compute_esof(KNOWN_TIMES["sun_london"])
d_worst = compute_esof(KNOWN_TIMES["thu_ovlp"])
assert d_best["advisory_score"] > d_worst["advisory_score"], (
f"Sun LDN {d_best['advisory_score']} not > Thu OVLP {d_worst['advisory_score']}"
)
def test_mon_worse_than_tue(self):
"""Monday score < Tuesday score (same time) — Mon WR 27% vs Tue WR 54%."""
t_mon = datetime(2026, 4, 20, 10, 0, tzinfo=timezone.utc) # Monday
t_tue = datetime(2026, 4, 21, 10, 0, tzinfo=timezone.utc) # Tuesday
d_mon = compute_esof(t_mon)
d_tue = compute_esof(t_tue)
assert d_mon["advisory_score"] < d_tue["advisory_score"], (
f"Mon {d_mon['advisory_score']} not < Tue {d_tue['advisory_score']}"
)
def test_sun_ny_negative(self):
"""Sun NY_AFTERNOON (6% WR) → negative or at most mild positive (DoW boost limited)."""
d = compute_esof(KNOWN_TIMES["sun_ny"])
# Session/liq drag should keep it from being FAVORABLE
assert d["advisory_label"] not in {"FAVORABLE"}, \
f"Sun NY labeled {d['advisory_label']} — expected not FAVORABLE"
def test_score_monotone_session_ordering(self):
"""LONDON_MORNING score > NY_AFTERNOON score for same DoW."""
base = datetime(2026, 4, 15, tzinfo=timezone.utc) # Tuesday
d_ldn = compute_esof(base.replace(hour=10))
d_ny = compute_esof(base.replace(hour=19))
assert d_ldn["advisory_score"] > d_ny["advisory_score"], \
f"LDN {d_ldn['advisory_score']} not > NY {d_ny['advisory_score']}"
def test_mercury_retrograde_penalty(self):
"""Mercury retrograde should reduce score by ~0.05."""
t = datetime(2026, 3, 15, 10, 0, tzinfo=timezone.utc) # known retro period
d = compute_esof(t)
assert d["mercury_retrograde"] is True, "Expected mercury retrograde on 2026-03-15"
# Score would be ~0.05 lower than without retrograde
assert d["advisory_score"] <= 0.95, "Score should not be at ceiling during retrograde"
def test_label_thresholds(self):
"""Labels must correspond to score ranges."""
cases = [
(0.30, "FAVORABLE"),
(0.10, "MILD_POSITIVE"),
(0.00, "NEUTRAL"),
(-0.10, "MILD_NEGATIVE"),
(-0.30, "UNFAVORABLE"),
]
for score, expected_label in cases:
# Patch compute to return known score
with patch("esof_advisor.compute_esof") as mock:
mock.return_value = {
"advisory_score": score,
"advisory_label": (
"FAVORABLE" if score > 0.25 else
"MILD_POSITIVE"if score > 0.05 else
"NEUTRAL" if score > -0.05 else
"MILD_NEGATIVE"if score > -0.25 else
"UNFAVORABLE"
),
}
result = mock()
assert result["advisory_label"] == expected_label, \
f"score={score}: got {result['advisory_label']} expected {expected_label}"
# ══════════════════════════════════════════════════════════════════════════════
# 5. Expectancy table internal consistency
# ══════════════════════════════════════════════════════════════════════════════
class TestExpectancyTables:
    """Sanity checks on the static expectancy tables and bounded esoteric fields."""

    def test_session_stats_wr_range(self):
        """Each session row is a 4-tuple with WR in [0, 100] and a positive count."""
        for sess, row in SESSION_STATS.items():
            n, wr, _net, _avg = row
            assert 0 <= wr <= 100, f"{sess}: WR {wr} out of range"
            assert n > 0, f"{sess}: n={n}"

    def test_dow_stats_completeness(self):
        """DOW_STATS is keyed by exactly the integers 0..6."""
        assert set(DOW_STATS.keys()) == set(range(7)), "DOW_STATS must cover Mon-Sun (0-6)"

    def test_dow_names_alignment(self):
        """DOW_NAMES has seven entries running Mon..Sun."""
        assert len(DOW_NAMES) == 7
        first, last = DOW_NAMES[0], DOW_NAMES[6]
        assert first == "Mon" and last == "Sun"

    def test_liq_hour_stats_completeness(self):
        """LIQ_HOUR_STATS has one entry per 3-hour bucket start."""
        expected_buckets = set(range(0, 24, 3))
        assert set(LIQ_HOUR_STATS.keys()) == expected_buckets

    def test_liq_hour_best_bucket_is_12(self):
        """liq 12-15h should have highest WR and most positive net PnL."""
        def wr_of(bucket):
            return LIQ_HOUR_STATS[bucket][1]

        def net_of(bucket):
            return LIQ_HOUR_STATS[bucket][2]

        best_wr_bkt = max(LIQ_HOUR_STATS, key=wr_of)
        best_net_bkt = max(LIQ_HOUR_STATS, key=net_of)
        assert best_wr_bkt == 12, f"Expected liq 12h best WR, got {best_wr_bkt}"
        assert best_net_bkt == 12, f"Expected liq 12h best net, got {best_net_bkt}"

    def test_liq_hour_worst_bucket_is_18(self):
        """liq 18-21h (NY afternoon) should have lowest WR and worst net PnL."""
        def wr_of(bucket):
            return LIQ_HOUR_STATS[bucket][1]

        def net_of(bucket):
            return LIQ_HOUR_STATS[bucket][2]

        worst_wr_bkt = min(LIQ_HOUR_STATS, key=wr_of)
        worst_net_bkt = min(LIQ_HOUR_STATS, key=net_of)
        assert worst_wr_bkt == 18, f"Expected liq 18h worst WR, got {worst_wr_bkt}"
        assert worst_net_bkt == 18, f"Expected liq 18h worst net, got {worst_net_bkt}"

    def test_baseline_wr_is_reasonable(self):
        """BASELINE_WR sits in a narrow window around the recorded overall WR."""
        # Overall WR from 637 trades was 278/637 = 43.6%
        assert 42.0 < BASELINE_WR < 45.0, f"BASELINE_WR {BASELINE_WR} looks wrong"

    def test_slot_stats_wr_range(self):
        """Each slot row has WR in [0, 100] and at least 3 samples."""
        for slot, data in SLOT_STATS.items():
            n = data[0]
            wr = data[1]
            assert 0 <= wr <= 100, f"slot {slot}: WR {wr} out of range"
            assert n >= 3, f"slot {slot}: n={n} below minimum threshold"

    def test_moon_illumination_range(self):
        """moon_illumination is a fraction in [0, 1]."""
        for name, stamp in KNOWN_TIMES.items():
            illum = compute_esof(stamp)["moon_illumination"]
            assert 0.0 <= illum <= 1.0, f"{name}: moon_illumination {illum} out of [0,1]"

    def test_fib_strength_range(self):
        """fib_strength is a fraction in [0, 1]."""
        for name, stamp in KNOWN_TIMES.items():
            fs = compute_esof(stamp)["fib_strength"]
            assert 0.0 <= fs <= 1.0, f"{name}: fib_strength {fs} out of [0,1]"

    def test_market_cycle_pos_range(self):
        """market_cycle_pos is a fraction in the half-open range [0, 1)."""
        for name, stamp in KNOWN_TIMES.items():
            cp = compute_esof(stamp)["market_cycle_pos"]
            assert 0.0 <= cp < 1.0, f"{name}: market_cycle_pos {cp} out of [0,1)"
# ══════════════════════════════════════════════════════════════════════════════
# 6. Moon approximation correctness
# ══════════════════════════════════════════════════════════════════════════════
class TestMoonApproximation:
# Known moon phases (approximate)
KNOWN_MOONS = [
(datetime(2026, 4, 7, tzinfo=timezone.utc), "NEW_MOON", 0.03),
(datetime(2026, 4, 20, tzinfo=timezone.utc), "FULL_MOON", 0.97),
(datetime(2026, 4, 13, tzinfo=timezone.utc), "WAXING", 0.45), # first quarter ≈
(datetime(2026, 4, 26, tzinfo=timezone.utc), "WANING", 0.50), # last quarter ≈
]
def test_new_moon_illumination_low(self):
# 28th new moon after ref Jan 11 2024: ~Apr 17 2026 (computed from synodic cycle)
# 28 * 29.53059 = 826.856 days → Jan 11 2024 + 826d = Apr 17 2026
t = datetime(2026, 4, 17, 12, 0, tzinfo=timezone.utc)
d = compute_esof(t)
assert d["moon_illumination"] < 0.10, \
f"Expected near-new-moon illumination ~0, got {d['moon_illumination']}"
def test_full_moon_illumination_high(self):
# Halfway between 27th (Mar 18) and 28th (Apr 17) new moon = ~Apr 2 2026
t = datetime(2026, 4, 2, 12, 0, tzinfo=timezone.utc)
d = compute_esof(t)
assert d["moon_illumination"] > 0.90, \
f"Expected near-full-moon illumination, got {d['moon_illumination']}"
def test_mercury_retrograde_period(self):
"""2026-03-07 to 2026-03-30 is Mercury retrograde."""
in_retro = datetime(2026, 3, 15, 12, 0, tzinfo=timezone.utc)
post_retro = datetime(2026, 4, 5, 12, 0, tzinfo=timezone.utc)
assert compute_esof(in_retro)["mercury_retrograde"] is True
assert compute_esof(post_retro)["mercury_retrograde"] is False
# ══════════════════════════════════════════════════════════════════════════════
# 7. get_advisory() public API
# ══════════════════════════════════════════════════════════════════════════════
class TestPublicAPI:
    """Checks on the public ``get_advisory`` entry point and on determinism."""

    def test_get_advisory_no_args(self):
        """get_advisory() with no args should use current time."""
        advisory = get_advisory()
        for field in ("advisory_score", "advisory_label"):
            assert field in advisory

    def test_get_advisory_with_time(self):
        """An explicit timestamp is reflected in the day and session fields."""
        advisory = get_advisory(KNOWN_TIMES["sun_london"])
        assert advisory["dow_name"] == "Sun"
        assert advisory["session"] == "LONDON_MORNING"

    def test_deterministic(self):
        """Same input → same output."""
        stamp = KNOWN_TIMES["midday_win"]
        first = compute_esof(stamp)
        second = compute_esof(stamp)
        for field in ("advisory_score", "advisory_label", "session", "liq_weighted_hour"):
            assert first[field] == second[field]
# ══════════════════════════════════════════════════════════════════════════════
# 8. Integration — HZ round-trip (skipped if HZ unavailable)
# ══════════════════════════════════════════════════════════════════════════════
class TestHZIntegration:
    """Hazelcast round-trip checks; skipped when no client can be created."""

    @pytest.fixture(scope="class")
    def hz_client(self):
        """Yield a Hazelcast client for the class, shutting it down afterwards.

        Skips the dependent tests when the ``hazelcast`` package cannot be
        imported or the client cannot be constructed.
        """
        # Only client creation is guarded: a failure here means the
        # integration environment is absent, which is a skip, not an error.
        try:
            import hazelcast
            client = hazelcast.HazelcastClient(
                cluster_name="dolphin",
                cluster_members=["localhost:5701"],
                connection_timeout=2.0,
            )
        except Exception:
            pytest.skip("Hazelcast not available")
        # Teardown lives in `finally` so a shutdown failure surfaces as itself
        # instead of being reported as "Hazelcast not available".
        try:
            yield client
        finally:
            client.shutdown()

    def test_hz_write_and_read(self, hz_client):
        """A payload passed to _hz_write() can be read back from the map as JSON."""
        import time
        from esof_advisor import _hz_write
        d = compute_esof(KNOWN_TIMES["sun_london"])
        _hz_write(d)
        # Brief pause before reading the value back.
        time.sleep(0.3)
        raw = hz_client.get_map("DOLPHIN_FEATURES").blocking().get("esof_advisor_latest")
        assert raw is not None, "esof_advisor_latest not found in HZ after write"
        parsed = json.loads(raw)
        assert parsed["advisory_label"] == d["advisory_label"]
        assert parsed["session"] == "LONDON_MORNING"

    def test_hz_value_is_json(self, hz_client):
        """Whatever is stored under esof_advisor_latest parses as JSON with a score."""
        raw = hz_client.get_map("DOLPHIN_FEATURES").blocking().get("esof_advisor_latest")
        if raw is None:
            pytest.skip("No esof_advisor_latest in HZ yet")
        parsed = json.loads(raw)
        assert "advisory_score" in parsed
# ══════════════════════════════════════════════════════════════════════════════
# 9. Integration — CH write (skipped if CH unavailable)
# ══════════════════════════════════════════════════════════════════════════════
class TestCHIntegration:
    """ClickHouse integration checks; skipped when the HTTP ping fails."""

    # NOTE(review): endpoint and credentials are hard-coded in the test file —
    # consider sourcing them from the environment.
    _CH_BASE_URL = "http://localhost:8123"
    _CH_USER = "dolphin"
    _CH_KEY = "dolphin_ch_2026"

    @classmethod
    def _ch_request(cls, path, data=None, method=None):
        """Build an authenticated urllib Request for *path* on the CH endpoint."""
        import urllib.request
        req = urllib.request.Request(cls._CH_BASE_URL + path, data=data, method=method)
        req.add_header("X-ClickHouse-User", cls._CH_USER)
        req.add_header("X-ClickHouse-Key", cls._CH_KEY)
        return req

    @classmethod
    def _ch_query(cls, q):
        """POST query *q* against the ``dolphin`` database and return the stripped text reply."""
        import urllib.request
        req = cls._ch_request("/?database=dolphin", data=q.encode(), method="POST")
        with urllib.request.urlopen(req, timeout=5) as r:
            return r.read().decode().strip()

    @pytest.fixture(scope="class")
    def ch_available(self):
        """Skip the dependent tests unless the ClickHouse ping endpoint answers."""
        import urllib.request
        try:
            # `with` closes the HTTP response instead of leaking the connection.
            with urllib.request.urlopen(self._ch_request("/ping"), timeout=2):
                pass
        except Exception:
            pytest.skip("ClickHouse not available")

    def test_ch_write_no_exception(self, ch_available):
        """_ch_write() accepts a compute_esof() payload without raising."""
        from esof_advisor import _ch_write
        d = compute_esof(KNOWN_TIMES["sun_london"])
        # Should complete without raising
        _ch_write(d)

    def test_ch_table_has_data(self, ch_available):
        """The esof_advisory table can be queried for a row count."""
        count = int(self._ch_query("SELECT count() FROM esof_advisory"))
        # The query succeeding and parsing as an int is the real check here.
        assert count >= 0  # table exists (may be 0 if never written via daemon)

    def test_ch_schema_correct(self, ch_available):
        """The esof_advisory table exposes the columns the advisor relies on."""
        cols = self._ch_query(
            "SELECT name FROM system.columns WHERE table='esof_advisory' AND database='dolphin' FORMAT CSV"
        )
        assert "advisory_score" in cols
        assert "liq_weighted_hour" in cols
        assert "session" in cols