# DOLPHIN/prod/tests/test_esof_advisor.py

"""
EsoF Advisory — unit + integration tests
=========================================
Tests:
  1. compute_esof() — output schema, deterministic outputs for known datetimes
  2. Session classification — boundary conditions
  3. Weighted hours — real vs fallback consistency
  4. Advisory score — scoring logic, clamping, labels
  5. Expectancy tables — internal consistency
  6. Moon approximation — illumination and Mercury retrograde flag
  7. get_advisory() — public API
  8. HZ round-trip (integration, skipped if HZ down)
  9. CH write (integration, skipped if CH down)

Run:
  source /home/dolphin/siloqy_env/bin/activate
  cd /mnt/dolphinng5_predict/prod && pytest tests/test_esof_advisor.py -v
"""
import sys
import json
import math
import pytest
from datetime import datetime, timezone, timedelta
from pathlib import Path
from unittest.mock import patch

# ── Path setup ────────────────────────────────────────────────────────────────
sys.path.insert(0, str(Path(__file__).parent.parent.parent / "Observability"))
sys.path.insert(0, str(Path(__file__).parent.parent.parent / "external_factors"))

from esof_advisor import (
    compute_esof,
    get_session,
    get_advisory,
    SESSION_STATS,
    DOW_STATS,
    LIQ_HOUR_STATS,
    SLOT_STATS,
    BASELINE_WR,
    DOW_NAMES,
    _get_weighted_hours,
    _WEIGHTED_HOURS_AVAILABLE,
)

# ── Fixtures ──────────────────────────────────────────────────────────────────
# Named reference instants (all timezone-aware UTC) shared by the tests below.
# Each key encodes the weekday/session cell the instant is meant to land in;
# the trailing notes are the original author's expectancy remarks.
KNOWN_TIMES = dict(
    sun_london=datetime(2026, 4, 19, 10, 0, tzinfo=timezone.utc),   # Sun LDN — best cell
    thu_ovlp=datetime(2026, 4, 16, 15, 0, tzinfo=timezone.utc),     # Thu OVLP — worst cell
    sun_ny=datetime(2026, 4, 19, 19, 0, tzinfo=timezone.utc),       # Sun NY — near 0% WR
    mon_asia=datetime(2026, 4, 20, 3, 0, tzinfo=timezone.utc),      # Mon ASIA — bad
    tue_asia=datetime(2026, 4, 21, 3, 0, tzinfo=timezone.utc),      # Tue ASIA — best day
    midday_win=datetime(2026, 4, 15, 11, 30, tzinfo=timezone.utc),  # Wed 11:30 — 87.5% WR slot
)


# ══════════════════════════════════════════════════════════════════════════════
# 1. compute_esof() — output schema
# ══════════════════════════════════════════════════════════════════════════════
class TestComputeEsofSchema:
    """Shape checks for the dict returned by compute_esof().

    Covers key presence, the calendar fields derived from the input
    timestamp, 15-minute slot labelling, and the range / vocabulary of the
    advisory score and label.
    """

    # Keys every compute_esof() result is expected to carry.
    REQUIRED_KEYS = [
        "ts",
        "_ts",
        "dow",
        "dow_name",
        "hour_utc",
        "slot_15m",
        "session",
        "pop_weighted_hour",
        "liq_weighted_hour",
        "liq_bucket_3h",
        "moon_illumination",
        "moon_phase",
        "mercury_retrograde",
        "market_cycle_pos",
        "fib_strength",
        "liq_wr_pct",
        "liq_net_pnl",
        "slot_wr_pct",
        "slot_net_pnl",
        "session_wr_pct",
        "session_net_pnl",
        "dow_wr_pct",
        "dow_net_pnl",
        "advisory_score",
        "advisory_label",
    ]

    def test_all_keys_present(self):
        """Each entry of REQUIRED_KEYS is a key of the result."""
        result = compute_esof(KNOWN_TIMES["sun_london"])
        for required in self.REQUIRED_KEYS:
            assert required in result, f"Missing key: {required}"

    def test_ts_matches_input(self):
        """Hour and day-of-week fields reflect the input instant (Sun 10:00 UTC)."""
        result = compute_esof(KNOWN_TIMES["sun_london"])
        assert result["hour_utc"] == 10
        # dow uses Monday=0, so Sunday is 6.
        assert result["dow"] == 6
        assert result["dow_name"] == "Sun"

    def test_slot_15m_format(self):
        """A mid-slot minute (11:37 UTC) is labelled with its slot start, 11:30."""
        result = compute_esof(datetime(2026, 4, 15, 11, 37, tzinfo=timezone.utc))
        assert result["slot_15m"] == "11:30"

    def test_slot_15m_boundaries(self):
        """First/last minute of each quarter-hour maps to that quarter's label."""
        # (hour, minute) -> expected slot label; hours are not zero-padded.
        expectations = {
            (0, 0): "0:00",
            (0, 14): "0:00",
            (0, 15): "0:15",
            (0, 29): "0:15",
            (0, 30): "0:30",
            (0, 44): "0:30",
            (0, 45): "0:45",
            (0, 59): "0:45",
            (23, 59): "23:45",
        }
        for (h, m), expected in expectations.items():
            stamp = datetime(2026, 4, 15, h, m, tzinfo=timezone.utc)
            assert compute_esof(stamp)["slot_15m"] == expected, f"{h}:{m} → expected {expected}"

    def test_advisory_score_clamped(self):
        """advisory_score stays within [-1, 1] for every reference instant."""
        for name, stamp in KNOWN_TIMES.items():
            sc = compute_esof(stamp)["advisory_score"]
            assert -1.0 <= sc <= 1.0, f"{name}: advisory_score {sc} out of [-1,1]"

    def test_advisory_label_valid(self):
        """advisory_label is always one of the five known labels."""
        allowed_labels = {
            "FAVORABLE",
            "MILD_POSITIVE",
            "NEUTRAL",
            "MILD_NEGATIVE",
            "UNFAVORABLE",
        }
        for name, stamp in KNOWN_TIMES.items():
            label = compute_esof(stamp)["advisory_label"]
            assert label in allowed_labels, f"{name}: bad label {label}"


# ══════════════════════════════════════════════════════════════════════════════
# 2. Session classification
# ══════════════════════════════════════════════════════════════════════════════
class TestSessionClassification:
    """Checks get_session() hour-to-session mapping and its SESSION_STATS coverage."""

    def test_all_sessions_reachable(self):
        """Sweeping the 24 whole UTC hours yields exactly the five session names."""
        reachable = {get_session(h) for h in range(24)}
        assert reachable == {
            "ASIA_PACIFIC", "LONDON_MORNING", "LN_NY_OVERLAP",
            "NY_AFTERNOON", "LOW_LIQUIDITY"
        }

    # Each pair brackets a session boundary: the last fractional hour before
    # the cut-over and the first whole hour after it.
    @pytest.mark.parametrize("hour,expected", [
        (0,  "ASIA_PACIFIC"),
        (7,  "ASIA_PACIFIC"),
        (7.99, "ASIA_PACIFIC"),
        (8,  "LONDON_MORNING"),
        (12, "LONDON_MORNING"),
        (12.99, "LONDON_MORNING"),
        (13, "LN_NY_OVERLAP"),
        (16.99, "LN_NY_OVERLAP"),
        (17, "NY_AFTERNOON"),
        (20.99, "NY_AFTERNOON"),
        (21, "LOW_LIQUIDITY"),
        (23.99, "LOW_LIQUIDITY"),
    ])
    def test_session_boundaries(self, hour, expected):
        """get_session() returns the expected session at and around each boundary."""
        assert get_session(hour) == expected

    def test_known_times_sessions(self):
        """compute_esof() reports the session each reference instant is named after."""
        assert compute_esof(KNOWN_TIMES["sun_london"])["session"] == "LONDON_MORNING"
        assert compute_esof(KNOWN_TIMES["thu_ovlp"])["session"]   == "LN_NY_OVERLAP"
        assert compute_esof(KNOWN_TIMES["sun_ny"])["session"]     == "NY_AFTERNOON"
        assert compute_esof(KNOWN_TIMES["mon_asia"])["session"]   == "ASIA_PACIFIC"

    def test_session_stats_coverage(self):
        """Every reachable session must have an expectancy entry."""
        for h in range(24):
            sess = get_session(h)
            assert sess in SESSION_STATS, f"Session {sess} missing from SESSION_STATS"

# ══════════════════════════════════════════════════════════════════════════════
# 3. Weighted hours
# ══════════════════════════════════════════════════════════════════════════════
class TestWeightedHours:
    """Checks _get_weighted_hours() and the liq_bucket_3h field derived from it."""

    def test_pop_hour_range(self):
        """Both weighted hours fall in [0, 24) for every whole UTC hour of a day."""
        midnight = datetime(2026, 4, 15, 0, 0, tzinfo=timezone.utc)
        for h in range(24):
            ph, lh = _get_weighted_hours(midnight + timedelta(hours=h))
            assert 0 <= ph < 24, f"pop_hour {ph} at {h}h out of range"
            assert 0 <= lh < 24, f"liq_hour {lh} at {h}h out of range"

    def test_liq_hour_monotone_utc(self):
        """liq_hour strictly increases hour over hour across UTC hours 0..22."""
        midnight = datetime(2026, 4, 15, 0, 0, tzinfo=timezone.utc)
        prev_lh = None
        # Only hours 0..22 are sampled; hour 23 is left out of the sweep.
        for h in range(23):
            lh = _get_weighted_hours(midnight + timedelta(hours=h))[1]
            assert prev_lh is None or lh > prev_lh, (
                f"liq_hour not monotone at {h}h: {lh} <= {prev_lh}"
            )
            prev_lh = lh

    def test_fallback_consistency(self):
        """Fixed-offset approximation is within 1h of the real values at 12:00 UTC."""
        if not _WEIGHTED_HOURS_AVAILABLE:
            pytest.skip("MarketIndicators not available")
        noon = datetime(2026, 4, 15, 12, 0, tzinfo=timezone.utc)
        real_ph, real_lh = _get_weighted_hours(noon)
        # Approximation: UTC hour shifted by a constant offset, wrapped to 24h.
        utc_hour = 12.0
        approx_ph = (utc_hour + 4.21) % 24
        approx_lh = (utc_hour + 0.98) % 24
        pop_error = abs(real_ph - approx_ph)
        liq_error = abs(real_lh - approx_lh)
        assert pop_error < 1.0, f"pop_hour fallback error: {real_ph} vs {approx_ph}"
        assert liq_error < 1.0, f"liq_hour fallback error: {real_lh} vs {approx_lh}"

    def test_liq_bucket_aligns(self):
        """liq_bucket_3h equals liq_weighted_hour floored to a multiple of 3."""
        for name, stamp in KNOWN_TIMES.items():
            result = compute_esof(stamp)
            expected_bkt = int(result["liq_weighted_hour"] // 3) * 3
            actual_bkt = result["liq_bucket_3h"]
            assert actual_bkt == expected_bkt, (
                f"{name}: liq_bucket {actual_bkt} != expected {expected_bkt}"
            )

    def test_liq_bucket_in_stats(self):
        """Each reference instant's liq_bucket_3h is a key of LIQ_HOUR_STATS."""
        for name, stamp in KNOWN_TIMES.items():
            bkt = compute_esof(stamp)["liq_bucket_3h"]
            assert bkt in LIQ_HOUR_STATS, f"{name}: liq_bucket {bkt} not in LIQ_HOUR_STATS"


# ══════════════════════════════════════════════════════════════════════════════
# 4. Advisory scoring logic
# ══════════════════════════════════════════════════════════════════════════════
class TestAdvisoryScoring:
    """Relative-ordering and labelling checks on compute_esof()'s advisory output."""

    # (cut-off, label) pairs, highest first: a score strictly above a cut-off
    # gets that label; anything at or below the last cut-off is UNFAVORABLE.
    # NOTE(review): these values are the ones this test previously hard-coded
    # inline — confirm they match the thresholds used inside esof_advisor.
    _LABEL_CUTOFFS = (
        (0.25, "FAVORABLE"),
        (0.05, "MILD_POSITIVE"),
        (-0.05, "NEUTRAL"),
        (-0.25, "MILD_NEGATIVE"),
    )

    @classmethod
    def _expected_label(cls, score):
        """Return the label the reference cut-offs assign to *score*."""
        for cutoff, label in cls._LABEL_CUTOFFS:
            if score > cutoff:
                return label
        return "UNFAVORABLE"

    def test_best_known_cell_is_positive(self):
        """Sun 10h UTC (LONDON_MORNING, best DoW cell) → positive score."""
        d = compute_esof(KNOWN_TIMES["sun_london"])
        assert d["advisory_score"] > 0, f"Sun LDN score={d['advisory_score']} expected positive"

    def test_worst_known_cell_is_worse_than_best(self):
        """Thu OVLP score must be worse than Sun LDN score (best known cell)."""
        d_best  = compute_esof(KNOWN_TIMES["sun_london"])
        d_worst = compute_esof(KNOWN_TIMES["thu_ovlp"])
        assert d_best["advisory_score"] > d_worst["advisory_score"], (
            f"Sun LDN {d_best['advisory_score']} not > Thu OVLP {d_worst['advisory_score']}"
        )

    def test_mon_worse_than_tue(self):
        """Monday score < Tuesday score (same time) — Mon WR 27% vs Tue WR 54%."""
        t_mon = datetime(2026, 4, 20, 10, 0, tzinfo=timezone.utc)  # Monday
        t_tue = datetime(2026, 4, 21, 10, 0, tzinfo=timezone.utc)  # Tuesday
        d_mon = compute_esof(t_mon)
        d_tue = compute_esof(t_tue)
        assert d_mon["advisory_score"] < d_tue["advisory_score"], (
            f"Mon {d_mon['advisory_score']} not < Tue {d_tue['advisory_score']}"
        )

    def test_sun_ny_negative(self):
        """Sun NY_AFTERNOON must not be labelled FAVORABLE.

        Despite the name, this only rules out the top label: the Sunday
        day-of-week boost may still leave the score mildly positive.
        """
        d = compute_esof(KNOWN_TIMES["sun_ny"])
        # Session/liq drag should keep it from being FAVORABLE
        assert d["advisory_label"] not in {"FAVORABLE"}, \
            f"Sun NY labeled {d['advisory_label']} — expected not FAVORABLE"

    def test_score_monotone_session_ordering(self):
        """LONDON_MORNING score > NY_AFTERNOON score for same DoW."""
        base = datetime(2026, 4, 15, tzinfo=timezone.utc)  # Wednesday
        d_ldn = compute_esof(base.replace(hour=10))
        d_ny  = compute_esof(base.replace(hour=19))
        assert d_ldn["advisory_score"] > d_ny["advisory_score"], \
            f"LDN {d_ldn['advisory_score']} not > NY {d_ny['advisory_score']}"

    def test_mercury_retrograde_penalty(self):
        """During Mercury retrograde the flag is set and the score is below ceiling.

        This is only a ceiling check (score <= 0.95); it does not measure the
        size of the penalty itself.
        """
        t = datetime(2026, 3, 15, 10, 0, tzinfo=timezone.utc)  # known retro period
        d = compute_esof(t)
        assert d["mercury_retrograde"] is True, "Expected mercury retrograde on 2026-03-15"
        # Score would be ~0.05 lower than without retrograde
        assert d["advisory_score"] <= 0.95, "Score should not be at ceiling during retrograde"

    def test_label_thresholds(self):
        """Labels must correspond to score ranges.

        First pins the reference score→label mapping on one representative
        score per label, then checks that the labels compute_esof() actually
        emits agree with that mapping for every reference instant.
        """
        cases = [
            (0.30,  "FAVORABLE"),
            (0.10,  "MILD_POSITIVE"),
            (0.00,  "NEUTRAL"),
            (-0.10, "MILD_NEGATIVE"),
            (-0.30, "UNFAVORABLE"),
        ]
        for score, expected_label in cases:
            got = self._expected_label(score)
            assert got == expected_label, \
                f"score={score}: got {got} expected {expected_label}"

        # Exercise the production labelling rather than a mocked return value.
        for name, t in KNOWN_TIMES.items():
            d = compute_esof(t)
            score = d["advisory_score"]
            # A score sitting on a cut-off is ambiguous from here (output
            # rounding, > vs >=), so it is not judged.
            if any(abs(score - cutoff) < 1e-3 for cutoff, _ in self._LABEL_CUTOFFS):
                continue
            expected_label = self._expected_label(score)
            assert d["advisory_label"] == expected_label, (
                f"{name}: score={score} labeled {d['advisory_label']} "
                f"expected {expected_label}"
            )


# ══════════════════════════════════════════════════════════════════════════════
# 5. Expectancy table internal consistency
# ══════════════════════════════════════════════════════════════════════════════
class TestExpectancyTables:
    """Sanity checks on the static expectancy tables and bounded output fields."""

    def test_session_stats_wr_range(self):
        """Each SESSION_STATS row has a win rate in [0, 100] and a positive count."""
        for sess, (n, wr, _net, _avg) in SESSION_STATS.items():
            assert 0 <= wr <= 100, f"{sess}: WR {wr} out of range"
            assert n > 0, f"{sess}: n={n}"

    def test_dow_stats_completeness(self):
        """DOW_STATS is keyed by exactly the integers 0..6."""
        weekdays = set(range(7))
        assert set(DOW_STATS.keys()) == weekdays, "DOW_STATS must cover Mon-Sun (0-6)"

    def test_dow_names_alignment(self):
        """DOW_NAMES has seven entries, starting at Mon and ending at Sun."""
        assert len(DOW_NAMES) == 7
        assert DOW_NAMES[0] == "Mon" and DOW_NAMES[6] == "Sun"

    def test_liq_hour_stats_completeness(self):
        """LIQ_HOUR_STATS is keyed by the eight 3-hour bucket starts 0..21."""
        expected_buckets = {0, 3, 6, 9, 12, 15, 18, 21}
        assert set(LIQ_HOUR_STATS.keys()) == expected_buckets

    def test_liq_hour_best_bucket_is_12(self):
        """Bucket 12 has both the highest WR (index 1) and the highest net PnL (index 2)."""
        def wr_of(bucket):
            return LIQ_HOUR_STATS[bucket][1]

        def net_of(bucket):
            return LIQ_HOUR_STATS[bucket][2]

        best_wr_bkt = max(LIQ_HOUR_STATS, key=wr_of)
        best_net_bkt = max(LIQ_HOUR_STATS, key=net_of)
        assert best_wr_bkt  == 12, f"Expected liq 12h best WR, got {best_wr_bkt}"
        assert best_net_bkt == 12, f"Expected liq 12h best net, got {best_net_bkt}"

    def test_liq_hour_worst_bucket_is_18(self):
        """Bucket 18 has both the lowest WR (index 1) and the lowest net PnL (index 2)."""
        def wr_of(bucket):
            return LIQ_HOUR_STATS[bucket][1]

        def net_of(bucket):
            return LIQ_HOUR_STATS[bucket][2]

        worst_wr_bkt = min(LIQ_HOUR_STATS, key=wr_of)
        worst_net_bkt = min(LIQ_HOUR_STATS, key=net_of)
        assert worst_wr_bkt  == 18, f"Expected liq 18h worst WR, got {worst_wr_bkt}"
        assert worst_net_bkt == 18, f"Expected liq 18h worst net, got {worst_net_bkt}"

    def test_baseline_wr_is_reasonable(self):
        """BASELINE_WR sits strictly between 42 and 45 percent."""
        # Original note: overall WR from 637 trades was 278/637 = 43.6%.
        assert 42.0 < BASELINE_WR < 45.0, f"BASELINE_WR {BASELINE_WR} looks wrong"

    def test_slot_stats_wr_range(self):
        """Each SLOT_STATS row has a win rate in [0, 100] and at least 3 samples."""
        for slot, data in SLOT_STATS.items():
            n = data[0]
            wr = data[1]
            assert 0 <= wr <= 100, f"slot {slot}: WR {wr} out of range"
            assert n >= 3, f"slot {slot}: n={n} below minimum threshold"

    def test_moon_illumination_range(self):
        """moon_illumination lies in [0, 1] for every reference instant."""
        for name, stamp in KNOWN_TIMES.items():
            illum = compute_esof(stamp)["moon_illumination"]
            assert 0.0 <= illum <= 1.0, f"{name}: moon_illumination {illum} out of [0,1]"

    def test_fib_strength_range(self):
        """fib_strength lies in [0, 1] for every reference instant."""
        for name, stamp in KNOWN_TIMES.items():
            fs = compute_esof(stamp)["fib_strength"]
            assert 0.0 <= fs <= 1.0, f"{name}: fib_strength {fs} out of [0,1]"

    def test_market_cycle_pos_range(self):
        """market_cycle_pos lies in the half-open interval [0, 1)."""
        for name, stamp in KNOWN_TIMES.items():
            cp = compute_esof(stamp)["market_cycle_pos"]
            assert 0.0 <= cp < 1.0, f"{name}: market_cycle_pos {cp} out of [0,1)"


# ══════════════════════════════════════════════════════════════════════════════
# 6. Moon approximation correctness
# ══════════════════════════════════════════════════════════════════════════════
class TestMoonApproximation:
    """Checks the moon illumination and Mercury-retrograde fields of compute_esof()."""

    # Reference lunar phases for April 2026 as (instant, phase, approx. illumination).
    # Dates agree with the synodic-cycle arithmetic used in the tests below
    # (new moon ~Apr 17, full moon ~Apr 2). Not asserted directly by any test
    # in this class.
    # NOTE(review): the phase strings are kept from the earlier table; confirm
    # they match the values esof_advisor emits in "moon_phase" before use.
    KNOWN_MOONS = [
        (datetime(2026, 4, 2,  tzinfo=timezone.utc), "FULL_MOON",   0.97),
        (datetime(2026, 4, 10, tzinfo=timezone.utc), "WANING",      0.50),  # last quarter ≈
        (datetime(2026, 4, 17, tzinfo=timezone.utc), "NEW_MOON",    0.03),
        (datetime(2026, 4, 24, tzinfo=timezone.utc), "WAXING",      0.50),  # first quarter ≈
    ]

    def test_new_moon_illumination_low(self):
        """Illumination is below 0.10 at the computed new moon of 2026-04-17."""
        # 28th new moon after ref Jan 11 2024: ~Apr 17 2026 (computed from synodic cycle)
        # 28 * 29.53059 = 826.856 days → Jan 11 2024 + 826d = Apr 17 2026
        t = datetime(2026, 4, 17, 12, 0, tzinfo=timezone.utc)
        d = compute_esof(t)
        assert d["moon_illumination"] < 0.10, \
            f"Expected near-new-moon illumination ~0, got {d['moon_illumination']}"

    def test_full_moon_illumination_high(self):
        """Illumination is above 0.90 at the computed full moon of 2026-04-02."""
        # Halfway between 27th (Mar 18) and 28th (Apr 17) new moon = ~Apr 2 2026
        t = datetime(2026, 4, 2, 12, 0, tzinfo=timezone.utc)
        d = compute_esof(t)
        assert d["moon_illumination"] > 0.90, \
            f"Expected near-full-moon illumination, got {d['moon_illumination']}"

    def test_mercury_retrograde_period(self):
        """mercury_retrograde is True on 2026-03-15 and False on 2026-04-05.

        The original note gives the retrograde window as 2026-03-07 to
        2026-03-30; only one instant inside and one after it are checked.
        """
        in_retro  = datetime(2026, 3, 15, 12, 0, tzinfo=timezone.utc)
        post_retro = datetime(2026, 4,  5, 12, 0, tzinfo=timezone.utc)
        assert compute_esof(in_retro)["mercury_retrograde"]  is True
        assert compute_esof(post_retro)["mercury_retrograde"] is False


# ══════════════════════════════════════════════════════════════════════════════
# 7. get_advisory() public API
# ══════════════════════════════════════════════════════════════════════════════
class TestPublicAPI:
    """Smoke tests for get_advisory() and repeatability of compute_esof()."""

    def test_get_advisory_no_args(self):
        """Calling get_advisory() without arguments still yields score and label."""
        advisory = get_advisory()
        assert "advisory_score" in advisory
        assert "advisory_label" in advisory

    def test_get_advisory_with_time(self):
        """An explicit instant is honoured: Sun 10:00 UTC → Sun / LONDON_MORNING."""
        advisory = get_advisory(KNOWN_TIMES["sun_london"])
        assert advisory["dow_name"] == "Sun"
        assert advisory["session"] == "LONDON_MORNING"

    def test_deterministic(self):
        """Two calls with the same instant agree on score, label, session and liq hour."""
        stamp = KNOWN_TIMES["midday_win"]
        first = compute_esof(stamp)
        second = compute_esof(stamp)
        compared_fields = (
            "advisory_score",
            "advisory_label",
            "session",
            "liq_weighted_hour",
        )
        for field in compared_fields:
            assert first[field] == second[field]


# ══════════════════════════════════════════════════════════════════════════════
# 8. Integration — HZ round-trip (skipped if HZ unavailable)
# ══════════════════════════════════════════════════════════════════════════════
class TestHZIntegration:
    """Round-trip checks against a local Hazelcast cluster; skipped when unreachable."""

    @pytest.fixture(scope="class")
    def hz_client(self):
        """Yield a connected Hazelcast client, or skip the class if none can be created.

        Only client creation is guarded by the skip. Teardown runs in a
        ``finally`` so the client is always shut down, and a failing shutdown
        surfaces as an error instead of being reported as "not available".
        """
        try:
            import hazelcast
            # NOTE(review): connection_timeout is set to 2s; confirm the
            # client's cluster-connect retry settings do not make this block
            # for long when HZ is down.
            client = hazelcast.HazelcastClient(
                cluster_name="dolphin",
                cluster_members=["localhost:5701"],
                connection_timeout=2.0,
            )
        except Exception:
            # Missing package or unreachable cluster: integration tests do not apply.
            pytest.skip("Hazelcast not available")
        try:
            yield client
        finally:
            client.shutdown()

    def test_hz_write_and_read(self, hz_client):
        """_hz_write() stores JSON under DOLPHIN_FEATURES / esof_advisor_latest."""
        import time
        from esof_advisor import _hz_write
        d = compute_esof(KNOWN_TIMES["sun_london"])
        _hz_write(d)
        # Short pause before reading the entry back.
        time.sleep(0.3)
        raw = hz_client.get_map("DOLPHIN_FEATURES").blocking().get("esof_advisor_latest")
        assert raw is not None, "esof_advisor_latest not found in HZ after write"
        parsed = json.loads(raw)
        assert parsed["advisory_label"] == d["advisory_label"]
        assert parsed["session"] == "LONDON_MORNING"

    def test_hz_value_is_json(self, hz_client):
        """Whatever is stored under esof_advisor_latest parses as JSON with a score."""
        raw = hz_client.get_map("DOLPHIN_FEATURES").blocking().get("esof_advisor_latest")
        if raw is None:
            pytest.skip("No esof_advisor_latest in HZ yet")
        parsed = json.loads(raw)
        assert "advisory_score" in parsed


# ══════════════════════════════════════════════════════════════════════════════
# 9. Integration — CH write (skipped if CH unavailable)
# ══════════════════════════════════════════════════════════════════════════════
class TestCHIntegration:
    """Write/schema checks against a local ClickHouse HTTP endpoint; skipped when down."""

    @staticmethod
    def _ch_request(url, body=None):
        """Build an authenticated ClickHouse HTTP request (POST when *body* is given)."""
        import urllib.request
        req = urllib.request.Request(
            url, data=body, method="POST" if body is not None else "GET"
        )
        # NOTE(review): credentials are hard-coded local test values.
        req.add_header("X-ClickHouse-User", "dolphin")
        req.add_header("X-ClickHouse-Key",  "dolphin_ch_2026")
        return req

    @classmethod
    def _ch_query(cls, q):
        """Run query *q* against the ``dolphin`` database and return the stripped text reply."""
        import urllib.request
        req = cls._ch_request("http://localhost:8123/?database=dolphin", q.encode())
        with urllib.request.urlopen(req, timeout=5) as r:
            return r.read().decode().strip()

    @pytest.fixture(scope="class")
    def ch_available(self):
        """Skip the class unless the ClickHouse /ping endpoint answers within 2s."""
        import urllib.request
        try:
            # Context manager closes the response so the socket is not leaked.
            with urllib.request.urlopen(
                self._ch_request("http://localhost:8123/ping"), timeout=2
            ):
                pass
        except Exception:
            pytest.skip("ClickHouse not available")

    def test_ch_write_no_exception(self, ch_available):
        """_ch_write() completes without raising for a reference result."""
        from esof_advisor import _ch_write
        d = compute_esof(KNOWN_TIMES["sun_london"])
        # Should complete without raising
        _ch_write(d)

    def test_ch_table_has_data(self, ch_available):
        """The esof_advisory table can be counted (the query itself proves it exists)."""
        count = int(self._ch_query("SELECT count() FROM esof_advisory"))
        assert count >= 0  # table exists (may be 0 if never written via daemon)

    def test_ch_schema_correct(self, ch_available):
        """The esof_advisory column listing mentions the expected column names."""
        cols = self._ch_query(
            "SELECT name FROM system.columns WHERE table='esof_advisory' AND database='dolphin' FORMAT CSV"
        )
        # Substring checks against the raw CSV listing of column names.
        assert "advisory_score" in cols
        assert "liq_weighted_hour" in cols
        assert "session" in cols