105 lines
3.3 KiB
Python
105 lines
3.3 KiB
Python
|
|
import json
|
||
|
|
import re
|
||
|
|
import os
|
||
|
|
from pathlib import Path
|
||
|
|
from datetime import datetime
|
||
|
|
|
||
|
|
# Root directory under which all Dolphin NG result sets live.
BASE = Path(r"C:\Users\Lenovo\Documents")

# Dataset label -> directory holding that generation's output files.
# NG4 keeps its data in a "- Results" subfolder; NG3 nests its scan JSONs
# under correlation_arb512/eigenvalues (handled by get_boundary_ng3).
DIRS = {
    "NG1": BASE / "- Dolphin NG",
    "NG2": BASE / "- Dolphin NG2",
    "NG4": BASE / "- DOLPHIN NG4" / "- Results",
    "NG5": BASE / "- Dolphin NG5",
    "NG3": BASE / "- Dolphin NG HD (NG3)" / "correlation_arb512" / "eigenvalues"
}
|
||
|
|
|
||
|
|
def parse_ts(s):
    """Parse a timestamp-like value into a ``datetime``, or return ``None``.

    Accepts ISO-8601-ish strings with either a 'T' or a space separator, with
    or without fractional seconds, and with or without a trailing 'Z'.  The
    value is stringified and normalized first (truncated to 26 chars so
    fractional seconds fit ``%f``, 'Z' stripped, 'T' -> ' '), so only the
    space-separated formats need to be tried.

    Note: the original also listed 'T'- and 'Z'-bearing format variants, but
    after the normalization above they could never match (dead branches); they
    are dropped here with no behavior change.
    """
    normalized = str(s)[:26].replace('Z', '').replace('T', ' ')
    for fmt in ("%Y-%m-%d %H:%M:%S.%f", "%Y-%m-%d %H:%M:%S"):
        try:
            return datetime.strptime(normalized, fmt)
        except ValueError:
            continue
    # Unparseable input (including None, which stringifies to "None").
    return None
|
||
|
|
|
||
|
|
def get_boundary_json(d, pattern):
    """Return ``(first_ts, last_ts, file_count)`` for JSON files in *d*.

    Files matching *pattern* (glob, sorted by name) are expected to be JSON
    objects carrying a ``'timestamp'`` key.  Scans forward for the first
    parseable timestamp and backward for the last, so a few corrupt files at
    either end do not lose the boundary.  Returns ``(None, None, 0)`` when the
    directory does not exist or nothing matches.
    """
    if not d.exists():
        return None, None, 0
    files = sorted(d.glob(pattern))
    if not files:
        return None, None, 0

    def extract_ts(f):
        # Best-effort read: unreadable or malformed files yield None instead
        # of aborting the scan.  Narrowed from a bare ``except:`` (which also
        # swallowed KeyboardInterrupt/SystemExit) to what the body can raise:
        # OSError for I/O, ValueError for JSON decode errors, AttributeError
        # when the document is not a dict (no .get).
        try:
            with open(f, 'r', encoding='utf-8', errors='replace') as fb:
                data = json.load(fb)
            return parse_ts(data.get('timestamp'))
        except (OSError, ValueError, AttributeError):
            return None

    ts_start = None
    for f in files:
        ts_start = extract_ts(f)
        if ts_start is not None:
            break

    ts_end = None
    for f in reversed(files):
        ts_end = extract_ts(f)
        if ts_end is not None:
            break

    return ts_start, ts_end, len(files)
|
||
|
|
|
||
|
|
def get_boundary_ng4(d):
    """Return ``(first_ts, last_ts, file_count)`` for NG4 plain-text logs.

    Every ``*.txt`` file in *d* (sorted by name) is scanned line by line for
    ISO-8601 'Z' timestamps.  The result is the first parseable timestamp of
    the earliest file that has one, and the last parseable timestamp of the
    latest file that has one.  Returns ``(None, None, 0)`` when the directory
    does not exist or contains no ``*.txt`` files.
    """
    if not d.exists():
        return None, None, 0
    files = sorted(d.glob('*.txt'))
    if not files:
        return None, None, 0

    log_re = re.compile(r'(\d{4}-\d{2}-\d{2}T[\d:.]+Z)')

    def extract_first_last_ts(f):
        # Returns (first, last) parseable timestamps found in f, or
        # (None, None) if the file is unreadable or has none.
        first = None
        last = None
        try:
            with open(f, 'r', encoding='utf-8', errors='replace') as fb:
                for line in fb:
                    m = log_re.search(line)
                    if not m:
                        continue
                    ts = parse_ts(m.group(1))
                    if ts is None:
                        # Bug fix: the original assigned ``last = ts``
                        # unconditionally, so an unparseable match could
                        # clobber a valid earlier timestamp with None.
                        continue
                    if first is None:
                        first = ts
                    last = ts
        except OSError:
            # Narrowed from a bare ``except:``; only I/O failures are
            # expected here, and the scan stays best-effort.
            pass
        return first, last

    ts_min = None
    ts_max = None
    for f in files:
        f_min, f_max = extract_first_last_ts(f)
        if ts_min is None:
            ts_min = f_min
        if f_max is not None:
            ts_max = f_max

    return ts_min, ts_max, len(files)
|
||
|
|
|
||
|
|
def get_boundary_ng3(d):
    """Return ``(first_ts, last_ts, total_file_count)`` for the NG3 layout.

    NG3 data lives in subdirectories of *d*, each holding ``scan_*.json``
    files; subdirectories whose name ends in ``'_SKIP'`` are ignored.  The
    start boundary is taken from the name-earliest subdirectory and the end
    boundary from the name-latest one, while the file count covers every
    subdirectory.  Returns ``(None, None, 0)`` when *d* is missing or has no
    usable subdirectories.
    """
    if not d.exists():
        return None, None, 0

    subdirs = sorted(
        entry for entry in d.iterdir()
        if entry.is_dir() and not entry.name.endswith('_SKIP')
    )
    if not subdirs:
        return None, None, 0

    first_dir = subdirs[0]
    last_dir = subdirs[-1]
    ts_min = get_boundary_json(first_dir, 'scan_*.json')[0]
    ts_max = get_boundary_json(last_dir, 'scan_*.json')[1]

    total_files = 0
    for sub in subdirs:
        total_files += len(list(sub.glob('scan_*.json')))

    return ts_min, ts_max, total_files
|
||
|
|
|
||
|
|
# Driver: report the first/last timestamp and file count for each dataset.
print("--- Targeted Data Archaeology Result ---")
for name, d in DIRS.items():
    print(f"Checking {name}...")
    # Initialize so an unrecognized DIRS key degrades to "No data found."
    # instead of raising NameError — the original left these unbound unless
    # one of the branches below matched.
    ts_start, ts_end, count = None, None, 0
    if name in ["NG1", "NG2", "NG5"]:
        ts_start, ts_end, count = get_boundary_json(d, 'regime_result_*.json')
    elif name == "NG4":
        ts_start, ts_end, count = get_boundary_ng4(d)
    elif name == "NG3":
        ts_start, ts_end, count = get_boundary_ng3(d)

    if ts_start:
        print(f" {name}: {ts_start} to {ts_end} ({count} files)")
    else:
        print(f" {name}: No data found.")