initial: import DOLPHIN baseline 2026-04-21 from dolphinng5_predict working tree
Includes core prod + GREEN/BLUE subsystems:
- prod/ (BLUE harness, configs, scripts, docs)
- nautilus_dolphin/ (GREEN Nautilus-native impl + dvae/ preserved)
- adaptive_exit/ (AEM engine + models/bucket_assignments.pkl)
- Observability/ (EsoF advisor, TUI, dashboards)
- external_factors/ (EsoF producer)
- mc_forewarning_qlabs_fork/ (MC regime/envelope)

Excludes runtime caches, logs, backups, and reproducible artifacts per .gitignore.
This commit is contained in:
104
nautilus_dolphin/dvae/targeted_data_archaeology.py
Executable file
104
nautilus_dolphin/dvae/targeted_data_archaeology.py
Executable file
@@ -0,0 +1,104 @@
|
||||
import json
|
||||
import re
|
||||
import os
|
||||
from pathlib import Path
|
||||
from datetime import datetime
|
||||
|
||||
# Root folder holding every DOLPHIN generation's result tree.
BASE = Path(r"C:\Users\Lenovo\Documents")

# Per-generation data directories.  Insertion order is the reporting order
# used by the script's main loop, so it is deliberate.
DIRS = dict(
    NG1=BASE / "- Dolphin NG",
    NG2=BASE / "- Dolphin NG2",
    NG4=BASE / "- DOLPHIN NG4" / "- Results",
    NG5=BASE / "- Dolphin NG5",
    NG3=BASE / "- Dolphin NG HD (NG3)" / "correlation_arb512" / "eigenvalues",
)
|
||||
|
||||
def parse_ts(s):
    """Parse a timestamp string into a ``datetime``, tolerating common variants.

    The input is normalized first: truncated to 26 characters (drops any
    suffix beyond microsecond precision), 'Z' removed, and a 'T' date/time
    separator replaced with a space.  After that normalization the string
    can only ever contain space-separated forms, so only those two formats
    are tried (the original code also listed 'T'/'Z' formats, but they were
    unreachable dead entries).

    Returns the parsed ``datetime``, or ``None`` when nothing matches
    (including non-string inputs such as ``None``, which stringify to
    something unparseable).
    """
    # Hoisted out of the loop: the normalization is loop-invariant.
    normalized = str(s)[:26].replace('Z', '').replace('T', ' ')
    for fmt in ("%Y-%m-%d %H:%M:%S.%f", "%Y-%m-%d %H:%M:%S"):
        try:
            return datetime.strptime(normalized, fmt)
        except ValueError:
            continue
    return None
|
||||
|
||||
def get_boundary_json(d, pattern):
    """Return ``(first_ts, last_ts, file_count)`` for JSON files in *d* matching *pattern*.

    Files are examined in sorted (lexicographic) filename order.  The start
    timestamp comes from the first file whose ``'timestamp'`` field parses;
    the end timestamp from the last such file.  Returns ``(None, None, 0)``
    when the directory is missing or nothing matches the glob pattern.
    """
    if not d.exists():
        return None, None, 0
    files = sorted(d.glob(pattern))  # sorted() already materializes a list
    if not files:
        return None, None, 0

    def extract_ts(f):
        # Best-effort read: unreadable or malformed files contribute no
        # timestamp.  Narrowed from a bare except, which also swallowed
        # KeyboardInterrupt/SystemExit.
        try:
            with open(f, 'r', encoding='utf-8', errors='replace') as fb:
                data = json.load(fb)
            return parse_ts(data.get('timestamp'))
        except Exception:
            return None

    ts_start = None
    for f in files:
        ts_start = extract_ts(f)
        if ts_start is not None:
            break

    ts_end = None
    for f in reversed(files):
        ts_end = extract_ts(f)
        if ts_end is not None:
            break

    return ts_start, ts_end, len(files)
|
||||
|
||||
def get_boundary_ng4(d):
    """Return ``(first_ts, last_ts, file_count)`` for NG4 log files under *d*.

    Scans every ``*.txt`` file in sorted filename order, extracting
    ISO-8601 'Z'-suffixed timestamps from log lines via regex.  ``ts_min``
    is the first parseable timestamp found across files; ``ts_max`` is the
    last parseable timestamp seen overall.  Returns ``(None, None, 0)``
    when the directory is missing or contains no ``*.txt`` files.
    """
    if not d.exists():
        return None, None, 0
    files = sorted(d.glob('*.txt'))
    if not files:
        return None, None, 0

    log_re = re.compile(r'(\d{4}-\d{2}-\d{2}T[\d:.]+Z)')

    def extract_first_last_ts(f):
        # First and last parseable timestamps within a single file.
        first = None
        last = None
        try:
            with open(f, 'r', encoding='utf-8', errors='replace') as fb:
                for line in fb:
                    m = log_re.search(line)
                    if not m:
                        continue
                    ts = parse_ts(m.group(1))
                    # BUG FIX: previously an unparseable timestamp set
                    # `last = None`, clobbering an earlier valid value in
                    # the same file.  Skip unparseable matches instead.
                    if ts is None:
                        continue
                    if first is None:
                        first = ts
                    last = ts
        except OSError:
            # Best-effort: an unreadable file simply contributes nothing.
            pass
        return first, last

    ts_min = None
    ts_max = None
    for f in files:
        f_min, f_max = extract_first_last_ts(f)
        if ts_min is None:
            ts_min = f_min
        if f_max is not None:
            ts_max = f_max

    return ts_min, ts_max, len(files)
|
||||
|
||||
def get_boundary_ng3(d):
    """Boundary timestamps for NG3, whose ``scan_*.json`` files live in subdirectories.

    Subdirectories (skipping any whose name ends in ``'_SKIP'``) are taken
    in sorted order: the first supplies the start timestamp and the last
    supplies the end timestamp, both via :func:`get_boundary_json`.  The
    count is the total number of scan files across all subdirectories.
    Returns ``(None, None, 0)`` when the directory or subdirectories are
    missing.
    """
    if not d.exists():
        return None, None, 0

    subdirs = sorted(
        entry for entry in d.iterdir()
        if entry.is_dir() and not entry.name.endswith('_SKIP')
    )
    if not subdirs:
        return None, None, 0

    first_dir, last_dir = subdirs[0], subdirs[-1]
    ts_min, _, _ = get_boundary_json(first_dir, 'scan_*.json')
    _, ts_max, _ = get_boundary_json(last_dir, 'scan_*.json')

    total_files = sum(len(list(sub.glob('scan_*.json'))) for sub in subdirs)
    return ts_min, ts_max, total_files
|
||||
|
||||
# Script entry: report the first/last data timestamp and file count for
# each DOLPHIN generation directory, using the layout-appropriate scanner.
print("--- Targeted Data Archaeology Result ---")
for name, d in DIRS.items():
    print(f"Checking {name}...")
    if name in ["NG1", "NG2", "NG5"]:
        # Flat directories of regime-result JSON files.
        ts_start, ts_end, count = get_boundary_json(d, 'regime_result_*.json')
    elif name == "NG4":
        # Plain-text logs with embedded ISO timestamps.
        ts_start, ts_end, count = get_boundary_ng4(d)
    elif name == "NG3":
        # scan_*.json files nested one level down in subdirectories.
        ts_start, ts_end, count = get_boundary_ng3(d)
    else:
        # BUG FIX: previously an unrecognized key left ts_start/ts_end/count
        # unbound (NameError) or stale from the prior iteration.
        ts_start, ts_end, count = None, None, 0

    if ts_start:
        print(f" {name}: {ts_start} to {ts_end} ({count} files)")
    else:
        print(f" {name}: No data found.")
|
||||
Reference in New Issue
Block a user