"""Report the first and last timestamps found in each Dolphin NG data directory.

Each directory listed in ``DIRS`` is scanned with the strategy matching its
layout (JSON result files, text logs, or sub-directories of JSON scans), and
a one-line summary per directory is printed.
"""

import json
import os
import re
from datetime import datetime
from pathlib import Path
from typing import Iterable, Iterator, List, Optional, Tuple

BASE = Path(r"C:\Users\Lenovo\Documents")
DIRS = {
    "NG1": BASE / "- Dolphin NG",
    "NG2": BASE / "- Dolphin NG2",
    "NG4": BASE / "- DOLPHIN NG4" / "- Results",
    "NG5": BASE / "- Dolphin NG5",
    "NG3": BASE / "- Dolphin NG HD (NG3)" / "correlation_arb512" / "eigenvalues",
}

# (first timestamp, last timestamp, number of files considered)
Boundary = Tuple[Optional[datetime], Optional[datetime], int]

# parse_ts normalises its input (drops 'Z', turns 'T' into a space) before
# calling strptime, so only the space-separated layouts can ever match.
_TS_FORMATS = ("%Y-%m-%d %H:%M:%S.%f", "%Y-%m-%d %H:%M:%S")

# Trailing UTC offset such as "+00:00" or "-0500" directly after a digit.
_TZ_OFFSET_RE = re.compile(r'(?<=\d)[+-]\d{2}:?\d{2}$')

# ISO-8601 "Z"-suffixed timestamp as it appears inside NG4 log lines.
_LOG_TS_RE = re.compile(r'(\d{4}-\d{2}-\d{2}T[\d:.]+Z)')


def parse_ts(s) -> Optional[datetime]:
    """Parse an ISO-like timestamp into a naive ``datetime``.

    Accepts ``YYYY-MM-DD`` followed by ``T`` or a space and ``HH:MM:SS``,
    optionally with fractional seconds, a ``Z`` suffix or a trailing UTC
    offset. Fractions beyond microseconds are truncated. The ``Z`` / offset is
    discarded, not applied, so the result is the wall-clock value as written.

    Args:
        s: Value to parse; it is converted with ``str()`` first.

    Returns:
        The parsed ``datetime``, or ``None`` when ``s`` is ``None`` or does
        not match a supported layout.
    """
    if s is None:
        return None
    text = _TZ_OFFSET_RE.sub("", str(s).strip())
    # 26 chars == "YYYY-MM-DD HH:MM:SS.ffffff"; cuts sub-microsecond digits.
    text = text[:26].replace('Z', '').replace('T', ' ').strip()
    for fmt in _TS_FORMATS:
        try:
            return datetime.strptime(text, fmt)
        except ValueError:
            continue
    return None


def _read_json_timestamp(path: Path) -> Optional[datetime]:
    """Return the parsed top-level ``timestamp`` of a JSON file, or ``None``."""
    try:
        with open(path, 'r', encoding='utf-8', errors='replace') as fh:
            data = json.load(fh)
    except (OSError, ValueError):
        # Unreadable or malformed file (JSONDecodeError is a ValueError):
        # best-effort scan, so just report "no timestamp here".
        return None
    if not isinstance(data, dict):
        return None
    return parse_ts(data.get('timestamp'))


def _first_timestamp(paths: Iterable[Path]) -> Optional[datetime]:
    """Return the timestamp of the first file in ``paths`` that yields one."""
    for path in paths:
        ts = _read_json_timestamp(path)
        if ts is not None:
            return ts
    return None


def get_boundary_json(d: Path, pattern: str) -> Boundary:
    """Find the first and last timestamps among JSON files in a directory.

    Files matching ``pattern`` are ordered by path. The start is taken from
    the earliest file that has a parseable ``timestamp`` key, the end from the
    latest such file; files without one are skipped.

    Args:
        d: Directory to scan.
        pattern: Glob pattern selecting the JSON files.

    Returns:
        ``(ts_start, ts_end, file_count)``; ``(None, None, 0)`` when the
        directory is missing or has no matching files.
    """
    if not d.is_dir():
        return None, None, 0
    files = sorted(d.glob(pattern))
    if not files:
        return None, None, 0
    ts_start = _first_timestamp(files)
    ts_end = _first_timestamp(reversed(files))
    return ts_start, ts_end, len(files)


def _scan_log_timestamps(path: Path) -> Tuple[Optional[datetime], Optional[datetime]]:
    """Return the first and last parseable timestamps found in a log file."""
    first = None
    last = None
    try:
        with open(path, 'r', encoding='utf-8', errors='replace') as fh:
            for line in fh:
                match = _LOG_TS_RE.search(line)
                if match is None:
                    continue
                ts = parse_ts(match.group(1))
                if ts is None:
                    # Looks like a timestamp but is not a valid one; it must
                    # not overwrite a previously seen good value.
                    continue
                if first is None:
                    first = ts
                last = ts
    except OSError:
        # Keep whatever was collected before the read failed.
        pass
    return first, last


def get_boundary_ng4(d: Path) -> Boundary:
    """Find the first and last log timestamps among ``*.txt`` files.

    Files are ordered by path. The start is the first timestamp of the
    earliest file containing one; the end is the last timestamp of the latest
    file containing one.

    Args:
        d: Directory holding the text logs.

    Returns:
        ``(ts_min, ts_max, file_count)``; ``(None, None, 0)`` when the
        directory is missing or has no ``*.txt`` files.
    """
    if not d.is_dir():
        return None, None, 0
    files = sorted(d.glob('*.txt'))
    if not files:
        return None, None, 0

    ts_min = None
    ts_max = None
    for path in files:
        f_min, f_max = _scan_log_timestamps(path)
        if ts_min is None:
            ts_min = f_min
        if f_max is not None:
            ts_max = f_max
    return ts_min, ts_max, len(files)


def _iter_scan_files(subdirs: List[Path], pattern: str, reverse: bool = False) -> Iterator[Path]:
    """Yield matching files sub-directory by sub-directory, in path order.

    Lazy on purpose: a sub-directory is only listed and sorted once the
    consumer actually reaches it.
    """
    ordered = reversed(subdirs) if reverse else subdirs
    for sub in ordered:
        yield from sorted(sub.glob(pattern), reverse=reverse)


def get_boundary_ng3(d: Path) -> Boundary:
    """Find the first and last scan timestamps across NG3 sub-directories.

    Sub-directories whose name ends in ``_SKIP`` are ignored. The remaining
    ones are ordered by path and searched from the front for the first
    ``scan_*.json`` with a timestamp, and from the back for the last one, so
    an empty or unreadable first/last sub-directory does not hide the data in
    its neighbours.

    Args:
        d: Directory containing the per-run sub-directories.

    Returns:
        ``(ts_min, ts_max, total_files)`` where ``total_files`` counts
        ``scan_*.json`` files over all non-skipped sub-directories;
        ``(None, None, 0)`` when ``d`` is missing or has no such
        sub-directories.
    """
    if not d.is_dir():
        return None, None, 0
    subdirs = sorted(
        s for s in d.iterdir() if s.is_dir() and not s.name.endswith('_SKIP')
    )
    if not subdirs:
        return None, None, 0

    pattern = 'scan_*.json'
    ts_min = _first_timestamp(_iter_scan_files(subdirs, pattern))
    ts_max = _first_timestamp(_iter_scan_files(subdirs, pattern, reverse=True))
    total_files = sum(sum(1 for _ in s.glob(pattern)) for s in subdirs)
    return ts_min, ts_max, total_files


def main() -> None:
    """Scan every directory in ``DIRS`` and print its timestamp range."""
    print("--- Targeted Data Archaeology Result ---")
    for name, d in DIRS.items():
        print(f"Checking {name}...")
        if name in ("NG1", "NG2", "NG5"):
            ts_start, ts_end, count = get_boundary_json(d, 'regime_result_*.json')
        elif name == "NG4":
            ts_start, ts_end, count = get_boundary_ng4(d)
        elif name == "NG3":
            ts_start, ts_end, count = get_boundary_ng3(d)
        else:
            # No scanner is defined for this entry; report it as empty rather
            # than reusing the previous iteration's values.
            ts_start, ts_end, count = None, None, 0

        if ts_start:
            print(f" {name}: {ts_start} to {ts_end} ({count} files)")
        else:
            print(f" {name}: No data found.")


if __name__ == "__main__":
    main()