"""Report the earliest and latest timestamps found in each Dolphin NG data directory.

Each generation stores its timestamps differently (JSON result files, plain
text logs, or per-date sub-directories of JSON scans); one scanner per layout
collects them as naive ``datetime`` objects and a summary line is printed per
directory.
"""
import json
import os
import re
from datetime import datetime
from pathlib import Path

BASE = Path(r"C:\Users\Lenovo\Documents")
DIRS = {
    "NG1": BASE / "- Dolphin NG",
    "NG2": BASE / "- Dolphin NG2",
    "NG4": BASE / "- DOLPHIN NG4" / "- Results",
    "NG5": BASE / "- Dolphin NG5",
    "NG3": BASE / "- Dolphin NG HD (NG3)" / "correlation_arb512" / "eigenvalues",
}

# Formats tried in order by parse_ts() after the timezone designator has been
# stripped, so no format needs a literal "Z".
_TS_FORMATS = (
    "%Y-%m-%dT%H:%M:%S.%f",
    "%Y-%m-%dT%H:%M:%S",
    "%Y-%m-%d %H:%M:%S.%f",
    "%Y-%m-%d %H:%M:%S",
)

# Trailing "Z" or numeric UTC offset ("+00:00", "-0500").
_TZ_SUFFIX_RE = re.compile(r"(?:Z|[+-]\d{2}:?\d{2})$")

# "YYYY-MM-DDTHH:MM:SS.ffffff" is 26 characters; anything beyond that is
# sub-microsecond precision that %f cannot parse.
_MAX_TS_LEN = 26

# ISO-8601 "Z"-suffixed timestamp embedded anywhere in a log line.
_LOG_TS_RE = re.compile(r'(\d{4}-\d{2}-\d{2}T[\d:.]+Z)')


def parse_ts(s):
    """Parse a timestamp-like value into a naive ``datetime``.

    Accepts ``T``- or space-separated date/time with optional fractional
    seconds. A trailing ``Z`` or numeric UTC offset is dropped (the offset is
    NOT applied), and fractional seconds beyond microseconds are truncated.

    Args:
        s: Any object; it is converted with ``str()`` before parsing.

    Returns:
        A naive ``datetime``, or ``None`` if no known format matches.
    """
    text = _TZ_SUFFIX_RE.sub("", str(s).strip())
    # Truncate first, then drop any remaining "Z", so every input the
    # previous implementation accepted is still accepted.
    text = text[:_MAX_TS_LEN].replace("Z", "")
    for fmt in _TS_FORMATS:
        try:
            return datetime.strptime(text, fmt)
        except ValueError:
            continue
    return None


def _read_json_timestamp(path):
    """Return the parsed top-level ``timestamp`` of a JSON file, or ``None``.

    Unreadable files, malformed JSON, non-object documents and missing or
    unparseable timestamps all yield ``None`` (best-effort scan).
    """
    try:
        with open(path, "r", encoding="utf-8", errors="replace") as fh:
            data = json.load(fh)
    except (OSError, ValueError, RecursionError):
        # json.JSONDecodeError is a ValueError subclass. KeyboardInterrupt is
        # deliberately not caught so a long scan can still be aborted.
        return None
    if not isinstance(data, dict):
        return None
    ts_str = data.get("timestamp")
    if not ts_str:
        return None
    return parse_ts(ts_str)


def scan_dir_json(d, pattern):
    """Collect timestamps from the JSON files in ``d`` matching ``pattern``.

    Args:
        d: Directory (``Path``) to scan; a missing directory gives ``[]``.
        pattern: Glob pattern relative to ``d``.

    Returns:
        List of ``datetime`` objects, one per file with a parseable timestamp.
    """
    if not d.is_dir():
        return []
    parsed = (_read_json_timestamp(f) for f in d.glob(pattern))
    return [ts for ts in parsed if ts is not None]


def scan_ng4(d):
    """Collect ``Z``-suffixed ISO timestamps from the ``*.txt`` logs in ``d``.

    At most one timestamp (the first match) is taken per log line. A file
    that fails while being read keeps the timestamps gathered so far and the
    scan moves on to the next file.

    Args:
        d: Directory (``Path``) to scan; a missing directory gives ``[]``.

    Returns:
        List of ``datetime`` objects found in the logs.
    """
    ts_list = []
    if not d.is_dir():
        return ts_list
    for f in d.glob('*.txt'):
        try:
            with open(f, "r", encoding="utf-8", errors="replace") as fh:
                for line in fh:
                    m = _LOG_TS_RE.search(line)
                    if m is None:
                        continue
                    ts = parse_ts(m.group(1))
                    if ts is not None:
                        ts_list.append(ts)
        except OSError:
            continue
    return ts_list


def scan_ng3(d):
    """Collect timestamps from the first and last date sub-directories of ``d``.

    Only the lexicographically first and last sub-directories (excluding
    names ending in ``_SKIP``) are read, to save time. Files whose name
    contains ``__Indicators`` are ignored.

    Args:
        d: Directory (``Path``) holding the per-date sub-directories.

    Returns:
        List of ``datetime`` objects from ``scan_*.json`` files.
    """
    if not d.is_dir():
        return []
    subdirs = sorted(
        s for s in d.iterdir() if s.is_dir() and not s.name.endswith('_SKIP')
    )
    if not subdirs:
        return []
    # With a single sub-directory first == last; de-duplicate so its files
    # are not counted twice.
    sampled = list(dict.fromkeys((subdirs[0], subdirs[-1])))
    ts_list = []
    for subdir in sampled:
        for f in subdir.glob('scan_*.json'):
            if '__Indicators' in f.name:
                continue
            ts = _read_json_timestamp(f)
            if ts is not None:
                ts_list.append(ts)
    return ts_list


def _scan_regime_results(d):
    """Scan ``d`` for the ``regime_result_*.json`` files used by NG1/NG2/NG5."""
    return scan_dir_json(d, 'regime_result_*.json')


# Scanner used for each DIRS key; a key without an entry reports no data
# instead of reusing a stale result or raising NameError.
_SCANNERS = {
    "NG1": _scan_regime_results,
    "NG2": _scan_regime_results,
    "NG5": _scan_regime_results,
    "NG4": scan_ng4,
    "NG3": scan_ng3,
}

# NOTE: the report intentionally still runs at module level, as before.
print("--- Data Archaeology Result ---")
for name, d in DIRS.items():
    print(f"Checking {name} in {d}...")
    scanner = _SCANNERS.get(name)
    times = scanner(d) if scanner is not None else []
    if times:
        print(f" {name}: {min(times)} to {max(times)} ({len(times)} samples found in scan)")
    else:
        print(f" {name}: No data found.")