Files
DOLPHIN/nautilus_dolphin/dvae/targeted_data_archaeology.py

105 lines
3.3 KiB
Python
Raw Normal View History

import json
import re
import os
from pathlib import Path
from datetime import datetime
# Root folder under which every scanned directory lives.
BASE = Path(r"C:\Users\Lenovo\Documents")

# Label -> directory to scan. Insertion order is the order in which the
# report loop at the bottom of the file visits the entries.
DIRS = {
    "NG1": BASE.joinpath("- Dolphin NG"),
    "NG2": BASE.joinpath("- Dolphin NG2"),
    "NG4": BASE.joinpath("- DOLPHIN NG4", "- Results"),
    "NG5": BASE.joinpath("- Dolphin NG5"),
    "NG3": BASE.joinpath("- Dolphin NG HD (NG3)", "correlation_arb512", "eigenvalues"),
}
# Trailing numeric UTC offset such as "+00:00" or "-0530". The "Z" designator
# is not covered here; parse_ts removes it separately.
_TS_UTC_OFFSET = re.compile(r'[+-]\d{2}:?\d{2}$')

# Formats applied AFTER normalisation ('T' -> ' ', 'Z' removed), so only the
# space-separated variants can ever match.
_TS_FORMATS = ("%Y-%m-%d %H:%M:%S.%f", "%Y-%m-%d %H:%M:%S")


def parse_ts(s):
    """Parse an ISO-like timestamp into a naive ``datetime``.

    Accepts ``YYYY-MM-DD`` followed by ``T`` or a space and ``HH:MM:SS``,
    optionally with fractional seconds (anything beyond microseconds is cut
    off by the 26-character truncation), a ``Z`` designator, or a trailing
    ``+HH:MM`` / ``-HHMM`` style offset.

    The offset / ``Z`` is dropped, not applied: the returned value carries
    the wall-clock fields exactly as written and has no tzinfo.

    Args:
        s: Value to parse; it is converted with ``str()`` first.

    Returns:
        A naive ``datetime``, or ``None`` when ``s`` is ``None`` or does not
        match any supported layout.
    """
    if s is None:
        return None
    raw = str(s)

    # First candidate reproduces the historical behaviour (cut at 26 chars =
    # date + time + 6 fractional digits), so previously parsable inputs keep
    # giving the same result.
    candidates = [raw[:26]]

    # Second candidate handles inputs where the cut does not remove the UTC
    # offset, e.g. "2024-03-05T12:34:56+00:00" or "...:56.250+02:00".
    without_offset = _TS_UTC_OFFSET.sub('', raw)
    if without_offset != raw:
        candidates.append(without_offset[:26])

    for candidate in candidates:
        text = candidate.replace('Z', '').replace('T', ' ')
        for fmt in _TS_FORMATS:
            try:
                return datetime.strptime(text, fmt)
            except ValueError:
                continue
    return None
def get_boundary_json(d, pattern):
    """Find the first and last readable timestamps among JSON files in ``d``.

    Files matching ``pattern`` are sorted by path. The start boundary is the
    ``'timestamp'`` field of the first file (in that order) that can be read
    and parsed; the end boundary is the same, searched from the back. Files
    that cannot be opened, are not valid JSON, are not JSON objects, or hold
    an unparsable timestamp are skipped.

    Args:
        d: ``pathlib.Path`` of the directory to scan.
        pattern: Glob pattern selecting the files, e.g. ``'scan_*.json'``.

    Returns:
        ``(ts_start, ts_end, file_count)``. Both timestamps are ``None`` when
        no file yields one; the result is ``(None, None, 0)`` when ``d`` does
        not exist or nothing matches ``pattern``.
    """
    if not d.exists():
        return None, None, 0
    files = sorted(d.glob(pattern))
    if not files:
        return None, None, 0

    def extract_ts(f):
        # Best-effort read: one broken file must not abort the scan. Unlike a
        # bare ``except:``, this still lets KeyboardInterrupt / SystemExit
        # through so a long scan can be interrupted.
        try:
            with open(f, 'r', encoding='utf-8', errors='replace') as fb:
                data = json.load(fb)
            return parse_ts(data.get('timestamp'))
        except Exception:
            return None

    # Generators are lazy, so each search stops at the first file that
    # yields a timestamp.
    ts_start = next((ts for ts in map(extract_ts, files) if ts), None)
    ts_end = next((ts for ts in map(extract_ts, reversed(files)) if ts), None)
    return ts_start, ts_end, len(files)
def get_boundary_ng4(d):
    """Find the first and last log timestamps across the ``*.txt`` files in ``d``.

    Files are visited in sorted path order and every line is searched for a
    ``YYYY-MM-DDT<digits, ':' or '.'>Z`` token. The start boundary is the
    first token that parses, in the first file that has one; the end boundary
    is the last token that parses, in the last file that has one. These are
    positional (first/last seen), not a computed minimum/maximum.

    Args:
        d: ``pathlib.Path`` of the directory holding the text logs.

    Returns:
        ``(ts_first, ts_last, file_count)``; ``(None, None, 0)`` when ``d``
        does not exist or contains no ``*.txt`` file.
    """
    if not d.exists():
        return None, None, 0
    files = sorted(d.glob('*.txt'))
    if not files:
        return None, None, 0
    log_re = re.compile(r'(\d{4}-\d{2}-\d{2}T[\d:.]+Z)')

    def extract_first_last_ts(f):
        first = None
        last = None
        try:
            with open(f, 'r', encoding='utf-8', errors='replace') as fb:
                for line in fb:
                    m = log_re.search(line)
                    if not m:
                        continue
                    ts = parse_ts(m.group(1))
                    # The regex is looser than the parser (e.g. it accepts
                    # "2025-01-01T12:00Z"); a token that fails to parse must
                    # not overwrite a valid ``last`` found earlier.
                    if ts is None:
                        continue
                    if first is None:
                        first = ts
                    last = ts
        except OSError:
            # Unreadable file: keep whatever was collected before the error.
            pass
        return first, last

    ts_min = None
    ts_max = None
    for f in files:
        f_min, f_max = extract_first_last_ts(f)
        if ts_min is None:
            ts_min = f_min
        if f_max is not None:
            ts_max = f_max
    return ts_min, ts_max, len(files)
def get_boundary_ng3(d):
    """Find the first and last scan timestamps across the sub-folders of ``d``.

    Immediate sub-directories whose name does not end in ``_SKIP`` are
    visited in sorted order. The start boundary comes from the earliest
    sub-directory whose ``scan_*.json`` files yield a timestamp, the end
    boundary from the latest one that does, so an empty or unreadable folder
    at either end does not hide data held by its neighbours.

    Args:
        d: ``pathlib.Path`` of the parent directory.

    Returns:
        ``(ts_min, ts_max, total_files)`` where ``total_files`` counts the
        ``scan_*.json`` files in all visited sub-directories;
        ``(None, None, 0)`` when ``d`` is not a directory or has no eligible
        sub-directory.
    """
    # is_dir() rather than exists(): iterdir() raises on a regular file.
    if not d.is_dir():
        return None, None, 0
    subdirs = sorted(
        s for s in d.iterdir() if s.is_dir() and not s.name.endswith('_SKIP')
    )
    if not subdirs:
        return None, None, 0

    pattern = 'scan_*.json'

    # Walk inward from each end and stop at the first folder that yields a
    # timestamp, so in the common case only two folders are opened.
    ts_min = None
    for s in subdirs:
        ts_min = get_boundary_json(s, pattern)[0]
        if ts_min:
            break

    ts_max = None
    for s in reversed(subdirs):
        ts_max = get_boundary_json(s, pattern)[1]
        if ts_max:
            break

    total_files = sum(1 for s in subdirs for _ in s.glob(pattern))
    return ts_min, ts_max, total_files
# Report, for every configured directory, the first/last timestamp found and
# the number of files considered.
print("--- Targeted Data Archaeology Result ---")
for name, d in DIRS.items():
    print(f"Checking {name}...")
    # Each label uses the scanner that matches how its files are laid out.
    if name == "NG3":
        ts_start, ts_end, count = get_boundary_ng3(d)
    elif name == "NG4":
        ts_start, ts_end, count = get_boundary_ng4(d)
    elif name in ("NG1", "NG2", "NG5"):
        ts_start, ts_end, count = get_boundary_json(d, 'regime_result_*.json')
    # A missing start timestamp is reported as "no data", whatever the count.
    if not ts_start:
        print(f" {name}: No data found.")
    else:
        print(f" {name}: {ts_start} to {ts_end} ({count} files)")