Files
DOLPHIN/nautilus_dolphin/dvae/data_range_archaeology.py

89 lines
2.9 KiB
Python
Raw Normal View History

import json
import re
import os
from pathlib import Path
from datetime import datetime
# Common parent folder of every data directory listed in DIRS.
BASE = Path(r"C:\Users\Lenovo\Documents")

# Short label -> directory to scan for that label.
# Insertion order is the order in which the report loop visits the labels.
DIRS = {
    "NG1": BASE.joinpath("- Dolphin NG"),
    "NG2": BASE.joinpath("- Dolphin NG2"),
    "NG4": BASE.joinpath("- DOLPHIN NG4", "- Results"),
    "NG5": BASE.joinpath("- Dolphin NG5"),
    "NG3": BASE.joinpath("- Dolphin NG HD (NG3)", "correlation_arb512", "eigenvalues"),
}
# Layouts tried, in order, by parse_ts(). Zone designators are removed from
# the text before matching, so none of the layouts contains one.
_TS_FORMATS = (
    "%Y-%m-%dT%H:%M:%S.%f",
    "%Y-%m-%dT%H:%M:%S",
    "%Y-%m-%d %H:%M:%S.%f",
    "%Y-%m-%d %H:%M:%S",
)

# Trailing zone designator: a literal "Z", or a numeric offset such as
# "+00:00" / "-0530" that directly follows a digit.
_TS_ZONE_SUFFIX = re.compile(r"(?:Z|(?<=\d)[+-]\d{2}:?\d{2})$")


def parse_ts(s):
    """Parse a timestamp-like value into a naive ``datetime``.

    The value is converted with ``str()``, surrounding whitespace is
    stripped, and a trailing ``Z`` or numeric UTC offset is dropped (the
    offset is discarded, not applied, so the result is the wall-clock time
    written in the text). The text is then cut to 26 characters, which is
    the length of ``YYYY-MM-DDTHH:MM:SS.ffffff``, because ``%f`` accepts at
    most six fractional digits.

    Args:
        s: Any object; typically an ISO-8601-like string using either ``T``
            or a space between date and time.

    Returns:
        A naive ``datetime`` on success, or ``None`` when no supported
        layout matches.
    """
    text = _TS_ZONE_SUFFIX.sub("", str(s).strip())
    # Truncate over-long fractions (e.g. nanoseconds) and drop any "Z" that
    # is still inside the kept prefix, as the original implementation did.
    text = text[:26].replace("Z", "")
    for fmt in _TS_FORMATS:
        try:
            return datetime.strptime(text, fmt)
        except ValueError:
            continue
    return None
def scan_dir_json(d, pattern):
    """Collect the ``timestamp`` field of JSON files in *d* matching *pattern*.

    The scan is best-effort: files that cannot be read, are not valid JSON,
    do not hold a JSON object at top level, lack a truthy ``timestamp``
    entry, or whose timestamp ``parse_ts`` cannot parse are skipped.

    Args:
        d: ``pathlib.Path`` of the directory to scan (non-recursive unless
            *pattern* itself recurses).
        pattern: Glob pattern passed to ``d.glob``.

    Returns:
        A list of ``datetime`` objects, one per file that yielded a parsable
        timestamp; empty when *d* does not exist.
    """
    ts_list = []
    if not d.exists():
        return ts_list
    for f in d.glob(pattern):
        try:
            with open(f, 'r', encoding='utf-8', errors='replace') as fb:
                data = json.load(fb)
        except (OSError, ValueError, RecursionError):
            # Unreadable file, malformed JSON (JSONDecodeError is a
            # ValueError) or pathologically nested JSON: skip this file.
            # KeyboardInterrupt/SystemExit deliberately propagate so a long
            # scan can be aborted.
            continue
        if not isinstance(data, dict):
            # Top-level arrays/scalars have no 'timestamp' key to read.
            continue
        ts_str = data.get('timestamp')
        if ts_str:
            ts = parse_ts(ts_str)
            if ts is not None:
                ts_list.append(ts)
    return ts_list
def scan_ng4(d):
    """Collect ``...Z``-suffixed ISO timestamps found in ``*.txt`` files of *d*.

    Every line of every matching file is searched for the first occurrence
    of ``YYYY-MM-DDT<digits, ':' and '.'>Z``; each hit that ``parse_ts`` can
    parse is recorded. Only the first match on a line is used.

    The scan is best-effort: a file that cannot be opened or read is
    abandoned (timestamps already collected from it are kept) and the scan
    moves on to the next file.

    Args:
        d: ``pathlib.Path`` of the directory holding the text files.

    Returns:
        A list of ``datetime`` objects in file/line encounter order; empty
        when *d* does not exist.
    """
    ts_list = []
    if not d.exists():
        return ts_list
    log_re = re.compile(r'(\d{4}-\d{2}-\d{2}T[\d:.]+Z)')
    for f in d.glob('*.txt'):
        try:
            with open(f, 'r', encoding='utf-8', errors='replace') as fb:
                for line in fb:
                    m = log_re.search(line)
                    if m is None:
                        continue
                    ts = parse_ts(m.group(1))
                    if ts is not None:
                        ts_list.append(ts)
        except OSError:
            # Unreadable entry (permissions, a directory named *.txt, I/O
            # error): skip it. Decoding cannot fail because of
            # errors='replace'. KeyboardInterrupt/SystemExit propagate so
            # the scan can be aborted.
            continue
    return ts_list
def scan_ng3(d):
    """Sample ``timestamp`` fields from the first and last subdirectory of *d*.

    Subdirectories whose name ends in ``_SKIP`` are ignored. The remaining
    ones are sorted and only the first and the last are scanned, to save
    time. When there is a single eligible subdirectory it is scanned once
    (not twice), so its timestamps are not duplicated in the result.

    Inside a sampled subdirectory every ``scan_*.json`` file whose name does
    not contain ``__Indicators`` is loaded, and its top-level ``timestamp``
    entry is parsed with ``parse_ts``. The scan is best-effort: unreadable
    files, malformed JSON, non-object JSON and unparsable timestamps are
    skipped.

    Args:
        d: ``pathlib.Path`` of the directory containing the subdirectories.

    Returns:
        A list of ``datetime`` objects; empty when *d* does not exist or has
        no eligible subdirectory.
    """
    ts_list = []
    if not d.exists():
        return ts_list
    # Just check the first and last date directories to save time
    subdirs = sorted(
        s for s in d.iterdir()
        if s.is_dir() and not s.name.endswith('_SKIP')
    )
    if not subdirs:
        return ts_list
    # With exactly one subdirectory, first and last are the same path;
    # visiting it twice would double-count every sample.
    sampled = [subdirs[0]] if len(subdirs) == 1 else [subdirs[0], subdirs[-1]]
    for subdir in sampled:
        for f in subdir.glob('scan_*.json'):
            if '__Indicators' in f.name:
                continue
            try:
                with open(f, 'r', encoding='utf-8', errors='replace') as fb:
                    data = json.load(fb)
            except (OSError, ValueError, RecursionError):
                # Unreadable file, malformed JSON (JSONDecodeError is a
                # ValueError) or pathologically nested JSON: skip it.
                # KeyboardInterrupt/SystemExit propagate so the scan can be
                # aborted.
                continue
            if not isinstance(data, dict):
                # Top-level arrays/scalars have no 'timestamp' key to read.
                continue
            ts_str = data.get('timestamp')
            if ts_str:
                ts = parse_ts(ts_str)
                if ts is not None:
                    ts_list.append(ts)
    return ts_list
# Report, for every labelled directory in DIRS, the earliest and latest
# timestamp found by the scanner that handles that label, plus how many
# timestamps the scan collected.
print("--- Data Archaeology Result ---")
for name, d in DIRS.items():
    print(f"Checking {name} in {d}...")
    if name in ("NG1", "NG2", "NG5"):
        times = scan_dir_json(d, 'regime_result_*.json')
    elif name == "NG4":
        times = scan_ng4(d)
    elif name == "NG3":
        times = scan_ng3(d)
    else:
        # No scanner handles this label. Without this branch `times` would
        # be unbound on the first iteration, or silently reuse the previous
        # label's results on later ones.
        times = []
    if times:
        print(f" {name}: {min(times)} to {max(times)} ({len(times)} samples found in scan)")
    else:
        print(f" {name}: No data found.")