105 lines
3.3 KiB
Python
105 lines
3.3 KiB
Python
|
|
import json
|
||
|
|
import re
|
||
|
|
import os
|
||
|
|
from pathlib import Path
|
||
|
|
from datetime import datetime
|
||
|
|
|
||
|
|
# Root directory under which all Dolphin NG result sets live.
BASE = Path(r"C:\Users\Lenovo\Documents")

# Dataset label -> directory holding that generation's output files.
# NG4 keeps its data in a "- Results" subfolder; NG3 nests its scan JSONs
# under correlation_arb512/eigenvalues (handled by get_boundary_ng3).
DIRS = {
    "NG1": BASE / "- Dolphin NG",
    "NG2": BASE / "- Dolphin NG2",
    "NG4": BASE / "- DOLPHIN NG4" / "- Results",
    "NG5": BASE / "- Dolphin NG5",
    "NG3": BASE / "- Dolphin NG HD (NG3)" / "correlation_arb512" / "eigenvalues"
}
|
||
|
|
|
||
|
|
def parse_ts(s):
    """Parse a timestamp-like value into a ``datetime``, or return ``None``.

    Accepts ISO-8601-ish strings with either a 'T' or a space separator, with
    or without fractional seconds, and with or without a trailing 'Z'.  The
    value is stringified and normalized first (truncated to 26 chars so
    fractional seconds fit ``%f``, 'Z' stripped, 'T' -> ' '), so only the
    space-separated formats need to be tried.

    Note: the original also listed 'T'- and 'Z'-bearing format variants, but
    after the normalization above they could never match (dead branches); they
    are dropped here with no behavior change.
    """
    normalized = str(s)[:26].replace('Z', '').replace('T', ' ')
    for fmt in ("%Y-%m-%d %H:%M:%S.%f", "%Y-%m-%d %H:%M:%S"):
        try:
            return datetime.strptime(normalized, fmt)
        except ValueError:
            continue
    # Unparseable input (including None, which stringifies to "None").
    return None
|
||
|
|
|
||
|
|
def get_boundary_json(d, pattern):
    """Return ``(first_ts, last_ts, file_count)`` for JSON files in *d*.

    Files matching *pattern* (glob, sorted by name) are expected to be JSON
    objects carrying a ``'timestamp'`` key.  Scans forward for the first
    parseable timestamp and backward for the last, so a few corrupt files at
    either end do not lose the boundary.  Returns ``(None, None, 0)`` when the
    directory does not exist or nothing matches.
    """
    if not d.exists():
        return None, None, 0
    files = sorted(d.glob(pattern))
    if not files:
        return None, None, 0

    def extract_ts(f):
        # Best-effort read: unreadable or malformed files yield None instead
        # of aborting the scan.  Narrowed from a bare ``except:`` (which also
        # swallowed KeyboardInterrupt/SystemExit) to what the body can raise:
        # OSError for I/O, ValueError for JSON decode errors, AttributeError
        # when the document is not a dict (no .get).
        try:
            with open(f, 'r', encoding='utf-8', errors='replace') as fb:
                data = json.load(fb)
            return parse_ts(data.get('timestamp'))
        except (OSError, ValueError, AttributeError):
            return None

    ts_start = None
    for f in files:
        ts_start = extract_ts(f)
        if ts_start is not None:
            break

    ts_end = None
    for f in reversed(files):
        ts_end = extract_ts(f)
        if ts_end is not None:
            break

    return ts_start, ts_end, len(files)
|
||
|
|
|
||
|
|
def get_boundary_ng4(d):
    """Return ``(first_ts, last_ts, file_count)`` for NG4 plain-text logs.

    Every ``*.txt`` file in *d* (sorted by name) is scanned line by line for
    ISO-8601 'Z' timestamps.  The result is the first parseable timestamp of
    the earliest file that has one, and the last parseable timestamp of the
    latest file that has one.  Returns ``(None, None, 0)`` when the directory
    does not exist or contains no ``*.txt`` files.
    """
    if not d.exists():
        return None, None, 0
    files = sorted(d.glob('*.txt'))
    if not files:
        return None, None, 0

    log_re = re.compile(r'(\d{4}-\d{2}-\d{2}T[\d:.]+Z)')

    def extract_first_last_ts(f):
        # Returns (first, last) parseable timestamps found in f, or
        # (None, None) if the file is unreadable or has none.
        first = None
        last = None
        try:
            with open(f, 'r', encoding='utf-8', errors='replace') as fb:
                for line in fb:
                    m = log_re.search(line)
                    if not m:
                        continue
                    ts = parse_ts(m.group(1))
                    if ts is None:
                        # Bug fix: the original assigned ``last = ts``
                        # unconditionally, so an unparseable match could
                        # clobber a valid earlier timestamp with None.
                        continue
                    if first is None:
                        first = ts
                    last = ts
        except OSError:
            # Narrowed from a bare ``except:``; only I/O failures are
            # expected here, and the scan stays best-effort.
            pass
        return first, last

    ts_min = None
    ts_max = None
    for f in files:
        f_min, f_max = extract_first_last_ts(f)
        if ts_min is None:
            ts_min = f_min
        if f_max is not None:
            ts_max = f_max

    return ts_min, ts_max, len(files)
|
||
|
|
|
||
|
|
def get_boundary_ng3(d):
    """Return ``(first_ts, last_ts, total_file_count)`` for the NG3 layout.

    NG3 data lives in subdirectories of *d*, each holding ``scan_*.json``
    files; subdirectories whose name ends in ``'_SKIP'`` are ignored.  The
    start boundary is taken from the name-earliest subdirectory and the end
    boundary from the name-latest one, while the file count covers every
    subdirectory.  Returns ``(None, None, 0)`` when *d* is missing or has no
    usable subdirectories.
    """
    if not d.exists():
        return None, None, 0

    subdirs = sorted(
        entry for entry in d.iterdir()
        if entry.is_dir() and not entry.name.endswith('_SKIP')
    )
    if not subdirs:
        return None, None, 0

    first_dir = subdirs[0]
    last_dir = subdirs[-1]
    ts_min = get_boundary_json(first_dir, 'scan_*.json')[0]
    ts_max = get_boundary_json(last_dir, 'scan_*.json')[1]

    total_files = 0
    for sub in subdirs:
        total_files += len(list(sub.glob('scan_*.json')))

    return ts_min, ts_max, total_files
|
||
|
|
|
||
|
|
# Driver: report the first/last timestamp and file count for each dataset.
print("--- Targeted Data Archaeology Result ---")
for name, d in DIRS.items():
    print(f"Checking {name}...")
    # Initialize so an unrecognized DIRS key degrades to "No data found."
    # instead of raising NameError — the original left these unbound unless
    # one of the branches below matched.
    ts_start, ts_end, count = None, None, 0
    if name in ["NG1", "NG2", "NG5"]:
        ts_start, ts_end, count = get_boundary_json(d, 'regime_result_*.json')
    elif name == "NG4":
        ts_start, ts_end, count = get_boundary_ng4(d)
    elif name == "NG3":
        ts_start, ts_end, count = get_boundary_ng3(d)

    if ts_start:
        print(f" {name}: {ts_start} to {ts_end} ({count} files)")
    else:
        print(f" {name}: No data found.")