Files
DOLPHIN/prod/extract_spec.py

18 lines
520 B
Python
Raw Permalink Normal View History

import os
from pypdf import PdfReader
# Source PDF and destination text file. Both are bare relative names, so they
# are resolved against the current working directory at run time.
pdf_path = "NAUTILUS-DOLPHIN Prod System Spec_ Python_Hazelcast Upgrade.pdf"
out_path = "extracted_spec.txt"

if os.path.exists(pdf_path):
    reader = PdfReader(pdf_path)
    # One extracted string per page, kept in document order.
    text = [page.extract_text() for page in reader.pages]
    # Page texts are joined with two newlines between consecutive pages.
    joined = "\n\n".join(text)
    with open(out_path, "w", encoding="utf-8") as f:
        f.write(joined)
    print(f"Successfully extracted {len(reader.pages)} pages to {out_path}.")
else:
    # Nothing to extract: report the missing input and fall through.
    print(f"Error: {pdf_path} not found.")