# Source listing metadata: 18 lines, 520 B, Python.
import os

from pypdf import PdfReader

# Input PDF and output text file; both are resolved relative to the current
# working directory.
pdf_path = "NAUTILUS-DOLPHIN Prod System Spec_ Python_Hazelcast Upgrade.pdf"
out_path = "extracted_spec.txt"


def extract_page_texts(pages):
    """Return the extracted text of every page in *pages*, in order.

    Args:
        pages: Iterable of objects exposing an ``extract_text()`` method
            (here, the pages of a ``PdfReader``).

    Returns:
        A list with one string per page. A falsy result from
        ``extract_text()`` is stored as ``""``.
    """
    # NOTE(review): the `or ""` is a defensive guard so that the later
    # str.join never receives a non-string (e.g. None) -- confirm whether the
    # installed pypdf version can actually return one.
    return [page.extract_text() or "" for page in pages]


if not os.path.exists(pdf_path):
    print(f"Error: {pdf_path} not found.")
else:
    reader = PdfReader(pdf_path)
    text = extract_page_texts(reader.pages)

    # Pages are separated by one blank line in the output file.
    with open(out_path, "w", encoding="utf-8") as f:
        f.write("\n\n".join(text))

    print(f"Successfully extracted {len(reader.pages)} pages to {out_path}.")