zine_library/code-for-pelican-deployment/zines-to-images2.py
2026-02-05 17:54:04 -08:00

165 lines
4.6 KiB
Python
Executable file

import os
import re
import shutil
# import pillow
from pathlib import Path
from datetime import datetime
from pdf2image import convert_from_path
from PIL import Image
from blessings import Terminal
term = Terminal()
green = term.green
bred = term.bold_red
byellow = term.bold_yellow
ROOT_DIR = Path("./input_zines")
SCRIPT_DIR = Path(__file__).parent.resolve()
CONTENT_DIR = SCRIPT_DIR / "content"
IMAGES_DIR = CONTENT_DIR / "images"
LIBRARY_DIR = CONTENT_DIR / "library"
CONTENT_DIR.mkdir(exist_ok=True)
IMAGES_DIR.mkdir(exist_ok=True)
LIBRARY_DIR.mkdir(exist_ok=True)
def slugify(text: str) -> str:
text = text.lower()
text = re.sub(r"[^\w\s-]", "", text)
text = re.sub(r"[\s_-]+", "-", text)
return text.strip("-")
def sanitize_path_part(text: str) -> str:
text = text.lower()
text = text.replace("&", "and")
text = re.sub(r"\s+", "_", text)
text = re.sub(r"[^a-z0-9_-]", "", text)
return text
def clean_title(text: str) -> str:
text = text.strip()
text = re.sub(r"\s+", " ", text)
return text.rstrip(" -–—:")
def parse_title_and_author_from_filename(pdf_path: Path):
base = pdf_path.stem
if " - " in base:
title_part, author_part = base.rsplit("-", 1)
return clean_title(title_part), clean_title(author_part)
return clean_title(base), ""
def get_category_and_tags(pdf_path: Path):
relative = pdf_path.parent.relative_to(ROOT_DIR)
parts = list(relative.parts)
category = parts[0].replace("&", "and").title() if parts else ""
tags = [
p.replace("&", "and").lower().replace(" ", "_")
for p in parts[1:]
]
return category, tags
def markdown_path_for_pdf(pdf_path: Path) -> Path:
relative = pdf_path.relative_to(ROOT_DIR).with_suffix("")
slug = slugify("-".join(relative.parts))
return CONTENT_DIR / f"{slug}.md"
def copied_pdf_path(pdf_path: Path) -> Path:
relative = pdf_path.relative_to(ROOT_DIR)
sanitized_parts = []
for part in relative.parts:
if part.lower().endswith(".pdf"):
stem = Path(part).stem
suffix = Path(part).suffix # problems here before
sanitized_name = sanitize_path_part(stem) + suffix
sanitized_parts.append(sanitized_name)
else:
sanitized_parts.append(sanitize_path_part(part))
dest = LIBRARY_DIR.joinpath(*sanitized_parts)
dest.parent.mkdir(parents=True, exist_ok=True)
if not dest.exists():
shutil.copy2(pdf_path, dest)
return dest
def cover_image_path_for_pdf(pdf_path: Path) -> Path:
md_path = markdown_path_for_pdf(pdf_path)
return IMAGES_DIR / f"{md_path.stem}.jpg"
def generate_cover_image(pdf_path: Path) -> Path:
cover_path = cover_image_path_for_pdf(pdf_path)
if cover_path.exists():
return cover_path
images = convert_from_path(
pdf_path,
first_page=1,
last_page=1,
dpi=200
)
page = images[0]
width, height = page.size
right_half = page.crop((width // 2, 0, width, height))
right_half.convert("RGB").save(cover_path, "JPEG", quality=90)
return cover_path
def create_markdown(pdf_path: Path):
md_path = markdown_path_for_pdf(pdf_path)
if md_path.exists():
print(byellow(f"Skipping existing: {md_path}"))
return
title, author = parse_title_and_author_from_filename(pdf_path)
slug = slugify(title)
category, tags = get_category_and_tags(pdf_path)
copied_pdf = copied_pdf_path(pdf_path)
cover_path = generate_cover_image(pdf_path)
rel_pdf = copied_pdf.relative_to(CONTENT_DIR).as_posix()
today = datetime.today().strftime("%Y-%m-%d")
front_matter = [
"---",
f"Title: {title}",
f"Date: {today}",
f"Slug: {slug}",
f"Category: {category}",
f"Tags: {', '.join(tags)}",
"Summary:",
]
if author:
front_matter.append(f"Author: {author}")
front_matter.extend([
f"Cover: {{static}}/images/{cover_path.name}",
f"PDF: {{static}}/{rel_pdf}",
"---",
"",
f'<img src="{{static}}/images/{cover_path.name}" width="200" />',
"",
f"[Download imposed PDF]({{static}}/{rel_pdf})",
"",
])
md_path.write_text("\n".join(front_matter), encoding="utf-8")
print(bgreen(f"Created: {md_path}"))
def main():
for root, _, files in os.walk(ROOT_DIR):
for name in files:
if name.lower().endswith(".pdf"):
pdf_path = Path(root) / name
try:
create_markdown(pdf_path)
except Exception as e:
print(bred(f"Failed processing {pdf_path}: {e}"))
if __name__ == "__main__":
main()