Initial commit: PDF OCR Hotfolder v0.1.0

Komplettes Rewrite des alten Bash-Tools `pdf-tool` in Python.
- ocrmypdf als Library, watchdog für Hotfolder, ThreadPool für Parallelität
- Upload-Targets: folder, Nextcloud (WebDAV), SFTP
- E-Mail-Notify, optional veraPDF
- Interaktiver Installer mit Service-User-Support (lokal + AD via SSSD)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-04-08 00:22:31 +02:00
commit 76c3a991df
16 changed files with 1261 additions and 0 deletions
+57
View File
@@ -0,0 +1,57 @@
"""CLI-Entrypoint."""
from __future__ import annotations
import argparse
import logging
import sys
from pathlib import Path
from . import __version__
from .config import load_config
from .service import HotfolderService
def _setup_logging(level: str) -> None:
    """Configure the root logger with the application's log format.

    Args:
        level: Name of a standard ``logging`` level, e.g. ``"debug"`` or
            ``"WARNING"``. Case and surrounding whitespace are ignored.
            A value that does not name an integer level constant of the
            ``logging`` module falls back to ``logging.INFO``.
    """
    # A bare getattr() lookup would also return non-level attributes such as
    # ``logging.BASIC_FORMAT`` (a format string), which is not a valid level
    # and makes logging raise ValueError. Only integer constants are accepted.
    candidate = getattr(logging, level.strip().upper(), None)
    resolved = candidate if isinstance(candidate, int) else logging.INFO

    logging.basicConfig(
        level=resolved,
        format="%(asctime)s %(levelname)-7s %(name)s: %(message)s",
        datefmt="%Y-%m-%d %H:%M:%S",
    )
def main(argv: list[str] | None = None) -> int:
    """Run the command-line interface and return the process exit status.

    Args:
        argv: Command-line arguments without the program name. ``None``
            (the default) lets argparse read ``sys.argv[1:]``, so callers
            that invoke ``main()`` without arguments behave as before.

    Returns:
        ``2`` if the configuration file does not exist; otherwise ``0``
        (after a ``--once`` pass, after the service loop returns, or after
        the loop was interrupted with Ctrl-C).

    Raises:
        SystemExit: Raised by argparse for ``--help``, ``--version`` and
            invalid command-line arguments.
    """
    parser = argparse.ArgumentParser(
        prog="pdf-ocr-hotfolder",
        description="Wandelt eingehende PDFs per OCR in durchsuchbare PDFs um.",
    )
    parser.add_argument(
        "--config",
        "-c",
        default="/etc/pdf-ocr-hotfolder/config.toml",
        help="Pfad zur Konfigurationsdatei (TOML)",
    )
    parser.add_argument(
        "--version",
        action="version",
        version=f"%(prog)s {__version__}",
    )
    parser.add_argument(
        "--once",
        action="store_true",
        help="Nur bestehende Dateien verarbeiten und beenden",
    )
    args = parser.parse_args(argv)

    # Fail early with a short message instead of a traceback from the loader.
    cfg_path = Path(args.config)
    if not cfg_path.exists():
        print(f"Config nicht gefunden: {cfg_path}", file=sys.stderr)
        return 2

    cfg = load_config(cfg_path)
    _setup_logging(cfg.log_level)
    service = HotfolderService(cfg)

    if args.once:
        # One-shot mode: handle what is already there, then exit.
        # NOTE(review): this reaches into private members of the service;
        # presumably they prepare the directories, queue the existing files
        # and wait for the worker pool to drain -- confirm against the
        # service module.
        service._ensure_dirs()  # noqa: SLF001
        service._scan_existing()  # noqa: SLF001
        service._executor.shutdown(wait=True)  # noqa: SLF001
        return 0

    try:
        service.run()
    except KeyboardInterrupt:
        # Ctrl-C is treated as a regular shutdown, not as an error.
        pass
    return 0
# Script entry point: hand main()'s return value to the interpreter as the
# process exit status.
if __name__ == "__main__":
    raise SystemExit(main())