Initial commit: PDF OCR Hotfolder v0.1.0
Komplettes Rewrite des alten Bash-Tools `pdf-tool` in Python.

- ocrmypdf als Library, watchdog für Hotfolder, ThreadPool für Parallelität
- Upload-Targets: folder, Nextcloud (WebDAV), SFTP
- E-Mail-Notify, optional veraPDF
- Interaktiver Installer mit Service-User-Support (lokal + AD via SSSD)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,129 @@
|
||||
"""Konfigurations-Loader (TOML)."""
|
||||
from __future__ import annotations
|
||||
|
||||
import tomllib
|
||||
from dataclasses import dataclass, field
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
|
||||
@dataclass
class Paths:
    """Directories used by the application.

    All four fields are required (no defaults). ``load_config`` fills them
    from the keys of the same name in the ``[paths]`` table of the TOML
    file, wrapping each value in ``pathlib.Path``.
    """

    # NOTE(review): the role of each directory is inferred from its name
    # only (drop folder / results / scratch space / failed files) — confirm
    # against the code that consumes these paths.
    incoming: Path
    outgoing: Path
    working: Path
    error: Path
|
||||
|
||||
|
||||
@dataclass
class OcrConfig:
    """OCR settings, populated from the ``[ocr]`` table of the TOML file.

    Every field has a default, so the table (or any key in it) may be
    omitted.
    """

    # NOTE(review): the per-field meanings below are inferred from the names
    # and presumably map onto ocrmypdf options — confirm against the caller.
    languages: str = "deu+eng"  # "+"-separated language codes
    jobs: int = 4
    skip_text: bool = True
    oversample: int = 300  # presumably DPI — TODO confirm
    pdfa_level: str = "2"
    deskew: bool = True
    clean: bool = False
    max_workers: int = 2  # presumably the size of the worker pool — TODO confirm
    timeout: int = 1800  # presumably seconds — TODO confirm
|
||||
|
||||
|
||||
@dataclass
class VeraPdfConfig:
    """veraPDF settings, populated from the ``[verapdf]`` table.

    Disabled by default; every field has a default value.
    """

    enabled: bool = False
    binary: str = "/opt/verapdf/verapdf"  # path of the veraPDF executable
    # NOTE(review): the default flavour "1b" does not obviously match
    # OcrConfig.pdfa_level's default of "2" — confirm that the two defaults
    # are meant to differ before enabling validation.
    flavour: str = "1b"
|
||||
|
||||
|
||||
@dataclass
class FolderUpload:
    """Folder upload target, populated from the ``[upload.folder]`` table.

    Unlike the other upload targets in this module, this one is enabled by
    default.
    """

    enabled: bool = True
    target: str = ""  # empty by default; presumably a directory path — TODO confirm
|
||||
|
||||
|
||||
@dataclass
class NextcloudUpload:
    """Nextcloud upload target, populated from ``[upload.nextcloud]``.

    Disabled by default; every field has a default value.
    """

    enabled: bool = False
    url: str = ""
    username: str = ""
    # Kept out of the generated __repr__ so that logging or printing the
    # configuration object does not expose the credential.
    password: str = field(default="", repr=False)
    remote_path: str = ""
    verify_ssl: bool = True
|
||||
|
||||
|
||||
@dataclass
class SftpUpload:
    """SFTP upload target, populated from the ``[upload.sftp]`` table.

    Disabled by default; every field has a default value.
    """

    enabled: bool = False
    host: str = ""
    port: int = 22
    username: str = ""
    key_file: str = ""
    # Kept out of the generated __repr__ so that logging or printing the
    # configuration object does not expose the credential.
    password: str = field(default="", repr=False)
    remote_path: str = ""
|
||||
|
||||
|
||||
@dataclass
class EmailNotify:
    """E-mail notification settings, populated from ``[notify.email]``.

    Disabled by default; every field has a default value.
    """

    enabled: bool = False
    smtp_host: str = ""
    smtp_port: int = 587
    smtp_user: str = ""
    # Kept out of the generated __repr__ so that logging or printing the
    # configuration object does not expose the credential.
    smtp_password: str = field(default="", repr=False)
    use_starttls: bool = True
    from_addr: str = ""
    # default_factory gives every instance its own list.
    to_addrs: list[str] = field(default_factory=list)
    on: str = "errors"  # always | errors | never
|
||||
|
||||
|
||||
@dataclass
class Config:
    """Complete application configuration as returned by ``load_config``.

    Bundles one instance of each per-section settings class plus the log
    level. All section fields are required; only ``log_level`` has a
    default.
    """

    paths: Paths
    ocr: OcrConfig
    verapdf: VeraPdfConfig
    folder: FolderUpload
    nextcloud: NextcloudUpload
    sftp: SftpUpload
    email: EmailNotify
    log_level: str = "INFO"  # load_config reads this from [logging].level
|
||||
|
||||
|
||||
def _section(data: dict[str, Any], *keys: str) -> dict[str, Any]:
|
||||
cur: Any = data
|
||||
for k in keys:
|
||||
cur = cur.get(k, {}) if isinstance(cur, dict) else {}
|
||||
return cur if isinstance(cur, dict) else {}
|
||||
|
||||
|
||||
def _build_section(cls: Any, table: dict[str, Any]) -> Any:
    """Instantiate dataclass *cls* from *table*, ignoring unknown keys."""
    known = cls.__annotations__
    return cls(**{key: value for key, value in table.items() if key in known})


def load_config(path: str | Path) -> Config:
    """Read the TOML file at *path* and build a ``Config`` from it.

    Tables read: ``[paths]`` (all four keys required), ``[ocr]``,
    ``[verapdf]``, ``[upload.folder]``, ``[upload.nextcloud]``,
    ``[upload.sftp]``, ``[notify.email]`` and ``[logging]`` (key
    ``level``). Apart from ``[paths]``, every table and key is optional and
    falls back to the dataclass defaults; keys that do not correspond to a
    dataclass field are ignored.

    Args:
        path: Location of the TOML configuration file.

    Returns:
        The populated ``Config``.

    Raises:
        OSError: The file cannot be opened.
        tomllib.TOMLDecodeError: The file is not valid TOML.
        KeyError: One or more of the required ``[paths]`` keys is missing;
            the message names the file and the missing keys.
    """
    path = Path(path)
    with path.open("rb") as f:
        data = tomllib.load(f)

    raw_paths = _section(data, "paths")
    required = ("incoming", "outgoing", "working", "error")
    missing = [key for key in required if key not in raw_paths]
    if missing:
        # Same exception type as a plain dict lookup would raise, but with
        # a message that points at the file and lists every missing key.
        raise KeyError(
            f"{path}: missing required [paths] key(s): {', '.join(missing)}"
        )
    paths = Paths(**{key: Path(raw_paths[key]) for key in required})

    return Config(
        paths=paths,
        ocr=_build_section(OcrConfig, _section(data, "ocr")),
        verapdf=_build_section(VeraPdfConfig, _section(data, "verapdf")),
        folder=_build_section(FolderUpload, _section(data, "upload", "folder")),
        nextcloud=_build_section(
            NextcloudUpload, _section(data, "upload", "nextcloud")
        ),
        sftp=_build_section(SftpUpload, _section(data, "upload", "sftp")),
        email=_build_section(EmailNotify, _section(data, "notify", "email")),
        log_level=_section(data, "logging").get("level", "INFO"),
    )
|
||||
Reference in New Issue
Block a user