Files
pdf-ocr-hotfolder/pdf_ocr_hotfolder/config.py
T
techadmin a23a3968ef feat: konfigurierbarer Dateiname + Archiv-Modus für Original (v0.3.0)
Neue [output]-Section:
- name_mode: prefix | suffix | none (suffix wird vor Extension eingefügt)
- name_tag: verbatim einfügbarer String
- original_on_success: delete | archive
- archive_dir mit Kollisions-Schutz (Timestamp-Suffix)

20 neue Tests (50 insgesamt, alle grün).

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-04-09 22:32:41 +02:00

145 lines
3.8 KiB
Python

"""Konfigurations-Loader (TOML)."""
from __future__ import annotations
import tomllib
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any
@dataclass
class Paths:
    """Directories configured in the ``[paths]`` section of the TOML file.

    All four fields are mandatory; none has a default value.
    """

    # NOTE(review): the role of each directory is implied by its name only;
    # the code that uses these paths is not part of this module.
    incoming: Path
    outgoing: Path
    working: Path
    error: Path
@dataclass
class OcrConfig:
    """Settings taken from the ``[ocr]`` section; every field is optional.

    A key missing from the file keeps the default given here.
    """

    # NOTE(review): the per-field remarks are inferred from names and
    # defaults; the consuming code is outside this module — confirm there.
    languages: str = "deu+eng"  # presumably "+"-joined OCR language codes
    jobs: int = 4
    skip_text: bool = True
    oversample: int = 300  # presumably a DPI value — confirm
    pdfa_level: str = "2"  # kept as a string, not an int
    deskew: bool = True
    clean: bool = False
    max_workers: int = 2
    timeout: int = 1800  # presumably seconds — confirm
@dataclass
class OutputConfig:
    """Settings taken from the ``[output]`` section; every field is optional.

    The allowed values listed below are documented only; this class does not
    validate them.
    """

    # One of "prefix" | "suffix" | "none".
    name_mode: str = "prefix"

    # Tag string, inserted verbatim (an empty string means no tag).
    name_tag: str = "OCR_"

    # One of "delete" | "archive".
    original_on_success: str = "delete"

    # Absolute path; required when original_on_success == "archive".
    archive_dir: str = ""
@dataclass
class VeraPdfConfig:
    """Settings taken from the ``[verapdf]`` section; disabled by default."""

    enabled: bool = False
    # Path of the veraPDF executable.
    binary: str = "/opt/verapdf/verapdf"
    # NOTE(review): presumably the PDF/A flavour to validate against — confirm.
    flavour: str = "1b"
@dataclass
class FolderUpload:
    """Settings taken from ``[upload.folder]``; enabled unless switched off."""

    enabled: bool = True
    # Empty by default. NOTE(review): presumably a destination directory —
    # how an empty value is treated is decided by code outside this module.
    target: str = ""
@dataclass
class NextcloudUpload:
    """Settings taken from ``[upload.nextcloud]``; disabled by default.

    ``password`` is excluded from the generated ``repr`` so that printing or
    logging a configuration object does not expose the credential. It still
    takes part in ``__init__`` and ``__eq__`` and is readable as an attribute.
    """

    enabled: bool = False
    url: str = ""
    username: str = ""
    # Secret: kept out of repr() output on purpose.
    password: str = field(default="", repr=False)
    remote_path: str = ""
    verify_ssl: bool = True
@dataclass
class SftpUpload:
    """Settings taken from ``[upload.sftp]``; disabled by default.

    ``password`` is excluded from the generated ``repr`` so that printing or
    logging a configuration object does not expose the credential. It still
    takes part in ``__init__`` and ``__eq__`` and is readable as an attribute.
    """

    enabled: bool = False
    host: str = ""
    port: int = 22
    username: str = ""
    # NOTE(review): presumably the path of a private key file — confirm.
    key_file: str = ""
    # Secret: kept out of repr() output on purpose.
    password: str = field(default="", repr=False)
    remote_path: str = ""
@dataclass
class EmailNotify:
    """Settings taken from ``[notify.email]``; disabled by default.

    ``smtp_password`` is excluded from the generated ``repr`` so that printing
    or logging a configuration object does not expose the credential. It still
    takes part in ``__init__`` and ``__eq__`` and is readable as an attribute.
    """

    enabled: bool = False
    smtp_host: str = ""
    smtp_port: int = 587
    smtp_user: str = ""
    # Secret: kept out of repr() output on purpose.
    smtp_password: str = field(default="", repr=False)
    use_starttls: bool = True
    from_addr: str = ""
    # default_factory gives every instance its own list.
    to_addrs: list[str] = field(default_factory=list)
    # One of "always" | "errors" | "never".
    on: str = "errors"
@dataclass
class Config:
    """Complete configuration, one attribute per section of the TOML file.

    ``load_config`` in this module builds instances of this class. Every
    section object is required; only ``log_level`` has a default.
    """

    paths: Paths
    ocr: OcrConfig
    output: OutputConfig
    verapdf: VeraPdfConfig
    # Upload targets.
    folder: FolderUpload
    nextcloud: NextcloudUpload
    sftp: SftpUpload
    # Notification settings.
    email: EmailNotify
    log_level: str = "INFO"
def _section(data: dict[str, Any], *keys: str) -> dict[str, Any]:
cur: Any = data
for k in keys:
cur = cur.get(k, {}) if isinstance(cur, dict) else {}
return cur if isinstance(cur, dict) else {}
def _build_section(cls: type, values: dict[str, Any]) -> Any:
    """Instantiate dataclass *cls* from *values*, dropping unknown keys."""
    known = cls.__annotations__
    return cls(**{k: v for k, v in values.items() if k in known})


def load_config(path: str | Path) -> Config:
    """Read the TOML file at *path* and build a :class:`Config` from it.

    Section mapping: ``[paths]``, ``[ocr]``, ``[output]``, ``[verapdf]``,
    ``[upload.folder]``, ``[upload.nextcloud]``, ``[upload.sftp]``,
    ``[notify.email]`` and the ``level`` key of ``[logging]``. Apart from
    ``[paths]``, a missing section or key falls back to the dataclass
    defaults. Keys that do not match a dataclass field are silently ignored.

    Args:
        path: Location of the TOML configuration file.

    Returns:
        The populated configuration object.

    Raises:
        OSError: The file cannot be opened.
        tomllib.TOMLDecodeError: The file is not valid TOML.
        KeyError: ``[paths]`` lacks ``incoming``, ``outgoing``, ``working``
            or ``error``; the message names the file and every missing key.
    """
    path = Path(path)
    with path.open("rb") as f:
        data = tomllib.load(f)

    # [paths] is the only section without defaults, so report all missing
    # keys at once instead of failing on the first bare lookup.
    raw_paths = _section(data, "paths")
    required = ("incoming", "outgoing", "working", "error")
    missing = [key for key in required if key not in raw_paths]
    if missing:
        raise KeyError(
            f"{path}: [paths] is missing required key(s): {', '.join(missing)}"
        )
    paths = Paths(**{key: Path(raw_paths[key]) for key in required})

    return Config(
        paths=paths,
        ocr=_build_section(OcrConfig, _section(data, "ocr")),
        output=_build_section(OutputConfig, _section(data, "output")),
        verapdf=_build_section(VeraPdfConfig, _section(data, "verapdf")),
        folder=_build_section(FolderUpload, _section(data, "upload", "folder")),
        nextcloud=_build_section(
            NextcloudUpload, _section(data, "upload", "nextcloud")
        ),
        sftp=_build_section(SftpUpload, _section(data, "upload", "sftp")),
        email=_build_section(EmailNotify, _section(data, "notify", "email")),
        log_level=_section(data, "logging").get("level", "INFO"),
    )