a23a3968ef
Neue [output]-Section: - name_mode: prefix | suffix | none (suffix wird vor Extension eingefügt) - name_tag: verbatim einfügbarer String - original_on_success: delete | archive - archive_dir mit Kollisions-Schutz (Timestamp-Suffix) 20 neue Tests (50 insgesamt, alle grün). Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
145 lines
3.8 KiB
Python
145 lines
3.8 KiB
Python
"""Konfigurations-Loader (TOML)."""
|
|
from __future__ import annotations
|
|
|
|
import tomllib
|
|
from dataclasses import dataclass, field
|
|
from pathlib import Path
|
|
from typing import Any
|
|
|
|
|
|
@dataclass
class Paths:
    """Directories taken from the ``[paths]`` table of the config file.

    All four entries are required; none has a default.
    """

    # NOTE(review): the roles below are inferred from the field names only —
    # confirm against the code that consumes them.
    incoming: Path  # presumably where new input files arrive
    outgoing: Path  # presumably where finished files are placed
    working: Path  # presumably scratch space during processing
    error: Path  # presumably where failed inputs end up
|
|
|
|
|
|
@dataclass
class OcrConfig:
    """Options taken from the ``[ocr]`` table; every field has a default.

    Keys missing from the config file keep the defaults declared here.
    """

    languages: str = "deu+eng"
    jobs: int = 4
    skip_text: bool = True
    oversample: int = 300
    pdfa_level: str = "2"
    deskew: bool = True
    clean: bool = False
    max_workers: int = 2
    # NOTE(review): unit is not stated in this file — presumably seconds.
    timeout: int = 1800
|
|
|
|
|
|
@dataclass
class OutputConfig:
    """Options taken from the ``[output]`` table; every field has a default.

    The allowed values listed in the comments are not validated by this class.
    """

    # One of "prefix" | "suffix" | "none".
    name_mode: str = "prefix"
    # Tag string, inserted verbatim (empty string = no tag).
    name_tag: str = "OCR_"
    # One of "delete" | "archive".
    original_on_success: str = "delete"
    # Absolute path; required when original_on_success == "archive".
    archive_dir: str = ""
|
|
|
|
|
|
@dataclass
class VeraPdfConfig:
    """Options taken from the ``[verapdf]`` table; disabled by default."""

    enabled: bool = False
    # Default location of the veraPDF executable.
    binary: str = "/opt/verapdf/verapdf"
    flavour: str = "1b"
|
|
|
|
|
|
@dataclass
class FolderUpload:
    """Options taken from the ``[upload.folder]`` table; enabled by default."""

    enabled: bool = True
    # Empty string when the config file does not set a target.
    target: str = ""
|
|
|
|
|
|
@dataclass
class NextcloudUpload:
    """Options taken from the ``[upload.nextcloud]`` table; disabled by default.

    The password is excluded from ``repr()`` so that logging or printing the
    configuration object does not expose the credential. It still takes part
    in equality comparison and is accessible as a normal attribute.
    """

    enabled: bool = False
    url: str = ""
    username: str = ""
    # repr=False: keep the secret out of log output and tracebacks.
    password: str = field(default="", repr=False)
    remote_path: str = ""
    verify_ssl: bool = True
|
|
|
|
|
|
@dataclass
class SftpUpload:
    """Options taken from the ``[upload.sftp]`` table; disabled by default.

    The password is excluded from ``repr()`` so that logging or printing the
    configuration object does not expose the credential. It still takes part
    in equality comparison and is accessible as a normal attribute.
    """

    enabled: bool = False
    host: str = ""
    port: int = 22
    username: str = ""
    key_file: str = ""
    # repr=False: keep the secret out of log output and tracebacks.
    password: str = field(default="", repr=False)
    remote_path: str = ""
|
|
|
|
|
|
@dataclass
class EmailNotify:
    """Options taken from the ``[notify.email]`` table; disabled by default.

    The SMTP password is excluded from ``repr()`` so that logging or printing
    the configuration object does not expose the credential. It still takes
    part in equality comparison and is accessible as a normal attribute.
    """

    enabled: bool = False
    smtp_host: str = ""
    smtp_port: int = 587
    smtp_user: str = ""
    # repr=False: keep the secret out of log output and tracebacks.
    smtp_password: str = field(default="", repr=False)
    use_starttls: bool = True
    from_addr: str = ""
    # default_factory gives every instance its own list.
    to_addrs: list[str] = field(default_factory=list)
    on: str = "errors"  # always | errors | never
|
|
|
|
|
|
@dataclass
class Config:
    """Top-level configuration object returned by ``load_config``.

    Groups one settings object per config-file section; only ``log_level``
    has a default.
    """

    # Required directories ([paths]).
    paths: Paths
    # Processing options ([ocr], [output], [verapdf]).
    ocr: OcrConfig
    output: OutputConfig
    verapdf: VeraPdfConfig
    # Upload targets ([upload.folder], [upload.nextcloud], [upload.sftp]).
    folder: FolderUpload
    nextcloud: NextcloudUpload
    sftp: SftpUpload
    # Notification settings ([notify.email]).
    email: EmailNotify
    # Taken from [logging].level by load_config.
    log_level: str = "INFO"
|
|
|
|
|
|
def _section(data: dict[str, Any], *keys: str) -> dict[str, Any]:
|
|
cur: Any = data
|
|
for k in keys:
|
|
cur = cur.get(k, {}) if isinstance(cur, dict) else {}
|
|
return cur if isinstance(cur, dict) else {}
|
|
|
|
|
|
def _from_section(cls: type[Any], section: dict[str, Any]) -> Any:
    """Build dataclass *cls* from a TOML table, silently ignoring unknown keys."""
    known = cls.__annotations__
    return cls(**{key: value for key, value in section.items() if key in known})


def load_config(path: str | Path) -> Config:
    """Read the TOML file at *path* and return the populated ``Config``.

    Args:
        path: Location of the TOML configuration file.

    Returns:
        A ``Config`` whose optional sections fall back to the dataclass
        defaults for every key the file does not set. Keys that do not
        correspond to a dataclass field are ignored.

    Raises:
        OSError: The file cannot be opened.
        tomllib.TOMLDecodeError: The file is not valid TOML.
        KeyError: One or more of the required ``[paths]`` keys
            (``incoming``, ``outgoing``, ``working``, ``error``) is missing;
            the message lists all missing keys.
    """
    with Path(path).open("rb") as handle:
        data = tomllib.load(handle)

    # [paths] is the only section without defaults: report every missing key
    # at once instead of failing on the first one with a bare key name.
    paths_table = _section(data, "paths")
    required = ("incoming", "outgoing", "working", "error")
    missing = [key for key in required if key not in paths_table]
    if missing:
        raise KeyError(
            f"[paths] section is missing required key(s): {', '.join(missing)}"
        )
    paths = Paths(**{key: Path(paths_table[key]) for key in required})

    return Config(
        paths=paths,
        ocr=_from_section(OcrConfig, _section(data, "ocr")),
        output=_from_section(OutputConfig, _section(data, "output")),
        verapdf=_from_section(VeraPdfConfig, _section(data, "verapdf")),
        folder=_from_section(FolderUpload, _section(data, "upload", "folder")),
        nextcloud=_from_section(
            NextcloudUpload, _section(data, "upload", "nextcloud")
        ),
        sftp=_from_section(SftpUpload, _section(data, "upload", "sftp")),
        email=_from_section(EmailNotify, _section(data, "notify", "email")),
        log_level=_section(data, "logging").get("level", "INFO"),
    )
|