Initial commit: PDF OCR Hotfolder v0.1.0
Komplettes Rewrite des alten Bash-Tools `pdf-tool` in Python. - ocrmypdf als Library, watchdog für Hotfolder, ThreadPool für Parallelität - Upload-Targets: folder, Nextcloud (WebDAV), SFTP - E-Mail-Notify, optional veraPDF - Interaktiver Installer mit Service-User-Support (lokal + AD via SSSD) Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,81 @@
|
||||
# PDF OCR Hotfolder — configuration
# Save as /etc/pdf-ocr-hotfolder/config.toml
|
||||
|
||||
[paths]
# Incoming directory: scanned PDFs are dropped here.
incoming = "/var/lib/pdf-ocr-hotfolder/incoming"
# Outgoing directory: finished, searchable PDFs.
outgoing = "/var/lib/pdf-ocr-hotfolder/outgoing"
# Working directory (used while a PDF is being processed).
working = "/var/lib/pdf-ocr-hotfolder/working"
# Error directory: PDFs that could not be processed.
error = "/var/lib/pdf-ocr-hotfolder/error"
|
||||
|
||||
[ocr]
# Tesseract languages (e.g. "deu", "deu+eng").
languages = "deu+eng"
# Number of threads per PDF (ocrmypdf --jobs).
jobs = 4
# Skip pages that already contain OCR text instead of OCRing them again.
skip_text = true
# Resolution for rasterized pages.
oversample = 300
# PDF/A conformance level ("1", "2", "3", or empty for no PDF/A output).
pdfa_level = "2"
# Automatically straighten skewed scans.
deskew = true
# Clean up the page background.
clean = false
# Maximum number of PDFs processed in parallel (the main system rarely has
# more than 1-2 at the same time).
max_workers = 2
# Timeout per PDF, in seconds.
timeout = 1800
|
||||
|
||||
[verapdf]
# PDF/A validation (optional).
enabled = false
binary = "/opt/verapdf/verapdf"
# Validation profile. Keep its part number in line with [ocr].pdfa_level:
# with pdfa_level = "2" the matching profile is "2b", not "1b".
# NOTE(review): assumes this value is handed to veraPDF as its flavour — confirm.
flavour = "2b"
|
||||
|
||||
# Upload targets — any number of them can be enabled.
# If all are disabled, the finished PDF simply stays in the outgoing folder.
|
||||
|
||||
[upload.folder]
enabled = true
# If empty, [paths].outgoing is used.
target = ""
|
||||
|
||||
[upload.nextcloud]
enabled = false
url = "https://cloud.example.com"
username = "scanuser"
# Placeholder — replace before enabling. The value is stored in plain text,
# so restrict read access to this file.
password = "app-password"
# Target path relative to the user root, e.g. "Scans/Inbox".
remote_path = "Scans/Inbox"
verify_ssl = true
|
||||
|
||||
[upload.sftp]
enabled = false
host = "sftp.example.com"
port = 22
username = "scanuser"
# Use either a key file or a password.
key_file = "/etc/pdf-ocr-hotfolder/sftp_key"
password = ""
remote_path = "/uploads"
|
||||
|
||||
[notify.email]
enabled = false
smtp_host = "smtp.example.com"
smtp_port = 587
smtp_user = "alerts@example.com"
# Placeholder — replace before enabling. The value is stored in plain text,
# so restrict read access to this file.
smtp_password = "secret"
use_starttls = true
from_addr = "PDF OCR Hotfolder <alerts@example.com>"
to_addrs = ["admin@example.com"]
# When to notify: "always" | "errors" | "never"
on = "errors"
|
||||
|
||||
[logging]
# Log level: DEBUG | INFO | WARNING | ERROR
level = "INFO"
|
||||
Reference in New Issue
Block a user