commit 76c3a991df0fecda492c0d1501ae6deed7f119f7 Author: Dominik Höfling Date: Wed Apr 8 00:22:31 2026 +0200 Initial commit: PDF OCR Hotfolder v0.1.0 Komplettes Rewrite des alten Bash-Tools `pdf-tool` in Python. - ocrmypdf als Library, watchdog für Hotfolder, ThreadPool für Parallelität - Upload-Targets: folder, Nextcloud (WebDAV), SFTP - E-Mail-Notify, optional veraPDF - Interaktiver Installer mit Service-User-Support (lokal + AD via SSSD) Co-Authored-By: Claude Opus 4.6 diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..c4f3e69 --- /dev/null +++ b/.gitignore @@ -0,0 +1,15 @@ +__pycache__/ +*.pyc +*.pyo +venv/ +env/ +.venv/ +*.egg-info/ +build/ +dist/ +config.toml +.repo_path +*.log +.DS_Store +.idea/ +.vscode/ diff --git a/AI_AGENT_BRIEFING.md b/AI_AGENT_BRIEFING.md new file mode 100644 index 0000000..d4fce93 --- /dev/null +++ b/AI_AGENT_BRIEFING.md @@ -0,0 +1,117 @@ +# AI Agent Briefing — PDF OCR Hotfolder + +**Zuletzt aktualisiert:** 2026-04-08 +**Version:** 0.1.0 +**Status:** Initiale Implementation, nicht produktiv getestet + +## 🎯 Projektziel + +Eingehende gescannte PDFs werden automatisch durch OCR (ocrmypdf + Tesseract) in durchsuchbare PDFs (optional PDF/A) umgewandelt und nach Wahl in einen Ordner / Nextcloud / per SFTP weitergegeben. Ersetzt das alte Bash-Tool `pdf-tool` (im Workspace). 
+ +## 📁 Projekt-Struktur + +``` +pdf-ocr-hotfolder/ +├── pdf_ocr_hotfolder/ +│ ├── __init__.py # Versionsstring +│ ├── __main__.py # CLI-Entrypoint (argparse, --once, --config) +│ ├── config.py # TOML-Loader, Dataclasses +│ ├── service.py # Hauptservice (watchdog + ThreadPool) +│ ├── processor.py # ocrmypdf + veraPDF +│ └── uploaders.py # folder, nextcloud (WebDAV), sftp, email +├── systemd/ +│ └── pdf-ocr-hotfolder.service # Template (Platzhalter __SERVICE_USER__/__SERVICE_GROUP__) +├── config.example.toml +├── install.sh # Interaktiver Installer +├── update.sh # Update aus Repo +├── requirements.txt +├── VERSION +├── CHANGELOG.md +└── README.md +``` + +## 🔧 Stack + +| Komponente | Technologie | +|------------|-------------| +| Sprache | Python 3.11+ (für `tomllib` aus stdlib) | +| OCR | `ocrmypdf` (als Library, nicht via Subprozess) | +| Engine | Tesseract | +| Watcher | `watchdog` | +| HTTP | `requests` (Nextcloud WebDAV) | +| SFTP | `paramiko` | +| Email | `smtplib` (stdlib) | +| Service | systemd | + +## 🖥️ Installations-Layout + +| Pfad | Inhalt | +|------|--------| +| `/opt/pdf-ocr-hotfolder/` | Code + venv (`venv/bin/python`) | +| `/etc/pdf-ocr-hotfolder/config.toml` | Konfiguration (mode 640, root:) | +| `/var/lib/pdf-ocr-hotfolder/{incoming,working,outgoing,error}/` | Datenverzeichnisse | +| `/var/log/pdf-ocr-hotfolder/` | Logs (zusätzlich zu journald) | +| `/etc/systemd/system/pdf-ocr-hotfolder.service` | systemd-Unit | +| `/var/backups/pdf-ocr-hotfolder/` | Update-Backups | + +## 👤 Service-User + +Der Installer fragt interaktiv: +1. Username (default `pdfocr`) +2. Falls User existiert (lokal oder AD via SSSD/Winbind): wird übernommen, primäre Gruppe automatisch erkannt +3. Falls nicht: Frage nach lokaler Anlage als System-User + +**Wichtig:** Bei AD-Usern mit lokaler UID werden Datei-Berechtigungen über die UID gesetzt — funktioniert transparent. + +## 🔄 Verarbeitungs-Flow + +1. `watchdog` triggert auf Datei-Event in `incoming/` +2. 
`_wait_until_stable()` wartet, bis Datei nicht mehr wächst (Scanner schreibt mehrmals) +3. Move nach `working/` +4. `ocrmypdf.ocr()` als **Library-Call** (kein Subprozess-Start pro PDF — schneller) +5. Optional: veraPDF-Validierung (CLI-Subprozess) +6. Move nach `outgoing/` als `OCR_<name>.pdf` +7. Aktive Upload-Targets ausführen (folder/nextcloud/sftp) +8. Optional E-Mail-Notify + +Fehler → Move nach `error/`, Service läuft weiter (kein `exit 1` wie im alten Bash-Tool). + +## 🧠 Performance-Entscheidungen + +- **ocrmypdf als Library** statt `subprocess`: spart Python-Interpreter-Start pro PDF +- **ThreadPool** mit `max_workers` (default 2) — selbst wenn selten >1 PDF gleichzeitig kommt, blockiert ein langsamer Scan keinen schnellen +- **`--jobs` an ocrmypdf**: Tesseract parallelisiert Seiten innerhalb eines PDFs +- **`skip_text=True`**: bereits OCR-haltige Seiten werden nicht neu verarbeitet +- **Stabilitäts-Check** statt magic-file `new` (alte Bash-Krücke) +- veraPDF nur wenn `enabled=true` (JVM-Start ist teuer) + +## 🛠️ Entwicklung + +Lokaler Test ohne Installation: +```bash +cd ~/dev/gitea.sonith.de/pdf-ocr-hotfolder +python3 -m venv venv && source venv/bin/activate +pip install -r requirements.txt +cp config.example.toml /tmp/config.toml +# Pfade in /tmp/config.toml auf Test-Verzeichnisse anpassen +python -m pdf_ocr_hotfolder --config /tmp/config.toml +``` + +## 📋 Roadmap / TODO + +- [ ] Tests (`pytest`) für `processor` und `uploaders` +- [ ] Prometheus-Metriken (verarbeitete PDFs, Fehlerquote, Laufzeit) +- [ ] CLI-Subkommandos: `pdf-ocr-hotfolder reprocess <datei>` +- [ ] Optional: S3/MinIO Upload-Target +- [ ] Docker-Image für Setups ohne systemd + +## 🔑 Repo + +- **Repo:** https://gitea.sonith.de/sonith_ug/pdf-ocr-hotfolder +- **Owner:** sonith_ug +- **Versionierung:** Semver (PATCH bei jedem Build, MINOR bei Features, MAJOR manuell) +- **Tags:** `v{VERSION}`, automatischer Push nach Commit + +## 📞 Kontakt + +**Maintainer:** Dominik Höfling (Sonith GmbH) diff --git 
a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..20f895e --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,19 @@ +# Changelog + +## [0.1.0] - 2026-04-08 + +### Added +- Initiale Version (Komplettes Rewrite des alten Bash-Tools `pdf-tool`) +- Python-Implementation auf Basis von `ocrmypdf` (Library, kein Subprozess) +- Hotfolder-Watcher mit `watchdog` (created/moved/closed Events) +- File-Stability-Check (wartet bis Scanner fertig geschrieben hat) +- ThreadPool für parallele PDF-Verarbeitung (`max_workers`) +- Upload-Targets: lokaler Ordner, Nextcloud (WebDAV via `requests`), SFTP (`paramiko`) +- E-Mail-Notify (`smtplib`, immer / nur Fehler / nie) +- Optional veraPDF-Validierung +- TOML-Konfiguration (`tomllib` aus stdlib, Python ≥3.11) +- systemd-Unit mit Hardening-Optionen +- `install.sh` mit interaktivem Service-User-Prompt + (lokal anlegen oder bestehenden lokalen/AD-User übernehmen) +- `update.sh` mit Backup, Code-Sync und Service-Reload +- README.md, AI_AGENT_BRIEFING.md diff --git a/README.md b/README.md new file mode 100644 index 0000000..a8c2659 --- /dev/null +++ b/README.md @@ -0,0 +1,176 @@ +# PDF OCR Hotfolder + +Verwandelt eingehende gescannte PDFs automatisch in **durchsuchbare PDFs** (PDF/A optional) per OCR. Hauptanwendung: Kunden-Scanner schiebt PDF in einen Ordner — Sekunden später liegt die OCR-Version im Ausgang oder wird in Nextcloud / per SFTP weitergeleitet. 
+ +## Features + +- 🔍 **OCR via ocrmypdf + Tesseract** (Library-Call, kein Subprozess-Overhead) +- 📂 **Hotfolder via watchdog** — reagiert auf `created`, `moved`, `closed` Events +- 🧠 **Stabilitäts-Erkennung**: wartet bis Scanner fertig geschrieben hat +- 🔁 **Parallelverarbeitung** mehrerer PDFs (ThreadPool, konfigurierbar) +- ✅ **PDF/A-Output** (1, 2 oder 3) optional +- 🛡️ **veraPDF-Validierung** optional +- ☁️ **Upload-Ziele**: lokaler Ordner, Nextcloud (WebDAV via Python), SFTP +- 📧 **E-Mail-Notify** (immer / nur Fehler / nie) +- 🔐 **Service-User-Support** für lokale **und AD-User mit lokaler UID** (SSSD/Winbind) +- ⚙️ Saubere systemd-Integration mit auto-Restart + +## Schnellstart + +```bash +git clone https://gitea.sonith.de/sonith_ug/pdf-ocr-hotfolder.git +cd pdf-ocr-hotfolder +sudo ./install.sh +``` + +Der Installer fragt nach dem Service-User. Standardmäßig wird ein lokaler System-User `pdfocr` angelegt. Wenn der User bereits existiert (z.B. AD via SSSD), wird er einfach übernommen. + +Danach Konfiguration anpassen: + +```bash +sudo nano /etc/pdf-ocr-hotfolder/config.toml +sudo systemctl restart pdf-ocr-hotfolder +``` + +Test: + +```bash +cp irgendein-scan.pdf /var/lib/pdf-ocr-hotfolder/incoming/ +journalctl -u pdf-ocr-hotfolder -f +``` + +Nach wenigen Sekunden liegt das OCR-PDF unter `/var/lib/pdf-ocr-hotfolder/outgoing/OCR_irgendein-scan.pdf`. 
+ +## Verzeichnisse + +| Pfad | Zweck | +|------|-------| +| `/etc/pdf-ocr-hotfolder/config.toml` | Konfiguration | +| `/var/lib/pdf-ocr-hotfolder/incoming` | Eingang (Scanner schreibt hier rein) | +| `/var/lib/pdf-ocr-hotfolder/working` | Arbeitsverzeichnis während OCR | +| `/var/lib/pdf-ocr-hotfolder/outgoing` | Ausgang (fertige PDFs) | +| `/var/lib/pdf-ocr-hotfolder/error` | PDFs, die nicht verarbeitet werden konnten | +| `/opt/pdf-ocr-hotfolder/` | Code + venv | +| `/var/log/pdf-ocr-hotfolder/` | Logs (zusätzlich zu journald) | + +## Konfiguration + +Vollständiges Beispiel: [`config.example.toml`](config.example.toml). Wichtigste Sektionen: + +### `[ocr]` +```toml +languages = "deu+eng" # Tesseract-Sprachen +jobs = 4 # Threads pro PDF +skip_text = true # bereits OCR-haltige Seiten überspringen +pdfa_level = "2" # "1", "2", "3" oder "" für reines PDF +deskew = true +max_workers = 2 # parallele PDFs +timeout = 1800 +``` + +### `[upload.nextcloud]` +```toml +enabled = true +url = "https://cloud.example.com" +username = "scanuser" +password = "app-password" +remote_path = "Scans/Inbox" +``` + +### `[upload.sftp]` +```toml +enabled = true +host = "sftp.example.com" +username = "scanuser" +key_file = "/etc/pdf-ocr-hotfolder/sftp_key" +remote_path = "/uploads" +``` + +### `[notify.email]` +```toml +enabled = true +smtp_host = "smtp.example.com" +smtp_port = 587 +smtp_user = "alerts@example.com" +smtp_password = "secret" +from_addr = "PDF OCR <alerts@example.com>" +to_addrs = ["admin@example.com"] +on = "errors" # always | errors | never +``` + +## Service-Verwaltung + +```bash +sudo systemctl status pdf-ocr-hotfolder +sudo systemctl restart pdf-ocr-hotfolder +journalctl -u pdf-ocr-hotfolder -f +``` + +## Update + +```bash +cd /pfad/zum/repo +git pull +sudo ./update.sh +``` + +Das Repo muss bestehen bleiben — `update.sh` kopiert daraus. 
+ +## Manueller Lauf (One-Shot) + +Bestehende PDFs im Eingang einmalig verarbeiten und beenden: + +```bash +sudo -u pdfocr /opt/pdf-ocr-hotfolder/venv/bin/python -m pdf_ocr_hotfolder \ + --config /etc/pdf-ocr-hotfolder/config.toml --once +``` + +## Troubleshooting + +### Tesseract findet die Sprache nicht +```bash +sudo apt install tesseract-ocr-deu tesseract-ocr-eng +``` + +### "PriorOcrFoundError" +ocrmypdf erkennt bereits vorhandenen OCR-Text. `skip_text = true` in der Config setzen. + +### Berechtigungsprobleme bei AD-User +Service-User braucht **rw** auf alle vier Verzeichnisse unter `/var/lib/pdf-ocr-hotfolder/`. Bei AD-User mit lokaler UID: +```bash +sudo chown -R DOMAIN\\scanuser:DOMAIN\\scangroup /var/lib/pdf-ocr-hotfolder +``` + +### veraPDF-Validierung schlägt immer fehl +veraPDF binary prüfen (`[verapdf].binary`). Wenn nicht zwingend gebraucht: `enabled = false`. + +## Architektur + +``` + ┌──────────┐ watchdog ┌──────────────┐ ocrmypdf ┌──────────┐ + │ Scanner │ ──────────────▶ │ incoming/ │ ─────────────▶ │ working/ │ + └──────────┘ PDF-Datei └──────────────┘ (Library) └────┬─────┘ + │ + optional veraPDF + │ + ▼ + ┌──────────────┐ + │ outgoing/ │ + └──────┬───────┘ + │ + ┌──────────────────────┼──────────────────────┐ + ▼ ▼ ▼ + ┌────────────┐ ┌────────────┐ ┌────────────┐ + │ Nextcloud │ │ SFTP │ │ E-Mail │ + │ (WebDAV) │ │ (paramiko) │ │ Notify │ + └────────────┘ └────────────┘ └────────────┘ +``` + +## Lizenz + +MIT — © Sonith UG + +--- + +**Version:** 0.1.0 +**Repo:** https://gitea.sonith.de/sonith_ug/pdf-ocr-hotfolder diff --git a/VERSION b/VERSION new file mode 100644 index 0000000..6e8bf73 --- /dev/null +++ b/VERSION @@ -0,0 +1 @@ +0.1.0 diff --git a/config.example.toml b/config.example.toml new file mode 100644 index 0000000..1b66943 --- /dev/null +++ b/config.example.toml @@ -0,0 +1,81 @@ +# PDF OCR Hotfolder — Konfiguration +# Speichern als /etc/pdf-ocr-hotfolder/config.toml + +[paths] +# Eingangsverzeichnis: hier landen gescannte PDFs 
+incoming = "/var/lib/pdf-ocr-hotfolder/incoming"
+# Ausgangsverzeichnis: fertige durchsuchbare PDFs
+outgoing = "/var/lib/pdf-ocr-hotfolder/outgoing"
+# Arbeitsverzeichnis (während Verarbeitung)
+working = "/var/lib/pdf-ocr-hotfolder/working"
+# Fehlerverzeichnis: PDFs, die nicht verarbeitet werden konnten
+error = "/var/lib/pdf-ocr-hotfolder/error"
+
+[ocr]
+# Tesseract-Sprachen (z.B. "deu", "deu+eng")
+languages = "deu+eng"
+# Anzahl Threads pro PDF (ocrmypdf --jobs)
+jobs = 4
+# Bereits OCR-haltige Seiten überspringen statt neu zu OCRen
+skip_text = true
+# Auflösung für gerasterte Seiten
+oversample = 300
+# PDF/A-Konformitätsstufe ("1", "2", "3" oder leer für keinen PDF/A-Output)
+pdfa_level = "2"
+# Schiefe Scans automatisch begradigen
+deskew = true
+# Hintergrund säubern
+clean = false
+# Maximale parallele PDFs (Hauptsystem hat selten mehr als 1-2 gleichzeitig)
+max_workers = 2
+# Timeout pro PDF in Sekunden
+timeout = 1800
+
+[verapdf]
+# PDF/A-Validierung (optional)
+enabled = false
+binary = "/opt/verapdf/verapdf"
+flavour = "1b"
+
+# Upload-Ziele — beliebig viele aktivierbar.
+# Wenn alle deaktiviert sind, bleibt das fertige PDF einfach im outgoing-Ordner.
+
+[upload.folder]
+enabled = true
+# Wenn leer, wird [paths].outgoing verwendet
+target = ""
+
+[upload.nextcloud]
+enabled = false
+url = "https://cloud.example.com"
+username = "scanuser"
+password = "app-password"
+# Zielpfad relativ zum User-Root, z.B. "Scans/Inbox"
+remote_path = "Scans/Inbox"
+verify_ssl = true
+
+[upload.sftp]
+enabled = false
+host = "sftp.example.com"
+port = 22
+username = "scanuser"
+# Entweder Key-Datei oder Passwort
+key_file = "/etc/pdf-ocr-hotfolder/sftp_key"
+password = ""
+remote_path = "/uploads"
+
+[notify.email]
+enabled = false
+smtp_host = "smtp.example.com"
+smtp_port = 587
+smtp_user = "alerts@example.com"
+smtp_password = "secret"
+use_starttls = true
+from_addr = "PDF OCR Hotfolder <alerts@example.com>"
+to_addrs = ["admin@example.com"]
+# Wann benachrichtigen: "always" | "errors" | "never"
+on = "errors"
+
+[logging]
+# DEBUG | INFO | WARNING | ERROR
+level = "INFO"
diff --git a/install.sh b/install.sh
new file mode 100755
index 0000000..e4dd871
--- /dev/null
+++ b/install.sh
@@ -0,0 +1,166 @@
+#!/usr/bin/env bash
+#
+# PDF OCR Hotfolder — installer for Debian 12/13
+#
+# Prompts interactively for the service user. Supported cases:
+#   - create a new local system user
+#   - reuse an already existing local user
+#   - reuse an AD user with a local UID (e.g. via SSSD/Winbind)
+#
+
+set -euo pipefail
+
+RED='\033[0;31m'; GREEN='\033[0;32m'; YELLOW='\033[1;33m'; BLUE='\033[0;34m'; NC='\033[0m'
+# Colored log helpers; warnings and errors go to stderr so stdout stays clean.
+log_info() { echo -e "${GREEN}[INFO]${NC} $*"; }
+log_warn() { echo -e "${YELLOW}[WARN]${NC} $*" >&2; }
+log_error() { echo -e "${RED}[ERROR]${NC} $*" >&2; }
+log_step() { echo -e "${BLUE}==>${NC} $*"; }
+
+# Escape $1 for use as the replacement part of a `sed 's|…|…|'` command.
+# Backslash, '&' and the '|' delimiter are special there; an AD account name
+# such as DOMAIN\user would otherwise be mangled in the generated unit file.
+sed_escape_replacement() {
+    printf '%s' "$1" | sed -e 's/[\\&|]/\\&/g'
+}
+
+if [ "${EUID}" -ne 0 ]; then
+    log_error "Bitte als root ausführen: sudo ./install.sh"
+    exit 1
+fi
+
+INSTALL_DIR="/opt/pdf-ocr-hotfolder"
+CONFIG_DIR="/etc/pdf-ocr-hotfolder"
+DATA_DIR="/var/lib/pdf-ocr-hotfolder"
+LOG_DIR="/var/log/pdf-ocr-hotfolder"
+SERVICE_NAME="pdf-ocr-hotfolder"
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+REPO_DIR="$SCRIPT_DIR"
+
+if [ ! -f "$REPO_DIR/pdf_ocr_hotfolder/__init__.py" ]; then
+    log_error "Repo-Layout nicht erkannt. install.sh aus dem Repo ausführen."
+    exit 1
+fi
+
+echo
+echo "=========================================="
+echo " PDF OCR Hotfolder — Installation"
+echo "=========================================="
+echo
+
+# ============ 1. System dependencies ============
+log_step "Installiere System-Pakete"
+
+apt-get update -qq
+apt-get install -y --no-install-recommends \
+    python3 python3-venv python3-pip \
+    tesseract-ocr tesseract-ocr-deu tesseract-ocr-eng \
+    ghostscript qpdf unpaper pngquant \
+    icc-profiles-free \
+    ca-certificates curl
+
+log_info "System-Pakete installiert ✓"
+
+# ============ 2. Service user ============
+log_step "Service-User konfigurieren"
+
+read -r -p "Service-User-Name [pdfocr]: " SERVICE_USER
+SERVICE_USER="${SERVICE_USER:-pdfocr}"
+
+if id "$SERVICE_USER" &>/dev/null; then
+    log_info "User '$SERVICE_USER' existiert bereits (lokal oder via AD)."
+    SERVICE_GROUP="$(id -gn "$SERVICE_USER")"
+    log_info "Verwende bestehende primäre Gruppe: $SERVICE_GROUP"
+else
+    log_warn "User '$SERVICE_USER' existiert nicht."
+    read -r -p "Lokal als System-User anlegen? [J/n]: " CREATE_USER
+    CREATE_USER="${CREATE_USER:-J}"
+    if [[ "$CREATE_USER" =~ ^[JjYy]$ ]]; then
+        adduser --system --group --home "$DATA_DIR" --shell /usr/sbin/nologin "$SERVICE_USER"
+        SERVICE_GROUP="$SERVICE_USER"
+        log_info "Lokaler System-User '$SERVICE_USER' angelegt ✓"
+    else
+        log_error "User '$SERVICE_USER' muss vor der Installation existieren (z.B. via AD/SSSD)."
+        log_error "Lege ihn an oder wähle einen existierenden Namen."
+        exit 1
+    fi
+fi
+
+# ============ 3. Directories ============
+log_step "Verzeichnisse erstellen"
+
+mkdir -p "$INSTALL_DIR" "$CONFIG_DIR" "$LOG_DIR"
+mkdir -p "$DATA_DIR"/{incoming,outgoing,working,error}
+
+cp -r "$REPO_DIR/pdf_ocr_hotfolder" "$INSTALL_DIR/"
+cp "$REPO_DIR/requirements.txt" "$INSTALL_DIR/"
+cp "$REPO_DIR/VERSION" "$INSTALL_DIR/"
+echo "$REPO_DIR" > "$INSTALL_DIR/.repo_path"
+
+if [ ! -f "$CONFIG_DIR/config.toml" ]; then
+    cp "$REPO_DIR/config.example.toml" "$CONFIG_DIR/config.toml"
+    log_info "Beispiel-Konfig nach $CONFIG_DIR/config.toml kopiert"
+else
+    log_info "Bestehende Konfig $CONFIG_DIR/config.toml bleibt unverändert"
+fi
+
+log_info "Verzeichnisse erstellt ✓"
+
+# ============ 4. Python venv ============
+log_step "Python venv anlegen"
+
+if [ ! -d "$INSTALL_DIR/venv" ]; then
+    python3 -m venv "$INSTALL_DIR/venv"
+fi
+"$INSTALL_DIR/venv/bin/pip" install --upgrade pip -q
+"$INSTALL_DIR/venv/bin/pip" install -r "$INSTALL_DIR/requirements.txt" -q
+
+log_info "venv bereit ✓"
+
+# ============ 5. Permissions ============
+log_step "Berechtigungen setzen"
+
+chown -R "$SERVICE_USER:$SERVICE_GROUP" "$INSTALL_DIR" "$DATA_DIR" "$LOG_DIR"
+chown root:"$SERVICE_GROUP" "$CONFIG_DIR"
+chmod 750 "$CONFIG_DIR"
+if [ -f "$CONFIG_DIR/config.toml" ]; then
+    chown root:"$SERVICE_GROUP" "$CONFIG_DIR/config.toml"
+    chmod 640 "$CONFIG_DIR/config.toml"
+fi
+
+log_info "Berechtigungen gesetzt ✓"
+
+# ============ 6. systemd unit ============
+log_step "systemd-Unit installieren"
+
+# User and group are escaped first because they land in a sed replacement.
+sed -e "s|__SERVICE_USER__|$(sed_escape_replacement "$SERVICE_USER")|g" \
+    -e "s|__SERVICE_GROUP__|$(sed_escape_replacement "$SERVICE_GROUP")|g" \
+    "$REPO_DIR/systemd/pdf-ocr-hotfolder.service" \
+    > "/etc/systemd/system/${SERVICE_NAME}.service"
+
+systemctl daemon-reload
+systemctl enable "${SERVICE_NAME}.service"
+
+log_info "systemd-Unit installiert & enabled ✓"
+
+# ============ 7. Start ============
+log_step "Service starten"
+systemctl restart "${SERVICE_NAME}.service"
+sleep 2
+systemctl --no-pager --lines=10 status "${SERVICE_NAME}.service" || true
+
+echo
+echo "=========================================="
+echo " Installation abgeschlossen"
+echo "=========================================="
+echo
+echo " Konfiguration: $CONFIG_DIR/config.toml"
+echo " Eingang: $DATA_DIR/incoming"
+echo " Ausgang: $DATA_DIR/outgoing"
+echo " Service-User: $SERVICE_USER ($SERVICE_GROUP)"
+echo
+echo " Logs: journalctl -u $SERVICE_NAME -f"
+echo " Update: sudo ./update.sh"
+echo
diff --git a/pdf_ocr_hotfolder/__init__.py b/pdf_ocr_hotfolder/__init__.py
new file mode 100644
index 0000000..2bf9449
--- /dev/null
+++ b/pdf_ocr_hotfolder/__init__.py
@@ -0,0 +1,3 @@
+"""PDF OCR Hotfolder — make scanner PDFs searchable automatically."""
+
+__version__ = "0.1.0"
diff --git a/pdf_ocr_hotfolder/__main__.py b/pdf_ocr_hotfolder/__main__.py
new file mode 100644
index 0000000..a4fc3d6
--- /dev/null
+++ b/pdf_ocr_hotfolder/__main__.py
@@ -0,0 +1,57 @@
+"""CLI entry point."""
+from __future__ import annotations
+
+import argparse
+import logging
+import sys
+from pathlib import Path
+
+from . 
import __version__
+from .config import load_config
+from .service import HotfolderService
+
+
+def _setup_logging(level: str) -> None:
+    logging.basicConfig(
+        level=getattr(logging, level.upper(), logging.INFO),  # unknown level names fall back to INFO
+        format="%(asctime)s %(levelname)-7s %(name)s: %(message)s",
+        datefmt="%Y-%m-%d %H:%M:%S",
+    )
+
+
+def main() -> int:
+    parser = argparse.ArgumentParser(
+        prog="pdf-ocr-hotfolder",
+        description="Wandelt eingehende PDFs per OCR in durchsuchbare PDFs um.",
+    )
+    parser.add_argument("--config", "-c", default="/etc/pdf-ocr-hotfolder/config.toml",
+                        help="Pfad zur Konfigurationsdatei (TOML)")
+    parser.add_argument("--version", action="version", version=f"%(prog)s {__version__}")
+    parser.add_argument("--once", action="store_true",
+                        help="Nur bestehende Dateien verarbeiten und beenden")
+    args = parser.parse_args()
+
+    cfg_path = Path(args.config)
+    if not cfg_path.exists():
+        print(f"Config nicht gefunden: {cfg_path}", file=sys.stderr)
+        return 2  # exit status for a missing config file
+
+    cfg = load_config(cfg_path)
+    _setup_logging(cfg.log_level)
+
+    service = HotfolderService(cfg)
+    if args.once:  # one-shot: queue what is already in incoming/, wait for the workers, exit
+        service._ensure_dirs()  # noqa: SLF001
+        service._scan_existing()  # noqa: SLF001
+        service._executor.shutdown(wait=True)  # noqa: SLF001
+        return 0
+
+    try:
+        service.run()  # returns once the service's stop event has been set
+    except KeyboardInterrupt:
+        pass
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/pdf_ocr_hotfolder/config.py b/pdf_ocr_hotfolder/config.py
new file mode 100644
index 0000000..6b67337
--- /dev/null
+++ b/pdf_ocr_hotfolder/config.py
@@ -0,0 +1,129 @@
+"""Configuration loader (TOML)."""
+from __future__ import annotations
+
+import tomllib
+from dataclasses import dataclass, field
+from pathlib import Path
+from typing import Any
+
+
+@dataclass
+class Paths:  # the [paths] section; no defaults, all four entries are required
+    incoming: Path
+    outgoing: Path
+    working: Path
+    error: Path
+
+
+@dataclass
+class OcrConfig:  # the [ocr] section
+    languages: str = "deu+eng"
+    jobs: int = 4
+    skip_text: bool = True
+    oversample: int = 300
+    pdfa_level: str = "2"
+    deskew: bool = True
+    clean: bool = False
+    max_workers: int = 2
+    timeout: int = 1800
+
+
+@dataclass
+class VeraPdfConfig:  # the [verapdf] section
+    enabled: bool = False
+    binary: str = "/opt/verapdf/verapdf"
+    flavour: str = "1b"
+
+
+@dataclass
+class FolderUpload:  # the [upload.folder] section
+    enabled: bool = True
+    target: str = ""
+
+
+@dataclass
+class NextcloudUpload:  # the [upload.nextcloud] section
+    enabled: bool = False
+    url: str = ""
+    username: str = ""
+    password: str = ""
+    remote_path: str = ""
+    verify_ssl: bool = True
+
+
+@dataclass
+class SftpUpload:  # the [upload.sftp] section
+    enabled: bool = False
+    host: str = ""
+    port: int = 22
+    username: str = ""
+    key_file: str = ""
+    password: str = ""
+    remote_path: str = ""
+
+
+@dataclass
+class EmailNotify:  # the [notify.email] section
+    enabled: bool = False
+    smtp_host: str = ""
+    smtp_port: int = 587
+    smtp_user: str = ""
+    smtp_password: str = ""
+    use_starttls: bool = True
+    from_addr: str = ""
+    to_addrs: list[str] = field(default_factory=list)
+    on: str = "errors"  # always | errors | never
+
+
+@dataclass
+class Config:  # aggregate of all sections, as built by load_config()
+    paths: Paths
+    ocr: OcrConfig
+    verapdf: VeraPdfConfig
+    folder: FolderUpload
+    nextcloud: NextcloudUpload
+    sftp: SftpUpload
+    email: EmailNotify
+    log_level: str = "INFO"  # filled from [logging].level
+
+
+def _section(data: dict[str, Any], *keys: str) -> dict[str, Any]:
+    cur: Any = data
+    for k in keys:  # walk the nested tables key by key
+        cur = cur.get(k, {}) if isinstance(cur, dict) else {}
+    return cur if isinstance(cur, dict) else {}  # missing or non-table values yield {}
+
+
+def load_config(path: str | Path) -> Config:
+    path = Path(path)
+    with path.open("rb") as f:  # tomllib.load() requires a binary file object
+        data = tomllib.load(f)
+
+    p = _section(data, "paths")
+    paths = Paths(
+        incoming=Path(p["incoming"]),  # plain indexing: a missing [paths] key raises KeyError
+        outgoing=Path(p["outgoing"]),
+        working=Path(p["working"]),
+        error=Path(p["error"]),
+    )
+
+    # Each section below keeps only keys declared on its dataclass; unknown keys are dropped.
+    ocr = OcrConfig(**{k: v for k, v in _section(data, "ocr").items()
+                       if k in OcrConfig.__annotations__})
+    verapdf = VeraPdfConfig(**{k: v for k, v in _section(data, "verapdf").items()
+                               if k in VeraPdfConfig.__annotations__})
+    folder = FolderUpload(**{k: v for k, v in _section(data, "upload", "folder").items()
+                             if k in FolderUpload.__annotations__})
+    nextcloud = 
NextcloudUpload(**{k: v for k, v in _section(data, "upload", "nextcloud").items()
+                                   if k in NextcloudUpload.__annotations__})
+    sftp = SftpUpload(**{k: v for k, v in _section(data, "upload", "sftp").items()
+                         if k in SftpUpload.__annotations__})
+    email = EmailNotify(**{k: v for k, v in _section(data, "notify", "email").items()
+                           if k in EmailNotify.__annotations__})
+
+    log_level = _section(data, "logging").get("level", "INFO")
+
+    return Config(
+        paths=paths, ocr=ocr, verapdf=verapdf,
+        folder=folder, nextcloud=nextcloud, sftp=sftp, email=email,
+        log_level=log_level,
+    )
diff --git a/pdf_ocr_hotfolder/processor.py b/pdf_ocr_hotfolder/processor.py
new file mode 100644
index 0000000..b138720
--- /dev/null
+++ b/pdf_ocr_hotfolder/processor.py
@@ -0,0 +1,146 @@
+"""OCR processing of a single PDF with ocrmypdf, plus optional veraPDF validation."""
+from __future__ import annotations
+
+import logging
+import shutil
+import subprocess
+from dataclasses import dataclass
+from pathlib import Path
+
+import ocrmypdf
+
+from .config import OcrConfig, VeraPdfConfig
+
+log = logging.getLogger(__name__)
+
+
+@dataclass
+class ProcessResult:
+    """Outcome of processing one PDF, as returned by ``process_pdf``."""
+
+    source: Path  # input path as it was handed to process_pdf
+    output: Path  # intended final path in the outgoing directory (set on failure too)
+    success: bool
+    error: str = ""  # short failure description; empty on success
+    verapdf_passed: bool | None = None  # None when veraPDF did not run
+
+
+def run_ocr(src: Path, dst: Path, cfg: OcrConfig) -> None:
+    """Run ocrmypdf as an in-process library call (no subprocess overhead).
+
+    Args:
+        src: Input PDF.
+        dst: Path the OCR result is written to.
+        cfg: OCR settings; exceptions from ``ocrmypdf.ocr`` are not handled here.
+    """
+    kwargs: dict = {
+        "language": cfg.languages,
+        "jobs": cfg.jobs,
+        "deskew": cfg.deskew,
+        "clean": cfg.clean,
+        "oversample": cfg.oversample,
+        "progress_bar": False,
+        "skip_text": cfg.skip_text,
+    }
+    # An empty pdfa_level selects plain PDF output instead of PDF/A.
+    kwargs["output_type"] = f"pdfa-{cfg.pdfa_level}" if cfg.pdfa_level else "pdf"
+    # NOTE(review): cfg.timeout is not used here, so the configured per-PDF
+    # timeout currently has no effect on the OCR run.
+
+    log.info("OCR start: %s", src.name)
+    ocrmypdf.ocr(str(src), str(dst), **kwargs)
+    log.info("OCR done: %s", dst.name)
+
+
+def run_verapdf(pdf: Path, cfg: VeraPdfConfig) -> bool:
+    """Validate a PDF with the veraPDF CLI.
+
+    Returns:
+        True if validation is disabled or veraPDF reports the file as
+        conformant; False if the binary is missing, cannot be started,
+        times out, or reports a failure.
+    """
+    if not cfg.enabled:
+        return True
+    if not Path(cfg.binary).exists():
+        log.warning("veraPDF binary nicht gefunden: %s", cfg.binary)
+        return False
+    try:
+        result = subprocess.run(
+            [cfg.binary, "--flavour", cfg.flavour, "--format", "text", str(pdf)],
+            capture_output=True, text=True, timeout=300,
+        )
+    except subprocess.TimeoutExpired:
+        log.error("veraPDF Timeout: %s", pdf.name)
+        return False
+    except OSError as e:
+        # The binary exists but could not be started (e.g. not executable).
+        log.error("veraPDF konnte nicht gestartet werden: %s", e)
+        return False
+    ok = result.returncode == 0 and "PASS" in result.stdout
+    log.info("veraPDF %s: %s", "PASS" if ok else "FAIL", pdf.name)
+    return ok
+
+
+def process_pdf(
+    src: Path,
+    working_dir: Path,
+    outgoing_dir: Path,
+    error_dir: Path,
+    ocr_cfg: OcrConfig,
+    vera_cfg: VeraPdfConfig,
+) -> ProcessResult:
+    """Process one PDF: move to working → OCR → validate → outgoing or error.
+
+    Returns:
+        A ``ProcessResult``. Move, OCR and validation failures are reported
+        through ``success``/``error`` instead of being raised.
+    """
+    work_src = working_dir / src.name
+    work_out = working_dir / f"OCR_{src.name}"
+    final_out = outgoing_dir / f"OCR_{src.name}"
+
+    try:
+        shutil.move(str(src), str(work_src))
+    except OSError as e:
+        return ProcessResult(src, final_out, False, f"move to working failed: {e}")
+
+    try:
+        run_ocr(work_src, work_out, ocr_cfg)
+    except Exception as e:  # noqa: BLE001 - ocrmypdf raises many exception types
+        log.exception("OCR fehlgeschlagen für %s", src.name)
+        _move_to_error(work_src, error_dir)
+        return ProcessResult(src, final_out, False, f"ocr failed: {e}")
+
+    vera_ok: bool | None = None
+    if vera_cfg.enabled:
+        vera_ok = run_verapdf(work_out, vera_cfg)
+        if not vera_ok:
+            # The rejected OCR output, not the original scan, is kept in error/.
+            _move_to_error(work_out, error_dir)
+            work_src.unlink(missing_ok=True)
+            return ProcessResult(src, final_out, False,
+                                 "verapdf validation failed", verapdf_passed=False)
+
+    try:
+        outgoing_dir.mkdir(parents=True, exist_ok=True)
+        shutil.move(str(work_out), str(final_out))
+    except OSError as e:
+        # Report the failure as a result so the caller can still notify, and
+        # clear both files out of working/ instead of stranding them there.
+        log.exception("Konnte %s nicht nach outgoing verschieben", work_out.name)
+        _move_to_error(work_src, error_dir)
+        _move_to_error(work_out, error_dir)
+        return ProcessResult(src, final_out, False, f"move to outgoing failed: {e}",
+                             verapdf_passed=vera_ok)
+    work_src.unlink(missing_ok=True)
+    return ProcessResult(src, final_out, True, verapdf_passed=vera_ok)
+
+
+def _move_to_error(p: Path, error_dir: Path) -> None:
+    """Move ``p`` into ``error_dir``, creating the directory if needed."""
+    try:
+        error_dir.mkdir(parents=True, exist_ok=True)
+        shutil.move(str(p), 
str(error_dir / p.name))
+    except OSError:
+        log.exception("Konnte %s nicht in error-Verzeichnis verschieben", p)
diff --git a/pdf_ocr_hotfolder/service.py b/pdf_ocr_hotfolder/service.py
new file mode 100644
index 0000000..91066f0
--- /dev/null
+++ b/pdf_ocr_hotfolder/service.py
@@ -0,0 +1,173 @@
+"""Main service: hotfolder via watchdog, ThreadPool for PDF processing."""
+from __future__ import annotations
+
+import logging
+import signal
+import threading
+import time
+from concurrent.futures import Future, ThreadPoolExecutor
+from pathlib import Path
+
+from watchdog.events import FileSystemEvent, FileSystemEventHandler
+from watchdog.observers import Observer
+
+from .config import Config
+from .processor import ProcessResult, process_pdf
+from .uploaders import notify_email, upload_folder, upload_nextcloud, upload_sftp
+
+log = logging.getLogger(__name__)
+
+
+def _is_pdf(path: Path) -> bool:
+    return path.suffix.lower() == ".pdf" and path.is_file()  # suffix match is case-insensitive
+
+
+def _wait_until_stable(path: Path, checks: int = 3, interval: float = 1.0) -> bool:
+    """Wait until the file stops growing (scanners write in several bursts)."""
+    last = -1
+    stable_count = 0
+    for _ in range(60):  # at most 60 polls (~60 s at the default interval)
+        try:
+            size = path.stat().st_size
+        except FileNotFoundError:
+            return False  # file vanished while waiting
+        if size == last and size > 0:  # an empty file never counts as stable
+            stable_count += 1
+            if stable_count >= checks:
+                return True
+        else:
+            stable_count = 0  # size changed: start counting again
+        last = size
+        time.sleep(interval)
+    return False
+
+
+class _Handler(FileSystemEventHandler):  # forwards non-directory events to HotfolderService.enqueue()
+    def __init__(self, service: "HotfolderService") -> None:
+        self.service = service
+
+    def on_created(self, event: FileSystemEvent) -> None:
+        if not event.is_directory:
+            self.service.enqueue(Path(event.src_path))
+
+    def on_moved(self, event: FileSystemEvent) -> None:
+        if not event.is_directory:
+            self.service.enqueue(Path(event.dest_path))  # the move target is the new file
+
+    def on_closed(self, event: FileSystemEvent) -> None:
+        if not event.is_directory:
+            self.service.enqueue(Path(event.src_path))
+
+
+class HotfolderService:
+    def __init__(self, cfg: Config) -> None:
+        self.cfg = cfg
+        self._executor = ThreadPoolExecutor(
+            max_workers=cfg.ocr.max_workers,
+            thread_name_prefix="ocr",
+        )
+        self._observer: Observer | None = None
+        self._stop = threading.Event()  # set by the signal handlers installed in run()
+        self._inflight: set[str] = set()  # resolved paths currently queued or being processed
+        self._lock = threading.Lock()  # guards _inflight
+
+    # ---- Setup ----
+
+    def _ensure_dirs(self) -> None:
+        for p in (self.cfg.paths.incoming, self.cfg.paths.outgoing,
+                  self.cfg.paths.working, self.cfg.paths.error):
+            p.mkdir(parents=True, exist_ok=True)
+
+    # ---- Lifecycle ----
+
+    def run(self) -> None:
+        self._ensure_dirs()
+        self._scan_existing()  # queue files that arrived before the watcher started
+
+        self._observer = Observer()
+        self._observer.schedule(_Handler(self), str(self.cfg.paths.incoming), recursive=False)
+        self._observer.start()
+        log.info("Hotfolder läuft. Watching: %s", self.cfg.paths.incoming)
+
+        signal.signal(signal.SIGTERM, lambda *_: self._stop.set())
+        signal.signal(signal.SIGINT, lambda *_: self._stop.set())
+
+        try:
+            while not self._stop.is_set():
+                self._stop.wait(1.0)
+        finally:
+            self.shutdown()
+
+    def shutdown(self) -> None:
+        log.info("Shutdown läuft...")
+        if self._observer:
+            self._observer.stop()
+            self._observer.join(timeout=5)
+        self._executor.shutdown(wait=True, cancel_futures=False)  # queued and running jobs are allowed to finish
+        log.info("Shutdown ok.")
+
+    # ---- Queue ----
+
+    def _scan_existing(self) -> None:
+        """On startup: pick up PDFs that are already present."""
+        for p in self.cfg.paths.incoming.iterdir():
+            if _is_pdf(p):
+                self.enqueue(p)
+
+    def enqueue(self, path: Path) -> None:
+        if not _is_pdf(path):
+            return
+        key = str(path.resolve())
+        with self._lock:
+            if key in self._inflight:  # several events can fire for one file; submit it only once
+                return
+            self._inflight.add(key)
+        fut = self._executor.submit(self._process, path)
+        fut.add_done_callback(lambda f, k=key: self._done(k, f))  # k=key binds the value at definition time
+
+    def _done(self, key: str, fut: Future) -> None:
+        with self._lock:
+            self._inflight.discard(key)
+        exc = fut.exception()
+        if exc:
+            log.exception("Worker-Exception", exc_info=exc)
+
+    # ---- Processing ----
+
+    def _process(self, 
path: Path) -> None:
+        if not _wait_until_stable(path):
+            log.warning("Datei nicht stabilisiert, überspringe: %s", path)
+            return
+        if not path.exists():
+            return
+
+        result: ProcessResult = process_pdf(
+            src=path,
+            working_dir=self.cfg.paths.working,
+            outgoing_dir=self.cfg.paths.outgoing,
+            error_dir=self.cfg.paths.error,
+            ocr_cfg=self.cfg.ocr,
+            vera_cfg=self.cfg.verapdf,
+        )
+
+        if result.success:
+            self._dispatch_uploads(result.output)
+        self._notify(result)
+
+    def _dispatch_uploads(self, pdf: Path) -> None:
+        upload_folder(pdf, self.cfg.folder, self.cfg.paths.outgoing)  # checks cfg.enabled itself
+        if self.cfg.nextcloud.enabled:
+            upload_nextcloud(pdf, self.cfg.nextcloud)
+        if self.cfg.sftp.enabled:
+            upload_sftp(pdf, self.cfg.sftp)
+
+    def _notify(self, result: ProcessResult) -> None:
+        if result.success:
+            subject = f"[pdf-ocr] OK: {result.source.name}"
+            body = f"Datei verarbeitet: {result.output}\n"
+            if result.verapdf_passed is not None:
+                body += f"veraPDF: {'PASS' if result.verapdf_passed else 'FAIL'}\n"
+        else:
+            subject = f"[pdf-ocr] FEHLER: {result.source.name}"
+            body = f"Fehler beim Verarbeiten von {result.source}\n\n{result.error}\n"
+        notify_email(self.cfg.email, subject, body, result.success)
diff --git a/pdf_ocr_hotfolder/uploaders.py b/pdf_ocr_hotfolder/uploaders.py
new file mode 100644
index 0000000..2a88fa5
--- /dev/null
+++ b/pdf_ocr_hotfolder/uploaders.py
@@ -0,0 +1,140 @@
+"""Upload targets: local folder, Nextcloud (WebDAV), SFTP. Plus e-mail notification."""
+from __future__ import annotations
+
+import logging
+import shutil
+import smtplib
+import ssl
+from email.message import EmailMessage
+from pathlib import Path
+from urllib.parse import quote
+
+import paramiko
+import requests
+
+from .config import EmailNotify, FolderUpload, NextcloudUpload, SftpUpload
+
+log = logging.getLogger(__name__)
+
+
+def upload_folder(pdf: Path, cfg: FolderUpload, default_target: Path) -> bool:
+    """Copy ``pdf`` into the configured target folder.
+
+    An empty ``cfg.target`` selects ``default_target``.
+
+    Returns:
+        True if disabled, already in place, or copied; False on an OS error.
+    """
+    if not cfg.enabled:
+        return True
+    target = Path(cfg.target) if cfg.target else default_target
+    dest = target / pdf.name
+    try:
+        target.mkdir(parents=True, exist_ok=True)
+        if pdf.resolve() == dest.resolve():
+            return True  # source already is the destination; nothing to copy
+        # Stream the copy instead of loading the whole PDF into memory.
+        shutil.copyfile(pdf, dest)
+        log.info("Folder upload OK: %s", dest)
+        return True
+    except OSError as e:
+        log.error("Folder upload failed: %s", e)
+        return False
+
+
+def upload_nextcloud(pdf: Path, cfg: NextcloudUpload) -> bool:
+    """Upload ``pdf`` to Nextcloud with a WebDAV PUT.
+
+    Returns:
+        True if disabled or the server answered 200/201/204; False on any
+        other status, on a request error, or if the file cannot be read.
+    """
+    if not cfg.enabled:
+        return True
+    base = cfg.url.rstrip("/")
+    # Quote each path segment on its own and drop empty ones, so an empty
+    # remote_path does not put a "//" into the URL.
+    parts = [*cfg.remote_path.split("/"), pdf.name]
+    rel = "/".join(quote(part) for part in parts if part)
+    url = f"{base}/remote.php/dav/files/{quote(cfg.username)}/{rel}"
+    try:
+        with pdf.open("rb") as f:
+            r = requests.put(url, data=f, auth=(cfg.username, cfg.password),
+                             verify=cfg.verify_ssl, timeout=300)
+    except (requests.RequestException, OSError) as e:
+        log.error("Nextcloud upload failed: %s", e)
+        return False
+    if r.status_code in (200, 201, 204):
+        log.info("Nextcloud upload OK: %s", pdf.name)
+        return True
+    log.error("Nextcloud upload HTTP %s: %s", r.status_code, r.text[:200])
+    return False
+
+
+def upload_sftp(pdf: Path, cfg: SftpUpload) -> bool:
+    """Upload ``pdf`` via SFTP into ``cfg.remote_path``.
+
+    Returns:
+        True if disabled or uploaded; False on an SSH or OS error.
+    """
+    if not cfg.enabled:
+        return True
+    client = paramiko.SSHClient()
+    # NOTE(review): AutoAddPolicy accepts unknown host keys without any
+    # verification; consider pinning the server key instead.
+    client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
+    connect_kwargs: dict = {
+        "hostname": cfg.host, "port": cfg.port, "username": cfg.username,
+        "timeout": 30,
+    }
+    if cfg.key_file:
+        connect_kwargs["key_filename"] = cfg.key_file
+    if cfg.password:
+        connect_kwargs["password"] = cfg.password
+    try:
+        client.connect(**connect_kwargs)
+        sftp = client.open_sftp()
+        try:
+            remote = f"{cfg.remote_path.rstrip('/')}/{pdf.name}"
+            sftp.put(str(pdf), remote)
+        finally:
+            sftp.close()
+        log.info("SFTP upload OK: %s", remote)
+        return True
+    except (paramiko.SSHException, OSError) as e:
+        log.error("SFTP upload failed: %s", e)
+        return False
+    finally:
+        # Close the client even when connect() or open_sftp() failed.
+        client.close()
+
+
+def notify_email(cfg: EmailNotify, subject: str, body: str, success: bool) -> None:
+    """Send a plain-text notification mail, subject to ``cfg.on``.
+
+    Nothing is sent when notifications are disabled, ``cfg.on`` is
+    ``"never"``, or ``cfg.on`` is ``"errors"`` and ``success`` is true.
+    SMTP and OS errors are logged, not raised.
+    """
+    if not cfg.enabled or cfg.on == "never":
+        return
+    if cfg.on == "errors" and success:
+        return
+    if not cfg.to_addrs:
+        log.warning("E-Mail-Notify übersprungen: keine Empfänger (to_addrs) konfiguriert")
+        return
+    msg = EmailMessage()
+    msg["Subject"] = subject
+    msg["From"] = cfg.from_addr
+    msg["To"] = ", ".join(cfg.to_addrs)
+    msg.set_content(body)
+    try:
+        with smtplib.SMTP(cfg.smtp_host, cfg.smtp_port, timeout=30) as s:
+            if cfg.use_starttls:
+                s.starttls(context=ssl.create_default_context())
+            if cfg.smtp_user:
+                s.login(cfg.smtp_user, cfg.smtp_password)
+            s.send_message(msg)
+        log.info("E-Mail-Notify gesendet: %s", subject)
+    except (smtplib.SMTPException, OSError) as e:
+        log.error("E-Mail-Notify fehlgeschlagen: %s", e)
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..d281fc8
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,4 @@
+ocrmypdf>=16.0
+watchdog>=4.0
+requests>=2.31
+paramiko>=3.4
diff --git a/systemd/pdf-ocr-hotfolder.service b/systemd/pdf-ocr-hotfolder.service
new file mode 100644
index 0000000..5ae1963
--- /dev/null
+++ b/systemd/pdf-ocr-hotfolder.service
@@ -0,0 +1,25 @@
+[Unit]
+Description=PDF OCR Hotfolder
+After=network-online.target
+Wants=network-online.target
+
+[Service]
+Type=simple
+User=__SERVICE_USER__
+Group=__SERVICE_GROUP__
+ExecStart=/opt/pdf-ocr-hotfolder/venv/bin/python -m pdf_ocr_hotfolder --config /etc/pdf-ocr-hotfolder/config.toml
+Restart=on-failure
+RestartSec=5
+KillMode=mixed
+TimeoutStopSec=30
+
+# Hardening (lockerer wegen AD-User & Datei-ACLs)
+NoNewPrivileges=true
+PrivateTmp=true
+ProtectSystem=full
+ProtectKernelTunables=true
+ProtectKernelModules=true
+ProtectControlGroups=true
+
+[Install]
+WantedBy=multi-user.target
diff --git a/update.sh b/update.sh
new file mode 100755
index 0000000..cc40581
--- /dev/null
+++ b/update.sh
@@ -0,0 +1,141 @@
+#!/usr/bin/env bash
+#
+# PDF OCR Hotfolder — update script
+#
+# Updates an existing installation from a repository checkout: stops the
+# service, backs up the installed files, replaces code and dependencies,
+# rewrites the systemd unit and restarts the service. Must run as root.
+#
+set -euo pipefail
+
+RED='\033[0;31m'; GREEN='\033[0;32m'; YELLOW='\033[1;33m'; NC='\033[0m'
+# Log helpers; warnings and errors go to stderr.
+log_info()  { printf '%b[INFO]%b %s\n' "$GREEN" "$NC" "$*"; }
+log_warn()  { printf '%b[WARN]%b %s\n' "$YELLOW" "$NC" "$*" >&2; }
+log_error() { printf '%b[ERROR]%b %s\n' "$RED" "$NC" "$*" >&2; }
+
+# Print the value of the first "KEY=" line of a systemd unit file, i.e.
+# everything after the first "=". Prints nothing if file or key is missing.
+#   $1 - key (e.g. User)    $2 - path of the unit file
+unit_value() {
+  local key=$1 unit=$2
+  [[ -r "$unit" ]] || return 0
+  awk -v key="$key" \
+    'index($0, key "=") == 1 { print substr($0, length(key) + 2); exit }' \
+    "$unit"
+}
+
+# Escape $1 for the replacement side of sed's "s|...|...|" command so that
+# "\", "&" and "|" in a name (e.g. DOMAIN\user from Winbind) stay literal.
+sed_escape_replacement() {
+  printf '%s' "$1" | sed -e 's/[\\&|]/\\&/g'
+}
+
+if [[ "${EUID}" -ne 0 ]]; then
+  log_error "Bitte als root ausführen: sudo ./update.sh"
+  exit 1
+fi
+
+readonly INSTALL_DIR="/opt/pdf-ocr-hotfolder"
+readonly SERVICE_NAME="pdf-ocr-hotfolder"
+readonly UNIT_FILE="/etc/systemd/system/${SERVICE_NAME}.service"
+readonly BACKUP_DIR="/var/backups/pdf-ocr-hotfolder"
+
+# Locate the repository: this script's directory if it is a checkout,
+# otherwise the path stored in .repo_path. The install directory itself is
+# never used as the repo, because its package directory is deleted below
+# before the copy from the repo.
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+if [[ -f "$SCRIPT_DIR/pdf_ocr_hotfolder/__init__.py" \
+  && ! "$SCRIPT_DIR" -ef "$INSTALL_DIR" ]]; then
+  REPO_DIR="$SCRIPT_DIR"
+elif [[ -f "$INSTALL_DIR/.repo_path" ]]; then
+  REPO_DIR="$(cat "$INSTALL_DIR/.repo_path")"
+  [[ -d "$REPO_DIR" ]] || { log_error "Gespeicherter Repo-Pfad existiert nicht: $REPO_DIR"; exit 1; }
+else
+  log_error "Repo nicht gefunden. update.sh aus dem Repo ausführen."
+  exit 1
+fi
+
+[[ -d "$INSTALL_DIR" ]] || { log_error "Installation nicht gefunden. Erst install.sh ausführen."; exit 1; }
+
+# Check everything that gets copied before anything is stopped or deleted.
+for required in pdf_ocr_hotfolder/__init__.py requirements.txt VERSION \
+  systemd/pdf-ocr-hotfolder.service; do
+  [[ -f "$REPO_DIR/$required" ]] || { log_error "Fehlt im Repo: $REPO_DIR/$required"; exit 1; }
+done
+
+OLD_VERSION="$(cat "$INSTALL_DIR/VERSION" 2>/dev/null || echo unknown)"
+NEW_VERSION="$(cat "$REPO_DIR/VERSION")"
+
+echo
+echo "=========================================="
+echo " PDF OCR Hotfolder — Update"
+echo "=========================================="
+log_info "Repo: $REPO_DIR"
+log_info "Install: $INSTALL_DIR"
+log_info "Version: $OLD_VERSION → $NEW_VERSION"
+echo
+
+# Read the service account from the installed unit (before it is rewritten
+# below); use the default name when the unit or the setting is missing.
+SERVICE_USER="$(unit_value User "$UNIT_FILE")"
+SERVICE_GROUP="$(unit_value Group "$UNIT_FILE")"
+SERVICE_USER="${SERVICE_USER:-pdfocr}"
+SERVICE_GROUP="${SERVICE_GROUP:-pdfocr}"
+
+# Tell the operator if the script dies while the service is stopped.
+service_stopped=0
+on_exit() {
+  local rc=$?
+  if (( rc != 0 && service_stopped )); then
+    log_error "Update abgebrochen, Service ist gestoppt. Backups: $BACKUP_DIR"
+  fi
+}
+trap on_exit EXIT
+
+log_info "Stoppe Service..."
+# The unit may not be loaded or running yet; that is not an error.
+systemctl stop "${SERVICE_NAME}.service" 2>/dev/null || true
+service_stopped=1
+
+log_info "Backup erstellen..."
+mkdir -p "$BACKUP_DIR"
+BACKUP_FILE="$BACKUP_DIR/backup-$(date +%Y%m%d-%H%M%S).tar.gz"
+# The backup stays best-effort, but a failure is reported instead of hidden.
+if ! tar -czf "$BACKUP_FILE" \
+  -C "$INSTALL_DIR" --exclude=venv --exclude=__pycache__ .; then
+  log_warn "Backup fehlgeschlagen: $BACKUP_FILE"
+fi
+
+log_info "Code aktualisieren..."
+rm -rf -- "${INSTALL_DIR:?}/pdf_ocr_hotfolder"
+cp -r -- "$REPO_DIR/pdf_ocr_hotfolder" "$INSTALL_DIR/"
+cp -- "$REPO_DIR/requirements.txt" "$REPO_DIR/VERSION" "$INSTALL_DIR/"
+printf '%s\n' "$REPO_DIR" > "$INSTALL_DIR/.repo_path"
+
+log_info "Dependencies aktualisieren..."
+"$INSTALL_DIR/venv/bin/pip" install --upgrade pip -q
+"$INSTALL_DIR/venv/bin/pip" install --upgrade -r "$INSTALL_DIR/requirements.txt" -q
+
+log_info "systemd-Unit aktualisieren..."
+sed -e "s|__SERVICE_USER__|$(sed_escape_replacement "$SERVICE_USER")|g" \
+  -e "s|__SERVICE_GROUP__|$(sed_escape_replacement "$SERVICE_GROUP")|g" \
+  "$REPO_DIR/systemd/pdf-ocr-hotfolder.service" \
+  > "$UNIT_FILE"
+systemctl daemon-reload
+
+log_info "Berechtigungen setzen..."
+chown -R "$SERVICE_USER:$SERVICE_GROUP" "$INSTALL_DIR"
+
+log_info "Service starten..."
+systemctl start "${SERVICE_NAME}.service"
+service_stopped=0
+sleep 2
+
+if systemctl is-active --quiet "${SERVICE_NAME}.service"; then
+  log_info "✅ Service läuft (Version $NEW_VERSION)"
+else
+  log_error "Service läuft nicht. journalctl -u $SERVICE_NAME -n 30"
+  exit 1
+fi