fix: Preflight-Check und Exit-Code im --once-Modus (v0.2.1)

- #1: check_preflight() prüft beim Start tesseract + gs, wirft
  PreflightError. CLI endet mit Exit 2 statt grün zu bleiben.
- #2: run_once() gibt Anzahl fehlgeschlagener PDFs zurück, CLI
  endet mit Exit 1 wenn mindestens eine Datei scheiterte.
- pytest-Suite mit 11 Tests für beide Szenarien
- ocrmypdf-Import lazy in processor.py (Tests ohne ocrmypdf möglich)

Closes #1, #2

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-04-09 07:24:00 +02:00
parent 985a33d3f9
commit 6f7cadfc63
10 changed files with 305 additions and 10 deletions
+11 -5
View File
@@ -8,7 +8,7 @@ from pathlib import Path
from . import __version__
from .config import load_config
from .service import HotfolderService
from .service import HotfolderService, PreflightError
def _setup_logging(level: str) -> None:
@@ -40,14 +40,20 @@ def main() -> int:
_setup_logging(cfg.log_level)
service = HotfolderService(cfg)
if args.once:
service._ensure_dirs() # noqa: SLF001
service._scan_existing() # noqa: SLF001
service._executor.shutdown(wait=True) # noqa: SLF001
return 0
try:
errors = service.run_once()
except PreflightError as e:
print(f"FEHLER: {e}", file=sys.stderr)
return 2
return 1 if errors > 0 else 0
try:
service.run()
except PreflightError as e:
print(f"FEHLER: {e}", file=sys.stderr)
return 2
except KeyboardInterrupt:
pass
return 0
+2 -2
View File
@@ -7,8 +7,6 @@ import subprocess
from dataclasses import dataclass
from pathlib import Path
import ocrmypdf
from .config import OcrConfig, VeraPdfConfig
log = logging.getLogger(__name__)
@@ -25,6 +23,8 @@ class ProcessResult:
def run_ocr(src: Path, dst: Path, cfg: OcrConfig) -> None:
"""Führt ocrmypdf als Library-Call aus (kein Subprozess-Overhead)."""
import ocrmypdf # lazy, damit Tests ohne ocrmypdf laufen
kwargs: dict = {
"language": cfg.languages,
"jobs": cfg.jobs,
+55 -2
View File
@@ -2,6 +2,7 @@
from __future__ import annotations
import logging
import shutil
import signal
import threading
import time
@@ -18,6 +19,27 @@ from .uploaders import notify_email, upload_folder, upload_nextcloud, upload_sft
log = logging.getLogger(__name__)
class PreflightError(RuntimeError):
    """Raised when required external binaries are missing."""
# Mandatory binaries for ocrmypdf
_REQUIRED_BINARIES = ("tesseract", "gs")
def check_preflight() -> None:
    """Verify that every binary in ``_REQUIRED_BINARIES`` can be found.

    Each name is looked up with ``shutil.which``; names that resolve to
    nothing are collected and reported together.

    Raises:
        PreflightError: If at least one binary is missing. The message
            lists all missing names and an install hint.
    """
    absent = []
    for binary in _REQUIRED_BINARIES:
        if shutil.which(binary) is None:
            absent.append(binary)

    # Nothing missing: the check passes silently.
    if not absent:
        return

    names = ", ".join(absent)
    raise PreflightError(
        "Fehlende Abhängigkeiten: " + names
        + ". Bitte installieren: sudo apt install tesseract-ocr ghostscript"
    )
def _is_pdf(path: Path) -> bool:
return path.suffix.lower() == ".pdf" and path.is_file()
@@ -70,10 +92,20 @@ class HotfolderService:
self._stop = threading.Event()
self._inflight: set[str] = set()
self._lock = threading.Lock()
self._success_count = 0
self._error_count = 0
@property
def success_count(self) -> int:
    """Number of processing results counted as successful so far."""
    return self._success_count
@property
def error_count(self) -> int:
    """Number of processing results counted as failed so far."""
    return self._error_count
# ---- Setup ----
def _ensure_dirs(self) -> None:
def ensure_dirs(self) -> None:
    """Create the incoming, outgoing, working and error directories.

    Missing parent directories are created as well; directories that
    already exist are left as they are.
    """
    paths = self.cfg.paths
    targets = (paths.incoming, paths.outgoing, paths.working, paths.error)
    for directory in targets:
        directory.mkdir(parents=True, exist_ok=True)
@@ -81,7 +113,8 @@ class HotfolderService:
# ---- Lifecycle ----
def run(self) -> None:
self._ensure_dirs()
check_preflight()
self.ensure_dirs()
self._scan_existing()
self._observer = Observer()
@@ -98,6 +131,20 @@ class HotfolderService:
finally:
self.shutdown()
def run_once(self) -> int:
    """Process the PDFs already in the incoming folder, then finish.

    Returns:
        The number of PDFs that failed (0 means everything succeeded).

    Raises:
        PreflightError: Propagated from ``check_preflight()`` when a
            required binary is missing.
    """
    # Fail fast before touching any directory or file.
    check_preflight()
    self.ensure_dirs()
    self._scan_existing()
    # Block until the executor has shut down before reading the counters.
    self._executor.shutdown(wait=True)

    succeeded, failed = self._success_count, self._error_count
    log.info("One-shot fertig: %d ok, %d Fehler", succeeded, failed)
    return failed
def shutdown(self) -> None:
log.info("Shutdown läuft...")
if self._observer:
@@ -150,6 +197,12 @@ class HotfolderService:
vera_cfg=self.cfg.verapdf,
)
with self._lock:
if result.success:
self._success_count += 1
else:
self._error_count += 1
if result.success:
self._dispatch_uploads(result.output)
self._notify(result)