1 Commits

Author SHA1 Message Date
techadmin 6f7cadfc63 fix: Preflight-Check und Exit-Code in --once Modus (v0.2.1)
- #1: check_preflight() prüft beim Start tesseract + gs, wirft
  PreflightError. CLI endet mit Exit 2 statt grün zu bleiben.
- #2: run_once() gibt Anzahl fehlgeschlagener PDFs zurück, CLI
  endet mit Exit 1 wenn mindestens eine Datei scheiterte.
- pytest-Suite mit 11 Tests für beide Szenarien
- ocrmypdf-Import lazy in processor.py (Tests ohne ocrmypdf möglich)

Closes #1, #2

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-04-09 07:24:00 +02:00
10 changed files with 305 additions and 10 deletions
+1
View File
@@ -4,6 +4,7 @@ __pycache__/
venv/ venv/
env/ env/
.venv/ .venv/
.pytest_cache/
*.egg-info/ *.egg-info/
build/ build/
dist/ dist/
+12
View File
@@ -1,5 +1,17 @@
# Changelog # Changelog
## [0.2.1] - 2026-04-09
### Fixed
- **Issue #1**: Preflight-Check beim Start prüft jetzt `tesseract` und `gs` (Ghostscript). Fehlt eine Abhängigkeit, beendet sich der Service sofort mit Exit-Code 2 und klarer Fehlermeldung statt erst bei der ersten Datei.
- **Issue #2**: `--once`-Modus liefert jetzt Exit-Code `1`, sobald **mindestens ein** PDF fehlgeschlagen ist. Exit-Code `0` nur bei vollständigem Erfolg (inkl. "keine Dateien vorhanden"). Exit-Code `2` bei Preflight-Fehler.
### Added
- Public API: `HotfolderService.run_once()`, `.success_count`, `.error_count`, `.ensure_dirs()`
- `check_preflight()` / `PreflightError` in `pdf_ocr_hotfolder.service`
- pytest-Test-Suite (`tests/`) mit 11 Tests — deckt alle Szenarien aus Issue #1 und #2 ab
- `ocrmypdf`-Import in `processor.py` ist jetzt lazy (Tests ohne ocrmypdf-Installation möglich)
## [0.2.0] - 2026-04-08 ## [0.2.0] - 2026-04-08
### Added ### Added
+1 -1
View File
@@ -1 +1 @@
0.2.0 0.2.1
+11 -5
View File
@@ -8,7 +8,7 @@ from pathlib import Path
from . import __version__ from . import __version__
from .config import load_config from .config import load_config
from .service import HotfolderService from .service import HotfolderService, PreflightError
def _setup_logging(level: str) -> None: def _setup_logging(level: str) -> None:
@@ -40,14 +40,20 @@ def main() -> int:
_setup_logging(cfg.log_level) _setup_logging(cfg.log_level)
service = HotfolderService(cfg) service = HotfolderService(cfg)
if args.once: if args.once:
service._ensure_dirs() # noqa: SLF001 try:
service._scan_existing() # noqa: SLF001 errors = service.run_once()
service._executor.shutdown(wait=True) # noqa: SLF001 except PreflightError as e:
return 0 print(f"FEHLER: {e}", file=sys.stderr)
return 2
return 1 if errors > 0 else 0
try: try:
service.run() service.run()
except PreflightError as e:
print(f"FEHLER: {e}", file=sys.stderr)
return 2
except KeyboardInterrupt: except KeyboardInterrupt:
pass pass
return 0 return 0
+2 -2
View File
@@ -7,8 +7,6 @@ import subprocess
from dataclasses import dataclass from dataclasses import dataclass
from pathlib import Path from pathlib import Path
import ocrmypdf
from .config import OcrConfig, VeraPdfConfig from .config import OcrConfig, VeraPdfConfig
log = logging.getLogger(__name__) log = logging.getLogger(__name__)
@@ -25,6 +23,8 @@ class ProcessResult:
def run_ocr(src: Path, dst: Path, cfg: OcrConfig) -> None: def run_ocr(src: Path, dst: Path, cfg: OcrConfig) -> None:
"""Führt ocrmypdf als Library-Call aus (kein Subprozess-Overhead).""" """Führt ocrmypdf als Library-Call aus (kein Subprozess-Overhead)."""
import ocrmypdf # lazy, damit Tests ohne ocrmypdf laufen
kwargs: dict = { kwargs: dict = {
"language": cfg.languages, "language": cfg.languages,
"jobs": cfg.jobs, "jobs": cfg.jobs,
+55 -2
View File
@@ -2,6 +2,7 @@
from __future__ import annotations from __future__ import annotations
import logging import logging
import shutil
import signal import signal
import threading import threading
import time import time
@@ -18,6 +19,27 @@ from .uploaders import notify_email, upload_folder, upload_nextcloud, upload_sft
log = logging.getLogger(__name__) log = logging.getLogger(__name__)
class PreflightError(RuntimeError):
    """Raised when required external binaries are missing."""


# Binaries that ocrmypdf requires.
_REQUIRED_BINARIES = ("tesseract", "gs")


def check_preflight() -> None:
    """Verify that every entry of ``_REQUIRED_BINARIES`` can be found via PATH.

    Raises:
        PreflightError: if at least one binary cannot be located. The message
            lists every missing binary and an install hint.
    """
    not_found = []
    for binary in _REQUIRED_BINARIES:
        if shutil.which(binary) is None:
            not_found.append(binary)
    if not not_found:
        return
    listing = ", ".join(not_found)
    raise PreflightError(
        "Fehlende Abhängigkeiten: " + listing
        + ". Bitte installieren: sudo apt install tesseract-ocr ghostscript"
    )
def _is_pdf(path: Path) -> bool: def _is_pdf(path: Path) -> bool:
return path.suffix.lower() == ".pdf" and path.is_file() return path.suffix.lower() == ".pdf" and path.is_file()
@@ -70,10 +92,20 @@ class HotfolderService:
self._stop = threading.Event() self._stop = threading.Event()
self._inflight: set[str] = set() self._inflight: set[str] = set()
self._lock = threading.Lock() self._lock = threading.Lock()
self._success_count = 0
self._error_count = 0
@property
def success_count(self) -> int:
    """Read-only view of the internal ``_success_count`` counter."""
    count = self._success_count
    return count
@property
def error_count(self) -> int:
    """Read-only view of the internal ``_error_count`` counter."""
    count = self._error_count
    return count
# ---- Setup ---- # ---- Setup ----
def _ensure_dirs(self) -> None: def ensure_dirs(self) -> None:
for p in (self.cfg.paths.incoming, self.cfg.paths.outgoing, for p in (self.cfg.paths.incoming, self.cfg.paths.outgoing,
self.cfg.paths.working, self.cfg.paths.error): self.cfg.paths.working, self.cfg.paths.error):
p.mkdir(parents=True, exist_ok=True) p.mkdir(parents=True, exist_ok=True)
@@ -81,7 +113,8 @@ class HotfolderService:
# ---- Lifecycle ---- # ---- Lifecycle ----
def run(self) -> None: def run(self) -> None:
self._ensure_dirs() check_preflight()
self.ensure_dirs()
self._scan_existing() self._scan_existing()
self._observer = Observer() self._observer = Observer()
@@ -98,6 +131,20 @@ class HotfolderService:
finally: finally:
self.shutdown() self.shutdown()
def run_once(self) -> int:
    """Process all PDFs already lying in the incoming folder, then finish.

    Runs the preflight check, ensures the directories exist, scans the
    existing files and shuts the executor down with ``wait=True``.

    Returns:
        Number of failed PDFs (0 = everything ok).

    Raises:
        PreflightError: propagated from ``check_preflight()``.
    """
    check_preflight()
    self.ensure_dirs()
    self._scan_existing()
    # NOTE(review): presumably blocks until all queued jobs are done, so the
    # counters read below are final -- confirm against the executor type.
    self._executor.shutdown(wait=True)
    succeeded = self._success_count
    failed = self._error_count
    log.info("One-shot fertig: %d ok, %d Fehler", succeeded, failed)
    return failed
def shutdown(self) -> None: def shutdown(self) -> None:
log.info("Shutdown läuft...") log.info("Shutdown läuft...")
if self._observer: if self._observer:
@@ -150,6 +197,12 @@ class HotfolderService:
vera_cfg=self.cfg.verapdf, vera_cfg=self.cfg.verapdf,
) )
with self._lock:
if result.success:
self._success_count += 1
else:
self._error_count += 1
if result.success: if result.success:
self._dispatch_uploads(result.output) self._dispatch_uploads(result.output)
self._notify(result) self._notify(result)
View File
+52
View File
@@ -0,0 +1,52 @@
"""Gemeinsame pytest-Fixtures."""
from __future__ import annotations
from pathlib import Path
import pytest
from pdf_ocr_hotfolder.config import (
Config,
EmailNotify,
FolderUpload,
NextcloudUpload,
OcrConfig,
Paths,
SftpUpload,
VeraPdfConfig,
)
@pytest.fixture
def tmp_config(tmp_path: Path) -> Config:
    """Minimal Config whose directories live under ``tmp_path``; all uploads disabled."""
    dir_names = ("incoming", "outgoing", "working", "error")
    dirs = {name: tmp_path / name for name in dir_names}
    # Create the directories up front, in the same order as they are declared.
    for directory in dirs.values():
        directory.mkdir(parents=True, exist_ok=True)

    return Config(
        paths=Paths(**dirs),
        ocr=OcrConfig(max_workers=1),
        verapdf=VeraPdfConfig(enabled=False),
        folder=FolderUpload(enabled=False),
        nextcloud=NextcloudUpload(enabled=False),
        sftp=SftpUpload(enabled=False),
        email=EmailNotify(enabled=False),
        log_level="DEBUG",
    )
@pytest.fixture
def dummy_pdf(tmp_config: Config) -> Path:
    """Drop a file with a ``.pdf`` extension into the incoming folder.

    Caution: this is not a real PDF. Tests are expected to mock ``process_pdf``.
    """
    target = tmp_config.paths.incoming / "test.pdf"
    target.write_bytes(b"%PDF-1.4 fake\n")
    return target
+96
View File
@@ -0,0 +1,96 @@
"""Tests für Issue #2: --once Modus muss Exit-Code != 0 bei Fehlern liefern."""
from __future__ import annotations
from pathlib import Path
from unittest.mock import patch
from pdf_ocr_hotfolder.processor import ProcessResult
from pdf_ocr_hotfolder.service import HotfolderService
def _fake_success(src: Path, working_dir, outgoing_dir, error_dir, ocr_cfg, vera_cfg):
    """Stand-in for ``process_pdf`` that always reports success.

    Writes a placeholder ``OCR_<name>`` file into ``outgoing_dir``, removes
    ``src`` and returns a successful ``ProcessResult``.
    """
    result_path = outgoing_dir / f"OCR_{src.name}"
    result_path.parent.mkdir(parents=True, exist_ok=True)
    result_path.write_bytes(b"%PDF-1.4 ocr\n")
    src.unlink(missing_ok=True)
    return ProcessResult(src, result_path, True)
def _fake_failure(src: Path, working_dir, outgoing_dir, error_dir, ocr_cfg, vera_cfg):
    """Stand-in for ``process_pdf`` that always reports failure.

    Moves ``src`` into ``error_dir`` and returns an unsuccessful
    ``ProcessResult`` carrying a fixed error text.
    """
    error_dir.mkdir(parents=True, exist_ok=True)
    src.rename(error_dir / src.name)
    expected_output = outgoing_dir / f"OCR_{src.name}"
    return ProcessResult(src, expected_output, False, error="fake ocr failure")
def _run(tmp_config, fake_process):
    """Helper: call ``run_once()`` with ``process_pdf`` and the preflight check mocked.

    Returns whatever ``run_once()`` returns (the number of failed PDFs).
    """
    preflight_patch = patch("pdf_ocr_hotfolder.service.check_preflight", return_value=None)
    process_patch = patch("pdf_ocr_hotfolder.service.process_pdf", side_effect=fake_process)
    stable_patch = patch("pdf_ocr_hotfolder.service._wait_until_stable", return_value=True)
    with preflight_patch, process_patch, stable_patch:
        service = HotfolderService(tmp_config)
        try:
            return service.run_once()
        finally:
            # Release the executor even if run_once() raised.
            service._executor.shutdown(wait=False)
def test_once_exit_0_when_no_files(tmp_config) -> None:
    """Scenario: no PDFs present -> exit 0."""
    failed = _run(tmp_config, _fake_success)
    assert failed == 0
def test_once_exit_0_when_all_success(tmp_config) -> None:
    """Scenario: every PDF succeeds -> exit 0."""
    incoming = tmp_config.paths.incoming
    for name in ("a.pdf", "b.pdf"):
        (incoming / name).write_bytes(b"%PDF-1.4\n")
    failed = _run(tmp_config, _fake_success)
    assert failed == 0
def test_once_exit_nonzero_when_all_fail(tmp_config) -> None:
    """Scenario: every PDF fails -> exit != 0 (issue #2)."""
    incoming = tmp_config.paths.incoming
    for name in ("a.pdf", "b.pdf"):
        (incoming / name).write_bytes(b"%PDF-1.4\n")
    failed = _run(tmp_config, _fake_failure)
    assert failed == 2
def test_once_exit_nonzero_when_some_fail(tmp_config) -> None:
    """Scenario: some PDFs fail -> exit != 0."""
    incoming = tmp_config.paths.incoming
    for name in ("ok.pdf", "bad.pdf"):
        (incoming / name).write_bytes(b"%PDF-1.4\n")

    def mixed(src, *args, **kwargs):
        # Files with "bad" in their name fail, everything else succeeds.
        handler = _fake_failure if "bad" in src.name else _fake_success
        return handler(src, *args, **kwargs)

    failed = _run(tmp_config, mixed)
    assert failed == 1
def test_counters_track_success_and_failure(tmp_config) -> None:
    """success_count and error_count must count one success and one failure."""
    incoming = tmp_config.paths.incoming
    for name in ("ok.pdf", "bad.pdf"):
        (incoming / name).write_bytes(b"%PDF-1.4\n")

    def mixed(src, *args, **kwargs):
        # Files with "bad" in their name fail, everything else succeeds.
        handler = _fake_failure if "bad" in src.name else _fake_success
        return handler(src, *args, **kwargs)

    preflight_patch = patch("pdf_ocr_hotfolder.service.check_preflight", return_value=None)
    process_patch = patch("pdf_ocr_hotfolder.service.process_pdf", side_effect=mixed)
    stable_patch = patch("pdf_ocr_hotfolder.service._wait_until_stable", return_value=True)
    with preflight_patch, process_patch, stable_patch:
        service = HotfolderService(tmp_config)
        try:
            service.run_once()
            assert service.success_count == 1
            assert service.error_count == 1
        finally:
            service._executor.shutdown(wait=False)
+75
View File
@@ -0,0 +1,75 @@
"""Tests für Issue #1: Preflight-Check bei fehlendem Tesseract."""
from __future__ import annotations
import sys
from unittest.mock import patch
import pytest
from pdf_ocr_hotfolder.service import (
HotfolderService,
PreflightError,
check_preflight,
)
def test_preflight_passes_when_all_binaries_present() -> None:
    """With tesseract and gs resolvable, check_preflight() must not raise."""
    which_patch = patch(
        "pdf_ocr_hotfolder.service.shutil.which",
        return_value="/usr/bin/fake",
    )
    with which_patch:
        check_preflight()  # must not raise
def test_preflight_fails_when_tesseract_missing() -> None:
    """Missing tesseract -> PreflightError whose message mentions it."""

    def fake_which(name: str) -> str | None:
        if name == "tesseract":
            return None
        return "/usr/bin/fake"

    which_patch = patch("pdf_ocr_hotfolder.service.shutil.which", side_effect=fake_which)
    with which_patch, pytest.raises(PreflightError, match="tesseract"):
        check_preflight()
def test_preflight_fails_when_ghostscript_missing() -> None:
    """Missing gs (Ghostscript) -> PreflightError whose message mentions it."""

    def fake_which(name: str) -> str | None:
        if name == "gs":
            return None
        return "/usr/bin/fake"

    which_patch = patch("pdf_ocr_hotfolder.service.shutil.which", side_effect=fake_which)
    with which_patch, pytest.raises(PreflightError, match="gs"):
        check_preflight()
def test_preflight_lists_all_missing_binaries() -> None:
    """When several binaries are missing, the message names all of them."""
    which_patch = patch("pdf_ocr_hotfolder.service.shutil.which", return_value=None)
    with which_patch, pytest.raises(PreflightError) as exc_info:
        check_preflight()

    message = str(exc_info.value)
    for binary in ("tesseract", "gs"):
        assert binary in message
def test_run_once_raises_preflight_error(tmp_config) -> None:
    """HotfolderService.run_once() raises PreflightError when no binary is found."""
    service = HotfolderService(tmp_config)
    which_patch = patch("pdf_ocr_hotfolder.service.shutil.which", return_value=None)
    try:
        with which_patch, pytest.raises(PreflightError):
            service.run_once()
    finally:
        # run_once() aborted before its own shutdown, so release the executor here.
        service._executor.shutdown(wait=False)
def test_main_returns_2_on_preflight_error(tmp_config, tmp_path, monkeypatch) -> None:
    """CLI returns exit code 2 on a preflight failure (issue #1 scenario).

    The config paths are written with forward slashes: a raw Windows path
    inside a TOML basic string would have its backslashes read as escape
    sequences and break config loading before the preflight check is reached.
    The file is written as UTF-8 explicitly, the encoding TOML requires.
    """
    paths = tmp_config.paths
    cfg_file = tmp_path / "cfg.toml"
    cfg_file.write_text(f"""
[paths]
incoming = "{paths.incoming.as_posix()}"
outgoing = "{paths.outgoing.as_posix()}"
working = "{paths.working.as_posix()}"
error = "{paths.error.as_posix()}"
""", encoding="utf-8")
    monkeypatch.setattr(sys, "argv", ["pdf-ocr-hotfolder", "--config", str(cfg_file), "--once"])
    # Every binary lookup fails, so the preflight check must abort the run.
    with patch("pdf_ocr_hotfolder.service.shutil.which", return_value=None):
        from pdf_ocr_hotfolder.__main__ import main
        assert main() == 2