Compare commits
1 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 6f7cadfc63 |
@@ -4,6 +4,7 @@ __pycache__/
|
|||||||
venv/
|
venv/
|
||||||
env/
|
env/
|
||||||
.venv/
|
.venv/
|
||||||
|
.pytest_cache/
|
||||||
*.egg-info/
|
*.egg-info/
|
||||||
build/
|
build/
|
||||||
dist/
|
dist/
|
||||||
|
|||||||
@@ -1,5 +1,17 @@
|
|||||||
# Changelog
|
# Changelog
|
||||||
|
|
||||||
|
## [0.2.1] - 2026-04-09
|
||||||
|
|
||||||
|
### Fixed
|
||||||
|
- **Issue #1**: Preflight-Check beim Start prüft jetzt `tesseract` und `gs` (Ghostscript). Fehlt eine Abhängigkeit, beendet sich der Service sofort mit Exit-Code 2 und klarer Fehlermeldung statt erst bei der ersten Datei.
|
||||||
|
- **Issue #2**: `--once`-Modus liefert jetzt Exit-Code `1`, sobald **mindestens ein** PDF fehlgeschlagen ist. Exit-Code `0` nur bei vollständigem Erfolg (inkl. "keine Dateien vorhanden"). Exit-Code `2` bei Preflight-Fehler.
|
||||||
|
|
||||||
|
### Added
|
||||||
|
- Public API: `HotfolderService.run_once()`, `.success_count`, `.error_count`, `.ensure_dirs()`
|
||||||
|
- `check_preflight()` / `PreflightError` in `pdf_ocr_hotfolder.service`
|
||||||
|
- pytest-Test-Suite (`tests/`) mit 11 Tests — deckt alle Szenarien aus Issue #1 und #2 ab
|
||||||
|
- `ocrmypdf`-Import in `processor.py` ist jetzt lazy (Tests ohne ocrmypdf-Installation möglich)
|
||||||
|
|
||||||
## [0.2.0] - 2026-04-08
|
## [0.2.0] - 2026-04-08
|
||||||
|
|
||||||
### Added
|
### Added
|
||||||
|
|||||||
@@ -8,7 +8,7 @@ from pathlib import Path
|
|||||||
|
|
||||||
from . import __version__
|
from . import __version__
|
||||||
from .config import load_config
|
from .config import load_config
|
||||||
from .service import HotfolderService
|
from .service import HotfolderService, PreflightError
|
||||||
|
|
||||||
|
|
||||||
def _setup_logging(level: str) -> None:
|
def _setup_logging(level: str) -> None:
|
||||||
@@ -40,14 +40,20 @@ def main() -> int:
|
|||||||
_setup_logging(cfg.log_level)
|
_setup_logging(cfg.log_level)
|
||||||
|
|
||||||
service = HotfolderService(cfg)
|
service = HotfolderService(cfg)
|
||||||
|
|
||||||
if args.once:
|
if args.once:
|
||||||
service._ensure_dirs() # noqa: SLF001
|
try:
|
||||||
service._scan_existing() # noqa: SLF001
|
errors = service.run_once()
|
||||||
service._executor.shutdown(wait=True) # noqa: SLF001
|
except PreflightError as e:
|
||||||
return 0
|
print(f"FEHLER: {e}", file=sys.stderr)
|
||||||
|
return 2
|
||||||
|
return 1 if errors > 0 else 0
|
||||||
|
|
||||||
try:
|
try:
|
||||||
service.run()
|
service.run()
|
||||||
|
except PreflightError as e:
|
||||||
|
print(f"FEHLER: {e}", file=sys.stderr)
|
||||||
|
return 2
|
||||||
except KeyboardInterrupt:
|
except KeyboardInterrupt:
|
||||||
pass
|
pass
|
||||||
return 0
|
return 0
|
||||||
|
|||||||
@@ -7,8 +7,6 @@ import subprocess
|
|||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
import ocrmypdf
|
|
||||||
|
|
||||||
from .config import OcrConfig, VeraPdfConfig
|
from .config import OcrConfig, VeraPdfConfig
|
||||||
|
|
||||||
log = logging.getLogger(__name__)
|
log = logging.getLogger(__name__)
|
||||||
@@ -25,6 +23,8 @@ class ProcessResult:
|
|||||||
|
|
||||||
def run_ocr(src: Path, dst: Path, cfg: OcrConfig) -> None:
|
def run_ocr(src: Path, dst: Path, cfg: OcrConfig) -> None:
|
||||||
"""Führt ocrmypdf als Library-Call aus (kein Subprozess-Overhead)."""
|
"""Führt ocrmypdf als Library-Call aus (kein Subprozess-Overhead)."""
|
||||||
|
import ocrmypdf # lazy, damit Tests ohne ocrmypdf laufen
|
||||||
|
|
||||||
kwargs: dict = {
|
kwargs: dict = {
|
||||||
"language": cfg.languages,
|
"language": cfg.languages,
|
||||||
"jobs": cfg.jobs,
|
"jobs": cfg.jobs,
|
||||||
|
|||||||
@@ -2,6 +2,7 @@
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
|
import shutil
|
||||||
import signal
|
import signal
|
||||||
import threading
|
import threading
|
||||||
import time
|
import time
|
||||||
@@ -18,6 +19,27 @@ from .uploaders import notify_email, upload_folder, upload_nextcloud, upload_sft
|
|||||||
log = logging.getLogger(__name__)
|
log = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
class PreflightError(RuntimeError):
|
||||||
|
"""Erforderliche externe Binaries fehlen."""
|
||||||
|
|
||||||
|
|
||||||
|
# Pflicht-Binaries für ocrmypdf
|
||||||
|
_REQUIRED_BINARIES = ("tesseract", "gs")
|
||||||
|
|
||||||
|
|
||||||
|
def check_preflight() -> None:
|
||||||
|
"""Prüft, ob alle externen Abhängigkeiten (Tesseract, Ghostscript) installiert sind.
|
||||||
|
|
||||||
|
Wirft PreflightError mit Liste der fehlenden Binaries.
|
||||||
|
"""
|
||||||
|
missing = [b for b in _REQUIRED_BINARIES if shutil.which(b) is None]
|
||||||
|
if missing:
|
||||||
|
raise PreflightError(
|
||||||
|
"Fehlende Abhängigkeiten: " + ", ".join(missing)
|
||||||
|
+ ". Bitte installieren: sudo apt install tesseract-ocr ghostscript"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def _is_pdf(path: Path) -> bool:
|
def _is_pdf(path: Path) -> bool:
|
||||||
return path.suffix.lower() == ".pdf" and path.is_file()
|
return path.suffix.lower() == ".pdf" and path.is_file()
|
||||||
|
|
||||||
@@ -70,10 +92,20 @@ class HotfolderService:
|
|||||||
self._stop = threading.Event()
|
self._stop = threading.Event()
|
||||||
self._inflight: set[str] = set()
|
self._inflight: set[str] = set()
|
||||||
self._lock = threading.Lock()
|
self._lock = threading.Lock()
|
||||||
|
self._success_count = 0
|
||||||
|
self._error_count = 0
|
||||||
|
|
||||||
|
@property
|
||||||
|
def success_count(self) -> int:
|
||||||
|
return self._success_count
|
||||||
|
|
||||||
|
@property
|
||||||
|
def error_count(self) -> int:
|
||||||
|
return self._error_count
|
||||||
|
|
||||||
# ---- Setup ----
|
# ---- Setup ----
|
||||||
|
|
||||||
def _ensure_dirs(self) -> None:
|
def ensure_dirs(self) -> None:
|
||||||
for p in (self.cfg.paths.incoming, self.cfg.paths.outgoing,
|
for p in (self.cfg.paths.incoming, self.cfg.paths.outgoing,
|
||||||
self.cfg.paths.working, self.cfg.paths.error):
|
self.cfg.paths.working, self.cfg.paths.error):
|
||||||
p.mkdir(parents=True, exist_ok=True)
|
p.mkdir(parents=True, exist_ok=True)
|
||||||
@@ -81,7 +113,8 @@ class HotfolderService:
|
|||||||
# ---- Lifecycle ----
|
# ---- Lifecycle ----
|
||||||
|
|
||||||
def run(self) -> None:
|
def run(self) -> None:
|
||||||
self._ensure_dirs()
|
check_preflight()
|
||||||
|
self.ensure_dirs()
|
||||||
self._scan_existing()
|
self._scan_existing()
|
||||||
|
|
||||||
self._observer = Observer()
|
self._observer = Observer()
|
||||||
@@ -98,6 +131,20 @@ class HotfolderService:
|
|||||||
finally:
|
finally:
|
||||||
self.shutdown()
|
self.shutdown()
|
||||||
|
|
||||||
|
def run_once(self) -> int:
|
||||||
|
"""Verarbeitet alle bereits im incoming-Ordner liegenden PDFs und beendet sich.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Anzahl fehlgeschlagener PDFs (0 = alles ok).
|
||||||
|
"""
|
||||||
|
check_preflight()
|
||||||
|
self.ensure_dirs()
|
||||||
|
self._scan_existing()
|
||||||
|
self._executor.shutdown(wait=True)
|
||||||
|
log.info("One-shot fertig: %d ok, %d Fehler",
|
||||||
|
self._success_count, self._error_count)
|
||||||
|
return self._error_count
|
||||||
|
|
||||||
def shutdown(self) -> None:
|
def shutdown(self) -> None:
|
||||||
log.info("Shutdown läuft...")
|
log.info("Shutdown läuft...")
|
||||||
if self._observer:
|
if self._observer:
|
||||||
@@ -150,6 +197,12 @@ class HotfolderService:
|
|||||||
vera_cfg=self.cfg.verapdf,
|
vera_cfg=self.cfg.verapdf,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
with self._lock:
|
||||||
|
if result.success:
|
||||||
|
self._success_count += 1
|
||||||
|
else:
|
||||||
|
self._error_count += 1
|
||||||
|
|
||||||
if result.success:
|
if result.success:
|
||||||
self._dispatch_uploads(result.output)
|
self._dispatch_uploads(result.output)
|
||||||
self._notify(result)
|
self._notify(result)
|
||||||
|
|||||||
@@ -0,0 +1,52 @@
|
|||||||
|
"""Gemeinsame pytest-Fixtures."""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from pdf_ocr_hotfolder.config import (
|
||||||
|
Config,
|
||||||
|
EmailNotify,
|
||||||
|
FolderUpload,
|
||||||
|
NextcloudUpload,
|
||||||
|
OcrConfig,
|
||||||
|
Paths,
|
||||||
|
SftpUpload,
|
||||||
|
VeraPdfConfig,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
|
||||||
|
def tmp_config(tmp_path: Path) -> Config:
|
||||||
|
"""Minimal-Config mit tmp_path-Verzeichnissen, alle Uploads deaktiviert."""
|
||||||
|
paths = Paths(
|
||||||
|
incoming=tmp_path / "incoming",
|
||||||
|
outgoing=tmp_path / "outgoing",
|
||||||
|
working=tmp_path / "working",
|
||||||
|
error=tmp_path / "error",
|
||||||
|
)
|
||||||
|
for p in (paths.incoming, paths.outgoing, paths.working, paths.error):
|
||||||
|
p.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
|
return Config(
|
||||||
|
paths=paths,
|
||||||
|
ocr=OcrConfig(max_workers=1),
|
||||||
|
verapdf=VeraPdfConfig(enabled=False),
|
||||||
|
folder=FolderUpload(enabled=False),
|
||||||
|
nextcloud=NextcloudUpload(enabled=False),
|
||||||
|
sftp=SftpUpload(enabled=False),
|
||||||
|
email=EmailNotify(enabled=False),
|
||||||
|
log_level="DEBUG",
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
|
||||||
|
def dummy_pdf(tmp_config: Config) -> Path:
|
||||||
|
"""Legt eine Datei mit .pdf-Extension im incoming-Ordner ab.
|
||||||
|
|
||||||
|
Achtung: kein echtes PDF. Für Tests wird `process_pdf` gemockt.
|
||||||
|
"""
|
||||||
|
pdf = tmp_config.paths.incoming / "test.pdf"
|
||||||
|
pdf.write_bytes(b"%PDF-1.4 fake\n")
|
||||||
|
return pdf
|
||||||
@@ -0,0 +1,96 @@
|
|||||||
|
"""Tests für Issue #2: --once Modus muss Exit-Code != 0 bei Fehlern liefern."""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from pathlib import Path
|
||||||
|
from unittest.mock import patch
|
||||||
|
|
||||||
|
from pdf_ocr_hotfolder.processor import ProcessResult
|
||||||
|
from pdf_ocr_hotfolder.service import HotfolderService
|
||||||
|
|
||||||
|
|
||||||
|
def _fake_success(src: Path, working_dir, outgoing_dir, error_dir, ocr_cfg, vera_cfg):
|
||||||
|
out = outgoing_dir / f"OCR_{src.name}"
|
||||||
|
out.parent.mkdir(parents=True, exist_ok=True)
|
||||||
|
out.write_bytes(b"%PDF-1.4 ocr\n")
|
||||||
|
src.unlink(missing_ok=True)
|
||||||
|
return ProcessResult(src, out, True)
|
||||||
|
|
||||||
|
|
||||||
|
def _fake_failure(src: Path, working_dir, outgoing_dir, error_dir, ocr_cfg, vera_cfg):
|
||||||
|
error_dir.mkdir(parents=True, exist_ok=True)
|
||||||
|
dest = error_dir / src.name
|
||||||
|
src.rename(dest)
|
||||||
|
return ProcessResult(src, outgoing_dir / f"OCR_{src.name}", False,
|
||||||
|
error="fake ocr failure")
|
||||||
|
|
||||||
|
|
||||||
|
def _run(tmp_config, fake_process):
|
||||||
|
"""Helper: führt run_once() mit gemocktem process_pdf und preflight aus."""
|
||||||
|
with patch("pdf_ocr_hotfolder.service.check_preflight", return_value=None), \
|
||||||
|
patch("pdf_ocr_hotfolder.service.process_pdf", side_effect=fake_process), \
|
||||||
|
patch("pdf_ocr_hotfolder.service._wait_until_stable", return_value=True):
|
||||||
|
service = HotfolderService(tmp_config)
|
||||||
|
try:
|
||||||
|
return service.run_once()
|
||||||
|
finally:
|
||||||
|
service._executor.shutdown(wait=False)
|
||||||
|
|
||||||
|
|
||||||
|
def test_once_exit_0_when_no_files(tmp_config) -> None:
|
||||||
|
"""Szenario: Keine PDFs vorhanden → Exit 0."""
|
||||||
|
errors = _run(tmp_config, _fake_success)
|
||||||
|
assert errors == 0
|
||||||
|
|
||||||
|
|
||||||
|
def test_once_exit_0_when_all_success(tmp_config) -> None:
|
||||||
|
"""Szenario: Alle PDFs erfolgreich → Exit 0."""
|
||||||
|
(tmp_config.paths.incoming / "a.pdf").write_bytes(b"%PDF-1.4\n")
|
||||||
|
(tmp_config.paths.incoming / "b.pdf").write_bytes(b"%PDF-1.4\n")
|
||||||
|
|
||||||
|
errors = _run(tmp_config, _fake_success)
|
||||||
|
assert errors == 0
|
||||||
|
|
||||||
|
|
||||||
|
def test_once_exit_nonzero_when_all_fail(tmp_config) -> None:
|
||||||
|
"""Szenario: Alle PDFs fehlgeschlagen → Exit != 0 (Issue #2)."""
|
||||||
|
(tmp_config.paths.incoming / "a.pdf").write_bytes(b"%PDF-1.4\n")
|
||||||
|
(tmp_config.paths.incoming / "b.pdf").write_bytes(b"%PDF-1.4\n")
|
||||||
|
|
||||||
|
errors = _run(tmp_config, _fake_failure)
|
||||||
|
assert errors == 2
|
||||||
|
|
||||||
|
|
||||||
|
def test_once_exit_nonzero_when_some_fail(tmp_config) -> None:
|
||||||
|
"""Szenario: Teilweise fehlgeschlagen → Exit != 0."""
|
||||||
|
(tmp_config.paths.incoming / "ok.pdf").write_bytes(b"%PDF-1.4\n")
|
||||||
|
(tmp_config.paths.incoming / "bad.pdf").write_bytes(b"%PDF-1.4\n")
|
||||||
|
|
||||||
|
def mixed(src, *args, **kwargs):
|
||||||
|
if "bad" in src.name:
|
||||||
|
return _fake_failure(src, *args, **kwargs)
|
||||||
|
return _fake_success(src, *args, **kwargs)
|
||||||
|
|
||||||
|
errors = _run(tmp_config, mixed)
|
||||||
|
assert errors == 1
|
||||||
|
|
||||||
|
|
||||||
|
def test_counters_track_success_and_failure(tmp_config) -> None:
|
||||||
|
"""success_count und error_count sollen korrekt mitzählen."""
|
||||||
|
(tmp_config.paths.incoming / "ok.pdf").write_bytes(b"%PDF-1.4\n")
|
||||||
|
(tmp_config.paths.incoming / "bad.pdf").write_bytes(b"%PDF-1.4\n")
|
||||||
|
|
||||||
|
def mixed(src, *args, **kwargs):
|
||||||
|
if "bad" in src.name:
|
||||||
|
return _fake_failure(src, *args, **kwargs)
|
||||||
|
return _fake_success(src, *args, **kwargs)
|
||||||
|
|
||||||
|
with patch("pdf_ocr_hotfolder.service.check_preflight", return_value=None), \
|
||||||
|
patch("pdf_ocr_hotfolder.service.process_pdf", side_effect=mixed), \
|
||||||
|
patch("pdf_ocr_hotfolder.service._wait_until_stable", return_value=True):
|
||||||
|
service = HotfolderService(tmp_config)
|
||||||
|
try:
|
||||||
|
service.run_once()
|
||||||
|
assert service.success_count == 1
|
||||||
|
assert service.error_count == 1
|
||||||
|
finally:
|
||||||
|
service._executor.shutdown(wait=False)
|
||||||
@@ -0,0 +1,75 @@
|
|||||||
|
"""Tests für Issue #1: Preflight-Check bei fehlendem Tesseract."""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import sys
|
||||||
|
from unittest.mock import patch
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from pdf_ocr_hotfolder.service import (
|
||||||
|
HotfolderService,
|
||||||
|
PreflightError,
|
||||||
|
check_preflight,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def test_preflight_passes_when_all_binaries_present() -> None:
|
||||||
|
"""Wenn tesseract + gs im PATH sind, darf kein Fehler fliegen."""
|
||||||
|
with patch("pdf_ocr_hotfolder.service.shutil.which", return_value="/usr/bin/fake"):
|
||||||
|
check_preflight() # darf nicht werfen
|
||||||
|
|
||||||
|
|
||||||
|
def test_preflight_fails_when_tesseract_missing() -> None:
|
||||||
|
"""Fehlendes tesseract → PreflightError mit passender Meldung."""
|
||||||
|
def fake_which(name: str) -> str | None:
|
||||||
|
return None if name == "tesseract" else "/usr/bin/fake"
|
||||||
|
|
||||||
|
with patch("pdf_ocr_hotfolder.service.shutil.which", side_effect=fake_which):
|
||||||
|
with pytest.raises(PreflightError, match="tesseract"):
|
||||||
|
check_preflight()
|
||||||
|
|
||||||
|
|
||||||
|
def test_preflight_fails_when_ghostscript_missing() -> None:
|
||||||
|
def fake_which(name: str) -> str | None:
|
||||||
|
return None if name == "gs" else "/usr/bin/fake"
|
||||||
|
|
||||||
|
with patch("pdf_ocr_hotfolder.service.shutil.which", side_effect=fake_which):
|
||||||
|
with pytest.raises(PreflightError, match="gs"):
|
||||||
|
check_preflight()
|
||||||
|
|
||||||
|
|
||||||
|
def test_preflight_lists_all_missing_binaries() -> None:
|
||||||
|
"""Bei mehreren fehlenden Binaries werden alle genannt."""
|
||||||
|
with patch("pdf_ocr_hotfolder.service.shutil.which", return_value=None):
|
||||||
|
with pytest.raises(PreflightError) as exc_info:
|
||||||
|
check_preflight()
|
||||||
|
msg = str(exc_info.value)
|
||||||
|
assert "tesseract" in msg
|
||||||
|
assert "gs" in msg
|
||||||
|
|
||||||
|
|
||||||
|
def test_run_once_raises_preflight_error(tmp_config) -> None:
|
||||||
|
"""HotfolderService.run_once() wirft PreflightError, wenn tesseract fehlt."""
|
||||||
|
service = HotfolderService(tmp_config)
|
||||||
|
try:
|
||||||
|
with patch("pdf_ocr_hotfolder.service.shutil.which", return_value=None):
|
||||||
|
with pytest.raises(PreflightError):
|
||||||
|
service.run_once()
|
||||||
|
finally:
|
||||||
|
service._executor.shutdown(wait=False)
|
||||||
|
|
||||||
|
|
||||||
|
def test_main_returns_2_on_preflight_error(tmp_config, tmp_path, monkeypatch) -> None:
|
||||||
|
"""CLI liefert Exit-Code 2 bei Preflight-Fehler (Issue #1 Szenario)."""
|
||||||
|
cfg_file = tmp_path / "cfg.toml"
|
||||||
|
cfg_file.write_text(f"""
|
||||||
|
[paths]
|
||||||
|
incoming = "{tmp_config.paths.incoming}"
|
||||||
|
outgoing = "{tmp_config.paths.outgoing}"
|
||||||
|
working = "{tmp_config.paths.working}"
|
||||||
|
error = "{tmp_config.paths.error}"
|
||||||
|
""")
|
||||||
|
monkeypatch.setattr(sys, "argv", ["pdf-ocr-hotfolder", "--config", str(cfg_file), "--once"])
|
||||||
|
with patch("pdf_ocr_hotfolder.service.shutil.which", return_value=None):
|
||||||
|
from pdf_ocr_hotfolder.__main__ import main
|
||||||
|
assert main() == 2
|
||||||
Reference in New Issue
Block a user