Files
pdf-ocr-hotfolder/install.sh
T
techadmin 9cdc9ae443 fix: Ghostscript 10.0.0-10.02.0 PDF/A-Bug abfangen (v0.2.2)
- config.example.toml: pdfa_level="" als sicherer Default
- check_preflight(pdfa_level) erkennt betroffene GS-Versionen und bricht ab
- install.sh warnt bei betroffenen GS-Versionen
- 19 neue Tests (parametrisiert über Versions-Matrix)

Closes #3

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-04-09 07:29:18 +02:00

264 lines
9.2 KiB
Bash
Executable File
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env bash
#
# PDF OCR Hotfolder — Installer / Instanz-Manager für Debian 12/13
#
# Basis-Installation erfolgt einmalig (Code, venv, systemd-Template-Unit).
# Danach werden Hotfolder-Instanzen verwaltet:
# - Beim Erstlauf: mindestens eine Instanz wird angelegt
# - Beim Folgelauf: bestehende Instanzen werden erkannt; neue können ergänzt werden
#
# Unterstützt lokale System-User und AD-User mit lokaler UID (SSSD/Winbind).
#
# Strict mode: abort on unhandled errors, unset variables and failed pipeline stages.
set -euo pipefail

# ANSI colour sequences, stored in backslash-escaped form; they are expanded
# by printf's %b in the logging helpers below.
RED='\033[0;31m'; GREEN='\033[0;32m'; YELLOW='\033[1;33m'; BLUE='\033[0;34m'; NC='\033[0m'

# Logging helpers. Each prints a coloured tag followed by all arguments joined
# with spaces. Only the colour codes go through %b; the message itself is
# printed with %s so backslashes in paths or user names (e.g. DOMAIN\user)
# are shown literally instead of being interpreted as escape sequences.
log_info()  { printf '%b[INFO]%b %s\n'  "$GREEN"  "$NC" "$*"; }
log_warn()  { printf '%b[WARN]%b %s\n'  "$YELLOW" "$NC" "$*"; }
log_error() { printf '%b[ERROR]%b %s\n' "$RED"    "$NC" "$*"; }
# Step headline: preceded by an empty line to separate installer phases.
log_step()  { printf '\n%b==>%b %s\n'   "$BLUE"   "$NC" "$*"; }
# The installer needs root; bail out early otherwise.
if (( EUID != 0 )); then
  log_error "Bitte als root ausführen: sudo ./install.sh"
  exit 1
fi

# Fixed installation layout used by the functions below.
INSTALL_DIR="/opt/pdf-ocr-hotfolder"
CONFIG_DIR="/etc/pdf-ocr-hotfolder"
DATA_ROOT="/var/lib/pdf-ocr-hotfolder"
LOG_DIR="/var/log/pdf-ocr-hotfolder"
# File name of the systemd template unit and the default service account.
SERVICE_TEMPLATE="pdf-ocr-hotfolder@.service"
DEFAULT_USER="pdfocr"

# Absolute directory of this script; the repository is expected to live there.
SCRIPT_DIR="$(dirname "${BASH_SOURCE[0]}")"
SCRIPT_DIR="$(cd "$SCRIPT_DIR" && pwd)"
REPO_DIR="$SCRIPT_DIR"

# Sanity check: the Python package must sit next to install.sh.
[[ -f "$REPO_DIR/pdf_ocr_hotfolder/__init__.py" ]] || {
  log_error "Repo-Layout nicht erkannt. install.sh aus dem Repo ausführen."
  exit 1
}
# ============================================================
# Basis-Installation (idempotent)
# ============================================================
#######################################
# Base installation: system packages, default service user, directories,
# application code, Python venv and the systemd template unit.
# An existing default user and an existing venv are detected and reused.
# Globals (read): INSTALL_DIR, CONFIG_DIR, DATA_ROOT, LOG_DIR, REPO_DIR,
#                 DEFAULT_USER, SERVICE_TEMPLATE
# Arguments: none
# Outputs:   progress and warning messages via the log_* helpers
# Returns:   0 on success; with `set -e` active a failing step aborts the script
#######################################
install_base() {
  local gs_ver

  log_step "System-Pakete installieren"
  apt-get update -qq
  apt-get install -y --no-install-recommends \
    python3 python3-venv python3-pip \
    tesseract-ocr tesseract-ocr-deu tesseract-ocr-eng \
    ghostscript qpdf unpaper pngquant \
    icc-profiles-free ca-certificates curl
  log_info "System-Pakete ok ✓"

  # Ghostscript version check (issue #3): warn, but do not abort, when the
  # installed version is one of the releases matched below.
  if command -v gs >/dev/null 2>&1; then
    # Fall back to a harmless dummy version if `gs --version` itself fails.
    gs_ver="$(gs --version 2>/dev/null || echo 0.0)"
    log_info "Ghostscript: $gs_ver"
    case "$gs_ver" in
      10.0.0|10.00.0|10.01.*|10.02.0)
        echo
        log_warn "═══════════════════════════════════════════════════════════════"
        log_warn "Ghostscript $gs_ver ist vom PDF/A-Bug betroffen (10.0.0-10.02.0)."
        log_warn "Mit pdfa_level + skip_text=true kann ocrmypdf KEINE PDFs verarbeiten."
        log_warn ""
        log_warn "Workarounds:"
        log_warn " 1. ghostscript aus bookworm-backports installieren (>=10.02.1)"
        log_warn " 2. In der Config [ocr].pdfa_level = \"\" setzen (Default ab v0.2.2)"
        log_warn "═══════════════════════════════════════════════════════════════"
        echo
        ;;
    esac
  fi

  log_step "Default-User '$DEFAULT_USER' prüfen"
  if id "$DEFAULT_USER" &>/dev/null; then
    log_info "'$DEFAULT_USER' existiert bereits"
  else
    adduser --system --group --home "$DATA_ROOT" --shell /usr/sbin/nologin "$DEFAULT_USER"
    log_info "System-User '$DEFAULT_USER' angelegt ✓"
  fi

  log_step "Verzeichnisse anlegen"
  mkdir -p "$INSTALL_DIR" "$CONFIG_DIR" "$DATA_ROOT" "$LOG_DIR"
  # Config directory: owned by root, group-accessible for the default user only.
  chown root:"$DEFAULT_USER" "$CONFIG_DIR"
  chmod 750 "$CONFIG_DIR"

  log_step "Code kopieren"
  # Replace the package directory wholesale so removed modules do not linger.
  # ${INSTALL_DIR:?} guards against ever expanding to "/pdf_ocr_hotfolder".
  rm -rf -- "${INSTALL_DIR:?}/pdf_ocr_hotfolder"
  cp -r "$REPO_DIR/pdf_ocr_hotfolder" "$INSTALL_DIR/"
  cp "$REPO_DIR/requirements.txt" "$INSTALL_DIR/"
  cp "$REPO_DIR/VERSION" "$INSTALL_DIR/"
  cp "$REPO_DIR/config.example.toml" "$INSTALL_DIR/"
  # Record where the code was installed from.
  printf '%s\n' "$REPO_DIR" > "$INSTALL_DIR/.repo_path"

  log_step "Python venv"
  if [[ ! -d "$INSTALL_DIR/venv" ]]; then
    python3 -m venv "$INSTALL_DIR/venv"
  fi
  "$INSTALL_DIR/venv/bin/pip" install --upgrade pip -q
  "$INSTALL_DIR/venv/bin/pip" install -r "$INSTALL_DIR/requirements.txt" -q
  log_info "venv ok ✓"

  log_step "systemd Template-Unit installieren"
  cp "$REPO_DIR/systemd/$SERVICE_TEMPLATE" "/etc/systemd/system/$SERVICE_TEMPLATE"
  systemctl daemon-reload
  log_info "Template-Unit installiert ✓"

  # Hand the installed code and the log directory over to the default user.
  chown -R "$DEFAULT_USER":"$DEFAULT_USER" "$INSTALL_DIR" "$LOG_DIR"
}
# ============================================================
# Instanz-Verwaltung
# ============================================================
#######################################
# Print the names of all configured instances, one per line, sorted.
# An instance is any regular file "<name>.toml" directly inside CONFIG_DIR;
# the printed name is the file name without directory and ".toml" suffix.
# Globals (read): CONFIG_DIR
# Outputs:   instance names on stdout (nothing if the directory is missing
#            or contains no matching file)
#######################################
list_instances() {
  local cfg_path
  find "$CONFIG_DIR" -maxdepth 1 -type f -name '*.toml' 2>/dev/null \
    | while IFS= read -r cfg_path; do
        cfg_path="${cfg_path##*/}"          # drop the directory part
        printf '%s\n' "${cfg_path%.toml}"   # drop the extension
      done \
    | sort
}
#######################################
# Print every configured instance together with the state reported by
# `systemctl is-active` for its unit.
# Outputs:   an info line if no instance exists, otherwise a bulleted list
#            "<name> [<state>]" framed by empty lines
# Returns:   0
#######################################
show_existing_instances() {
  local -a instances=()
  local name active

  mapfile -t instances < <(list_instances)
  if [[ "${#instances[@]}" -eq 0 ]]; then
    log_info "Keine bestehenden Instanzen gefunden."
    return
  fi

  echo
  log_info "Bestehende Instanzen:"
  for name in "${instances[@]}"; do
    # `systemctl is-active` prints the state AND exits non-zero for anything
    # that is not active. Ignore the exit status and only fall back to
    # "inactive" when nothing was printed, so the state is never duplicated.
    active="$(systemctl is-active "pdf-ocr-hotfolder@${name}.service" 2>/dev/null)" || true
    printf " • %-30s [%s]\n" "$name" "${active:-inactive}"
  done
  echo
}
#######################################
# Interactively create one hotfolder instance: ask for the instance name,
# the data base path and the service user, create the data directories,
# derive the instance config from config.example.toml, write a systemd
# drop-in when the service user differs from DEFAULT_USER, then enable and
# start the instance unit.
#
# Callers invoke this as `create_instance || true`, which switches `set -e`
# off inside the function. Every step that matters is therefore checked
# explicitly instead of relying on errexit.
#
# Globals (read): CONFIG_DIR, DATA_ROOT, INSTALL_DIR, DEFAULT_USER
# Arguments: none (all input is read from stdin)
# Outputs:   prompts, progress messages and a short summary
# Returns:   0 when the instance was set up (a warning is printed if the
#            unit is not active afterwards), 1 on invalid input or when a
#            setup step failed
#######################################
create_instance() {
  local inst="" base="" default_base="" svc_user="" svc_group="" create_user=""
  local cfg_file="" unit="" dropin_dir="" base_repl=""

  echo
  # On EOF `read` fails and leaves the variable empty; the validation below
  # then rejects it with the regular error message.
  read -r -p "Instanz-Name (nur a-z, 0-9, -): " inst || true
  if [[ ! "$inst" =~ ^[a-z0-9][a-z0-9-]*$ ]]; then
    log_error "Ungültiger Name. Abbruch."
    return 1
  fi
  cfg_file="$CONFIG_DIR/$inst.toml"
  unit="pdf-ocr-hotfolder@${inst}.service"
  if [[ -f "$cfg_file" ]]; then
    log_error "Instanz '$inst' existiert bereits. Abbruch."
    return 1
  fi

  default_base="$DATA_ROOT/$inst"
  read -r -p "Basis-Pfad für Daten [$default_base]: " base || true
  base="${base:-$default_base}"
  # A relative path would create directories relative to the installer's
  # current directory and put relative paths into the config.
  if [[ "$base" != /* ]]; then
    log_error "Basis-Pfad muss ein absoluter Pfad sein: $base"
    return 1
  fi

  read -r -p "Service-User [$DEFAULT_USER]: " svc_user || true
  svc_user="${svc_user:-$DEFAULT_USER}"
  if id -- "$svc_user" &>/dev/null; then
    # Existing account (local or e.g. provided via SSSD/Winbind): use its
    # primary group.
    if ! svc_group="$(id -gn -- "$svc_user")" || [[ -z "$svc_group" ]]; then
      log_error "Gruppe von '$svc_user' konnte nicht ermittelt werden. Abbruch."
      return 1
    fi
    log_info "User '$svc_user' existiert (Gruppe: $svc_group)"
  else
    log_warn "User '$svc_user' existiert nicht."
    read -r -p "Lokal als System-User anlegen? [J/n]: " create_user || true
    create_user="${create_user:-J}"
    if [[ "$create_user" =~ ^[JjYy]$ ]]; then
      if ! adduser --system --group --home "$base" --shell /usr/sbin/nologin "$svc_user"; then
        log_error "User '$svc_user' konnte nicht angelegt werden. Abbruch."
        return 1
      fi
      # `adduser --system --group` creates a group named like the user.
      svc_group="$svc_user"
      log_info "User '$svc_user' angelegt ✓"
    else
      log_error "User muss existieren (z.B. via AD/SSSD). Abbruch."
      return 1
    fi
  fi

  log_info "Lege Datenverzeichnisse unter $base an..."
  if ! mkdir -p "$base"/{incoming,outgoing,working,error} \
    || ! chown -R "$svc_user":"$svc_group" "$base"; then
    log_error "Datenverzeichnisse unter $base konnten nicht angelegt werden. Abbruch."
    return 1
  fi

  log_info "Erstelle Config $CONFIG_DIR/$inst.toml..."
  # Escape the characters that are special on the replacement side of
  # `s|...|...|` (backslash, '&' and the delimiter '|') so that an unusual
  # base path cannot break or corrupt the substitution.
  # NOTE(review): the path ends up inside TOML values; quotes or backslashes
  # in the path may still need TOML-level escaping — confirm against
  # config.example.toml.
  base_repl="$(printf '%s' "$base" | sed -e 's/[\\&|]/\\&/g')" || return 1
  if ! sed \
      -e "s|/var/lib/pdf-ocr-hotfolder/incoming|$base_repl/incoming|" \
      -e "s|/var/lib/pdf-ocr-hotfolder/outgoing|$base_repl/outgoing|" \
      -e "s|/var/lib/pdf-ocr-hotfolder/working|$base_repl/working|" \
      -e "s|/var/lib/pdf-ocr-hotfolder/error|$base_repl/error|" \
      "$INSTALL_DIR/config.example.toml" > "$cfg_file" \
    || ! chown root:"$svc_group" "$cfg_file" \
    || ! chmod 640 "$cfg_file"; then
    # Do not leave a partial config behind: it would make the next attempt
    # fail with "instance already exists".
    rm -f -- "$cfg_file"
    log_error "Config $cfg_file konnte nicht erstellt werden. Abbruch."
    return 1
  fi
  # NOTE(review): CONFIG_DIR is created as root:DEFAULT_USER with mode 750;
  # a service user outside that group may be unable to reach its config
  # file — verify against the unit file.

  # Drop-in overriding User/Group when the instance does not run as the
  # default service user.
  if [[ "$svc_user" != "$DEFAULT_USER" ]]; then
    dropin_dir="/etc/systemd/system/pdf-ocr-hotfolder@${inst}.service.d"
    if ! mkdir -p "$dropin_dir" \
      || ! printf '[Service]\nUser=%s\nGroup=%s\n' "$svc_user" "$svc_group" \
           > "$dropin_dir/user.conf"; then
      rm -f -- "$cfg_file"
      log_error "Drop-in für User '$svc_user' konnte nicht erstellt werden. Abbruch."
      return 1
    fi
    log_info "Drop-in für User '$svc_user' erstellt"
  fi

  # A failure to reload or start is not fatal here: the status check below
  # reports a unit that is not running and points to the journal.
  systemctl daemon-reload || true
  systemctl enable --now "$unit" || true
  # Give the service a moment to come up (or fail) before checking its state.
  sleep 1
  if systemctl is-active --quiet "$unit"; then
    log_info "✅ Instanz '$inst' läuft"
  else
    log_warn "Instanz '$inst' läuft nicht. Logs: journalctl -u pdf-ocr-hotfolder@${inst}"
  fi

  echo
  echo " Config: $CONFIG_DIR/$inst.toml"
  echo " Eingang: $base/incoming"
  echo " Ausgang: $base/outgoing"
  echo " User: $svc_user ($svc_group)"
  echo
}
# ============================================================
# Main
# ============================================================
echo
echo "=========================================="
echo " PDF OCR Hotfolder — Installer"
echo "=========================================="

# The base installation counts as present only when both the venv and the
# systemd template unit exist; otherwise (re-)run it.
if [[ ! -d "$INSTALL_DIR/venv" || ! -f "/etc/systemd/system/$SERVICE_TEMPLATE" ]]; then
  log_step "Basis-Installation"
  install_base
else
  log_info "Basis-Installation bereits vorhanden ($INSTALL_DIR)"
  log_info "Überspringe Basis-Setup (nutze update.sh für Code-Updates)"
fi

show_existing_instances

# With no instance configured yet, go straight into creating the first one.
# A failed attempt is not fatal: the loop below offers another try.
mapfile -t existing < <(list_instances)
if [[ "${#existing[@]}" -eq 0 ]]; then
  log_info "Lege erste Hotfolder-Instanz an."
  create_instance || true
fi

while true; do
  # EOF on stdin (e.g. a non-interactive run) makes `read` fail. Treat it
  # like the default answer "N" instead of letting `set -e` abort the script
  # before the final summary is printed.
  if ! read -r -p "Weitere Instanz anlegen? [j/N]: " MORE; then
    echo
    break
  fi
  MORE="${MORE:-N}"
  if [[ "$MORE" =~ ^[JjYy]$ ]]; then
    create_instance || true
  else
    break
  fi
done

echo
echo "=========================================="
echo " Fertig"
echo "=========================================="
show_existing_instances
echo " Logs: journalctl -u pdf-ocr-hotfolder@<instanz> -f"
echo " Neustart: systemctl restart pdf-ocr-hotfolder@<instanz>"
echo " Update: sudo ./update.sh"
echo