mirror of
https://github.com/kmein/niveum
synced 2026-03-16 18:21:07 +01:00
24 lines
373 B
Bash
Executable File
24 lines
373 B
Bash
Executable File
#!/usr/bin/env nix-shell
#! nix-shell -i bash -p poppler_utils tesseract4
#
# OCR a PDF and print the recognised text on stdout.
#
# Each page is rendered to a PNG with pdftoppm inside a throw-away
# directory, every PNG is passed through tesseract, and the resulting
# per-page text files are concatenated in glob (page) order.
#
# Usage: pdf-ocr FILE.pdf
# Exit status: 1 on a missing/invalid argument or when OCR fails.
set -eu

# Print the usage line to stderr and abort.
usage() {
  echo "Usage: $0 FILE.pdf" >&2
  exit 1
}

# Validate the argument BEFORE resolving it: under `set -u` a missing $1
# aborts with "unbound variable", and under `set -e` realpath aborts on a
# nonexistent path, so the usage message would otherwise never be shown.
if [ "$#" -lt 1 ] || [ ! -f "$1" ]; then
  usage
fi

# Absolute path is required because the script changes directory below.
pdf_path="$(realpath -- "$1")"

tmpdir="$(mktemp -d)"
# Quote the path inside the trap so a TMPDIR containing whitespace or glob
# characters is removed as a single argument.
trap 'rm -rf -- "$tmpdir"' EXIT

cd "$tmpdir"

# Produces pdf-ocr-<page>.png, one file per page.
pdftoppm -png "$pdf_path" pdf-ocr

for png in pdf-ocr-*.png; do
  # An unmatched glob stays literal; report it instead of feeding the
  # pattern itself to tesseract.
  [ -e "$png" ] || {
    echo "pdftoppm produced no page images for $pdf_path" >&2
    exit 1
  }

  # tesseract appends ".txt" to the output base itself, so pass the name
  # without extension (yields pdf-ocr-<page>.txt).
  # stderr is silenced on purpose: tesseract writes its banner and
  # resolution warnings there, which would clutter the OCR output.
  tesseract "$png" "${png%.png}" 2>/dev/null || {
    echo "tesseract failed on $png" >&2
    exit 1
  }
done

cat pdf-ocr-*.txt