mirror of
https://github.com/community-scripts/ProxmoxVE.git
synced 2026-05-09 01:58:52 +02:00
tools.func: add setup_nltk as new function (#14314)
This commit is contained in:
committed by
GitHub
parent
24fbf24c6d
commit
02eaf288bf
@@ -81,11 +81,7 @@ STARTEOF
|
||||
cp -r /opt/mealie/frontend/dist/* /opt/mealie/mealie/frontend/
|
||||
msg_ok "Copied Frontend"
|
||||
|
||||
msg_info "Updating NLTK Data"
|
||||
mkdir -p /nltk_data/
|
||||
cd /opt/mealie
|
||||
$STD uv run python -m nltk.downloader -d /nltk_data averaged_perceptron_tagger_eng
|
||||
msg_ok "Updated NLTK Data"
|
||||
setup_nltk "averaged_perceptron_tagger_eng" "/nltk_data"
|
||||
|
||||
msg_info "Starting Service"
|
||||
systemctl start mealie
|
||||
|
||||
@@ -164,13 +164,7 @@ function update_script() {
|
||||
fi
|
||||
fi
|
||||
|
||||
msg_info "Updating NLTK Data"
|
||||
cd /opt/paperless
|
||||
$STD uv run python -m nltk.downloader -d /usr/share/nltk_data snowball_data
|
||||
$STD uv run python -m nltk.downloader -d /usr/share/nltk_data stopwords
|
||||
$STD uv run python -m nltk.downloader -d /usr/share/nltk_data punkt_tab ||
|
||||
$STD uv run python -m nltk.downloader -d /usr/share/nltk_data punkt
|
||||
msg_ok "Updated NLTK Data"
|
||||
setup_nltk "snowball_data stopwords punkt_tab" "/usr/share/nltk_data"
|
||||
|
||||
msg_info "Starting all Paperless-ngx Services"
|
||||
systemctl start paperless-consumer paperless-webserver paperless-scheduler paperless-task-queue
|
||||
|
||||
@@ -47,8 +47,7 @@ msg_info "Setting up KitchenOwl"
|
||||
cd /opt/kitchenowl/backend
|
||||
$STD uv sync --no-dev
|
||||
sed -i 's/default=True/default=False/' /opt/kitchenowl/backend/wsgi.py
|
||||
mkdir -p /nltk_data
|
||||
$STD uv run python -m nltk.downloader -d /nltk_data averaged_perceptron_tagger_eng
|
||||
setup_nltk "averaged_perceptron_tagger_eng" "/nltk_data"
|
||||
JWT_SECRET=$(openssl rand -hex 32)
|
||||
mkdir -p /opt/kitchenowl/data
|
||||
cat <<EOF >/opt/kitchenowl/kitchenowl.env
|
||||
|
||||
@@ -55,11 +55,7 @@ mkdir -p /opt/mealie/mealie/frontend
|
||||
cp -r /opt/mealie/frontend/dist/* /opt/mealie/mealie/frontend/
|
||||
msg_ok "Copied Frontend"
|
||||
|
||||
msg_info "Downloading NLTK Data"
|
||||
mkdir -p /nltk_data/
|
||||
cd /opt/mealie
|
||||
$STD uv run python -m nltk.downloader -d /nltk_data averaged_perceptron_tagger_eng
|
||||
msg_ok "Downloaded NLTK Data"
|
||||
setup_nltk "averaged_perceptron_tagger_eng" "/nltk_data"
|
||||
|
||||
msg_info "Writing Environment File"
|
||||
SECRET=$(openssl rand -hex 32)
|
||||
|
||||
@@ -94,18 +94,12 @@ user.save()
|
||||
EOF
|
||||
msg_ok "Set up admin Paperless-ngx User & Password"
|
||||
|
||||
msg_info "Installing Natural Language Toolkit (Patience)"
|
||||
cd /opt/paperless
|
||||
$STD uv run python -m nltk.downloader -d /usr/share/nltk_data snowball_data
|
||||
$STD uv run python -m nltk.downloader -d /usr/share/nltk_data stopwords
|
||||
$STD uv run python -m nltk.downloader -d /usr/share/nltk_data punkt_tab ||
|
||||
$STD uv run python -m nltk.downloader -d /usr/share/nltk_data punkt
|
||||
setup_nltk "snowball_data stopwords punkt_tab" "/usr/share/nltk_data"
|
||||
for policy_file in /etc/ImageMagick-6/policy.xml /etc/ImageMagick-7/policy.xml; do
|
||||
if [[ -f "$policy_file" ]]; then
|
||||
sed -i -e 's/rights="none" pattern="PDF"/rights="read|write" pattern="PDF"/' "$policy_file"
|
||||
fi
|
||||
done
|
||||
msg_ok "Installed Natural Language Toolkit"
|
||||
|
||||
msg_info "Creating Services"
|
||||
cat <<EOF >/etc/systemd/system/paperless-scheduler.service
|
||||
|
||||
@@ -2095,10 +2095,10 @@ get_latest_gh_tag() {
|
||||
local count
|
||||
count=$(jq 'length' "$temp_file" 2>/dev/null || echo 0)
|
||||
if [[ "$count" -gt 0 ]]; then
|
||||
tag=$(jq -r '.[].ref' "$temp_file" \
|
||||
| sed 's|^refs/tags/||' \
|
||||
| sort -V \
|
||||
| tail -n1)
|
||||
tag=$(jq -r '.[].ref' "$temp_file" |
|
||||
sed 's|^refs/tags/||' |
|
||||
sort -V |
|
||||
tail -n1)
|
||||
fi
|
||||
else
|
||||
# No prefix: just take the first (newest) tag from /tags
|
||||
@@ -9439,3 +9439,73 @@ function fetch_and_deploy_gl_release() {
|
||||
msg_ok "Deployed: $app ($version)"
|
||||
rm -rf "$tmpdir"
|
||||
}
|
||||
|
||||
# ------------------------------------------------------------------------------
# Internal helper for setup_nltk: print the value of XML attribute $2 found in
# the single index line $1. Prints nothing when the attribute is absent.
# Always returns 0 so callers may use it in `var=$(...)` under `set -e`.
# ------------------------------------------------------------------------------
function _nltk_index_attr() {
  # Require start-of-line or whitespace before the name so that e.g. "url"
  # cannot match the tail of a longer attribute name.
  local attr_re="(^|[[:space:]])${2}=\"([^\"]+)\""
  if [[ "$1" =~ $attr_re ]]; then
    printf '%s' "${BASH_REMATCH[2]}"
  fi
}

# ------------------------------------------------------------------------------
# Download NLTK data packages directly from GitHub, bypassing Python.
# Avoids CPU-instruction failures (SIGILL) on older hardware lacking AVX.
#
# Each requested package is looked up in the upstream NLTK index (index.xml).
# The entry's `subdir`, `url` and `unzip` attributes decide where the archive
# goes and whether it is extracted (unzip="1") or stored as <pkg>.zip.
# A failing package does not stop the remaining ones from being processed.
#
# Usage:
#   setup_nltk "averaged_perceptron_tagger_eng" "/nltk_data"
#   setup_nltk "snowball_data stopwords punkt_tab" "/usr/share/nltk_data"
#
# Parameters:
#   $1 - Space-separated list of NLTK package IDs
#   $2 - Target directory (default: /usr/share/nltk_data)
#
# Returns: 0 on success, non-zero if any package failed
# ------------------------------------------------------------------------------
function setup_nltk() {
  local packages="${1:?setup_nltk requires at least one package name}"
  local target_dir="${2:-/usr/share/nltk_data}"
  local NLTK_INDEX_URL="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/index.xml"
  local index_xml rc=0

  ensure_dependencies unzip

  index_xml=$(curl_with_retry "$NLTK_INDEX_URL" "-") || {
    msg_error "Failed to fetch NLTK package index"
    return 1
  }

  local pkg pkg_line subdir pkg_url do_unzip tmp_zip dest_dir
  # Intentionally unquoted: $packages is a space-separated list.
  for pkg in $packages; do
    msg_info "Downloading NLTK: $pkg"

    # Fixed-string match on the exact id attribute. `|| true` keeps a miss from
    # aborting the caller under `set -e`/`pipefail`; it is reported below instead.
    pkg_line=$(grep -F -m1 -- "id=\"${pkg}\"" <<<"$index_xml" || true)
    if [[ -z "$pkg_line" ]]; then
      msg_error "NLTK package not found in index: $pkg"
      rc=1
      continue
    fi

    subdir=$(_nltk_index_attr "$pkg_line" subdir)
    pkg_url=$(_nltk_index_attr "$pkg_line" url)
    # The unzip attribute is optional; empty means "store the archive as-is".
    do_unzip=$(_nltk_index_attr "$pkg_line" unzip)

    if [[ -z "$subdir" || -z "$pkg_url" ]]; then
      msg_error "Could not parse NLTK index entry for: $pkg"
      rc=1
      continue
    fi

    dest_dir="${target_dir}/${subdir}"
    if ! mkdir -p "$dest_dir"; then
      msg_error "Could not create NLTK directory: $dest_dir"
      rc=1
      continue
    fi

    tmp_zip=$(mktemp --suffix=.zip) || {
      msg_error "Could not create temporary file for NLTK package: $pkg"
      rc=1
      continue
    }

    if CURL_TIMEOUT=120 curl_with_retry "$pkg_url" "$tmp_zip"; then
      if [[ "$do_unzip" == "1" ]]; then
        # $STD is deliberately unquoted: it is an optional command prefix.
        if $STD unzip -q -o "$tmp_zip" -d "${dest_dir}/"; then
          msg_ok "Downloaded NLTK: $pkg"
        else
          msg_error "Failed to extract NLTK package: $pkg"
          rc=1
        fi
        rm -f "$tmp_zip"
      else
        # mktemp creates the file with mode 0600; relax it after the move so
        # the stored archive is readable by non-owner service users.
        if mv "$tmp_zip" "${dest_dir}/${pkg}.zip" && chmod 644 "${dest_dir}/${pkg}.zip"; then
          msg_ok "Downloaded NLTK: $pkg"
        else
          msg_error "Failed to store NLTK package: $pkg"
          rm -f "$tmp_zip"
          rc=1
        fi
      fi
    else
      msg_error "Failed to download NLTK package: $pkg"
      rm -f "$tmp_zip"
      rc=1
    fi
  done

  return $rc
}
|
||||
|
||||
Reference in New Issue
Block a user