cinny: harden + version-control the webhook web-deploy (lotus_deploy.sh)
The live /usr/local/bin/lotus_deploy.sh (the `lotus-deploy` webhook target) was never under version control and had rotted into two deploy-killing bugs that froze chat.lotusguild.org on an old build:

1. CI gate: it waited on the WHOLE workflow run with a 15-min cap. Web CI shares the single act_runner with the slow Tauri desktop builds, so a web run could sit queued >15 min -> "result: timeout" -> deploy aborted. Now it gates only on the "Build & Quality Checks" commit-status context (build + unit tests), decoupled from "Trigger Desktop Build", and waits up to 45 min.

2. Dead element-call copy: `cp node_modules/@element-hq/element-call-embedded/...` under `set -e` aborted every deploy after the widget was forked to @lotusguild/element-call-embedded. The build already emits dist/public/element-call; replaced the copy with a presence check.

Also: rsync now excludes config.json so the app deploy stops clobbering the production runtime config (homeserver list / allowCustomHomeservers) that the matrix repo owns. lxc106-cinny.sh now installs this script (syntax-checked).

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
Executable
+123
@@ -0,0 +1,123 @@
|
||||
#!/bin/bash
set -e

# Fixed locations used by every later step of the deploy.
REPO="/opt/lotus-cinny"
WEBROOT="/var/www/html"
LOCKFILE="/tmp/lotus-deploy.lock"
LOGFILE="/var/log/lotus-deploy.log"

# Print the current local time in the format used for every log line.
stamp() {
  date '+%Y-%m-%d %H:%M:%S'
}

# Single-instance guard: keep fd 200 open on the lock file and try to take a
# non-blocking flock on it. If the lock cannot be taken, record that in the
# log and exit successfully without deploying.
exec 200>"$LOCKFILE"
if ! flock -n 200; then
  echo "[$(stamp)] Deploy already in progress, skipping." >> "$LOGFILE"
  exit 0
fi

# From here on, stdout and stderr of everything we run are appended to the log.
exec >> "$LOGFILE" 2>&1
echo "[$(stamp)] ===== Deploy triggered ====="

# Optional secrets file (auth tokens etc — not in git). `set -a` exports every
# variable it assigns so child processes (curl, npm, ...) can see them.
if [ -f /etc/lotus-deploy.env ]; then
  set -a
  # shellcheck disable=SC1091
  source /etc/lotus-deploy.env
  set +a
fi

cd "$REPO"

# Refresh remotes and pin the commit this run works with.
echo "[$(stamp)] Fetching origin/lotus..."
git fetch --all
COMMIT_SHA=$(git rev-parse origin/lotus)
echo "[$(stamp)] Commit: $COMMIT_SHA"
|
||||
|
||||
# ── CI gate ─────────────────────────────────────────────────────────────────
# Wait for the web build+test CI to pass before deploying. We gate ONLY on the
# "Build & Quality Checks" commit-status context (npm build + unit tests) — NOT
# the whole workflow run. This decouples the web deploy from the unrelated
# "Trigger Desktop Build" job and the slow downstream Tauri desktop builds that
# share the act_runner: web CI can sit queued behind a 30-min desktop build, so
# we keep waiting while the context is pending/absent, and only abort on an
# explicit failure or the (generous) cap. The previous version gated on the
# overall workflow run with a 15-min cap, so a web CI queued behind a desktop
# build timed out -> "result: timeout" -> deploy aborted -> the site stayed
# frozen on an old build for days.

#######################################
# Extract the state of the gating context from a commit-status JSON document.
# Arguments: $1 - substring to look for in each status' "context"
# Inputs:    JSON document on stdin (expects a top-level "statuses" list)
# Outputs:   the "status" value of the first matching entry on stdout, or
#            "pending" when the input is unparsable, has no usable "statuses"
#            list, has no matching context, or python3 itself fails
# Returns:   always 0, so it is safe under `set -e`
#######################################
lotus_gate_state() {
  # The gate string travels via the environment so it is never interpolated
  # into the Python source. The Python code must stay at column 0.
  GATE="$1" python3 -c "
import json, os, sys
try:
    doc = json.load(sys.stdin)
except Exception:
    print('pending'); sys.exit(0)
gate = os.environ.get('GATE', '')
statuses = doc.get('statuses') if isinstance(doc, dict) else None
for s in statuses or []:
    if isinstance(s, dict) and gate in (s.get('context') or ''):
        print(s.get('status') or 'pending'); break
else:
    print('pending')
" 2>/dev/null || echo pending
}

if [ -n "${GITEA_API_TOKEN:-}" ]; then
  GITEA_API="https://code.lotusguild.org/api/v1"
  REPO_PATH="LotusGuild/cinny"
  GATE_CONTEXT="Build & Quality Checks"
  MAX_WAIT=2700      # 45 min — web CI can queue behind long Tauri desktop builds
  POLL_INTERVAL=15
  CURL_TIMEOUT=30    # per-request cap so one hung connection cannot stall the gate
  gate_start=$SECONDS
  elapsed=0
  ci_result=""
  echo "[$(date '+%Y-%m-%d %H:%M:%S')] Waiting for CI '$GATE_CONTEXT' on $COMMIT_SHA..."

  while [ "$elapsed" -lt "$MAX_WAIT" ]; do
    # A failed or timed-out request yields empty input, which the parser
    # reports as "pending", so transient API errors just mean another poll.
    state=$(curl -s --connect-timeout 10 --max-time "$CURL_TIMEOUT" \
      -H "Authorization: token $GITEA_API_TOKEN" \
      "$GITEA_API/repos/$REPO_PATH/commits/$COMMIT_SHA/status" \
      | lotus_gate_state "$GATE_CONTEXT")

    case "$state" in
      success) ci_result=success; break ;;
      failure|error) ci_result="$state"; break ;;
    esac
    echo "[$(date '+%Y-%m-%d %H:%M:%S')] CI not yet passed (${elapsed}s elapsed, '$GATE_CONTEXT': ${state}), waiting..."
    sleep "$POLL_INTERVAL"
    # Wall-clock time since the gate started (request time included), so
    # MAX_WAIT is a real upper bound rather than a count of sleep intervals.
    elapsed=$((SECONDS - gate_start))
  done

  if [ "$ci_result" != "success" ]; then
    echo "[$(date '+%Y-%m-%d %H:%M:%S')] CI did not pass (result: ${ci_result:-timeout}). Aborting deploy."
    exit 1
  fi
  echo "[$(date '+%Y-%m-%d %H:%M:%S')] CI '$GATE_CONTEXT' passed. Proceeding with deploy."
else
  echo "[$(date '+%Y-%m-%d %H:%M:%S')] WARNING: GITEA_API_TOKEN not set, deploying without CI gate."
fi
|
||||
|
||||
# Check out exactly the commit that was resolved right after the fetch — the
# same SHA the CI gate checked and that the build is tagged with below. Using
# the pinned SHA instead of the origin/lotus ref means a ref that moved in the
# meantime cannot cause an un-gated tree to be built. `:?` aborts if the
# variable is unexpectedly empty.
git reset --hard "${COMMIT_SHA:?COMMIT_SHA is not set}"

# Tag this build with the exact commit so Sentry can link errors to source
export VITE_APP_VERSION=$COMMIT_SHA
echo "[$(date '+%Y-%m-%d %H:%M:%S')] Building commit $VITE_APP_VERSION..."

echo "[$(date '+%Y-%m-%d %H:%M:%S')] Installing dependencies..."
npm ci --ignore-scripts

echo "[$(date '+%Y-%m-%d %H:%M:%S')] Building..."
NODE_OPTIONS=--max_old_space_size=4096 npm run build

# The Element Call widget (the @lotusguild/element-call-embedded fork) is emitted
# into dist/public/element-call by the build itself — no manual copy is needed.
# (The old `cp node_modules/@element-hq/element-call-embedded/dist/.` step was a
# deploy-killer: the package was forked to @lotusguild, so under `set -e` that
# now-missing path aborted every deploy.) Verify the bundle actually landed
# before publishing rather than blindly copying.
if [ ! -f "$REPO/dist/public/element-call/index.html" ]; then
  echo "[$(date '+%Y-%m-%d %H:%M:%S')] ERROR: dist/public/element-call/ missing after build (check @lotusguild/element-call-embedded pin). Aborting."
  exit 1
fi
|
||||
|
||||
echo "[$(date '+%Y-%m-%d %H:%M:%S')] Deploying to $WEBROOT..."
# Exclude config.json: the production runtime config (homeserver list,
# allowCustomHomeservers, etc.) is owned by the matrix repo and deployed to
# /var/www/html/config.json by lxc106-cinny.sh. The build ships a DEV default
# (allowCustomHomeservers:true); rsyncing it would clobber the production config
# on every deploy. Keep the app bundle and the runtime config separate.
#
# The pattern is anchored with a leading "/" so it matches only the top-level
# config.json of the transfer. An unanchored "config.json" would also skip (and
# shield from --delete) any file of that name nested deeper in the bundle.
# `:?` aborts instead of syncing with --delete into "/" if WEBROOT is empty.
rsync -a --delete --exclude=/config.json dist/ "${WEBROOT:?WEBROOT is not set}/"

# Inject runtime secrets that are never stored in git. If the production
# config.json carries the "gifApiKey": "" placeholder, fill it from the env.
if [ -n "${GIPHY_API_KEY:-}" ]; then
  config_file="$WEBROOT/config.json"
  if [ ! -f "$config_file" ]; then
    echo "[$(date '+%Y-%m-%d %H:%M:%S')] ERROR: $config_file not found; cannot inject GIPHY_API_KEY."
    exit 1
  fi
  if grep -qF '"gifApiKey": ""' "$config_file"; then
    # Escape the characters that are special in a sed replacement using "|" as
    # the delimiter (backslash, "&", "|") so the key is inserted literally.
    escaped_key=$(printf '%s' "$GIPHY_API_KEY" | sed -e 's/[\\&|]/\\&/g')
    sed -i "s|\"gifApiKey\": \"\"|\"gifApiKey\": \"$escaped_key\"|" "$config_file"
    echo "[$(date '+%Y-%m-%d %H:%M:%S')] Injected GIPHY_API_KEY into config.json"
  else
    echo "[$(date '+%Y-%m-%d %H:%M:%S')] No empty gifApiKey placeholder in config.json; nothing to inject."
  fi
fi

# Logged last so the marker is only written once every step above succeeded.
echo "[$(date '+%Y-%m-%d %H:%M:%S')] ===== Deploy complete ($VITE_APP_VERSION) ====="
|
||||
Reference in New Issue
Block a user