fix(ci): pipeline race-condition + health-check retry + maestro secrets
Hauptproblem: Webhook-Deploy (deploy.sh) und GH-Actions-Deploy (deploy-from-artifact.sh) liefen gleichzeitig → Race auf .output-staging und doppelter pm2-restart.

Fixes:
- deploy-from-artifact.sh: setzt .deploy-ga.lock (noclobber, mit PID), während der Deploy läuft; stale Locks werden erkannt und überschrieben
- deploy.sh: prüft .deploy-ga.lock beim Start — wenn GH-Actions aktiv ist, sauberes exit 0 statt Kollision
- Health-Check: Retry-Loop (12 × 5s = max. 60s) statt einmaligem sleep 5; Infisical-Login + Nitro-Start brauchen auf gestresstem Server bis zu 30s
- maestro-cloud.yml: ungültiges `if: secrets.X != ''` entfernt (der secrets-Kontext ist in if-Conditions von GH-Actions nicht direkt referenzierbar); stattdessen expliziter Secrets-Check als erster Step mit klarer Fehlermeldung
- pnpm --prefer-offline in deploy-from-artifact.sh: nutzt den Store-Cache
- .gitignore: .deploy-ga.lock ergänzt

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
617312f367
commit
372aaa43dd
20
.github/workflows/deploy-admin-staging.yml
vendored
20
.github/workflows/deploy-admin-staging.yml
vendored
@ -110,11 +110,15 @@ jobs:
|
|||||||
|
|
||||||
- name: Health-Check (HTTP 3xx/200 = Server erreichbar)
|
- name: Health-Check (HTTP 3xx/200 = Server erreichbar)
|
||||||
run: |
|
run: |
|
||||||
sleep 5
|
for i in $(seq 1 12); do
|
||||||
STATUS=$(curl -sS -o /dev/null -w '%{http_code}' \
|
STATUS=$(curl -sS -o /dev/null -w '%{http_code}' \
|
||||||
https://admin.staging.rebreak.org/ || echo "000")
|
https://admin.staging.rebreak.org/ 2>/dev/null || echo "000")
|
||||||
echo "admin.staging.rebreak.org/ -> HTTP $STATUS"
|
echo "Attempt $i: admin.staging.rebreak.org/ -> HTTP $STATUS"
|
||||||
if [ "$STATUS" = "000" ] || [ "$STATUS" = "502" ] || [ "$STATUS" = "503" ]; then
|
if [ "$STATUS" != "000" ] && [ "$STATUS" != "502" ] && [ "$STATUS" != "503" ]; then
|
||||||
echo "FAIL: admin-staging nicht erreichbar (HTTP $STATUS)"
|
echo "Health-Check PASSED"
|
||||||
exit 1
|
exit 0
|
||||||
fi
|
fi
|
||||||
|
sleep 5
|
||||||
|
done
|
||||||
|
echo "FAIL: admin-staging nicht erreichbar (HTTP $STATUS) nach 60s"
|
||||||
|
exit 1
|
||||||
|
|||||||
22
.github/workflows/deploy-staging.yml
vendored
22
.github/workflows/deploy-staging.yml
vendored
@ -137,11 +137,17 @@ jobs:
|
|||||||
|
|
||||||
- name: Health-Check (HTTP 401 = Server erreichbar + auth-protected)
|
- name: Health-Check (HTTP 401 = Server erreichbar + auth-protected)
|
||||||
run: |
|
run: |
|
||||||
sleep 5
|
# Infisical-Login + Nitro-Start + Cron-Init braucht auf gestresstem Server bis 30s.
|
||||||
STATUS=$(curl -sS -o /dev/null -w '%{http_code}' \
|
# Retry-Loop: bis zu 12x mit 5s Pause = max 60s warten.
|
||||||
https://staging.rebreak.org/api/auth/me || echo "000")
|
for i in $(seq 1 12); do
|
||||||
echo "staging.rebreak.org/api/auth/me -> HTTP $STATUS"
|
STATUS=$(curl -sS -o /dev/null -w '%{http_code}' \
|
||||||
if [ "$STATUS" != "401" ] && [ "$STATUS" != "200" ]; then
|
https://staging.rebreak.org/api/auth/me 2>/dev/null || echo "000")
|
||||||
echo "FAIL: erwartet 401/200, bekommen $STATUS"
|
echo "Attempt $i: staging.rebreak.org/api/auth/me -> HTTP $STATUS"
|
||||||
exit 1
|
if [ "$STATUS" = "401" ] || [ "$STATUS" = "200" ]; then
|
||||||
fi
|
echo "Health-Check PASSED"
|
||||||
|
exit 0
|
||||||
|
fi
|
||||||
|
sleep 5
|
||||||
|
done
|
||||||
|
echo "FAIL: erwartet 401/200, zuletzt bekommen $STATUS nach 60s"
|
||||||
|
exit 1
|
||||||
|
|||||||
50
.github/workflows/maestro-cloud.yml
vendored
50
.github/workflows/maestro-cloud.yml
vendored
@ -1,9 +1,9 @@
|
|||||||
# Maestro Cloud — E2E template for rebreak-native.
|
# Maestro Cloud — E2E for rebreak-native.
|
||||||
# STATUS: TEMPLATE ONLY — not active. Requires User confirmation before enabling.
|
# STATUS: TEMPLATE ONLY — not active. Requires User confirmation before enabling.
|
||||||
#
|
#
|
||||||
# Trigger: manual dispatch OR PR to main (commented out — enable after User GO).
|
# Trigger: manual dispatch only (PR-trigger commented out — enable after User GO).
|
||||||
# Requires:
|
# Requires:
|
||||||
# - MAESTRO_CLOUD_API_KEY in GitHub Actions secrets
|
# - MAESTRO_CLOUD_API_KEY in GitHub Actions secrets (environment: staging)
|
||||||
# - EAS_TOKEN in GitHub Actions secrets
|
# - EAS_TOKEN in GitHub Actions secrets
|
||||||
# - E2E_TEST_USER + E2E_TEST_PASSWORD in GitHub Actions secrets
|
# - E2E_TEST_USER + E2E_TEST_PASSWORD in GitHub Actions secrets
|
||||||
# - Maestro Cloud account configured at mobile.dev
|
# - Maestro Cloud account configured at mobile.dev
|
||||||
@ -32,12 +32,28 @@ jobs:
|
|||||||
maestro-cloud:
|
maestro-cloud:
|
||||||
name: E2E (${{ inputs.platform || 'ios' }})
|
name: E2E (${{ inputs.platform || 'ios' }})
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
|
environment: staging
|
||||||
# Skip entirely if Maestro Cloud key is not configured —
|
|
||||||
# avoids CI failure on forks or before Cloud is set up.
|
|
||||||
if: ${{ secrets.MAESTRO_CLOUD_API_KEY != '' }}
|
|
||||||
|
|
||||||
steps:
|
steps:
|
||||||
|
- name: Check required secrets
|
||||||
|
env:
|
||||||
|
MAESTRO_CLOUD_API_KEY: ${{ secrets.MAESTRO_CLOUD_API_KEY }}
|
||||||
|
EAS_TOKEN: ${{ secrets.EAS_TOKEN }}
|
||||||
|
E2E_TEST_USER: ${{ secrets.E2E_TEST_USER }}
|
||||||
|
E2E_TEST_PASSWORD: ${{ secrets.E2E_TEST_PASSWORD }}
|
||||||
|
run: |
|
||||||
|
missing=()
|
||||||
|
[ -z "$MAESTRO_CLOUD_API_KEY" ] && missing+=("MAESTRO_CLOUD_API_KEY")
|
||||||
|
[ -z "$EAS_TOKEN" ] && missing+=("EAS_TOKEN")
|
||||||
|
[ -z "$E2E_TEST_USER" ] && missing+=("E2E_TEST_USER")
|
||||||
|
[ -z "$E2E_TEST_PASSWORD" ] && missing+=("E2E_TEST_PASSWORD")
|
||||||
|
if [ ${#missing[@]} -gt 0 ]; then
|
||||||
|
echo "FATAL: Folgende Secrets fehlen in GitHub Actions (environment: staging):"
|
||||||
|
printf ' - %s\n' "${missing[@]}"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
echo "All required secrets present"
|
||||||
|
|
||||||
- name: Checkout
|
- name: Checkout
|
||||||
uses: actions/checkout@v4
|
uses: actions/checkout@v4
|
||||||
|
|
||||||
@ -48,16 +64,15 @@ jobs:
|
|||||||
|
|
||||||
- name: Setup pnpm
|
- name: Setup pnpm
|
||||||
uses: pnpm/action-setup@v4
|
uses: pnpm/action-setup@v4
|
||||||
with:
|
|
||||||
version: 9
|
|
||||||
|
|
||||||
- name: Install dependencies
|
- name: Install dependencies
|
||||||
run: pnpm install --frozen-lockfile
|
run: pnpm install --frozen-lockfile
|
||||||
working-directory: apps/rebreak-native
|
working-directory: apps/rebreak-native
|
||||||
|
|
||||||
# Build app via EAS — requires EAS_TOKEN secret and eas.json configured.
|
# Build app via EAS.
|
||||||
# Profile "preview" must produce a .ipa (iOS) or .apk (Android).
|
# Profile "preview" must produce a .ipa (iOS) or .apk (Android).
|
||||||
- name: Build with EAS
|
# eas.json in apps/rebreak-native/ muss "preview"-Profile definieren.
|
||||||
|
- name: Setup EAS
|
||||||
uses: expo/expo-github-action@v8
|
uses: expo/expo-github-action@v8
|
||||||
with:
|
with:
|
||||||
eas-version: latest
|
eas-version: latest
|
||||||
@ -72,7 +87,6 @@ jobs:
|
|||||||
--output ./build-artifact
|
--output ./build-artifact
|
||||||
working-directory: apps/rebreak-native
|
working-directory: apps/rebreak-native
|
||||||
|
|
||||||
# Install Maestro CLI
|
|
||||||
- name: Install Maestro CLI
|
- name: Install Maestro CLI
|
||||||
run: curl -Ls "https://get.maestro.mobile.dev" | bash
|
run: curl -Ls "https://get.maestro.mobile.dev" | bash
|
||||||
env:
|
env:
|
||||||
@ -81,14 +95,12 @@ jobs:
|
|||||||
- name: Add Maestro to PATH
|
- name: Add Maestro to PATH
|
||||||
run: echo "$HOME/.maestro/bin" >> $GITHUB_PATH
|
run: echo "$HOME/.maestro/bin" >> $GITHUB_PATH
|
||||||
|
|
||||||
# Upload build + run flows on Maestro Cloud
|
|
||||||
- name: Run Maestro Cloud
|
- name: Run Maestro Cloud
|
||||||
run: |
|
run: |
|
||||||
maestro cloud \
|
maestro cloud \
|
||||||
--apiKey ${{ secrets.MAESTRO_CLOUD_API_KEY }} \
|
--apiKey "${{ secrets.MAESTRO_CLOUD_API_KEY }}" \
|
||||||
--app ./build-artifact \
|
--app ./apps/rebreak-native/build-artifact \
|
||||||
--device ${{ inputs.platform || 'ios' }} \
|
--device "${{ inputs.platform || 'ios' }}" \
|
||||||
--env=E2E_TEST_USER=${{ secrets.E2E_TEST_USER }} \
|
--env=E2E_TEST_USER="${{ secrets.E2E_TEST_USER }}" \
|
||||||
--env=E2E_TEST_PASSWORD=${{ secrets.E2E_TEST_PASSWORD }} \
|
--env=E2E_TEST_PASSWORD="${{ secrets.E2E_TEST_PASSWORD }}" \
|
||||||
apps/rebreak-native/.maestro/
|
apps/rebreak-native/.maestro/
|
||||||
working-directory: ${{ github.workspace }}
|
|
||||||
|
|||||||
1
.gitignore
vendored
1
.gitignore
vendored
@ -36,3 +36,4 @@ xgit
|
|||||||
|
|
||||||
# Server-only deploy state (NICHT committen — wird vom deploy.sh gepflegt)
|
# Server-only deploy state (NICHT committen — wird vom deploy.sh gepflegt)
|
||||||
.last-deployed-sha
|
.last-deployed-sha
|
||||||
|
.deploy-ga.lock
|
||||||
|
|||||||
@ -11,6 +11,10 @@
|
|||||||
# - Atomic .output-staging-Replacement bleibt
|
# - Atomic .output-staging-Replacement bleibt
|
||||||
#
|
#
|
||||||
# Failure-Mode: Bei Migration-Fehler kein pm2-restart (Daten-Konsistenz-Schutz).
|
# Failure-Mode: Bei Migration-Fehler kein pm2-restart (Daten-Konsistenz-Schutz).
|
||||||
|
#
|
||||||
|
# Lockfile: setzt /srv/rebreak/.deploy-ga.lock waehrend Deploy laeuft.
|
||||||
|
# deploy.sh (webhook-trigger) respektiert diesen Lock und bricht ab.
|
||||||
|
# Verhindert Race-Condition wenn Webhook und GH-Actions gleichzeitig deployen.
|
||||||
|
|
||||||
set -euo pipefail
|
set -euo pipefail
|
||||||
|
|
||||||
@ -19,6 +23,7 @@ APP_DIR="${REPO_ROOT}/backend"
|
|||||||
ARTIFACT="${APP_DIR}/.output-incoming.tar.gz"
|
ARTIFACT="${APP_DIR}/.output-incoming.tar.gz"
|
||||||
PM2_BIN="/root/.nvm/versions/node/v24.11.1/bin/pm2"
|
PM2_BIN="/root/.nvm/versions/node/v24.11.1/bin/pm2"
|
||||||
PNPM_BIN="/root/.nvm/versions/node/v24.11.1/bin/pnpm"
|
PNPM_BIN="/root/.nvm/versions/node/v24.11.1/bin/pnpm"
|
||||||
|
GA_LOCK="${REPO_ROOT}/.deploy-ga.lock"
|
||||||
|
|
||||||
log() { echo "[deploy-artifact] $(date '+%H:%M:%S') $*"; }
|
log() { echo "[deploy-artifact] $(date '+%H:%M:%S') $*"; }
|
||||||
log_err() { echo "[deploy-artifact:err] $(date '+%H:%M:%S') $*" >&2; }
|
log_err() { echo "[deploy-artifact:err] $(date '+%H:%M:%S') $*" >&2; }
|
||||||
@ -27,7 +32,22 @@ log "=== Rebreak Deploy-from-Artifact gestartet ==="
|
|||||||
|
|
||||||
export PATH="/root/.nvm/versions/node/v24.11.1/bin:$PATH"
|
export PATH="/root/.nvm/versions/node/v24.11.1/bin:$PATH"
|
||||||
|
|
||||||
# 0. Sanity-Check Artifact
|
# 0a. Exklusiv-Lock setzen (verhindert parallelen Webhook-Deploy)
|
||||||
|
if ! ( set -o noclobber; echo "$$" > "$GA_LOCK" ) 2>/dev/null; then
|
||||||
|
LOCK_PID=$(cat "$GA_LOCK" 2>/dev/null || echo "?")
|
||||||
|
# Stale lock (Prozess tot)? Dann ueberschreiben.
|
||||||
|
if ! kill -0 "$LOCK_PID" 2>/dev/null; then
|
||||||
|
log "Staler Lock von PID $LOCK_PID gefunden -- ueberschreibe"
|
||||||
|
echo "$$" > "$GA_LOCK"
|
||||||
|
else
|
||||||
|
log_err "Ein anderer Deploy laeuft bereits (PID $LOCK_PID) -- abort"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
# Lock beim Beenden immer aufraeumen
|
||||||
|
trap 'rm -f "$GA_LOCK"' EXIT
|
||||||
|
|
||||||
|
# 0b. Sanity-Check Artifact
|
||||||
[[ -f "$ARTIFACT" ]] || { log_err "Artifact $ARTIFACT fehlt -- abort"; exit 1; }
|
[[ -f "$ARTIFACT" ]] || { log_err "Artifact $ARTIFACT fehlt -- abort"; exit 1; }
|
||||||
|
|
||||||
# 1. Git pull (fuer scripts/-Updates + prisma/migrations + .last-deployed-sha)
|
# 1. Git pull (fuer scripts/-Updates + prisma/migrations + .last-deployed-sha)
|
||||||
@ -93,11 +113,13 @@ fi
|
|||||||
# 3. Runtime-Deps installieren (nur falls package.json/lockfile changed)
|
# 3. Runtime-Deps installieren (nur falls package.json/lockfile changed)
|
||||||
# Prisma-Client ist schon im Artifact baked-in via `prisma generate` auf dem Runner,
|
# Prisma-Client ist schon im Artifact baked-in via `prisma generate` auf dem Runner,
|
||||||
# aber Runtime-Module (z.B. @prisma/client native binaries) muessen lokal sein.
|
# aber Runtime-Module (z.B. @prisma/client native binaries) muessen lokal sein.
|
||||||
|
# --prefer-offline: nutzt pnpm-Store-Cache wenn moeglich (kein neuer Download).
|
||||||
|
# Store waechst unbegrenzt -- prunen via: pnpm store prune (z.B. monatlich als Cron).
|
||||||
log "Step 3: pnpm install (runtime-deps)..."
|
log "Step 3: pnpm install (runtime-deps)..."
|
||||||
cd "${REPO_ROOT}"
|
cd "${REPO_ROOT}"
|
||||||
CI=true "${PNPM_BIN}" install --frozen-lockfile 2>&1 || {
|
CI=true "${PNPM_BIN}" install --frozen-lockfile --prefer-offline 2>&1 || {
|
||||||
log_err "frozen-lockfile fehlgeschlagen, fallback ohne frozen..."
|
log_err "frozen-lockfile fehlgeschlagen, fallback ohne frozen..."
|
||||||
CI=true "${PNPM_BIN}" install --no-frozen-lockfile 2>&1
|
CI=true "${PNPM_BIN}" install --no-frozen-lockfile --prefer-offline 2>&1
|
||||||
}
|
}
|
||||||
log "pnpm install done"
|
log "pnpm install done"
|
||||||
|
|
||||||
|
|||||||
@ -6,6 +6,10 @@
|
|||||||
# Clone: git@github.com:RaynisDev/rebreak.git
|
# Clone: git@github.com:RaynisDev/rebreak.git
|
||||||
# Backend: /srv/rebreak/backend (standalone Nitro, package: rebreak-backend)
|
# Backend: /srv/rebreak/backend (standalone Nitro, package: rebreak-backend)
|
||||||
#
|
#
|
||||||
|
# WICHTIG: GitHub Actions ist der primaere Deploy-Weg (deploy-from-artifact.sh).
|
||||||
|
# Dieses Script ist Fallback/Legacy-Pfad und wird NICHT ausgefuehrt wenn
|
||||||
|
# GH-Actions gerade deployed (.deploy-ga.lock).
|
||||||
|
#
|
||||||
# Ablauf:
|
# Ablauf:
|
||||||
# 1. Git pull (via Deploy-Key)
|
# 1. Git pull (via Deploy-Key)
|
||||||
# 2. pnpm install --frozen-lockfile (mit hoisted node-linker via .npmrc)
|
# 2. pnpm install --frozen-lockfile (mit hoisted node-linker via .npmrc)
|
||||||
@ -25,6 +29,7 @@ APP_DIR="${REPO_ROOT}/backend"
|
|||||||
NODE_BIN="/root/.nvm/versions/node/v24.11.1/bin/node"
|
NODE_BIN="/root/.nvm/versions/node/v24.11.1/bin/node"
|
||||||
PNPM_BIN="/root/.nvm/versions/node/v24.11.1/bin/pnpm"
|
PNPM_BIN="/root/.nvm/versions/node/v24.11.1/bin/pnpm"
|
||||||
PM2_BIN="/root/.nvm/versions/node/v24.11.1/bin/pm2"
|
PM2_BIN="/root/.nvm/versions/node/v24.11.1/bin/pm2"
|
||||||
|
GA_LOCK="${REPO_ROOT}/.deploy-ga.lock"
|
||||||
|
|
||||||
log() { echo "[deploy] $(date '+%H:%M:%S') $*"; }
|
log() { echo "[deploy] $(date '+%H:%M:%S') $*"; }
|
||||||
log_err() { echo "[deploy:err] $(date '+%H:%M:%S') $*" >&2; }
|
log_err() { echo "[deploy:err] $(date '+%H:%M:%S') $*" >&2; }
|
||||||
@ -34,6 +39,18 @@ log "=== Rebreak Deploy gestartet (backend/-Layout) ==="
|
|||||||
# 0. Sicherstellen dass PATH stimmt
|
# 0. Sicherstellen dass PATH stimmt
|
||||||
export PATH="/root/.nvm/versions/node/v24.11.1/bin:$PATH"
|
export PATH="/root/.nvm/versions/node/v24.11.1/bin:$PATH"
|
||||||
|
|
||||||
|
# 0a. GH-Actions-Lock pruefen: wenn deploy-from-artifact.sh laeuft, nicht doppeln.
|
||||||
|
if [[ -f "$GA_LOCK" ]]; then
|
||||||
|
LOCK_PID=$(cat "$GA_LOCK" 2>/dev/null || echo "")
|
||||||
|
if [[ -n "$LOCK_PID" ]] && kill -0 "$LOCK_PID" 2>/dev/null; then
|
||||||
|
log "GitHub-Actions-Deploy laeuft gerade (PID $LOCK_PID) -- Webhook-Deploy abgebrochen (kein Fehler)"
|
||||||
|
exit 0
|
||||||
|
else
|
||||||
|
log "Staler GA-Lock gefunden (PID $LOCK_PID) -- wird ignoriert"
|
||||||
|
rm -f "$GA_LOCK"
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
|
||||||
# 1. Git pull via Deploy-Key (SSH ist konfiguriert in /root/.ssh/config)
|
# 1. Git pull via Deploy-Key (SSH ist konfiguriert in /root/.ssh/config)
|
||||||
log "Step 1: git pull..."
|
log "Step 1: git pull..."
|
||||||
cd "${REPO_ROOT}"
|
cd "${REPO_ROOT}"
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user