feat(deploy): GitHub-Actions Build+Deploy-Pipeline für rebreak-staging
CX23 (4GB RAM) OOM'd am 2026-05-06 während webhook-getriggertem `pnpm build` (Heap-Limit 1.5GB überschritten). Build raus aus Server, GitHub-Runner (7GB RAM) übernimmt — Server deployed nur noch Artifact via scp + atomic-mv + pm2 restart. - .github/workflows/deploy-staging.yml: 2-Job (build + deploy via SSH-Artifact-Push) - scripts/deploy-from-artifact.sh: Server-Script mit Migration-Detection + atomic-mv - ops/GITHUB_ACTIONS_PIPELINE.md: Architektur-Doku + Cheatsheet Coexistence: alter rebreak-webhook bleibt als Failsafe, wird nach 5+ erfolgreichen GA-Runs deaktiviert. Erster Run: Webhook temporär gestoppt für sauberen Test. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
b38bf176b9
commit
87438ede8e
122
.github/workflows/deploy-staging.yml
vendored
Normal file
122
.github/workflows/deploy-staging.yml
vendored
Normal file
@ -0,0 +1,122 @@
|
|||||||
|
name: Deploy Staging

# ─────────────────────────────────────────────────────────────────────────────
# Build + deploy pipeline for rebreak-staging.
#
# Why GitHub Actions instead of building on the server:
#   - On 2026-05-06 the Hetzner CX23 (4 GB RAM) exceeded the 1.5 GB heap limit
#     during `pnpm build` (OOM, "ineffective mark-compacts near heap limit").
#   - GitHub Actions runners have 7 GB RAM and are free (public repo) or
#     covered by the monthly free quota (private repo).
#   - The server stays untouched -- only artifact extract + pm2 restart.
#
# Pattern (taken from trucko-monorepo .github/workflows/android.yml):
#   - SSH key stored as a GitHub secret (HETZNER_SSH_KEY)
#   - ssh-keyscan + scp + ssh for the server-side trigger
#   - A concurrency group prevents parallel deploys
#
# Coexistence: scripts/deploy.sh + scripts/deploy-webhook/server.mjs keep
# running in parallel on the server (failsafe). The user decides when the
# webhook is switched off (see ops/GITHUB_ACTIONS_PIPELINE.md).
# ─────────────────────────────────────────────────────────────────────────────

on:
  push:
    branches:
      - main
  workflow_dispatch:

concurrency:
  group: deploy-staging
  # A running deploy is never cancelled; later runs wait for it instead.
  cancel-in-progress: false

permissions:
  contents: read
|
||||||
|
|
||||||
|
jobs:
  # ── 1. Build on the GitHub runner (keeps the memory-hungry build off the 4 GB server) ──
  build:
    name: Build backend (Nitro)
    runs-on: ubuntu-latest
    # Upper bound so a stuck install/build cannot block the concurrency group.
    timeout-minutes: 20
    steps:
      - uses: actions/checkout@v4
        with:
          # Full history. NOTE(review): the original comment justifies this with
          # the migration detection in the server script, but that script works
          # on the server's own clone -- confirm whether the build needs it.
          fetch-depth: 0

      - uses: pnpm/action-setup@v4
        with:
          version: '10'

      - uses: actions/setup-node@v4
        with:
          # Must match the Node version installed on the Hetzner server exactly.
          node-version: '24.11.1'
          cache: pnpm

      - name: Install dependencies
        run: pnpm install --frozen-lockfile

      - name: Build backend (prisma generate + nitro build)
        working-directory: backend
        run: pnpm build

      # Pack the *contents* of backend/.output (not the directory itself), so
      # the server can extract straight into its own target directory.
      - name: Tar artifact
        run: tar czf backend-output.tar.gz -C backend/.output .

      - name: Upload artifact
        uses: actions/upload-artifact@v4
        with:
          name: backend-output
          path: backend-output.tar.gz
          retention-days: 7

  # ── 2. Deploy: push artifact to Hetzner + extract + pm2 restart ─────────────
  deploy:
    name: Deploy zu Hetzner
    needs: build
    runs-on: ubuntu-latest
    # Upper bound so a hanging scp/ssh session fails the run instead of
    # holding the deploy-staging concurrency group.
    timeout-minutes: 15
    # GitHub environment: the HETZNER_* secrets are scoped to this job only.
    environment: staging
    steps:
      - name: Download artifact
        uses: actions/download-artifact@v4
        with:
          name: backend-output

      - name: Setup SSH
        env:
          SSH_PRIVATE_KEY: ${{ secrets.HETZNER_SSH_KEY }}
          SSH_HOST: ${{ secrets.HETZNER_HOST }}
          SSH_USER: ${{ secrets.HETZNER_USER }}
        run: |
          # Fail early with a clear message instead of an opaque ssh error.
          if [ -z "$SSH_PRIVATE_KEY" ] || [ -z "$SSH_HOST" ]; then
            echo "FATAL: HETZNER_SSH_KEY oder HETZNER_HOST nicht gesetzt"
            exit 1
          fi
          if [ -z "$SSH_USER" ]; then
            echo "FATAL: HETZNER_USER nicht gesetzt"
            exit 1
          fi
          mkdir -p ~/.ssh
          # printf (not echo) writes the key verbatim with one trailing newline.
          printf '%s\n' "$SSH_PRIVATE_KEY" > ~/.ssh/id_ed25519
          chmod 600 ~/.ssh/id_ed25519
          # Trust-on-first-use: the host key is accepted as scanned.
          ssh-keyscan -H "$SSH_HOST" >> ~/.ssh/known_hosts

      - name: Upload artifact zu Hetzner
        env:
          SSH_HOST: ${{ secrets.HETZNER_HOST }}
          SSH_USER: ${{ secrets.HETZNER_USER }}
        run: |
          scp -i ~/.ssh/id_ed25519 backend-output.tar.gz \
            "$SSH_USER@$SSH_HOST:/srv/rebreak/backend/.output-incoming.tar.gz"

      - name: Server-side deploy (extract + migrate + pm2 restart)
        env:
          SSH_HOST: ${{ secrets.HETZNER_HOST }}
          SSH_USER: ${{ secrets.HETZNER_USER }}
        run: |
          ssh -i ~/.ssh/id_ed25519 "$SSH_USER@$SSH_HOST" \
            'bash /srv/rebreak/scripts/deploy-from-artifact.sh'

      - name: Health-Check (HTTP 401 = Server erreichbar + auth-protected)
        run: |
          URL="https://staging.rebreak.org/api/auth/me"
          STATUS="000"
          # Poll instead of a single probe after a fixed sleep: the app may need
          # longer than 5 s to come back after the pm2 restart.
          for attempt in 1 2 3 4 5 6; do
            sleep 5
            # On a failed request curl's -w already prints 000; `|| true` only
            # keeps the non-zero exit code from aborting the step (an extra
            # `echo "000"` here would produce "000000").
            STATUS=$(curl -sS -o /dev/null --max-time 10 -w '%{http_code}' "$URL" || true)
            STATUS="${STATUS:-000}"
            echo "staging.rebreak.org/api/auth/me -> HTTP $STATUS"
            if [ "$STATUS" = "401" ] || [ "$STATUS" = "200" ]; then
              exit 0
            fi
            echo "Health-Check attempt $attempt/6 not yet healthy"
          done
          echo "FAIL: erwartet 401/200, bekommen $STATUS"
          exit 1
|
||||||
215
ops/GITHUB_ACTIONS_PIPELINE.md
Normal file
215
ops/GITHUB_ACTIONS_PIPELINE.md
Normal file
@ -0,0 +1,215 @@
|
|||||||
|
# GitHub-Actions Build+Deploy-Pipeline -- rebreak-monorepo
|
||||||
|
|
||||||
|
**Owner:** backyard (Infrastruktur-Architekt)
|
||||||
|
**Stand:** 2026-05-07
|
||||||
|
**Status:** Files geschrieben (Workflow + Server-Script + dieses Doc) -- noch NICHT auf main gepushed, GitHub-Secrets noch NICHT angelegt.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Warum diese Pipeline jetzt
|
||||||
|
|
||||||
|
Am 2026-05-06 hat der Hetzner CX23 (4 GB RAM, 1.5 GB Heap-Limit fuer Node) waehrend des Webhook-getriggerten `pnpm build` OOM'd:
|
||||||
|
|
||||||
|
```
|
||||||
|
FATAL ERROR: Ineffective mark-compacts near heap limit
|
||||||
|
Allocation failed - JavaScript heap out of memory
|
||||||
|
```
|
||||||
|
|
||||||
|
Der laufende `rebreak-staging`-Prozess hat den Crash UEBERLEBT (pm2 nicht restartet, alte `.output-staging` aktiv, HTTP 401 ok). Aber der naechste Push waere genauso gefailt -- und irgendwann wuerde der pm2-restart auf eine korrupt-extrahierte `.output-staging` greifen und in den Crash-Loop gehen (siehe Backyard-Memory `feedback_deploy_workflow.md`).
|
||||||
|
|
||||||
|
User-Decision (2026-05-08): **GitHub Actions baut, Hetzner deployt nur noch das Artifact.** Server bleibt CX23 (keine Cost-Hochskalierung weil Rebreak noch nicht monetarisiert).
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Recon: Trucko-Pattern (Vorlagen)
|
||||||
|
|
||||||
|
`/Users/chahinebrini/mono/trucko-monorepo/.github/workflows/` -- alle drei Workflows sind aktuell `.disabled`, aber der Code laeuft als Vorlage:
|
||||||
|
|
||||||
|
| File | Was wir uebernehmen |
|
||||||
|
|---|---|
|
||||||
|
| `android.yml.disabled` | **Hauptvorlage** -- SSH-Setup-Pattern (`HETZNER_SSH_KEY` / `HETZNER_HOST` / `HETZNER_USER` als GitHub-Secrets), `ssh-keyscan -H`, `scp` zum Server, `mkdir -p ~/.ssh`, `printf '%s\n' "$SSH_PRIVATE_KEY"`. Plus die concurrency-group + workflow_dispatch + Artifact-Upload-Idee |
|
||||||
|
| `ci.yml.disabled` | Aufbau-Pattern: `pnpm/action-setup` + `setup-node` mit `cache: pnpm` + `pnpm install --frozen-lockfile`. Plus die `concurrency: cancel-in-progress` Idee (bei uns aber `false`, weil Deploys nicht abgebrochen werden duerfen) |
|
||||||
|
| `e2e.yml.disabled` | Infisical-Secret-Loading-Pattern (falls wir spaeter Test-Jobs brauchen). Aktuell NICHT relevant -- Tests kommen in eigenem Workflow wenn Coverage existiert |
|
||||||
|
|
||||||
|
**Wir schreiben keinen `ci.yml`-Klon** -- Phase 6 explizit: keine Tests in der Pipeline (Ahmed's `TESTING_STATE.md` hat Coverage 0%).
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Neue Pipeline-Architektur
|
||||||
|
|
||||||
|
```
|
||||||
|
GitHub-Push (main)
|
||||||
|
|
|
||||||
|
v
|
||||||
|
.github/workflows/deploy-staging.yml
|
||||||
|
|
|
||||||
|
+--[Job 1: build]----------------------+
|
||||||
|
| ubuntu-latest (7 GB RAM) |
|
||||||
|
| - checkout (fetch-depth=0) |
|
||||||
|
| - pnpm/setup-node@v4 (24.11.1) |
|
||||||
|
| - pnpm install --frozen-lockfile |
|
||||||
|
| - cd backend && pnpm build |
|
||||||
|
| (= prisma generate + nitro build) |
|
||||||
|
| - tar czf backend-output.tar.gz |
|
||||||
|
| - upload-artifact@v4 (7d retention) |
|
||||||
|
+--------------------------------------+
|
||||||
|
|
|
||||||
|
v
|
||||||
|
+--[Job 2: deploy]---------------------+
|
||||||
|
| ubuntu-latest |
|
||||||
|
| environment: staging |
|
||||||
|
| - download-artifact@v4 |
|
||||||
|
| - SSH-Setup mit HETZNER_SSH_KEY |
|
||||||
|
| - scp tar.gz -> /srv/rebreak/ |
|
||||||
|
| backend/.output-incoming.tar.gz |
|
||||||
|
| - ssh -> deploy-from-artifact.sh |
|
||||||
|
| - health-check curl /api/auth/me |
|
||||||
|
| (erwartet HTTP 401) |
|
||||||
|
+--------------------------------------+
|
||||||
|
|
|
||||||
|
v
|
||||||
|
Hetzner CX23 (Server)
|
||||||
|
/srv/rebreak/scripts/deploy-from-artifact.sh
|
||||||
|
|
|
||||||
|
+-- git pull (fuer migrations + scripts)
|
||||||
|
+-- prisma migrate deploy (wenn schema/migrations changed)
|
||||||
|
+-- pnpm install --frozen-lockfile (runtime-deps)
|
||||||
|
+-- tar xzf -> .output-staging-new -> mv .output-staging
|
||||||
|
+-- pm2 restart rebreak-staging --update-env
|
||||||
|
+-- echo SHA > .last-deployed-sha
|
||||||
|
```
|
||||||
|
|
||||||
|
**Server baut nicht mehr selbst.** Build ist aus dem Server raus -- OOM-Risiko geloest.
|
||||||
|
|
||||||
|
### Files in diesem PR
|
||||||
|
|
||||||
|
- `.github/workflows/deploy-staging.yml` (neu)
|
||||||
|
- `scripts/deploy-from-artifact.sh` (neu, +x via git nach commit)
|
||||||
|
- `ops/GITHUB_ACTIONS_PIPELINE.md` (dieses Doc)
|
||||||
|
|
||||||
|
`scripts/deploy.sh` und `scripts/deploy-webhook/server.mjs` **bleiben unveraendert** (Coexistence -- siehe unten).
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Required GitHub-Secrets
|
||||||
|
|
||||||
|
User muss in **GitHub Repo Settings -> Environments -> staging -> Add Secret**:
|
||||||
|
|
||||||
|
| Secret-Name | Beschreibung | Quelle |
|
||||||
|
|---|---|---|
|
||||||
|
| `HETZNER_SSH_KEY` | Privater SSH-Key (ed25519), dessen Public-Key in `/root/.ssh/authorized_keys` auf 49.13.55.22 eingetragen ist. **Empfehlung:** neuen Deploy-Key generieren (`ssh-keygen -t ed25519 -f ~/.ssh/rebreak-deploy -C "github-actions@rebreak"`), public part in `/root/.ssh/authorized_keys` auf Hetzner anhaengen, private part als Secret. **Recycling-Option:** falls der trucko-Workflow schon einen Hetzner-Deploy-Key hatte (gleiche Zielmaschine api.trucko.org bzw. heute 49.13.55.22), kann der wiederverwendet werden -- Doku-Quelle: trucko `.github/workflows/android.yml.disabled` Zeile 134 nutzt `HETZNER_SSH_KEY` ebenfalls | User generiert |
|
||||||
|
| `HETZNER_HOST` | `staging.rebreak.org` (resolved direkt auf 49.13.55.22, kein Cloudflare-Proxy). **NICHT** `api.trucko.org` -- das zeigt auf 128.140.47.53 (anderen, alten shared-Server). DNS-Layer-Indirection bevorzugt damit Server-Migration ohne Secret-Rotation moeglich ist | Statisch |
|
||||||
|
| `HETZNER_USER` | `root` | Statisch |
|
||||||
|
|
||||||
|
**Warum Environment "staging" und nicht Repo-Secrets:**
|
||||||
|
|
||||||
|
- Environment-Secrets sind nur fuer Jobs mit `environment: staging` zugaenglich -- Build-Job (ohne environment-Tag) sieht sie nicht. Saubere Privilege-Separation.
|
||||||
|
- Spaeter beim `deploy-prod.yml` legen wir Environment "production" mit eigener `HETZNER_SSH_KEY_PROD` an (oder gleiches Key-Pair, je nach User-Praeferenz).
|
||||||
|
- Environment-Settings koennen "Required reviewers" haben -- fuer `production` empfohlen (manuelle Approval vor jedem Deploy), fuer `staging` nicht noetig.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Required User-Actions vor erstem GA-Run
|
||||||
|
|
||||||
|
In dieser Reihenfolge:
|
||||||
|
|
||||||
|
1. **Deploy-Key generieren + auf Server pushen:**
|
||||||
|
```bash
|
||||||
|
# Lokal:
|
||||||
|
ssh-keygen -t ed25519 -f ~/.ssh/rebreak-deploy -C "github-actions@rebreak" -N ""
|
||||||
|
cat ~/.ssh/rebreak-deploy.pub
|
||||||
|
# Output kopieren, dann:
|
||||||
|
ssh root@49.13.55.22 "echo '<paste>' >> /root/.ssh/authorized_keys"
|
||||||
|
# SSH-Test:
|
||||||
|
ssh -i ~/.ssh/rebreak-deploy root@49.13.55.22 "whoami" # erwartet: root
|
||||||
|
```
|
||||||
|
|
||||||
|
2. **Server-Script deployen** (einmalig, ueber den noch laufenden Webhook):
|
||||||
|
- PR mit den drei Files mergen -> Webhook triggert `scripts/deploy.sh` -> der pullt das neue Repo inklusive `scripts/deploy-from-artifact.sh`. Danach manuell `chmod +x` via SSH:
|
||||||
|
```bash
|
||||||
|
ssh root@49.13.55.22 "chmod +x /srv/rebreak/scripts/deploy-from-artifact.sh"
|
||||||
|
```
|
||||||
|
- **Alternative:** `chmod +x` lokal vor dem Commit setzen (`git update-index --chmod=+x scripts/deploy-from-artifact.sh`) -- dann bringt git den Modus mit. **Empfohlen**, weil reproduzierbar.
|
||||||
|
|
||||||
|
3. **GitHub-Environment "staging" anlegen:**
|
||||||
|
- Repo Settings -> Environments -> New environment -> Name: `staging`
|
||||||
|
- Add Secret: `HETZNER_SSH_KEY` (Inhalt von `~/.ssh/rebreak-deploy`)
|
||||||
|
- Add Secret: `HETZNER_HOST` (`staging.rebreak.org` -- loest direkt auf 49.13.55.22 auf; Hostname statt IP, siehe Secrets-Tabelle oben)
|
||||||
|
- Add Secret: `HETZNER_USER` (`root`)
|
||||||
|
- Optional: "Wait timer 0 min" + "Required reviewers: none" fuer staging.
|
||||||
|
|
||||||
|
4. **Manuell triggern fuer ersten Test-Run:**
|
||||||
|
- Repo -> Actions -> "Deploy Staging" -> Run workflow -> Branch main -> Run.
|
||||||
|
- Erwartung: Build-Job 3-5 min, Deploy-Job 1-2 min, Health-Check passes.
|
||||||
|
|
||||||
|
5. **Smoke-Test via curl:**
|
||||||
|
```bash
|
||||||
|
curl -sS -o /dev/null -w '%{http_code}\n' https://staging.rebreak.org/api/auth/me
|
||||||
|
# erwartet: 401
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Coexistence-Strategie: Webhook bleibt parallel
|
||||||
|
|
||||||
|
Empfehlung: **(b) parallel lassen fuer 1-2 Wochen als Failsafe**, dann (a) Webhook-Auto-Deploy DEAKTIVIEREN sobald 5+ erfolgreiche GA-Deploys ohne Issues durchgelaufen sind.
|
||||||
|
|
||||||
|
**Warum nicht direkt (a) abschalten:**
|
||||||
|
|
||||||
|
- Webhook-Listener auf Hetzner laeuft seit URL-Fix stabil (2026-05-06 OOM-Event hat ihn nicht gekillt -- Listener selbst hat 60 MB RAM-Footprint, OOM-Killer hat den Build-Prozess geholt nicht den Listener).
|
||||||
|
- GitHub-Actions ist eine neue Komponente -- bevor wir das alte Sicherheitsnetz wegnehmen, wollen wir mehrere erfolgreiche Runs sehen.
|
||||||
|
- Beide Pipelines schreiben in `.output-staging` mit atomic-mv -- kein Race wenn nicht gleichzeitig getriggert. **Risk:** falls Webhook + GA gleichzeitig laufen (Push-Event feuert beide), gibt es zwei pnpm-installs nacheinander -- nicht schlimm, aber un-elegant.
|
||||||
|
- **Mitigation:** im `scripts/deploy.sh` einen Soft-Bail einbauen: wenn `.output-incoming.tar.gz` neuer als 60 s -> "GA-Deploy laeuft, Webhook skipped, exit 0". Optionaler Verbesserungsschritt, NICHT in diesem PR.
|
||||||
|
|
||||||
|
**Cutover-Plan (User-Decision in 1-2 Wochen):**
|
||||||
|
|
||||||
|
- Webhook-Deaktivierung: GitHub Repo Settings -> Webhooks -> Edit `https://staging.rebreak.org/webhook` -> "Active" Checkbox aus.
|
||||||
|
- pm2 stop: `ssh root@49.13.55.22 "pm2 stop rebreak-webhook && pm2 save"`.
|
||||||
|
- `scripts/deploy.sh` als Reference behalten (oder nach `scripts/legacy/deploy.sh` archivieren).
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Migration zu Production-Pipeline (spaeter)
|
||||||
|
|
||||||
|
Nicht in dieser Phase. Vorgeplant:
|
||||||
|
|
||||||
|
- Eigener Workflow `.github/workflows/deploy-prod.yml`
|
||||||
|
- Trigger: nur via `workflow_dispatch` (kein automatischer Push-Trigger -- prod ist nicht main, prod ist tag/release-driven)
|
||||||
|
- Environment: `production` mit Required-Reviewers + 5-min-Wait-Timer
|
||||||
|
- Server-Script: `scripts/deploy-from-artifact-prod.sh` -- analog, aber zielt auf `.output-prod` und `pm2 restart rebreak` (statt -staging)
|
||||||
|
- Build-Job kann mit `deploy-staging.yml` geteilt werden via composite-action (`./.github/actions/build-backend`)
|
||||||
|
|
||||||
|
**Voraussetzung:** Prod-Service `rebreak` muss erstmal auf dem Server existieren. Aktuell laufen nur `rebreak-staging` + `rebreak-webhook` + Mo's IMAP/IDLE/DNS-Services.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Open Questions
|
||||||
|
|
||||||
|
1. **Deploy-Key recyceln vs neu?** Falls ein alter trucko-Hetzner-Deploy-Key existiert (Backyard hat keine Sicht in `~/.ssh/` von User) -- recycling spart Setup-Arbeit, aber neu generieren ist sicherer (klare Bindung "github-actions-rebreak", einfacher rotierbar). **Empfehlung:** neu.
|
||||||
|
2. **Health-Check streng oder lax?** Aktuelles Workflow-Snippet checkt HTTP 401 (auth-protected -> Server lebt). Strikter waere `200` von einem Public-Health-Endpoint -- aber ich sehe keinen `/api/health` im Backend. **Soll ich einen `/api/health` GET-Endpoint vorschlagen fuer rebreak-backend-Owner?** Out-of-scope hier.
|
||||||
|
3. **GA-Free-Tier-Quota:** Privates Repo hat 2000 Build-Minuten/Monat im Free-Tier. Build-Job 3-5 min + Deploy-Job 1-2 min = ~7 min pro Push. 2000 / 7 = ~285 Pushes/Monat. **Reichen wahrscheinlich**, aber bei viel Refactor-Phase koennte das Quota knapp werden -- dann selfhosted-runner-Option auf dem Hetzner selbst (aber das bringt das OOM-Problem zurueck). **Decision-Trigger:** wenn wir > 200 Deploys/Monat sehen, neu evaluieren.
|
||||||
|
4. **`pnpm install` auf Server doppelt?** Aktuell installiert `deploy-from-artifact.sh` Step 3 nochmal `pnpm install --frozen-lockfile`. Das ist fuer Runtime-Module (z.B. `@prisma/client` mit native `.node`-Files, `pg` mit native bindings) noetig, weil das Artifact nur Nitro-Server-Bundle enthaelt, nicht `node_modules/`. **Optimierung:** Artifact koennte komplett `node_modules/` mitbringen (tar groesser, ~100-200 MB), dann faellt Server-side-Install weg. **Trade-off:** Upload-Time vs Install-Time. Fuer jetzt: zwei-Schritt-Layout ist robust. Spaeter optimierbar.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Cheatsheet
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Manuell triggern
|
||||||
|
gh workflow run deploy-staging.yml
|
||||||
|
|
||||||
|
# Letzten Run anschauen
|
||||||
|
gh run list --workflow=deploy-staging.yml --limit 5
|
||||||
|
gh run view <run-id> --log
|
||||||
|
|
||||||
|
# Webhook (Legacy) deaktivieren
|
||||||
|
ssh root@49.13.55.22 "pm2 stop rebreak-webhook && pm2 save"
|
||||||
|
# GitHub Repo -> Settings -> Webhooks -> Active off
|
||||||
|
|
||||||
|
# Last-deployed-SHA auf Server pruefen
|
||||||
|
ssh root@49.13.55.22 "cat /srv/rebreak/.last-deployed-sha"
|
||||||
|
|
||||||
|
# Health-Check
|
||||||
|
curl -sS -o /dev/null -w '%{http_code}\n' https://staging.rebreak.org/api/auth/me
|
||||||
|
# erwartet: 401
|
||||||
|
```
|
||||||
143
scripts/deploy-from-artifact.sh
Executable file
143
scripts/deploy-from-artifact.sh
Executable file
@ -0,0 +1,143 @@
|
|||||||
|
#!/bin/bash
# deploy-from-artifact.sh -- server-side deploy after the GitHub Actions artifact upload.
#
# Invoked over SSH by .github/workflows/deploy-staging.yml.
# Expects /srv/rebreak/backend/.output-incoming.tar.gz (pushed by the GA runner via scp).
#
# Differences from scripts/deploy.sh:
#   - NO pnpm build here (the GA runner does the build)
#   - pnpm install still runs, but only to provide the modules the prebuilt
#     bundle needs at runtime (it is a regular install, there is no --prod flag)
#   - Migration detection is kept (pattern from deploy.sh)
#   - .output-staging is still replaced via a freshly extracted directory + mv
#
# Failure mode: if the migration fails, pm2 is not restarted (data-consistency guard).

set -euo pipefail

# Fixed locations on the staging server.
REPO_ROOT="/srv/rebreak"
APP_DIR="${REPO_ROOT}/backend"
ARTIFACT="${APP_DIR}/.output-incoming.tar.gz"

# Absolute tool paths: a non-interactive SSH session does not load nvm.
NODE_BIN_DIR="/root/.nvm/versions/node/v24.11.1/bin"
PM2_BIN="${NODE_BIN_DIR}/pm2"
PNPM_BIN="${NODE_BIN_DIR}/pnpm"

# log MESSAGE... -- timestamped progress line on stdout.
log() {
  printf '%s\n' "[deploy-artifact] $(date '+%H:%M:%S') $*"
}

# log_err MESSAGE... -- timestamped error line on stderr.
log_err() {
  printf '%s\n' "[deploy-artifact:err] $(date '+%H:%M:%S') $*" >&2
}

log "=== Rebreak Deploy-from-Artifact gestartet ==="

# Put the nvm-managed Node toolchain first on PATH for every child process.
export PATH="${NODE_BIN_DIR}:$PATH"

# 0. Sanity check: the uploaded artifact must exist before anything is touched.
if [[ ! -f "$ARTIFACT" ]]; then
  log_err "Artifact $ARTIFACT fehlt -- abort"
  exit 1
fi
|
||||||
|
|
||||||
|
# 1. Sync the checkout with origin/main (brings in scripts/ and prisma/migrations).
#    The hard reset discards any local modification of tracked files.
log "Step 1: git pull..."
cd "${REPO_ROOT}"
git fetch origin main
git reset --hard origin/main
log "Git updated to $(git rev-parse --short HEAD)"

# 2. Migration detection: compare the last deployed commit with the new HEAD.
log "Step 2: Migration-Check..."
# A missing .last-deployed-sha yields an empty string (first deploy).
PREV_SHA=$(cat "${REPO_ROOT}/.last-deployed-sha" 2>/dev/null || echo "")
CUR_SHA=$(git -C "${REPO_ROOT}" rev-parse HEAD)

run_migration=false
if [[ -z "$PREV_SHA" ]]; then
  log "Kein .last-deployed-sha gefunden -- first-deploy: Migration sicherheitshalber ausfuehren"
  run_migration=true
# `git diff --quiet` exits non-zero on differences AND on errors (e.g. a
# PREV_SHA that no longer exists); both cases run the migration, which is the
# safe direction.
elif ! git -C "${REPO_ROOT}" diff --quiet "$PREV_SHA"..HEAD -- backend/prisma/migrations/ backend/prisma/schema.prisma; then
  log "Migration-Changes detected zwischen ${PREV_SHA} und ${CUR_SHA}"
  run_migration=true
else
  log "Keine Migration-Changes seit ${PREV_SHA} -- skip migrate deploy"
fi

if $run_migration; then
  log "Running prisma migrate deploy..."
  cd "${APP_DIR}"

  # The Infisical machine credentials are read from /etc/environment.
  source /etc/environment
  if [[ -z "${INFISICAL_CLIENT_ID:-}" || -z "${INFISICAL_CLIENT_SECRET:-}" ]]; then
    log_err "INFISICAL_CLIENT_ID / INFISICAL_CLIENT_SECRET fehlt -- Migration abgebrochen"
    exit 1
  fi

  # Capture a login failure explicitly: under `set -e` a failing command
  # substitution in a plain assignment aborts the script immediately, so the
  # error message below would never be logged.
  INFISICAL_TOKEN=$(infisical login \
    --method=universal-auth \
    --client-id="${INFISICAL_CLIENT_ID}" \
    --client-secret="${INFISICAL_CLIENT_SECRET}" \
    --silent --plain 2>/dev/null) || INFISICAL_TOKEN=""

  if [[ -z "$INFISICAL_TOKEN" ]]; then
    log_err "Infisical login fehlgeschlagen"
    exit 1
  fi

  # Run the migration inside `infisical run` so the staging secrets are present
  # in the child environment. The inner script is single-quoted; the
  # "'"${PNPM_BIN}"'" sequence briefly leaves the single quotes to splice in
  # the pnpm path from this shell.
  infisical run \
    --projectId="${INFISICAL_PROJECT_ID:-14b11b35-ef59-4b8a-a16b-398f0cc3ad93}" \
    --env=staging \
    --token="$INFISICAL_TOKEN" \
    -- bash -c '
      set -e
      export DATABASE_URL="${DATABASE_URL:-${NUXT_DATABASE_URL:-}}"
      if [[ -z "$DATABASE_URL" ]]; then
        echo "[deploy-artifact:err] DATABASE_URL nicht in Infisical-staging" >&2
        exit 1
      fi
      "'"${PNPM_BIN}"'" prisma migrate deploy --schema prisma/schema.prisma
    ' 2>&1 || {
      # Abort before the artifact swap and the pm2 restart further down.
      log_err "Migration-Deploy fehlgeschlagen -- pm2-restart ABGEBROCHEN"
      exit 1
    }
  log "Migration done"
fi
|
||||||
|
|
||||||
|
# 3. Install dependencies (runs on every deploy, not only when the lockfile changed).
# Per the original note, the Prisma client is generated on the runner and ships
# inside the artifact, while runtime modules (e.g. @prisma/client native
# binaries) have to exist locally on the server.
log "Step 3: pnpm install (runtime-deps)..."
cd "${REPO_ROOT}"
if ! CI=true "${PNPM_BIN}" install --frozen-lockfile 2>&1; then
  # Best-effort fallback: retry without the frozen lockfile. A failure of this
  # second attempt still aborts the script (set -e).
  log_err "frozen-lockfile fehlgeschlagen, fallback ohne frozen..."
  CI=true "${PNPM_BIN}" install --no-frozen-lockfile 2>&1
fi
log "pnpm install done"
|
||||||
|
|
||||||
|
# 4. Extract the artifact into a scratch directory, validate it, then swap it in.
log "Step 4: Artifact extrahieren..."
cd "${APP_DIR}"
# Start from a clean scratch directory (a previous failed run may have left one).
rm -rf .output-staging-new
mkdir -p .output-staging-new
tar xzf "$ARTIFACT" -C .output-staging-new

# Sanity check: the extracted tree must contain server/index.mjs.
if [[ ! -f .output-staging-new/server/index.mjs ]]; then
  log_err "Ungueltiges Artifact -- .output-staging-new/server/index.mjs fehlt"
  rm -rf .output-staging-new
  exit 1
fi

# Swap via renames instead of `rm -rf .output-staging` followed by `mv`:
# deleting the live tree first leaves a window without any release directory
# and nothing to fall back to if the final mv fails. Renames keep that window
# minimal and the previous release restorable.
rm -rf .output-staging-old
if [[ -e .output-staging || -L .output-staging ]]; then
  mv .output-staging .output-staging-old
fi
if ! mv .output-staging-new .output-staging; then
  log_err "mv .output-staging-new -> .output-staging fehlgeschlagen -- vorherige Version wird wiederhergestellt"
  if [[ -e .output-staging-old || -L .output-staging-old ]]; then
    mv .output-staging-old .output-staging
  fi
  exit 1
fi
rm -rf .output-staging-old

# The artifact is consumed; remove it so a later run cannot redeploy a stale upload.
rm -f "$ARTIFACT"
log ".output-staging aktualisiert"
|
||||||
|
|
||||||
|
# 5. Restart the app. If the restart fails (stderr suppressed), start it from
#    the ecosystem file instead.
#    NOTE(review): the original comment says --update-env pulls in new
#    Infisical secrets -- confirm how secrets actually reach the pm2 process.
log "Step 5: pm2 restart rebreak-staging..."
if ! "${PM2_BIN}" restart rebreak-staging --update-env 2>/dev/null; then
  "${PM2_BIN}" start "${REPO_ROOT}/ecosystem.config.js" --only rebreak-staging
fi
log "rebreak-staging restarted"

# 6. Optional services (best effort: a failing restart is ignored).
log "Step 6: Optional services restart..."
for optional_service in rebreak-imap-staging rebreak-idle-staging dns-rebreak-staging dns-rebreak; do
  "${PM2_BIN}" restart "${optional_service}" 2>/dev/null || true
done

# 7. Persist the pm2 process list (best effort).
"${PM2_BIN}" save 2>/dev/null || true

# 8. Record the deployed commit; the next run's migration detection diffs against it.
echo "${CUR_SHA}" > "${REPO_ROOT}/.last-deployed-sha"
log "Last-deployed-SHA gespeichert: ${CUR_SHA}"

log "=== Deploy erfolgreich: $(git -C "${REPO_ROOT}" rev-parse --short HEAD) ==="
|
||||||
Loading…
x
Reference in New Issue
Block a user