#!/usr/bin/env bash
# llm-bench.sh — TTFB benchmark across LLM providers for SOS-style requests.
#
# Measures curl's time_starttransfer for streaming chat completions, i.e. the
# time from the start of the request until the first response byte arrives.
# For SSE endpoints this is intended to track the first token seen by the
# client.
# NOTE(review): curl counts the first byte of the *response* (normally the
# status line / headers), so this matches "first token" only for providers that
# hold their headers back until they start streaming — confirm per provider.
#
# Usage:
#   bash llm-bench.sh [RUNS]        # default RUNS=3
#
# On the staging server (with Infisical):
#   infisical run --projectId="$INFISICAL_PROJECT_ID" --env=staging --token="$TOKEN" -- \
#     bash llm-bench.sh
#
# ENV vars (all optional; a provider whose key is missing is skipped):
#   OPENROUTER_API_KEY — all models via OpenRouter
#   ANTHROPIC_API_KEY  — Haiku/Sonnet direct
#   GROQ_API_KEY       — Llama via Groq direct
#   OPENAI_API_KEY     — GPT-4o-mini direct
#   GEMINI_API_KEY     — Gemini direct (or GOOGLE_GENERATIVE_AI_API_KEY)

set -u

RUNS="${1:-3}"
# RUNS later drives an arithmetic for-loop. A non-numeric value would be
# evaluated there as a variable name (aborting under `set -u`, or silently
# running zero iterations), so reject it up front with a clear message.
if ! [[ "$RUNS" =~ ^[0-9]+$ ]]; then
  printf 'RUNS muss eine positive Ganzzahl sein: %s\n' "$RUNS" >&2
  exit 2
fi
# Force base 10 so that inputs such as "08" are not parsed as invalid octal.
RUNS=$((10#$RUNS))
if ((RUNS < 1)); then
  printf 'RUNS muss eine positive Ganzzahl sein: %s\n' "$RUNS" >&2
  exit 2
fi

SYSTEM_PROMPT='Du bist Lyra, eine warme empathische Begleiterin für Menschen mit Glücksspielsucht. Antworte in maximal 2-3 deutschen Sätzen, warm und ohne Belehrung. Am Ende JSON-Chips: [{"label":"...","action":"..."}]'
USER_MSG='Ich bin gerade unter starkem Druck und denke daran, einen großen Einsatz zu machen.'
MAX_TOK=80

# jq is used to build the JSON request bodies; bail out early if it is missing.
command -v jq >/dev/null || {
  echo "jq fehlt — apt install jq" >&2
  exit 1
}

# ── helpers ──────────────────────────────────────────────────────────────────

#######################################
# Run curl once and print the time to first byte.
# Arguments:
#   "$@" - passed through to curl unchanged (method, URL, headers, body).
# Outputs (stdout, exactly one token, no trailing newline guaranteed):
#   <integer>     TTFB in milliseconds (truncated), HTTP status 200 or 206
#   FAIL(<code>)  curl succeeded but the HTTP status was something else
#   ERR           curl itself exited non-zero (timeout, DNS, TLS, ...)
# Notes:
#   The response body is discarded; curl's own stderr is suppressed.
#   Failures are reported through the printed token, not the exit status.
#######################################
ttfb_ms() {
  local out ttfb code

  # -N disables output buffering; -w appends "<seconds>\n<http_code>" after the
  # transfer. The body goes to /dev/null, so stdout carries only those two lines.
  out=$(curl -s -N -o /dev/null --max-time 30 \
    -w '%{time_starttransfer}\n%{http_code}' "$@" 2>/dev/null) || {
    echo "ERR"
    return
  }

  ttfb=${out%%$'\n'*} # first line: seconds as a decimal fraction
  code=${out##*$'\n'} # last line: HTTP status code

  if [[ "$code" != "200" && "$code" != "206" ]]; then
    echo "FAIL($code)"
    return
  fi

  # Seconds → whole milliseconds; awk handles the floating-point arithmetic.
  awk -v s="$ttfb" 'BEGIN { printf "%d", s * 1000 }'
}

# Run RUNS iterations and print one row with all timings plus min/p50.
benchN() {
  # Usage: benchN LABEL CURL_ARGS...
  # Calls `ttfb_ms CURL_ARGS...` RUNS times (RUNS is a global), printing each
  # result as it arrives, then a summary: the minimum and the median (p50) of
  # the runs that produced a numeric value, or "ALL FAILED" if none did.
  local label="$1"
  shift
  local run t n
  local -a valid=() sorted=()

  printf " %-50s " "$label"
  for ((run = 1; run <= RUNS; run++)); do
    t=$(ttfb_ms "$@")
    printf "%-9s" "$t"
    # Only purely numeric results count towards the statistics; everything
    # else (e.g. ERR / FAIL(...)) is shown in the row but left out.
    [[ "$t" =~ ^[0-9]+$ ]] && valid+=("$t")
    sleep 0.4 # short pause between requests
  done

  n=${#valid[@]}
  if ((n == 0)); then
    printf " │ ALL FAILED\n"
    return
  fi

  mapfile -t sorted < <(printf '%s\n' "${valid[@]}" | sort -n)
  # Median: for an even count take the (integer) mean of the two middle values
  # instead of the upper one, so two valid runs do not report their maximum.
  printf " │ min=%sms p50=%sms\n" "${sorted[0]}" \
    "$(((sorted[(n - 1) / 2] + sorted[n / 2]) / 2))"
}

# OpenAI-style streaming request body (used below for OpenRouter, OpenAI and Groq).
#   $1 - model id
#   $2 - optional JSON object merged into the top level of the body
openai_body() {
  local model="$1"
  local extra="${2:-}"
  # An absent/empty second argument means "merge nothing".
  [[ -n "$extra" ]] || extra='{}'
  jq -nc \
    --arg model "$model" \
    --arg system "$SYSTEM_PROMPT" \
    --arg user "$USER_MSG" \
    --argjson maxtok "$MAX_TOK" \
    --argjson extra "$extra" \
    '{model:$model, stream:true, max_tokens:$maxtok,
      messages:[{role:"system",content:$system},{role:"user",content:$user}]}
     + $extra'
}

# Anthropic Messages API streaming body; the system prompt is a top-level field.
#   $1 - model id
anthropic_body() {
  local model="$1"
  jq -nc \
    --arg model "$model" \
    --arg system "$SYSTEM_PROMPT" \
    --arg user "$USER_MSG" \
    --argjson maxtok "$MAX_TOK" \
    '{model:$model, stream:true, max_tokens:$maxtok, system:$system,
      messages:[{role:"user",content:$user}]}'
}

# Gemini generateContent body. Takes no arguments: the model is part of the
# request URL, not of the body.
gemini_body() {
  jq -nc \
    --arg system "$SYSTEM_PROMPT" \
    --arg user "$USER_MSG" \
    --argjson maxtok "$MAX_TOK" \
    '{contents:[{parts:[{text:$user}]}],
      systemInstruction:{parts:[{text:$system}]},
      generationConfig:{maxOutputTokens:$maxtok}}'
}

# ── header ───────────────────────────────────────────────────────────────────
rule="═══════════════════════════════════════════════════════════════════"

echo
echo "$rule"
echo " LLM TTFB Benchmark — $RUNS runs each — $(date '+%Y-%m-%d %H:%M:%S')"
# Best effort: show where the benchmark runs from; "?" if the lookup fails.
country=$(curl -s --max-time 3 ipinfo.io/country 2>/dev/null || echo "?")
ip=$(curl -s --max-time 3 ifconfig.co 2>/dev/null || echo "?")
echo " Source: $country / $ip"
echo "$rule"

# NOTE(review): every API key below is handed to curl on its command line (and
# the Gemini key inside the URL), so it is visible in the process list while a
# request is running.

# ── via OpenRouter ───────────────────────────────────────────────────────────
if [[ -n "${OPENROUTER_API_KEY:-}" ]]; then
  echo
  echo "── via OpenRouter (default load-balancing) ──"
  for m in \
    "anthropic/claude-haiku-4.5" \
    "anthropic/claude-sonnet-4.5" \
    "anthropic/claude-3.5-haiku" \
    "openai/gpt-4o-mini" \
    "google/gemini-2.0-flash-001" \
    "meta-llama/llama-3.3-70b-instruct"; do
    benchN "OR $m" \
      -X POST https://openrouter.ai/api/v1/chat/completions \
      -H "Content-Type: application/json" \
      -H "Authorization: Bearer $OPENROUTER_API_KEY" \
      -H "HTTP-Referer: https://rebreak.org" \
      -H "X-Title: ReBreak-Bench" \
      -d "$(openai_body "$m")"
  done

  echo
  echo "── via OpenRouter + provider:{sort:latency} ──"
  for m in \
    "anthropic/claude-haiku-4.5" \
    "anthropic/claude-sonnet-4.5"; do
    benchN "ORL $m" \
      -X POST https://openrouter.ai/api/v1/chat/completions \
      -H "Content-Type: application/json" \
      -H "Authorization: Bearer $OPENROUTER_API_KEY" \
      -H "HTTP-Referer: https://rebreak.org" \
      -H "X-Title: ReBreak-Bench" \
      -d "$(openai_body "$m" '{"provider":{"sort":"latency"}}')"
  done
else
  echo
  echo "(skip OpenRouter — OPENROUTER_API_KEY nicht gesetzt)"
fi

# ── Anthropic direct ─────────────────────────────────────────────────────────
if [[ -n "${ANTHROPIC_API_KEY:-}" ]]; then
  echo
  echo "── via Anthropic direkt ──"
  for m in claude-haiku-4-5 claude-sonnet-4-5; do
    benchN "ANT $m" \
      -X POST https://api.anthropic.com/v1/messages \
      -H "Content-Type: application/json" \
      -H "x-api-key: $ANTHROPIC_API_KEY" \
      -H "anthropic-version: 2023-06-01" \
      -d "$(anthropic_body "$m")"
  done
else
  echo
  echo "(skip Anthropic direkt — ANTHROPIC_API_KEY nicht gesetzt)"
fi

# ── OpenAI direct ────────────────────────────────────────────────────────────
if [[ -n "${OPENAI_API_KEY:-}" ]]; then
  echo
  echo "── via OpenAI direkt ──"
  for m in gpt-4o-mini gpt-4o; do
    benchN "OAI $m" \
      -X POST https://api.openai.com/v1/chat/completions \
      -H "Content-Type: application/json" \
      -H "Authorization: Bearer $OPENAI_API_KEY" \
      -d "$(openai_body "$m")"
  done
else
  echo
  echo "(skip OpenAI direkt — OPENAI_API_KEY nicht gesetzt)"
fi

# ── Groq direct ──────────────────────────────────────────────────────────────
if [[ -n "${GROQ_API_KEY:-}" ]]; then
  echo
  echo "── via Groq direkt (LPU hardware) ──"
  for m in llama-3.3-70b-versatile llama-3.1-8b-instant; do
    benchN "GRQ $m" \
      -X POST https://api.groq.com/openai/v1/chat/completions \
      -H "Content-Type: application/json" \
      -H "Authorization: Bearer $GROQ_API_KEY" \
      -d "$(openai_body "$m")"
  done
else
  echo
  echo "(skip Groq — GROQ_API_KEY nicht gesetzt)"
fi

# ── Gemini direct ────────────────────────────────────────────────────────────
# Either variable name is accepted; GEMINI_API_KEY wins when both are set.
GEM_KEY="${GEMINI_API_KEY:-${GOOGLE_GENERATIVE_AI_API_KEY:-}}"
if [[ -n "$GEM_KEY" ]]; then
  echo
  echo "── via Gemini direkt ──"
  for m in gemini-2.0-flash gemini-1.5-flash; do
    benchN "GEM $m" \
      -X POST "https://generativelanguage.googleapis.com/v1beta/models/${m}:streamGenerateContent?key=${GEM_KEY}&alt=sse" \
      -H "Content-Type: application/json" \
      -d "$(gemini_body)"
  done
else
  echo
  echo "(skip Gemini direkt — GEMINI_API_KEY nicht gesetzt)"
fi

echo
echo "$rule"
echo " done. min = bestes TTFB, p50 = median über $RUNS Runs"
echo "$rule"