From 3a4e1ecfbab1462706c7d1fb50c9c9bc03903db4 Mon Sep 17 00:00:00 2001 From: chahinebrini Date: Sun, 31 May 2026 01:07:10 +0200 Subject: [PATCH] feat(coach): switch Lyra to Gemini 2.5 Flash Lite (Groq+OpenRouter quotas dead) - Primary: gemini-2.5-flash-lite (~789ms TTFR, ~10x cheaper than Haiku, no reasoning overhead) - Fallback 1: gemini-2.5-flash (smarter when Lite overloaded) - Fallback 2: gpt-4o-mini (anchor on different provider) - message.post.ts: candidates chain replaced - sos-stream.get.ts: gemini-flash-lite default + auto-fallback to gpt-4o-mini if key missing - nitro.config.ts: geminiApiKey runtimeConfig - start-staging.sh: GEMINI_API_KEY export + NITRO_GEMINI_API_KEY OpenRouter credits = 0, Groq TPD exhausted - users get 503 currently. --- backend/nitro.config.ts | 1 + backend/server/api/coach/message.post.ts | 37 +++++++++++----------- backend/server/api/coach/sos-stream.get.ts | 22 ++++++++++--- backend/start-staging.sh | 2 ++ 4 files changed, 39 insertions(+), 23 deletions(-) diff --git a/backend/nitro.config.ts b/backend/nitro.config.ts index 4546424..0309914 100644 --- a/backend/nitro.config.ts +++ b/backend/nitro.config.ts @@ -40,6 +40,7 @@ export default defineNitroConfig({ openaiApiKey: process.env.OPENAI_API_KEY ?? process.env.NUXT_OPENAI_API_KEY ?? "", groqApiKey: process.env.GROQ_API_KEY ?? process.env.NUXT_GROQ_API_KEY ?? "", googleAiApiKey: process.env.GOOGLE_AI_API_KEY ?? "", + geminiApiKey: process.env.GEMINI_API_KEY ?? "", // ─── TTS-Provider ──────────────────────────────────────────────────── googleApiKey: process.env.GOOGLE_API_KEY ?? process.env.NUXT_GOOGLE_API_KEY ?? 
"", diff --git a/backend/server/api/coach/message.post.ts b/backend/server/api/coach/message.post.ts index cdc4830..e20c5c1 100644 --- a/backend/server/api/coach/message.post.ts +++ b/backend/server/api/coach/message.post.ts @@ -361,6 +361,14 @@ const PROVIDER_CONFIG = { url: "https://openrouter.ai/api/v1/chat/completions", keyName: "openrouterApiKey" as const, }, + gemini: { + url: "https://generativelanguage.googleapis.com/v1beta/openai/chat/completions", + keyName: "geminiApiKey" as const, + }, + openai: { + url: "https://api.openai.com/v1/chat/completions", + keyName: "openaiApiKey" as const, + }, } as const; const FEEDBACK_DETECTION_PROMPT = `Du analysierst eine Nutzer-Nachricht aus einer Gambling-Recovery-App. @@ -595,28 +603,21 @@ export default defineEventHandler(async (event) => { } // ─── Tier-basiertes LLM-Routing (analog sos-stream.get.ts) ───────────────── - // Free / Pro → Groq Llama 3.3 70B (schnell, sachlich) - // Legend → OpenRouter Haiku 4.5 (warm, premium) - // Kein sosMode-Override mehr — Coach-Page hat eigenes Routing. + // Default-Chain: Gemini Flash Lite (schnell+billig+warm) → Gemini 2.5 Flash + // (smarter Fallback) → OpenAI gpt-4o-mini (Last-Resort, anderer Provider). + // OpenRouter + Groq sind aktuell ohne Quota/Credits — entfernt aus Chain. const planRaw = (profile?.plan ?? "free").toLowerCase(); const plan = planRaw === "premium" ? "legend" : planRaw === "standard" ? "pro" : planRaw; - const llmProvider = plan === "legend" ? "openrouter-haiku" : "groq-llama"; + const llmProvider = "gemini-flash-lite"; - type Candidate = { provider: "groq" | "openrouter"; model: string }; - const candidates: Candidate[] = - llmProvider === "openrouter-haiku" - ? 
[ - { provider: "openrouter", model: "anthropic/claude-haiku-4.5" }, - { provider: "openrouter", model: "anthropic/claude-3.5-haiku" }, - { provider: "groq", model: "llama-3.3-70b-versatile" }, - ] - : [ - { provider: "groq", model: "llama-3.3-70b-versatile" }, - { provider: "groq", model: "llama-3.1-8b-instant" }, - { provider: "openrouter", model: "meta-llama/llama-3.3-70b-instruct" }, - ]; + type Candidate = { provider: "groq" | "openrouter" | "gemini" | "openai"; model: string }; + const candidates: Candidate[] = [ + { provider: "gemini", model: "gemini-2.5-flash-lite" }, + { provider: "gemini", model: "gemini-2.5-flash" }, + { provider: "openai", model: "gpt-4o-mini" }, + ]; - async function tryModel(providerName: "groq" | "openrouter", model: string) { + async function tryModel(providerName: "groq" | "openrouter" | "gemini" | "openai", model: string) { const p = PROVIDER_CONFIG[providerName]; const key = config[p.keyName]; if (!key) return null; diff --git a/backend/server/api/coach/sos-stream.get.ts b/backend/server/api/coach/sos-stream.get.ts index 84543c4..621e71c 100644 --- a/backend/server/api/coach/sos-stream.get.ts +++ b/backend/server/api/coach/sos-stream.get.ts @@ -219,17 +219,20 @@ export default defineEventHandler(async (event) => { if (userToggle && userToggle !== "auto") { llmProvider = userToggle; } else { - const planRaw = (profile?.plan ?? "free").toLowerCase(); - // legacy "premium"/"standard" → legend/pro - const plan = planRaw === "premium" ? "legend" : planRaw === "standard" ? "pro" : planRaw; - llmProvider = plan === "legend" ? "openrouter-haiku" : "groq-llama"; + // Default chain: Gemini Flash Lite (schnell+billig+warm) für alle Pläne. + // OpenRouter (Haiku/Sonnet) + Groq haben aktuell keine Quota/Credits. 
+ llmProvider = "gemini-flash-lite"; } let upstreamUrl: string; let upstreamKey: string | undefined; let upstreamModel: string; const upstreamHeaders: Record<string, string> = { "Content-Type": "application/json" }; let upstreamProviderField: { sort: string } | undefined; - if (llmProvider === "groq-llama") { + if (llmProvider === "gemini-flash-lite" || llmProvider === "gemini-flash") { + upstreamUrl = "https://generativelanguage.googleapis.com/v1beta/openai/chat/completions"; + upstreamKey = config.geminiApiKey as string | undefined; + upstreamModel = llmProvider === "gemini-flash" ? "gemini-2.5-flash" : "gemini-2.5-flash-lite"; + } else if (llmProvider === "groq-llama") { upstreamUrl = "https://api.groq.com/openai/v1/chat/completions"; upstreamKey = config.groqApiKey as string | undefined; upstreamModel = "llama-3.3-70b-versatile"; @@ -243,6 +246,15 @@ export default defineEventHandler(async (event) => { upstreamHeaders["X-Title"] = "ReBreak SOS"; upstreamProviderField = { sort: "latency" }; } + // Fallback: wenn gewählter Provider keinen Key hat → OpenAI gpt-4o-mini als Anker + if (!upstreamKey && config.openaiApiKey) { + console.warn(`[coach/sos-stream] ${llmProvider} key missing → fallback openai/gpt-4o-mini`); + llmProvider = "openai-mini"; + upstreamUrl = "https://api.openai.com/v1/chat/completions"; + upstreamKey = config.openaiApiKey as string; + upstreamModel = "gpt-4o-mini"; + upstreamProviderField = undefined; + } if (!upstreamKey) { throw createError({ statusCode: 503, message: `API key for ${llmProvider} fehlt` }); } diff --git a/backend/start-staging.sh b/backend/start-staging.sh index f5a23ca..30ab7d2 100755 --- a/backend/start-staging.sh +++ b/backend/start-staging.sh @@ -44,6 +44,7 @@ exec infisical run \ export OPENROUTER_API_KEY="${OPENROUTER_API_KEY:-${NUXT_OPENROUTER_API_KEY:-}}" export GROQ_API_KEY="${GROQ_API_KEY:-${NUXT_GROQ_API_KEY:-}}" export GOOGLE_API_KEY="${GOOGLE_API_KEY:-${NUXT_GOOGLE_API_KEY:-}}" + export GEMINI_API_KEY="${GEMINI_API_KEY:-}" export 
DEEPGRAM_API_KEY="${DEEPGRAM_API_KEY:-${NUXT_DEEPGRAM_API_KEY:-}}" export DATABASE_URL="${DATABASE_URL:-${NUXT_DATABASE_URL:-}}" export LYRA_BOT_USER_ID="${LYRA_BOT_USER_ID:-${NUXT_LYRA_BOT_USER_ID:-}}" @@ -59,6 +60,7 @@ exec infisical run \ [[ -n "${GROQ_API_KEY:-}" ]] && export NITRO_GROQ_API_KEY="$GROQ_API_KEY" [[ -n "${GOOGLE_AI_API_KEY:-}" ]] && export NITRO_GOOGLE_AI_API_KEY="$GOOGLE_AI_API_KEY" [[ -n "${GOOGLE_API_KEY:-}" ]] && export NITRO_GOOGLE_API_KEY="$GOOGLE_API_KEY" + [[ -n "${GEMINI_API_KEY:-}" ]] && export NITRO_GEMINI_API_KEY="$GEMINI_API_KEY" [[ -n "${DEEPGRAM_API_KEY:-}" ]] && export NITRO_DEEPGRAM_API_KEY="$DEEPGRAM_API_KEY" [[ -n "${CARTESIA_API_KEY:-}" ]] && export NITRO_CARTESIA_API_KEY="$CARTESIA_API_KEY" [[ -n "${ELEVENLABS_API_KEY:-}" ]] && export NITRO_ELEVENLABS_API_KEY="$ELEVENLABS_API_KEY"