diff --git a/backend/nitro.config.ts b/backend/nitro.config.ts
index d63903f..1584870 100644
--- a/backend/nitro.config.ts
+++ b/backend/nitro.config.ts
@@ -81,6 +81,9 @@ export default defineNitroConfig({
       "",
     openaiApiKey:
       process.env.OPENAI_API_KEY ?? process.env.NUXT_OPENAI_API_KEY ?? "",
+    // Direct Anthropic path (native Messages API) — primary Lyra provider.
+    anthropicApiKey:
+      process.env.ANTHROPIC_API_KEY ?? process.env.NUXT_ANTHROPIC_API_KEY ?? "",
     groqApiKey: process.env.GROQ_API_KEY ?? process.env.NUXT_GROQ_API_KEY ?? "",
     googleAiApiKey: process.env.GOOGLE_AI_API_KEY ?? "",
     geminiApiKey: process.env.GEMINI_API_KEY ?? "",
diff --git a/backend/server/api/coach/message.post.ts b/backend/server/api/coach/message.post.ts
index eb1fce2..374ae17 100644
--- a/backend/server/api/coach/message.post.ts
+++ b/backend/server/api/coach/message.post.ts
@@ -654,28 +654,69 @@ export default defineEventHandler(async (event) => {
   }
 
   // ─── Tier-basiertes LLM-Routing (analog sos-stream.get.ts) ─────────────────
-  // Default-Chain: Gemini Flash Lite (schnell+billig+warm) → Gemini 2.5 Flash
-  // (smarter Fallback) → OpenAI gpt-4o-mini (Last-Resort, anderer Provider).
-  // OpenRouter + Groq sind aktuell ohne Quota/Credits — entfernt aus Chain.
+  // Default chain: Anthropic Haiku 4.5 (direct, native Messages API — warm
+  // Lyra tone, cheap + reliable) → Gemini Flash Lite → Gemini 2.5 Flash →
+  // OpenAI gpt-4o-mini (last resort, different provider). OpenRouter + Groq
+  // currently have no quota/credits — removed from the chain.
   const planRaw = (profile?.plan ?? "free").toLowerCase();
   const plan =
     planRaw === "premium" ? "legend" : planRaw === "standard" ? "pro" : planRaw;
-  const llmProvider = "gemini-flash-lite";
+  const llmProvider = "anthropic-haiku";
 
   type Candidate = {
-    provider: "groq" | "openrouter" | "gemini" | "openai";
+    provider: "anthropic" | "groq" | "openrouter" | "gemini" | "openai";
     model: string;
   };
   const candidates: Candidate[] = [
+    { provider: "anthropic", model: "claude-haiku-4-5" },
     { provider: "gemini", model: "gemini-2.5-flash-lite" },
     { provider: "gemini", model: "gemini-2.5-flash" },
     { provider: "openai", model: "gpt-4o-mini" },
   ];
 
   async function tryModel(
-    providerName: "groq" | "openrouter" | "gemini" | "openai",
+    providerName: "anthropic" | "groq" | "openrouter" | "gemini" | "openai",
     model: string,
   ) {
+    // ── Direct Anthropic path: native Messages API (not the OpenAI shape) ──
+    if (providerName === "anthropic") {
+      const key = config.anthropicApiKey as string | undefined;
+      if (!key) return null;
+      // Anthropic requires a user-first history; .slice(-8) may start with assistant.
+      const aMsgs =
+        trimmed[0]?.role === "assistant" ? trimmed.slice(1) : trimmed;
+      try {
+        const res = await $fetch<{ content: { type: string; text: string }[] }>(
+          "https://api.anthropic.com/v1/messages",
+          {
+            method: "POST",
+            headers: {
+              "x-api-key": key,
+              "anthropic-version": "2023-06-01",
+              "Content-Type": "application/json",
+            },
+            body: {
+              model,
+              max_tokens: 500,
+              system: systemPrompt,
+              messages: aMsgs,
+            },
+            timeout: 15000,
+          },
+        );
+        return (
+          res.content?.find((b) => b.type === "text")?.text ?? null
+        );
+      } catch (err: any) {
+        console.warn(
+          `[coach/tryModel] anthropic:${model} FAIL:`,
+          err?.statusCode ?? err?.status ?? "?",
+          err?.data?.error?.message ?? err?.message ?? String(err).slice(0, 200),
+        );
+        return null;
+      }
+    }
+
     const p = PROVIDER_CONFIG[providerName];
     const key = config[p.keyName];
     if (!key) return null;
diff --git a/backend/server/api/coach/sos-stream.get.ts b/backend/server/api/coach/sos-stream.get.ts
index 5e39985..98a41df 100644
--- a/backend/server/api/coach/sos-stream.get.ts
+++ b/backend/server/api/coach/sos-stream.get.ts
@@ -231,77 +231,167 @@ export default defineEventHandler(async (event) => {
-  // Tier-based LLM: Pro=Groq (sachlich+schnell), Legend=Haiku 4.5 (warm+fast),
-  // Free=Groq (kostenkontrolle). User kann via Toggle override (debug);
-  // 'auto' (oder undefined) → plan-based default.
+  // Default chain: Anthropic Haiku 4.5 (direct, native Messages API — warm
+  // Lyra tone, cheap + reliable) → Gemini Flash Lite → OpenAI gpt-4o-mini.
+  // The FIRST candidate that delivers an OK stream wins — SOS must never go
+  // unanswered in a crisis context (before: single fetch without fallback →
+  // crisis text right away on provider failure). Debug toggle overrides the chain, no fallback.
   const userToggle = sessionData.llmProvider;
-  let llmProvider: string;
-  if (userToggle && userToggle !== "auto") {
-    llmProvider = userToggle;
-  } else {
-    // Default chain: Gemini Flash Lite (schnell+billig+warm) für alle Pläne.
-    // OpenRouter (Haiku/Sonnet) + Groq haben aktuell keine Quota/Credits.
-    llmProvider = "gemini-flash-lite";
-  }
-  let upstreamUrl: string;
-  let upstreamKey: string | undefined;
-  let upstreamModel: string;
-  const upstreamHeaders: Record<string, string> = { "Content-Type": "application/json" };
-  let upstreamProviderField: { sort: string } | undefined;
-  if (llmProvider === "gemini-flash-lite" || llmProvider === "gemini-flash") {
-    upstreamUrl = "https://generativelanguage.googleapis.com/v1beta/openai/chat/completions";
-    upstreamKey = config.geminiApiKey as string | undefined;
-    upstreamModel = llmProvider === "gemini-flash" ? "gemini-2.5-flash" : "gemini-2.5-flash-lite";
-  } else if (llmProvider === "groq-llama") {
-    upstreamUrl = "https://api.groq.com/openai/v1/chat/completions";
-    upstreamKey = config.groqApiKey as string | undefined;
-    upstreamModel = "llama-3.3-70b-versatile";
-  } else {
-    upstreamUrl = "https://openrouter.ai/api/v1/chat/completions";
-    upstreamKey = config.openrouterApiKey as string | undefined;
-    upstreamModel = llmProvider === "openrouter-haiku"
-      ? "anthropic/claude-haiku-4.5"
-      : "anthropic/claude-sonnet-4.5";
-    upstreamHeaders["HTTP-Referer"] = "https://rebreak.org";
-    upstreamHeaders["X-Title"] = "ReBreak SOS";
-    upstreamProviderField = { sort: "latency" };
-  }
-  // Fallback: wenn gewählter Provider keinen Key hat → OpenAI gpt-4o-mini als Anker
-  if (!upstreamKey && config.openaiApiKey) {
-    console.warn(`[coach/sos-stream] ${llmProvider} key missing → fallback openai/gpt-4o-mini`);
-    llmProvider = "openai-mini";
-    upstreamUrl = "https://api.openai.com/v1/chat/completions";
-    upstreamKey = config.openaiApiKey as string;
-    upstreamModel = "gpt-4o-mini";
-    upstreamProviderField = undefined;
-  }
-  if (!upstreamKey) {
-    throw createError({ statusCode: 503, message: `API key for ${llmProvider} fehlt` });
-  }
-  upstreamHeaders.Authorization = `Bearer ${upstreamKey}`;
-  console.log(`[coach/sos-stream] using provider=${llmProvider} model=${upstreamModel}`);
+  type SosCandidate = { provider: string; model: string };
+  const candidates: SosCandidate[] =
+    userToggle && userToggle !== "auto"
+      ? [{ provider: userToggle, model: "" }]
+      : [
+          { provider: "anthropic-haiku", model: "claude-haiku-4-5" },
+          { provider: "gemini-flash-lite", model: "gemini-2.5-flash-lite" },
+          { provider: "openai-mini", model: "gpt-4o-mini" },
+        ];
 
-  const upstream = await fetch(upstreamUrl, {
-    method: "POST",
-    headers: upstreamHeaders,
-    body: JSON.stringify({
-      model: upstreamModel,
-      max_tokens: 400,
-      stream: true,
-      messages: [{ role: "system", content: systemPrompt }, ...trimmed],
-      ...(upstreamProviderField ? { provider: upstreamProviderField } : {}),
-    }),
-  });
+  // Builds the upstream request config for a provider; null if the key is missing
+  // or the provider is unknown. Anthropic uses the native Messages API (top-level
+  // system, x-api-key); all others use the OpenAI-compatible /chat/completions shape.
+  const buildUpstream = (
+    provider: string,
+  ): { url: string; headers: Record<string, string>; body: string; label: string } | null => {
+    const headers: Record<string, string> = { "Content-Type": "application/json" };
+    if (provider === "anthropic-haiku") {
+      const key = config.anthropicApiKey as string | undefined;
+      if (!key) return null;
+      headers["x-api-key"] = key;
+      headers["anthropic-version"] = "2023-06-01";
+      // Anthropic requires a user-first history; trimmed may start with assistant.
+      const aMsgs = trimmed[0]?.role === "assistant" ? trimmed.slice(1) : trimmed;
+      return {
+        url: "https://api.anthropic.com/v1/messages",
+        headers,
+        body: JSON.stringify({
+          model: "claude-haiku-4-5",
+          max_tokens: 400,
+          stream: true,
+          system: systemPrompt,
+          messages: aMsgs,
+        }),
+        label: "anthropic:claude-haiku-4-5",
+      };
+    }
+    if (provider === "gemini-flash-lite" || provider === "gemini-flash") {
+      const key = config.geminiApiKey as string | undefined;
+      if (!key) return null;
+      headers.Authorization = `Bearer ${key}`;
+      const model = provider === "gemini-flash" ? "gemini-2.5-flash" : "gemini-2.5-flash-lite";
+      return {
+        url: "https://generativelanguage.googleapis.com/v1beta/openai/chat/completions",
+        headers,
+        body: JSON.stringify({
+          model,
+          max_tokens: 400,
+          stream: true,
+          messages: [{ role: "system", content: systemPrompt }, ...trimmed],
+        }),
+        label: `gemini:${model}`,
+      };
+    }
+    if (provider === "groq-llama") {
+      const key = config.groqApiKey as string | undefined;
+      if (!key) return null;
+      headers.Authorization = `Bearer ${key}`;
+      return {
+        url: "https://api.groq.com/openai/v1/chat/completions",
+        headers,
+        body: JSON.stringify({
+          model: "llama-3.3-70b-versatile",
+          max_tokens: 400,
+          stream: true,
+          messages: [{ role: "system", content: systemPrompt }, ...trimmed],
+        }),
+        label: "groq:llama-3.3-70b",
+      };
+    }
+    if (provider === "openrouter-haiku" || provider === "openrouter-sonnet") {
+      const key = config.openrouterApiKey as string | undefined;
+      if (!key) return null;
+      headers.Authorization = `Bearer ${key}`;
+      headers["HTTP-Referer"] = "https://rebreak.org";
+      headers["X-Title"] = "ReBreak SOS";
+      const model =
+        provider === "openrouter-haiku"
+          ? "anthropic/claude-haiku-4.5"
+          : "anthropic/claude-sonnet-4.5";
+      return {
+        url: "https://openrouter.ai/api/v1/chat/completions",
+        headers,
+        body: JSON.stringify({
+          model,
+          max_tokens: 400,
+          stream: true,
+          messages: [{ role: "system", content: systemPrompt }, ...trimmed],
+          provider: { sort: "latency" },
+        }),
+        label: `openrouter:${model}`,
+      };
+    }
+    // openai-mini — last anchor (different provider).
+    if (provider === "openai-mini") {
+      const key = config.openaiApiKey as string | undefined;
+      if (!key) return null;
+      headers.Authorization = `Bearer ${key}`;
+      return {
+        url: "https://api.openai.com/v1/chat/completions",
+        headers,
+        body: JSON.stringify({
+          model: "gpt-4o-mini",
+          max_tokens: 400,
+          stream: true,
+          messages: [{ role: "system", content: systemPrompt }, ...trimmed],
+        }),
+        label: "openai:gpt-4o-mini",
+      };
+    }
+    return null;
+  };
+
+  // First candidate that delivers an OK stream wins. Each attempt gets a
+  // time-to-headers budget so a hanging provider cannot stall the fallback
+  // chain; the timer is cleared before the body is streamed.
+  const SOS_CONNECT_TIMEOUT_MS = 8000;
+  let upstream: Response | undefined;
+  let usedLabel = "none";
+  for (const c of candidates) {
+    const cfg = buildUpstream(c.provider);
+    if (!cfg) {
+      console.warn(`[coach/sos-stream] ${c.provider} skipped (no key or unknown provider)`);
+      continue;
+    }
+    const controller = new AbortController();
+    const connectTimer = setTimeout(() => controller.abort(), SOS_CONNECT_TIMEOUT_MS);
+    try {
+      const resp = await fetch(cfg.url, {
+        method: "POST",
+        headers: cfg.headers,
+        body: cfg.body,
+        signal: controller.signal,
+      });
+      if (resp.ok && resp.body) {
+        upstream = resp;
+        usedLabel = cfg.label;
+        break;
+      }
+      const errText = await resp.text().catch(() => "");
+      console.error(
+        `[coach/sos-stream] ${cfg.label} upstream error:`,
+        resp.status,
+        errText.slice(0, 300),
+      );
+    } catch (e) {
+      console.error(`[coach/sos-stream] ${cfg.label} fetch threw:`, e);
+    } finally {
+      // Runs on break too: stop the timer so it never aborts a winning stream.
+      clearTimeout(connectTimer);
+    }
+  }
 
   // ── Upstream-Fehler: SSE-Header trotzdem setzen, dann Fallback senden ──────
   // (b) Timeout/Leer-Fallback: Kein 502-Throw im Krisen-Kontext — User muss
   // immer eine Antwort sehen. Bei LLM-Fehler sofort Krisen-Fallback liefern.
-  const upstreamFailed = !upstream.ok || !upstream.body;
-  if (upstreamFailed) {
-    const errText = await upstream.text().catch(() => "");
-    console.error(
-      "[coach/sos-stream] upstream error:",
-      upstream.status,
-      errText.slice(0, 300),
-    );
-  }
+  const upstreamFailed = !upstream || !upstream.body;
+  console.log(`[coach/sos-stream] using ${usedLabel}`);
 
   // Direkt zu Node res schreiben — sendStream(ReadableStream) pumpt pull() in Nitro nicht zuverlässig
   const res = event.node.res;
@@ -352,7 +442,7 @@ export default defineEventHandler(async (event) => {
     return;
   }
 
-  const reader = upstream.body.getReader();
+  const reader = upstream!.body!.getReader();
   const decoder = new TextDecoder();
   let buffer = "";
   let fullText = "";
@@ -382,8 +472,14 @@ export default defineEventHandler(async (event) => {
         try {
           const json = JSON.parse(payload) as {
             choices?: { delta?: { content?: string } }[];
+            type?: string;
+            delta?: { type?: string; text?: string };
           };
-          const delta = json.choices?.[0]?.delta?.content;
+          // OpenAI-compatible: choices[].delta.content
+          // Anthropic native: content_block_delta event → delta.text
+          const delta =
+            json.choices?.[0]?.delta?.content ??
+            (json.type === "content_block_delta" ? json.delta?.text : undefined);
           if (delta) {
             fullText += delta;
             chunkCount++;
diff --git a/backend/server/utils/plan-features.ts b/backend/server/utils/plan-features.ts
index c1822fa..dae1268 100644
--- a/backend/server/utils/plan-features.ts
+++ b/backend/server/utils/plan-features.ts
@@ -120,16 +120,12 @@ export const PLAN_LIMITS: Record, PlanLimits> = {
       { provider: "groq", model: "llama-3.3-70b-versatile" },
     ],
     aiProvider: "openrouter",
-    // ⚠️ TEMPORÄR (2026-06-08): ElevenLabs-Account hat ein Zahlungsproblem
-    // (HTTP 401 payment_issue) → Legend würde sonst 502 bekommen. Übergangsweise
-    // auf Cartesia (sonic-3) umgeleitet, damit Legend-User trotzdem Sprachausgabe
-    // haben. ZURÜCKSETZEN sobald die ElevenLabs-Rechnung bezahlt ist:
-    // provider: "elevenlabs", model: "eleven_turbo_v2_5", dailyQuotaSeconds: 0
-    // (Hinweis: mit Cartesia greift der Legend-Voice-Picker / lyraVoiceId nicht —
-    // Legend hört solange Cartesias Default-Stimme wie Pro.)
+    // Legend → ElevenLabs Turbo v2.5, unlimited. Voice picker / lyraVoiceId applies.
+    // (On 2026-06-08 temporarily rerouted to Cartesia due to an ElevenLabs payment issue;
+    // 2026-06-11 new key + paid → reverted.)
     voice: {
-      provider: "cartesia",
-      model: "sonic-3",
+      provider: "elevenlabs",
+      model: "eleven_turbo_v2_5",
       dailyQuotaSeconds: 0, // Legend bleibt unlimited
     },
   },