feat(coach): Lyra-LLM auf direkten Anthropic Haiku 4.5 + SOS-Fallback-Kette; ElevenLabs reaktiviert
- Chat (message.post.ts): neuer nativer anthropic-Branch in tryModel (api.anthropic.com/v1/messages, x-api-key, system top-level), führt die Fallback-Kette claude-haiku-4-5 → gemini-flash-lite → gemini-flash → gpt-4o-mini.
- SOS (sos-stream.get.ts): Dispatch-Refactor mit buildUpstream() + Kandidaten-Fallback-Kette (anthropic → gemini → openai). Behebt strukturell den Bug "SOS liefert nur Krisen-Text" (vorher single fetch ohne Fallback). Nativer Anthropic-Stream: Delta-Parser liest content_block_delta.delta.text.
- nitro.config.ts: anthropicApiKey deklariert (ANTHROPIC_API_KEY).
- plan-features.ts: Legend-Voice zurück auf ElevenLabs eleven_turbo_v2_5 (Cartesia-Übergang nach Zahlungsproblem 2026-06-08 aufgehoben, neuer Key).

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
parent
21c1e31877
commit
1a270739bc
@ -81,6 +81,9 @@ export default defineNitroConfig({
|
|||||||
"",
|
"",
|
||||||
openaiApiKey:
|
openaiApiKey:
|
||||||
process.env.OPENAI_API_KEY ?? process.env.NUXT_OPENAI_API_KEY ?? "",
|
process.env.OPENAI_API_KEY ?? process.env.NUXT_OPENAI_API_KEY ?? "",
|
||||||
|
// Direkter Anthropic-Pfad (native Messages-API) — primärer Lyra-Provider.
|
||||||
|
anthropicApiKey:
|
||||||
|
process.env.ANTHROPIC_API_KEY ?? process.env.NUXT_ANTHROPIC_API_KEY ?? "",
|
||||||
groqApiKey: process.env.GROQ_API_KEY ?? process.env.NUXT_GROQ_API_KEY ?? "",
|
groqApiKey: process.env.GROQ_API_KEY ?? process.env.NUXT_GROQ_API_KEY ?? "",
|
||||||
googleAiApiKey: process.env.GOOGLE_AI_API_KEY ?? "",
|
googleAiApiKey: process.env.GOOGLE_AI_API_KEY ?? "",
|
||||||
geminiApiKey: process.env.GEMINI_API_KEY ?? "",
|
geminiApiKey: process.env.GEMINI_API_KEY ?? "",
|
||||||
|
|||||||
@ -654,28 +654,69 @@ export default defineEventHandler(async (event) => {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// ─── Tier-basiertes LLM-Routing (analog sos-stream.get.ts) ─────────────────
|
// ─── Tier-basiertes LLM-Routing (analog sos-stream.get.ts) ─────────────────
|
||||||
// Default-Chain: Gemini Flash Lite (schnell+billig+warm) → Gemini 2.5 Flash
|
// Default-Chain: Anthropic Haiku 4.5 (direkt, native Messages-API — warmer
|
||||||
// (smarter Fallback) → OpenAI gpt-4o-mini (Last-Resort, anderer Provider).
|
// Lyra-Ton, günstig+zuverlässig) → Gemini Flash Lite → Gemini 2.5 Flash →
|
||||||
// OpenRouter + Groq sind aktuell ohne Quota/Credits — entfernt aus Chain.
|
// OpenAI gpt-4o-mini (Last-Resort, anderer Provider). OpenRouter + Groq sind
|
||||||
|
// aktuell ohne Quota/Credits — entfernt aus Chain.
|
||||||
const planRaw = (profile?.plan ?? "free").toLowerCase();
|
const planRaw = (profile?.plan ?? "free").toLowerCase();
|
||||||
const plan =
|
const plan =
|
||||||
planRaw === "premium" ? "legend" : planRaw === "standard" ? "pro" : planRaw;
|
planRaw === "premium" ? "legend" : planRaw === "standard" ? "pro" : planRaw;
|
||||||
const llmProvider = "gemini-flash-lite";
|
const llmProvider = "anthropic-haiku";
|
||||||
|
|
||||||
type Candidate = {
|
type Candidate = {
|
||||||
provider: "groq" | "openrouter" | "gemini" | "openai";
|
provider: "anthropic" | "groq" | "openrouter" | "gemini" | "openai";
|
||||||
model: string;
|
model: string;
|
||||||
};
|
};
|
||||||
const candidates: Candidate[] = [
|
const candidates: Candidate[] = [
|
||||||
|
{ provider: "anthropic", model: "claude-haiku-4-5" },
|
||||||
{ provider: "gemini", model: "gemini-2.5-flash-lite" },
|
{ provider: "gemini", model: "gemini-2.5-flash-lite" },
|
||||||
{ provider: "gemini", model: "gemini-2.5-flash" },
|
{ provider: "gemini", model: "gemini-2.5-flash" },
|
||||||
{ provider: "openai", model: "gpt-4o-mini" },
|
{ provider: "openai", model: "gpt-4o-mini" },
|
||||||
];
|
];
|
||||||
|
|
||||||
async function tryModel(
|
async function tryModel(
|
||||||
providerName: "groq" | "openrouter" | "gemini" | "openai",
|
providerName: "anthropic" | "groq" | "openrouter" | "gemini" | "openai",
|
||||||
model: string,
|
model: string,
|
||||||
) {
|
) {
|
||||||
|
// ── Direkter Anthropic-Pfad: native Messages-API (kein OpenAI-Shape) ──
|
||||||
|
if (providerName === "anthropic") {
|
||||||
|
const key = config.anthropicApiKey as string | undefined;
|
||||||
|
if (!key) return null;
|
||||||
|
// Anthropic verlangt user-first; .slice(-8) kann mit assistant beginnen.
|
||||||
|
const aMsgs =
|
||||||
|
trimmed[0]?.role === "assistant" ? trimmed.slice(1) : trimmed;
|
||||||
|
try {
|
||||||
|
const res = await $fetch<{ content: { type: string; text: string }[] }>(
|
||||||
|
"https://api.anthropic.com/v1/messages",
|
||||||
|
{
|
||||||
|
method: "POST",
|
||||||
|
headers: {
|
||||||
|
"x-api-key": key,
|
||||||
|
"anthropic-version": "2023-06-01",
|
||||||
|
"Content-Type": "application/json",
|
||||||
|
},
|
||||||
|
body: {
|
||||||
|
model,
|
||||||
|
max_tokens: 500,
|
||||||
|
system: systemPrompt,
|
||||||
|
messages: aMsgs,
|
||||||
|
},
|
||||||
|
timeout: 15000,
|
||||||
|
},
|
||||||
|
);
|
||||||
|
return (
|
||||||
|
res.content?.find((b) => b.type === "text")?.text ?? null
|
||||||
|
);
|
||||||
|
} catch (err: any) {
|
||||||
|
console.warn(
|
||||||
|
`[coach/tryModel] anthropic:${model} FAIL:`,
|
||||||
|
err?.statusCode ?? err?.status ?? "?",
|
||||||
|
err?.data?.error?.message ?? err?.message ?? String(err).slice(0, 200),
|
||||||
|
);
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
const p = PROVIDER_CONFIG[providerName];
|
const p = PROVIDER_CONFIG[providerName];
|
||||||
const key = config[p.keyName];
|
const key = config[p.keyName];
|
||||||
if (!key) return null;
|
if (!key) return null;
|
||||||
|
|||||||
@ -231,77 +231,158 @@ export default defineEventHandler(async (event) => {
|
|||||||
// Tier-based LLM: Pro=Groq (sachlich+schnell), Legend=Haiku 4.5 (warm+fast),
|
// Tier-based LLM: Pro=Groq (sachlich+schnell), Legend=Haiku 4.5 (warm+fast),
|
||||||
// Free=Groq (kostenkontrolle). User kann via Toggle override (debug);
|
// Free=Groq (kostenkontrolle). User kann via Toggle override (debug);
|
||||||
// 'auto' (oder undefined) → plan-based default.
|
// 'auto' (oder undefined) → plan-based default.
|
||||||
|
// Default-Kette: Anthropic Haiku 4.5 (direkt, native Messages-API — warmer
|
||||||
|
// Lyra-Ton, günstig+zuverlässig) → Gemini Flash Lite → OpenAI gpt-4o-mini.
|
||||||
|
// Die ERSTE Kandidatin, die einen OK-Stream liefert, gewinnt — SOS darf im
|
||||||
|
// Krisen-Kontext nie ohne Antwort bleiben (vorher: single fetch ohne Fallback
|
||||||
|
// → bei Provider-Fail sofort Krisen-Text). Debug-Toggle override ohne Fallback.
|
||||||
const userToggle = sessionData.llmProvider;
|
const userToggle = sessionData.llmProvider;
|
||||||
let llmProvider: string;
|
type SosCandidate = { provider: string; model: string };
|
||||||
if (userToggle && userToggle !== "auto") {
|
const candidates: SosCandidate[] =
|
||||||
llmProvider = userToggle;
|
userToggle && userToggle !== "auto"
|
||||||
} else {
|
? [{ provider: userToggle, model: "" }]
|
||||||
// Default chain: Gemini Flash Lite (schnell+billig+warm) für alle Pläne.
|
: [
|
||||||
// OpenRouter (Haiku/Sonnet) + Groq haben aktuell keine Quota/Credits.
|
{ provider: "anthropic-haiku", model: "claude-haiku-4-5" },
|
||||||
llmProvider = "gemini-flash-lite";
|
{ provider: "gemini-flash-lite", model: "gemini-2.5-flash-lite" },
|
||||||
}
|
{ provider: "openai-mini", model: "gpt-4o-mini" },
|
||||||
let upstreamUrl: string;
|
];
|
||||||
let upstreamKey: string | undefined;
|
|
||||||
let upstreamModel: string;
|
|
||||||
const upstreamHeaders: Record<string, string> = { "Content-Type": "application/json" };
|
|
||||||
let upstreamProviderField: { sort: string } | undefined;
|
|
||||||
if (llmProvider === "gemini-flash-lite" || llmProvider === "gemini-flash") {
|
|
||||||
upstreamUrl = "https://generativelanguage.googleapis.com/v1beta/openai/chat/completions";
|
|
||||||
upstreamKey = config.geminiApiKey as string | undefined;
|
|
||||||
upstreamModel = llmProvider === "gemini-flash" ? "gemini-2.5-flash" : "gemini-2.5-flash-lite";
|
|
||||||
} else if (llmProvider === "groq-llama") {
|
|
||||||
upstreamUrl = "https://api.groq.com/openai/v1/chat/completions";
|
|
||||||
upstreamKey = config.groqApiKey as string | undefined;
|
|
||||||
upstreamModel = "llama-3.3-70b-versatile";
|
|
||||||
} else {
|
|
||||||
upstreamUrl = "https://openrouter.ai/api/v1/chat/completions";
|
|
||||||
upstreamKey = config.openrouterApiKey as string | undefined;
|
|
||||||
upstreamModel = llmProvider === "openrouter-haiku"
|
|
||||||
? "anthropic/claude-haiku-4.5"
|
|
||||||
: "anthropic/claude-sonnet-4.5";
|
|
||||||
upstreamHeaders["HTTP-Referer"] = "https://rebreak.org";
|
|
||||||
upstreamHeaders["X-Title"] = "ReBreak SOS";
|
|
||||||
upstreamProviderField = { sort: "latency" };
|
|
||||||
}
|
|
||||||
// Fallback: wenn gewählter Provider keinen Key hat → OpenAI gpt-4o-mini als Anker
|
|
||||||
if (!upstreamKey && config.openaiApiKey) {
|
|
||||||
console.warn(`[coach/sos-stream] ${llmProvider} key missing → fallback openai/gpt-4o-mini`);
|
|
||||||
llmProvider = "openai-mini";
|
|
||||||
upstreamUrl = "https://api.openai.com/v1/chat/completions";
|
|
||||||
upstreamKey = config.openaiApiKey as string;
|
|
||||||
upstreamModel = "gpt-4o-mini";
|
|
||||||
upstreamProviderField = undefined;
|
|
||||||
}
|
|
||||||
if (!upstreamKey) {
|
|
||||||
throw createError({ statusCode: 503, message: `API key for ${llmProvider} fehlt` });
|
|
||||||
}
|
|
||||||
upstreamHeaders.Authorization = `Bearer ${upstreamKey}`;
|
|
||||||
console.log(`[coach/sos-stream] using provider=${llmProvider} model=${upstreamModel}`);
|
|
||||||
|
|
||||||
const upstream = await fetch(upstreamUrl, {
|
// Baut die Upstream-Request-Config für einen Provider; null wenn Key fehlt.
|
||||||
method: "POST",
|
// Anthropic nutzt die native Messages-API (system top-level, x-api-key);
|
||||||
headers: upstreamHeaders,
|
// alle anderen das OpenAI-kompatible /chat/completions-Shape.
|
||||||
|
const buildUpstream = (
|
||||||
|
provider: string,
|
||||||
|
): { url: string; headers: Record<string, string>; body: string; label: string } | null => {
|
||||||
|
const headers: Record<string, string> = { "Content-Type": "application/json" };
|
||||||
|
if (provider === "anthropic-haiku") {
|
||||||
|
const key = config.anthropicApiKey as string | undefined;
|
||||||
|
if (!key) return null;
|
||||||
|
headers["x-api-key"] = key;
|
||||||
|
headers["anthropic-version"] = "2023-06-01";
|
||||||
|
// Anthropic verlangt user-first; trimmed kann mit assistant beginnen.
|
||||||
|
const aMsgs = trimmed[0]?.role === "assistant" ? trimmed.slice(1) : trimmed;
|
||||||
|
return {
|
||||||
|
url: "https://api.anthropic.com/v1/messages",
|
||||||
|
headers,
|
||||||
body: JSON.stringify({
|
body: JSON.stringify({
|
||||||
model: upstreamModel,
|
model: "claude-haiku-4-5",
|
||||||
|
max_tokens: 400,
|
||||||
|
stream: true,
|
||||||
|
system: systemPrompt,
|
||||||
|
messages: aMsgs,
|
||||||
|
}),
|
||||||
|
label: "anthropic:claude-haiku-4-5",
|
||||||
|
};
|
||||||
|
}
|
||||||
|
if (provider === "gemini-flash-lite" || provider === "gemini-flash") {
|
||||||
|
const key = config.geminiApiKey as string | undefined;
|
||||||
|
if (!key) return null;
|
||||||
|
headers.Authorization = `Bearer ${key}`;
|
||||||
|
const model = provider === "gemini-flash" ? "gemini-2.5-flash" : "gemini-2.5-flash-lite";
|
||||||
|
return {
|
||||||
|
url: "https://generativelanguage.googleapis.com/v1beta/openai/chat/completions",
|
||||||
|
headers,
|
||||||
|
body: JSON.stringify({
|
||||||
|
model,
|
||||||
max_tokens: 400,
|
max_tokens: 400,
|
||||||
stream: true,
|
stream: true,
|
||||||
messages: [{ role: "system", content: systemPrompt }, ...trimmed],
|
messages: [{ role: "system", content: systemPrompt }, ...trimmed],
|
||||||
...(upstreamProviderField ? { provider: upstreamProviderField } : {}),
|
|
||||||
}),
|
}),
|
||||||
|
label: `gemini:${model}`,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
if (provider === "groq-llama") {
|
||||||
|
const key = config.groqApiKey as string | undefined;
|
||||||
|
if (!key) return null;
|
||||||
|
headers.Authorization = `Bearer ${key}`;
|
||||||
|
return {
|
||||||
|
url: "https://api.groq.com/openai/v1/chat/completions",
|
||||||
|
headers,
|
||||||
|
body: JSON.stringify({
|
||||||
|
model: "llama-3.3-70b-versatile",
|
||||||
|
max_tokens: 400,
|
||||||
|
stream: true,
|
||||||
|
messages: [{ role: "system", content: systemPrompt }, ...trimmed],
|
||||||
|
}),
|
||||||
|
label: "groq:llama-3.3-70b",
|
||||||
|
};
|
||||||
|
}
|
||||||
|
if (provider === "openrouter-haiku" || provider === "openrouter-sonnet") {
|
||||||
|
const key = config.openrouterApiKey as string | undefined;
|
||||||
|
if (!key) return null;
|
||||||
|
headers.Authorization = `Bearer ${key}`;
|
||||||
|
headers["HTTP-Referer"] = "https://rebreak.org";
|
||||||
|
headers["X-Title"] = "ReBreak SOS";
|
||||||
|
const model =
|
||||||
|
provider === "openrouter-haiku"
|
||||||
|
? "anthropic/claude-haiku-4.5"
|
||||||
|
: "anthropic/claude-sonnet-4.5";
|
||||||
|
return {
|
||||||
|
url: "https://openrouter.ai/api/v1/chat/completions",
|
||||||
|
headers,
|
||||||
|
body: JSON.stringify({
|
||||||
|
model,
|
||||||
|
max_tokens: 400,
|
||||||
|
stream: true,
|
||||||
|
messages: [{ role: "system", content: systemPrompt }, ...trimmed],
|
||||||
|
provider: { sort: "latency" },
|
||||||
|
}),
|
||||||
|
label: `openrouter:${model}`,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
// openai-mini — letzter Anker (anderer Provider).
|
||||||
|
if (provider === "openai-mini") {
|
||||||
|
const key = config.openaiApiKey as string | undefined;
|
||||||
|
if (!key) return null;
|
||||||
|
headers.Authorization = `Bearer ${key}`;
|
||||||
|
return {
|
||||||
|
url: "https://api.openai.com/v1/chat/completions",
|
||||||
|
headers,
|
||||||
|
body: JSON.stringify({
|
||||||
|
model: "gpt-4o-mini",
|
||||||
|
max_tokens: 400,
|
||||||
|
stream: true,
|
||||||
|
messages: [{ role: "system", content: systemPrompt }, ...trimmed],
|
||||||
|
}),
|
||||||
|
label: "openai:gpt-4o-mini",
|
||||||
|
};
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
};
|
||||||
|
|
||||||
|
// Erste Kandidatin, die einen OK-Stream liefert.
|
||||||
|
let upstream: Response | undefined;
|
||||||
|
let usedLabel = "none";
|
||||||
|
for (const c of candidates) {
|
||||||
|
const cfg = buildUpstream(c.provider);
|
||||||
|
if (!cfg) continue;
|
||||||
|
try {
|
||||||
|
const resp = await fetch(cfg.url, {
|
||||||
|
method: "POST",
|
||||||
|
headers: cfg.headers,
|
||||||
|
body: cfg.body,
|
||||||
});
|
});
|
||||||
|
if (resp.ok && resp.body) {
|
||||||
|
upstream = resp;
|
||||||
|
usedLabel = cfg.label;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
const errText = await resp.text().catch(() => "");
|
||||||
|
console.error(
|
||||||
|
`[coach/sos-stream] ${cfg.label} upstream error:`,
|
||||||
|
resp.status,
|
||||||
|
errText.slice(0, 300),
|
||||||
|
);
|
||||||
|
} catch (e) {
|
||||||
|
console.error(`[coach/sos-stream] ${cfg.label} fetch threw:`, e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// ── Upstream-Fehler: SSE-Header trotzdem setzen, dann Fallback senden ──────
|
// ── Upstream-Fehler: SSE-Header trotzdem setzen, dann Fallback senden ──────
|
||||||
// (b) Timeout/Leer-Fallback: Kein 502-Throw im Krisen-Kontext — User muss
|
// (b) Timeout/Leer-Fallback: Kein 502-Throw im Krisen-Kontext — User muss
|
||||||
// immer eine Antwort sehen. Bei LLM-Fehler sofort Krisen-Fallback liefern.
|
// immer eine Antwort sehen. Bei LLM-Fehler sofort Krisen-Fallback liefern.
|
||||||
const upstreamFailed = !upstream.ok || !upstream.body;
|
const upstreamFailed = !upstream || !upstream.body;
|
||||||
if (upstreamFailed) {
|
console.log(`[coach/sos-stream] using ${usedLabel}`);
|
||||||
const errText = await upstream.text().catch(() => "");
|
|
||||||
console.error(
|
|
||||||
"[coach/sos-stream] upstream error:",
|
|
||||||
upstream.status,
|
|
||||||
errText.slice(0, 300),
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Direkt zu Node res schreiben — sendStream(ReadableStream) pumpt pull() in Nitro nicht zuverlässig
|
// Direkt zu Node res schreiben — sendStream(ReadableStream) pumpt pull() in Nitro nicht zuverlässig
|
||||||
const res = event.node.res;
|
const res = event.node.res;
|
||||||
@ -352,7 +433,7 @@ export default defineEventHandler(async (event) => {
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
const reader = upstream.body.getReader();
|
const reader = upstream!.body!.getReader();
|
||||||
const decoder = new TextDecoder();
|
const decoder = new TextDecoder();
|
||||||
let buffer = "";
|
let buffer = "";
|
||||||
let fullText = "";
|
let fullText = "";
|
||||||
@ -382,8 +463,14 @@ export default defineEventHandler(async (event) => {
|
|||||||
try {
|
try {
|
||||||
const json = JSON.parse(payload) as {
|
const json = JSON.parse(payload) as {
|
||||||
choices?: { delta?: { content?: string } }[];
|
choices?: { delta?: { content?: string } }[];
|
||||||
|
type?: string;
|
||||||
|
delta?: { type?: string; text?: string };
|
||||||
};
|
};
|
||||||
const delta = json.choices?.[0]?.delta?.content;
|
// OpenAI-kompatibel: choices[].delta.content
|
||||||
|
// Anthropic nativ: event content_block_delta → delta.text
|
||||||
|
const delta =
|
||||||
|
json.choices?.[0]?.delta?.content ??
|
||||||
|
(json.type === "content_block_delta" ? json.delta?.text : undefined);
|
||||||
if (delta) {
|
if (delta) {
|
||||||
fullText += delta;
|
fullText += delta;
|
||||||
chunkCount++;
|
chunkCount++;
|
||||||
|
|||||||
@ -120,16 +120,12 @@ export const PLAN_LIMITS: Record<Exclude<Plan, "free">, PlanLimits> = {
|
|||||||
{ provider: "groq", model: "llama-3.3-70b-versatile" },
|
{ provider: "groq", model: "llama-3.3-70b-versatile" },
|
||||||
],
|
],
|
||||||
aiProvider: "openrouter",
|
aiProvider: "openrouter",
|
||||||
// ⚠️ TEMPORÄR (2026-06-08): ElevenLabs-Account hat ein Zahlungsproblem
|
// Legend → ElevenLabs Turbo v2.5, unlimited. Voice-Picker / lyraVoiceId greift.
|
||||||
// (HTTP 401 payment_issue) → Legend würde sonst 502 bekommen. Übergangsweise
|
// (2026-06-08 war wg. ElevenLabs-Zahlungsproblem temporär auf Cartesia umgeleitet;
|
||||||
// auf Cartesia (sonic-3) umgeleitet, damit Legend-User trotzdem Sprachausgabe
|
// 2026-06-11 neuer Key + bezahlt → zurückgesetzt.)
|
||||||
// haben. ZURÜCKSETZEN sobald die ElevenLabs-Rechnung bezahlt ist:
|
|
||||||
// provider: "elevenlabs", model: "eleven_turbo_v2_5", dailyQuotaSeconds: 0
|
|
||||||
// (Hinweis: mit Cartesia greift der Legend-Voice-Picker / lyraVoiceId nicht —
|
|
||||||
// Legend hört solange Cartesias Default-Stimme wie Pro.)
|
|
||||||
voice: {
|
voice: {
|
||||||
provider: "cartesia",
|
provider: "elevenlabs",
|
||||||
model: "sonic-3",
|
model: "eleven_turbo_v2_5",
|
||||||
dailyQuotaSeconds: 0, // Legend bleibt unlimited
|
dailyQuotaSeconds: 0, // Legend bleibt unlimited
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user