chahinebrini f2e822be95 feat(sos): llmProvider toggle + sort:latency + bench scaffolding
- backend/coach: routing zu Sonnet (default) / Haiku / Groq Llama je nach
  sessionData.llmProvider. sort:latency für Anthropic-Modelle (-30..58% TTFB).
- frontend: LlmProviderToggle (Sonnet/Haiku/Groq pills), llmProvider.ts
  Storage-Helper. sosStream.ts schickt llmProvider im /sos-session-Body.
- bench: SosTtsBenchmark sammelt Marker (req->session, lyra-ttfb, lyra-done,
  tts-fired/headers/body/file, audio-loaded, first-audio); Output als console.table.
- ops: backend/scripts/llm-bench.sh + Python-Variante für realistic SOS-Prompt.
- speak-cartesia + speak-elevenlabs Endpoints (waren ungetracked, jetzt mit drin).
2026-05-06 13:58:07 +02:00

242 lines
8.5 KiB
TypeScript
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/**
 * GET /api/coach/sos-stream?session=xyz — streaming SOS coach
 * (Claude Sonnet 4.5 by default; Claude Haiku 4.5 or Groq Llama when the session's llmProvider says so)
 *
 * Streams the model's reply as SSE (Server-Sent Events).
 * The frontend uses react-native-sse (EventSource) for progressive streaming.
 *
 * Format (SSE standard):
 * event: message
 * data: <text chunk>
 *
 * event: chips
 * data: [{"label":"...","action":"..."}]
 *
 * event: done
 * data: {}
 *
 * event: error (only if reading the upstream stream fails)
 * data: {"error":"stream failed"}
 *
 * Flow:
 * 1. Client POSTs to /api/coach/sos-session → { sessionId }
 * 2. Client opens GET /api/coach/sos-stream?session=xyz via EventSource
 * 3. Backend loads the session data (messages/locale/llmProvider) from the in-memory store
 * 4. Streams the reply as SSE events
 *
 * Fallback: a missing API key yields 503 and an upstream failure yields 502;
 * the frontend can fall back to /coach/message.
 */
import { COACH_SYSTEM_PROMPT } from "./message.post";
/**
 * SOS-specific rules that the stream handler appends to the end of the coach
 * system prompt.
 *
 * The text (German, sent verbatim to the model) asks for a plain-text reply of
 * 2-4 sentences without JSON wrappers or Markdown, followed by exactly one
 * final line of the form `[[CHIPS]]:<JSON array>`. The handler in this file
 * searches the accumulated model output for that `[[CHIPS]]:` marker and
 * parses what follows as JSON, so the marker spelling here must stay in sync
 * with the parsing code.
 *
 * The string starts with two newlines because it is concatenated directly
 * after the base prompt.
 */
const SOS_INSTRUCTION = `\n\nDU BEFINDEST DICH IN EINEM AKUTEN SOS-MOMENT. WICHTIGE REGELN:
- Antworte als REINER TEXT, KEINE JSON-Wrapper, KEIN Markdown, KEINE Aufzählungen
- Sei warm, präsent, menschlich — wie eine echte Freundin am Telefon
- 2-4 Sätze, ruhiger Rhythmus mit kurzen Pausen (Sätze klar trennen mit . oder !)
- Validiere zuerst das Gefühl, dann sanfte Frage ODER Vorschlag
- Am ENDE der Antwort genau EINE neue Zeile mit Chips im Format:
[[CHIPS]]:[{"label":"…","action":"…"},…]
- Erlaubte Chip-Actions: breathing, game_picker, send_text:<text>, overcome, share_success, rate_session, close, show_stats, need_help, feel:<text>
- KEIN Text nach der CHIPS-Zeile`;
/** Marker that separates the reply text from the trailing chips JSON in the model output. */
const CHIPS_MARKER = "[[CHIPS]]:";

/** Maximum number of conversation messages forwarded to the upstream model. */
const MAX_CONTEXT_MESSAGES = 8;

/**
 * Limits the conversation to the most recent messages and then drops leading
 * non-user messages, so the window sent upstream starts with a user turn.
 *
 * The window is cut first and the leading messages are dropped afterwards;
 * doing it the other way round can leave an assistant message at the front
 * once the history is longer than the limit.
 *
 * @param messages Full conversation history, oldest first.
 * @param limit Maximum number of trailing messages to keep.
 * @returns The trimmed conversation. If the window contains no user message
 *          at all it is returned unchanged.
 */
function trimConversation<T extends { role: string }>(
  messages: readonly T[],
  limit: number = MAX_CONTEXT_MESSAGES,
): T[] {
  const recent = messages.slice(-limit);
  const firstUserIdx = recent.findIndex((m) => m.role === "user");
  return firstUserIdx > 0 ? recent.slice(firstUserIdx) : recent;
}

/**
 * Extracts the text delta from one line of an OpenAI-style SSE completion stream.
 *
 * @param line A single raw line from the upstream response.
 * @returns The non-empty `choices[0].delta.content` string, or `undefined` for
 *          blank lines, non-`data:` lines, the `[DONE]` sentinel, unparsable
 *          JSON and chunks without text content.
 */
function extractDelta(line: string): string | undefined {
  const trimLine = line.trim();
  if (!trimLine.startsWith("data:")) return undefined;
  const payload = trimLine.slice(5).trim();
  if (payload === "[DONE]") return undefined;
  try {
    const json = JSON.parse(payload) as {
      choices?: { delta?: { content?: unknown } }[];
    } | null;
    const content = json?.choices?.[0]?.delta?.content;
    return typeof content === "string" && content.length > 0
      ? content
      : undefined;
  } catch {
    // Deliberate best-effort: a malformed upstream line is skipped, not fatal.
    return undefined;
  }
}

/**
 * Encodes a text delta as one SSE `message` event.
 *
 * Per the SSE format, a payload containing line breaks must be sent as several
 * `data:` lines. CR, LF and CRLF are all line terminators in SSE, so all three
 * are split on.
 */
function toSseMessage(delta: string): string {
  const dataLines = delta
    .split(/\r\n|\r|\n/)
    .map((l) => `data: ${l}`)
    .join("\n");
  return `event: message\n${dataLines}\n\n`;
}

/**
 * Parses the chips JSON array that follows the chips marker in the model output.
 *
 * @param fullText The complete accumulated model reply.
 * @returns The parsed array, or an empty array when the marker is missing, the
 *          JSON is invalid, or the parsed value is not an array.
 */
function parseChips(fullText: string): unknown[] {
  const markerIdx = fullText.indexOf(CHIPS_MARKER);
  if (markerIdx < 0) return [];
  const chipsRaw = fullText.slice(markerIdx + CHIPS_MARKER.length);
  try {
    const parsed: unknown = JSON.parse(chipsRaw.trim());
    if (Array.isArray(parsed)) return parsed;
  } catch {
    // fall through to the shared warning below
  }
  console.warn("[sos-stream] chips parse failed:", chipsRaw);
  return [];
}

/**
 * Streaming SOS coach endpoint.
 *
 * Loads the one-time SOS session named by the `session` query parameter,
 * forwards its conversation to the configured LLM provider and relays the
 * streamed reply to the client as SSE events (`message`, optional `chips`,
 * then `done`; `error` if reading the upstream stream fails).
 *
 * @throws 400 when the `session` query parameter is missing or not a single string.
 * @throws 404 when the session does not exist (or has expired).
 * @throws 403 when the session belongs to a different user.
 * @throws 503 when the API key for the selected provider is not configured.
 * @throws 502 when the upstream request fails or returns a non-OK response.
 */
export default defineEventHandler(async (event) => {
  const user = await requireUser(event);

  // A repeated query parameter arrives as an array, so check the runtime type
  // instead of asserting it.
  const sessionId = getQuery(event).session;
  if (typeof sessionId !== "string" || sessionId.length === 0) {
    throw createError({
      statusCode: 400,
      message: "session query param fehlt",
    });
  }

  // Load the session data (messages + locale).
  const { getSosSession, deleteSosSession } = await import(
    "../../utils/sosSessions"
  );
  const sessionData = getSosSession(sessionId);
  if (!sessionData) {
    throw createError({
      statusCode: 404,
      message: "Session nicht gefunden oder abgelaufen (TTL 5min)",
    });
  }

  // Security: the session must belong to the authenticated user.
  if (sessionData.userId !== user.id) {
    throw createError({ statusCode: 403, message: "Nicht deine Session" });
  }

  const { messages, locale } = sessionData;

  // Sessions are one-time-use: consume it before talking to the upstream.
  deleteSosSession(sessionId);

  const config = useRuntimeConfig();

  // System prompt: language directive + coach base prompt + SOS streaming rules.
  const LANG: Record<string, string> = {
    de: "Antworte IMMER auf Deutsch.",
    en: "Always respond in English.",
    tr: "Her zaman Türkçe yanıt ver.",
    ar: "رد دائماً باللغة العربية.",
  };
  const lang = LANG[locale ?? "de"] ?? LANG.de;
  const systemPrompt = `${lang}\n\n${COACH_SYSTEM_PROMPT.replace("{{PLAN_DETAILS}}", "")}${SOS_INSTRUCTION}`;

  // Keep only the recent context and make sure it starts with a user message.
  const trimmed = trimConversation(messages);

  // LLM routing: the client selects llmProvider via the /sos-session body.
  // Default is openrouter-sonnet. OpenRouter requests carry a
  // provider.sort = "latency" hint; Groq is called directly, not via OpenRouter.
  const llmProvider = sessionData.llmProvider ?? "openrouter-sonnet";
  let upstreamUrl: string;
  let upstreamKey: string | undefined;
  let upstreamModel: string;
  const upstreamHeaders: Record<string, string> = {
    "Content-Type": "application/json",
  };
  let upstreamProviderField: { sort: string } | undefined;
  if (llmProvider === "groq-llama") {
    upstreamUrl = "https://api.groq.com/openai/v1/chat/completions";
    upstreamKey = config.groqApiKey as string | undefined;
    upstreamModel = "llama-3.3-70b-versatile";
  } else {
    upstreamUrl = "https://openrouter.ai/api/v1/chat/completions";
    upstreamKey = config.openrouterApiKey as string | undefined;
    upstreamModel =
      llmProvider === "openrouter-haiku"
        ? "anthropic/claude-haiku-4.5"
        : "anthropic/claude-sonnet-4.5";
    upstreamHeaders["HTTP-Referer"] = "https://rebreak.org";
    upstreamHeaders["X-Title"] = "ReBreak SOS";
    upstreamProviderField = { sort: "latency" };
  }
  if (!upstreamKey) {
    throw createError({
      statusCode: 503,
      message: `API key for ${llmProvider} fehlt`,
    });
  }
  upstreamHeaders.Authorization = `Bearer ${upstreamKey}`;
  console.log(
    `[coach/sos-stream] using provider=${llmProvider} model=${upstreamModel}`,
  );

  // A rejected fetch (DNS, TLS, connection reset) is an upstream failure too,
  // so report it with the same 502 as a non-OK response.
  let upstream: Response;
  try {
    upstream = await fetch(upstreamUrl, {
      method: "POST",
      headers: upstreamHeaders,
      body: JSON.stringify({
        model: upstreamModel,
        max_tokens: 400,
        stream: true,
        messages: [{ role: "system", content: systemPrompt }, ...trimmed],
        ...(upstreamProviderField ? { provider: upstreamProviderField } : {}),
      }),
    });
  } catch (err) {
    console.error("[coach/sos-stream] upstream fetch failed:", err);
    throw createError({
      statusCode: 502,
      message: "SOS-Stream nicht verfügbar",
    });
  }

  if (!upstream.ok || !upstream.body) {
    const errText = await upstream.text().catch(() => "");
    console.error(
      "[coach/sos-stream] upstream error:",
      upstream.status,
      errText.slice(0, 300),
    );
    throw createError({
      statusCode: 502,
      message: "SOS-Stream nicht verfügbar",
    });
  }

  setHeader(event, "Content-Type", "text/event-stream; charset=utf-8");
  setHeader(event, "Cache-Control", "no-store");
  setHeader(event, "X-Accel-Buffering", "no");
  setHeader(event, "Connection", "keep-alive");

  // Upstream SSE → parse deltas → re-emit in the SSE shape the client expects.
  const reader = upstream.body.getReader();
  const decoder = new TextDecoder();
  const encoder = new TextEncoder();
  let buffer = "";
  let fullText = "";

  const stream = new ReadableStream<Uint8Array>({
    start(controller) {
      // An SSE comment line is sent first so the client sees data right away.
      controller.enqueue(encoder.encode(": connected\n\n"));
    },
    async pull(controller) {
      try {
        const { value, done } = await reader.read();

        // On the final read, flush the decoder so a trailing multi-byte
        // sequence is not lost.
        buffer += done
          ? decoder.decode()
          : decoder.decode(value, { stream: true });
        const lines = buffer.split("\n");
        // While more data may follow, the last element is an incomplete line
        // and stays buffered; at end of stream it is processed as well.
        buffer = done ? "" : (lines.pop() ?? "");

        // NOTE(review): deltas are forwarded as they arrive, so the chips line
        // is also streamed to the client as message text — presumably the
        // client strips it; confirm.
        for (const line of lines) {
          const delta = extractDelta(line);
          if (delta === undefined) continue;
          fullText += delta;
          controller.enqueue(encoder.encode(toSseMessage(delta)));
        }

        if (!done) return;

        // End of stream: emit chips (if any) as a separate event, then `done`.
        const chips = parseChips(fullText);
        if (chips.length > 0) {
          controller.enqueue(
            encoder.encode(`event: chips\ndata: ${JSON.stringify(chips)}\n\n`),
          );
        }
        controller.enqueue(encoder.encode("event: done\ndata: {}\n\n"));
        controller.close();
      } catch (err) {
        console.error("[coach/sos-stream] read error:", err);
        controller.enqueue(
          encoder.encode(
            `event: error\ndata: ${JSON.stringify({ error: "stream failed" })}\n\n`,
          ),
        );
        controller.close();
      }
    },
    cancel() {
      // Client went away: stop reading from the upstream as well.
      reader.cancel().catch(() => {});
    },
  });

  console.log(
    `[coach/sos-stream] stream started for ${user.id}, session ${sessionId}`,
  );
  return sendStream(event, stream as never);
});