let profile vor try-block hoisten, damit es im plan-routing-Fallback (line 210, profile?.plan) sichtbar ist. Vorher: const profile innerhalb try-block → block-scoped → ReferenceError außerhalb. Demographics-Block-Injection added — gracefully no-op wenn neue Felder (birthYear/gender/etc.) noch nicht im DB-Schema sind (optional chaining). Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
397 lines
15 KiB
TypeScript
397 lines
15 KiB
TypeScript
/**
|
||
* GET /api/coach/sos-stream?session=xyz — Streaming SOS Coach (Claude Sonnet 4.5)
|
||
*
|
||
* Streamt Sonnets Antwort als SSE (Server-Sent Events).
|
||
* Frontend nutzt react-native-sse (EventSource) für progressives Streaming.
|
||
*
|
||
* Format (SSE-Standard):
|
||
* event: message
|
||
* data: <text chunk>
|
||
*
|
||
* event: chips
|
||
* data: [{"label":"...","action":"..."}]
|
||
*
|
||
* Flow:
|
||
* 1. Client POSTet zu /api/coach/sos-session → { sessionId }
|
||
* 2. Client öffnet GET /api/coach/sos-stream?session=xyz via EventSource
|
||
* 3. Backend lädt Session-Daten (messages/locale) aus In-Memory Store
|
||
* 4. Streamt Antwort als SSE-Events
|
||
*
|
||
* Fallback: bei Upstream-Fehler antwortet der Endpoint mit 502 (bzw. 503, wenn der API-Key fehlt); Frontend kann auf /coach/message zurückfallen.
|
||
*/
|
||
import { COACH_SYSTEM_PROMPT } from "./message.post";
|
||
import { getMemoriesForUser, markReferenced } from "../../db/lyraMemory";
|
||
import { extractAndStoreMemories } from "../../utils/lyraMemoryExtract";
|
||
import { getProfile } from "../../db/profile";
|
||
|
||
/**
 * Extra system-prompt rules appended to the base coach prompt for SOS streaming.
 *
 * It tells the model to answer in short plain prose (the prose is read aloud via
 * TTS), never to enumerate the chip options in that prose, and to finish with
 * exactly one `[[CHIPS]]:` line carrying a JSON array of `{label, action}` items.
 * The stream handler in this file relies on that trailing marker to split prose
 * from chips.
 *
 * The text is runtime data sent to the model, so it stays in German and must not
 * contain any formatting residue.
 */
const SOS_INSTRUCTION = `\n\nDU BEFINDEST DICH IN EINEM AKUTEN SOS-MOMENT. WICHTIGE REGELN:
- Antworte als REINER TEXT, KEINE JSON-Wrapper, KEIN Markdown, KEINE Aufzählungen.
- Sei warm, präsent, menschlich — wie eine echte Freundin am Telefon.
- KURZ: 1-2 Sätze, max 3 nur in seltenen Ausnahmen. Ruhiger Rhythmus mit kurzen Pausen.
- Validiere zuerst das Gefühl, dann sanfte Frage ODER Vorschlag.

ABSOLUT KRITISCH — NIEMALS die Chip-Optionen im Prosa-Text auflisten oder paraphrasieren.
Der Prosa-Text wird dem User VORGELESEN (TTS) — Chip-Aufzählung klingt unnatürlich
("warum sprichst du eine Liste?"). Die Chips erscheinen visuell als Buttons.

✗ FALSCH: "Magst du atmen oder lieber spielen?" (= Aufzählung)
✗ FALSCH: "Du kannst eine Atemübung oder ein Spiel machen."
✗ FALSCH: "Hier sind ein paar Optionen: Atmen, Spielen oder Reden."
✗ FALSCH: "Magst du mit mir reden, eine Atemübung machen oder ein Spiel?"

✓ RICHTIG: "Magst du was probieren?"
✓ RICHTIG: "Was hilft dir gerade?"
✓ RICHTIG: "Hier hast du Möglichkeiten."
✓ RICHTIG: "Was passt für dich grad?"
✓ RICHTIG: "Ich bin da. Was brauchst du jetzt?"

- Am ENDE der Antwort genau EINE neue Zeile mit Chips im Format:
[[CHIPS]]:[{"label":"…","action":"…"},…]
- Erlaubte Chip-Actions: breathing, game_picker, send_text:<text>, overcome, share_success, rate_session, close, show_stats, need_help, feel:<text>
- KEIN Text nach der CHIPS-Zeile`;
|
||
|
||
/** Marker that separates the spoken prose from the trailing chips JSON in the model output. */
const CHIPS_MARKER = "[[CHIPS]]:";

/**
 * Returns how many leading characters of `text` may be forwarded to the client
 * without leaking any part of the chips marker.
 *
 * - If the full marker is present, everything before it is safe.
 * - Otherwise the longest suffix of `text` that is also a prefix of the marker is
 *   held back, because the next delta might complete it.
 *
 * @param text Everything the model has produced so far.
 * @returns Length of the prefix that is safe to stream.
 */
function streamableLength(text: string): number {
  const markerIdx = text.indexOf(CHIPS_MARKER);
  if (markerIdx >= 0) return markerIdx;

  const maxHold = Math.min(CHIPS_MARKER.length - 1, text.length);
  for (let hold = maxHold; hold > 0; hold--) {
    if (text.endsWith(CHIPS_MARKER.slice(0, hold))) return text.length - hold;
  }
  return text.length;
}

/**
 * Extracts the text delta from one line of the upstream (OpenAI-compatible) SSE stream.
 *
 * @param line One raw line of the upstream response body.
 * @returns The delta text, or `""` for comments, blank lines, `[DONE]`,
 *          unparsable payloads and chunks without string content.
 */
function deltaFromSseLine(line: string): string {
  const trimmedLine = line.trim();
  if (!trimmedLine.startsWith("data:")) return "";

  const payload = trimmedLine.slice(5).trim();
  if (!payload || payload === "[DONE]") return "";

  try {
    const json = JSON.parse(payload) as {
      choices?: { delta?: { content?: unknown } }[];
    };
    const content = json.choices?.[0]?.delta?.content;
    return typeof content === "string" ? content : "";
  } catch {
    // Not valid JSON (e.g. a truncated line) — nothing to forward.
    return "";
  }
}

/**
 * GET /api/coach/sos-stream?session=<id> — streams the SOS coach answer as SSE.
 *
 * Flow:
 *  1. Authenticate via `requireUser` and validate the `session` query parameter.
 *  2. Load the one-time session (messages, locale, optional provider toggle),
 *     verify ownership and delete it.
 *  3. Build the system prompt from nickname, demographics, memories, language
 *     rule, base coach prompt and the SOS rules.
 *  4. Pick the upstream LLM (explicit toggle, otherwise derived from the plan).
 *  5. Relay the upstream stream to the client as SSE events:
 *     - `message`: JSON-encoded text chunk (prose only, never the chips marker)
 *     - `chips`:   JSON array parsed from the trailing `[[CHIPS]]:` line
 *     - `done`:    end of stream
 *     - `error`:   a failure after streaming has started
 *  6. Kick off memory bookkeeping in the background.
 *
 * Errors raised before streaming starts: 400 (missing/invalid `session`),
 * 404 (unknown session), 403 (session belongs to another user),
 * 503 (API key for the chosen provider missing), 502 (upstream unavailable).
 */
export default defineEventHandler(async (event) => {
  const user = await requireUser(event);

  // A query value can also be an array (repeated parameter); only accept a plain string.
  const rawSession = getQuery(event).session;
  const sessionId = typeof rawSession === "string" ? rawSession : undefined;

  if (!sessionId) {
    throw createError({
      statusCode: 400,
      message: "session query param fehlt",
    });
  }

  // Load the session data (messages + locale).
  const { getSosSession, deleteSosSession } = await import(
    "../../utils/sosSessions"
  );
  const sessionData = getSosSession(sessionId);

  if (!sessionData) {
    throw createError({
      statusCode: 404,
      message: "Session nicht gefunden oder abgelaufen (TTL 5min)",
    });
  }

  // Security: the session must belong to the authenticated user.
  if (sessionData.userId !== user.id) {
    throw createError({ statusCode: 403, message: "Nicht deine Session" });
  }

  const { messages, locale } = sessionData;

  // One-time use: the session is consumed as soon as it has been read.
  deleteSosSession(sessionId);

  const config = useRuntimeConfig();

  // Language rule that is placed in front of the base coach prompt.
  const LANG: Record<string, string> = {
    de: "Antworte IMMER auf Deutsch.",
    en: "Always respond in English.",
    tr: "Her zaman Türkçe yanıt ver.",
    ar: "رد دائماً باللغة العربية.",
  };
  const lang = LANG[locale ?? "de"] ?? LANG.de;

  // Memory injection: load what was remembered about this user in earlier sessions.
  let memoryBlock = "";
  let loadedMemoryIds: string[] = [];
  try {
    const memories = await getMemoriesForUser(user.id);
    if (memories.length > 0) {
      loadedMemoryIds = memories.map((m) => m.id);
      const TYPE_LABELS: Record<string, string> = {
        trigger: "Trigger",
        habit: "Gewohnheit",
        strength: "Stärke",
        relationship: "Wichtige Person",
        milestone: "Meilenstein",
        pain_point: "Sensibles Thema",
        goal: "Ziel",
        preference: "Präferenz",
      };
      const lines = memories
        .map((m) => `- ${TYPE_LABELS[m.type] ?? m.type}: ${m.content}`)
        .join("\n");
      memoryBlock = `[WAS DU ÜBER DIESEN USER WEISST — aus früheren Gesprächen]\n${lines}\nNutze diese Infos um GENAU diesen Menschen anzusprechen — wie ein echter Begleiter, nicht eine generische KI. Sprich Personen mit Namen an. Erinnere an Stärken die dir bekannt sind.\n\n`;
      console.log(
        `[lyra-memory] injected ${memories.length} memories for ${user.id}`,
      );
    }
  } catch (e) {
    // Not critical — a memory failure must never block an SOS answer.
    console.error("[lyra-memory] load error (non-fatal):", e);
  }

  // Nickname + demographics injection. Without a name anchor the model tends to
  // invent one. Demographics are only ever read here; they are entered by the
  // user in the profile form and must never be extracted from the conversation.
  let nicknamePrefix = "";
  let demographicsBlock = "";
  // Declared outside the try block because the plan-based routing below reads it too.
  let profile: Awaited<ReturnType<typeof getProfile>> = null;
  try {
    profile = await getProfile(user.id);
    // `||` on purpose: an empty nickname falls back to the username.
    const nickname = profile?.nickname || profile?.username;
    if (nickname) {
      nicknamePrefix = `NUTZER-NAME: Der Nutzer heißt "${nickname}" – nenne ihn gelegentlich bei seinem Namen wenn es natürlich passt.\n\n`;
    }

    // Kept separate from memoryBlock: structured profile data, not conversation memory.
    const demoLines: string[] = [];
    if (profile?.birthYear) {
      const age = new Date().getFullYear() - profile.birthYear;
      demoLines.push(`- Alter: ca. ${age} Jahre (Geburtsjahr ${profile.birthYear})`);
    }
    if (profile?.gender) {
      const GENDER_LABEL: Record<string, string> = {
        male: "männlich",
        female: "weiblich",
        diverse: "divers",
        no_answer: "keine Angabe",
      };
      demoLines.push(`- Geschlecht: ${GENDER_LABEL[profile.gender] ?? profile.gender}`);
    }
    if (profile?.maritalStatus) {
      const MS_LABEL: Record<string, string> = {
        single: "ledig",
        partnered: "in Partnerschaft",
        married: "verheiratet",
        divorced: "geschieden",
        widowed: "verwitwet",
        no_answer: "keine Angabe",
      };
      demoLines.push(
        `- Familienstand: ${MS_LABEL[profile.maritalStatus] ?? profile.maritalStatus}`,
      );
    }
    if (profile?.profession) {
      demoLines.push(`- Beruf: ${profile.profession}`);
    }
    if (profile?.bundesland) {
      demoLines.push(`- Bundesland: ${profile.bundesland}`);
    }
    if (profile?.city) {
      demoLines.push(`- Stadt: ${profile.city}`);
    }
    if (demoLines.length > 0) {
      demographicsBlock = `[USER-DEMOGRAPHIE — vom User selbst angegeben]\n${demoLines.join("\n")}\nNutze diese Infos nur für Empathie + Kontext (z.B. "Schichtarbeit erschwert deinen Rhythmus"). Frage NIEMALS nach diesen Daten — der User pflegt sie selbst in der Profile-Form. Diese Daten persistieren in keinem Memory-Store, sondern sind strukturierte DiGA-Daten.\n\n`;
    }
  } catch (e) {
    console.error("[sos-stream] profile load (non-fatal):", e);
  }

  const systemPrompt = `${nicknamePrefix}${demographicsBlock}${memoryBlock}${lang}\n\n${COACH_SYSTEM_PROMPT.replace("{{PLAN_DETAILS}}", "")}${SOS_INSTRUCTION}`;

  // Keep the last 8 messages and make sure the window starts with a user message.
  // The window is cut first and leading non-user messages are dropped afterwards,
  // so trimming can no longer put an assistant message back at the front.
  const recent = messages.slice(-8);
  const firstUserIdx = recent.findIndex((m) => m.role === "user");
  const trimmed = firstUserIdx > 0 ? recent.slice(firstUserIdx) : recent;

  // LLM routing: an explicit toggle stored with the session wins; "auto" or no
  // toggle falls back to a plan-based default (legend → Haiku via OpenRouter,
  // everything else → Groq).
  const userToggle = sessionData.llmProvider;
  let llmProvider: string;
  if (userToggle && userToggle !== "auto") {
    llmProvider = userToggle;
  } else {
    const planRaw = (profile?.plan ?? "free").toLowerCase();
    // Legacy plan names: "premium" → legend, "standard" → pro.
    const plan = planRaw === "premium" ? "legend" : planRaw === "standard" ? "pro" : planRaw;
    llmProvider = plan === "legend" ? "openrouter-haiku" : "groq-llama";
  }

  let upstreamUrl: string;
  let upstreamKey: string | undefined;
  let upstreamModel: string;
  const upstreamHeaders: Record<string, string> = { "Content-Type": "application/json" };
  let upstreamProviderField: { sort: string } | undefined;
  if (llmProvider === "groq-llama") {
    upstreamUrl = "https://api.groq.com/openai/v1/chat/completions";
    upstreamKey = config.groqApiKey as string | undefined;
    upstreamModel = "llama-3.3-70b-versatile";
  } else {
    // Any other provider value is served through OpenRouter; Sonnet is the default model.
    upstreamUrl = "https://openrouter.ai/api/v1/chat/completions";
    upstreamKey = config.openrouterApiKey as string | undefined;
    upstreamModel = llmProvider === "openrouter-haiku"
      ? "anthropic/claude-haiku-4.5"
      : "anthropic/claude-sonnet-4.5";
    upstreamHeaders["HTTP-Referer"] = "https://rebreak.org";
    upstreamHeaders["X-Title"] = "ReBreak SOS";
    upstreamProviderField = { sort: "latency" };
  }
  if (!upstreamKey) {
    throw createError({ statusCode: 503, message: `API key for ${llmProvider} fehlt` });
  }
  upstreamHeaders.Authorization = `Bearer ${upstreamKey}`;
  console.log(`[coach/sos-stream] using provider=${llmProvider} model=${upstreamModel}`);

  // A rejected fetch (DNS, TLS, connection reset) is reported like any other
  // upstream failure instead of bubbling up as an unhandled 500.
  let upstream: Response;
  try {
    upstream = await fetch(upstreamUrl, {
      method: "POST",
      headers: upstreamHeaders,
      body: JSON.stringify({
        model: upstreamModel,
        max_tokens: 400,
        stream: true,
        messages: [{ role: "system", content: systemPrompt }, ...trimmed],
        ...(upstreamProviderField ? { provider: upstreamProviderField } : {}),
      }),
    });
  } catch (err: unknown) {
    console.error("[coach/sos-stream] upstream fetch failed:", err);
    throw createError({
      statusCode: 502,
      message: "SOS-Stream nicht verfügbar",
    });
  }

  if (!upstream.ok || !upstream.body) {
    const errText = await upstream.text().catch(() => "");
    console.error(
      "[coach/sos-stream] upstream error:",
      upstream.status,
      errText.slice(0, 300),
    );
    throw createError({
      statusCode: 502,
      message: "SOS-Stream nicht verfügbar",
    });
  }

  // Write straight to the Node response object instead of handing a stream to the framework.
  const res = event.node.res;
  res.statusCode = 200;
  res.setHeader("Content-Type", "text/event-stream; charset=utf-8");
  res.setHeader("Cache-Control", "no-store");
  res.setHeader("X-Accel-Buffering", "no");
  res.setHeader("Connection", "keep-alive");
  res.flushHeaders?.();

  // Set once the response has closed (client disconnect or our own res.end()).
  let aborted = false;

  const write = (chunk: string) => {
    // Nothing can be delivered any more once the connection is gone.
    if (aborted || res.writableEnded) return;
    try {
      res.write(chunk);
    } catch (e) {
      console.error("[coach/sos-stream] write error:", e);
    }
  };

  console.log(
    `[coach/sos-stream] stream started for ${user.id}, session ${sessionId}`,
  );
  write(": connected\n\n");

  const reader = upstream.body.getReader();
  const decoder = new TextDecoder();
  let buffer = "";
  let fullText = "";
  let sentLength = 0; // number of characters of fullText already forwarded
  let chunkCount = 0;

  // Client disconnect detection: stop reading from upstream as well.
  res.on("close", () => {
    aborted = true;
    reader.cancel().catch(() => {});
  });

  // Forwards the newly safe part of fullText (if any) as one `message` event.
  // JSON-encoding keeps whitespace and newlines intact inside the SSE data field.
  const flushUpTo = (safeLength: number) => {
    if (safeLength <= sentLength) return;
    write(`event: message\ndata: ${JSON.stringify(fullText.slice(sentLength, safeLength))}\n\n`);
    sentLength = safeLength;
  };

  const consumeLine = (line: string) => {
    const delta = deltaFromSseLine(line);
    if (!delta) return;
    fullText += delta;
    chunkCount++;
    flushUpTo(streamableLength(fullText));
  };

  try {
    while (!aborted) {
      const { value, done } = await reader.read();
      if (done) break;
      buffer += decoder.decode(value, { stream: true });
      const lines = buffer.split("\n");
      // The last element is an incomplete line; keep it for the next chunk.
      buffer = lines.pop() ?? "";
      for (const line of lines) consumeLine(line);
    }

    // Flush the decoder and handle a final line that was not newline-terminated.
    buffer += decoder.decode();
    if (buffer) consumeLine(buffer);

    // End of stream: split prose from the chips payload.
    const markerIdx = fullText.indexOf(CHIPS_MARKER);
    const proseEnd = markerIdx >= 0 ? markerIdx : fullText.length;
    // Release text that was held back as a possible marker start but never became one.
    flushUpTo(proseEnd);

    let chips: unknown[] = [];
    if (markerIdx >= 0) {
      const chipsRaw = fullText.slice(markerIdx + CHIPS_MARKER.length).trim();
      try {
        const parsed: unknown = JSON.parse(chipsRaw);
        if (Array.isArray(parsed)) {
          chips = parsed;
        } else {
          console.warn(
            "[sos-stream] chips payload is not an array:",
            chipsRaw.slice(0, 100),
          );
        }
      } catch {
        console.warn(
          "[sos-stream] chips parse failed:",
          chipsRaw.slice(0, 100),
        );
      }
    }
    if (chips.length > 0) {
      write(`event: chips\ndata: ${JSON.stringify(chips)}\n\n`);
    }
    write("event: done\ndata: {}\n\n");
    console.log(
      `[coach/sos-stream] stream done, ${chunkCount} chunks, ${fullText.length} chars`,
    );

    // Memory bookkeeping is fire-and-forget: neither call is awaited, failures are only logged.
    if (loadedMemoryIds.length > 0) {
      markReferenced(loadedMemoryIds).catch((e: unknown) => {
        console.error("[lyra-memory] markReferenced failed (non-fatal):", e);
      });
    }
    const allMessages: Array<{ role: string; content: string }> = [
      ...messages,
      { role: "assistant", content: fullText.slice(0, proseEnd).trim() },
    ];
    // Memory extraction is its own LLM call and always uses the OpenRouter key,
    // independent of the provider that produced the SOS answer.
    const memoryExtractKey = (config.openrouterApiKey as string | undefined) ?? "";
    extractAndStoreMemories(user.id, allMessages, sessionId, memoryExtractKey).catch(
      (e: unknown) => {
        console.error("[lyra-memory] extraction failed (non-fatal):", e);
      },
    );
  } catch (err) {
    // Headers are already sent, so the failure is reported in-band as an SSE event.
    console.error("[coach/sos-stream] read error:", err);
    write(`event: error\ndata: {"error":"stream failed"}\n\n`);
  } finally {
    res.end();
  }
});
|