- Chat (message.post.ts): neuer nativer anthropic-Branch in tryModel (api.anthropic.com/v1/messages, x-api-key, system top-level), führt die Fallback-Kette claude-haiku-4-5 → gemini-flash-lite → gemini-flash → gpt-4o-mini. - SOS (sos-stream.get.ts): Dispatch-Refactor mit buildUpstream() + Kandidaten- Fallback-Kette (anthropic → gemini → openai). Behebt strukturell den Bug "SOS liefert nur Krisen-Text" (vorher single fetch ohne Fallback). Nativer Anthropic-Stream: Delta-Parser liest content_block_delta.delta.text. - nitro.config.ts: anthropicApiKey deklariert (ANTHROPIC_API_KEY). - plan-features.ts: Legend-Voice zurück auf ElevenLabs eleven_turbo_v2_5 (Cartesia-Übergang nach Zahlungsproblem 2026-06-08 aufgehoben, neuer Key). Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
576 lines
23 KiB
TypeScript
576 lines
23 KiB
TypeScript
/**
 * GET /api/coach/sos-stream?session=xyz — Streaming SOS Coach
 *
 * Streamt die LLM-Antwort als SSE (Server-Sent Events).
 * Frontend nutzt react-native-sse (EventSource) für progressives Streaming.
 *
 * Format (SSE-Standard):
 *   event: message
 *   data: <text chunk>
 *
 *   event: chips
 *   data: [{"label":"...","action":"..."}]
 *
 *   event: crisis_chips   ← NEU: deterministischer Krisen-Override
 *   data: [{"label":"...","action":"..."}]
 *
 * Flow:
 *   1. Client POSTet zu /api/coach/sos-session → { sessionId, crisisDetected, crisisLevel }
 *   2. Client öffnet GET /api/coach/sos-stream?session=xyz via EventSource
 *   3. Backend lädt Session-Daten inkl. crisisLevel (deterministische Vorprüfung)
 *   4. Streamt Antwort als SSE-Events
 *
 * Safety-Layer (R-LYRA-01):
 *   (a) Crisis-Pre-Filter: Wenn crisisLevel != "none" → event: crisis_chips
 *       wird als ERSTES Event gesendet, BEVOR das LLM antwortet.
 *       Das Frontend kann diese Chips sofort permanent einblenden — unabhängig
 *       davon, was das LLM zurückliefert.
 *   (b) Timeout/Leer-Fallback: Wenn das LLM abbricht, in Timeout läuft oder
 *       keine validen Chips liefert, wird ein sicherer Default gesendet.
 *       Kein leerer Screen im Krisen-Kontext möglich.
 *
 * Fallback: bei LLM-Fehler → event: crisis_chips + event: done (kein 502-Throw).
 */
|
||
import { COACH_SYSTEM_PROMPT } from "./message.post";
|
||
import { getCrisisChips, getCrisisFallback } from "../../utils/crisis-filter";
|
||
import type { CrisisLevel } from "../../utils/crisis-filter";
|
||
import { getMemoriesForUser, markReferenced } from "../../db/lyraMemory";
|
||
import { extractAndStoreMemories } from "../../utils/lyraMemoryExtract";
|
||
import { getProfile } from "../../db/profile";
|
||
import { detectLang } from "../../utils/detect-lang";
|
||
|
||
/**
 * Prompt suffix appended to the coach system prompt for SOS sessions.
 *
 * It instructs the model to answer in short plain prose, never to enumerate
 * the chip options in that prose (the prompt tells the model the prose is read
 * aloud via TTS), and to finish with exactly one `[[CHIPS]]:<json>` line.
 * The stream handler relies on that marker to separate prose from chips.
 *
 * NOTE: this is runtime text sent to the LLM — it must not contain any
 * formatting residue (stray `|` lines etc.), only the instruction itself.
 */
const SOS_INSTRUCTION = `\n\nDU BEFINDEST DICH IN EINEM AKUTEN SOS-MOMENT. WICHTIGE REGELN:
- Antworte als REINER TEXT, KEINE JSON-Wrapper, KEIN Markdown, KEINE Aufzählungen.
- Sei warm, präsent, menschlich — wie eine echte Freundin am Telefon.
- KURZ: 1-2 Sätze, max 3 nur in seltenen Ausnahmen. Ruhiger Rhythmus mit kurzen Pausen.
- Validiere zuerst das Gefühl, dann sanfte Frage ODER Vorschlag.

ABSOLUT KRITISCH — NIEMALS die Chip-Optionen im Prosa-Text auflisten oder paraphrasieren.
Der Prosa-Text wird dem User VORGELESEN (TTS) — Chip-Aufzählung klingt unnatürlich
("warum sprichst du eine Liste?"). Die Chips erscheinen visuell als Buttons.

✗ FALSCH: "Magst du atmen oder lieber spielen?" (= Aufzählung)
✗ FALSCH: "Du kannst eine Atemübung oder ein Spiel machen."
✗ FALSCH: "Hier sind ein paar Optionen: Atmen, Spielen oder Reden."
✗ FALSCH: "Magst du mit mir reden, eine Atemübung machen oder ein Spiel?"

✓ RICHTIG: "Magst du was probieren?"
✓ RICHTIG: "Was hilft dir gerade?"
✓ RICHTIG: "Hier hast du Möglichkeiten."
✓ RICHTIG: "Was passt für dich grad?"
✓ RICHTIG: "Ich bin da. Was brauchst du jetzt?"

- Am ENDE der Antwort genau EINE neue Zeile mit Chips im Format:
[[CHIPS]]:[{"label":"…","action":"…"},…]
- Erlaubte Chip-Actions: breathing, game_picker, send_text:<text>, overcome, share_success, rate_session, close, show_stats, need_help, feel:<text>
- KEIN Text nach der CHIPS-Zeile`;
|
||
|
||
/**
 * GET /api/coach/sos-stream?session=<id> — streams the SOS coach reply as SSE.
 *
 * Steps:
 *  1. Authenticate, load the one-time SOS session and verify ownership
 *     (400 / 404 / 403 are thrown before any SSE bytes are written).
 *  2. Build the system prompt (nickname, demographics, memories, language
 *     instruction, coach prompt, SOS instruction).
 *  3. Try the provider candidates in order; the first one that answers with an
 *     OK streaming response wins. Each attempt has a connect timeout so a
 *     hanging provider cannot block the fallback chain.
 *  4. Write SSE events directly to the Node response: optional `crisis_chips`
 *     first, then `message` chunks, then `chips`, then `done`.
 *
 * From the moment SSE headers are sent, this handler never throws: if no
 * provider is available or the stream breaks, a crisis fallback message and
 * chips are sent instead.
 */
export default defineEventHandler(async (event) => {
  // Max. time a provider may take to answer with response headers before the
  // next candidate is tried.
  const UPSTREAM_CONNECT_TIMEOUT_MS = 10000;
  // Separates the prose from the trailing chips JSON in the LLM output.
  const CHIPS_MARKER = "[[CHIPS]]:";

  const user = await requireUser(event);

  // Session id comes from the query string.
  const query = getQuery(event);
  const sessionId = query.session as string | undefined;

  if (!sessionId) {
    throw createError({
      statusCode: 400,
      message: "session query param fehlt",
    });
  }

  // Load session data (messages + locale).
  const { getSosSession, deleteSosSession } = await import(
    "../../utils/sosSessions"
  );
  const sessionData = getSosSession(sessionId);

  if (!sessionData) {
    throw createError({
      statusCode: 404,
      message: "Session nicht gefunden oder abgelaufen (TTL 5min)",
    });
  }

  // Security: the session must belong to the authenticated user.
  if (sessionData.userId !== user.id) {
    throw createError({ statusCode: 403, message: "Nicht deine Session" });
  }

  const { messages, locale } = sessionData;

  // Crisis level from the deterministic pre-filter (per the original comment
  // it is set by sos-session.post.ts).
  const crisisLevel = (sessionData.crisisLevel ?? "none") as CrisisLevel;

  // One-time use: the session is consumed as soon as the stream is opened.
  deleteSosSession(sessionId);

  const config = useRuntimeConfig();

  // Language instruction: derived from the language of the last user message,
  // with the app locale as fallback, so a language switch mid-session is
  // followed instead of being overridden by a hard-coded language.
  const LANG_NAMES: Record<string, string> = {
    de: "German", en: "English", tr: "Turkish", ar: "Arabic",
    fr: "French", es: "Spanish", it: "Italian", pt: "Portuguese",
    ru: "Russian", ja: "Japanese", ko: "Korean", zh: "Chinese",
    he: "Hebrew", th: "Thai",
  };
  const lastUserMsg = [...messages].reverse().find((m) => m.role === "user");
  const detectedFromUser = detectLang(lastUserMsg?.content ?? "", locale);
  const appLangCode = locale ? locale.split("-")[0].toLowerCase() : null;
  const appLangName =
    (appLangCode && LANG_NAMES[appLangCode]) || "the user's language";
  const detectedName =
    (detectedFromUser && LANG_NAMES[detectedFromUser]) || null;
  const lang = detectedName
    ? `LANGUAGE: Reply in ${detectedName} — match the language the user just wrote in. If they switch languages, switch with them. App default fallback: ${appLangName}.`
    : `LANGUAGE: Reply in ${appLangName} (the user's app language). If the user clearly writes in another language, match theirs.`;

  // Memory injection: load memories stored from earlier sessions.
  let memoryBlock = "";
  let loadedMemoryIds: string[] = [];
  try {
    const memories = await getMemoriesForUser(user.id);
    if (memories.length > 0) {
      loadedMemoryIds = memories.map((m) => m.id);
      const TYPE_LABELS: Record<string, string> = {
        trigger: "Trigger",
        habit: "Gewohnheit",
        strength: "Stärke",
        relationship: "Wichtige Person",
        milestone: "Meilenstein",
        pain_point: "Sensibles Thema",
        goal: "Ziel",
        preference: "Präferenz",
      };
      const lines = memories
        .map((m) => `- ${TYPE_LABELS[m.type] ?? m.type}: ${m.content}`)
        .join("\n");
      memoryBlock = `[WAS DU ÜBER DIESEN USER WEISST — aus früheren Gesprächen]\n${lines}\nNutze diese Infos um GENAU diesen Menschen anzusprechen — wie ein echter Begleiter, nicht eine generische KI. Sprich Personen mit Namen an. Erinnere an Stärken die dir bekannt sind.\n\n`;
      console.log(
        `[lyra-memory] injected ${memories.length} memories for ${user.id}`,
      );
    }
  } catch (e) {
    // Non-fatal: a memory failure must never block the SOS reply.
    console.error("[lyra-memory] load error (non-fatal):", e);
  }

  // Nickname + demographics block (same pattern as message.post.ts according
  // to the original comment). Without a name anchor the model tends to invent
  // names. Demographics are read-only context entered by the user in the
  // profile form; the prompt forbids asking for them.
  let nicknamePrefix = "";
  let demographicsBlock = "";
  try {
    const profile = await getProfile(user.id);
    const nickname = profile?.nickname || profile?.username;
    if (nickname) {
      nicknamePrefix = `NUTZER-NAME: Der Nutzer heißt "${nickname}" – nenne ihn gelegentlich bei seinem Namen wenn es natürlich passt.\n\n`;
    }

    // Kept separate from memoryBlock: structured profile data, not memories.
    const demoLines: string[] = [];
    if (profile?.birthYear) {
      const age = new Date().getFullYear() - profile.birthYear;
      demoLines.push(`- Alter: ca. ${age} Jahre (Geburtsjahr ${profile.birthYear})`);
    }
    if (profile?.gender) {
      const GENDER_LABEL: Record<string, string> = {
        male: "männlich",
        female: "weiblich",
        diverse: "divers",
        no_answer: "keine Angabe",
      };
      demoLines.push(`- Geschlecht: ${GENDER_LABEL[profile.gender] ?? profile.gender}`);
    }
    if (profile?.maritalStatus) {
      const MS_LABEL: Record<string, string> = {
        single: "ledig",
        partnered: "in Partnerschaft",
        married: "verheiratet",
        divorced: "geschieden",
        widowed: "verwitwet",
        no_answer: "keine Angabe",
      };
      demoLines.push(
        `- Familienstand: ${MS_LABEL[profile.maritalStatus] ?? profile.maritalStatus}`,
      );
    }
    if (profile?.profession) {
      demoLines.push(`- Beruf: ${profile.profession}`);
    }
    if (profile?.bundesland) {
      demoLines.push(`- Bundesland: ${profile.bundesland}`);
    }
    if (profile?.city) {
      demoLines.push(`- Stadt: ${profile.city}`);
    }
    if (demoLines.length > 0) {
      demographicsBlock = `[USER-DEMOGRAPHIE — vom User selbst angegeben]\n${demoLines.join("\n")}\nNutze diese Infos nur für Empathie + Kontext (z.B. "Schichtarbeit erschwert deinen Rhythmus"). Frage NIEMALS nach diesen Daten — der User pflegt sie selbst in der Profile-Form. Diese Daten persistieren in keinem Memory-Store, sondern sind strukturierte DiGA-Daten.\n\n`;
    }
  } catch (e) {
    console.error("[sos-stream] profile load (non-fatal):", e);
  }

  // The language instruction is intentionally placed both before and after
  // the main prompt.
  const systemPrompt = `${nicknamePrefix}${demographicsBlock}${memoryBlock}${lang}\n\n${COACH_SYSTEM_PROMPT.replace("{{PLAN_DETAILS}}", "")}${SOS_INSTRUCTION}\n\n${lang}`;

  // The conversation should start with a user message; keep the last 8 turns.
  const firstUserIdx = messages.findIndex((m) => m.role === "user");
  const conversation =
    firstUserIdx > 0 ? messages.slice(firstUserIdx) : messages;
  const trimmed = conversation.slice(-8);

  // LLM routing.
  // - Debug override: if the session carries an explicit llmProvider (anything
  //   other than "auto"), only that provider is tried — no fallback chain.
  // - Default chain: Anthropic Haiku 4.5 (native Messages API) → Gemini Flash
  //   Lite → OpenAI gpt-4o-mini. The first candidate that returns an OK
  //   stream wins, so a single provider outage does not immediately degrade
  //   the reply to the static crisis text.
  const userToggle = sessionData.llmProvider;
  const candidateProviders: string[] =
    userToggle && userToggle !== "auto"
      ? [userToggle]
      : ["anthropic-haiku", "gemini-flash-lite", "openai-mini"];

  type UpstreamConfig = {
    url: string;
    headers: Record<string, string>;
    body: string;
    label: string;
  };

  // Request body for every OpenAI-compatible /chat/completions endpoint.
  const chatCompletionsBody = (
    model: string,
    extra: Record<string, unknown> = {},
  ): string =>
    JSON.stringify({
      model,
      max_tokens: 400,
      stream: true,
      messages: [{ role: "system", content: systemPrompt }, ...trimmed],
      ...extra,
    });

  // Builds the upstream request for a provider id. Returns null when the
  // provider is unknown or its API key is not configured. Anthropic uses the
  // native Messages API (top-level `system`, `x-api-key`); every other
  // provider uses the OpenAI-compatible chat-completions shape.
  const buildUpstream = (provider: string): UpstreamConfig | null => {
    const headers: Record<string, string> = { "Content-Type": "application/json" };

    if (provider === "anthropic-haiku") {
      const key = config.anthropicApiKey as string | undefined;
      if (!key) return null;
      headers["x-api-key"] = key;
      headers["anthropic-version"] = "2023-06-01";
      // Anthropic requires the first message to be from the user; after the
      // slice(-8) above the window may start with an assistant turn.
      const anthropicMessages =
        trimmed[0]?.role === "assistant" ? trimmed.slice(1) : trimmed;
      return {
        url: "https://api.anthropic.com/v1/messages",
        headers,
        body: JSON.stringify({
          model: "claude-haiku-4-5",
          max_tokens: 400,
          stream: true,
          system: systemPrompt,
          messages: anthropicMessages,
        }),
        label: "anthropic:claude-haiku-4-5",
      };
    }

    if (provider === "gemini-flash-lite" || provider === "gemini-flash") {
      const key = config.geminiApiKey as string | undefined;
      if (!key) return null;
      headers.Authorization = `Bearer ${key}`;
      const model =
        provider === "gemini-flash" ? "gemini-2.5-flash" : "gemini-2.5-flash-lite";
      return {
        url: "https://generativelanguage.googleapis.com/v1beta/openai/chat/completions",
        headers,
        body: chatCompletionsBody(model),
        label: `gemini:${model}`,
      };
    }

    if (provider === "groq-llama") {
      const key = config.groqApiKey as string | undefined;
      if (!key) return null;
      headers.Authorization = `Bearer ${key}`;
      return {
        url: "https://api.groq.com/openai/v1/chat/completions",
        headers,
        body: chatCompletionsBody("llama-3.3-70b-versatile"),
        label: "groq:llama-3.3-70b",
      };
    }

    if (provider === "openrouter-haiku" || provider === "openrouter-sonnet") {
      const key = config.openrouterApiKey as string | undefined;
      if (!key) return null;
      headers.Authorization = `Bearer ${key}`;
      headers["HTTP-Referer"] = "https://rebreak.org";
      headers["X-Title"] = "ReBreak SOS";
      const model =
        provider === "openrouter-haiku"
          ? "anthropic/claude-haiku-4.5"
          : "anthropic/claude-sonnet-4.5";
      return {
        url: "https://openrouter.ai/api/v1/chat/completions",
        headers,
        // OpenRouter routing hint: prefer the lowest-latency backend.
        body: chatCompletionsBody(model, { provider: { sort: "latency" } }),
        label: `openrouter:${model}`,
      };
    }

    // openai-mini — last anchor of the default chain (different vendor).
    if (provider === "openai-mini") {
      const key = config.openaiApiKey as string | undefined;
      if (!key) return null;
      headers.Authorization = `Bearer ${key}`;
      return {
        url: "https://api.openai.com/v1/chat/completions",
        headers,
        body: chatCompletionsBody("gpt-4o-mini"),
        label: "openai:gpt-4o-mini",
      };
    }

    return null;
  };

  // Pick the first candidate that delivers an OK streaming response.
  let upstreamBody: ReadableStream<Uint8Array> | null = null;
  let usedLabel = "none";
  for (const provider of candidateProviders) {
    const cfg = buildUpstream(provider);
    if (!cfg) continue;

    // Connect timeout: abort if no response headers arrive in time. The timer
    // is cleared in `finally`, so an accepted stream is not cut off later.
    const controller = new AbortController();
    const connectTimer = setTimeout(
      () => controller.abort(),
      UPSTREAM_CONNECT_TIMEOUT_MS,
    );
    try {
      const resp = await fetch(cfg.url, {
        method: "POST",
        headers: cfg.headers,
        body: cfg.body,
        signal: controller.signal,
      });
      if (resp.ok && resp.body) {
        upstreamBody = resp.body;
        usedLabel = cfg.label;
        break;
      }
      const errText = await resp.text().catch(() => "");
      console.error(
        `[coach/sos-stream] ${cfg.label} upstream error:`,
        resp.status,
        errText.slice(0, 300),
      );
    } catch (e) {
      if (controller.signal.aborted) {
        console.error(
          `[coach/sos-stream] ${cfg.label} timed out after ${UPSTREAM_CONNECT_TIMEOUT_MS}ms`,
        );
      } else {
        console.error(`[coach/sos-stream] ${cfg.label} fetch threw:`, e);
      }
    } finally {
      clearTimeout(connectTimer);
    }
  }

  console.log(`[coach/sos-stream] using ${usedLabel}`);

  // Write directly to the Node response. The original author notes that
  // sendStream(ReadableStream) does not pump reliably in Nitro.
  // SSE headers are sent even when every provider failed, so the fallback
  // below can be delivered as a normal stream (no 502 in a crisis context).
  const res = event.node.res;
  res.statusCode = 200;
  res.setHeader("Content-Type", "text/event-stream; charset=utf-8");
  res.setHeader("Cache-Control", "no-store");
  res.setHeader("X-Accel-Buffering", "no");
  res.setHeader("Connection", "keep-alive");
  res.flushHeaders?.();

  // Best-effort write: never throws, and is a no-op once the response is
  // finished or the client has gone away.
  const write = (chunk: string) => {
    if (res.writableEnded || res.destroyed) return;
    try {
      res.write(chunk);
    } catch (e) {
      console.error("[coach/sos-stream] write error:", e);
    }
  };

  // Sends the static crisis fallback (message + chips + done).
  const sendCrisisFallback = () => {
    const effectiveLevel = crisisLevel !== "none" ? crisisLevel : "elevated";
    const fallback = getCrisisFallback(effectiveLevel);
    write(`event: message\ndata: ${JSON.stringify(fallback.message)}\n\n`);
    write(`event: chips\ndata: ${JSON.stringify(fallback.chips)}\n\n`);
    write("event: done\ndata: {}\n\n");
  };

  console.log(
    `[coach/sos-stream] stream started for ${user.id}, session ${sessionId}`,
  );
  write(": connected\n\n");

  // (a) Crisis pre-filter: send crisis chips as the very first event, before
  // any LLM output. They use their own event type ("crisis_chips") so the
  // client can treat them separately from LLM-provided "chips".
  if (crisisLevel !== "none") {
    const crisisChips = getCrisisChips(crisisLevel);
    write(`event: crisis_chips\ndata: ${JSON.stringify(crisisChips)}\n\n`);
    console.log(
      `[crisis-filter] crisis_chips sent for ${user.id}, level=${crisisLevel}`,
    );
  }

  // (b) No provider available: deliver the safe default instead of an empty
  // screen.
  if (!upstreamBody) {
    sendCrisisFallback();
    console.log(
      `[coach/sos-stream] LLM unavailable — crisis fallback sent for ${user.id}`,
    );
    res.end();
    return;
  }

  const reader = upstreamBody.getReader();
  const decoder = new TextDecoder();
  let buffer = "";
  let fullText = "";
  let chunkCount = 0;

  // Client disconnect detection: stop reading and release the upstream.
  let aborted = false;
  res.on("close", () => {
    aborted = true;
    reader.cancel().catch(() => {});
  });

  // Number of characters of fullText already forwarded to the client.
  let sentLength = 0;
  // True once "[[" was seen: everything from there on is treated as the chips
  // line and is never forwarded as prose.
  let markerOpened = false;

  // Forwards the part of fullText that is safe to show/speak. A single
  // trailing "[" is held back until the next delta, because it may be the
  // first half of "[[" split across two deltas.
  const forwardSafeText = (isFinal: boolean) => {
    if (markerOpened) return;
    const markerStart = fullText.indexOf("[[");
    let safeEnd = fullText.length;
    if (markerStart >= 0) {
      safeEnd = markerStart;
    } else if (!isFinal && fullText.endsWith("[")) {
      safeEnd = fullText.length - 1;
    }
    if (safeEnd > sentLength) {
      // JSON-encode so whitespace and newlines survive the SSE framing.
      write(
        `event: message\ndata: ${JSON.stringify(fullText.slice(sentLength, safeEnd))}\n\n`,
      );
      sentLength = safeEnd;
    }
    if (markerStart >= 0) markerOpened = true;
  };

  // Parses one upstream SSE line and appends its text delta (if any).
  const handleUpstreamLine = (rawLine: string) => {
    const line = rawLine.trim();
    if (!line.startsWith("data:")) return;
    const payload = line.slice(5).trim();
    if (payload === "[DONE]") return;
    try {
      const json = JSON.parse(payload) as {
        choices?: { delta?: { content?: string } }[];
        type?: string;
        delta?: { type?: string; text?: string };
      };
      // OpenAI-compatible: choices[].delta.content
      // Anthropic native:  content_block_delta → delta.text
      const delta =
        json.choices?.[0]?.delta?.content ??
        (json.type === "content_block_delta" ? json.delta?.text : undefined);
      if (!delta) return;
      fullText += delta;
      chunkCount++;
      forwardSafeText(false);
    } catch {
      // Unparseable / partial payload — ignore.
    }
  };

  try {
    while (!aborted) {
      const { value, done } = await reader.read();
      if (done) break;
      buffer += decoder.decode(value, { stream: true });
      const lines = buffer.split("\n");
      // The last element is an incomplete line; keep it for the next chunk.
      buffer = lines.pop() ?? "";
      for (const line of lines) handleUpstreamLine(line);
    }

    // Flush decoder state and a final line that was not newline-terminated,
    // then release any held-back trailing "[".
    buffer += decoder.decode();
    if (buffer) handleUpstreamLine(buffer);
    forwardSafeText(true);

    // Stream finished → extract the chips JSON after the marker.
    const markerIdx = fullText.indexOf(CHIPS_MARKER);
    let chips: unknown[] = [];
    if (markerIdx >= 0) {
      const chipsRaw = fullText.slice(markerIdx + CHIPS_MARKER.length);
      try {
        const parsed: unknown = JSON.parse(chipsRaw.trim());
        if (Array.isArray(parsed)) {
          chips = parsed;
        } else {
          console.warn(
            "[sos-stream] chips parse failed:",
            chipsRaw.slice(0, 100),
          );
        }
      } catch {
        console.warn(
          "[sos-stream] chips parse failed:",
          chipsRaw.slice(0, 100),
        );
      }
    }

    if (chips.length > 0) {
      write(`event: chips\ndata: ${JSON.stringify(chips)}\n\n`);
    } else if (crisisLevel !== "none") {
      // (b) The LLM delivered no valid chips: the chip area must not stay
      // empty, so fall back to the crisis chips for the detected level.
      const fallbackChips = getCrisisChips(crisisLevel);
      write(`event: chips\ndata: ${JSON.stringify(fallbackChips)}\n\n`);
      console.log(
        `[coach/sos-stream] LLM chips missing — crisis chip fallback, level=${crisisLevel}`,
      );
    } else {
      // No crisis level either: send at least one need_help chip.
      const safeDefault = [{ label: "Hilfe holen", action: "need_help" }];
      write(`event: chips\ndata: ${JSON.stringify(safeDefault)}\n\n`);
      console.log(
        `[coach/sos-stream] LLM chips missing — safe default chip sent`,
      );
    }
    write("event: done\ndata: {}\n\n");
    console.log(
      `[coach/sos-stream] stream done, ${chunkCount} chunks, ${fullText.length} chars`,
    );

    // Memory bookkeeping: fire-and-forget after the stream ended; neither
    // call is awaited, so they cannot delay closing the response.
    if (loadedMemoryIds.length > 0) {
      markReferenced(loadedMemoryIds).catch(() => {});
    }
    const allMessages: Array<{ role: string; content: string }> = [
      ...messages,
      { role: "assistant", content: fullText.split(CHIPS_MARKER)[0].trim() },
    ];
    // Memory extraction is its own LLM call and always receives the
    // OpenRouter key, independent of which provider answered the SOS request.
    const memoryExtractKey = (config.openrouterApiKey as string | undefined) ?? "";
    extractAndStoreMemories(user.id, allMessages, sessionId, memoryExtractKey).catch(
      () => {},
    );
  } catch (err) {
    console.error("[coach/sos-stream] read error:", err);
    // (b) Stream broke mid-way: still no empty screen — send the fallback.
    sendCrisisFallback();
  } finally {
    res.end();
  }
});
|