- backend/coach: routing zu Sonnet (default) / Haiku / Groq Llama je nach sessionData.llmProvider. sort:latency für Anthropic-Modelle (-30..58% TTFB). - frontend: LlmProviderToggle (Sonnet/Haiku/Groq pills), llmProvider.ts Storage-Helper. sosStream.ts schickt llmProvider im /sos-session-Body. - bench: SosTtsBenchmark sammelt Marker (req->session, lyra-ttfb, lyra-done, tts-fired/headers/body/file, audio-loaded, first-audio); Output als console.table. - ops: backend/scripts/llm-bench.sh + Python-Variante für realistic SOS-Prompt. - speak-cartesia + speak-elevenlabs Endpoints (waren ungetracked, jetzt mit drin).
71 lines
1.9 KiB
TypeScript
71 lines
1.9 KiB
TypeScript
/**
 * POST /api/coach/speak-cartesia
 * Cartesia Sonic-2 — fastest TTS (~75 ms first byte), native German.
 *
 * Returns audio/mpeg. The voice comes from runtimeConfig.cartesiaVoiceId
 * (falls back to `b9de4a89-2257-424b-94c2-db18ba68c81a` when unset).
 */
|
|
// Cartesia voice id used when neither runtimeConfig.cartesiaVoiceId nor the
// CARTESIA_VOICE_ID environment variable provides a non-empty value.
const FALLBACK_VOICE_ID = "b9de4a89-2257-424b-94c2-db18ba68c81a";
|
|
|
|
/**
 * Synthesizes speech for the submitted `text` via the Cartesia `/tts/bytes`
 * endpoint and streams the resulting MP3 back to the caller.
 *
 * Request body: JSON object with a non-blank string property `text`.
 *
 * @throws 400 when the body is missing, not an object, or `text` is not a
 *   non-blank string.
 * @throws 503 when no Cartesia API key is configured.
 * @throws 502 when the Cartesia request cannot be sent, answers with a
 *   non-2xx status, or carries no response body.
 */
export default defineEventHandler(async (event) => {
  // NOTE(review): presumably rejects unauthenticated callers — confirm in requireUser.
  await requireUser(event);

  // The body is untrusted input: it may be absent, a primitive, or carry a
  // non-string `text`. Narrow it explicitly so bad input yields a 400 instead
  // of an unhandled TypeError (500).
  const body: unknown = await readBody(event);
  const text =
    typeof body === "object" && body !== null
      ? (body as { text?: unknown }).text
      : undefined;

  if (typeof text !== "string" || !text.trim()) {
    throw createError({ statusCode: 400, message: "text fehlt" });
  }

  const config = useRuntimeConfig();
  // `||` (not `??`) is intentional: an empty-string config value must fall
  // through to the next source.
  const key =
    (config.cartesiaApiKey as string) || process.env.CARTESIA_API_KEY || "";
  const voiceId =
    (config.cartesiaVoiceId as string) ||
    process.env.CARTESIA_VOICE_ID ||
    FALLBACK_VOICE_ID;

  if (!key) {
    throw createError({
      statusCode: 503,
      message: "CARTESIA_API_KEY nicht konfiguriert",
    });
  }

  let upstream: Response;
  try {
    upstream = await fetch("https://api.cartesia.ai/tts/bytes", {
      method: "POST",
      headers: {
        "X-API-Key": key,
        "Cartesia-Version": "2024-11-13",
        "Content-Type": "application/json",
      },
      body: JSON.stringify({
        model_id: "sonic-2",
        // The transcript sent upstream is capped at 4096 characters.
        transcript: text.slice(0, 4096),
        voice: { mode: "id", id: voiceId },
        output_format: {
          container: "mp3",
          sample_rate: 22050,
          bit_rate: 64000,
        },
        language: "de",
      }),
    });
  } catch (cause: unknown) {
    // Network-level failures (DNS, connection reset, ...) reject the fetch
    // promise; report them like any other upstream failure.
    console.error("[speak-cartesia] request failed:", cause);
    throw createError({
      statusCode: 502,
      message: "Cartesia TTS fehlgeschlagen",
    });
  }

  if (!upstream.ok || !upstream.body) {
    // Best-effort read of the upstream error payload, used for the log only.
    const err = await upstream.text().catch(() => "");
    console.error("[speak-cartesia] error:", upstream.status, err);
    throw createError({
      statusCode: 502,
      message: "Cartesia TTS fehlgeschlagen",
    });
  }

  setHeader(event, "Content-Type", "audio/mpeg");
  setHeader(event, "Cache-Control", "no-store");

  // Convert the web ReadableStream from fetch into a Node stream for sendStream.
  const { Readable } = await import("node:stream");
  const nodeStream = Readable.fromWeb(upstream.body as never);
  return sendStream(event, nodeStream);
});
|