rebreak-monorepo/backend/server/api/coach/speak-cartesia.post.ts
chahinebrini 685782b538 fix(coach): dynamische Sprache (Text-Detection + App-Locale-Fallback)
LLM-Prompt (message.post + sos-stream):
- LANG_INSTRUCTIONS Map raus, ersetzt durch dynamische Instruktion
  'Reply in {detectedFromUser} ... fallback: {appLang}'
- Lyra matcht jetzt die Sprache der letzten User-Message (per
  detectLang Unicode-Detection); App-Locale ist nur noch Fallback
- Instruktion doppelt eingehängt (Anfang + Ende des System-Prompts)
  gegen recency bias bei langen deutschen Prompts

TTS (speak dispatcher + speak-cartesia + speak-elevenlabs):
- Kein 'de'-Default mehr für language. detectLang(text, locale) leitet
  Sprache primär aus dem Antwort-Text ab (Arabic/Cyrillic/CJK/Turkish-
  Letters), Locale als Fallback
- Cartesia + ElevenLabs: language/language_code nur senden wenn
  ableitbar, sonst Provider auto-detect statt erzwungenem 'de'
- speak-cartesia: sonic-2 → sonic-3 (Multi-Lang, war beim Dispatcher-
  Fix gestern vergessen worden)
- Google: en-US neutraler Fallback statt de-DE-Bias

Neu: server/utils/detect-lang.ts
2026-05-31 00:12:40 +02:00

80 lines
2.4 KiB
TypeScript

/**
* POST /api/coach/speak-cartesia
* Cartesia Sonic-3 — fastest TTS (~75ms first-byte), multilingual across 42 languages.
*
* Returns audio/mpeg. The voice is taken from runtimeConfig.cartesiaVoiceId, then
* from the CARTESIA_VOICE_ID environment variable
* (fallback `b9de4a89-2257-424b-94c2-db18ba68c81a` when both are unset).
*/
import { detectLang } from "../../utils/detect-lang";
/** Voice used when neither the runtime config nor the environment names one. */
const FALLBACK_VOICE_ID = "b9de4a89-2257-424b-94c2-db18ba68c81a";

/**
 * Handles POST /api/coach/speak-cartesia: forwards the given text to the
 * Cartesia `/tts/bytes` endpoint and streams the resulting MP3 to the client.
 *
 * Expected request body: `{ text: string; locale?: string }`. Only the first
 * 4096 characters of `text` are sent upstream.
 *
 * @returns The upstream audio stream, sent with `Content-Type: audio/mpeg`
 *   and `Cache-Control: no-store`.
 * @throws 400 when `text` is missing, not a string, or blank.
 * @throws 503 when no Cartesia API key is configured.
 * @throws 502 when the Cartesia request cannot be completed, answers with a
 *   non-2xx status, or carries no body.
 */
export default defineEventHandler(async (event) => {
  await requireUser(event);

  // The body is client-controlled: it may be absent, a primitive, or carry
  // fields of the wrong type. Narrow it at runtime so bad input ends in the
  // 400 below rather than in an unhandled TypeError.
  const body: unknown = await readBody(event);
  const payload: { text?: unknown; locale?: unknown } =
    typeof body === "object" && body !== null
      ? (body as { text?: unknown; locale?: unknown })
      : {};
  const text = typeof payload.text === "string" ? payload.text : "";
  const locale =
    typeof payload.locale === "string" ? payload.locale : undefined;

  if (!text.trim()) {
    throw createError({ statusCode: 400, message: "text fehlt" });
  }

  // Dynamic language: text script detection > app locale hint > nothing
  // (provider auto-detect). There is no "de" default any more, otherwise
  // Arabic text would be spoken with German pronunciation.
  const lang = detectLang(text, locale);

  const config = useRuntimeConfig();
  const key =
    (config.cartesiaApiKey as string) || process.env.CARTESIA_API_KEY || "";
  const voiceId =
    (config.cartesiaVoiceId as string) ||
    process.env.CARTESIA_VOICE_ID ||
    FALLBACK_VOICE_ID;

  if (!key) {
    throw createError({
      statusCode: 503,
      message: "CARTESIA_API_KEY nicht konfiguriert",
    });
  }

  const upstream = await fetch("https://api.cartesia.ai/tts/bytes", {
    method: "POST",
    headers: {
      "X-API-Key": key,
      "Cartesia-Version": "2024-11-13",
      "Content-Type": "application/json",
    },
    body: JSON.stringify({
      // sonic-3 supports 42 languages incl. ar/tr (sonic-2 = de/en only).
      model_id: "sonic-3",
      transcript: text.slice(0, 4096),
      voice: { mode: "id", id: voiceId },
      output_format: {
        container: "mp3",
        sample_rate: 22050,
        bit_rate: 64000,
      },
      // Only send `language` when it could be derived from detection or the
      // locale — otherwise let Sonic-3 auto-detect.
      ...(lang ? { language: lang } : {}),
    }),
  }).catch((cause: unknown) => {
    // A rejected fetch means the request never completed (transport-level
    // failure). Report it like any other upstream failure instead of
    // letting it escape as an unhandled 500.
    console.error("[speak-cartesia] error:", cause);
    throw createError({
      statusCode: 502,
      message: "Cartesia TTS fehlgeschlagen",
    });
  });

  if (!upstream.ok || !upstream.body) {
    // Best effort: include the upstream error payload in the server log.
    const err = await upstream.text().catch(() => "");
    console.error("[speak-cartesia] error:", upstream.status, err);
    throw createError({
      statusCode: 502,
      message: "Cartesia TTS fehlgeschlagen",
    });
  }

  setHeader(event, "Content-Type", "audio/mpeg");
  setHeader(event, "Cache-Control", "no-store");

  // Convert the web ReadableStream from fetch into a Node stream for sendStream.
  const { Readable } = await import("node:stream");
  const nodeStream = Readable.fromWeb(upstream.body as never);
  return sendStream(event, nodeStream);
});