/**
 * POST /api/coach/speak-elevenlabs
 *
 * Text-to-speech proxy in front of ElevenLabs, using the `eleven_turbo_v2_5`
 * model. The voice id is resolved from `runtimeConfig.elevenlabsVoiceId`, then
 * `process.env.ELEVENLABS_VOICE_ID`, and finally falls back to Alexandra
 * (`kdmDKE6EkgrWrrykO9Qt`) when neither is set.
 *
 * Request body: JSON object with a non-empty string property `text`.
 * Response: the upstream audio, streamed as `audio/mpeg` with
 * `Cache-Control: no-store`.
 *
 * The voice stays constant across calls — same as the Gemini behaviour, no
 * mode switch as with gpt-4o-mini-tts.
 *
 * Errors raised via `createError`:
 * - 400 when `text` is missing, not a string, or only whitespace
 * - 503 when no ElevenLabs API key is configured
 * - 502 when the upstream call fails or returns no body
 */
const FALLBACK_VOICE_ID = "kdmDKE6EkgrWrrykO9Qt"; // Alexandra

export default defineEventHandler(async (event) => {
  // NOTE(review): requireUser is defined elsewhere; presumably it rejects
  // unauthenticated requests — confirm against its implementation.
  await requireUser(event);

  // Treat the body as untrusted: it may be absent, a primitive, or carry a
  // non-string `text`. All of those must end in a 400 rather than a TypeError.
  const body: unknown = await readBody(event);
  const rawText =
    typeof body === "object" && body !== null
      ? (body as { text?: unknown }).text
      : undefined;
  if (typeof rawText !== "string" || !rawText.trim()) {
    throw createError({ statusCode: 400, message: "text fehlt" });
  }
  const text = rawText;

  const config = useRuntimeConfig();

  // Fallback chain: runtimeConfig (Nuxt build-time) → process.env (runtime
  // injection via Infisical at pm2 start). This makes sure a key is still
  // available when Nuxt's runtimeConfig inflation did not bundle the
  // process.env value. `||` is intentional: an empty string must fall through.
  const key =
    (config.elevenlabsApiKey as string) || process.env.ELEVENLABS_API_KEY || "";
  const voiceId =
    (config.elevenlabsVoiceId as string) ||
    process.env.ELEVENLABS_VOICE_ID ||
    FALLBACK_VOICE_ID;

  // Diagnostics only: logs where the key came from and its length, never the
  // key itself.
  console.log(
    "[speak-elevenlabs] cfg-key:",
    !!config.elevenlabsApiKey,
    "env-key:",
    !!process.env.ELEVENLABS_API_KEY,
    "key-len:",
    key.length,
    "voice:",
    voiceId,
  );

  if (!key) {
    throw createError({
      statusCode: 503,
      message: "ELEVENLABS_API_KEY nicht konfiguriert",
    });
  }

  console.log("[speak-elevenlabs] CALL recv, text-len=", text.length, "voice=", voiceId);

  // /stream endpoint + optimize_streaming_latency=4 (maximum latency
  // optimisation, marginally lower quality). ElevenLabs sends the first bytes
  // after ~200-300ms instead of 600-1000ms on the non-stream endpoint.
  //
  // output_format is documented by ElevenLabs as a query parameter; it was
  // previously sent inside the JSON body, where it is not part of the request
  // schema, so the requested low-bitrate format did not take effect.
  const query = new URLSearchParams({
    optimize_streaming_latency: "4",
    output_format: "mp3_22050_32",
  });
  // The voice id comes from configuration, not from the caller; encoding it
  // simply keeps the path well-formed if it is ever misconfigured.
  const endpoint = `https://api.elevenlabs.io/v1/text-to-speech/${encodeURIComponent(
    voiceId,
  )}/stream?${query.toString()}`;

  const upstream = await fetch(endpoint, {
    method: "POST",
    headers: {
      "xi-api-key": key,
      "Content-Type": "application/json",
      Accept: "audio/mpeg",
    },
    body: JSON.stringify({
      // Cap the amount of text forwarded upstream.
      text: text.slice(0, 4096),
      // Turbo v2.5: ~50% faster than multilingual_v2 with marginally lower
      // quality — the trade-off is worth it for SOS (latency > studio polish).
      model_id: "eleven_turbo_v2_5",
      voice_settings: {
        stability: 0.5,
        similarity_boost: 0.75,
        style: 0.3,
        use_speaker_boost: true,
      },
    }),
  });

  if (!upstream.ok || !upstream.body) {
    // Best effort: the error payload is only used for the server-side log.
    const err = await upstream.text().catch(() => "");
    console.error("[speak-elevenlabs] error:", upstream.status, err);
    throw createError({
      statusCode: 502,
      message: "ElevenLabs TTS fehlgeschlagen",
    });
  }

  setHeader(event, "Content-Type", "audio/mpeg");
  setHeader(event, "Cache-Control", "no-store");

  // Bridge the WHATWG ReadableStream returned by fetch to a Node stream. The
  // cast works around the mismatch between the DOM and node:stream/web stream
  // typings.
  const { Readable } = await import("node:stream");
  const nodeStream = Readable.fromWeb(upstream.body as never);
  return sendStream(event, nodeStream);
});