/**
 * POST /api/coach/speak-openai — v5
 * OpenAI TTS — gpt-4o-mini-tts (Mar 2025), voices: nova (chat) / shimmer (sos).
 *
 * Modes:
 * - "chat"             → nova, neutral (no `instructions` sent)
 * - "sos"              → shimmer, single warm-empathic instruction set
 * - "sos-continuation" → shimmer, **identical** instructions to "sos"
 *
 * Why identical: gpt-4o-mini-tts interprets `instructions` so creatively that
 * different strings within the same SOS flow are perceived as "the voice
 * changes". Single-instruction mode eliminates that voice boundary.
 *
 * A missing or unrecognized `mode` is handled like "chat".
 *
 * On success the upstream MP3 is streamed back as `audio/mpeg` with
 * `Cache-Control: no-store`.
 *
 * @throws 400 when `text` is missing, not a string, or whitespace-only.
 * @throws 503 when no OpenAI API key is present in the runtime config.
 * @throws 502 when the upstream request fails, is not OK, or has no body.
 */
export default defineEventHandler(async (event) => {
  // NOTE(review): presumably rejects unauthenticated callers — defined elsewhere.
  await requireUser(event);

  // The request body is untrusted: it may be absent or not an object at all,
  // so narrow it at runtime instead of asserting a shape.
  const body: unknown = await readBody(event);
  const { text, mode } = (
    body !== null && typeof body === "object" ? body : {}
  ) as { text?: unknown; mode?: unknown };

  // A non-string `text` must yield the same 400 as a missing one rather than
  // crashing on `.trim()` and surfacing as a 500.
  if (typeof text !== "string" || !text.trim()) {
    throw createError({ statusCode: 400, message: "text fehlt" });
  }

  const isSos = mode === "sos" || mode === "sos-continuation";

  const config = useRuntimeConfig();
  const key = config.openaiApiKey as string | undefined;
  if (!key) {
    throw createError({
      statusCode: 503,
      message: "OpenAI API Key nicht konfiguriert",
    });
  }

  // Identical instructions for sos + sos-continuation → no perceived voice
  // drift between consecutive TTS calls within the same SOS session.
  const instructions = isSos
    ? "Warm, gentle, empathic — like a calm friend on the phone in a difficult moment. " +
      "Speak slowly with natural pauses between sentences. " +
      "Soft delivery, lower energy than chat-mode. " +
      "German native pronunciation. No fake-cheerful intonation."
    : undefined;

  let upstream: Response;
  try {
    upstream = await fetch("https://api.openai.com/v1/audio/speech", {
      method: "POST",
      headers: {
        Authorization: `Bearer ${key}`,
        "Content-Type": "application/json",
      },
      body: JSON.stringify({
        model: "gpt-4o-mini-tts",
        // NOTE(review): 4096 presumably mirrors the endpoint's input-length
        // limit — confirm against the API reference.
        input: text.slice(0, 4096),
        voice: isSos ? "shimmer" : "nova",
        response_format: "mp3",
        speed: 1.08,
        // Only include the field when there is something to send.
        ...(instructions ? { instructions } : {}),
      }),
    });
  } catch (cause: unknown) {
    // Network-level failures (the request never produced a response) are
    // reported like any other upstream failure instead of leaking as a 500.
    console.error("[speak-openai] request failed:", cause);
    throw createError({
      statusCode: 502,
      message: "OpenAI TTS fehlgeschlagen",
    });
  }

  if (!upstream.ok || !upstream.body) {
    // Best effort: log the upstream error payload, but never let reading it
    // mask the 502 we are about to raise.
    const err = await upstream.text().catch(() => "");
    console.error("[speak-openai] error:", upstream.status, err);
    throw createError({
      statusCode: 502,
      message: "OpenAI TTS fehlgeschlagen",
    });
  }

  setHeader(event, "Content-Type", "audio/mpeg");
  setHeader(event, "Cache-Control", "no-store");

  // Convert the web stream from fetch into a Node stream for sendStream.
  // NOTE(review): the `as never` cast presumably papers over the mismatch
  // between the global ReadableStream type and the one node:stream expects.
  const { Readable } = await import("node:stream");
  const nodeStream = Readable.fromWeb(upstream.body as never);
  return sendStream(event, nodeStream);
});