80 lines
2.6 KiB
TypeScript
80 lines
2.6 KiB
TypeScript
/**
 * POST /api/coach/speak-openai — v5
 * OpenAI TTS — gpt-4o-mini-tts (Mar 2025), voices: nova (chat) / shimmer (sos).
 *
 * Modes:
 * - "chat" → nova, neutral
 * - "sos" → shimmer, single warm-empathic instruction set
 * - "sos-continuation" → shimmer, **identical** instructions to "sos"
 *
 * Why identical: gpt-4o-mini-tts interprets `instructions` so creatively
 * that different strings within the same SOS flow are perceived as a
 * "change of voice". Single-instruction mode eliminates the voice boundary.
 */
|
|
/**
 * Instruction set sent for both "sos" and "sos-continuation".
 * The string is deliberately shared so that consecutive TTS calls within one
 * SOS session do not sound like a different voice.
 */
const SOS_INSTRUCTIONS =
  "Warm, gentle, empathic — like a calm friend on the phone in a difficult moment. " +
  "Speak slowly with natural pauses between sentences. " +
  "Soft delivery, lower energy than chat-mode. " +
  "German native pronunciation. No fake-cheerful intonation.";

/** Maximum number of characters forwarded as TTS input; longer text is cut off. */
const MAX_INPUT_CHARS = 4096;

/**
 * Converts the posted `text` to speech via the OpenAI speech endpoint and
 * streams the resulting MP3 back to the caller.
 *
 * Request body: `{ text: string; mode?: "sos" | "sos-continuation" | "chat" }`.
 * Any `mode` other than the two SOS values is treated like "chat".
 *
 * @throws 400 when `text` is missing, not a string, or only whitespace.
 * @throws 503 when no OpenAI API key is configured.
 * @throws 502 when the upstream request fails or returns no audio body.
 */
export default defineEventHandler(async (event) => {
  // Runs before anything else; presumably rejects unauthenticated callers —
  // verify against the definition of requireUser.
  await requireUser(event);

  // The body is untrusted client input: it may be absent, not an object, or
  // carry a non-string `text`. Narrow it first so malformed requests produce
  // a 400 instead of an unhandled TypeError (HTTP 500).
  const body: unknown = await readBody(event);
  const payload =
    typeof body === "object" && body !== null
      ? (body as { text?: unknown; mode?: unknown })
      : {};
  const { text, mode } = payload;

  if (typeof text !== "string" || !text.trim()) {
    throw createError({ statusCode: 400, message: "text fehlt" });
  }

  const isSos = mode === "sos" || mode === "sos-continuation";

  const config = useRuntimeConfig();
  const key = config.openaiApiKey as string | undefined;

  if (!key) {
    throw createError({
      statusCode: 503,
      message: "OpenAI API Key nicht konfiguriert",
    });
  }

  // Chat mode sends no instructions at all; both SOS modes send the same ones.
  const instructions = isSos ? SOS_INSTRUCTIONS : undefined;

  let upstream: Response;
  try {
    upstream = await fetch("https://api.openai.com/v1/audio/speech", {
      method: "POST",
      headers: {
        Authorization: `Bearer ${key}`,
        "Content-Type": "application/json",
      },
      body: JSON.stringify({
        model: "gpt-4o-mini-tts",
        input: text.slice(0, MAX_INPUT_CHARS),
        voice: isSos ? "shimmer" : "nova",
        response_format: "mp3",
        // NOTE(review): the SOS instructions ask for slow speech while speed
        // stays at 1.08 for every mode — confirm this is intended and that the
        // model honours `speed` at all.
        speed: 1.08,
        ...(instructions ? { instructions } : {}),
      }),
    });
  } catch (cause: unknown) {
    // Network-level failures (DNS, connection reset, ...) reject the fetch
    // promise; report them the same way as an upstream error status.
    console.error("[speak-openai] request failed:", cause);
    throw createError({
      statusCode: 502,
      message: "OpenAI TTS fehlgeschlagen",
    });
  }

  if (!upstream.ok || !upstream.body) {
    // Best effort: the error body is only used for the log line.
    const err = await upstream.text().catch(() => "");
    console.error("[speak-openai] error:", upstream.status, err);
    throw createError({
      statusCode: 502,
      message: "OpenAI TTS fehlgeschlagen",
    });
  }

  setHeader(event, "Content-Type", "audio/mpeg");
  setHeader(event, "Cache-Control", "no-store");

  // Bridge the WHATWG stream returned by fetch to a Node stream for sendStream.
  const { Readable } = await import("node:stream");
  const nodeStream = Readable.fromWeb(upstream.body as never);
  return sendStream(event, nodeStream);
});
|