// rebreak-monorepo/backend/server/api/coach/speak-azure.post.ts

/** Upper bound (in UTF-16 code units) on the input text that is sent for synthesis. */
const MAX_TEXT_LENGTH = 2000;

/** Entity replacements for the five characters that are markup-significant in XML. */
const XML_ENTITIES: Record<string, string> = {
  "<": "&lt;",
  ">": "&gt;",
  "&": "&amp;",
  "'": "&apos;",
  '"': "&quot;",
};

/** Drops characters that XML 1.0 does not allow in a document (C0 controls except tab/LF/CR, U+FFFE, U+FFFF). */
function stripInvalidXmlChars(value: string): string {
  return value.replace(/[\u0000-\u0008\u000B\u000C\u000E-\u001F\uFFFE\uFFFF]/g, "");
}

/** Cuts `value` to at most `max` code units without splitting a surrogate pair at the cut. */
function truncateText(value: string, max: number): string {
  if (value.length <= max) {
    return value;
  }
  const lastUnit = value.charCodeAt(max - 1);
  // A high surrogate as the last kept unit would lose its low half; drop it as well.
  const end = lastUnit >= 0xd800 && lastUnit <= 0xdbff ? max - 1 : max;
  return value.slice(0, end);
}

/** Escapes XML markup characters so user text cannot inject SSML elements or attributes. */
function escapeXml(value: string): string {
  return value.replace(/[<>&'"]/g, (c) => XML_ENTITIES[c] ?? c);
}

/**
 * POST /api/coach/speak-azure
 * Azure Cognitive Services TTS — de-DE-KatjaNeural
 * Requires: AZURE_TTS_KEY + AZURE_TTS_REGION in Infisical
 *
 * Expects a JSON body `{ text: string }`. The text is stripped of characters
 * that are illegal in XML, truncated to 2000 UTF-16 code units, XML-escaped and
 * wrapped in SSML before being sent to the Azure TTS REST endpoint.
 *
 * @returns `{ audio }` where `audio` is a `data:audio/mp3;base64,…` URI of the synthesized MP3.
 * @throws 400 when `text` is missing, not a string, or blank.
 * @throws 503 when no Azure TTS key is present in the runtime config.
 * @throws 502 when the Azure request fails or answers with a non-2xx status.
 */
export default defineEventHandler(async (event) => {
  await requireUser(event);

  // The body is untrusted: it may be absent, not an object, or carry a non-string `text`.
  const body: unknown = await readBody(event);
  const rawText =
    typeof body === "object" && body !== null
      ? (body as { text?: unknown }).text
      : undefined;
  const text = typeof rawText === "string" ? stripInvalidXmlChars(rawText) : "";

  if (!text.trim()) {
    throw createError({ statusCode: 400, message: "text fehlt" });
  }

  const config = useRuntimeConfig();
  const key = config.azureTtsKey as string | undefined;
  // `||` on purpose: an empty region string also falls back to the default.
  const region = (config.azureTtsRegion as string | undefined) || "westeurope";

  if (!key) {
    throw createError({ statusCode: 503, message: "Azure TTS Key nicht konfiguriert" });
  }

  const ssml = `<speak version='1.0' xml:lang='de-DE'>
    <voice name='de-DE-KatjaNeural'>
      ${escapeXml(truncateText(text, MAX_TEXT_LENGTH))}
    </voice>
  </speak>`;

  // Network-level failures (DNS, connection reset, …) make fetch reject; report
  // them like any other upstream failure instead of leaking an unhandled 500.
  const response = await fetch(
    `https://${region}.tts.speech.microsoft.com/cognitiveservices/v1`,
    {
      method: "POST",
      headers: {
        "Ocp-Apim-Subscription-Key": key,
        "Content-Type": "application/ssml+xml",
        "X-Microsoft-OutputFormat": "audio-16khz-128kbitrate-mono-mp3",
      },
      body: ssml,
    },
  ).catch((cause: unknown) => {
    console.error("[speak-azure] request failed:", cause);
    throw createError({ statusCode: 502, message: "Azure TTS fehlgeschlagen" });
  });

  if (!response.ok) {
    // Log the upstream details server-side only; the client gets a generic message.
    const err = await response.text();
    console.error("[speak-azure] error:", response.status, err);
    throw createError({ statusCode: 502, message: "Azure TTS fehlgeschlagen" });
  }

  const buffer = await response.arrayBuffer();
  const base64 = Buffer.from(buffer).toString("base64");

  return { audio: `data:audio/mp3;base64,${base64}` };
});