/**
 * POST /api/coach/transcribe
 *
 * Receives base64-encoded audio (webm / mp4 / aac), forwards it to Deepgram
 * and returns the transcript as `{ text }`.
 * iOS sends raw AAC (ADTS), which is remuxed into an M4A container via ffmpeg
 * before it is uploaded.
 */
import { execSync } from "node:child_process";
import { writeFileSync, readFileSync, unlinkSync, existsSync } from "node:fs";
import { join } from "node:path";
import { tmpdir } from "node:os";
import { randomUUID } from "node:crypto";

/** Upper bound for the decoded audio payload (25 MB, the upstream API limit). */
const MAX_AUDIO_BYTES = 25 * 1024 * 1024;

/** Hard cap for the synchronous ffmpeg remux so a hung process cannot block the server forever. */
const FFMPEG_TIMEOUT_MS = 30000;

/** Language codes passed through to Deepgram unchanged; anything else falls back to "de". */
const SUPPORTED_LANGUAGES = new Set(["de", "en", "tr", "ar", "fr", "es", "pt", "it"]);

/** The only part of Deepgram's `/v1/listen` response this handler reads. */
interface DeepgramListenResponse {
  results?: {
    channels?: Array<{
      alternatives?: Array<{ transcript?: string }>;
    }>;
  };
}

/** Deletes a temp file if it exists; cleanup problems are logged, never thrown. */
function removeTempFile(path: string): void {
  try {
    if (existsSync(path)) unlinkSync(path);
  } catch (cleanupError) {
    console.error("[transcribe] temp file cleanup failed:", cleanupError);
  }
}

/**
 * Remuxes raw ADTS AAC into an M4A container (stream copy, no re-encode).
 *
 * @param adts Raw AAC bytes as received from the client.
 * @returns The M4A bytes, or `null` when ffmpeg is missing, fails or times out.
 */
function remuxAdtsToM4a(adts: Buffer): Buffer | null {
  const id = randomUUID();
  const inPath = join(tmpdir(), `${id}.aac`);
  const outPath = join(tmpdir(), `${id}.m4a`);

  try {
    writeFileSync(inPath, adts);
    // Both paths are server-generated (tmpdir + UUID), never user input. They are
    // quoted so temp directories containing spaces work, and output is handled via
    // `stdio` instead of a POSIX-only `2>/dev/null`. stderr is captured so a failure
    // carries ffmpeg's own diagnostics in the thrown error.
    execSync(
      `ffmpeg -nostdin -y -loglevel error -i "${inPath}" -c:a copy "${outPath}"`,
      { stdio: ["ignore", "ignore", "pipe"], timeout: FFMPEG_TIMEOUT_MS },
    );
    return readFileSync(outPath);
  } catch (e) {
    console.error("[transcribe] ffmpeg convert failed:", e);
    return null;
  } finally {
    removeTempFile(inPath);
    removeTempFile(outPath);
  }
}

/** True when `err` is an object carrying a truthy `statusCode` (i.e. an error built by `createError`). */
function hasStatusCode(err: unknown): boolean {
  return (
    typeof err === "object" &&
    err !== null &&
    Boolean((err as { statusCode?: unknown }).statusCode)
  );
}

/**
 * Transcribes an uploaded audio clip.
 *
 * Request body: `{ audio: string; mimeType?: string; language?: string }` where
 * `audio` is base64, optionally prefixed as a data URL.
 *
 * @returns `{ text }` — the first alternative of the first channel, or `""`.
 * @throws 400 when `audio` is missing, not decodable, or larger than 25 MB.
 * @throws 503 when no Deepgram API key is configured.
 * @throws the upstream status code when Deepgram rejects the request.
 * @throws 500 on any other unexpected failure.
 */
export default defineEventHandler(async (event) => {
  await requireUser(event);

  // A request without a (JSON) body must end in a 400, not in a TypeError while destructuring.
  const body = ((await readBody(event)) ?? {}) as Partial<
    Record<"audio" | "mimeType" | "language", unknown>
  >;
  const { audio, mimeType, language } = body;

  if (typeof audio !== "string" || audio.length === 0) {
    throw createError({ statusCode: 400, message: "audio fehlt" });
  }

  const config = useRuntimeConfig();
  if (!config.deepgramApiKey) {
    throw createError({
      statusCode: 503,
      message: "Deepgram nicht konfiguriert",
    });
  }

  // Base64 → Buffer. Strips a `data:...;base64,` prefix when present
  // (indexOf returns -1 without a comma, so slice(0) keeps the whole string).
  const base64Data = audio.slice(audio.indexOf(",") + 1);
  let buffer: Buffer = Buffer.from(base64Data, "base64");

  if (buffer.length === 0) {
    throw createError({
      statusCode: 400,
      message: "audio ist kein gültiges Base64",
    });
  }

  // Max 25 MB (API limit)
  if (buffer.length > MAX_AUDIO_BYTES) {
    throw createError({
      statusCode: 400,
      message: "Audio zu groß (max 25 MB)",
    });
  }

  const mime = typeof mimeType === "string" ? mimeType.toLowerCase() : "";
  const isRawAac = mime.includes("aac");

  let ext = "webm";
  let blobType = "audio/webm";
  let converted = false;

  if (isRawAac) {
    // The iOS capacitor-voice-recorder delivers raw AAC (ADTS). According to the
    // original author's note Deepgram accepts that as-is, but it is remuxed to M4A
    // anyway for better compatibility.
    const m4a = remuxAdtsToM4a(buffer);
    if (m4a) {
      buffer = m4a;
      ext = "m4a";
      blobType = "audio/mp4";
      converted = true;
    } else {
      // Remux failed: the payload is still raw ADTS, so it must not be labelled as MP4.
      ext = "aac";
      blobType = "audio/aac";
    }
  } else if (mime.includes("mp4") || mime.includes("m4a")) {
    ext = "m4a";
    blobType = "audio/mp4";
  }

  console.log(
    "[transcribe] mimeType:",
    mimeType,
    "→ ext:",
    ext,
    "converted:",
    converted,
    "bytes:",
    buffer.length,
  );

  // Deepgram language mapping: listed codes are passed through, everything else becomes German.
  const deepgramLang =
    typeof language === "string" && SUPPORTED_LANGUAGES.has(language)
      ? language
      : "de";

  try {
    const query = new URLSearchParams({
      language: deepgramLang,
      model: "nova-2",
    });
    const response = await fetch(
      `https://api.deepgram.com/v1/listen?${query.toString()}`,
      {
        method: "POST",
        headers: {
          Authorization: `Token ${config.deepgramApiKey}`,
          "Content-Type": blobType,
        },
        body: buffer,
      },
    );

    if (!response.ok) {
      // Read the error as text: upstream failures are not guaranteed to be JSON,
      // and a parse error here would hide the real status code.
      const detail = await response.text();
      console.error("[transcribe] Deepgram error:", detail);
      throw createError({
        statusCode: response.status,
        message: detail || response.statusText,
      });
    }

    const result = (await response.json()) as DeepgramListenResponse;
    const transcript =
      result.results?.channels?.[0]?.alternatives?.[0]?.transcript ?? "";
    return { text: transcript };
  } catch (err: unknown) {
    // Errors created above already carry the right status; pass them through untouched.
    if (hasStatusCode(err)) throw err;
    console.error("[transcribe] Unexpected error:", err);
    throw createError({
      statusCode: 500,
      message:
        (err instanceof Error && err.message) || "Transcribe fehlgeschlagen",
    });
  }
});