/**
 * POST /api/coach/transcribe
 *
 * Receives audio (base64 webm/mp4/aac), sends it to Deepgram and returns the
 * transcript as `{ text }`.
 * iOS sends raw AAC (ADTS), which is remuxed to M4A via ffmpeg before upload.
 */
import { execFile, execSync } from "node:child_process";
import { writeFileSync, readFileSync, unlinkSync, existsSync } from "node:fs";
import { join } from "node:path";
import { tmpdir } from "node:os";
import { randomUUID } from "node:crypto";
import { promisify } from "node:util";

const execFileAsync = promisify(execFile);

/** Maximum size of the decoded audio payload (API limit). */
const MAX_AUDIO_BYTES = 25 * 1024 * 1024;

/** Upper bound for a single ffmpeg remux so a stuck process cannot hang the request. */
const FFMPEG_TIMEOUT_MS = 30000;

/** Language codes that are passed through to Deepgram; anything else falls back to the default. */
const SUPPORTED_LANGUAGES: readonly string[] = [
  "de",
  "en",
  "tr",
  "ar",
  "fr",
  "es",
  "pt",
  "it",
];
const DEFAULT_LANGUAGE = "de";

/**
 * Languages that must be routed to nova-2-general.
 *
 * Live diagnosis (2026-05-30): nova-3 rejects language=ar (and tr) with
 * 400 "No such model/language/tier combination found". All other languages
 * stay on nova-3 (better accuracy, discrete language codes).
 */
const GENERAL_MODEL_LANGUAGES: readonly string[] = ["ar", "tr"];

/** Fields read from the JSON request body; all untrusted until narrowed. */
interface TranscribeRequestBody {
  audio?: unknown;
  mimeType?: unknown;
  language?: unknown;
}

/** Audio payload plus the metadata used for the upload and for logging. */
interface PreparedAudio {
  buffer: Buffer;
  ext: string;
  blobType: string;
  converted: boolean;
}

/** The part of the Deepgram response this handler reads. */
interface DeepgramListenResponse {
  results?: {
    channels?: Array<{
      alternatives?: Array<{ transcript?: string }>;
    }>;
  };
}

/** Best-effort removal of a temp file; a cleanup failure must not fail the request. */
function removeIfPresent(path: string): void {
  try {
    if (existsSync(path)) unlinkSync(path);
  } catch (e) {
    console.error("[transcribe] temp cleanup failed:", e);
  }
}

/**
 * Remuxes raw AAC (ADTS) into an M4A container without re-encoding.
 *
 * @param raw - The raw AAC bytes.
 * @returns The bytes of the generated M4A file.
 * @throws If ffmpeg is missing, fails, or exceeds the timeout.
 */
async function remuxAdtsToM4a(raw: Buffer): Promise<Buffer> {
  const id = randomUUID();
  const inPath = join(tmpdir(), `${id}.aac`);
  const outPath = join(tmpdir(), `${id}.m4a`);
  try {
    writeFileSync(inPath, raw);
    // execFile with an argument array: no shell, so temp paths containing
    // spaces are safe and no POSIX-only redirection is needed. Awaiting the
    // child keeps the event loop free while ffmpeg runs.
    // `-y` has to come before the output file to apply to it.
    await execFileAsync(
      "ffmpeg",
      ["-nostdin", "-loglevel", "error", "-y", "-i", inPath, "-c:a", "copy", outPath],
      { timeout: FFMPEG_TIMEOUT_MS },
    );
    return readFileSync(outPath);
  } finally {
    removeIfPresent(inPath);
    removeIfPresent(outPath);
  }
}

/**
 * Chooses container metadata for the upload and converts raw AAC when needed.
 *
 * The iOS capacitor-voice-recorder delivers raw AAC (ADTS). Deepgram accepts
 * that, but it is converted to M4A anyway for better compatibility. If the
 * conversion fails, the untouched AAC bytes are sent and labelled as AAC so
 * the Content-Type matches the payload.
 *
 * @param buffer - Decoded audio bytes.
 * @param mimeType - MIME type reported by the client, if any.
 */
async function prepareAudio(
  buffer: Buffer,
  mimeType: string | undefined,
): Promise<PreparedAudio> {
  if (mimeType?.includes("aac")) {
    try {
      const remuxed = await remuxAdtsToM4a(buffer);
      return { buffer: remuxed, ext: "m4a", blobType: "audio/mp4", converted: true };
    } catch (e) {
      console.error("[transcribe] ffmpeg convert failed:", e);
      return { buffer, ext: "aac", blobType: "audio/aac", converted: false };
    }
  }
  if (mimeType?.includes("mp4") || mimeType?.includes("m4a")) {
    return { buffer, ext: "m4a", blobType: "audio/mp4", converted: false };
  }
  return { buffer, ext: "webm", blobType: "audio/webm", converted: false };
}

/**
 * Maps the requested language to a Deepgram language/model pair and URL.
 *
 * nova-2-general supports language=ar/tr (unlike nova-3). Without an explicit
 * language parameter nova-2 falls back to auto-detect and often misdetects
 * Arabic audio as English (a phonetic transcript such as "salam alaikum"
 * instead of Arabic script), and Lyra then does not reply in Arabic. Passing
 * language=ar selects the correct acoustic model and keeps the Arabic script.
 *
 * @param language - Untrusted language value from the request body.
 */
function resolveDeepgramTarget(language: unknown): {
  url: string;
  language: string;
  model: string;
} {
  const resolved =
    typeof language === "string" && SUPPORTED_LANGUAGES.includes(language)
      ? language
      : DEFAULT_LANGUAGE;
  const model = GENERAL_MODEL_LANGUAGES.includes(resolved)
    ? "nova-2-general"
    : "nova-3";
  const query = new URLSearchParams({ language: resolved, model });
  return {
    url: `https://api.deepgram.com/v1/listen?${query.toString()}`,
    language: resolved,
    model,
  };
}

/** Re-serialises a JSON error body compactly; returns non-JSON bodies unchanged. */
function normalizeUpstreamError(raw: string): string {
  try {
    return JSON.stringify(JSON.parse(raw));
  } catch {
    return raw;
  }
}

/** True when the thrown value carries a truthy `statusCode` (e.g. one produced by `createError`). */
function hasStatusCode(err: unknown): boolean {
  return (
    typeof err === "object" &&
    err !== null &&
    Boolean((err as { statusCode?: unknown }).statusCode)
  );
}

/**
 * Transcribes the posted audio with Deepgram.
 *
 * Request body: `{ audio: string (base64 or data URL), mimeType?: string, language?: string }`.
 *
 * @returns `{ text }` with the first alternative's transcript, or an empty string.
 * @throws 400 when `audio` is missing, not a string, decodes to nothing, or exceeds 25 MB.
 * @throws 503 when no Deepgram API key is configured.
 * @throws Deepgram's own status code when Deepgram answers with an error.
 * @throws 500 on any other unexpected failure.
 */
export default defineEventHandler(async (event) => {
  await requireUser(event);

  // The body may be absent or not an object; treat every field as untrusted.
  const body = ((await readBody(event)) ?? {}) as TranscribeRequestBody;
  const { audio, language } = body;
  const mimeType = typeof body.mimeType === "string" ? body.mimeType : undefined;

  if (typeof audio !== "string" || audio.length === 0) {
    throw createError({ statusCode: 400, message: "audio fehlt" });
  }

  const config = useRuntimeConfig();
  if (!config.deepgramApiKey) {
    throw createError({
      statusCode: 503,
      message: "Deepgram nicht konfiguriert",
    });
  }

  // Base64 → Buffer (strip a leading "data:...;base64," prefix if present).
  const base64Data = audio.includes(",") ? (audio.split(",")[1] ?? "") : audio;
  const decoded: Buffer = Buffer.from(base64Data, "base64");

  if (decoded.length === 0) {
    throw createError({ statusCode: 400, message: "audio fehlt" });
  }

  // Max 25 MB (API limit).
  if (decoded.length > MAX_AUDIO_BYTES) {
    throw createError({
      statusCode: 400,
      message: "Audio zu groß (max 25 MB)",
    });
  }

  const prepared = await prepareAudio(decoded, mimeType);

  console.log(
    "[transcribe] mimeType:",
    mimeType,
    "→ ext:",
    prepared.ext,
    "converted:",
    prepared.converted,
    "bytes:",
    prepared.buffer.length,
  );

  const target = resolveDeepgramTarget(language);

  console.log(
    "[transcribe] language:",
    target.language,
    "model:",
    target.model,
  );

  try {
    const response = await fetch(target.url, {
      method: "POST",
      headers: {
        Authorization: `Token ${config.deepgramApiKey}`,
        "Content-Type": prepared.blobType,
      },
      body: prepared.buffer,
    });

    if (!response.ok) {
      // Read as text first so a non-JSON error page keeps its upstream status
      // instead of turning into a JSON parse failure.
      const detail = normalizeUpstreamError(await response.text());
      console.error("[transcribe] Deepgram error:", detail);
      throw createError({
        statusCode: response.status,
        message: detail,
      });
    }

    const result = (await response.json()) as DeepgramListenResponse;
    const transcript =
      result.results?.channels?.[0]?.alternatives?.[0]?.transcript || "";
    return { text: transcript };
  } catch (err: unknown) {
    // Errors that already carry a status code are passed through unchanged.
    if (hasStatusCode(err)) throw err;
    console.error("[transcribe] Unexpected error:", err);
    throw createError({
      statusCode: 500,
      message:
        (err instanceof Error && err.message) || "Transcribe fehlgeschlagen",
    });
  }
});