nova-2 unterstützt kein ar/tr → Deepgram 400 "No such model/language/tier
combination" → leeres Transcript ("kein Text nach Speech"). nova-3 deckt alle
gelisteten Sprachen als diskrete Codes ab (de/en/tr/ar/fr/es/pt/it), ohne
Regression. Verifiziert gg. Deepgram models-languages-overview.
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
134 lines
3.9 KiB
TypeScript
134 lines
3.9 KiB
TypeScript
/**
|
|
* POST /api/coach/transcribe
|
|
* Empfängt Audio (base64 webm/mp4/aac) → Deepgram → gibt Text zurück
|
|
* iOS sendet rohes AAC (ADTS) → wird via ffmpeg in M4A konvertiert
|
|
*/
|
|
import { execSync } from "node:child_process";
|
|
import { writeFileSync, readFileSync, unlinkSync, existsSync } from "node:fs";
|
|
import { join } from "node:path";
|
|
import { tmpdir } from "node:os";
|
|
import { randomUUID } from "node:crypto";
|
|
|
|
/** Upper bound for the decoded audio payload in bytes (API limit). */
const MAX_AUDIO_BYTES = 25 * 1024 * 1024;

/** Hard stop for the ffmpeg remux; `execSync` blocks the event loop while it runs. */
const FFMPEG_TIMEOUT_MS = 15_000;

/** Maximum number of characters of a non-JSON upstream error body that is logged/returned. */
const MAX_UPSTREAM_ERROR_CHARS = 500;

/** Language codes passed through to Deepgram; anything else falls back to "de". */
const SUPPORTED_LANGUAGES: readonly string[] = [
  "de",
  "en",
  "tr",
  "ar",
  "fr",
  "es",
  "pt",
  "it",
];

/** The subset of the Deepgram response that this route reads. */
type DeepgramResponse = {
  results?: {
    channels?: Array<{
      alternatives?: Array<{ transcript?: unknown }>;
    }>;
  };
};

/**
 * Remuxes a raw AAC (ADTS) buffer into an M4A container using ffmpeg
 * (stream copy, no re-encoding).
 *
 * @param adts - Raw AAC audio as received from the client.
 * @returns The M4A bytes, or `null` when ffmpeg is unavailable, fails,
 *          times out, or produces no output. The failure is logged.
 */
function remuxAdtsToM4a(adts: Buffer): Buffer | null {
  const id = randomUUID();
  const inPath = join(tmpdir(), `${id}.aac`);
  const outPath = join(tmpdir(), `${id}.m4a`);

  try {
    writeFileSync(inPath, adts);
    // Paths are quoted so a temp directory containing spaces does not split
    // the command line; `stdio: "ignore"` silences ffmpeg without relying on
    // a POSIX-only `2>/dev/null` redirect.
    execSync(`ffmpeg -y -i "${inPath}" -c:a copy "${outPath}"`, {
      stdio: "ignore",
      timeout: FFMPEG_TIMEOUT_MS,
    });
    const remuxed = readFileSync(outPath);
    return remuxed.length > 0 ? remuxed : null;
  } catch (e) {
    console.error("[transcribe] ffmpeg convert failed:", e);
    return null;
  } finally {
    // Best-effort cleanup: a failed unlink must not turn into a request error.
    for (const path of [inPath, outPath]) {
      try {
        if (existsSync(path)) unlinkSync(path);
      } catch (cleanupError) {
        console.error("[transcribe] temp file cleanup failed:", cleanupError);
      }
    }
  }
}

/** Returns true when `err` is an object carrying a truthy `statusCode` (i.e. an error created via `createError`). */
function hasStatusCode(err: unknown): boolean {
  return (
    typeof err === "object" &&
    err !== null &&
    Boolean((err as { statusCode?: unknown }).statusCode)
  );
}

/**
 * Turns an upstream error body into a message: compact JSON when the body
 * is JSON, otherwise the (truncated) raw text.
 */
function describeUpstreamError(rawBody: string, status: number): string {
  try {
    return JSON.stringify(JSON.parse(rawBody));
  } catch {
    const text = rawBody.trim().slice(0, MAX_UPSTREAM_ERROR_CHARS);
    return text || `Deepgram HTTP ${status}`;
  }
}

/**
 * POST /api/coach/transcribe
 *
 * Accepts base64-encoded audio (optionally as a data URL) and returns the
 * Deepgram transcript as `{ text }`.
 *
 * Request body: `{ audio: string; mimeType?: string; language?: string }`.
 *
 * @throws 400 when `audio` is missing, not a string, decodes to zero bytes,
 *         or exceeds 25 MB.
 * @throws 503 when no Deepgram API key is configured.
 * @throws the upstream HTTP status when Deepgram rejects the request.
 * @throws 500 on any other unexpected failure.
 */
export default defineEventHandler(async (event) => {
  await requireUser(event);

  // The body is client input: narrow it at runtime instead of asserting a
  // shape, so malformed requests yield a 400 rather than a TypeError/500.
  const body: unknown = await readBody(event);
  const payload = (
    typeof body === "object" && body !== null ? body : {}
  ) as Record<string, unknown>;
  const audio = payload.audio;
  const mimeType =
    typeof payload.mimeType === "string" ? payload.mimeType : undefined;
  const language =
    typeof payload.language === "string" ? payload.language : undefined;

  if (typeof audio !== "string" || audio.length === 0) {
    throw createError({ statusCode: 400, message: "audio fehlt" });
  }

  const config = useRuntimeConfig();
  if (!config.deepgramApiKey) {
    throw createError({
      statusCode: 503,
      message: "Deepgram nicht konfiguriert",
    });
  }

  // Base64 → Buffer. For data URLs the payload follows the last comma
  // (base64 itself never contains one).
  const commaAt = audio.lastIndexOf(",");
  const base64Data = commaAt === -1 ? audio : audio.slice(commaAt + 1);
  let buffer: Buffer = Buffer.from(base64Data, "base64");

  if (buffer.length === 0) {
    throw createError({ statusCode: 400, message: "audio fehlt" });
  }

  // Max 25 MB (API limit)
  if (buffer.length > MAX_AUDIO_BYTES) {
    throw createError({
      statusCode: 400,
      message: "Audio zu groß (max 25 MB)",
    });
  }

  // iOS capacitor-voice-recorder delivers raw AAC (ADTS). It is remuxed to
  // M4A for better compatibility; if that fails, the untouched ADTS bytes
  // are sent and must be labelled as such, not as MP4.
  let ext = "webm";
  let blobType = "audio/webm";
  let converted = false;

  if (mimeType?.includes("aac")) {
    const remuxed = remuxAdtsToM4a(buffer);
    if (remuxed) {
      buffer = remuxed;
      ext = "m4a";
      blobType = "audio/mp4";
      converted = true;
    } else {
      ext = "aac";
      blobType = "audio/aac";
    }
  } else if (mimeType?.includes("mp4") || mimeType?.includes("m4a")) {
    ext = "m4a";
    blobType = "audio/mp4";
  }

  console.log(
    "[transcribe] mimeType:",
    mimeType,
    "→ ext:",
    ext,
    "converted:",
    converted,
    "bytes:",
    buffer.length,
  );

  // Deepgram language mapping. IMPORTANT: model=nova-3, NOT nova-2 —
  // per the original author's note, nova-2 rejects Arabic (ar) and Turkish
  // (tr) with 400 "No such model/language/tier combination", which surfaced
  // client-side as an empty transcript, while nova-3 accepts every code
  // listed in SUPPORTED_LANGUAGES.
  const deepgramLang =
    language !== undefined && SUPPORTED_LANGUAGES.includes(language)
      ? language
      : "de";

  try {
    const response = await fetch(
      `https://api.deepgram.com/v1/listen?language=${deepgramLang}&model=nova-3`,
      {
        method: "POST",
        headers: {
          Authorization: `Token ${config.deepgramApiKey}`,
          "Content-Type": blobType,
        },
        body: buffer,
      },
    );

    // Read as text first: upstream error responses are not guaranteed to be
    // JSON, and parsing them blindly would hide the real status behind a
    // SyntaxError.
    const rawBody = await response.text();

    if (!response.ok) {
      const detail = describeUpstreamError(rawBody, response.status);
      console.error("[transcribe] Deepgram error:", detail);
      throw createError({
        statusCode: response.status,
        message: detail,
      });
    }

    const result = JSON.parse(rawBody) as DeepgramResponse | null;
    const transcript =
      result?.results?.channels?.[0]?.alternatives?.[0]?.transcript;
    return { text: typeof transcript === "string" ? transcript : "" };
  } catch (err: unknown) {
    // Errors raised via createError already carry the intended HTTP status.
    if (hasStatusCode(err)) throw err;
    console.error("[transcribe] Unexpected error:", err);
    const message = err instanceof Error ? err.message : "";
    throw createError({
      statusCode: 500,
      message: message || "Transcribe fehlgeschlagen",
    });
  }
});
|