Two features in one push (both backend, deploy together):
LYRA FOUNDER-STORY (per strategist Option C — mixed/medium-detail):
- COACH_CASUAL_SYSTEM_PROMPT: GRÜNDER-STORY sub-block
- Sharing-rules: ALWAYS on direct ask, RARELY proactive (only on
explicit isolation expressions "niemand versteht das"), NEVER in
SOS-mode, NEVER first-3-msgs, NEVER if user appears minor
- Detail-level: "aus persönlicher Erfahrung mit Spielsucht in seiner
Familie" — KEINE Namen, Verwandtschaftsgrade, Verlust-Details
- Post-share-pivot: "...aber jetzt zu dir: was ist gerade los?"
- COACH_SYSTEM_PROMPT (SOS): SOS-MODE LOCK — hard-Verbot Gründer-Story
zu erwähnen, auch bei direct-ask. Re-trigger-Risk zu hoch.
- DSGVO: brother bleibt komplett anonymisiert. Hans-Müller-DSB-review für
verbal-consent-doc empfohlen.
VOICE TIER-MAPPING (per user-decision: voice für ALLE tiers):
- New plan-features.voice config: provider + model + voiceId + dailyQuotaSeconds
- Tier-mapping:
- Free → Google TTS Neural2-F (de-DE), 60s/day, ~$4/1M chars
- Pro → Cartesia Sonic-2, 300s/day, ~$4/1M chars + ~75ms TTFT
- Legend → ElevenLabs Turbo v2.5, unlimited, ~$30/1M chars
- New backend/server/db/voiceQuota.ts:
- getRemainingVoiceQuota(userId, plan)
- consumeVoiceQuota(userId, plan, seconds)
- estimateAudioSeconds(text)
- speak.post.ts komplett umgeschrieben als plan-aware dispatcher
- 14 tests passing (partial-consume, exhausted, day-rollover, edge-cases)
- Schema-migration 20260509_voice_quota:
ADD voice_seconds_used_today, voice_quota_reset_at to profiles
(auto-deploy via pipeline)
Pending Frontend (separate task):
- Voice-quota-UI in Settings/Profile (remaining seconds + upgrade-prompt
bei 429 voice_quota_exceeded)
⚠️ Schema-migration auto-deploy via b38bf17 detection.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
262 lines
8.5 KiB
TypeScript
262 lines
8.5 KiB
TypeScript
import type { H3Event } from "h3";
|
|
import type { VoiceConfig } from "../../utils/plan-features";
|
|
|
|
/**
 * POST /api/coach/speak
 *
 * Plan-aware text-to-speech dispatcher. The provider is read from the
 * `voice` entry of the caller's plan limits:
 *   Free   → Google Cloud TTS Neural2 (60 s/day quota)
 *   Pro    → Cartesia Sonic-2         (300 s/day quota)
 *   Legend → ElevenLabs Turbo v2.5    (unlimited)
 *
 * Request body:
 *   { text: string; mode?: "chat" | "sos" | "sos-continuation" }
 *
 * Response:
 *   audio/mpeg — on success
 *   { error: "voice_quota_exceeded", resetAt: string, plan: string } — 429
 *   400 — when `text` is missing, blank, or not a string
 *
 * Quota logic lives in server/db/voiceQuota.ts.
 * Provider implementations live in this file (not behind an HTTP redirect),
 * which avoids a second auth round-trip. Note that the quota is CHECKED here
 * before dispatch and CONSUMED inside each provider function after the
 * upstream call succeeded — the two steps are not a single transaction.
 */
export default defineEventHandler(async (event) => {
  const user = await requireUser(event);

  // readBody may yield undefined/null (no body) or a non-object payload;
  // normalise so that malformed input ends up as a 400 instead of a TypeError.
  const rawBody: unknown = await readBody(event);
  const payload = (typeof rawBody === "object" && rawBody !== null ? rawBody : {}) as {
    text?: unknown;
    mode?: "chat" | "sos" | "sos-continuation";
  };
  const text = typeof payload.text === "string" ? payload.text : "";
  const mode = payload.mode;

  if (!text.trim()) {
    throw createError({ statusCode: 400, message: "text fehlt" });
  }

  // Cap the amount of text sent to the provider at 4096 characters.
  const clipped = text.slice(0, 4096);

  // ─── Load profile + plan ────────────────────────────────────────────────
  const db = usePrisma();
  const profile = await db.profile.findUnique({
    where: { id: user.id },
    select: { plan: true },
  });
  // A missing profile row is treated as the free plan.
  const plan = (profile?.plan ?? "free").toLowerCase();

  const limits = getPlanLimits(plan);
  const voiceCfg = limits.voice;

  // ─── Quota check ────────────────────────────────────────────────────────
  const remaining = await getRemainingVoiceQuota(user.id, plan);
  if (remaining === 0) {
    // Reset timestamp reported to the client: the next UTC midnight.
    // NOTE(review): assumes voiceQuota.ts also rolls over at UTC midnight — confirm.
    const resetAt = new Date();
    resetAt.setUTCDate(resetAt.getUTCDate() + 1);
    resetAt.setUTCHours(0, 0, 0, 0);

    setResponseStatus(event, 429);
    return {
      error: "voice_quota_exceeded",
      resetAt: resetAt.toISOString(),
      plan,
    };
  }

  const config = useRuntimeConfig();

  // ─── Dispatch per provider ───────────────────────────────────────────────
  switch (voiceCfg.provider) {
    case "google":
      return await speakGoogle(event, clipped, config, voiceCfg, user.id, plan);
    case "cartesia":
      return await speakCartesia(event, clipped, config, voiceCfg, user.id, plan);
    case "elevenlabs":
      return await speakElevenLabs(event, clipped, mode, config, voiceCfg, user.id, plan);
    default: {
      // Unknown provider in config — fallback to Google with warning
      console.warn("[speak] unknown provider in plan-features:", voiceCfg.provider, "→ falling back to google");
      // The configured model belongs to the unknown provider and is not a
      // Google voice name, so the fallback must use Google's default voice.
      return await speakGoogle(
        event,
        clipped,
        config,
        { ...voiceCfg, model: "de-DE-Neural2-F" },
        user.id,
        plan,
      );
    }
  }
});
|
|
|
|
// ─── Provider implementations ────────────────────────────────────────────────
|
|
|
|
/**
 * Synthesises `text` with Google Cloud Text-to-Speech and returns the MP3
 * bytes as the response body.
 *
 * @param event    Current h3 request event; response headers are set on it.
 * @param text     Text to speak (already length-capped by the caller).
 * @param config   Nitro runtime config; `googleApiKey` is read from it.
 * @param voiceCfg Plan voice config; `model` is used as the Google voice name.
 * @param userId   User whose voice quota is charged.
 * @param plan     Plan identifier forwarded to the quota module.
 * @returns A Buffer with the decoded MP3 audio.
 * @throws 503 when no API key is configured, 502 when Google responds with an
 *         error or without audio content.
 */
async function speakGoogle(
  event: H3Event,
  text: string,
  config: ReturnType<typeof useRuntimeConfig>,
  voiceCfg: VoiceConfig,
  userId: string,
  plan: string,
) {
  const key = (config.googleApiKey as string) || process.env.GOOGLE_API_KEY || "";
  if (!key) {
    throw createError({ statusCode: 503, message: "Google TTS API Key nicht konfiguriert" });
  }

  const voiceName = voiceCfg.model ?? "de-DE-Neural2-F";

  const response = await fetch("https://texttospeech.googleapis.com/v1/text:synthesize", {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
      // Send the key as a header rather than a `?key=` query parameter so it
      // does not end up in URL-based access logs or error traces.
      "X-Goog-Api-Key": key,
    },
    body: JSON.stringify({
      input: { text },
      // The voice is selected by name only. A hard-coded ssmlGender would be
      // rejected by Google whenever the configured voice has another gender.
      voice: {
        languageCode: "de-DE",
        name: voiceName,
      },
      audioConfig: {
        audioEncoding: "MP3",
        speakingRate: 1.0,
        pitch: 0,
      },
    }),
  });

  if (!response.ok) {
    // The error body is only logged; a non-JSON body degrades to {}.
    const err = await response.json().catch(() => ({}));
    console.error("[speak/google] error:", response.status, err);
    throw createError({ statusCode: 502, message: "Google TTS fehlgeschlagen" });
  }

  const result = (await response.json()) as { audioContent?: unknown } | null;
  const audioContent = result?.audioContent;
  if (typeof audioContent !== "string" || !audioContent) {
    throw createError({ statusCode: 502, message: "Google TTS: kein Audio zurückgegeben" });
  }

  // Charge the quota only after Google actually delivered audio.
  await consumeVoiceQuota(userId, plan, estimateAudioSeconds(text));

  // Google returns base64 — decode it and send the raw bytes.
  const audioBuffer = Buffer.from(audioContent, "base64");
  setHeader(event, "Content-Type", "audio/mpeg");
  setHeader(event, "Cache-Control", "no-store");
  setHeader(event, "Content-Length", String(audioBuffer.length));

  return audioBuffer;
}
|
|
|
|
/**
 * Synthesises `text` with Cartesia's `/tts/bytes` endpoint and pipes the MP3
 * stream straight through to the client.
 *
 * @param event    Current h3 request event; response headers are set on it.
 * @param text     Text to speak, sent as the Cartesia `transcript`.
 * @param config   Nitro runtime config; `cartesiaApiKey` / `cartesiaVoiceId` are read.
 * @param voiceCfg Plan voice config; supplies `voiceId` and `model` overrides.
 * @param userId   User whose voice quota is charged.
 * @param plan     Plan identifier forwarded to the quota module.
 * @throws 503 when no API key is configured, 502 when the upstream call fails
 *         or returns no body.
 */
async function speakCartesia(
  event: H3Event,
  text: string,
  config: ReturnType<typeof useRuntimeConfig>,
  voiceCfg: VoiceConfig,
  userId: string,
  plan: string,
) {
  const apiKey = (config.cartesiaApiKey as string) || process.env.CARTESIA_API_KEY || "";
  if (!apiKey) {
    throw createError({ statusCode: 503, message: "Cartesia API Key nicht konfiguriert" });
  }

  // Voice resolution order: plan config → runtime config → env → built-in default.
  // `||` is used on purpose so that empty strings fall through to the next source.
  const DEFAULT_VOICE = "b9de4a89-2257-424b-94c2-db18ba68c81a";
  let selectedVoice = voiceCfg.voiceId;
  if (!selectedVoice) selectedVoice = config.cartesiaVoiceId as string;
  if (!selectedVoice) selectedVoice = process.env.CARTESIA_VOICE_ID;
  if (!selectedVoice) selectedVoice = DEFAULT_VOICE;

  const requestHeaders = {
    "X-API-Key": apiKey,
    "Cartesia-Version": "2024-11-13",
    "Content-Type": "application/json",
  };
  const requestPayload = {
    model_id: voiceCfg.model ?? "sonic-2",
    transcript: text,
    voice: { mode: "id", id: selectedVoice },
    output_format: {
      container: "mp3",
      sample_rate: 22050,
      bit_rate: 64000,
    },
    language: "de",
  };

  const upstream = await fetch("https://api.cartesia.ai/tts/bytes", {
    method: "POST",
    headers: requestHeaders,
    body: JSON.stringify(requestPayload),
  });

  const usable = upstream.ok && Boolean(upstream.body);
  if (!usable) {
    // Log whatever the upstream said; an unreadable body becomes "".
    const details = await upstream.text().catch(() => "");
    console.error("[speak/cartesia] error:", upstream.status, details);
    throw createError({ statusCode: 502, message: "Cartesia TTS fehlgeschlagen" });
  }

  // Quota is charged once the upstream accepted the request, before streaming starts.
  await consumeVoiceQuota(userId, plan, estimateAudioSeconds(text));

  setHeader(event, "Content-Type", "audio/mpeg");
  setHeader(event, "Cache-Control", "no-store");

  // Bridge the WHATWG stream from fetch into a Node stream for h3's sendStream.
  const nodeStream = await import("node:stream");
  const audio = nodeStream.Readable.fromWeb(upstream.body as never);
  return sendStream(event, audio);
}
|
|
|
|
/**
 * Synthesises `text` with the ElevenLabs streaming text-to-speech endpoint
 * and pipes the MP3 stream through to the client.
 *
 * @param event    Current h3 request event; response headers are set on it.
 * @param text     Text to speak.
 * @param _mode    Conversation mode from the request; currently unused here.
 * @param config   Nitro runtime config; `elevenlabsApiKey` / `elevenlabsVoiceId` are read.
 * @param voiceCfg Plan voice config; supplies `voiceId` and `model` overrides.
 * @param userId   User passed to the quota module.
 * @param plan     Plan identifier forwarded to the quota module.
 * @throws 503 when no API key is configured, 502 when the upstream call fails
 *         or returns no body.
 */
async function speakElevenLabs(
  event: H3Event,
  text: string,
  _mode: "chat" | "sos" | "sos-continuation" | undefined,
  config: ReturnType<typeof useRuntimeConfig>,
  voiceCfg: VoiceConfig,
  userId: string,
  plan: string,
) {
  const key =
    (config.elevenlabsApiKey as string) || process.env.ELEVENLABS_API_KEY || "";
  if (!key) {
    throw createError({ statusCode: 503, message: "ElevenLabs API Key nicht konfiguriert" });
  }

  // Voice resolution order: plan config → runtime config → env → built-in default.
  const ELEVENLABS_FALLBACK_VOICE = "kdmDKE6EkgrWrrykO9Qt"; // Alexandra
  const voiceId =
    voiceCfg.voiceId ||
    (config.elevenlabsVoiceId as string) ||
    process.env.ELEVENLABS_VOICE_ID ||
    ELEVENLABS_FALLBACK_VOICE;

  const modelId = voiceCfg.model ?? "eleven_turbo_v2_5";

  // ElevenLabs reads `output_format` from the query string; a field of the
  // same name in the JSON body is ignored and the default format is used.
  const query = new URLSearchParams({
    optimize_streaming_latency: "4",
    output_format: "mp3_22050_32",
  });

  const upstream = await fetch(
    // The voice id comes from config/env; encode it so it cannot alter the path.
    `https://api.elevenlabs.io/v1/text-to-speech/${encodeURIComponent(voiceId)}/stream?${query.toString()}`,
    {
      method: "POST",
      headers: {
        "xi-api-key": key,
        "Content-Type": "application/json",
        Accept: "audio/mpeg",
      },
      body: JSON.stringify({
        text,
        model_id: modelId,
        voice_settings: {
          stability: 0.5,
          similarity_boost: 0.75,
          style: 0.3,
          use_speaker_boost: true,
        },
      }),
    },
  );

  if (!upstream.ok || !upstream.body) {
    // Log whatever the upstream said; an unreadable body becomes "".
    const err = await upstream.text().catch(() => "");
    console.error("[speak/elevenlabs] error:", upstream.status, err);
    throw createError({ statusCode: 502, message: "ElevenLabs TTS fehlgeschlagen" });
  }

  // Called for symmetry with the other providers. For unlimited plans this is
  // expected to be a no-op inside db/voiceQuota.ts — TODO confirm there.
  await consumeVoiceQuota(userId, plan, estimateAudioSeconds(text));

  setHeader(event, "Content-Type", "audio/mpeg");
  setHeader(event, "Cache-Control", "no-store");

  // Bridge the WHATWG stream from fetch into a Node stream for h3's sendStream.
  const { Readable } = await import("node:stream");
  return sendStream(event, Readable.fromWeb(upstream.body as never));
}
|