feat(sos): llmProvider toggle + sort:latency + bench scaffolding
- backend/coach: routing zu Sonnet (default) / Haiku / Groq Llama je nach sessionData.llmProvider. sort:latency für Anthropic-Modelle (-30..58% TTFB). - frontend: LlmProviderToggle (Sonnet/Haiku/Groq pills), llmProvider.ts Storage-Helper. sosStream.ts schickt llmProvider im /sos-session-Body. - bench: SosTtsBenchmark sammelt Marker (req->session, lyra-ttfb, lyra-done, tts-fired/headers/body/file, audio-loaded, first-audio); Output als console.table. - ops: backend/scripts/llm-bench.sh + Python-Variante für realistic SOS-Prompt. - speak-cartesia + speak-elevenlabs Endpoints (waren ungetracked, jetzt mit drin).
This commit is contained in:
parent
b58588cf3c
commit
f2e822be95
@ -26,12 +26,15 @@ import GamePickerDrawer from '../components/urge/GamePickerDrawer';
|
||||
import { VoiceBars } from '../components/urge/InlineIndicators';
|
||||
import MessageRow, { GameHeader, type SosMsg } from '../components/urge/MessageRow';
|
||||
import { SOS_BOOT } from '../lib/sosPrompts';
|
||||
import { CHIP_SETS, type ChipSet } from '../lib/sosConstants';
|
||||
import { CHIP_SETS, BREATH_PHASES, type ChipSet } from '../lib/sosConstants';
|
||||
import { parseLyraResponse, detectEmotion, type LyraEmotion, type ChipSpec } from '../lib/lyraResponse';
|
||||
import { streamSosLyra } from '../lib/sosStream';
|
||||
import { SosTtsQueue } from '../lib/sosTtsQueue';
|
||||
import { endpointForProvider, useTtsProvider, type TtsProvider } from '../lib/ttsProvider';
|
||||
import { endpointForProvider, useTtsProvider, currentProvider, type TtsProvider } from '../lib/ttsProvider';
|
||||
import { TtsProviderToggle } from '../components/urge/TtsProviderToggle';
|
||||
import { LlmProviderToggle } from '../components/urge/LlmProviderToggle';
|
||||
import { currentLlmProvider } from '../lib/llmProvider';
|
||||
import { BenchSession } from '../lib/sosTtsBenchmark';
|
||||
|
||||
// ── Main Screen ───────────────────────────────────────────────────────────────
|
||||
|
||||
@ -98,11 +101,32 @@ export default function SOSScreen() {
|
||||
|
||||
useEffect(() => { soundEnabledRef.current = soundEnabled; }, [soundEnabled]);
|
||||
|
||||
// Aktueller TTS-Provider — Ref damit async-Code (sendToLyra) den frischen Wert
|
||||
// sieht ohne stale-closure aus dem ursprünglichen Render.
|
||||
// Aktueller TTS-Provider — currentProvider() liest immer den frischen Wert,
|
||||
// ttsProvider state ist nur für UI-Re-Renders + cache-invalidation hier.
|
||||
const [ttsProvider] = useTtsProvider();
|
||||
const ttsProviderRef = useRef<TtsProvider>(ttsProvider);
|
||||
useEffect(() => { ttsProviderRef.current = ttsProvider; }, [ttsProvider]);
|
||||
|
||||
// Pre-cache der Atemübungs-Voice-Cues (Einatmen / Halten / Ausatmen).
|
||||
// Bei Phase-Wechsel im BreathingDrawer kommt das Audio sofort statt mit
|
||||
// ~600ms TTS-Roundtrip — so bleibt Voice synchron mit der Pulse-Animation.
|
||||
// Cache wird invalidiert wenn User den TTS-Provider wechselt.
|
||||
const breathAudioCacheRef = useRef<Map<string, string>>(new Map());
|
||||
useEffect(() => {
|
||||
breathAudioCacheRef.current.clear();
|
||||
const phrases = BREATH_PHASES
|
||||
.map((p) => p.speakLine)
|
||||
.filter((s): s is string => Boolean(s));
|
||||
let cancelled = false;
|
||||
(async () => {
|
||||
for (const text of phrases) {
|
||||
if (cancelled) return;
|
||||
const audio = await fetchTtsAudio(text).catch(() => null);
|
||||
if (cancelled) return;
|
||||
if (audio) breathAudioCacheRef.current.set(text, audio.uri);
|
||||
}
|
||||
})();
|
||||
return () => { cancelled = true; };
|
||||
// eslint-disable-next-line react-hooks/exhaustive-deps
|
||||
}, [ttsProvider]);
|
||||
|
||||
// Audio-Mode: bei SOS-Mount Audio-Session konfigurieren.
|
||||
// - playsInSilentModeIOS: Lyra spricht auch wenn iPhone auf "stumm"
|
||||
@ -216,7 +240,10 @@ export default function SOSScreen() {
|
||||
const session = (await supabase.auth.getSession()).data.session;
|
||||
if (controller.signal.aborted) return null;
|
||||
const apiBase = Constants.expoConfig?.extra?.apiUrl as string;
|
||||
const ttsRes = await fetch(`${apiBase}/api/coach/speak-openai`, {
|
||||
// Endpoint folgt User-Provider-Toggle (TtsProviderToggle im SOS-Header).
|
||||
const endpoint = endpointForProvider(currentProvider());
|
||||
const isGoogleCloud = endpoint.endsWith('/speak-google');
|
||||
const ttsRes = await fetch(`${apiBase}${endpoint}`, {
|
||||
method: 'POST',
|
||||
headers: {
|
||||
'Content-Type': 'application/json',
|
||||
@ -226,15 +253,29 @@ export default function SOSScreen() {
|
||||
signal: controller.signal,
|
||||
});
|
||||
if (!ttsRes.ok || controller.signal.aborted) return null;
|
||||
const buffer = await ttsRes.arrayBuffer();
|
||||
if (controller.signal.aborted || buffer.byteLength === 0) return null;
|
||||
const bytes = new Uint8Array(buffer);
|
||||
const chunks: string[] = [];
|
||||
const cs = 0x8000;
|
||||
for (let i = 0; i < bytes.length; i += cs)
|
||||
chunks.push(String.fromCharCode(...bytes.subarray(i, Math.min(i + cs, bytes.length))));
|
||||
const base64 = btoa(chunks.join(''));
|
||||
const tmpPath = `${FileSystem.cacheDirectory}sos-tts-${Date.now()}-${Math.random().toString(36).slice(2, 8)}.mp3`;
|
||||
|
||||
let base64: string;
|
||||
let ext: 'mp3' | 'wav';
|
||||
if (isGoogleCloud) {
|
||||
const json = (await ttsRes.json()) as { audio?: string };
|
||||
const dataUri = json.audio ?? '';
|
||||
const comma = dataUri.indexOf(',');
|
||||
if (comma === -1) return null;
|
||||
base64 = dataUri.slice(comma + 1);
|
||||
ext = 'mp3';
|
||||
} else {
|
||||
const buffer = await ttsRes.arrayBuffer();
|
||||
if (controller.signal.aborted || buffer.byteLength === 0) return null;
|
||||
const bytes = new Uint8Array(buffer);
|
||||
const chunks: string[] = [];
|
||||
const cs = 0x8000;
|
||||
for (let i = 0; i < bytes.length; i += cs)
|
||||
chunks.push(String.fromCharCode(...bytes.subarray(i, Math.min(i + cs, bytes.length))));
|
||||
base64 = btoa(chunks.join(''));
|
||||
ext = endpoint.endsWith('/speak-gemini') ? 'wav' : 'mp3';
|
||||
}
|
||||
|
||||
const tmpPath = `${FileSystem.cacheDirectory}sos-tts-${Date.now()}-${Math.random().toString(36).slice(2, 8)}.${ext}`;
|
||||
await FileSystem.writeAsStringAsync(tmpPath, base64, { encoding: FileSystem.EncodingType.Base64 });
|
||||
if (controller.signal.aborted) return null;
|
||||
return { uri: tmpPath, controller };
|
||||
@ -288,6 +329,13 @@ export default function SOSScreen() {
|
||||
ttsRef.current = null;
|
||||
setIsSpeaking(false);
|
||||
}
|
||||
// Cache-hit (Atemübung-Phrasen): instant playback, kein API-roundtrip.
|
||||
const cleaned = rawText.replace(/\s+/g, ' ').trim();
|
||||
const cachedUri = breathAudioCacheRef.current.get(cleaned);
|
||||
if (cachedUri) {
|
||||
await playTtsAudio(cleaned, { uri: cachedUri, controller: new AbortController() });
|
||||
return;
|
||||
}
|
||||
const audio = await fetchTtsAudio(rawText).catch(() => null);
|
||||
if (!audio) return;
|
||||
await playTtsAudio(rawText, audio);
|
||||
@ -299,6 +347,10 @@ export default function SOSScreen() {
|
||||
addMessage({ id: Date.now().toString(), role: 'user', content: userText, timestamp: new Date() });
|
||||
setUserTurnCount((n) => n + 1);
|
||||
setThinking(true); setEmotion('thinking');
|
||||
// Latenz-Benchmark — eine Session pro sendToLyra-Call. Marker werden in
|
||||
// stream/queue über onMetric gesammelt, gedruckt im onIdle (oder als
|
||||
// Fallback im finally bei Errors / sound-off).
|
||||
const bench = new BenchSession({ provider: currentProvider(), label: 'send' });
|
||||
try {
|
||||
const visibleHistory = messages.filter((m) => !m.cardType).map((m) => ({ role: m.role, content: m.content }));
|
||||
|
||||
@ -393,12 +445,13 @@ export default function SOSScreen() {
|
||||
apiBase,
|
||||
accessToken: session.access_token,
|
||||
locale: i18n.language,
|
||||
endpoint: endpointForProvider(ttsProviderRef.current),
|
||||
endpoint: endpointForProvider(currentProvider()),
|
||||
onStart: () => { setIsSpeaking(true); setIsTtsLoading(false); },
|
||||
onIdle: () => { setIsSpeaking(false); setIsTtsLoading(false); scheduleEmotionReset(0); },
|
||||
onIdle: () => { setIsSpeaking(false); setIsTtsLoading(false); scheduleEmotionReset(0); bench.print(); },
|
||||
onError: (err, sentence) => {
|
||||
console.warn('[sos-tts-queue] segment failed:', sentence.slice(0, 50), err);
|
||||
},
|
||||
onMetric: bench.mark,
|
||||
})
|
||||
: null;
|
||||
ttsQueueRef.current = ttsQueue;
|
||||
@ -411,6 +464,8 @@ export default function SOSScreen() {
|
||||
token: session.access_token,
|
||||
messages: apiMessages,
|
||||
locale: i18n.language,
|
||||
llmProvider: currentLlmProvider(),
|
||||
onMetric: bench.mark,
|
||||
onTextUpdate: (full) => {
|
||||
visible = full;
|
||||
ensureBubble(full);
|
||||
@ -532,7 +587,17 @@ export default function SOSScreen() {
|
||||
} catch {
|
||||
addMessage({ id: (Date.now() + 1).toString(), role: 'assistant', content: t('coach.error'), timestamp: new Date() });
|
||||
setEmotion('idle');
|
||||
} finally { setThinking(false); }
|
||||
} finally {
|
||||
setThinking(false);
|
||||
// Fallback-Print NUR wenn keine TTS-Queue (mehr) aktiv ist. Sonst feuert
|
||||
// das finally bei kurzen Antworten zu früh — der TTS-Fetch läuft dann
|
||||
// gerade erst, headers kommen erst Sekunden später, und ein print()
|
||||
// hier würde alle TTS-Marker verwerfen. Im aktiven Fall übernimmt
|
||||
// ttsQueue.onIdle den Print.
|
||||
if (!ttsQueueRef.current?.isActive()) {
|
||||
bench.print('finally');
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Opening greeting on mount — nutzt gleichen Streaming-Pfad wie sendToLyra,
|
||||
@ -558,18 +623,22 @@ export default function SOSScreen() {
|
||||
if (!session?.access_token) throw new Error('no token');
|
||||
const apiBase = Constants.expoConfig?.extra?.apiUrl as string;
|
||||
|
||||
// Latenz-Benchmark fürs Greeting — gleiches Pattern wie sendToLyra.
|
||||
const greetingBench = new BenchSession({ provider: currentProvider(), label: 'greeting' });
|
||||
|
||||
// Hybrid-TTS-Queue, gleiches Pattern wie sendToLyra
|
||||
const ttsQueue = soundEnabledRef.current
|
||||
? new SosTtsQueue({
|
||||
apiBase,
|
||||
accessToken: session.access_token,
|
||||
locale: i18n.language,
|
||||
endpoint: endpointForProvider(ttsProviderRef.current),
|
||||
endpoint: endpointForProvider(currentProvider()),
|
||||
onStart: () => { setIsSpeaking(true); setIsTtsLoading(false); },
|
||||
onIdle: () => { setIsSpeaking(false); setIsTtsLoading(false); scheduleEmotionReset(0); },
|
||||
onIdle: () => { setIsSpeaking(false); setIsTtsLoading(false); scheduleEmotionReset(0); greetingBench.print(); },
|
||||
onError: (err, sentence) => {
|
||||
console.warn('[sos-tts-greeting] segment failed:', sentence.slice(0, 50), err);
|
||||
},
|
||||
onMetric: greetingBench.mark,
|
||||
})
|
||||
: null;
|
||||
ttsQueueRef.current?.abort();
|
||||
@ -588,6 +657,8 @@ export default function SOSScreen() {
|
||||
token: session.access_token,
|
||||
messages: SOS_BOOT,
|
||||
locale: i18n.language,
|
||||
llmProvider: currentLlmProvider(),
|
||||
onMetric: greetingBench.mark,
|
||||
onTextUpdate: (full) => {
|
||||
if (cancelled) return;
|
||||
visible = full;
|
||||
@ -1026,7 +1097,7 @@ export default function SOSScreen() {
|
||||
<View style={st.avatarCenter}>
|
||||
<RiveAvatar emotion={emotion} size="md" />
|
||||
<View style={st.avatarMeta}>
|
||||
<Text style={st.avatarName}>Lyra · SOS</Text>
|
||||
<Text style={st.avatarName}>Lyra · SOS [v2]</Text>
|
||||
{(thinking || isLoading) && !isSpeaking && (
|
||||
<View style={st.speakingRow}>
|
||||
<VoiceBars count={5} baseColor="#3b82f6" />
|
||||
@ -1056,6 +1127,7 @@ export default function SOSScreen() {
|
||||
|
||||
<View style={[st.ttsToggleBar, { top: topBarHeight - 36 }]} pointerEvents="box-none">
|
||||
<TtsProviderToggle />
|
||||
<LlmProviderToggle />
|
||||
</View>
|
||||
|
||||
{playingGame ? (
|
||||
@ -1226,7 +1298,7 @@ const st = StyleSheet.create({
|
||||
container: { flex: 1, backgroundColor: '#ffffff' },
|
||||
topBar: { position: 'absolute', left: 0, right: 0, zIndex: 10, flexDirection: 'row', alignItems: 'flex-start', justifyContent: 'space-between', paddingHorizontal: 12 },
|
||||
topBarBackdrop: { position: 'absolute', top: 0, left: 0, right: 0, zIndex: 9, backgroundColor: '#ffffff' },
|
||||
ttsToggleBar: { position: 'absolute', left: 0, right: 0, zIndex: 8, alignItems: 'center' },
|
||||
ttsToggleBar: { position: "absolute", left: 0, right: 0, zIndex: 11, alignItems: "center" },
|
||||
actionBtn: { width: 40, height: 40, borderRadius: 20, backgroundColor: 'rgba(255,255,255,0.92)', alignItems: 'center', justifyContent: 'center', shadowColor: '#000', shadowOffset: { width: 0, height: 2 }, shadowOpacity: 0.08, shadowRadius: 6, elevation: 4 },
|
||||
avatarCenter: { flex: 1, alignItems: 'center', gap: 4 },
|
||||
avatarMeta: { alignItems: 'center', gap: 2 },
|
||||
|
||||
60
apps/rebreak-native/components/urge/LlmProviderToggle.tsx
Normal file
60
apps/rebreak-native/components/urge/LlmProviderToggle.tsx
Normal file
@ -0,0 +1,60 @@
|
||||
import { Pressable, Text, View } from 'react-native';
|
||||
import { LLM_PROVIDER_LABEL, type LlmProvider, useLlmProvider } from '../../lib/llmProvider';
|
||||
|
||||
// Order in which the provider pills are rendered, left to right.
const PROVIDERS: LlmProvider[] = ['openrouter-sonnet', 'openrouter-haiku', 'groq-llama'];

/**
 * Row of pill buttons for switching the SOS LLM provider.
 *
 * Reads the current selection from `useLlmProvider()` and calls its setter
 * when a pill is pressed. The selected pill is drawn dark with white text,
 * the others light grey with dark text.
 */
export function LlmProviderToggle() {
  const [current, set] = useLlmProvider();

  const pills = PROVIDERS.map((p) => {
    const active = p === current;
    const fill = active ? '#1f2937' : '#e5e7eb';
    const outline = active ? '#1f2937' : '#9ca3af';
    const textColor = active ? '#ffffff' : '#374151';

    return (
      <Pressable
        key={p}
        // The setter returns a promise; the press handler deliberately does not wait for it.
        onPress={() => { void set(p); }}
        hitSlop={6}
        style={{
          paddingHorizontal: 10,
          paddingVertical: 4,
          borderRadius: 999,
          backgroundColor: fill,
          borderWidth: 1.5,
          borderColor: outline,
        }}
      >
        <Text style={{ fontSize: 10, fontFamily: 'Nunito_700Bold', color: textColor }}>
          {LLM_PROVIDER_LABEL[p]}
        </Text>
      </Pressable>
    );
  });

  return (
    <View
      style={{
        flexDirection: 'row',
        alignItems: 'center',
        justifyContent: 'center',
        gap: 6,
        paddingHorizontal: 12,
        paddingVertical: 6,
      }}
    >
      <Text
        style={{
          fontSize: 9,
          color: '#9ca3af',
          textTransform: 'uppercase',
          letterSpacing: 0.5,
          marginRight: 4,
        }}
      >
        LLM
      </Text>
      {pills}
    </View>
  );
}
|
||||
@ -1,7 +1,7 @@
|
||||
import { Pressable, Text, View } from 'react-native';
|
||||
import { TTS_PROVIDER_LABEL, type TtsProvider, useTtsProvider } from '../../lib/ttsProvider';
|
||||
|
||||
const PROVIDERS: TtsProvider[] = ['openai', 'gemini', 'google-cloud'];
|
||||
const PROVIDERS: TtsProvider[] = ['openai', 'gemini', 'elevenlabs', 'cartesia', 'google-cloud'];
|
||||
|
||||
export function TtsProviderToggle() {
|
||||
const [current, set] = useTtsProvider();
|
||||
@ -38,16 +38,16 @@ export function TtsProviderToggle() {
|
||||
paddingHorizontal: 10,
|
||||
paddingVertical: 4,
|
||||
borderRadius: 999,
|
||||
backgroundColor: active ? '#1f2937' : '#f9fafb',
|
||||
borderWidth: 1,
|
||||
borderColor: active ? '#1f2937' : '#e5e7eb',
|
||||
backgroundColor: active ? '#1f2937' : '#e5e7eb',
|
||||
borderWidth: 1.5,
|
||||
borderColor: active ? '#1f2937' : '#9ca3af',
|
||||
}}
|
||||
>
|
||||
<Text
|
||||
style={{
|
||||
fontSize: 10,
|
||||
fontFamily: 'Nunito_700Bold',
|
||||
color: active ? '#ffffff' : '#6b7280',
|
||||
color: active ? '#ffffff' : '#374151',
|
||||
}}
|
||||
>
|
||||
{TTS_PROVIDER_LABEL[p]}
|
||||
|
||||
52
apps/rebreak-native/lib/llmProvider.ts
Normal file
52
apps/rebreak-native/lib/llmProvider.ts
Normal file
@ -0,0 +1,52 @@
|
||||
// SOS-LLM-Provider mit AsyncStorage-Persist + Listener-Pattern.
|
||||
// Live-Switch im SOS-Screen — analog zu lib/ttsProvider.ts.
|
||||
//
|
||||
// Backend (sos-session.post.ts) nimmt das Feld entgegen, sos-stream.get.ts
|
||||
// routet dann je nach Wert zu OpenRouter (Sonnet/Haiku) oder Groq (Llama).
|
||||
import AsyncStorage from '@react-native-async-storage/async-storage';
|
||||
import { useEffect, useState } from 'react';
|
||||
|
||||
export type LlmProvider = 'openrouter-sonnet' | 'openrouter-haiku' | 'groq-llama';
|
||||
|
||||
const STORAGE_KEY = 'rebreak-sos-llm-provider';
|
||||
const DEFAULT_PROVIDER: LlmProvider = 'openrouter-sonnet';
|
||||
|
||||
export const LLM_PROVIDER_LABEL: Record<LlmProvider, string> = {
|
||||
'openrouter-sonnet': 'Sonnet',
|
||||
'openrouter-haiku': 'Haiku',
|
||||
'groq-llama': 'Groq',
|
||||
};
|
||||
|
||||
const listeners = new Set<(p: LlmProvider) => void>();
|
||||
let cached: LlmProvider | null = null;
|
||||
|
||||
/**
 * Resolves the active LLM provider, reading it from AsyncStorage on first use.
 *
 * A value already held in the module-level cache is returned without touching
 * storage. Otherwise the stored string is validated against the known provider
 * ids; anything else (missing key, unknown value, storage error) falls back to
 * the default provider.
 *
 * @returns The provider that is now cached.
 */
export async function loadLlmProvider(): Promise<LlmProvider> {
  if (cached) return cached;

  // A storage failure is treated like "nothing stored".
  const raw = await AsyncStorage.getItem(STORAGE_KEY).catch(() => null);

  // setLlmProvider() may have run while the storage read was pending. Its value
  // is newer than whatever was on disk, so it must not be overwritten here.
  // The cast re-widens the type: TypeScript still carries the pre-await `null`
  // narrowing, although the variable can have been reassigned in between.
  const setMeanwhile = cached as LlmProvider | null;
  if (setMeanwhile) return setMeanwhile;

  cached =
    raw === 'openrouter-sonnet' || raw === 'openrouter-haiku' || raw === 'groq-llama'
      ? raw
      : DEFAULT_PROVIDER;
  return cached;
}
|
||||
|
||||
/**
 * Switches the active LLM provider.
 *
 * The in-memory cache is updated first, so `currentLlmProvider()` returns the
 * new value immediately. The choice is then written to AsyncStorage (write
 * errors are ignored), and finally every registered listener is called with
 * the new provider.
 *
 * @param p Provider to activate.
 */
export async function setLlmProvider(p: LlmProvider): Promise<void> {
  cached = p;

  const persisted = AsyncStorage.setItem(STORAGE_KEY, p).catch(() => {});
  await persisted;

  listeners.forEach((notify) => {
    notify(p);
  });
}
|
||||
|
||||
/**
 * Synchronous read of the active provider, analogous to currentProvider() in
 * ttsProvider.ts. Returns the default provider while the cache is still empty.
 */
export function currentLlmProvider(): LlmProvider {
  return cached === null ? DEFAULT_PROVIDER : cached;
}
|
||||
|
||||
/**
 * React hook exposing the active LLM provider together with its setter.
 *
 * State starts from the cached value (or the default). On mount the hook
 * triggers `loadLlmProvider()` and registers a listener so that changes made
 * through `setLlmProvider()` are reflected here. Updates arriving after
 * unmount are ignored.
 *
 * @returns A `[provider, setLlmProvider]` tuple.
 */
export function useLlmProvider(): [LlmProvider, (p: LlmProvider) => Promise<void>] {
  const [provider, setProvider] = useState<LlmProvider>(cached ?? DEFAULT_PROVIDER);

  useEffect(() => {
    let alive = true;

    // Used for both the initial load result and later change notifications.
    const apply = (next: LlmProvider): void => {
      if (alive) setProvider(next);
    };

    loadLlmProvider().then((loaded) => {
      apply(loaded);
    });
    listeners.add(apply);

    return () => {
      alive = false;
      listeners.delete(apply);
    };
  }, []);

  return [provider, setLlmProvider];
}
|
||||
@ -46,9 +46,9 @@ export type BreathState = 'idle' | 'countdown' | 'active';
|
||||
// speakLine is the voice cue for a phase. SOSScreen pre-caches the TTS audio for every
|
||||
// non-null speakLine so the cue can play without a TTS roundtrip on phase change; null = no cue cached.
|
||||
export const BREATH_PHASES: { phase: BreathPhase; duration: number; label: string; color: string; speakLine: string | null }[] = [
|
||||
{ phase: 'inhale', duration: 4, label: 'Einatmen', color: '#6366f1', speakLine: null },
|
||||
{ phase: 'hold', duration: 7, label: 'Halten', color: '#f97316', speakLine: null },
|
||||
{ phase: 'exhale', duration: 8, label: 'Ausatmen', color: '#16a34a', speakLine: null },
|
||||
{ phase: 'inhale', duration: 4, label: 'Einatmen', color: '#6366f1', speakLine: 'Einatmen' },
|
||||
{ phase: 'hold', duration: 7, label: 'Halten', color: '#f97316', speakLine: 'Halten' },
|
||||
{ phase: 'exhale', duration: 8, label: 'Ausatmen', color: '#16a34a', speakLine: 'Ausatmen' },
|
||||
];
|
||||
export const TOTAL_ROUNDS = 3;
|
||||
|
||||
|
||||
@ -11,6 +11,8 @@
|
||||
// direkt in eine TTS-Queue schieben → erste Audio-Wiedergabe ~3s früher als
|
||||
// "warten bis fullText fertig".
|
||||
import EventSource from 'react-native-sse';
|
||||
import type { BenchOnMetric } from './sosTtsBenchmark';
|
||||
import type { LlmProvider } from './llmProvider';
|
||||
|
||||
type SseEvents = 'message' | 'chips' | 'done';
|
||||
|
||||
@ -19,6 +21,9 @@ export type StreamSosLyraOpts = {
|
||||
token: string;
|
||||
messages: Array<{ role: 'user' | 'assistant'; content: string }>;
|
||||
locale: string;
|
||||
/** LLM-Provider-Switch: bestimmt welches Modell der Server für diese Session
|
||||
* benutzt. Default (undefined) → openrouter-sonnet auf Server-Seite. */
|
||||
llmProvider?: LlmProvider;
|
||||
onTextUpdate: (full: string) => void;
|
||||
onChips: (chips: Array<{ label: string; action: string }>) => void;
|
||||
/** Phase B: feuert pro fertigem Satz live während des Streams + Tail beim
|
||||
@ -27,6 +32,9 @@ export type StreamSosLyraOpts = {
|
||||
onSentence?: (sentence: string) => void;
|
||||
onDone: (full: string) => void;
|
||||
onError: (err: unknown) => void;
|
||||
/** Latenz-Benchmark: feuert session-post-start, session-post-done,
|
||||
* sse-first-chunk, sse-done. Siehe lib/sosTtsBenchmark.ts. */
|
||||
onMetric?: BenchOnMetric;
|
||||
};
|
||||
|
||||
// Min-Länge für sentence-level TTS — winzige "Hm." / "Ja." kommen mit dem
|
||||
@ -59,16 +67,18 @@ function consumeCompletedSentences(text: string): { sentences: string[]; consume
|
||||
|
||||
export async function streamSosLyra(opts: StreamSosLyraOpts): Promise<() => void> {
|
||||
// Step 1: POST zu /api/coach/sos-session → sessionId holen
|
||||
opts.onMetric?.('session-post-start');
|
||||
const sessRes = await fetch(`${opts.apiBase}/api/coach/sos-session`, {
|
||||
method: 'POST',
|
||||
headers: {
|
||||
'Content-Type': 'application/json',
|
||||
Authorization: `Bearer ${opts.token}`,
|
||||
},
|
||||
body: JSON.stringify({ messages: opts.messages, locale: opts.locale }),
|
||||
body: JSON.stringify({ messages: opts.messages, locale: opts.locale, llmProvider: opts.llmProvider }),
|
||||
});
|
||||
if (!sessRes.ok) throw new Error(`session: ${sessRes.status}`);
|
||||
const { sessionId } = await sessRes.json();
|
||||
opts.onMetric?.('session-post-done');
|
||||
|
||||
// Step 2: EventSource für SSE-Stream
|
||||
// pollingInterval: 0 → KEIN Auto-Reconnect (Session ist one-time-use)
|
||||
@ -80,6 +90,7 @@ export async function streamSosLyra(opts: StreamSosLyraOpts): Promise<() => void
|
||||
|
||||
let fullText = '';
|
||||
let sentenceConsumedIndex = 0;
|
||||
let firstChunkSeen = false;
|
||||
|
||||
const flushNewSentences = () => {
|
||||
if (!opts.onSentence) return;
|
||||
@ -104,6 +115,10 @@ export async function streamSosLyra(opts: StreamSosLyraOpts): Promise<() => void
|
||||
chunk = event.data;
|
||||
}
|
||||
if (!chunk) return;
|
||||
if (!firstChunkSeen) {
|
||||
firstChunkSeen = true;
|
||||
opts.onMetric?.('sse-first-chunk');
|
||||
}
|
||||
fullText += chunk;
|
||||
opts.onTextUpdate(fullText);
|
||||
// Phase B: live sentence-detection für TTS-Queue
|
||||
@ -119,6 +134,7 @@ export async function streamSosLyra(opts: StreamSosLyraOpts): Promise<() => void
|
||||
});
|
||||
|
||||
es.addEventListener('done', () => {
|
||||
opts.onMetric?.('sse-done');
|
||||
// Phase B: Tail flushen (letzter Satz ohne folgendes Capital-Letter wird
|
||||
// sonst nie als "complete" erkannt). Trim leeren Tail away.
|
||||
if (opts.onSentence) {
|
||||
|
||||
105
apps/rebreak-native/lib/sosTtsBenchmark.ts
Normal file
105
apps/rebreak-native/lib/sosTtsBenchmark.ts
Normal file
@ -0,0 +1,105 @@
|
||||
// SOS+TTS Latenz-Benchmark.
|
||||
//
|
||||
// Eine BenchSession pro sendToLyra-Call. Aggregiert Timing-Marker aus
|
||||
// sosStream + sosTtsQueue und druckt am Ende eine Tabelle ins Dev-Console.
|
||||
//
|
||||
// Marker-Reihenfolge im typischen Flow:
|
||||
// t0 request-fired (sendToLyra start)
|
||||
// t1 session-post-done (POST /sos-session resolved → sessionId da)
|
||||
// t2 sse-first-chunk ("Lyra denkt fertig" — erstes Token)
|
||||
// t3 sse-done (full text fertig)
|
||||
// t4 tts-fetch-start (POST an /api/coach/speak-* fired)
|
||||
// t5 tts-fetch-headers (response headers da, body kommt noch)
|
||||
// t6 tts-body-done (kompletter Audio-Body geladen — DAS ist der Bottleneck)
|
||||
// t7 tts-file-written (base64 → File geschrieben)
|
||||
// t8 audio-loaded (Audio.Sound.createAsync resolved)
|
||||
// t9 first-audio (erstes onPlaybackStatusUpdate mit isPlaying)
|
||||
//
|
||||
// Bottleneck-Diagnose:
|
||||
// - (t6 - t5) groß → Body-Download dominiert. Cartesia's TTFB-Vorteil
|
||||
// verpufft hier weil wir auf alles warten statt zu streamen.
|
||||
// - (t9 - t8) groß → expo-av lädt langsam (file-codec-detect etc.)
|
||||
|
||||
/** Names of the timing markers a benchmark session can record. */
export type BenchMarker =
  | 'session-post-start'
  | 'session-post-done'
  | 'sse-first-chunk'
  | 'sse-done'
  | 'tts-fetch-start'
  | 'tts-fetch-headers'
  | 'tts-body-done'
  | 'tts-file-written'
  | 'audio-loaded'
  | 'first-audio';

/** Callback shape handed to instrumented code; `meta` carries optional per-marker details. */
export type BenchOnMetric = (marker: BenchMarker, meta?: Record<string, unknown>) => void;

type MarkerEntry = {
  marker: BenchMarker;
  /** Milliseconds relative to the session's t0. */
  tRel: number;
  meta?: Record<string, unknown>;
};

/**
 * Collects timing markers for one request/response turn and prints them once.
 *
 * `t0` is taken with `Date.now()` at construction; every marker is stored as
 * an offset from it. After `print()` has run, further `mark()` calls are
 * ignored and further `print()` calls do nothing.
 */
export class BenchSession {
  readonly t0: number;
  readonly provider: string;
  readonly label: string;
  private entries: MarkerEntry[] = [];
  private printed = false;

  /**
   * @param opts.provider Provider name shown in the output.
   * @param opts.label Optional tag for the output; defaults to `'sos-turn'`.
   */
  constructor(opts: { provider: string; label?: string }) {
    this.t0 = Date.now();
    this.provider = opts.provider;
    this.label = opts.label ?? 'sos-turn';
  }

  /** Bound recorder, so it can be passed directly as an `onMetric` callback. */
  readonly mark: BenchOnMetric = (marker, meta) => {
    if (this.printed) return;
    this.entries.push({ marker, tRel: Date.now() - this.t0, meta });
  };

  /**
   * Prints a compact summary via `console.log` and all recorded markers via
   * `console.table`. Only the first call has an effect.
   *
   * @param extraNote Optional text appended to the summary headline.
   */
  print(extraNote?: string): void {
    if (this.printed) return;
    this.printed = true;

    // If a marker was recorded more than once, the first occurrence is used.
    const get = (m: BenchMarker) => this.entries.find((e) => e.marker === m)?.tRel;
    const fmt = (v: number | undefined) => (v == null ? '—' : `${v}ms`);
    const diff = (a: BenchMarker, b: BenchMarker) => {
      const va = get(a), vb = get(b);
      return va != null && vb != null ? `${vb - va}ms` : '—';
    };

    // Plain entries are offsets from t0; "(rel)" entries are deltas between two markers.
    const stages = {
      provider: this.provider,
      label: this.label,
      'req→session': fmt(get('session-post-done')),
      'lyra-ttfb': fmt(get('sse-first-chunk')),
      'lyra-done': fmt(get('sse-done')),
      'tts-fired': fmt(get('tts-fetch-start')),
      'tts-ttfb (rel)': diff('tts-fetch-start', 'tts-fetch-headers'),
      'tts-body (rel)': diff('tts-fetch-headers', 'tts-body-done'),
      'tts-file (rel)': diff('tts-body-done', 'tts-file-written'),
      'audio-load (rel)': diff('tts-file-written', 'audio-loaded'),
      'first-audio': fmt(get('first-audio')),
      'TOTAL → speak': fmt(get('first-audio')),
    };

    // One compact console.log line plus a console.table with every marker.
    // eslint-disable-next-line no-console
    console.log(
      `[bench] ${this.provider} (${this.label})${extraNote ? ' ' + extraNote : ''}`,
      stages,
    );

    // Include the per-marker meta (e.g. endpoint, status, bytes) in the table;
    // previously it was recorded but never shown. The core columns are set
    // first and cannot be overridden by a meta key of the same name.
    const rows = this.entries.map(({ marker, tRel, meta }) => {
      const row: Record<string, unknown> = { marker, tRel };
      for (const [key, value] of Object.entries(meta ?? {})) {
        if (!(key in row)) row[key] = value;
      }
      return row;
    });
    // eslint-disable-next-line no-console
    console.table(rows);
  }

  /** Returns provider, label and a copy of the entry list (e.g. for a debug overlay). */
  snapshot(): { provider: string; label: string; entries: MarkerEntry[] } {
    return { provider: this.provider, label: this.label, entries: [...this.entries] };
  }
}
|
||||
@ -16,6 +16,7 @@
|
||||
// `setIsSpeaking` triggern.
|
||||
import { Audio } from 'expo-av';
|
||||
import * as FileSystem from 'expo-file-system';
|
||||
import type { BenchOnMetric } from './sosTtsBenchmark';
|
||||
|
||||
export type SosTtsFetchOpts = {
|
||||
apiBase: string;
|
||||
@ -33,6 +34,12 @@ export type SosTtsQueueOpts = SosTtsFetchOpts & {
|
||||
onIdle?: () => void;
|
||||
/** Single-sentence-fetch oder -playback ist gescheitert. Queue läuft weiter. */
|
||||
onError?: (err: unknown, sentence: string) => void;
|
||||
/** Latenz-Benchmark: feuert nur für das ERSTE enqueue'te Item, weil das
|
||||
* user-wahrgenommen first-audio bestimmt. Marker: tts-fetch-start,
|
||||
* tts-fetch-headers, tts-body-done, tts-file-written, audio-loaded,
|
||||
* first-audio. Folge-Items (z.B. sos-continuation) instrumentieren wir
|
||||
* nicht — die spielen ja schon parallel zum ersten und verzerren nur. */
|
||||
onMetric?: BenchOnMetric;
|
||||
};
|
||||
|
||||
const EMOJI_RE = /[\p{Extended_Pictographic}\p{Emoji_Component}]/gu;
|
||||
@ -47,6 +54,10 @@ type QueueItem = {
|
||||
text: string;
|
||||
mode: SosTtsMode;
|
||||
controller: AbortController;
|
||||
/** Nur das erste enqueue'te Item bekommt einen onMetric — das bestimmt
|
||||
* user-wahrgenommen first-audio. Folge-Items (sos-continuation) tracken
|
||||
* wir nicht. */
|
||||
metric?: BenchOnMetric;
|
||||
/** Pre-fetch starts beim enqueue → wenn play dran ist, ist Audio meist schon
|
||||
* fertig oder fast fertig. Eliminiert Gap zwischen Items im Hybrid-Mode. */
|
||||
audioPromise: Promise<{ uri: string } | null>;
|
||||
@ -59,6 +70,12 @@ export class SosTtsQueue {
|
||||
private aborted = false;
|
||||
private startedOnce = false;
|
||||
private opts: SosTtsQueueOpts;
|
||||
// Dedup: in dev-mode (React StrictMode) feuern useEffects 2x → identische
|
||||
// Sätze würden 2x enqueued + 2x von der TTS-API geholt + 2x abgespielt.
|
||||
// Wir tracken die in dieser Queue-Instanz schon gesehenen Texte.
|
||||
private seenTexts = new Set<string>();
|
||||
// Bench: nur das ERSTE enqueue'te Item kriegt Metric-Tracking.
|
||||
private metricGiven = false;
|
||||
|
||||
constructor(opts: SosTtsQueueOpts) {
|
||||
this.opts = opts;
|
||||
@ -74,15 +91,24 @@ export class SosTtsQueue {
|
||||
if (this.aborted) return;
|
||||
const cleaned = cleanForTts(sentence);
|
||||
if (!cleaned) return;
|
||||
// Dedup gegen StrictMode-double-effects: gleicher Text in derselben
|
||||
// Queue-Instanz wird nur 1x angefragt + abgespielt.
|
||||
if (this.seenTexts.has(cleaned)) return;
|
||||
this.seenTexts.add(cleaned);
|
||||
// Pre-fetch SOFORT beim enqueue → läuft parallel zum Playback der vorigen
|
||||
// Items. Heißt: wenn Item 1 fertig spielt, ist Item 2's Audio meist schon
|
||||
// im Cache → null Gap zwischen den Sätzen/Blöcken.
|
||||
const controller = new AbortController();
|
||||
const audioPromise = this.fetchAudio(cleaned, mode, controller.signal).catch((err) => {
|
||||
let metric: BenchOnMetric | undefined;
|
||||
if (!this.metricGiven && this.opts.onMetric) {
|
||||
this.metricGiven = true;
|
||||
metric = this.opts.onMetric;
|
||||
}
|
||||
const audioPromise = this.fetchAudio(cleaned, mode, controller.signal, metric).catch((err) => {
|
||||
this.opts.onError?.(err, cleaned);
|
||||
return null;
|
||||
});
|
||||
this.queue.push({ text: cleaned, mode, controller, audioPromise });
|
||||
this.queue.push({ text: cleaned, mode, controller, metric, audioPromise });
|
||||
void this.tick();
|
||||
}
|
||||
|
||||
@ -125,11 +151,13 @@ export class SosTtsQueue {
|
||||
{ uri: audio.uri },
|
||||
{ shouldPlay: true },
|
||||
);
|
||||
item.metric?.('audio-loaded');
|
||||
if (this.aborted) {
|
||||
await sound.unloadAsync().catch(() => {});
|
||||
return;
|
||||
}
|
||||
this.currentSound = sound;
|
||||
let firstAudioReported = false;
|
||||
await new Promise<void>((resolve) => {
|
||||
sound.setOnPlaybackStatusUpdate((status) => {
|
||||
if (this.aborted) {
|
||||
@ -137,6 +165,10 @@ export class SosTtsQueue {
|
||||
resolve();
|
||||
return;
|
||||
}
|
||||
if (!firstAudioReported && status.isLoaded && status.isPlaying) {
|
||||
firstAudioReported = true;
|
||||
item.metric?.('first-audio');
|
||||
}
|
||||
if (status.isLoaded && status.didJustFinish) {
|
||||
sound.setOnPlaybackStatusUpdate(null);
|
||||
sound.unloadAsync().catch(() => {});
|
||||
@ -158,9 +190,15 @@ export class SosTtsQueue {
|
||||
}
|
||||
}
|
||||
|
||||
private async fetchAudio(text: string, mode: SosTtsMode, signal: AbortSignal): Promise<{ uri: string } | null> {
|
||||
private async fetchAudio(
|
||||
text: string,
|
||||
mode: SosTtsMode,
|
||||
signal: AbortSignal,
|
||||
metric?: BenchOnMetric,
|
||||
): Promise<{ uri: string } | null> {
|
||||
const endpoint = this.opts.endpoint ?? '/api/coach/speak-openai';
|
||||
const isGoogleCloud = endpoint.endsWith('/speak-google');
|
||||
metric?.('tts-fetch-start', { endpoint });
|
||||
const res = await fetch(`${this.opts.apiBase}${endpoint}`, {
|
||||
method: 'POST',
|
||||
headers: {
|
||||
@ -170,6 +208,7 @@ export class SosTtsQueue {
|
||||
body: JSON.stringify({ text, locale: this.opts.locale, mode }),
|
||||
signal,
|
||||
});
|
||||
metric?.('tts-fetch-headers', { status: res.status });
|
||||
if (!res.ok || signal.aborted) return null;
|
||||
|
||||
// /speak-google liefert JSON { audio: "data:audio/mp3;base64,..." }.
|
||||
@ -179,6 +218,7 @@ export class SosTtsQueue {
|
||||
let ext: 'mp3' | 'wav';
|
||||
if (isGoogleCloud) {
|
||||
const json = (await res.json()) as { audio?: string };
|
||||
metric?.('tts-body-done');
|
||||
const dataUri = json.audio ?? '';
|
||||
const comma = dataUri.indexOf(',');
|
||||
if (comma === -1) return null;
|
||||
@ -186,6 +226,7 @@ export class SosTtsQueue {
|
||||
ext = 'mp3';
|
||||
} else {
|
||||
const buffer = await res.arrayBuffer();
|
||||
metric?.('tts-body-done', { bytes: buffer.byteLength });
|
||||
if (signal.aborted || buffer.byteLength === 0) return null;
|
||||
const bytes = new Uint8Array(buffer);
|
||||
const chunks: string[] = [];
|
||||
@ -203,6 +244,7 @@ export class SosTtsQueue {
|
||||
await FileSystem.writeAsStringAsync(tmpPath, base64, {
|
||||
encoding: FileSystem.EncodingType.Base64,
|
||||
});
|
||||
metric?.('tts-file-written');
|
||||
if (signal.aborted) return null;
|
||||
return { uri: tmpPath };
|
||||
}
|
||||
|
||||
@ -4,7 +4,7 @@
|
||||
import AsyncStorage from '@react-native-async-storage/async-storage';
|
||||
import { useEffect, useState } from 'react';
|
||||
|
||||
export type TtsProvider = 'openai' | 'gemini' | 'google-cloud';
|
||||
export type TtsProvider = 'openai' | 'gemini' | 'google-cloud' | 'elevenlabs' | 'cartesia';
|
||||
|
||||
const STORAGE_KEY = 'rebreak-sos-tts-provider';
|
||||
const DEFAULT_PROVIDER: TtsProvider = 'openai';
|
||||
@ -13,12 +13,16 @@ export const TTS_PROVIDER_LABEL: Record<TtsProvider, string> = {
|
||||
openai: 'OpenAI',
|
||||
gemini: 'Gemini',
|
||||
'google-cloud': 'Cloud',
|
||||
elevenlabs: 'ElevenLabs',
|
||||
cartesia: 'Cartesia',
|
||||
};
|
||||
|
||||
export const TTS_PROVIDER_ENDPOINT: Record<TtsProvider, string> = {
|
||||
openai: '/api/coach/speak-openai',
|
||||
gemini: '/api/coach/speak-gemini',
|
||||
'google-cloud': '/api/coach/speak-google',
|
||||
elevenlabs: '/api/coach/speak-elevenlabs',
|
||||
cartesia: '/api/coach/speak-cartesia',
|
||||
};
|
||||
|
||||
const listeners = new Set<(p: TtsProvider) => void>();
|
||||
@ -27,7 +31,10 @@ let cached: TtsProvider | null = null;
|
||||
/**
 * Resolves the persisted TTS provider and memoises it in the module-level
 * `cached` variable; once `cached` is set, later calls return it without
 * touching storage.
 *
 * A failed storage read, a missing entry, or an unrecognised stored value all
 * resolve to `DEFAULT_PROVIDER`.
 *
 * @returns The provider to use for TTS requests.
 */
export async function loadTtsProvider(): Promise<TtsProvider> {
  if (cached) return cached;
  const raw = await AsyncStorage.getItem(STORAGE_KEY).catch(() => null);
  // Single assignment: the previous, narrower check (gemini / google-cloud
  // only) was immediately overwritten by this one and has been dropped.
  // Anything not matched here falls back to DEFAULT_PROVIDER.
  cached =
    raw === 'gemini' || raw === 'google-cloud' || raw === 'elevenlabs' || raw === 'cartesia'
      ? raw
      : DEFAULT_PROVIDER;
  return cached;
}
|
||||
|
||||
@ -41,6 +48,13 @@ export function endpointForProvider(p: TtsProvider): string {
|
||||
return TTS_PROVIDER_ENDPOINT[p];
|
||||
}
|
||||
|
||||
/**
 * Synchronous snapshot of the currently selected TTS provider.
 *
 * Reads the module-level `cached` value directly and substitutes
 * `DEFAULT_PROVIDER` while nothing has been cached yet. Intended for async
 * callbacks that must not depend on React state / ref propagation.
 *
 * NOTE(review): the original comment states that `setTtsProvider` updates
 * `cached` synchronously before listeners fire; that function is not visible
 * here — confirm before relying on the ordering.
 */
export function currentProvider(): TtsProvider {
  if (cached == null) {
    return DEFAULT_PROVIDER;
  }
  return cached;
}
|
||||
|
||||
export function useTtsProvider(): [TtsProvider, (p: TtsProvider) => Promise<void>] {
|
||||
const [p, setP] = useState<TtsProvider>(cached ?? DEFAULT_PROVIDER);
|
||||
useEffect(() => {
|
||||
|
||||
219
backend/scripts/llm-bench.sh
Normal file
219
backend/scripts/llm-bench.sh
Normal file
@ -0,0 +1,219 @@
|
||||
#!/usr/bin/env bash
|
||||
# llm-bench.sh — TTFB benchmark across LLM providers für SOS-style requests.
|
||||
#
|
||||
# Misst time_starttransfer (= erste Byte vom Body) für streaming chat completions.
|
||||
# Für SSE-Endpoints korrespondiert das praktisch 1:1 mit dem ersten Token am Client.
|
||||
#
|
||||
# Usage:
|
||||
# bash llm-bench.sh [RUNS] # default RUNS=3
|
||||
#
|
||||
# Auf staging-server (mit Infisical):
|
||||
# infisical run --projectId="$INFISICAL_PROJECT_ID" --env=staging --token="$TOKEN" -- \
|
||||
# bash llm-bench.sh
|
||||
#
|
||||
# ENV vars (alle optional, fehlende → skip):
|
||||
# OPENROUTER_API_KEY — alle Modelle via OpenRouter
|
||||
# ANTHROPIC_API_KEY — Haiku/Sonnet direkt
|
||||
# GROQ_API_KEY — Llama via Groq direkt
|
||||
# OPENAI_API_KEY — GPT-4o-mini direkt
|
||||
# GEMINI_API_KEY — Gemini direkt (oder GOOGLE_GENERATIVE_AI_API_KEY)
|
||||
|
||||
set -u
|
||||
|
||||
RUNS="${1:-3}"
|
||||
|
||||
SYSTEM_PROMPT='Du bist Lyra, eine warme empathische Begleiterin für Menschen mit Glücksspielsucht. Antworte in maximal 2-3 deutschen Sätzen, warm und ohne Belehrung. Am Ende JSON-Chips: [{"label":"...","action":"..."}]'
|
||||
USER_MSG='Ich bin gerade unter starkem Druck und denke daran, einen großen Einsatz zu machen.'
|
||||
MAX_TOK=80
|
||||
|
||||
command -v jq >/dev/null || { echo "jq fehlt — apt install jq"; exit 1; }
|
||||
|
||||
# ── helpers ──────────────────────────────────────────────────────────────────
|
||||
|
||||
# Run curl once with the given arguments and print the time-to-first-byte in
# whole milliseconds.
#   - prints "ERR" when curl itself exits non-zero (timeout, connect error, ...)
#   - prints "FAIL(<http code>)" when the response status is neither 200 nor 206
# All arguments are forwarded to curl unchanged.
ttfb_ms() {
  local raw
  if ! raw=$(curl -s -N -o /dev/null --max-time 30 \
      -w '%{time_starttransfer}\n%{http_code}' "$@" 2>/dev/null); then
    echo "ERR"
    return
  fi

  # The -w template yields "<seconds>\n<status>": first line is the timing,
  # last line is the HTTP status code.
  local seconds="${raw%%$'\n'*}"
  local status="${raw##*$'\n'}"

  case "$status" in
    200|206) ;;
    *)
      echo "FAIL($status)"
      return
      ;;
  esac

  # Seconds (fractional) -> integer milliseconds, truncated.
  awk -v s="$seconds" 'BEGIN { printf "%d", s * 1000 }'
}
|
||||
|
||||
# Run ttfb_ms RUNS times for one target and print a single result row:
# the label, every individual measurement, then min and p50 over the
# successful (purely numeric) measurements, or "ALL FAILED" if there are none.
#   $1   label shown in the first column
#   $2+  arguments forwarded to ttfb_ms (i.e. to curl)
# Note: p50 is the element at index n/2 of the sorted values, so for an even
# number of successful runs it is the upper of the two middle values.
benchN() {
  local label="$1"; shift
  # Declared local so the loop variables do not leak into the caller's scope.
  local i t
  local valid=()
  printf " %-50s " "$label"
  for ((i=1; i<=RUNS; i++)); do
    t=$(ttfb_ms "$@")
    printf "%-9s" "$t"
    # Only plain integers are measurements; "ERR" / "FAIL(...)" are skipped.
    if [[ "$t" =~ ^[0-9]+$ ]]; then
      valid+=("$t")
    fi
    # Short pause between requests.
    sleep 0.4
  done
  if (( ${#valid[@]} > 0 )); then
    local sorted
    mapfile -t sorted < <(printf '%s\n' "${valid[@]}" | sort -n)
    local n=${#sorted[@]}
    printf " │ min=%sms p50=%sms\n" "${sorted[0]}" "${sorted[$((n/2))]}"
  else
    printf " │ ALL FAILED\n"
  fi
}
|
||||
|
||||
# Build the JSON body for an OpenAI-style streaming chat completion (per the
# original note, OpenAI, OpenRouter, Groq, Cerebras and Mistral all accept
# this format).
#   $1  model identifier
#   $2  optional JSON object merged on top of the base body
#       (e.g. '{"provider":{"sort":"latency"}}')
# Reads SYSTEM_PROMPT, USER_MSG and MAX_TOK from the script's globals and
# prints compact JSON on stdout.
openai_body() {
  local model="$1"
  local overrides="${2:-}"
  # An absent/empty second argument becomes an empty object, so the
  # `+ $extra` merge below leaves the base body untouched.
  [[ -n "$overrides" ]] || overrides='{}'

  jq --null-input --compact-output \
    --argjson extra "$overrides" \
    --argjson maxtok "$MAX_TOK" \
    --arg model "$model" \
    --arg system "$SYSTEM_PROMPT" \
    --arg user "$USER_MSG" \
    '{model:$model, stream:true, max_tokens:$maxtok,
      messages:[{role:"system",content:$system},{role:"user",content:$user}]} + $extra'
}
|
||||
|
||||
# Build the JSON body for a streaming Anthropic Messages API request.
#   $1  model identifier
# The system prompt goes in the top-level `system` field and the user message
# is the only entry in `messages`. Reads SYSTEM_PROMPT, USER_MSG and MAX_TOK
# from the script's globals; prints compact JSON on stdout.
anthropic_body() {
  local model_id="$1"

  jq --null-input --compact-output \
    --argjson maxtok "$MAX_TOK" \
    --arg model "$model_id" \
    --arg system "$SYSTEM_PROMPT" \
    --arg user "$USER_MSG" \
    '{model:$model, stream:true, max_tokens:$maxtok, system:$system,
      messages:[{role:"user",content:$user}]}'
}
|
||||
|
||||
# Build the JSON body for a Gemini generateContent-style request.
# Takes no arguments (the model is selected via the request URL by the caller).
# Reads SYSTEM_PROMPT, USER_MSG and MAX_TOK from the script's globals; prints
# compact JSON on stdout.
gemini_body() {
  jq --null-input --compact-output \
    --argjson maxtok "$MAX_TOK" \
    --arg system "$SYSTEM_PROMPT" \
    --arg user "$USER_MSG" \
    '{contents:[{parts:[{text:$user}]}],
      systemInstruction:{parts:[{text:$system}]},
      generationConfig:{maxOutputTokens:$maxtok}}'
}
|
||||
|
||||
# ── header ───────────────────────────────────────────────────────────────────
|
||||
echo
|
||||
echo "═══════════════════════════════════════════════════════════════════"
|
||||
echo " LLM TTFB Benchmark — $RUNS runs each — $(date '+%Y-%m-%d %H:%M:%S')"
|
||||
country=$(curl -s --max-time 3 ipinfo.io/country 2>/dev/null || echo "?")
|
||||
ip=$(curl -s --max-time 3 ifconfig.co 2>/dev/null || echo "?")
|
||||
echo " Source: $country / $ip"
|
||||
echo "═══════════════════════════════════════════════════════════════════"
|
||||
|
||||
# ── via OpenRouter (default load-balanced) ───────────────────────────────────
|
||||
if [[ -n "${OPENROUTER_API_KEY:-}" ]]; then
|
||||
echo
|
||||
echo "── via OpenRouter (default load-balancing) ──"
|
||||
for m in \
|
||||
"anthropic/claude-haiku-4.5" \
|
||||
"anthropic/claude-sonnet-4.5" \
|
||||
"anthropic/claude-3.5-haiku" \
|
||||
"openai/gpt-4o-mini" \
|
||||
"google/gemini-2.0-flash-001" \
|
||||
"meta-llama/llama-3.3-70b-instruct"
|
||||
do
|
||||
benchN "OR $m" \
|
||||
-X POST https://openrouter.ai/api/v1/chat/completions \
|
||||
-H "Content-Type: application/json" \
|
||||
-H "Authorization: Bearer $OPENROUTER_API_KEY" \
|
||||
-H "HTTP-Referer: https://rebreak.org" \
|
||||
-H "X-Title: ReBreak-Bench" \
|
||||
-d "$(openai_body "$m")"
|
||||
done
|
||||
|
||||
echo
|
||||
echo "── via OpenRouter + provider:{sort:latency} ──"
|
||||
for m in \
|
||||
"anthropic/claude-haiku-4.5" \
|
||||
"anthropic/claude-sonnet-4.5"
|
||||
do
|
||||
benchN "ORL $m" \
|
||||
-X POST https://openrouter.ai/api/v1/chat/completions \
|
||||
-H "Content-Type: application/json" \
|
||||
-H "Authorization: Bearer $OPENROUTER_API_KEY" \
|
||||
-H "HTTP-Referer: https://rebreak.org" \
|
||||
-H "X-Title: ReBreak-Bench" \
|
||||
-d "$(openai_body "$m" '{"provider":{"sort":"latency"}}')"
|
||||
done
|
||||
else
|
||||
echo; echo "(skip OpenRouter — OPENROUTER_API_KEY nicht gesetzt)"
|
||||
fi
|
||||
|
||||
# ── Anthropic direct ─────────────────────────────────────────────────────────
|
||||
if [[ -n "${ANTHROPIC_API_KEY:-}" ]]; then
|
||||
echo
|
||||
echo "── via Anthropic direkt ──"
|
||||
for m in claude-haiku-4-5 claude-sonnet-4-5; do
|
||||
benchN "ANT $m" \
|
||||
-X POST https://api.anthropic.com/v1/messages \
|
||||
-H "Content-Type: application/json" \
|
||||
-H "x-api-key: $ANTHROPIC_API_KEY" \
|
||||
-H "anthropic-version: 2023-06-01" \
|
||||
-d "$(anthropic_body "$m")"
|
||||
done
|
||||
else
|
||||
echo; echo "(skip Anthropic direkt — ANTHROPIC_API_KEY nicht gesetzt)"
|
||||
fi
|
||||
|
||||
# ── OpenAI direct ────────────────────────────────────────────────────────────
|
||||
if [[ -n "${OPENAI_API_KEY:-}" ]]; then
|
||||
echo
|
||||
echo "── via OpenAI direkt ──"
|
||||
for m in gpt-4o-mini gpt-4o; do
|
||||
benchN "OAI $m" \
|
||||
-X POST https://api.openai.com/v1/chat/completions \
|
||||
-H "Content-Type: application/json" \
|
||||
-H "Authorization: Bearer $OPENAI_API_KEY" \
|
||||
-d "$(openai_body "$m")"
|
||||
done
|
||||
else
|
||||
echo; echo "(skip OpenAI direkt — OPENAI_API_KEY nicht gesetzt)"
|
||||
fi
|
||||
|
||||
# ── Groq direct ──────────────────────────────────────────────────────────────
|
||||
if [[ -n "${GROQ_API_KEY:-}" ]]; then
|
||||
echo
|
||||
echo "── via Groq direkt (LPU hardware) ──"
|
||||
for m in llama-3.3-70b-versatile llama-3.1-8b-instant; do
|
||||
benchN "GRQ $m" \
|
||||
-X POST https://api.groq.com/openai/v1/chat/completions \
|
||||
-H "Content-Type: application/json" \
|
||||
-H "Authorization: Bearer $GROQ_API_KEY" \
|
||||
-d "$(openai_body "$m")"
|
||||
done
|
||||
else
|
||||
echo; echo "(skip Groq — GROQ_API_KEY nicht gesetzt)"
|
||||
fi
|
||||
|
||||
# ── Gemini direct ────────────────────────────────────────────────────────────
|
||||
GEM_KEY="${GEMINI_API_KEY:-${GOOGLE_GENERATIVE_AI_API_KEY:-}}"
|
||||
if [[ -n "$GEM_KEY" ]]; then
|
||||
echo
|
||||
echo "── via Gemini direkt ──"
|
||||
for m in gemini-2.0-flash gemini-1.5-flash; do
|
||||
benchN "GEM $m" \
|
||||
-X POST "https://generativelanguage.googleapis.com/v1beta/models/${m}:streamGenerateContent?key=${GEM_KEY}&alt=sse" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d "$(gemini_body)"
|
||||
done
|
||||
else
|
||||
echo; echo "(skip Gemini direkt — GEMINI_API_KEY nicht gesetzt)"
|
||||
fi
|
||||
|
||||
echo
|
||||
echo "═══════════════════════════════════════════════════════════════════"
|
||||
echo " done. min = bestes TTFB, p50 = median über $RUNS Runs"
|
||||
echo "═══════════════════════════════════════════════════════════════════"
|
||||
@ -10,9 +10,10 @@
|
||||
export default defineEventHandler(async (event) => {
|
||||
const user = await requireUser(event);
|
||||
const body = await readBody(event);
|
||||
const { messages, locale } = body as {
|
||||
const { messages, locale, llmProvider } = body as {
|
||||
messages: Array<{ role: "user" | "assistant"; content: string }>;
|
||||
locale?: string;
|
||||
llmProvider?: string;
|
||||
};
|
||||
|
||||
if (!messages || !Array.isArray(messages)) {
|
||||
@ -28,6 +29,7 @@ export default defineEventHandler(async (event) => {
|
||||
userId: user.id,
|
||||
messages,
|
||||
locale: locale ?? "de",
|
||||
llmProvider,
|
||||
createdAt: Date.now(),
|
||||
});
|
||||
|
||||
|
||||
@ -87,10 +87,6 @@ export default defineEventHandler(async (event) => {
|
||||
deleteSosSession(sessionId);
|
||||
|
||||
const config = useRuntimeConfig();
|
||||
const key = config.openrouterApiKey as string | undefined;
|
||||
if (!key) {
|
||||
throw createError({ statusCode: 503, message: "OpenRouter Key fehlt" });
|
||||
}
|
||||
|
||||
// System-Prompt: Coach-Basis + SOS-Streaming-Regeln
|
||||
const LANG: Record<string, string> = {
|
||||
@ -139,24 +135,46 @@ export default defineEventHandler(async (event) => {
|
||||
firstUserIdx > 0 ? messages.slice(firstUserIdx) : messages;
|
||||
const trimmed = conversation.slice(-8);
|
||||
|
||||
const upstream = await fetch(
|
||||
"https://openrouter.ai/api/v1/chat/completions",
|
||||
{
|
||||
method: "POST",
|
||||
headers: {
|
||||
Authorization: `Bearer ${key}`,
|
||||
"Content-Type": "application/json",
|
||||
"HTTP-Referer": "https://rebreak.org",
|
||||
"X-Title": "ReBreak SOS",
|
||||
},
|
||||
body: JSON.stringify({
|
||||
model: "anthropic/claude-sonnet-4.5",
|
||||
max_tokens: 400,
|
||||
stream: true,
|
||||
messages: [{ role: "system", content: systemPrompt }, ...trimmed],
|
||||
}),
|
||||
},
|
||||
);
|
||||
// LLM-Routing: client schickt llmProvider via /sos-session-Body (Toggle).
|
||||
// Default openrouter-sonnet. sort:latency bei Anthropic über OR spart 30-58% TTFB
|
||||
// (server-curl-bench gemessen). Groq bypasst OpenRouter-Hop für ~157ms TTFB.
|
||||
const llmProvider = sessionData.llmProvider ?? "openrouter-sonnet";
|
||||
let upstreamUrl: string;
|
||||
let upstreamKey: string | undefined;
|
||||
let upstreamModel: string;
|
||||
const upstreamHeaders: Record<string, string> = { "Content-Type": "application/json" };
|
||||
let upstreamProviderField: { sort: string } | undefined;
|
||||
if (llmProvider === "groq-llama") {
|
||||
upstreamUrl = "https://api.groq.com/openai/v1/chat/completions";
|
||||
upstreamKey = config.groqApiKey as string | undefined;
|
||||
upstreamModel = "llama-3.3-70b-versatile";
|
||||
} else {
|
||||
upstreamUrl = "https://openrouter.ai/api/v1/chat/completions";
|
||||
upstreamKey = config.openrouterApiKey as string | undefined;
|
||||
upstreamModel = llmProvider === "openrouter-haiku"
|
||||
? "anthropic/claude-haiku-4.5"
|
||||
: "anthropic/claude-sonnet-4.5";
|
||||
upstreamHeaders["HTTP-Referer"] = "https://rebreak.org";
|
||||
upstreamHeaders["X-Title"] = "ReBreak SOS";
|
||||
upstreamProviderField = { sort: "latency" };
|
||||
}
|
||||
if (!upstreamKey) {
|
||||
throw createError({ statusCode: 503, message: `API key for ${llmProvider} fehlt` });
|
||||
}
|
||||
upstreamHeaders.Authorization = `Bearer ${upstreamKey}`;
|
||||
console.log(`[coach/sos-stream] using provider=${llmProvider} model=${upstreamModel}`);
|
||||
|
||||
const upstream = await fetch(upstreamUrl, {
|
||||
method: "POST",
|
||||
headers: upstreamHeaders,
|
||||
body: JSON.stringify({
|
||||
model: upstreamModel,
|
||||
max_tokens: 400,
|
||||
stream: true,
|
||||
messages: [{ role: "system", content: systemPrompt }, ...trimmed],
|
||||
...(upstreamProviderField ? { provider: upstreamProviderField } : {}),
|
||||
}),
|
||||
});
|
||||
|
||||
if (!upstream.ok || !upstream.body) {
|
||||
const errText = await upstream.text().catch(() => "");
|
||||
|
||||
@ -69,10 +69,6 @@ export default defineEventHandler(async (event) => {
|
||||
deleteSosSession(sessionId);
|
||||
|
||||
const config = useRuntimeConfig();
|
||||
const key = config.openrouterApiKey as string | undefined;
|
||||
if (!key) {
|
||||
throw createError({ statusCode: 503, message: "OpenRouter Key fehlt" });
|
||||
}
|
||||
|
||||
// System-Prompt: Coach-Basis + SOS-Streaming-Regeln
|
||||
const LANG: Record<string, string> = {
|
||||
@ -90,24 +86,46 @@ export default defineEventHandler(async (event) => {
|
||||
firstUserIdx > 0 ? messages.slice(firstUserIdx) : messages;
|
||||
const trimmed = conversation.slice(-8);
|
||||
|
||||
const upstream = await fetch(
|
||||
"https://openrouter.ai/api/v1/chat/completions",
|
||||
{
|
||||
method: "POST",
|
||||
headers: {
|
||||
Authorization: `Bearer ${key}`,
|
||||
"Content-Type": "application/json",
|
||||
"HTTP-Referer": "https://rebreak.org",
|
||||
"X-Title": "ReBreak SOS",
|
||||
},
|
||||
body: JSON.stringify({
|
||||
model: "anthropic/claude-sonnet-4.5",
|
||||
max_tokens: 400,
|
||||
stream: true,
|
||||
messages: [{ role: "system", content: systemPrompt }, ...trimmed],
|
||||
}),
|
||||
},
|
||||
);
|
||||
// LLM-Routing: client schickt llmProvider via /sos-session-Body (Toggle).
|
||||
// Default openrouter-sonnet. sort:latency bei Anthropic über OR spart 30-58% TTFB
|
||||
// (server-curl-bench gemessen). Groq bypasst OpenRouter-Hop für ~157ms TTFB.
|
||||
const llmProvider = sessionData.llmProvider ?? "openrouter-sonnet";
|
||||
let upstreamUrl: string;
|
||||
let upstreamKey: string | undefined;
|
||||
let upstreamModel: string;
|
||||
const upstreamHeaders: Record<string, string> = { "Content-Type": "application/json" };
|
||||
let upstreamProviderField: { sort: string } | undefined;
|
||||
if (llmProvider === "groq-llama") {
|
||||
upstreamUrl = "https://api.groq.com/openai/v1/chat/completions";
|
||||
upstreamKey = config.groqApiKey as string | undefined;
|
||||
upstreamModel = "llama-3.3-70b-versatile";
|
||||
} else {
|
||||
upstreamUrl = "https://openrouter.ai/api/v1/chat/completions";
|
||||
upstreamKey = config.openrouterApiKey as string | undefined;
|
||||
upstreamModel = llmProvider === "openrouter-haiku"
|
||||
? "anthropic/claude-haiku-4.5"
|
||||
: "anthropic/claude-sonnet-4.5";
|
||||
upstreamHeaders["HTTP-Referer"] = "https://rebreak.org";
|
||||
upstreamHeaders["X-Title"] = "ReBreak SOS";
|
||||
upstreamProviderField = { sort: "latency" };
|
||||
}
|
||||
if (!upstreamKey) {
|
||||
throw createError({ statusCode: 503, message: `API key for ${llmProvider} fehlt` });
|
||||
}
|
||||
upstreamHeaders.Authorization = `Bearer ${upstreamKey}`;
|
||||
console.log(`[coach/sos-stream] using provider=${llmProvider} model=${upstreamModel}`);
|
||||
|
||||
const upstream = await fetch(upstreamUrl, {
|
||||
method: "POST",
|
||||
headers: upstreamHeaders,
|
||||
body: JSON.stringify({
|
||||
model: upstreamModel,
|
||||
max_tokens: 400,
|
||||
stream: true,
|
||||
messages: [{ role: "system", content: systemPrompt }, ...trimmed],
|
||||
...(upstreamProviderField ? { provider: upstreamProviderField } : {}),
|
||||
}),
|
||||
});
|
||||
|
||||
if (!upstream.ok || !upstream.body) {
|
||||
const errText = await upstream.text().catch(() => "");
|
||||
|
||||
70
backend/server/api/coach/speak-cartesia.post.ts
Normal file
70
backend/server/api/coach/speak-cartesia.post.ts
Normal file
@ -0,0 +1,70 @@
|
||||
/**
 * POST /api/coach/speak-cartesia
 *
 * Text-to-speech via Cartesia `sonic-2`, requested with `language: "de"`.
 * (Original author's note: fastest TTS, ~75 ms first byte, native German.)
 *
 * Request body: `{ text: string }` — other fields are ignored.
 * Response: `audio/mpeg`, streamed through from the upstream response.
 *
 * The voice comes from `runtimeConfig.cartesiaVoiceId`, then
 * `process.env.CARTESIA_VOICE_ID`, then `FALLBACK_VOICE_ID`.
 *
 * Errors: 400 when `text` is missing/blank/not a string, 503 when no API key
 * is configured, 502 when the upstream call fails.
 */
const FALLBACK_VOICE_ID = "b9de4a89-2257-424b-94c2-db18ba68c81a";

export default defineEventHandler(async (event) => {
  await requireUser(event);

  // The parsed body may be undefined (no JSON sent) or carry a non-string
  // `text`; validate explicitly so such requests get the 400 below instead of
  // an unhandled TypeError.
  const body: unknown = await readBody(event);
  const rawText =
    typeof body === "object" && body !== null
      ? (body as { text?: unknown }).text
      : undefined;
  const text = typeof rawText === "string" ? rawText : "";

  if (!text.trim()) {
    throw createError({ statusCode: 400, message: "text fehlt" });
  }

  const config = useRuntimeConfig();
  // runtimeConfig first, then the process environment as a fallback.
  const key =
    (config.cartesiaApiKey as string) || process.env.CARTESIA_API_KEY || "";
  const voiceId =
    (config.cartesiaVoiceId as string) ||
    process.env.CARTESIA_VOICE_ID ||
    FALLBACK_VOICE_ID;

  if (!key) {
    throw createError({
      statusCode: 503,
      message: "CARTESIA_API_KEY nicht konfiguriert",
    });
  }

  const upstream = await fetch("https://api.cartesia.ai/tts/bytes", {
    method: "POST",
    headers: {
      "X-API-Key": key,
      "Cartesia-Version": "2024-11-13",
      "Content-Type": "application/json",
    },
    body: JSON.stringify({
      model_id: "sonic-2",
      // Cap the transcript length sent upstream.
      transcript: text.slice(0, 4096),
      voice: { mode: "id", id: voiceId },
      output_format: {
        container: "mp3",
        sample_rate: 22050,
        bit_rate: 64000,
      },
      language: "de",
    }),
  });

  if (!upstream.ok || !upstream.body) {
    const err = await upstream.text().catch(() => "");
    console.error("[speak-cartesia] error:", upstream.status, err);
    throw createError({
      statusCode: 502,
      message: "Cartesia TTS fehlgeschlagen",
    });
  }

  setHeader(event, "Content-Type", "audio/mpeg");
  setHeader(event, "Cache-Control", "no-store");

  // Bridge the web ReadableStream from fetch() to a Node stream and pipe it
  // to the client without buffering the whole file.
  const { Readable } = await import("node:stream");
  const nodeStream = Readable.fromWeb(upstream.body as never);
  return sendStream(event, nodeStream);
});
|
||||
95
backend/server/api/coach/speak-elevenlabs.post.ts
Normal file
95
backend/server/api/coach/speak-elevenlabs.post.ts
Normal file
@ -0,0 +1,95 @@
|
||||
/**
 * POST /api/coach/speak-elevenlabs
 *
 * Text-to-speech via the ElevenLabs streaming endpoint using the
 * `eleven_turbo_v2_5` model.
 *
 * Request body: `{ text: string }` — other fields are ignored.
 * Response: `audio/mpeg`, streamed through from the upstream response.
 *
 * The voice comes from `runtimeConfig.elevenlabsVoiceId`, then
 * `process.env.ELEVENLABS_VOICE_ID`, then `FALLBACK_VOICE_ID` (Alexandra).
 * (Original author's note: the voice stays constant across calls — same as
 * the Gemini behaviour, no mode switch as with gpt-4o-mini-tts.)
 *
 * Errors: 400 when `text` is missing/blank/not a string, 503 when no API key
 * is configured, 502 when the upstream call fails.
 */
const FALLBACK_VOICE_ID = "kdmDKE6EkgrWrrykO9Qt"; // Alexandra

export default defineEventHandler(async (event) => {
  await requireUser(event);

  // The parsed body may be undefined (no JSON sent) or carry a non-string
  // `text`; validate explicitly so such requests get the 400 below instead of
  // an unhandled TypeError.
  const body: unknown = await readBody(event);
  const rawText =
    typeof body === "object" && body !== null
      ? (body as { text?: unknown }).text
      : undefined;
  const text = typeof rawText === "string" ? rawText : "";

  if (!text.trim()) {
    throw createError({ statusCode: 400, message: "text fehlt" });
  }

  const config = useRuntimeConfig();
  // Fallback chain: runtimeConfig, then process.env. Per the original note
  // this covers deployments where the secret is injected into the process
  // environment at start (Infisical / pm2) but not picked up by runtimeConfig.
  const key =
    (config.elevenlabsApiKey as string) || process.env.ELEVENLABS_API_KEY || "";
  const voiceId =
    (config.elevenlabsVoiceId as string) ||
    process.env.ELEVENLABS_VOICE_ID ||
    FALLBACK_VOICE_ID;

  // Diagnostics: only report WHERE a key was found, never anything derived
  // from the secret itself (the key length is deliberately not logged).
  console.log(
    "[speak-elevenlabs] cfg-key:",
    !!config.elevenlabsApiKey,
    "env-key:",
    !!process.env.ELEVENLABS_API_KEY,
    "voice:",
    voiceId,
  );

  if (!key) {
    throw createError({
      statusCode: 503,
      message: "ELEVENLABS_API_KEY nicht konfiguriert",
    });
  }

  console.log("[speak-elevenlabs] CALL recv, text-len=", text.length, "voice=", voiceId);

  // Original author's note: the /stream endpoint with
  // optimize_streaming_latency=4 (maximum latency optimisation, marginally
  // lower quality) delivers first bytes in ~200-300 ms instead of
  // 600-1000 ms on the non-streaming endpoint.
  // The voice id is configuration-supplied; encode it so it can never alter
  // the URL path.
  const upstream = await fetch(
    `https://api.elevenlabs.io/v1/text-to-speech/${encodeURIComponent(voiceId)}/stream?optimize_streaming_latency=4`,
    {
      method: "POST",
      headers: {
        "xi-api-key": key,
        "Content-Type": "application/json",
        Accept: "audio/mpeg",
      },
      body: JSON.stringify({
        // Cap the text length sent upstream.
        text: text.slice(0, 4096),
        // Original author's note: Turbo v2.5 is ~50% faster than
        // multilingual_v2 at marginally lower quality — accepted trade-off
        // for SOS, where latency matters more than studio polish.
        model_id: "eleven_turbo_v2_5",
        voice_settings: {
          stability: 0.5,
          similarity_boost: 0.75,
          style: 0.3,
          use_speaker_boost: true,
        },
        output_format: "mp3_22050_32",
      }),
    },
  );

  if (!upstream.ok || !upstream.body) {
    const err = await upstream.text().catch(() => "");
    console.error("[speak-elevenlabs] error:", upstream.status, err);
    throw createError({
      statusCode: 502,
      message: "ElevenLabs TTS fehlgeschlagen",
    });
  }

  setHeader(event, "Content-Type", "audio/mpeg");
  setHeader(event, "Cache-Control", "no-store");

  // Bridge the web ReadableStream from fetch() to a Node stream and pipe it
  // to the client without buffering the whole file.
  const { Readable } = await import("node:stream");
  const nodeStream = Readable.fromWeb(upstream.body as never);
  return sendStream(event, nodeStream);
});
|
||||
@ -55,6 +55,17 @@ export default defineEventHandler(async (event) => {
|
||||
});
|
||||
}
|
||||
|
||||
// Gemini-TTS interpretiert den raw `text`-Part manchmal als Prompt
|
||||
// statt als Vorlese-Auftrag (wenn Lyra-Antwort z.B. mit `?` endet → Modell
|
||||
// versucht zu antworten → 400 INVALID_ARGUMENT). Instruction-Prefix zwingt
|
||||
// strict-TTS-Mode + setzt warm-empathic-Tone für SOS.
|
||||
const promptText =
|
||||
"Read the following German text aloud, verbatim, in a warm, gentle, " +
|
||||
"empathic voice — like a calm friend on the phone. Speak slowly with " +
|
||||
"natural pauses. Soft delivery, low energy, no fake-cheerfulness. " +
|
||||
"Do not respond to or comment on the text — just read it.\n\n" +
|
||||
text.slice(0, 4096);
|
||||
|
||||
const upstream = await fetch(
|
||||
"https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash-preview-tts:generateContent",
|
||||
{
|
||||
@ -64,7 +75,7 @@ export default defineEventHandler(async (event) => {
|
||||
"x-goog-api-key": key,
|
||||
},
|
||||
body: JSON.stringify({
|
||||
contents: [{ parts: [{ text: text.slice(0, 4096) }] }],
|
||||
contents: [{ parts: [{ text: promptText }] }],
|
||||
generationConfig: {
|
||||
responseModalities: ["AUDIO"],
|
||||
speechConfig: {
|
||||
|
||||
@ -10,6 +10,8 @@ type SosSessionData = {
|
||||
userId: string;
|
||||
messages: Array<{ role: "user" | "assistant"; content: string }>;
|
||||
locale: string;
|
||||
/** A/B-Test: client wählt LLM via Toggle. Default openrouter-sonnet. */
|
||||
llmProvider?: string;
|
||||
createdAt: number;
|
||||
};
|
||||
|
||||
|
||||
65
ops/nginx/db-staging.rebreak.org.conf
Normal file
65
ops/nginx/db-staging.rebreak.org.conf
Normal file
@ -0,0 +1,65 @@
|
||||
server {
|
||||
listen 80;
|
||||
server_name db-staging.rebreak.org;
|
||||
return 301 https://db-staging.rebreak.org$request_uri;
|
||||
}
|
||||
|
||||
server {
|
||||
listen 443 ssl;
|
||||
server_name db-staging.rebreak.org;
|
||||
|
||||
ssl_certificate /etc/letsencrypt/live/db-staging.rebreak.org/fullchain.pem;
|
||||
ssl_certificate_key /etc/letsencrypt/live/db-staging.rebreak.org/privkey.pem;
|
||||
|
||||
location = / {
|
||||
default_type application/json;
|
||||
return 200 '{"status":"ok","env":"staging"}';
|
||||
}
|
||||
|
||||
# Direct WebSocket proxy to Supabase Realtime Rebreak-Staging (bypasses Kong)
|
||||
# Port 54353 is mapped from the realtime-rebreak-staging container in docker-compose.rebreak-staging.yml
|
||||
location /realtime/v1/ {
|
||||
rewrite ^/realtime/v1/(.*)$ /socket/$1 break;
|
||||
proxy_pass http://127.0.0.1:54353;
|
||||
proxy_http_version 1.1;
|
||||
proxy_set_header Upgrade $http_upgrade;
|
||||
proxy_set_header Connection "upgrade";
|
||||
proxy_set_header Host $host;
|
||||
proxy_set_header X-Real-IP $remote_addr;
|
||||
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
||||
proxy_set_header X-Forwarded-Proto $scheme;
|
||||
proxy_read_timeout 3600s;
|
||||
proxy_send_timeout 3600s;
|
||||
}
|
||||
|
||||
location / {
|
||||
client_max_body_size 50M;
|
||||
|
||||
if ($request_method = OPTIONS) {
|
||||
add_header Access-Control-Allow-Origin $http_origin always;
|
||||
add_header Access-Control-Allow-Credentials "true" always;
|
||||
add_header Access-Control-Allow-Methods "GET, POST, PUT, DELETE, PATCH, OPTIONS" always;
|
||||
add_header Access-Control-Allow-Headers "Authorization, apikey, x-client-info, content-type, range, x-upsert, accept, prefer, x-supabase-api-version, accept-profile, content-profile" always;
|
||||
add_header Access-Control-Max-Age 3600 always;
|
||||
return 204;
|
||||
}
|
||||
|
||||
proxy_pass http://127.0.0.1:54351;
|
||||
proxy_http_version 1.1;
|
||||
proxy_set_header Upgrade $http_upgrade;
|
||||
proxy_set_header Connection "upgrade";
|
||||
proxy_set_header Host $host;
|
||||
proxy_set_header X-Real-IP $remote_addr;
|
||||
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
||||
proxy_set_header X-Forwarded-Proto $scheme;
|
||||
|
||||
proxy_hide_header Access-Control-Allow-Origin;
|
||||
proxy_hide_header Access-Control-Allow-Credentials;
|
||||
proxy_hide_header Access-Control-Allow-Methods;
|
||||
proxy_hide_header Access-Control-Allow-Headers;
|
||||
proxy_hide_header Access-Control-Expose-Headers;
|
||||
|
||||
add_header Access-Control-Allow-Origin $http_origin always;
|
||||
add_header Access-Control-Allow-Credentials "true" always;
|
||||
}
|
||||
}
|
||||
64
ops/nginx/db.rebreak.org.conf
Normal file
64
ops/nginx/db.rebreak.org.conf
Normal file
@ -0,0 +1,64 @@
|
||||
server {
|
||||
listen 80;
|
||||
server_name db.rebreak.org;
|
||||
return 301 https://db.rebreak.org$request_uri;
|
||||
}
|
||||
|
||||
server {
|
||||
listen 443 ssl;
|
||||
server_name db.rebreak.org;
|
||||
|
||||
ssl_certificate /etc/letsencrypt/live/db.rebreak.org/fullchain.pem;
|
||||
ssl_certificate_key /etc/letsencrypt/live/db.rebreak.org/privkey.pem;
|
||||
|
||||
location = / {
|
||||
default_type application/json;
|
||||
return 200 '{"status":"ok"}';
|
||||
}
|
||||
|
||||
# Direct WebSocket proxy to Supabase Realtime (bypasses Kong)
|
||||
location /realtime/v1/ {
|
||||
rewrite ^/realtime/v1/(.*)$ /socket/$1 break;
|
||||
proxy_pass http://172.19.0.27:4000;
|
||||
proxy_http_version 1.1;
|
||||
proxy_set_header Upgrade $http_upgrade;
|
||||
proxy_set_header Connection "upgrade";
|
||||
proxy_set_header Host $host;
|
||||
proxy_set_header X-Real-IP $remote_addr;
|
||||
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
||||
proxy_set_header X-Forwarded-Proto $scheme;
|
||||
proxy_read_timeout 3600s;
|
||||
proxy_send_timeout 3600s;
|
||||
}
|
||||
|
||||
location / {
|
||||
client_max_body_size 50M;
|
||||
|
||||
if ($request_method = OPTIONS) {
|
||||
add_header Access-Control-Allow-Origin $http_origin always;
|
||||
add_header Access-Control-Allow-Credentials "true" always;
|
||||
add_header Access-Control-Allow-Methods "GET, POST, PUT, DELETE, PATCH, OPTIONS" always;
|
||||
add_header Access-Control-Allow-Headers "Authorization, apikey, x-client-info, content-type, range, x-upsert, accept, prefer, x-supabase-api-version, accept-profile, content-profile" always;
|
||||
add_header Access-Control-Max-Age 3600 always;
|
||||
return 204;
|
||||
}
|
||||
|
||||
proxy_pass http://127.0.0.1:54321;
|
||||
proxy_http_version 1.1;
|
||||
proxy_set_header Upgrade $http_upgrade;
|
||||
proxy_set_header Connection "upgrade";
|
||||
proxy_set_header Host $host;
|
||||
proxy_set_header X-Real-IP $remote_addr;
|
||||
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
||||
proxy_set_header X-Forwarded-Proto $scheme;
|
||||
|
||||
proxy_hide_header Access-Control-Allow-Origin;
|
||||
proxy_hide_header Access-Control-Allow-Credentials;
|
||||
proxy_hide_header Access-Control-Allow-Methods;
|
||||
proxy_hide_header Access-Control-Allow-Headers;
|
||||
proxy_hide_header Access-Control-Expose-Headers;
|
||||
|
||||
add_header Access-Control-Allow-Origin $http_origin always;
|
||||
add_header Access-Control-Allow-Credentials "true" always;
|
||||
}
|
||||
}
|
||||
32
ops/nginx/dns-staging.rebreak.de.conf
Normal file
32
ops/nginx/dns-staging.rebreak.de.conf
Normal file
@ -0,0 +1,32 @@
|
||||
# Plain-HTTP listener for dns-staging.rebreak.de: every request is answered
# with a permanent redirect to the same URI on HTTPS.
server {
    server_name dns-staging.rebreak.de;
    listen 80;

    return 301 https://dns-staging.rebreak.de$request_uri;
}
|
||||
|
||||
# HTTPS endpoint for dns-staging.rebreak.de: forwards /dns-query to a local
# upstream on port 5354 and serves a static /health probe.
server {
    listen 443 ssl;
    server_name dns-staging.rebreak.de;

    # NOTE(review): the certificate comes from the staging.rebreak.org lineage
    # although this server answers for dns-staging.rebreak.de — confirm that
    # certificate also lists this hostname.
    ssl_certificate /etc/letsencrypt/live/staging.rebreak.org/fullchain.pem;
    ssl_certificate_key /etc/letsencrypt/live/staging.rebreak.org/privkey.pem;
    include /etc/letsencrypt/options-ssl-nginx.conf;
    ssl_dhparam /etc/letsencrypt/ssl-dhparams.pem;

    location /dns-query {
        proxy_pass http://127.0.0.1:5354/dns-query;
        proxy_http_version 1.1;

        # Pass the original host, client address and scheme to the upstream.
        proxy_set_header Host $host;
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        proxy_set_header X-Forwarded-Proto $scheme;

        # Upstream timeouts: 5s to connect, 10s for each send/read.
        proxy_connect_timeout 5s;
        proxy_send_timeout 10s;
        proxy_read_timeout 10s;
    }

    location /health {
        # Set the type through default_type: `add_header Content-Type` would
        # emit a second Content-Type header next to the one nginx generates
        # for the `return` body.
        default_type text/plain;
        return 200 "OK\n";
    }
}
|
||||
28
ops/nginx/rebreak.org.conf
Normal file
28
ops/nginx/rebreak.org.conf
Normal file
@ -0,0 +1,28 @@
|
||||
# Plain-HTTP listener for rebreak.org and www.rebreak.org: both names are
# permanently redirected to https://rebreak.org with the request URI preserved.
server {
    server_name rebreak.org www.rebreak.org;
    listen 80;

    return 301 https://rebreak.org$request_uri;
}
|
||||
|
||||
# HTTPS site for rebreak.org: a basic-auth protected reverse proxy to the
# upstream on 127.0.0.1:3015.
# NOTE(review): the port-80 server also accepts www.rebreak.org, but only
# rebreak.org is named here — confirm how https://www.rebreak.org is served.
server {
    server_name rebreak.org;
    listen 443 ssl;

    ssl_certificate_key /etc/letsencrypt/live/rebreak.org/privkey.pem;
    ssl_certificate /etc/letsencrypt/live/rebreak.org/fullchain.pem;

    location / {
        # Request bodies are capped at 10 MB.
        client_max_body_size 10m;

        # Every request must authenticate against the htpasswd file.
        auth_basic "ReBreak – Coming Soon";
        auth_basic_user_file /etc/nginx/.htpasswd-rebreak;

        proxy_http_version 1.1;
        proxy_pass http://127.0.0.1:3015;

        # Pass the original host, client address and scheme to the upstream.
        proxy_set_header Host $host;
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        proxy_set_header X-Forwarded-Proto $scheme;

        # Forward the client's Upgrade header; "Connection: upgrade" is sent
        # to the upstream on every request.
        proxy_set_header Upgrade $http_upgrade;
        proxy_set_header Connection "upgrade";
    }
}
|
||||
41
ops/nginx/staging.rebreak.org.conf
Normal file
41
ops/nginx/staging.rebreak.org.conf
Normal file
@ -0,0 +1,41 @@
|
||||
# Plain-HTTP listener for staging.rebreak.org: every request is answered with
# a permanent redirect to the same URI on HTTPS.
server {
    server_name staging.rebreak.org;
    listen 80;

    return 301 https://staging.rebreak.org$request_uri;
}
|
||||
|
||||
# HTTPS site for staging.rebreak.org: proxies /mailpit/ to a local Mailpit
# instance and everything else to the upstream on 127.0.0.1:3016.
server {
    server_name staging.rebreak.org;
    listen 443 ssl;

    ssl_certificate_key /etc/letsencrypt/live/staging.rebreak.org/privkey.pem;
    ssl_certificate /etc/letsencrypt/live/staging.rebreak.org/fullchain.pem;
    ssl_dhparam /etc/letsencrypt/ssl-dhparams.pem;
    include /etc/letsencrypt/options-ssl-nginx.conf;

    # Ask crawlers not to index or follow anything on this host. Neither
    # location below sets its own add_header, so both inherit this one.
    add_header X-Robots-Tag "noindex, nofollow" always;

    # Mailpit – mail catcher web UI + REST API (staging/Cypress only).
    # proxy_pass carries a URI ("/"), so the /mailpit/ prefix is replaced by
    # "/" before the request is forwarded.
    location /mailpit/ {
        proxy_http_version 1.1;
        proxy_pass http://127.0.0.1:54360/;

        # Pass the original host, client address and scheme to the upstream.
        proxy_set_header Host $host;
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        proxy_set_header X-Forwarded-Proto $scheme;

        # Forward the client's Upgrade header; "Connection: upgrade" is sent
        # to the upstream on every request.
        proxy_set_header Upgrade $http_upgrade;
        proxy_set_header Connection "upgrade";
    }

    location / {
        # Request bodies are capped at 10 MB.
        client_max_body_size 10m;

        proxy_http_version 1.1;
        proxy_pass http://127.0.0.1:3016;

        # Pass the original host, client address and scheme to the upstream.
        proxy_set_header Host $host;
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        proxy_set_header X-Forwarded-Proto $scheme;

        # Forward the client's Upgrade header; "Connection: upgrade" is sent
        # to the upstream on every request.
        proxy_set_header Upgrade $http_upgrade;
        proxy_set_header Connection "upgrade";
    }
}
|
||||
22
ops/nginx/studio-staging.rebreak.org.conf
Normal file
22
ops/nginx/studio-staging.rebreak.org.conf
Normal file
@ -0,0 +1,22 @@
|
||||
# Plain-HTTP listener for studio-staging.rebreak.org: every request is
# answered with a permanent redirect to the same URI on HTTPS.
server {
    server_name studio-staging.rebreak.org;
    listen 80;

    return 301 https://studio-staging.rebreak.org$request_uri;
}
|
||||
|
||||
# HTTPS site for studio-staging.rebreak.org: reverse proxy to the upstream on
# 127.0.0.1:54332.
# NOTE(review): no access restriction is visible in this block — confirm the
# upstream enforces its own authentication.
server {
    server_name studio-staging.rebreak.org;
    listen 443 ssl;

    ssl_certificate_key /etc/letsencrypt/live/studio-staging.rebreak.org/privkey.pem;
    ssl_certificate /etc/letsencrypt/live/studio-staging.rebreak.org/fullchain.pem;

    location / {
        proxy_http_version 1.1;
        proxy_pass http://127.0.0.1:54332;

        # Pass the original host, client address and scheme to the upstream.
        proxy_set_header X-Forwarded-Proto $scheme;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header Host $host;
    }
}
|
||||
22
ops/nginx/studio.rebreak.org.conf
Normal file
22
ops/nginx/studio.rebreak.org.conf
Normal file
@ -0,0 +1,22 @@
|
||||
# Plain-HTTP listener for studio.rebreak.org: every request is answered with
# a permanent redirect to the same URI on HTTPS.
server {
    server_name studio.rebreak.org;
    listen 80;

    return 301 https://studio.rebreak.org$request_uri;
}
|
||||
|
||||
# HTTPS site for studio.rebreak.org: reverse proxy to the upstream on
# 127.0.0.1:54322.
# NOTE(review): no access restriction is visible in this block — confirm the
# upstream enforces its own authentication.
server {
    server_name studio.rebreak.org;
    listen 443 ssl;

    ssl_certificate_key /etc/letsencrypt/live/studio.rebreak.org/privkey.pem;
    ssl_certificate /etc/letsencrypt/live/studio.rebreak.org/fullchain.pem;

    location / {
        proxy_http_version 1.1;
        proxy_pass http://127.0.0.1:54322;

        # Pass the original host, client address and scheme to the upstream.
        proxy_set_header X-Forwarded-Proto $scheme;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header Host $host;
    }
}
|
||||
53
xgit
Executable file
53
xgit
Executable file
@ -0,0 +1,53 @@
|
||||
#!/usr/bin/env zsh
set -euo pipefail

# rebreak monorepo helper: git add . + commit + push origin <current-branch>.
# Usage: ./xgit "fix(backend/sos): single-instruction tts"
# Without a message argument the script prompts for one interactively.
# If backend files changed, it offers an optional nitro build before the push.

if ! git rev-parse --git-dir > /dev/null 2>&1; then
  echo "Error: aktuelles Verzeichnis ist kein Git-Repository" >&2
  exit 1
fi

if [ "$#" -gt 0 ]; then
  msg="$*"
else
  printf "Commit message: " >&2
  # `read` fails on EOF; fall through with an empty message so the check
  # below reports it instead of `set -e` ending the script silently.
  msg=""
  read -r msg || true
fi

if [ -z "${msg// /}" ]; then
  echo "Abbruch: Commit-Message leer" >&2
  exit 1
fi

# Backend files changed? -> offer build validation.
# The status output is captured first instead of being piped into grep: with
# `pipefail`, `grep -q` exiting on its first match can kill `git status` with
# SIGPIPE, which would turn the whole condition false despite a match.
changes=$(git status --porcelain)
if grep -qE '^.. backend/(server|prisma|nitro\.config)' <<< "$changes"; then
  printf "Backend-Files geändert. nitro build vor push? [Y/n] " >&2
  read -r run_build
  # Anything other than an explicit n/N (including plain Enter) runs the build.
  if [[ "$run_build" != "n" && "$run_build" != "N" ]]; then
    echo "Starte nitro build..."
    # Porcelain paths are relative to the repository root, so resolve the
    # backend directory from there rather than from the current directory.
    repo_root=$(git rev-parse --show-toplevel)
    (cd "$repo_root/backend" && pnpm build) || { echo "Build fehlgeschlagen – Commit abgebrochen." >&2; exit 1; }
    echo ""
  fi
fi

echo "Staging alle Änderungen..."
git add .

# Nothing staged after `git add .` means there is nothing to commit or push.
if git diff --cached --quiet; then
  echo "Keine Änderungen zum Commit vorhanden." >&2
  exit 0
fi

echo "Erstelle Commit: $msg"
git commit -m "$msg"

branch=$(git rev-parse --abbrev-ref HEAD)

# On a detached HEAD the command above prints the literal "HEAD"; pushing that
# fails with an obscure refname error, so report the cause explicitly.
if [ "$branch" = "HEAD" ]; then
  echo "Error: detached HEAD – kein Branch zum Pushen" >&2
  exit 1
fi

echo "Pushe Branch $branch nach origin..."
git push origin "$branch"

echo "Fertig."
|
||||
Loading…
x
Reference in New Issue
Block a user