Versions: - expo: 53.0.0 → 54.0.34 - react-native: 0.79.6 → 0.81.5 - react: 19.0.0 → 19.1.0 - expo-router: 5.1.11 → 6.0.23 (major) - react-native-reanimated: 4.0.0 → 4.1.7 - react-native-worklets: 0.4.0 → 0.5.1 - react-native-screens: 4.11.1 → 4.16.0 - react-native-gesture-handler: 2.24.0 → 2.28.0 - @expo/metro-runtime: 5.0.5 → 6.1.2 - @types/react: → 19.2.14 - expo-av: 15.1.7 → 16.0.8 (still deprecated, last shipping in SDK 54) expo-file-system breaking change quick-fix: - New SDK 54 API is class-based (File/Directory/Paths). Legacy API `cacheDirectory` + `EncodingType` moved to `expo-file-system/legacy` sub-export. - 6 files updated to import from `expo-file-system/legacy` with TODO(sdk54) marker. Proper migration tracked as Task #14. Smoke-test: 0 TS errors, Metro bundles 2185 modules in 5.9s. Native binary still SDK 53 — Phase 5 prebuild --clean pending. Branch: upgrade/sdk-54, rollback tag: pre-sdk54-upgrade Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
253 lines
9.1 KiB
TypeScript
253 lines
9.1 KiB
TypeScript
// Sentence-Level TTS Queue für SOS-Streaming.
|
|
//
|
|
// Aufrufer (urge.tsx) erstellt eine neue Queue pro sendToLyra-Call und füttert
|
|
// sie via `enqueue(sentence)` aus dem `onSentence`-Callback von streamSosLyra.
|
|
// Die Queue fetched jeden Satz sofort beim enqueue (Pre-fetch), spielt aber
// sequenziell — kommt n+1 rein während n noch spielt, wartet nur das Playback
// bis n's Audio durch ist (kein doppeltes Sprechen).
|
|
//
|
|
// Lifecycle:
|
|
// - new SosTtsQueue({...}) → bereit, nichts spielt
|
|
// - enqueue(s1) → fetch + play s1
|
|
// - enqueue(s2) während s1 spielt → s2 wird sofort gefetcht, wartet in queue, play sobald s1 fertig
|
|
// - abort() → in-flight fetch cancelled, current sound stopped+unloaded, queue cleared
|
|
//
|
|
// State-Reporting via Callbacks: onStart (erster Satz beginnt zu spielen),
|
|
// onIdle (Queue komplett durch + nichts mehr spielt). UI-Layer kann darauf
|
|
// `setIsSpeaking` triggern.
|
|
import { Audio } from 'expo-av';
|
|
// TODO(sdk54): migrate to new expo-file-system class-based API (File/Directory/Paths) — see Task #14
|
|
import * as FileSystem from 'expo-file-system/legacy';
|
|
import type { BenchOnMetric } from './sosTtsBenchmark';
|
|
|
|
/** Connection settings used for every TTS request a queue sends. */
export type SosTtsFetchOpts = {
  /** Base URL; the endpoint path is appended to it verbatim. */
  apiBase: string;
  /** Token sent as `Authorization: Bearer <accessToken>`. */
  accessToken: string;
  /** Forwarded to the server in the `locale` field of the request body. */
  locale: string;
  /**
   * Server path of the TTS endpoint; falls back to the OpenAI route when omitted.
   * Allows A/B testing between /api/coach/speak-openai, /api/coach/speak-gemini
   * and /api/coach/speak-google.
   */
  endpoint?: string;
};
|
|
|
|
/** Full queue configuration: the fetch settings plus optional state/diagnostic callbacks. */
export type SosTtsQueueOpts = SosTtsFetchOpts & {
  /**
   * Called once per queue, when the first item is taken up for playback
   * (i.e. before its audio has necessarily finished loading).
   */
  onStart?: () => void;
  /** Called when the queue is empty and nothing is playing any more. Not called after an abort. */
  onIdle?: () => void;
  /** A single sentence failed to fetch or to play. The queue carries on with the next item. */
  onError?: (err: unknown, sentence: string) => void;
  /**
   * Latency benchmark hook. Handed only to the FIRST enqueued item, because that
   * item determines the first-audio latency the user perceives. Markers:
   * tts-fetch-start, tts-fetch-headers, tts-body-done, tts-file-written,
   * audio-loaded, first-audio. Follow-up items (e.g. sos-continuation) are not
   * instrumented — they overlap with the first item and would only skew the numbers.
   */
  onMetric?: BenchOnMetric;
};
|
|
|
|
/**
 * Matches characters that must not reach the TTS engine: pictographic emoji plus
 * the modifier/joiner code points that hold emoji sequences together (ZWJ,
 * variation selector, keycap mark, skin tones, regional indicators, tags).
 *
 * `\p{Emoji_Component}` also contains the ASCII characters `#`, `*` and `0`-`9`
 * (they are the base characters of keycap emoji). The negative lookahead keeps
 * those out of the match — without it every number would be deleted from the
 * spoken text.
 */
const EMOJI_RE = /\p{Extended_Pictographic}|(?![#*0-9])\p{Emoji_Component}/gu;

/**
 * Prepares a sentence for speech synthesis: strips emoji, collapses every run of
 * whitespace into a single space and trims both ends.
 *
 * @param text Raw sentence.
 * @returns The cleaned text; an empty string when only emoji/whitespace were present.
 */
function cleanForTts(text: string): string {
  return text.replace(EMOJI_RE, '').replace(/\s+/g, ' ').trim();
}
|
|
|
|
/**
 * Voice mode sent to the TTS endpoint in the `mode` body field: `'sos'` for an
 * opening segment, `'sos-continuation'` for follow-up blocks.
 */
export type SosTtsMode = 'sos' | 'sos-continuation';
|
|
|
|
/** One enqueued text segment together with its in-flight (or finished) audio download. */
type QueueItem = {
  /** Cleaned text that is sent to the TTS endpoint. */
  text: string;
  /** Voice mode forwarded to the server. */
  mode: SosTtsMode;
  /** Cancels this item's fetch. */
  controller: AbortController;
  /**
   * Benchmark hook. Only the first enqueued item receives one, because it
   * determines the first-audio latency the user perceives; follow-up items
   * (sos-continuation) are not tracked.
   */
  metric?: BenchOnMetric;
  /**
   * Pre-fetch that starts at enqueue time, so the audio is usually ready (or
   * nearly ready) once it is this item's turn to play. Removes the gap between
   * items in hybrid mode. Resolves to `null` when no audio could be obtained.
   */
  audioPromise: Promise<{ uri: string } | null>;
};
|
|
|
|
/**
 * Sentence-level TTS queue: every enqueued sentence is fetched immediately
 * (pre-fetch) but played strictly one after another.
 *
 * Resource handling:
 * - each item's audio is written to a temp file in the cache directory and the
 *   file is deleted again once the item has been played, skipped or aborted;
 * - `abort()` cancels every fetch that is still running (including the one of
 *   the item currently being processed), stops the current sound and unblocks
 *   the pending playback wait.
 */
export class SosTtsQueue {
  /** Items waiting for their turn. The item being processed has already been removed. */
  private queue: QueueItem[] = [];
  /** Item currently owned by `tick()`; tracked so `abort()` can cancel its fetch as well. */
  private currentItem: QueueItem | null = null;
  private playing = false;
  private currentSound: Audio.Sound | null = null;
  /** Resolves the playback wait of the current sound; only set while a sound is playing. */
  private finishPlayback: (() => void) | null = null;
  private aborted = false;
  private startedOnce = false;
  private readonly opts: SosTtsQueueOpts;
  // Dedup: in dev mode (React StrictMode) effects fire twice, so identical
  // sentences would be enqueued, fetched and played twice. We remember every
  // text this queue instance has already seen.
  private readonly seenTexts = new Set<string>();
  // Bench: only the FIRST enqueued item gets metric tracking.
  private metricGiven = false;

  constructor(opts: SosTtsQueueOpts) {
    this.opts = opts;
  }

  /**
   * Enqueue a text segment for TTS playback. Ignored after `abort()`, for text
   * that is empty after cleaning, and for text this instance has already seen.
   *
   * @param sentence Raw text; emoji are stripped and whitespace is normalised first.
   * @param mode Default 'sos'. Use 'sos-continuation' for follow-up blocks in
   *             hybrid mode; the value is forwarded to the server as `mode`.
   */
  enqueue(sentence: string, mode: SosTtsMode = 'sos'): void {
    if (this.aborted) return;
    const cleaned = cleanForTts(sentence);
    if (!cleaned) return;
    if (this.seenTexts.has(cleaned)) return;
    this.seenTexts.add(cleaned);

    // Start fetching right away so the download overlaps with the playback of
    // earlier items and the audio is usually ready when it is this item's turn.
    const controller = new AbortController();
    let metric: BenchOnMetric | undefined;
    if (!this.metricGiven && this.opts.onMetric) {
      this.metricGiven = true;
      metric = this.opts.onMetric;
    }
    const audioPromise = this.fetchAudio(cleaned, mode, controller.signal, metric).catch(
      (err: unknown) => {
        // A fetch cancelled through abort() rejects with an AbortError. That is
        // the requested outcome, not a failure worth reporting.
        if (!controller.signal.aborted) this.opts.onError?.(err, cleaned);
        return null;
      },
    );
    this.queue.push({ text: cleaned, mode, controller, metric, audioPromise });
    void this.tick();
  }

  /**
   * Stops everything: cancels all running fetches, drops the waiting items
   * (deleting audio they already downloaded), stops the current sound and
   * releases the pending playback wait. The queue stays unusable afterwards;
   * `onIdle` is not called.
   */
  abort(): void {
    this.aborted = true;

    const pending = this.queue;
    this.queue = [];
    // The item in progress is no longer part of `queue`, so cancel it explicitly.
    this.currentItem?.controller.abort();
    for (const item of pending) {
      item.controller.abort();
      void this.discardPrefetched(item);
    }

    const sound = this.currentSound;
    this.currentSound = null;
    if (sound) {
      void sound.stopAsync().catch(() => {});
    }
    // Unblock tick(): once the sound is unloaded no further status update is
    // guaranteed to arrive, so the wait must be ended from here. tick() then
    // unloads the sound and deletes its temp file.
    this.finishPlayback?.();
  }

  /** True while something is still queued or being processed (never after `abort()`). */
  isActive(): boolean {
    return !this.aborted && (this.playing || this.queue.length > 0);
  }

  /** Takes the next item off the queue, plays it, then continues or reports idle. */
  private async tick(): Promise<void> {
    if (this.aborted || this.playing) return;
    const item = this.queue.shift();
    if (!item) return;
    this.playing = true;
    this.currentItem = item;

    if (!this.startedOnce) {
      this.startedOnce = true;
      this.opts.onStart?.();
    }

    try {
      await this.playItem(item);
    } catch (err: unknown) {
      this.opts.onError?.(err, item.text);
    } finally {
      this.playing = false;
      this.currentItem = null;
      if (!this.aborted) {
        if (this.queue.length > 0) {
          void this.tick();
        } else {
          this.opts.onIdle?.();
        }
      }
    }
  }

  /**
   * Waits for the item's audio, plays it to the end and releases the sound and
   * temp file afterwards — also when playback is skipped, fails or is aborted.
   */
  private async playItem(item: QueueItem): Promise<void> {
    let sound: Audio.Sound | null = null;
    let uri: string | null = null;
    try {
      const audio = await item.audioPromise;
      if (!audio) return;
      uri = audio.uri;
      if (this.aborted) return;

      const created = await Audio.Sound.createAsync({ uri }, { shouldPlay: true });
      sound = created.sound;
      item.metric?.('audio-loaded');
      if (this.aborted) return;

      this.currentSound = sound;
      await this.waitForPlaybackEnd(sound, item.metric);
    } finally {
      this.currentSound = null;
      // Not awaited: the next item may start while the old sound is torn down.
      void this.release(sound, uri);
    }
  }

  /**
   * Resolves when the sound has finished playing or the queue was aborted;
   * rejects when the player reports a fatal error.
   */
  private waitForPlaybackEnd(sound: Audio.Sound, metric?: BenchOnMetric): Promise<void> {
    return new Promise<void>((resolve, reject) => {
      let settled = false;
      let firstAudioReported = false;

      const finish = (err?: Error): void => {
        if (settled) return;
        settled = true;
        this.finishPlayback = null;
        sound.setOnPlaybackStatusUpdate(null);
        if (err) {
          reject(err);
        } else {
          resolve();
        }
      };
      this.finishPlayback = () => finish();

      sound.setOnPlaybackStatusUpdate((status) => {
        if (this.aborted) {
          finish();
          return;
        }
        if (!status.isLoaded) {
          // An unloaded status carrying `error` means playback died; without
          // this branch the queue would wait forever for `didJustFinish`.
          if (status.error) finish(new Error(`TTS playback failed: ${status.error}`));
          return;
        }
        if (!firstAudioReported && status.isPlaying) {
          firstAudioReported = true;
          metric?.('first-audio');
        }
        if (status.didJustFinish) finish();
      });
    });
  }

  /** Best-effort teardown: unloads the sound (if any), then deletes the temp file (if any). */
  private async release(sound: Audio.Sound | null, uri: string | null): Promise<void> {
    if (sound) {
      await sound.unloadAsync().catch(() => {});
    }
    if (uri) {
      await FileSystem.deleteAsync(uri, { idempotent: true }).catch(() => {});
    }
  }

  /** Deletes the temp file of an item that was dropped before it got to play. */
  private async discardPrefetched(item: QueueItem): Promise<void> {
    try {
      const audio = await item.audioPromise;
      if (audio) await this.release(null, audio.uri);
    } catch {
      // audioPromise only rejects if the onError callback itself threw; in that
      // case no file URI is known and there is nothing to clean up.
    }
  }

  /** Base64-encodes raw bytes in 32 KiB slices so the argument spread stays small. */
  private static toBase64(bytes: Uint8Array): string {
    const chunkSize = 0x8000;
    const parts: string[] = [];
    for (let offset = 0; offset < bytes.length; offset += chunkSize) {
      parts.push(String.fromCharCode(...bytes.subarray(offset, offset + chunkSize)));
    }
    return btoa(parts.join(''));
  }

  /**
   * Requests the audio for `text` and stores it as a temp file in the cache directory.
   *
   * @param text Cleaned text to synthesise.
   * @param mode Voice mode, sent as `mode` in the request body.
   * @param signal Cancels the request; checked again after every async step.
   * @param metric Optional benchmark hook (fetch-start, fetch-headers, body-done, file-written).
   * @returns The file URI, or `null` when the request was aborted.
   * @throws Error on a non-OK HTTP status, a response without audio, or a missing
   *         cache directory; network and file-system errors propagate unchanged.
   */
  private async fetchAudio(
    text: string,
    mode: SosTtsMode,
    signal: AbortSignal,
    metric?: BenchOnMetric,
  ): Promise<{ uri: string } | null> {
    const endpoint = this.opts.endpoint ?? '/api/coach/speak-openai';
    metric?.('tts-fetch-start', { endpoint });
    const res = await fetch(`${this.opts.apiBase}${endpoint}`, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        Authorization: `Bearer ${this.opts.accessToken}`,
      },
      body: JSON.stringify({ text, locale: this.opts.locale, mode }),
      signal,
    });
    metric?.('tts-fetch-headers', { status: res.status });
    if (signal.aborted) return null;
    if (!res.ok) {
      throw new Error(`TTS request to ${endpoint} failed with HTTP ${res.status}`);
    }

    // /speak-google returns JSON { audio: "data:audio/mp3;base64,..." }.
    // /speak-openai (audio/mpeg) and /speak-gemini (audio/wav) return the body
    // as raw bytes — the same pipeline works for both.
    let base64: string;
    let ext: 'mp3' | 'wav';
    if (endpoint.endsWith('/speak-google')) {
      const json = (await res.json()) as { audio?: unknown };
      metric?.('tts-body-done');
      if (signal.aborted) return null;
      const dataUri = typeof json.audio === 'string' ? json.audio : '';
      const comma = dataUri.indexOf(',');
      base64 = comma === -1 ? '' : dataUri.slice(comma + 1);
      ext = 'mp3';
    } else {
      const buffer = await res.arrayBuffer();
      metric?.('tts-body-done', { bytes: buffer.byteLength });
      if (signal.aborted) return null;
      base64 = SosTtsQueue.toBase64(new Uint8Array(buffer));
      ext = endpoint.endsWith('/speak-gemini') ? 'wav' : 'mp3';
    }
    if (!base64) {
      throw new Error(`TTS response from ${endpoint} contained no audio`);
    }

    const cacheDir = FileSystem.cacheDirectory;
    if (!cacheDir) {
      throw new Error('FileSystem.cacheDirectory is not available on this platform');
    }
    const tmpPath = `${cacheDir}sos-tts-q-${Date.now()}-${Math.random().toString(36).slice(2, 8)}.${ext}`;
    await FileSystem.writeAsStringAsync(tmpPath, base64, {
      encoding: FileSystem.EncodingType.Base64,
    });
    metric?.('tts-file-written');
    if (signal.aborted) {
      // Nobody will ever play this file; do not leave it behind in the cache.
      await this.release(null, tmpPath);
      return null;
    }
    return { uri: tmpPath };
  }
}
|