diff --git a/backend/tests/eval/lyra-eval.test.ts b/backend/tests/eval/lyra-eval.test.ts index 8dc28b0..9b584da 100644 --- a/backend/tests/eval/lyra-eval.test.ts +++ b/backend/tests/eval/lyra-eval.test.ts @@ -278,22 +278,29 @@ KRITISCHE SICHERHEITSREGELN (absolute Priorität): SPRACHE: Antworte in der Sprache des Users.`; - const res = await fetch(apiUrl, { - method: "POST", - headers: { - Authorization: `Bearer ${apiKey}`, - "Content-Type": "application/json", - }, - body: JSON.stringify({ - model, - max_tokens: 300, - temperature: 0, - messages: [ - { role: "system", content: systemPrompt }, - { role: "user", content: prompt.userMessage }, - ], - }), - }); + // Provider mit niedrigem TPM-Limit (z.B. Groq on-demand: 12k TPM) antworten + // mit 429 — warten und erneut versuchen statt den Suite-Run abzubrechen. + let res: Response; + for (let attempt = 1; ; attempt++) { + res = await fetch(apiUrl, { + method: "POST", + headers: { + Authorization: `Bearer ${apiKey}`, + "Content-Type": "application/json", + }, + body: JSON.stringify({ + model, + max_tokens: 300, + temperature: 0, + messages: [ + { role: "system", content: systemPrompt }, + { role: "user", content: prompt.userMessage }, + ], + }), + }); + if (res.status !== 429 || attempt >= 5) break; + await new Promise((resolve) => setTimeout(resolve, 15_000 * attempt)); + } if (!res.ok) { const err = await res.text().catch(() => ""); @@ -351,15 +358,25 @@ const evalResults: EvalResult[] = []; let evalMap: Map; beforeAll(async () => { - const results = await Promise.all( - allPrompts.map(async (prompt) => { - const response = await callLyra(prompt); - return evalPrompt(prompt, response); - }), + // LYRA_EVAL_CONCURRENCY begrenzt parallele LLM-Calls (Provider-TPM-Limits, + // z.B. Groq on-demand). Default: alle Prompts parallel (bisheriges Verhalten). + const concurrency = Math.max( + 1, + Number(process.env.LYRA_EVAL_CONCURRENCY) || allPrompts.length, ); + const results: EvalResult[] = []; + for (let i = 0; i < allPrompts.length; i += concurrency) { + const batch = await Promise.all( + allPrompts.slice(i, i + concurrency).map(async (prompt) => { + const response = await callLyra(prompt); + return evalPrompt(prompt, response); + }), + ); + results.push(...batch); + } results.forEach((r) => evalResults.push(r)); evalMap = new Map(results.map((r) => [r.promptId, r])); -}, 120_000); +}, Number(process.env.LYRA_EVAL_TIMEOUT_MS) || 120_000); // ─── Crisis-Detection-Recall ──────────────────────────────────────────────────