test(eval): Concurrency-Limit + 429-Retry für Lyra-Eval-Suite
LYRA_EVAL_CONCURRENCY (Batch statt Promise.all-30-parallel) und LYRA_EVAL_TIMEOUT_MS als Env-Schalter, plus 429-Retry mit Backoff — nötig für Live-Runs gegen Provider mit niedrigem TPM-Limit (Groq on-demand 12k). Default-Verhalten unverändert; Prompts/Regeln/System-Prompt nicht angefasst. Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
parent
2c33ba55a4
commit
444688b6e9
@ -278,22 +278,29 @@ KRITISCHE SICHERHEITSREGELN (absolute Priorität):
|
|||||||
|
|
||||||
SPRACHE: Antworte in der Sprache des Users.`;
|
SPRACHE: Antworte in der Sprache des Users.`;
|
||||||
|
|
||||||
const res = await fetch(apiUrl, {
|
// Provider mit niedrigem TPM-Limit (z.B. Groq on-demand: 12k TPM) antworten
|
||||||
method: "POST",
|
// mit 429 — warten und erneut versuchen statt den Suite-Run abzubrechen.
|
||||||
headers: {
|
let res: Response;
|
||||||
Authorization: `Bearer ${apiKey}`,
|
for (let attempt = 1; ; attempt++) {
|
||||||
"Content-Type": "application/json",
|
res = await fetch(apiUrl, {
|
||||||
},
|
method: "POST",
|
||||||
body: JSON.stringify({
|
headers: {
|
||||||
model,
|
Authorization: `Bearer ${apiKey}`,
|
||||||
max_tokens: 300,
|
"Content-Type": "application/json",
|
||||||
temperature: 0,
|
},
|
||||||
messages: [
|
body: JSON.stringify({
|
||||||
{ role: "system", content: systemPrompt },
|
model,
|
||||||
{ role: "user", content: prompt.userMessage },
|
max_tokens: 300,
|
||||||
],
|
temperature: 0,
|
||||||
}),
|
messages: [
|
||||||
});
|
{ role: "system", content: systemPrompt },
|
||||||
|
{ role: "user", content: prompt.userMessage },
|
||||||
|
],
|
||||||
|
}),
|
||||||
|
});
|
||||||
|
if (res.status !== 429 || attempt >= 5) break;
|
||||||
|
await new Promise((resolve) => setTimeout(resolve, 15_000 * attempt));
|
||||||
|
}
|
||||||
|
|
||||||
if (!res.ok) {
|
if (!res.ok) {
|
||||||
const err = await res.text().catch(() => "");
|
const err = await res.text().catch(() => "");
|
||||||
@ -351,15 +358,25 @@ const evalResults: EvalResult[] = [];
|
|||||||
let evalMap: Map<string, EvalResult>;
|
let evalMap: Map<string, EvalResult>;
|
||||||
|
|
||||||
beforeAll(async () => {
|
beforeAll(async () => {
|
||||||
const results = await Promise.all(
|
// LYRA_EVAL_CONCURRENCY begrenzt parallele LLM-Calls (Provider-TPM-Limits,
|
||||||
allPrompts.map(async (prompt) => {
|
// z.B. Groq on-demand). Default: alle Prompts parallel (bisheriges Verhalten).
|
||||||
const response = await callLyra(prompt);
|
const concurrency = Math.max(
|
||||||
return evalPrompt(prompt, response);
|
1,
|
||||||
}),
|
Number(process.env.LYRA_EVAL_CONCURRENCY) || allPrompts.length,
|
||||||
);
|
);
|
||||||
|
const results: EvalResult[] = [];
|
||||||
|
for (let i = 0; i < allPrompts.length; i += concurrency) {
|
||||||
|
const batch = await Promise.all(
|
||||||
|
allPrompts.slice(i, i + concurrency).map(async (prompt) => {
|
||||||
|
const response = await callLyra(prompt);
|
||||||
|
return evalPrompt(prompt, response);
|
||||||
|
}),
|
||||||
|
);
|
||||||
|
results.push(...batch);
|
||||||
|
}
|
||||||
results.forEach((r) => evalResults.push(r));
|
results.forEach((r) => evalResults.push(r));
|
||||||
evalMap = new Map(results.map((r) => [r.promptId, r]));
|
evalMap = new Map(results.map((r) => [r.promptId, r]));
|
||||||
}, 120_000);
|
}, Number(process.env.LYRA_EVAL_TIMEOUT_MS) || 120_000);
|
||||||
|
|
||||||
// ─── Crisis-Detection-Recall ──────────────────────────────────────────────────
|
// ─── Crisis-Detection-Recall ──────────────────────────────────────────────────
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user