refactor(mail): remove groq llm layer — deterministic pipeline only
User-Direktive: Mail-Filter bleibt auf dem deterministischen Score+Layer-2.5-Stack. Der Groq-LLM-Borderline-Call (Layer 4) wurde entfernt.

Layer 2.5 (Brand+Random) fängt den Apple-Hide-My-Email-Fall (icloud.com-Adressen mit kryptischen Local-Parts + Brand-DisplayName) weiterhin sauber via Hard-Block. Der Score-Mid-Range 25–79 entscheidet jetzt deterministisch: ≥50 → BLOCK, sonst PASS.

Damit sind auch die DSGVO-P0-Items aus dem Hans-Müller-Review obsolet (AVV-Annex Groq, Drittland-USA-Consent-Toggle, Datenschutzerklärung-Absatz).

- mail-classifier.ts: callGroqClassifier + redactLocalPartForLLM + groq-Feld raus
- scan.post.ts + scan-internal.post.ts: groqApiKey-Param raus, groq*-Sample-Felder raus
- mail-classifier.test.ts: Groq-Tests + redactLocalPart-Tests entfernt, 46 Tests grün

DB-Spalten in mail_classification_samples (groq_*) bleiben als legacy nullable — Cleanup-Migration optional in späterem Sprint.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
parent
343f9ab567
commit
f2e3c00943
@ -63,9 +63,7 @@ export default defineEventHandler(async (event) => {
|
|||||||
|
|
||||||
await deleteOldMailBlocked(userId);
|
await deleteOldMailBlocked(userId);
|
||||||
|
|
||||||
// Groq API Key aus runtimeConfig (Infisical-injiziert)
|
|
||||||
const config = useRuntimeConfig(event);
|
const config = useRuntimeConfig(event);
|
||||||
const groqApiKey: string = (config.groqApiKey as string) || process.env.GROQ_API_KEY || "";
|
|
||||||
const msClientId: string = (config.msOauthClientId as string) || process.env.MS_OAUTH_CLIENT_ID || "";
|
const msClientId: string = (config.msOauthClientId as string) || process.env.MS_OAUTH_CLIENT_ID || "";
|
||||||
|
|
||||||
let totalScanned = 0;
|
let totalScanned = 0;
|
||||||
@ -159,7 +157,6 @@ export default defineEventHandler(async (event) => {
|
|||||||
const result = await classifyMail({
|
const result = await classifyMail({
|
||||||
mail: { senderEmail, senderName, subject },
|
mail: { senderEmail, senderName, subject },
|
||||||
blockedDomainSet,
|
blockedDomainSet,
|
||||||
groqApiKey,
|
|
||||||
});
|
});
|
||||||
|
|
||||||
// Layer 5: Sample-Capture (immer, außer Layer 0)
|
// Layer 5: Sample-Capture (immer, außer Layer 0)
|
||||||
@ -174,9 +171,6 @@ export default defineEventHandler(async (event) => {
|
|||||||
features: result.features as unknown as Record<string, unknown>,
|
features: result.features as unknown as Record<string, unknown>,
|
||||||
finalAction: result.action,
|
finalAction: result.action,
|
||||||
triggerSource: result.triggerSource,
|
triggerSource: result.triggerSource,
|
||||||
groqIsGambling: result.groq?.isGambling ?? null,
|
|
||||||
groqConfidence: result.groq?.confidence ?? null,
|
|
||||||
groqReason: result.groq?.reason ?? null,
|
|
||||||
});
|
});
|
||||||
|
|
||||||
if (result.action !== "blocked") continue;
|
if (result.action !== "blocked") continue;
|
||||||
|
|||||||
@ -55,7 +55,6 @@ export default defineEventHandler(async (event) => {
|
|||||||
await deleteOldMailBlocked(user.id);
|
await deleteOldMailBlocked(user.id);
|
||||||
|
|
||||||
const config = useRuntimeConfig(event);
|
const config = useRuntimeConfig(event);
|
||||||
const groqApiKey: string = (config.groqApiKey as string) || process.env.GROQ_API_KEY || "";
|
|
||||||
const msClientId: string = (config.msOauthClientId as string) || process.env.MS_OAUTH_CLIENT_ID || "";
|
const msClientId: string = (config.msOauthClientId as string) || process.env.MS_OAUTH_CLIENT_ID || "";
|
||||||
|
|
||||||
let totalScanned = 0;
|
let totalScanned = 0;
|
||||||
@ -145,7 +144,6 @@ export default defineEventHandler(async (event) => {
|
|||||||
const result = await classifyMail({
|
const result = await classifyMail({
|
||||||
mail: { senderEmail, senderName, subject },
|
mail: { senderEmail, senderName, subject },
|
||||||
blockedDomainSet,
|
blockedDomainSet,
|
||||||
groqApiKey,
|
|
||||||
});
|
});
|
||||||
|
|
||||||
// Layer 5: Sample-Capture (immer, außer Layer 0)
|
// Layer 5: Sample-Capture (immer, außer Layer 0)
|
||||||
@ -160,9 +158,6 @@ export default defineEventHandler(async (event) => {
|
|||||||
features: result.features as unknown as Record<string, unknown>,
|
features: result.features as unknown as Record<string, unknown>,
|
||||||
finalAction: result.action,
|
finalAction: result.action,
|
||||||
triggerSource: result.triggerSource,
|
triggerSource: result.triggerSource,
|
||||||
groqIsGambling: result.groq?.isGambling ?? null,
|
|
||||||
groqConfidence: result.groq?.confidence ?? null,
|
|
||||||
groqReason: result.groq?.reason ?? null,
|
|
||||||
});
|
});
|
||||||
|
|
||||||
if (result.action !== "blocked") continue;
|
if (result.action !== "blocked") continue;
|
||||||
|
|||||||
@ -1,21 +1,19 @@
|
|||||||
/**
|
/**
|
||||||
* Mail-Klassifikations-Pipeline (Layer 0–4 + Sample-Capture).
|
* Mail-Klassifikations-Pipeline (deterministisch, ohne LLM).
|
||||||
*
|
*
|
||||||
* Architektur:
|
* Architektur:
|
||||||
* Layer 0 — Skip-Guard (bereits geblockt / kein Consent)
|
* Layer 0 — Skip-Guard (bereits geblockt / kein Consent)
|
||||||
* Layer 1 — Whitelist (wetter, wettkampf …) → PASS
|
* Layer 1 — Whitelist (wetter, wettkampf …) → PASS
|
||||||
* Layer 2 — Domain-Hard-Block (Blocklist)
|
* Layer 2 — Domain-Hard-Block (Blocklist)
|
||||||
* Layer 2.5 — Brand+Random-Token-Detection (Hard-Block ohne LLM)
|
* Layer 2.5 — Brand+Random-Token-Detection (Hard-Block, fängt Apple Hide-My-Email)
|
||||||
* Layer 3 — Score 0–100 (deterministisch)
|
* Layer 3 — Score 0–100 (deterministisch); ≥50 → BLOCK, sonst PASS
|
||||||
* Layer 4 — Groq-Borderline (Score 25–75, mit Local-Part-Redact)
|
|
||||||
* Layer 5 — MailClassificationSample-Insert (immer, außer Layer 0)
|
* Layer 5 — MailClassificationSample-Insert (immer, außer Layer 0)
|
||||||
*
|
*
|
||||||
* Alle Layer-Logiken sind pure Funktionen → vollständig unit-testbar ohne DB-Mocks.
|
* Alle Layer-Logiken sind pure Funktionen → vollständig unit-testbar ohne DB-Mocks.
|
||||||
*
|
*
|
||||||
* DSGVO-Hinweise:
|
* DSGVO-Hinweise:
|
||||||
* - Mail-Inhalte (Body) werden nie persistiert (Art. 9).
|
* - Mail-Inhalte (Body) werden nie persistiert (Art. 9).
|
||||||
* - Local-Part der Sender-Adresse wird vor dem Groq-Call redacted
|
* - Keine Daten verlassen mehr den Server (kein LLM-Drittland-Transfer).
|
||||||
* (es sei denn, er enthält selbst Casino-Keywords — dann ist er Detection-Signal).
|
|
||||||
* - userId in Logs nur wenn absolut nötig (Datenminimierung Art. 5).
|
* - userId in Logs nur wenn absolut nötig (Datenminimierung Art. 5).
|
||||||
* - MailClassificationSample: Cascade-Delete via userId-Relation (Art. 17).
|
* - MailClassificationSample: Cascade-Delete via userId-Relation (Art. 17).
|
||||||
*/
|
*/
|
||||||
@ -33,7 +31,6 @@ export type TriggerSource =
|
|||||||
| "relay-decoded"
|
| "relay-decoded"
|
||||||
| "brand+random"
|
| "brand+random"
|
||||||
| `score:${number}`
|
| `score:${number}`
|
||||||
| `llm:${string}`
|
|
||||||
| "whitelist"
|
| "whitelist"
|
||||||
| "no-signal";
|
| "no-signal";
|
||||||
|
|
||||||
@ -52,12 +49,6 @@ export interface ClassificationResult {
|
|||||||
score: number;
|
score: number;
|
||||||
/** Aus Relay-Adressen extrahierte echte Domain (z.B. gamblezen.com) */
|
/** Aus Relay-Adressen extrahierte echte Domain (z.B. gamblezen.com) */
|
||||||
relayDecodedDomain: string | null;
|
relayDecodedDomain: string | null;
|
||||||
/** Groq-Verdict (nur wenn Layer 4 lief) */
|
|
||||||
groq?: {
|
|
||||||
isGambling: boolean;
|
|
||||||
confidence: number;
|
|
||||||
reason: string;
|
|
||||||
};
|
|
||||||
/** Score-Komponenten für MailClassificationSample.features */
|
/** Score-Komponenten für MailClassificationSample.features */
|
||||||
features: ClassificationFeatures;
|
features: ClassificationFeatures;
|
||||||
}
|
}
|
||||||
@ -97,11 +88,12 @@ export const SCORE_WEIGHTS = {
|
|||||||
RANDOM_TOKENS_NO_BRAND: 10, // Random-Tokens ohne Brand-Match
|
RANDOM_TOKENS_NO_BRAND: 10, // Random-Tokens ohne Brand-Match
|
||||||
} as const;
|
} as const;
|
||||||
|
|
||||||
// Hard-Block-Threshold: Score >= 80 → BLOCK ohne LLM
|
// Hard-Block-Threshold: Score >= 80 → BLOCK
|
||||||
const SCORE_HARD_BLOCK_THRESHOLD = 80;
|
const SCORE_HARD_BLOCK_THRESHOLD = 80;
|
||||||
// Borderline-Range: 25–75 → Groq-Call
|
// Pass-Below: Score < 25 → PASS (no-signal)
|
||||||
const SCORE_BORDERLINE_LOW = 25;
|
const SCORE_PASS_BELOW = 25;
|
||||||
const SCORE_BORDERLINE_HIGH = 75;
|
// Mid-range Block-Threshold: Score in [25, 80) → BLOCK ab 50, sonst PASS
|
||||||
|
const SCORE_BLOCK_MIDRANGE = 50;
|
||||||
|
|
||||||
// ─── Bekannte Gambling-Brands (für Brand-Match-Normalisierung) ─────────────────
|
// ─── Bekannte Gambling-Brands (für Brand-Match-Normalisierung) ─────────────────
|
||||||
// Abgeleitet aus GAMBLING_KEYWORDS + typischen Blocklist-Domains.
|
// Abgeleitet aus GAMBLING_KEYWORDS + typischen Blocklist-Domains.
|
||||||
@ -203,25 +195,6 @@ export function hasRandomTokens(localPart: string): boolean {
|
|||||||
return randomLooking.length >= 2;
|
return randomLooking.length >= 2;
|
||||||
}
|
}
|
||||||
|
|
||||||
// ─── Local-Part-Redaction ─────────────────────────────────────────────────────
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Redacted den Local-Part einer E-Mail-Adresse vor dem Groq-Call (DSGVO).
|
|
||||||
*
|
|
||||||
* AUSNAHME: wenn der Local-Part selbst Gambling-Keywords enthält
|
|
||||||
* (z.B. "casino_offers_abc123@mailer.com"), bleibt er erhalten —
|
|
||||||
* er ist in diesem Fall ein Klassifikations-Signal, kein PII.
|
|
||||||
*/
|
|
||||||
export function redactLocalPartForLLM(
|
|
||||||
senderEmail: string,
|
|
||||||
localPartHasKeyword: boolean,
|
|
||||||
): string {
|
|
||||||
if (localPartHasKeyword) return senderEmail;
|
|
||||||
const atIdx = senderEmail.indexOf("@");
|
|
||||||
if (atIdx === -1) return senderEmail;
|
|
||||||
return `***${senderEmail.slice(atIdx)}`;
|
|
||||||
}
|
|
||||||
|
|
||||||
// ─── Score-Berechnung (Layer 3) ───────────────────────────────────────────────
|
// ─── Score-Berechnung (Layer 3) ───────────────────────────────────────────────
|
||||||
|
|
||||||
interface ScoreResult {
|
interface ScoreResult {
|
||||||
@ -340,93 +313,21 @@ export function computeScore(
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
// ─── Groq-LLM-Call (Layer 4) ─────────────────────────────────────────────────
|
|
||||||
|
|
||||||
interface GroqVerdict {
|
|
||||||
isGambling: boolean;
|
|
||||||
confidence: number;
|
|
||||||
reason: string;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Ruft Groq Llama 3.3 70B zur Borderline-Klassifikation auf.
|
|
||||||
* Sendet NUR: senderName, senderEmail (ggf. local-part-redacted), subject.
|
|
||||||
* KEIN Mail-Body, KEINE weiteren PII.
|
|
||||||
*/
|
|
||||||
export async function callGroqClassifier(params: {
|
|
||||||
senderName: string | null;
|
|
||||||
senderEmailRedacted: string;
|
|
||||||
subject: string;
|
|
||||||
groqApiKey: string;
|
|
||||||
}): Promise<GroqVerdict> {
|
|
||||||
const prompt = `You are a spam classifier for a gambling addiction recovery app.
|
|
||||||
Classify whether this email is from a gambling/betting operator.
|
|
||||||
|
|
||||||
Sender name: ${params.senderName ?? "(none)"}
|
|
||||||
Sender email: ${params.senderEmailRedacted}
|
|
||||||
Subject: ${params.subject}
|
|
||||||
|
|
||||||
Respond with ONLY valid JSON in this exact format:
|
|
||||||
{"isGambling": true/false, "confidence": 0.0-1.0, "reason": "one sentence"}
|
|
||||||
|
|
||||||
Do not include any other text.`;
|
|
||||||
|
|
||||||
const response = await fetch("https://api.groq.com/openai/v1/chat/completions", {
|
|
||||||
method: "POST",
|
|
||||||
headers: {
|
|
||||||
"Content-Type": "application/json",
|
|
||||||
Authorization: `Bearer ${params.groqApiKey}`,
|
|
||||||
},
|
|
||||||
body: JSON.stringify({
|
|
||||||
model: "llama-3.3-70b-versatile",
|
|
||||||
messages: [{ role: "user", content: prompt }],
|
|
||||||
temperature: 0,
|
|
||||||
max_tokens: 100,
|
|
||||||
response_format: { type: "json_object" },
|
|
||||||
}),
|
|
||||||
});
|
|
||||||
|
|
||||||
if (!response.ok) {
|
|
||||||
const errText = await response.text().catch(() => "");
|
|
||||||
throw new Error(`Groq API error ${response.status}: ${errText.slice(0, 200)}`);
|
|
||||||
}
|
|
||||||
|
|
||||||
const data = await response.json() as {
|
|
||||||
choices: { message: { content: string } }[];
|
|
||||||
};
|
|
||||||
|
|
||||||
const raw = data.choices?.[0]?.message?.content ?? "{}";
|
|
||||||
|
|
||||||
try {
|
|
||||||
const parsed = JSON.parse(raw) as Partial<GroqVerdict>;
|
|
||||||
return {
|
|
||||||
isGambling: Boolean(parsed.isGambling),
|
|
||||||
confidence: typeof parsed.confidence === "number" ? parsed.confidence : 0,
|
|
||||||
reason: typeof parsed.reason === "string" ? parsed.reason.slice(0, 300) : "",
|
|
||||||
};
|
|
||||||
} catch {
|
|
||||||
// JSON-Parse-Fehler → konservativ PASS (kein false-positive durch LLM-Fehler)
|
|
||||||
return { isGambling: false, confidence: 0, reason: "parse-error" };
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// ─── Haupt-Pipeline ───────────────────────────────────────────────────────────
|
// ─── Haupt-Pipeline ───────────────────────────────────────────────────────────
|
||||||
|
|
||||||
export interface ClassifyMailParams {
|
export interface ClassifyMailParams {
|
||||||
mail: MailInput;
|
mail: MailInput;
|
||||||
/** Menge der geblockten Domains (aus getBlocklistedDomainsSet) */
|
/** Menge der geblockten Domains (aus getBlocklistedDomainsSet) */
|
||||||
blockedDomainSet: Set<string>;
|
blockedDomainSet: Set<string>;
|
||||||
/** Groq API Key (aus runtimeConfig) — wenn leer, Layer 4 überspringen */
|
|
||||||
groqApiKey: string;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Klassifiziert eine einzelne Mail durch alle Layer.
|
* Klassifiziert eine einzelne Mail durch alle Layer.
|
||||||
* Pure bezüglich IO — Groq-Call ist die einzige externe Abhängigkeit.
|
* Komplett deterministisch — keine externen Calls, keine PII verlässt den Server.
|
||||||
* DB-Writes (MailBlocked, MailClassificationSample) liegen beim Aufrufer.
|
* DB-Writes (MailBlocked, MailClassificationSample) liegen beim Aufrufer.
|
||||||
*/
|
*/
|
||||||
export async function classifyMail(params: ClassifyMailParams): Promise<ClassificationResult> {
|
export async function classifyMail(params: ClassifyMailParams): Promise<ClassificationResult> {
|
||||||
const { mail, blockedDomainSet, groqApiKey } = params;
|
const { mail, blockedDomainSet } = params;
|
||||||
const { senderEmail, senderName, subject } = mail;
|
const { senderEmail, senderName, subject } = mail;
|
||||||
|
|
||||||
const senderEmailLower = senderEmail.toLowerCase();
|
const senderEmailLower = senderEmail.toLowerCase();
|
||||||
@ -580,8 +481,8 @@ export async function classifyMail(params: ClassifyMailParams): Promise<Classifi
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
// Score < 25 → PASS, kein LLM
|
// Score < 25 → PASS
|
||||||
if (score < SCORE_BORDERLINE_LOW) {
|
if (score < SCORE_PASS_BELOW) {
|
||||||
return {
|
return {
|
||||||
action: "passed",
|
action: "passed",
|
||||||
triggerSource: "no-signal",
|
triggerSource: "no-signal",
|
||||||
@ -597,53 +498,13 @@ export async function classifyMail(params: ClassifyMailParams): Promise<Classifi
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
// ── Layer 4: Groq-Borderline (25–75) ────────────────────────────────────────
|
// Score 25-79 → PASS bei < 50, BLOCK bei >= 50 (deterministisch, kein LLM)
|
||||||
if (score >= SCORE_BORDERLINE_LOW && score <= SCORE_BORDERLINE_HIGH && groqApiKey) {
|
const midAction: ClassificationAction =
|
||||||
// Local-Part-Redaction: nur behalten wenn er selbst Gambling-Keywords enthält
|
score >= SCORE_BLOCK_MIDRANGE ? "blocked" : "passed";
|
||||||
const localPartHasKeyword = (GAMBLING_KEYWORDS as string[]).some((kw: string) =>
|
const midTrigger: TriggerSource = `score:${score}`;
|
||||||
localPart.toLowerCase().includes(kw),
|
|
||||||
);
|
|
||||||
const senderEmailRedacted = redactLocalPartForLLM(senderEmailLower, localPartHasKeyword);
|
|
||||||
|
|
||||||
let groqVerdict: GroqVerdict | null = null;
|
|
||||||
try {
|
|
||||||
groqVerdict = await callGroqClassifier({
|
|
||||||
senderName,
|
|
||||||
senderEmailRedacted,
|
|
||||||
subject,
|
|
||||||
groqApiKey,
|
|
||||||
});
|
|
||||||
} catch (err) {
|
|
||||||
// LLM-Fehler → konservativ PASS (kein false-positive durch API-Ausfall)
|
|
||||||
console.warn("[mail-classifier] Groq call failed, falling back to score-based decision:", err);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (groqVerdict) {
|
|
||||||
const action: ClassificationAction = groqVerdict.isGambling ? "blocked" : "passed";
|
|
||||||
const triggerSource: TriggerSource = `llm:${groqVerdict.confidence.toFixed(2)}`;
|
|
||||||
return {
|
|
||||||
action,
|
|
||||||
triggerSource,
|
|
||||||
score,
|
|
||||||
relayDecodedDomain,
|
|
||||||
groq: groqVerdict,
|
|
||||||
features: {
|
|
||||||
...scoreResult,
|
|
||||||
domainBlocked: false,
|
|
||||||
relayDecoded: !!relayDecodedDomain,
|
|
||||||
brandMatch,
|
|
||||||
randomTokens,
|
|
||||||
},
|
|
||||||
};
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Fallback: Score 25–75 ohne Groq (API-Fehler oder kein Key) → PASS bei < 50, BLOCK bei >= 50
|
|
||||||
const fallbackAction: ClassificationAction = score >= 50 ? "blocked" : "passed";
|
|
||||||
const fallbackTrigger: TriggerSource = `score:${score}`;
|
|
||||||
return {
|
return {
|
||||||
action: fallbackAction,
|
action: midAction,
|
||||||
triggerSource: fallbackTrigger,
|
triggerSource: midTrigger,
|
||||||
score,
|
score,
|
||||||
relayDecodedDomain,
|
relayDecodedDomain,
|
||||||
features: {
|
features: {
|
||||||
|
|||||||
@ -1,17 +1,16 @@
|
|||||||
/**
|
/**
|
||||||
* Tests für mail-classifier.ts — Mail-Klassifikations-Pipeline.
|
* Tests für mail-classifier.ts — Mail-Klassifikations-Pipeline.
|
||||||
*
|
*
|
||||||
* Testet alle Layer-Logiken als pure Funktionen (kein DB-Mock, kein Groq-Mock).
|
* Testet alle Layer-Logiken als pure Funktionen (kein DB-Mock).
|
||||||
*
|
*
|
||||||
* Abgedeckt:
|
* Abgedeckt:
|
||||||
* - extractRelayedDomain() — diverse Relay-Patterns
|
* - extractRelayedDomain() — diverse Relay-Patterns
|
||||||
* - normalizeBrand() — Normalisierungs-Logik
|
* - normalizeBrand() — Normalisierungs-Logik
|
||||||
* - hasRandomTokens() — true/false cases
|
* - hasRandomTokens() — true/false cases
|
||||||
* - redactLocalPartForLLM() — keep vs redact
|
|
||||||
* - computeScore() — Score-Berechnung mit Weights
|
* - computeScore() — Score-Berechnung mit Weights
|
||||||
* - classifyMail() — End-to-End Pipeline:
|
* - classifyMail() — End-to-End Pipeline:
|
||||||
* - Gamblezen-Beispiel → Layer 2.5 Hard-Block (kein LLM-Call)
|
* - Gamblezen-Beispiel → Layer 2.5 Hard-Block
|
||||||
* - BetandPlay-Beispiel → Layer 2.5 Hard-Block (kein LLM-Call)
|
* - BetandPlay-Beispiel → Layer 2.5 Hard-Block (Apple Hide-My-Email-Pattern)
|
||||||
* - Whitelist-Case (wettervorhersage)
|
* - Whitelist-Case (wettervorhersage)
|
||||||
* - Domain-Block (Layer 2)
|
* - Domain-Block (Layer 2)
|
||||||
* - Relay-Decoded Block (Layer 2)
|
* - Relay-Decoded Block (Layer 2)
|
||||||
@ -42,7 +41,6 @@ import {
|
|||||||
extractRelayedDomain,
|
extractRelayedDomain,
|
||||||
normalizeBrand,
|
normalizeBrand,
|
||||||
hasRandomTokens,
|
hasRandomTokens,
|
||||||
redactLocalPartForLLM,
|
|
||||||
computeScore,
|
computeScore,
|
||||||
classifyMail,
|
classifyMail,
|
||||||
matchesGamblingBrand,
|
matchesGamblingBrand,
|
||||||
@ -183,29 +181,6 @@ describe("hasRandomTokens()", () => {
|
|||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
// ─── redactLocalPartForLLM ───────────────────────────────────────────────────
|
|
||||||
|
|
||||||
describe("redactLocalPartForLLM()", () => {
|
|
||||||
it("normale Adresse → local-part wird redacted", () => {
|
|
||||||
expect(redactLocalPartForLLM("user123@example.com", false))
|
|
||||||
.toBe("***@example.com");
|
|
||||||
});
|
|
||||||
|
|
||||||
it("Adresse mit Casino-Keyword im local-part → NICHT redacted", () => {
|
|
||||||
expect(redactLocalPartForLLM("casino_offers@mailer.net", true))
|
|
||||||
.toBe("casino_offers@mailer.net");
|
|
||||||
});
|
|
||||||
|
|
||||||
it("normal ohne Keyword-Flag → redacted", () => {
|
|
||||||
expect(redactLocalPartForLLM("a1b2c3_track@sendgrid.net", false))
|
|
||||||
.toBe("***@sendgrid.net");
|
|
||||||
});
|
|
||||||
|
|
||||||
it("Adresse ohne @ → unverändert zurückgegeben", () => {
|
|
||||||
expect(redactLocalPartForLLM("noatsign", false)).toBe("noatsign");
|
|
||||||
});
|
|
||||||
});
|
|
||||||
|
|
||||||
// ─── computeScore ────────────────────────────────────────────────────────────
|
// ─── computeScore ────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
describe("computeScore()", () => {
|
describe("computeScore()", () => {
|
||||||
@ -300,7 +275,6 @@ describe("classifyMail() — End-to-End Pipeline", () => {
|
|||||||
subject: "Dein exklusives Angebot wartet",
|
subject: "Dein exklusives Angebot wartet",
|
||||||
},
|
},
|
||||||
blockedDomainSet: domainSetWithGamblezen,
|
blockedDomainSet: domainSetWithGamblezen,
|
||||||
groqApiKey: "", // kein LLM erlaubt hier
|
|
||||||
});
|
});
|
||||||
|
|
||||||
// Relay-decoded domain matcht blocklist → Layer 2 (relay-decoded), NICHT Layer 2.5
|
// Relay-decoded domain matcht blocklist → Layer 2 (relay-decoded), NICHT Layer 2.5
|
||||||
@ -318,7 +292,6 @@ describe("classifyMail() — End-to-End Pipeline", () => {
|
|||||||
subject: "Dein exklusives Angebot wartet",
|
subject: "Dein exklusives Angebot wartet",
|
||||||
},
|
},
|
||||||
blockedDomainSet: emptyDomainSet,
|
blockedDomainSet: emptyDomainSet,
|
||||||
groqApiKey: "", // kein LLM-Call hier erwartet
|
|
||||||
});
|
});
|
||||||
|
|
||||||
expect(result.action).toBe("blocked");
|
expect(result.action).toBe("blocked");
|
||||||
@ -338,7 +311,6 @@ describe("classifyMail() — End-to-End Pipeline", () => {
|
|||||||
subject: "100€ Willkommensbonus — Nur heute!",
|
subject: "100€ Willkommensbonus — Nur heute!",
|
||||||
},
|
},
|
||||||
blockedDomainSet: domainSetWithBetandPlay,
|
blockedDomainSet: domainSetWithBetandPlay,
|
||||||
groqApiKey: "",
|
|
||||||
});
|
});
|
||||||
|
|
||||||
expect(result.action).toBe("blocked");
|
expect(result.action).toBe("blocked");
|
||||||
@ -354,7 +326,6 @@ describe("classifyMail() — End-to-End Pipeline", () => {
|
|||||||
subject: "100€ Willkommensbonus",
|
subject: "100€ Willkommensbonus",
|
||||||
},
|
},
|
||||||
blockedDomainSet: emptyDomainSet,
|
blockedDomainSet: emptyDomainSet,
|
||||||
groqApiKey: "",
|
|
||||||
});
|
});
|
||||||
|
|
||||||
expect(result.action).toBe("blocked");
|
expect(result.action).toBe("blocked");
|
||||||
@ -372,7 +343,6 @@ describe("classifyMail() — End-to-End Pipeline", () => {
|
|||||||
subject: "Wettervorhersage für morgen",
|
subject: "Wettervorhersage für morgen",
|
||||||
},
|
},
|
||||||
blockedDomainSet: emptyDomainSet,
|
blockedDomainSet: emptyDomainSet,
|
||||||
groqApiKey: "",
|
|
||||||
});
|
});
|
||||||
|
|
||||||
expect(result.action).toBe("passed");
|
expect(result.action).toBe("passed");
|
||||||
@ -387,7 +357,6 @@ describe("classifyMail() — End-to-End Pipeline", () => {
|
|||||||
subject: "Wettkampf-Ergebnisse dieser Woche",
|
subject: "Wettkampf-Ergebnisse dieser Woche",
|
||||||
},
|
},
|
||||||
blockedDomainSet: emptyDomainSet,
|
blockedDomainSet: emptyDomainSet,
|
||||||
groqApiKey: "",
|
|
||||||
});
|
});
|
||||||
|
|
||||||
expect(result.action).toBe("passed");
|
expect(result.action).toBe("passed");
|
||||||
@ -405,7 +374,6 @@ describe("classifyMail() — End-to-End Pipeline", () => {
|
|||||||
subject: "Dein Bonus wartet",
|
subject: "Dein Bonus wartet",
|
||||||
},
|
},
|
||||||
blockedDomainSet: domainSet,
|
blockedDomainSet: domainSet,
|
||||||
groqApiKey: "",
|
|
||||||
});
|
});
|
||||||
|
|
||||||
expect(result.action).toBe("blocked");
|
expect(result.action).toBe("blocked");
|
||||||
@ -424,7 +392,6 @@ describe("classifyMail() — End-to-End Pipeline", () => {
|
|||||||
subject: "Exklusiv für dich",
|
subject: "Exklusiv für dich",
|
||||||
},
|
},
|
||||||
blockedDomainSet: domainSet,
|
blockedDomainSet: domainSet,
|
||||||
groqApiKey: "",
|
|
||||||
});
|
});
|
||||||
|
|
||||||
expect(result.action).toBe("blocked");
|
expect(result.action).toBe("blocked");
|
||||||
@ -432,11 +399,9 @@ describe("classifyMail() — End-to-End Pipeline", () => {
|
|||||||
expect(result.relayDecodedDomain).toBe("rabona.com");
|
expect(result.relayDecodedDomain).toBe("rabona.com");
|
||||||
});
|
});
|
||||||
|
|
||||||
// ─── Layer 3: Score-Block (ohne LLM) ──────────────────────────────────────
|
// ─── Layer 3: Score-Hard-Block ────────────────────────────────────────────
|
||||||
it("Viele Signale → Score >= 80 → Hard-Block ohne LLM", async () => {
|
it("Viele Signale → Score >= 80 → Hard-Block", async () => {
|
||||||
// Casino im Sender-Name + Jackpot im Betreff + Urgency + Geld-Pattern
|
// Casino im Sender-Name + Jackpot im Betreff + Urgency + Geld-Pattern
|
||||||
const groqCallSpy = vi.fn();
|
|
||||||
|
|
||||||
const result = await classifyMail({
|
const result = await classifyMail({
|
||||||
mail: {
|
mail: {
|
||||||
senderEmail: "info@spinz-casino.example",
|
senderEmail: "info@spinz-casino.example",
|
||||||
@ -444,14 +409,11 @@ describe("classifyMail() — End-to-End Pipeline", () => {
|
|||||||
subject: "JACKPOT 500€ Freispiele — Nur heute!",
|
subject: "JACKPOT 500€ Freispiele — Nur heute!",
|
||||||
},
|
},
|
||||||
blockedDomainSet: emptyDomainSet,
|
blockedDomainSet: emptyDomainSet,
|
||||||
groqApiKey: "should-not-be-called",
|
|
||||||
});
|
});
|
||||||
|
|
||||||
expect(result.action).toBe("blocked");
|
expect(result.action).toBe("blocked");
|
||||||
expect(result.triggerSource).toMatch(/^score:/);
|
expect(result.triggerSource).toMatch(/^score:/);
|
||||||
expect(result.score).toBeGreaterThanOrEqual(80);
|
expect(result.score).toBeGreaterThanOrEqual(80);
|
||||||
// groqCallSpy wurde nicht gecallt weil wir fetch nicht mocken —
|
|
||||||
// aber score >= 80 bedeutet Layer 4 wird gar nicht erreicht
|
|
||||||
});
|
});
|
||||||
|
|
||||||
// ─── No-Signal → PASS ────────────────────────────────────────────────────
|
// ─── No-Signal → PASS ────────────────────────────────────────────────────
|
||||||
@ -463,7 +425,6 @@ describe("classifyMail() — End-to-End Pipeline", () => {
|
|||||||
subject: "Deine Bestellung wurde versandt",
|
subject: "Deine Bestellung wurde versandt",
|
||||||
},
|
},
|
||||||
blockedDomainSet: emptyDomainSet,
|
blockedDomainSet: emptyDomainSet,
|
||||||
groqApiKey: "",
|
|
||||||
});
|
});
|
||||||
|
|
||||||
expect(result.action).toBe("passed");
|
expect(result.action).toBe("passed");
|
||||||
@ -480,7 +441,6 @@ describe("classifyMail() — End-to-End Pipeline", () => {
|
|||||||
subject: "Willkommen",
|
subject: "Willkommen",
|
||||||
},
|
},
|
||||||
blockedDomainSet: emptyDomainSet,
|
blockedDomainSet: emptyDomainSet,
|
||||||
groqApiKey: "",
|
|
||||||
});
|
});
|
||||||
|
|
||||||
// Kein Hard-Block Layer 2.5 (kein Random), aber Score erhöht durch Brand-Match
|
// Kein Hard-Block Layer 2.5 (kein Random), aber Score erhöht durch Brand-Match
|
||||||
@ -500,7 +460,6 @@ describe("classifyMail() — End-to-End Pipeline", () => {
|
|||||||
subject: "Test",
|
subject: "Test",
|
||||||
},
|
},
|
||||||
blockedDomainSet: emptyDomainSet,
|
blockedDomainSet: emptyDomainSet,
|
||||||
groqApiKey: "",
|
|
||||||
});
|
});
|
||||||
|
|
||||||
expect(result.features).toHaveProperty("score");
|
expect(result.features).toHaveProperty("score");
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user