User-Direktive: Mail-Filter bleibt auf dem deterministischen Score+Layer-2.5-Stack. Der Groq-LLM-Borderline-Call (Layer 4) wurde entfernt. Layer 2.5 (Brand+Random) fängt den Apple-Hide-My-Email-Fall (icloud.com-Adressen mit kryptischen Local-Parts + Brand-DisplayName) weiterhin sauber via Hard-Block. Der Score-Mid-Range 25-79 entscheidet jetzt deterministisch: ≥50 → BLOCK, sonst PASS. Damit sind auch die DSGVO-P0-Items aus dem Hans-Müller-Review obsolet (AVV-Annex Groq, Drittland-USA-Consent-Toggle, Datenschutzerklärung-Absatz).

- mail-classifier.ts: callGroqClassifier + redactLocalPartForLLM + groq-Feld raus
- scan.post.ts + scan-internal.post.ts: groqApiKey-Param raus, groq*-Sample-Felder raus
- mail-classifier.test.ts: Groq-Tests + redactLocalPart-Tests entfernt, 46 Tests grün

DB-Spalten in mail_classification_samples (groq_*) bleiben als legacy nullable — Cleanup-Migration optional in späterem Sprint.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
477 lines
17 KiB
TypeScript
477 lines
17 KiB
TypeScript
/**
 * Tests für mail-classifier.ts — Mail-Klassifikations-Pipeline.
 *
 * Testet alle Layer-Logiken als pure Funktionen (kein DB-Mock).
 *
 * Abgedeckt:
 * - extractRelayedDomain() — diverse Relay-Patterns
 * - normalizeBrand() — Normalisierungs-Logik
 * - matchesGamblingBrand() — Brand-Erkennung (Treffer / Nicht-Treffer)
 * - hasRandomTokens() — true/false cases
 * - computeScore() — Score-Berechnung mit Weights
 * - classifyMail() — End-to-End Pipeline:
 *   - Gamblezen-Beispiel → Layer 2.5 Hard-Block
 *   - BetandPlay-Beispiel → Layer 2.5 Hard-Block (Apple Hide-My-Email-Pattern)
 *   - Whitelist-Case (wettervorhersage)
 *   - Domain-Block (Layer 2)
 *   - Relay-Decoded Block (Layer 2)
 *   - Score-Hard-Block (Layer 3)
 *   - No-Signal → PASS
 */
|
|
import { describe, it, expect, vi } from "vitest";
|
|
|
|
// gambling-keywords.mjs is ESM without TypeScript, so it is mocked before the
// classifier is imported. Everything the factory needs is declared inside it,
// because vitest hoists vi.mock() calls to the top of the module.
vi.mock("../../server/utils/gambling-keywords.mjs", () => {
  // Keyword list served to the classifier in place of the real module.
  const keywords = [
    "casino",
    "bet365",
    "bwin",
    "tipico",
    "unibet",
    "betway",
    "pokerstars",
    "jackpot",
    "freispiel",
    "free spin",
    "bonus code",
    "auszahlung",
    "glücksspiel",
    "slots",
    "roulette",
    "wette",
    "stake",
    "rolletto",
    "vbet",
    "1xbet",
    "melbet",
    "mostbet",
    "luckyvibe",
    "spinz",
    "casinoly",
    "rabona",
    "justcasino",
    "getslots",
    "rocketplay",
    "freshcasino",
    "betano",
    "leovegas",
  ];

  // Whitelist terms served in place of the real module.
  const whitelist = [
    "wettervorhersage",
    "wetter",
    "wetterbericht",
    "wettkampf",
    "wettbewerb",
  ];

  return {
    GAMBLING_KEYWORDS: keywords,
    GAMBLING_WHITELIST: whitelist,
  };
});
|
|
|
|
import {
|
|
extractRelayedDomain,
|
|
normalizeBrand,
|
|
hasRandomTokens,
|
|
computeScore,
|
|
classifyMail,
|
|
matchesGamblingBrand,
|
|
} from "../../server/utils/mail-classifier";
|
|
|
|
// ─── extractRelayedDomain ────────────────────────────────────────────────────
|
|
|
|
describe("extractRelayedDomain()", () => {
  // Table-driven suite: one row per test case. `expected: null` means the
  // address must not be recognised as a relay pattern.
  const cases = [
    {
      title: "extrahiert Domain aus SendGrid-bounce-Pattern (user=domain@sendgrid)",
      input: "bounces+user=gamblezen.com@sendgrid.net",
      expected: "gamblezen.com",
    },
    {
      title: "extrahiert Domain aus Mailchimp-Track-Pattern (track.user=domain@mc)",
      input: "track.user=betandplay.com@mailchimp.com",
      expected: "betandplay.com",
    },
    {
      title: "extrahiert Domain aus _at_-Pattern",
      input: "a1b2c3_user_at_betandplay.com@em.example.com",
      expected: "betandplay.com",
    },
    {
      title: "gibt null zurück wenn kein Relay-Pattern erkannt",
      input: "info@betandplay.com",
      expected: null,
    },
    {
      title: "gibt null zurück für direkte Adressen ohne @",
      input: "noatsign",
      expected: null,
    },
    {
      title: "normalisiert extrahierte Domain auf lowercase",
      input: "bounce=GambleZen.COM@delivery.net",
      expected: "gamblezen.com",
    },
    {
      title: "gibt null zurück für normale Adressen ohne Relay-Muster",
      input: "newsletter@example.org",
      expected: null,
    },
  ];

  for (const { title, input, expected } of cases) {
    it(title, () => {
      // toBe(null) performs the same check as toBeNull().
      expect(extractRelayedDomain(input)).toBe(expected);
    });
  }
});
|
|
|
|
// ─── normalizeBrand ──────────────────────────────────────────────────────────
|
|
|
|
describe("normalizeBrand()", () => {
  // Table-driven suite: raw brand spelling and the normalised form expected
  // back from normalizeBrand().
  const cases = [
    { title: "BetandPlay → betandplay", input: "BetandPlay", expected: "betandplay" },
    { title: "bet-and-play → betandplay", input: "bet-and-play", expected: "betandplay" },
    { title: "Gamble Zen → gamblezen", input: "Gamble Zen", expected: "gamblezen" },
    { title: "Mr. Green → mrgreen", input: "Mr. Green", expected: "mrgreen" },
    { title: "lucky_vibe → luckyvibe", input: "lucky_vibe", expected: "luckyvibe" },
    {
      title: "unveränderte Kleinbuchstaben bleiben gleich",
      input: "casino",
      expected: "casino",
    },
  ];

  for (const { title, input, expected } of cases) {
    it(title, () => {
      expect(normalizeBrand(input)).toBe(expected);
    });
  }
});
|
|
|
|
// ─── matchesGamblingBrand ────────────────────────────────────────────────────
|
|
|
|
describe("matchesGamblingBrand()", () => {
  // Table-driven suite: already-normalised brand candidate and whether it is
  // expected to be reported as a gambling brand.
  const cases = [
    { title: "'gamblezen' matcht", candidate: "gamblezen", expected: true },
    { title: "'betandplay' matcht", candidate: "betandplay", expected: true },
    { title: "'casino' matcht (exact)", candidate: "casino", expected: true },
    { title: "'mrgreen' matcht", candidate: "mrgreen", expected: true },
    { title: "'example' matcht nicht", candidate: "example", expected: false },
    {
      title: "zu kurze Strings (< 4 Zeichen) matchen nie",
      candidate: "bet",
      expected: false,
    },
    { title: "'googlemail' matcht nicht", candidate: "googlemail", expected: false },
  ];

  for (const { title, candidate, expected } of cases) {
    it(title, () => {
      expect(matchesGamblingBrand(candidate)).toBe(expected);
    });
  }
});
|
|
|
|
// ─── hasRandomTokens ─────────────────────────────────────────────────────────
|
|
|
|
describe("hasRandomTokens()", () => {
  // Table-driven suite: local-part of an address and whether it is expected
  // to be flagged as containing random-looking tokens.
  const cases = [
    {
      // Typical for Gamblezen: hq3a91_7xmpl2 (2 random-looking tokens)
      title: "local-part mit 2+ zufälligen Tokens → true",
      localPart: "hq3a91_7xmpl2",
      expected: true,
    },
    {
      title: "local-part mit User-ID + Token → true",
      localPart: "user123abc_ref456xyz",
      expected: true,
    },
    {
      title: "'info' → false (Funktionswort)",
      localPart: "info",
      expected: false,
    },
    {
      title: "'noreply' → false (Funktionswort)",
      localPart: "noreply",
      expected: false,
    },
    {
      title: "'newsletter' → false (Funktionswort, kein Digit-Mix)",
      localPart: "newsletter",
      expected: false,
    },
    {
      title: "normaler Local-Part ohne Zufalls-Tokens → false",
      localPart: "john.doe",
      expected: false,
    },
    {
      // Only one token >= 6 with a digit mix → below the threshold (needs >= 2)
      title: "nur ein random Token (Grenzfall) → false",
      localPart: "abc123",
      expected: false,
    },
    {
      // e.g. "u7a2b1_offers_ref9x2z" — one function word + 2 random tokens
      title: "echter BetandPlay-typischer Local-Part → true",
      localPart: "u7a2b1_offers_ref9x2z",
      expected: true,
    },
  ];

  for (const { title, localPart, expected } of cases) {
    it(title, () => {
      expect(hasRandomTokens(localPart)).toBe(expected);
    });
  }
});
|
|
|
|
// ─── computeScore ────────────────────────────────────────────────────────────
|
|
|
|
describe("computeScore()", () => {
  // The last two arguments of computeScore() are the brandMatch and
  // randomTokens flags; they are given names below so each call reads
  // without positional guessing.

  it("Whitelist-Hit → score=0, whitelistHit=true", () => {
    const brandMatch = false;
    const randomTokens = false;

    const { whitelistHit, score } = computeScore(
      "info@wetter.de",
      "Wetter Service",
      "Wettervorhersage für morgen",
      brandMatch,
      randomTokens,
    );

    expect(whitelistHit).toBe(true);
    expect(score).toBe(0);
  });

  it("Casino im Betreff → SUBJECT_GAMBLING_KEYWORD += 35", () => {
    const brandMatch = false;
    const randomTokens = false;

    const { keywordHitsSubject, score } = computeScore(
      "info@example.com",
      null,
      "Dein Casino-Bonus wartet",
      brandMatch,
      randomTokens,
    );

    expect(keywordHitsSubject).toContain("casino");
    expect(score).toBeGreaterThanOrEqual(35);
  });

  it("Geld-Pattern (100€) im Betreff → SUBJECT_MONEY_PATTERN += 20", () => {
    const brandMatch = false;
    const randomTokens = false;

    const { styleFlags, score } = computeScore(
      "info@example.com",
      null,
      "100€ Willkommensbonus jetzt sichern",
      brandMatch,
      randomTokens,
    );

    expect(styleFlags).toContain("money-pattern");
    expect(score).toBeGreaterThanOrEqual(20);
  });

  it("Brand-Match ohne Random → BRAND_MATCH_NO_RANDOM += 35", () => {
    const brandMatch = true;
    const randomTokens = false;

    const { score } = computeScore(
      "info@example.com",
      null,
      "Normaler Betreff",
      brandMatch,
      randomTokens,
    );

    expect(score).toBeGreaterThanOrEqual(35);
  });

  it("Random-Tokens ohne Brand → RANDOM_TOKENS_NO_BRAND += 10", () => {
    const brandMatch = false;
    const randomTokens = true;

    const { score } = computeScore(
      "info@example.com",
      null,
      "Newsletter vom Tag",
      brandMatch,
      randomTokens,
    );

    expect(score).toBeGreaterThanOrEqual(10);
  });

  it("Score wird auf max 100 gecapped", () => {
    // All signals at once → the uncapped score would exceed 100
    const brandMatch = true;
    const randomTokens = true;

    const { score } = computeScore(
      "slots@casinobonus.bet",
      "Casino Jackpot",
      "JACKPOT Casino 500€ Freispiele Nur heute Letzte chance",
      brandMatch,
      randomTokens,
    );

    expect(score).toBeLessThanOrEqual(100);
  });
});
|
|
|
|
// ─── classifyMail() — Pipeline End-to-End ────────────────────────────────────
|
|
|
|
describe("classifyMail() — End-to-End Pipeline", () => {
  // Empty domain set for most tests (no domain hard-block possible).
  const emptyDomainSet = new Set<string>();

  // ─── Screenshot example 1: Gamblezen via relay ────────────────────────────
  // The title names Layer 2 (relay-decoded) because that is what the
  // assertions verify; the Layer 2.5 path is covered by the next test.
  it("Gamblezen-Beispiel: bounces+user=gamblezen.com@em.sendgrid.net → Layer 2 Hard-Block (relay-decoded)", async () => {
    // Gamblezen sends via SendGrid bounces: the sender domain
    // "em.sendgrid.net" is not on the blocklist, but the domain decoded from
    // the local-part ("gamblezen.com") is.
    const domainSetWithGamblezen = new Set(["gamblezen.com"]);

    const result = await classifyMail({
      mail: {
        senderEmail: "bounces+user=gamblezen.com@em.sendgrid.net",
        senderName: "Gamble Zen",
        subject: "Dein exklusives Angebot wartet",
      },
      blockedDomainSet: domainSetWithGamblezen,
    });

    // Relay-decoded domain matches the blocklist → Layer 2 (relay-decoded),
    // NOT Layer 2.5.
    expect(result.action).toBe("blocked");
    expect(result.triggerSource).toBe("relay-decoded");
    expect(result.relayDecodedDomain).toBe("gamblezen.com");
  });

  it("Gamblezen-Beispiel ohne Blocklist-Entry → Layer 2.5 Hard-Block via Brand+Random", async () => {
    // With gamblezen.com NOT on the blocklist, Brand+Random must still block.
    const result = await classifyMail({
      mail: {
        senderEmail: "hq3a91_7xmpl2@em.sendgrid.net",
        senderName: "Gamble Zen", // brand match via display name
        subject: "Dein exklusives Angebot wartet",
      },
      blockedDomainSet: emptyDomainSet,
    });

    expect(result.action).toBe("blocked");
    expect(result.triggerSource).toBe("brand+random");
    expect(result.features.brandMatch).toBe(true);
    expect(result.features.randomTokens).toBe(true);
  });

  // ─── Screenshot example 2: BetandPlay via relay ───────────────────────────
  // As above: the assertions verify the Layer 2 relay-decoded block.
  it("BetandPlay-Beispiel: track.user=betandplay.com@mailchimp.com → Layer 2 Hard-Block (relay-decoded)", async () => {
    const domainSetWithBetandPlay = new Set(["betandplay.com"]);

    const result = await classifyMail({
      mail: {
        senderEmail: "track.user=betandplay.com@mailchimp.com",
        senderName: "BetandPlay",
        subject: "100€ Willkommensbonus — Nur heute!",
      },
      blockedDomainSet: domainSetWithBetandPlay,
    });

    expect(result.action).toBe("blocked");
    expect(result.triggerSource).toBe("relay-decoded");
    expect(result.relayDecodedDomain).toBe("betandplay.com");
  });

  it("BetandPlay-Beispiel ohne Blocklist-Entry → Layer 2.5 Hard-Block via Brand+Random", async () => {
    const result = await classifyMail({
      mail: {
        senderEmail: "u7a2b1_offers_ref9x2z@mailchimp.com",
        senderName: "BetandPlay", // brand match via display name
        subject: "100€ Willkommensbonus",
      },
      blockedDomainSet: emptyDomainSet,
    });

    expect(result.action).toBe("blocked");
    expect(result.triggerSource).toBe("brand+random");
    expect(result.features.brandMatch).toBe(true);
    expect(result.features.randomTokens).toBe(true);
  });

  // ─── Layer 1: whitelist ───────────────────────────────────────────────────
  it("Whitelist-Treffer: 'wettervorhersage' im Betreff → PASS", async () => {
    const result = await classifyMail({
      mail: {
        senderEmail: "service@wetter.de",
        senderName: "Wetter.de",
        subject: "Wettervorhersage für morgen",
      },
      blockedDomainSet: emptyDomainSet,
    });

    expect(result.action).toBe("passed");
    expect(result.triggerSource).toBe("whitelist");
  });

  it("'wettkampf' in Betreff → PASS (kein Gambling trotz 'wette')", async () => {
    const result = await classifyMail({
      mail: {
        senderEmail: "info@sport.de",
        senderName: null,
        subject: "Wettkampf-Ergebnisse dieser Woche",
      },
      blockedDomainSet: emptyDomainSet,
    });

    expect(result.action).toBe("passed");
    expect(result.triggerSource).toBe("whitelist");
  });

  // ─── Layer 2: domain hard-block ───────────────────────────────────────────
  it("Domain in Blocklist → Layer 2 Hard-Block", async () => {
    const domainSet = new Set(["casinoly.com"]);

    const result = await classifyMail({
      mail: {
        senderEmail: "promo@casinoly.com",
        senderName: "Casinoly",
        subject: "Dein Bonus wartet",
      },
      blockedDomainSet: domainSet,
    });

    expect(result.action).toBe("blocked");
    expect(result.triggerSource).toBe("domain");
    expect(result.features.domainBlocked).toBe(true);
  });

  // ─── Relay-decoded block ──────────────────────────────────────────────────
  it("Relay-Decoded: =domain.com in local-part und Domain in Blocklist → relay-decoded Block", async () => {
    const domainSet = new Set(["rabona.com"]);

    const result = await classifyMail({
      mail: {
        senderEmail: "bounce+track=rabona.com@em.sendgrid.net",
        senderName: "Rabona Casino",
        subject: "Exklusiv für dich",
      },
      blockedDomainSet: domainSet,
    });

    expect(result.action).toBe("blocked");
    expect(result.triggerSource).toBe("relay-decoded");
    expect(result.relayDecodedDomain).toBe("rabona.com");
  });

  // ─── Layer 3: score hard-block ────────────────────────────────────────────
  it("Viele Signale → Score >= 80 → Hard-Block", async () => {
    // Casino in the sender name + jackpot in the subject + urgency + money pattern
    const result = await classifyMail({
      mail: {
        senderEmail: "info@spinz-casino.example",
        senderName: "Casino Jackpot Club",
        subject: "JACKPOT 500€ Freispiele — Nur heute!",
      },
      blockedDomainSet: emptyDomainSet,
    });

    expect(result.action).toBe("blocked");
    expect(result.triggerSource).toMatch(/^score:/);
    expect(result.score).toBeGreaterThanOrEqual(80);
  });

  // ─── No signal → PASS ─────────────────────────────────────────────────────
  it("unauffällige Mail → PASS mit triggerSource 'no-signal'", async () => {
    const result = await classifyMail({
      mail: {
        senderEmail: "newsletter@amazon.de",
        senderName: "Amazon",
        subject: "Deine Bestellung wurde versandt",
      },
      blockedDomainSet: emptyDomainSet,
    });

    expect(result.action).toBe("passed");
    expect(result.triggerSource).toBe("no-signal");
    expect(result.score).toBeLessThan(25);
  });

  // ─── Brand match without random tokens → no hard-block, score increase ────
  it("Brand-Match ohne Random-Tokens → kein Layer-2.5-Block, aber Score-Erhöhung", async () => {
    const result = await classifyMail({
      mail: {
        senderEmail: "info@betandplay.com", // plain info@, nothing random
        senderName: "BetandPlay",
        subject: "Willkommen",
      },
      blockedDomainSet: emptyDomainSet,
    });

    // No Layer 2.5 hard-block (no random tokens), but the brand match raises
    // the score.
    expect(result.triggerSource).not.toBe("brand+random");
    expect(result.features.brandMatch).toBe(true);
    expect(result.features.randomTokens).toBe(false);
    // Score >= 35 (BRAND_MATCH_NO_RANDOM); the final action depends on the
    // other signals and is deliberately not asserted here.
    expect(result.features.score).toBeGreaterThanOrEqual(35);
  });

  // ─── Feature structure of the result ──────────────────────────────────────
  it("Result-Features enthalten alle erwarteten Keys", async () => {
    const result = await classifyMail({
      mail: {
        senderEmail: "promo@example.com",
        senderName: null,
        subject: "Test",
      },
      blockedDomainSet: emptyDomainSet,
    });

    // Keys every result.features object is expected to expose.
    const expectedFeatureKeys = [
      "score",
      "domainBlocked",
      "relayDecoded",
      "brandMatch",
      "randomTokens",
      "keywordHitsSubject",
      "keywordHitsDomain",
      "keywordHitsName",
      "styleFlags",
      "whitelistHit",
    ];

    for (const key of expectedFeatureKeys) {
      expect(result.features).toHaveProperty(key);
    }
  });
});
|