diff --git a/backend/server/utils/mail-classifier.ts b/backend/server/utils/mail-classifier.ts index 3cd3e67..156e58e 100644 --- a/backend/server/utils/mail-classifier.ts +++ b/backend/server/utils/mail-classifier.ts @@ -80,9 +80,9 @@ export const SCORE_WEIGHTS = { SUBJECT_URGENCY: 15, // "Nur heute", "Letzte Chance", "Ablaufdatum" SUBJECT_ALL_CAPS_WORD: 5, // EINZELNES ALL-CAPS-WORT im Betreff - // Display-Name-Indikatoren - SENDER_NAME_GAMBLING_KEYWORD: 30, // Gambling-Begriff im Absender-Namen - SENDER_NAME_BRAND_MATCH: 20, // Name matcht bekannten Gambling-Brand (normalisiert) + // Display-Name-Indikatoren: entfernt in v1.0 (zu False-Positive-anfällig). + // v1.1: SENDER_NAME_GAMBLING_KEYWORD, SENDER_NAME_BRAND_MATCH reaktivieren + // wenn Display-Name-Blocking UX + Testing vollständig sind. // Layer 2.5 Score-Ergänzungen (wenn kein Hard-Block ausgelöst) BRAND_MATCH_NO_RANDOM: 35, // Brand-Match ohne Random-Tokens (kein Hard-Block) @@ -258,14 +258,9 @@ export function computeScore( } } - // ── Sender-Name-Keywords ── - for (const kw of GAMBLING_KEYWORDS as string[]) { - if (senderNameLower.includes(kw)) { - keywordHitsName.push(kw); - score += SCORE_WEIGHTS.SENDER_NAME_GAMBLING_KEYWORD; - break; - } - } + // ── Sender-Name-Keywords: entfernt in v1.0 (Score-Beitrag via Display-Name + // ist zu False-Positive-anfällig, Display-Name-Blocking nicht supported). + // keywordHitsName bleibt im ScoreResult für v1.1-Reaktivierung (immer leer). 
// ── Geld-Pattern im Betreff (€/$ + Zahl) ── if (/[€$£]\s*\d|\d\s*[€$£]/.test(subject)) { @@ -414,11 +409,12 @@ export async function classifyMail(params: ClassifyMailParams): Promise c.length >= 4 && matchesGamblingBrand(c)); const randomTokens = hasRandomTokens(localPart); diff --git a/backend/tests/mail/mail-classifier.test.ts b/backend/tests/mail/mail-classifier.test.ts index 7e4bbd9..d79f348 100644 --- a/backend/tests/mail/mail-classifier.test.ts +++ b/backend/tests/mail/mail-classifier.test.ts @@ -9,12 +9,15 @@ * - hasRandomTokens() — true/false cases * - computeScore() — Score-Berechnung mit Weights * - classifyMail() — End-to-End Pipeline: - * - Gamblezen-Beispiel → Layer 2.5 Hard-Block - * - BetandPlay-Beispiel → Layer 2.5 Hard-Block (Apple Hide-My-Email-Pattern) + * - Gamblezen-Beispiel → Layer 2 Hard-Block via Blocklist + * - Gamblezen-Beispiel ohne Blocklist → Score-Path, PASS (kein Keyword in Domain/Subject, kein Brand via Display-Name) + * - BetandPlay-Beispiel → Layer 2 Hard-Block via Relay-Decoded + * - BetandPlay-Beispiel ohne Blocklist → Score-Path (kein Brand via Display-Name) * - Whitelist-Case (wettervorhersage) * - Domain-Block (Layer 2) * - Relay-Decoded Block (Layer 2) * - No-Signal → PASS + * - v1.0: Display-Name-only Gambling-Pattern → PASS (kein Score-Beitrag) */ import { describe, it, expect, vi } from "vitest"; @@ -283,21 +286,26 @@ describe("classifyMail() — End-to-End Pipeline", () => { expect(result.relayDecodedDomain).toBe("gamblezen.com"); }); - it("Gamblezen-Beispiel ohne Blocklist-Entry → Layer 2.5 Hard-Block via Brand+Random", async () => { - // Wenn gamblezen.com NICHT in der Blocklist ist: Brand+Random greift trotzdem + it("Gamblezen-Beispiel ohne Blocklist-Entry → kein Layer-2.5-Block (v1.0: kein Display-Name Brand-Match), Score-Path", async () => { + // v1.0: Display-Name "Gamble Zen" liefert keinen Brand-Match mehr. + // Domain em.sendgrid.net enthält kein Gambling-Keyword → kein Domain-Score. 
+ // Subject "Dein exklusives Angebot wartet" enthält kein Keyword → kein Keyword-Score (ggf. nur Random-Token-Beitrag) → PASS. const result = await classifyMail({ mail: { senderEmail: "hq3a91_7xmpl2@em.sendgrid.net", - senderName: "Gamble Zen", // Brand-Match via Display-Name + senderName: "Gamble Zen", subject: "Dein exklusives Angebot wartet", }, blockedDomainSet: emptyDomainSet, }); - expect(result.action).toBe("blocked"); - expect(result.triggerSource).toBe("brand+random"); - expect(result.features.brandMatch).toBe(true); + // Kein Brand+Random mehr (Display-Name ist nicht mehr Brand-Source) + expect(result.triggerSource).not.toBe("brand+random"); + expect(result.features.brandMatch).toBe(false); + // Random-Tokens sind noch erkannt (Local-Part hq3a91_7xmpl2) expect(result.features.randomTokens).toBe(true); + // Score gering — kein Keyword im Subject/Domain → PASS + expect(result.action).toBe("passed"); }); // ─── Screenshot-Beispiel 2: BetandPlay via Relay ───────────────────────── @@ -318,20 +326,28 @@ describe("classifyMail() — End-to-End Pipeline", () => { expect(result.relayDecodedDomain).toBe("betandplay.com"); }); - it("BetandPlay-Beispiel ohne Blocklist-Entry → Layer 2.5 Hard-Block via Brand+Random", async () => { + it("BetandPlay-Beispiel ohne Blocklist-Entry → kein Layer-2.5-Block (v1.0: kein Display-Name Brand-Match), Score via Subject+Money", async () => { + // v1.0: Display-Name "BetandPlay" liefert keinen Brand-Match mehr. + // mailchimp.com enthält kein Gambling-Keyword. + // Subject "100€ Willkommensbonus" hat keinen GAMBLING_KEYWORDS-Treffer (kein "casino", "bonus code" etc.) + // aber Geld-Pattern (100€) → +20. Random-Tokens → +10 (RANDOM_TOKENS_NO_BRAND). + // Score = 30 → < 50 → PASS. 
const result = await classifyMail({ mail: { senderEmail: "u7a2b1_offers_ref9x2z@mailchimp.com", - senderName: "BetandPlay", // Brand-Match via Display-Name + senderName: "BetandPlay", subject: "100€ Willkommensbonus", }, blockedDomainSet: emptyDomainSet, }); - expect(result.action).toBe("blocked"); - expect(result.triggerSource).toBe("brand+random"); - expect(result.features.brandMatch).toBe(true); + // Kein Brand+Random-Hard-Block (Display-Name ist v1.0 nicht Brand-Source) + expect(result.triggerSource).not.toBe("brand+random"); + expect(result.features.brandMatch).toBe(false); expect(result.features.randomTokens).toBe(true); + // Score: 20 (money) + 10 (random-no-brand) = 30 → PASS (< 50) + expect(result.score).toBe(30); + expect(result.action).toBe("passed"); }); // ─── Layer 1: Whitelist ─────────────────────────────────────────────────── @@ -400,8 +416,13 @@ describe("classifyMail() — End-to-End Pipeline", () => { }); // ─── Layer 3: Score-Hard-Block ──────────────────────────────────────────── - it("Viele Signale → Score >= 80 → Hard-Block", async () => { - // Casino im Sender-Name + Jackpot im Betreff + Urgency + Geld-Pattern + it("Viele Signale → Score >= 80 → Hard-Block (ohne Display-Name-Beitrag)", async () => { + // Domain-Keyword ("casino" in spinz-casino.example) → +40 + // Subject-Keyword ("jackpot") → +50 + // Geld-Pattern (500€) → +20 + // Urgency ("Nur heute") → +15 + // ALL_CAPS ("JACKPOT") → +5 + // Gesamt: 130 → gecapped auf 100. Display-Name spielt keine Rolle. 
const result = await classifyMail({ mail: { senderEmail: "info@spinz-casino.example", @@ -505,6 +526,41 @@ describe("classifyMail() — End-to-End Pipeline", () => { expect(result.action).toBe("passed"); expect(result.features.keywordHitsSubject).toHaveLength(0); }); + + // ─── v1.0: Display-Name-only Signale → kein Score-Beitrag ──────────────── + + it("v1.0: Subject leer + Display-Name 'Casino Bonus' + generische Domain → Score=0 → PASS", async () => { + // Display-Name hat Gambling-Keyword, aber v1.0 wertet das nicht aus. + // Kein Subject-Keyword, keine Gambling-Domain → Score=0 → PASS. + const result = await classifyMail({ + mail: { + senderEmail: "info@example.com", + senderName: "Casino Bonus", + subject: "", + }, + blockedDomainSet: emptyDomainSet, + }); + expect(result.action).toBe("passed"); + expect(result.score).toBe(0); + expect(result.features.keywordHitsName).toHaveLength(0); + expect(result.triggerSource).toBe("no-signal"); + }); + + it("v1.0: Subject 'Hotel Las Vegas' + Display-Name 'Casino Royale' + generische Domain → Score=0 → PASS", async () => { + // Weder Subject noch Domain enthält einen GAMBLING_KEYWORDS-Treffer. + // Display-Name "Casino Royale" hat zwar 'casino', zählt aber v1.0 nicht. + const result = await classifyMail({ + mail: { + senderEmail: "info@hotel-example.com", + senderName: "Casino Royale", + subject: "Hotel Las Vegas", + }, + blockedDomainSet: emptyDomainSet, + }); + expect(result.action).toBe("passed"); + expect(result.score).toBe(0); + expect(result.features.keywordHitsName).toHaveLength(0); + }); }); // ─── Fix 1: Folder-Filter (System-Folder-Ausschluss) ──────────────────────────