refactor(mail-classifier): display-name aus Score-Pfad entfernen (v1.0)
SENDER_NAME_GAMBLING_KEYWORD (+30) und SENDER_NAME_BRAND_MATCH (+20) aus SCORE_WEIGHTS entfernt. Layer-2.5-Brand-Match prüft nur noch Domain-Root und Relay-Domain, nicht mehr displayNameNorm. Sender-Name-Keywords-Block in computeScore() entfernt. keywordHitsName bleibt im Interface für v1.1. Tests: Brand+Random-Tests, die den Display-Name als einzige Brand-Source hatten, auf neues v1.0-Verhalten (PASS) umgeschrieben. Zwei neue Tests: Display-Name-only Casino-Signal → Score=0 → PASS verifiziert. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
00ec716694
commit
4573d16e1a
@ -80,9 +80,9 @@ export const SCORE_WEIGHTS = {
|
||||
SUBJECT_URGENCY: 15, // "Nur heute", "Letzte Chance", "Ablaufdatum"
|
||||
SUBJECT_ALL_CAPS_WORD: 5, // EINZELNES ALL-CAPS-WORT im Betreff
|
||||
|
||||
// Display-Name-Indikatoren
|
||||
SENDER_NAME_GAMBLING_KEYWORD: 30, // Gambling-Begriff im Absender-Namen
|
||||
SENDER_NAME_BRAND_MATCH: 20, // Name matcht bekannten Gambling-Brand (normalisiert)
|
||||
// Display-Name-Indikatoren: entfernt in v1.0 (zu False-Positive-anfällig).
|
||||
// v1.1: SENDER_NAME_GAMBLING_KEYWORD, SENDER_NAME_BRAND_MATCH reaktivieren
|
||||
// wenn Display-Name-Blocking UX + Testing vollständig sind.
|
||||
|
||||
// Layer 2.5 Score-Ergänzungen (wenn kein Hard-Block ausgelöst)
|
||||
BRAND_MATCH_NO_RANDOM: 35, // Brand-Match ohne Random-Tokens (kein Hard-Block)
|
||||
@ -258,14 +258,9 @@ export function computeScore(
|
||||
}
|
||||
}
|
||||
|
||||
// ── Sender-Name-Keywords ──
|
||||
for (const kw of GAMBLING_KEYWORDS as string[]) {
|
||||
if (senderNameLower.includes(kw)) {
|
||||
keywordHitsName.push(kw);
|
||||
score += SCORE_WEIGHTS.SENDER_NAME_GAMBLING_KEYWORD;
|
||||
break;
|
||||
}
|
||||
}
|
||||
// ── Sender-Name-Keywords: entfernt in v1.0 (Score-Beitrag via Display-Name
|
||||
// ist zu False-Positive-anfällig, Display-Name-Blocking nicht supported).
|
||||
// keywordHitsName bleibt im ScoreResult für v1.1-Reaktivierung (immer leer).
|
||||
|
||||
// ── Geld-Pattern im Betreff (€/$ + Zahl) ──
|
||||
if (/[€$£]\s*\d|\d\s*[€$£]/.test(subject)) {
|
||||
@ -414,11 +409,12 @@ export async function classifyMail(params: ClassifyMailParams): Promise<Classifi
|
||||
}
|
||||
|
||||
// ── Layer 2.5: Brand+Random-Token-Hard-Block ────────────────────────────────
|
||||
// Normalisiere Absender-Name und Domain-Root für Brand-Vergleich
|
||||
const displayNameNorm = normalizeBrand(senderName ?? "");
|
||||
// Brand-Match prüft nur Domain-Root und Relay-Domain — kein Display-Name.
|
||||
// Display-Name-basiertes Brand-Matching ist in v1.0 entfernt (zu False-Positive-anfällig).
|
||||
// v1.1: displayNameNorm wieder in allBrandCandidates aufnehmen, wenn UX + Testing fertig sind.
|
||||
const domainCandidates = domainToBrandCandidates(domain);
|
||||
const relayDomainCandidates = relayDecodedDomain ? domainToBrandCandidates(relayDecodedDomain) : [];
|
||||
const allBrandCandidates = [displayNameNorm, ...domainCandidates, ...relayDomainCandidates];
|
||||
const allBrandCandidates = [...domainCandidates, ...relayDomainCandidates];
|
||||
|
||||
const brandMatch = allBrandCandidates.some((c) => c.length >= 4 && matchesGamblingBrand(c));
|
||||
const randomTokens = hasRandomTokens(localPart);
|
||||
|
||||
@ -9,12 +9,15 @@
|
||||
* - hasRandomTokens() — true/false cases
|
||||
* - computeScore() — Score-Berechnung mit Weights
|
||||
* - classifyMail() — End-to-End Pipeline:
|
||||
* - Gamblezen-Beispiel → Layer 2.5 Hard-Block
|
||||
* - BetandPlay-Beispiel → Layer 2.5 Hard-Block (Apple Hide-My-Email-Pattern)
|
||||
* - Gamblezen-Beispiel → Layer 2 Hard-Block via Blocklist
|
||||
* - Gamblezen-Beispiel ohne Blocklist → Score-Path, PASS (kein Brand via Display-Name)
|
||||
* - BetandPlay-Beispiel → Layer 2 Hard-Block via Relay-Decoded
|
||||
* - BetandPlay-Beispiel ohne Blocklist → Score-Path (kein Brand via Display-Name)
|
||||
* - Whitelist-Case (wettervorhersage)
|
||||
* - Domain-Block (Layer 2)
|
||||
* - Relay-Decoded Block (Layer 2)
|
||||
* - No-Signal → PASS
|
||||
* - v1.0: Display-Name-only Gambling-Pattern → PASS (kein Score-Beitrag)
|
||||
*/
|
||||
import { describe, it, expect, vi } from "vitest";
|
||||
|
||||
@ -283,21 +286,26 @@ describe("classifyMail() — End-to-End Pipeline", () => {
|
||||
expect(result.relayDecodedDomain).toBe("gamblezen.com");
|
||||
});
|
||||
|
||||
it("Gamblezen-Beispiel ohne Blocklist-Entry → Layer 2.5 Hard-Block via Brand+Random", async () => {
|
||||
// Wenn gamblezen.com NICHT in der Blocklist ist: Brand+Random greift trotzdem
|
||||
it("Gamblezen-Beispiel ohne Blocklist-Entry → kein Layer-2.5-Block (v1.0: kein Display-Name Brand-Match), Score-Path", async () => {
|
||||
// v1.0: Display-Name "Gamble Zen" liefert keinen Brand-Match mehr.
|
||||
// Domain em.sendgrid.net enthält kein Gambling-Keyword → kein Domain-Score.
|
||||
// Subject "Dein exklusives Angebot wartet" enthält kein Keyword → kein Keyword-Score; nur Random-Tokens (+10, RANDOM_TOKENS_NO_BRAND) → Score < 50 → PASS.
|
||||
const result = await classifyMail({
|
||||
mail: {
|
||||
senderEmail: "hq3a91_7xmpl2@em.sendgrid.net",
|
||||
senderName: "Gamble Zen", // Brand-Match via Display-Name
|
||||
senderName: "Gamble Zen",
|
||||
subject: "Dein exklusives Angebot wartet",
|
||||
},
|
||||
blockedDomainSet: emptyDomainSet,
|
||||
});
|
||||
|
||||
expect(result.action).toBe("blocked");
|
||||
expect(result.triggerSource).toBe("brand+random");
|
||||
expect(result.features.brandMatch).toBe(true);
|
||||
// Kein Brand+Random mehr (Display-Name ist nicht mehr Brand-Source)
|
||||
expect(result.triggerSource).not.toBe("brand+random");
|
||||
expect(result.features.brandMatch).toBe(false);
|
||||
// Random-Tokens sind noch erkannt (Local-Part hq3a91_7xmpl2)
|
||||
expect(result.features.randomTokens).toBe(true);
|
||||
// Score gering — kein Keyword im Subject/Domain → PASS
|
||||
expect(result.action).toBe("passed");
|
||||
});
|
||||
|
||||
// ─── Screenshot-Beispiel 2: BetandPlay via Relay ─────────────────────────
|
||||
@ -318,20 +326,28 @@ describe("classifyMail() — End-to-End Pipeline", () => {
|
||||
expect(result.relayDecodedDomain).toBe("betandplay.com");
|
||||
});
|
||||
|
||||
it("BetandPlay-Beispiel ohne Blocklist-Entry → Layer 2.5 Hard-Block via Brand+Random", async () => {
|
||||
it("BetandPlay-Beispiel ohne Blocklist-Entry → kein Layer-2.5-Block (v1.0: kein Display-Name Brand-Match), Score via Subject+Money", async () => {
|
||||
// v1.0: Display-Name "BetandPlay" liefert keinen Brand-Match mehr.
|
||||
// mailchimp.com enthält kein Gambling-Keyword.
|
||||
// Subject "100€ Willkommensbonus" hat keinen GAMBLING_KEYWORDS-Treffer (kein "casino", "bonus code" etc.),
|
||||
// aber Geld-Pattern (100€) → +20. Random-Tokens → +10 (RANDOM_TOKENS_NO_BRAND).
|
||||
// Score = 30 → < 50 → PASS.
|
||||
const result = await classifyMail({
|
||||
mail: {
|
||||
senderEmail: "u7a2b1_offers_ref9x2z@mailchimp.com",
|
||||
senderName: "BetandPlay", // Brand-Match via Display-Name
|
||||
senderName: "BetandPlay",
|
||||
subject: "100€ Willkommensbonus",
|
||||
},
|
||||
blockedDomainSet: emptyDomainSet,
|
||||
});
|
||||
|
||||
expect(result.action).toBe("blocked");
|
||||
expect(result.triggerSource).toBe("brand+random");
|
||||
expect(result.features.brandMatch).toBe(true);
|
||||
// Kein Brand+Random-Hard-Block (Display-Name ist v1.0 nicht Brand-Source)
|
||||
expect(result.triggerSource).not.toBe("brand+random");
|
||||
expect(result.features.brandMatch).toBe(false);
|
||||
expect(result.features.randomTokens).toBe(true);
|
||||
// Score: 20 (money) + 10 (random-no-brand) = 30 → PASS (< 50)
|
||||
expect(result.score).toBe(30);
|
||||
expect(result.action).toBe("passed");
|
||||
});
|
||||
|
||||
// ─── Layer 1: Whitelist ───────────────────────────────────────────────────
|
||||
@ -400,8 +416,13 @@ describe("classifyMail() — End-to-End Pipeline", () => {
|
||||
});
|
||||
|
||||
// ─── Layer 3: Score-Hard-Block ────────────────────────────────────────────
|
||||
it("Viele Signale → Score >= 80 → Hard-Block", async () => {
|
||||
// Casino im Sender-Name + Jackpot im Betreff + Urgency + Geld-Pattern
|
||||
it("Viele Signale → Score >= 80 → Hard-Block (ohne Display-Name-Beitrag)", async () => {
|
||||
// Domain-Keyword ("casino" in spinz-casino.example) → +40
|
||||
// Subject-Keyword ("jackpot") → +50
|
||||
// Geld-Pattern (500€) → +20
|
||||
// Urgency ("Nur heute") → +15
|
||||
// ALL_CAPS ("JACKPOT") → +5
|
||||
// Gesamt: 130 → gecapped auf 100. Display-Name spielt keine Rolle.
|
||||
const result = await classifyMail({
|
||||
mail: {
|
||||
senderEmail: "info@spinz-casino.example",
|
||||
@ -505,6 +526,41 @@ describe("classifyMail() — End-to-End Pipeline", () => {
|
||||
expect(result.action).toBe("passed");
|
||||
expect(result.features.keywordHitsSubject).toHaveLength(0);
|
||||
});
|
||||
|
||||
// ─── v1.0: Display-Name-only Signale → kein Score-Beitrag ────────────────
|
||||
|
||||
it("v1.0: Subject leer + Display-Name 'Casino Bonus' + generische Domain → Score=0 → PASS", async () => {
|
||||
// Display-Name hat Gambling-Keyword, aber v1.0 wertet das nicht aus.
|
||||
// Kein Subject-Keyword, keine Gambling-Domain → Score=0 → PASS.
|
||||
const result = await classifyMail({
|
||||
mail: {
|
||||
senderEmail: "info@example.com",
|
||||
senderName: "Casino Bonus",
|
||||
subject: "",
|
||||
},
|
||||
blockedDomainSet: emptyDomainSet,
|
||||
});
|
||||
expect(result.action).toBe("passed");
|
||||
expect(result.score).toBe(0);
|
||||
expect(result.features.keywordHitsName).toHaveLength(0);
|
||||
expect(result.triggerSource).toBe("no-signal");
|
||||
});
|
||||
|
||||
it("v1.0: Subject 'Hotel Las Vegas' + Display-Name 'Casino Royale' + generische Domain → Score=0 → PASS", async () => {
|
||||
// Weder Subject noch Domain enthält einen GAMBLING_KEYWORDS-Treffer.
|
||||
// Display-Name "Casino Royale" hat zwar 'casino', zählt aber v1.0 nicht.
|
||||
const result = await classifyMail({
|
||||
mail: {
|
||||
senderEmail: "info@hotel-example.com",
|
||||
senderName: "Casino Royale",
|
||||
subject: "Hotel Las Vegas",
|
||||
},
|
||||
blockedDomainSet: emptyDomainSet,
|
||||
});
|
||||
expect(result.action).toBe("passed");
|
||||
expect(result.score).toBe(0);
|
||||
expect(result.features.keywordHitsName).toHaveLength(0);
|
||||
});
|
||||
});
|
||||
|
||||
// ─── Fix 1: Folder-Filter (System-Folder-Ausschluss) ──────────────────────────
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user