Task B — linguistische FP-Fix: - mail-classifier.ts: Subject-Keyword-Loop überspringt Keyword-Score wenn Subject das Keyword als Sucht-Compound enthält (z.B. "glücksspiel" in "Glücksspielsucht" → kein +50 Score). Globale linguistische Invariante Deutsch — Gambling-Marketer schreiben nie "Glücksspielsucht-Bonus". - gambling-keywords.mjs: GAMBLING_WHITELIST erweitert um Stamm-Varianten (wettsucht, spielsucht, suchtberatung, suchthilfe) als Fallback für Compounds wo keyword ≠ exakter Stamm. - 4 neue Tests: Forum Glücksspielsucht → PASS, Hilfe bei Spielsucht → PASS, Wettsucht-Selbsthilfe → PASS, Glücksspiel-Bonus 100€ → BLOCK. Task C — Phase-1-Data-Foundation: - mail-training-utils.ts: sanitizeSubjectForTraining() (PII-Stripping via Regex: EMAIL/URL/NUM/Greeting/ALL-CAPS) + detectSubjectLanguage() via franc (iso639-3). 26 Unit-Tests. - franc@6.2.0 installiert (~50KB ESM). - mail.ts insertMailClassificationSample(): ruft sanitizeSubjectForTraining() auf, schreibt detectedLang + subjectSanitized in features-JSON (Interim bis Schema-Migration). - mail-retention-cron.ts: Subject-Nullification nach 30 Tagen (täglich) + Sample-Purge nach 12 Monaten (monatlich). DSGVO Art. 5 Abs. 1e. 105 Tests grün (58 classifier + 26 training-utils + 11 display-name + 10 gmail). Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
571 lines
20 KiB
TypeScript
571 lines
20 KiB
TypeScript
/**
 * Mail classification pipeline (deterministic, no LLM).
 *
 * Architecture:
 *   Layer 0   — Skip guard (already blocked / no consent)
 *   Layer 1   — Whitelist (wetter, wettkampf …) → PASS
 *   Layer 2   — Domain hard block (blocklist)
 *   Layer 2.5 — Brand + random-token detection (hard block, catches Apple Hide-My-Email)
 *   Layer 2.6 — Display-name hard block (curated display-name patterns)
 *   Layer 3   — Score 0–100 (deterministic); ≥50 → BLOCK, otherwise PASS
 *   Layer 5   — MailClassificationSample insert (always, except Layer 0)
 *
 * All layer logic consists of pure functions → fully unit-testable without DB mocks.
 *
 * GDPR notes:
 *   - Mail contents (body) are never persisted (Art. 9).
 *   - No data leaves the server anymore (no LLM third-country transfer).
 *   - userId appears in logs only when absolutely necessary (data minimisation, Art. 5).
 *   - MailClassificationSample: cascade delete via the userId relation (Art. 17).
 */
|
||
|
||
// eslint-disable-next-line @typescript-eslint/ban-ts-comment
|
||
// @ts-ignore — .mjs ohne types, Exports sind string[]
|
||
import { GAMBLING_KEYWORDS, GAMBLING_WHITELIST } from "./gambling-keywords.mjs";
|
||
|
||
// ─── Typen ─────────────────────────────────────────────────────────────────────
|
||
|
||
/** Final verdict of the classifier for a single mail: blocked or let through. */
export type ClassificationAction = "blocked" | "passed";
|
||
|
||
/**
 * Names the rule that decided the classification.
 *
 * - "domain"               sender domain is on the blocklist
 * - "relay-decoded"        domain decoded from a relay address is on the blocklist
 * - "brand+random"         gambling brand match combined with random-looking local-part tokens
 * - "custom-display-name"  sender display name matched a supplied pattern
 * - `score:<n>`            decision taken from the numeric score n
 * - "whitelist"            a whitelist term matched
 * - "no-signal"            score stayed below the pass threshold
 */
export type TriggerSource =
  | "domain"
  | "relay-decoded"
  | "brand+random"
  | "custom-display-name"
  | `score:${number}`
  | "whitelist"
  | "no-signal";
|
||
|
||
/** Header fields of a single mail that the classifier inspects. */
export interface MailInput {
  /** Sender e-mail address (lowercase, as delivered by IMAP). */
  senderEmail: string;
  /** Display name of the sender (may be empty or null). */
  senderName: string | null;
  /** Subject line. */
  subject: string;
}
|
||
|
||
/** Outcome of classifying one mail. */
export interface ClassificationResult {
  /** Whether the mail is blocked or passed. */
  action: ClassificationAction;
  /** Rule that produced the decision. */
  triggerSource: TriggerSource;
  /** Numeric score attached to the decision (0–100). */
  score: number;
  /** Real domain extracted from a relay address (e.g. gamblezen.com), or null. */
  relayDecodedDomain: string | null;
  /** Score components, intended for MailClassificationSample.features. */
  features: ClassificationFeatures;
}
|
||
|
||
/** Individual signals that contributed to a classification. */
export interface ClassificationFeatures {
  /** Score reported together with the decision. */
  score: number;
  /** True when the sender domain itself was found in the blocklist. */
  domainBlocked: boolean;
  /** True when a domain was decoded from a relay-style sender address. */
  relayDecoded: boolean;
  /** True when a domain candidate matched a known gambling brand. */
  brandMatch: boolean;
  /** True when the local part contains at least two random-looking tokens. */
  randomTokens: boolean;
  /** Gambling keywords found in the subject. */
  keywordHitsSubject: string[];
  /** Gambling keywords found in the sender domain. */
  keywordHitsDomain: string[];
  /** Gambling keywords found in the display name (always empty in v1.0). */
  keywordHitsName: string[];
  /** Style indicators such as "money-pattern", "urgency", "all-caps", "short-random-domain". */
  styleFlags: string[];
  /** True when a whitelist term matched. */
  whitelistHit: boolean;
}
|
||
|
||
// ─── Score-Weights (TS-Constants, kein Config-File-Overhead) ──────────────────
|
||
|
||
/** Points added to the score for each detected signal. */
export const SCORE_WEIGHTS = {
  // Domain indicators
  DOMAIN_GAMBLING_KEYWORD: 40, // domain contains a gambling term (bet, casino, slots …)
  DOMAIN_SHORT_RANDOM: 15, // domain root shorter than 6 chars mixing letters and digits (betx, 1win)

  // Subject indicators
  SUBJECT_GAMBLING_KEYWORD: 50, // keyword in the subject (casino, jackpot, freispiel …)
  SUBJECT_MONEY_PATTERN: 20, // currency symbol next to a number (e.g. "100€ Bonus")
  SUBJECT_URGENCY: 15, // "Nur heute", "Letzte Chance", "Ablaufdatum"
  SUBJECT_ALL_CAPS_WORD: 5, // a single ALL-CAPS word in the subject

  // Display-name indicators: removed in v1.0 (too prone to false positives).
  // v1.1: re-enable SENDER_NAME_GAMBLING_KEYWORD, SENDER_NAME_BRAND_MATCH
  // once display-name blocking UX and testing are complete.

  // Layer 2.5 score additions (applied when no hard block was triggered)
  BRAND_MATCH_NO_RANDOM: 35, // brand match without random tokens (no hard block)
  RANDOM_TOKENS_NO_BRAND: 10, // random tokens without brand match
} as const;
|
||
|
||
// Hard-block threshold: score >= 80 → BLOCK
const SCORE_HARD_BLOCK_THRESHOLD = 80;
// Pass-below threshold: score < 25 → PASS (no-signal)
const SCORE_PASS_BELOW = 25;
// Mid-range threshold: for scores in [25, 80), BLOCK from 50 upward, otherwise PASS
const SCORE_BLOCK_MIDRANGE = 50;
|
||
|
||
// ─── Known gambling brands (for brand-match normalisation) ────────────────────
// Derived from GAMBLING_KEYWORDS plus typical blocklist domains.
// Entries are written in normalised form: lowercase, without whitespace,
// hyphens, dots or underscores (the characters normalizeBrand strips).
// NOTE(review): entries are matched as substrings, so short or generic entries
// ("stake", "vbet", "casino") also hit longer unrelated names — confirm this is intended.
const GAMBLING_BRANDS: string[] = [
  "casino", "bet365", "bwin", "tipico", "unibet", "betway", "888casino",
  "pokerstars", "interwetten", "netbet", "leovegas", "mrgreen",
  "betsson", "neobet", "mybet", "lottoland", "betano", "williamhill",
  "paddypower", "betfair", "stake", "rolletto", "vbet", "1xbet", "melbet",
  "mostbet", "luckyvibe", "spinz", "casinoly", "rabona",
  "justcasino", "getslots", "rocketplay", "freshcasino",
  "nomnomcasino", "gamblezen", "betandplay",
];
|
||
|
||
// ─── Relay-Decoder ─────────────────────────────────────────────────────────────
|
||
|
||
/**
 * Extracts the real target domain from an e-mail relay address.
 *
 * Known patterns:
 *   bounces+user=example.com@sendgrid.net    → example.com
 *   track.user=gamblezen.com@mailchimp.com   → gamblezen.com
 *   a1b2c3_user_at_betandplay.com@em.em.xyz  → betandplay.com
 *   user=betandplay.com@bounce.em.example    → betandplay.com
 *
 * Only the part before the first "@" is inspected. It is searched for
 * `=domain.tld` first and `_at_domain.tld` second; the domain must be followed
 * by a separator character or the end of the local part.
 *
 * @param senderEmail Full sender address.
 * @returns The embedded domain in lowercase, or null when the address has no
 *          "@" or the local part carries no embedded domain.
 */
export function extractRelayedDomain(senderEmail: string): string | null {
  const atIndex = senderEmail.indexOf("@");
  if (atIndex === -1) return null;
  const localPart = senderEmail.slice(0, atIndex);

  const relayPatterns = [
    // Pattern 1: user=domain.tld (SendGrid, Mailchimp, SES bounces)
    /=([a-z0-9][\w-]*\.[a-z]{2,}(?:\.[a-z]{2,})?)(?:[+_&]|$)/i,
    // Pattern 2: _at_domain.tld (less common, some custom relay setups)
    /_at_([a-z0-9][\w-]*\.[a-z]{2,}(?:\.[a-z]{2,})?)(?:[_+]|$)/i,
  ];

  for (const pattern of relayPatterns) {
    // Capture group 1 holds the embedded domain; guard the index access explicitly.
    const embedded = pattern.exec(localPart)?.[1];
    if (embedded) return embedded.toLowerCase();
  }

  return null;
}
|
||
|
||
// ─── Brand-Normalisierung ──────────────────────────────────────────────────────
|
||
|
||
/**
 * Normalises a string for brand comparisons: lowercases it and removes
 * whitespace, hyphens, dots and underscores. Other characters are kept.
 *
 *   "BetandPlay"       → "betandplay"
 *   "bet-and-play"     → "betandplay"
 *   "bet-and-play.com" → "betandplaycom" (the TLD is not stripped here)
 *
 * @param s Raw brand, domain or name.
 * @returns The normalised string.
 */
export function normalizeBrand(s: string): string {
  return s.toLowerCase().replace(/[\s\-._]/g, "");
}
|
||
|
||
/**
 * Checks whether an already normalised string contains a known gambling brand.
 *
 * Inputs shorter than 4 characters never match, to avoid false positives
 * ("bet" on its own is too short). The comparison is a substring test, which
 * also covers exact equality.
 *
 * @param normalized String that has been passed through normalizeBrand.
 * @returns True when any entry of GAMBLING_BRANDS occurs inside the input.
 */
export function matchesGamblingBrand(normalized: string): boolean {
  if (normalized.length < 4) return false;
  return GAMBLING_BRANDS.some((brand) => normalized.includes(brand));
}
|
||
|
||
/**
 * Derives brand candidates from a domain for the brand-match check.
 *
 * Returns two normalised strings: the first label of the domain and the
 * complete domain.
 *   "betand-play.com" → ["betandplay", "betandplaycom"]
 *
 * @param domain Domain part of an address (may be empty).
 * @returns `[normalised first label, normalised full domain]`.
 */
function domainToBrandCandidates(domain: string): string[] {
  const firstLabel = domain.split(".")[0] ?? "";
  return [normalizeBrand(firstLabel), normalizeBrand(domain)];
}
|
||
|
||
// ─── Random-Token-Detection ───────────────────────────────────────────────────
|
||
|
||
/**
 * Detects random-looking tokens in the local part of an e-mail address.
 *
 * The local part is split at runs of "_", "-", "." and "+". A token counts as
 * random-looking when it has at least 6 characters, contains both letters and
 * digits, and is not a known function word (info, admin, noreply, support …).
 *
 * A local part with two or more such tokens is considered random-looking —
 * typical for bulk mailers that embed trackable user IDs.
 *
 * @param localPart Part of the address before the "@".
 * @returns True when at least two random-looking tokens are present.
 */
export function hasRandomTokens(localPart: string): boolean {
  // None of the current entries contains a digit, so this guard cannot trigger
  // after the letters+digits check; it only matters if mixed entries are added.
  const functionWords = new Set([
    "info", "admin", "noreply", "no-reply", "support", "hello",
    "news", "marketing", "sales", "contact", "newsletter", "service",
    "offers", "promotions", "promo", "team", "mail", "email",
    "reply", "bounce", "return", "postmaster", "mailer",
  ]);

  let randomLooking = 0;
  for (const token of localPart.split(/[_\-.+]+/)) {
    if (token.length < 6) continue;
    // Must contain letters AND digits.
    if (!/[a-z]/i.test(token) || !/[0-9]/.test(token)) continue;
    if (functionWords.has(token.toLowerCase())) continue;

    randomLooking += 1;
    if (randomLooking >= 2) return true; // two hits are enough, stop scanning
  }

  return false;
}
|
||
|
||
// ─── Score-Berechnung (Layer 3) ───────────────────────────────────────────────
|
||
|
||
/** Result of the Layer 3 score computation. */
interface ScoreResult {
  /** Accumulated score, capped at 100. */
  score: number;
  /** Gambling keywords found in the subject. */
  keywordHitsSubject: string[];
  /** Gambling keywords found in the sender domain. */
  keywordHitsDomain: string[];
  /** Gambling keywords found in the display name (always empty in v1.0). */
  keywordHitsName: string[];
  /** Style indicators that fired (money-pattern, urgency, all-caps, short-random-domain). */
  styleFlags: string[];
  /** True when a whitelist term matched and scoring was skipped. */
  whitelistHit: boolean;
}
|
||
|
||
/**
 * Computes the deterministic gambling score (Layer 3) for one mail.
 *
 * If any whitelist term occurs in the subject, sender address or display name,
 * scoring is skipped and a zero score with `whitelistHit: true` is returned.
 * Otherwise the weights from SCORE_WEIGHTS are summed for every signal found
 * and the total is capped at 100. At most one domain keyword and one subject
 * keyword are counted.
 *
 * @param senderEmail       Sender address (lowercased internally).
 * @param senderName        Display name; only used for the whitelist check.
 * @param subject           Subject line.
 * @param brandMatchFound   Whether Layer 2.5 found a brand match.
 * @param randomTokensFound Whether Layer 2.5 found random tokens in the local part.
 * @returns Score plus the individual hits and style flags.
 */
export function computeScore(
  senderEmail: string,
  senderName: string | null,
  subject: string,
  brandMatchFound: boolean,
  randomTokensFound: boolean,
): ScoreResult {
  const subjectLower = subject.toLowerCase();
  const senderEmailLower = senderEmail.toLowerCase();
  const senderNameLower = (senderName ?? "").toLowerCase();
  const domain = senderEmailLower.split("@")[1] ?? "";
  const domainRoot = domain.split(".")[0] ?? "";

  const whitelist = GAMBLING_WHITELIST as string[];
  const keywords = GAMBLING_KEYWORDS as string[];

  // ── Whitelist check (Layer 1) ──
  const whitelisted = whitelist.some(
    (w) => subjectLower.includes(w) || senderEmailLower.includes(w) || senderNameLower.includes(w),
  );
  if (whitelisted) {
    return {
      score: 0,
      keywordHitsSubject: [],
      keywordHitsDomain: [],
      keywordHitsName: [],
      styleFlags: [],
      whitelistHit: true,
    };
  }

  let score = 0;
  const keywordHitsSubject: string[] = [];
  const keywordHitsDomain: string[] = [];
  // Sender-name keywords: removed in v1.0 (scoring via display name is too prone
  // to false positives). The field stays in the result, always empty, for v1.1.
  const keywordHitsName: string[] = [];
  const styleFlags: string[] = [];

  // ── Domain keywords ── (first hit only; the domain root is a prefix of the
  // domain, so testing the full domain covers it)
  const domainKeyword = keywords.find((kw) => domain.includes(kw));
  if (domainKeyword !== undefined) {
    keywordHitsDomain.push(domainKeyword);
    score += SCORE_WEIGHTS.DOMAIN_GAMBLING_KEYWORD;
  }

  // ── Subject keywords ── (first hit only)
  // Linguistic invariant (German): compound nouns with the suffix "-sucht"
  // (Glücksspielsucht, Spielsucht, Wettsucht) signal a recovery / anti-gambling
  // context. A keyword is therefore skipped when the subject contains
  // "<keyword>sucht"; later keywords in the list are still considered.
  const subjectKeyword = keywords.find(
    (kw) => subjectLower.includes(kw) && !subjectLower.includes(`${kw}sucht`),
  );
  if (subjectKeyword !== undefined) {
    keywordHitsSubject.push(subjectKeyword);
    score += SCORE_WEIGHTS.SUBJECT_GAMBLING_KEYWORD;
  }

  // ── Money pattern in the subject (currency symbol next to a digit) ──
  if (/[€$£]\s*\d|\d\s*[€$£]/.test(subject)) {
    styleFlags.push("money-pattern");
    score += SCORE_WEIGHTS.SUBJECT_MONEY_PATTERN;
  }

  // ── Urgency phrases in the subject ──
  const URGENCY_PATTERNS = [
    "nur heute", "letzte chance", "läuft ab", "ablaufdatum",
    "expires", "last chance", "limited time", "jetzt einlösen",
    "sofort", "nur noch", "endet heute",
  ];
  if (URGENCY_PATTERNS.some((p) => subjectLower.includes(p))) {
    styleFlags.push("urgency");
    score += SCORE_WEIGHTS.SUBJECT_URGENCY;
  }

  // ── ALL-CAPS word in the subject ──
  // NOTE(review): the pattern is ASCII-only, so capitalised umlauts (Ä/Ö/Ü)
  // split a word instead of counting towards it — confirm whether that matters.
  if (/\b[A-Z]{4,}\b/.test(subject)) {
    styleFlags.push("all-caps");
    score += SCORE_WEIGHTS.SUBJECT_ALL_CAPS_WORD;
  }

  // ── Short random domain (root of 1–5 chars mixing letters and digits) ──
  const shortRandomDomain =
    domainRoot.length > 0 &&
    domainRoot.length <= 5 &&
    /[a-z]/.test(domainRoot) &&
    /[0-9]/.test(domainRoot);
  if (shortRandomDomain) {
    styleFlags.push("short-random-domain");
    score += SCORE_WEIGHTS.DOMAIN_SHORT_RANDOM;
  }

  // ── Layer 2.5 score additions (exactly one of the two signals present) ──
  if (brandMatchFound && !randomTokensFound) {
    score += SCORE_WEIGHTS.BRAND_MATCH_NO_RANDOM;
  }
  if (!brandMatchFound && randomTokensFound) {
    score += SCORE_WEIGHTS.RANDOM_TOKENS_NO_BRAND;
  }

  return {
    score: Math.min(score, 100),
    keywordHitsSubject,
    keywordHitsDomain,
    keywordHitsName,
    styleFlags,
    whitelistHit: false,
  };
}
|
||
|
||
// ─── Haupt-Pipeline ───────────────────────────────────────────────────────────
|
||
|
||
/** Input bundle for classifyMail. */
export interface ClassifyMailParams {
  /** Header fields of the mail to classify. */
  mail: MailInput;
  /** Set of blocked domains (from getBlocklistedDomainsSet). */
  blockedDomainSet: Set<string>;
  /**
   * Display-name patterns (globally curated plus optional user scope) from
   * getMailDisplayNamePatterns().
   * Layer 2.6: case-insensitive substring match against senderName.
   * Empty array (or omitted) as long as no patterns have been loaded.
   *
   * GDPR: no PII — pure heuristic patterns (e.g. ["Tipico", "Bet365"]).
   */
  customDisplayNames?: string[];
}
|
||
|
||
/**
 * Classifies a single mail by running it through all layers in order:
 *
 *   1.   Whitelist term in address, subject or display name → passed ("whitelist")
 *   2.   Sender domain on the blocklist                     → blocked ("domain")
 *        Relay-decoded domain on the blocklist              → blocked ("relay-decoded")
 *   2.5  Brand match AND random local-part tokens           → blocked ("brand+random")
 *   2.6  Display name contains a supplied pattern           → blocked ("custom-display-name")
 *   3.   Score: >= 80 blocked, < 25 passed ("no-signal"),
 *        otherwise blocked from 50 upward
 *
 * The whitelist is evaluated before the blocklist, so a whitelist term wins
 * over a blocked domain.
 *
 * Fully deterministic — no external calls, no PII leaves the server.
 * DB writes (MailBlocked, MailClassificationSample) are the caller's job.
 *
 * @param params Mail headers, blocklist set and optional display-name patterns.
 * @returns The decision together with the features that led to it.
 */
export async function classifyMail(params: ClassifyMailParams): Promise<ClassificationResult> {
  const { mail, blockedDomainSet, customDisplayNames } = params;
  const { senderEmail, senderName, subject } = mail;

  const senderEmailLower = senderEmail.toLowerCase();
  const addressParts = senderEmailLower.split("@");
  const localPart = addressParts[0] ?? "";
  const domain = addressParts[1] ?? "";

  /** Builds a result for rule-based decisions that carry no score breakdown. */
  const ruleResult = (
    action: ClassificationAction,
    triggerSource: TriggerSource,
    resultScore: number,
    decodedDomain: string | null,
    flags: Partial<
      Pick<
        ClassificationFeatures,
        "domainBlocked" | "relayDecoded" | "brandMatch" | "randomTokens" | "whitelistHit"
      >
    > = {},
  ): ClassificationResult => ({
    action,
    triggerSource,
    score: resultScore,
    relayDecodedDomain: decodedDomain,
    features: {
      score: resultScore,
      domainBlocked: flags.domainBlocked ?? false,
      relayDecoded: flags.relayDecoded ?? false,
      brandMatch: flags.brandMatch ?? false,
      randomTokens: flags.randomTokens ?? false,
      keywordHitsSubject: [],
      keywordHitsDomain: [],
      keywordHitsName: [],
      styleFlags: [],
      whitelistHit: flags.whitelistHit ?? false,
    },
  });

  // ── Layer 1: Whitelist ──────────────────────────────────────────────────────
  const haystack = `${senderEmailLower} ${subject} ${senderName ?? ""}`.toLowerCase();
  if ((GAMBLING_WHITELIST as string[]).some((w) => haystack.includes(w))) {
    return ruleResult("passed", "whitelist", 0, null, { whitelistHit: true });
  }

  // ── Layer 2: Domain hard block ──────────────────────────────────────────────
  if (domain && blockedDomainSet.has(domain)) {
    return ruleResult("blocked", "domain", 100, null, { domainBlocked: true });
  }

  // ── Layer 2: Relay-decoded domain block ─────────────────────────────────────
  const relayDecodedDomain = extractRelayedDomain(senderEmailLower);
  if (relayDecodedDomain && blockedDomainSet.has(relayDecodedDomain)) {
    return ruleResult("blocked", "relay-decoded", 100, relayDecodedDomain, { relayDecoded: true });
  }
  const relayDecoded = Boolean(relayDecodedDomain);

  // ── Layer 2.5: Brand + random-token hard block ──────────────────────────────
  // Brand match only inspects the sender domain and the relay domain — not the
  // display name. Display-name based brand matching was removed in v1.0 (too
  // prone to false positives); v1.1 may add it back to the candidates.
  const brandCandidates = [
    ...domainToBrandCandidates(domain),
    ...(relayDecodedDomain ? domainToBrandCandidates(relayDecodedDomain) : []),
  ];
  const brandMatch = brandCandidates.some(
    (candidate) => candidate.length >= 4 && matchesGamblingBrand(candidate),
  );
  const randomTokens = hasRandomTokens(localPart);

  if (brandMatch && randomTokens) {
    return ruleResult("blocked", "brand+random", 100, relayDecodedDomain, {
      relayDecoded,
      brandMatch: true,
      randomTokens: true,
    });
  }

  // ── Layer 2.6: Display-name hard block (globally curated + user scope) ──────
  // Patterns come from getMailDisplayNamePatterns() — an admin-curated global
  // list of gambling brands (e.g. "Tipico", "Bet365") plus optional user-scope
  // patterns.
  // Substring match (not exact) so that "Tipico Casino" and "TIPICO Bonus" are
  // both caught by the pattern "Tipico". Gambling brands actively rotate their
  // capitalisation, hence the case-insensitive comparison. Empty patterns are
  // ignored because they would match every name.
  if (customDisplayNames && customDisplayNames.length > 0 && senderName) {
    const senderNameLower = senderName.toLowerCase();
    const displayNameMatches = customDisplayNames.some(
      (pattern) => pattern.length > 0 && senderNameLower.includes(pattern.toLowerCase()),
    );
    if (displayNameMatches) {
      return ruleResult("blocked", "custom-display-name", 100, relayDecodedDomain, {
        relayDecoded,
        brandMatch,
        randomTokens,
      });
    }
  }

  // ── Layer 3: Score ──────────────────────────────────────────────────────────
  const scoreResult = computeScore(senderEmailLower, senderName, subject, brandMatch, randomTokens);

  /** Builds a result that carries the full score breakdown. */
  const scoredResult = (
    action: ClassificationAction,
    triggerSource: TriggerSource,
    resultScore: number,
  ): ClassificationResult => ({
    action,
    triggerSource,
    score: resultScore,
    relayDecodedDomain,
    features: {
      ...scoreResult,
      score: resultScore,
      domainBlocked: false,
      relayDecoded,
      brandMatch,
      randomTokens,
    },
  });

  // computeScore repeats the whitelist check; honour it if it fires.
  if (scoreResult.whitelistHit) {
    return scoredResult("passed", "whitelist", 0);
  }

  const score = scoreResult.score;

  // Score >= 80 → hard block
  if (score >= SCORE_HARD_BLOCK_THRESHOLD) {
    return scoredResult("blocked", `score:${score}`, score);
  }

  // Score < 25 → PASS
  if (score < SCORE_PASS_BELOW) {
    return scoredResult("passed", "no-signal", score);
  }

  // Score 25–79 → PASS below 50, BLOCK from 50 upward
  const midAction: ClassificationAction = score >= SCORE_BLOCK_MIDRANGE ? "blocked" : "passed";
  return scoredResult(midAction, `score:${score}`, score);
}
|