/**
 * Mail classification pipeline (deterministic, no LLM).
 *
 * Architecture:
 *   Layer 0   — Skip guard (already blocked / no consent)
 *   Layer 1   — Whitelist ("wetter", "wettkampf" …) → PASS
 *   Layer 2   — Domain hard block (blocklist), including domains decoded from relay addresses
 *   Layer 2.5 — Brand + random-token detection (hard block, catches Apple Hide-My-Email)
 *   Layer 2.6 — User-defined display-name patterns (hard block)
 *   Layer 3   — Score 0–100 (deterministic); >= 50 → BLOCK, otherwise PASS
 *   Layer 5   — MailClassificationSample insert (always, except Layer 0)
 *
 * All layer logic consists of pure functions → fully unit-testable without DB mocks.
 *
 * GDPR notes:
 *   - Mail contents (body) are never persisted (Art. 9).
 *   - No data leaves the server any more (no LLM third-country transfer).
 *   - userId appears in logs only when absolutely necessary (data minimisation, Art. 5).
 *   - MailClassificationSample: cascade delete via the userId relation (Art. 17).
 */

// eslint-disable-next-line @typescript-eslint/ban-ts-comment
// @ts-ignore — .mjs without type declarations; the exports are string[]
import { GAMBLING_KEYWORDS, GAMBLING_WHITELIST } from "./gambling-keywords.mjs";

// ─── Types ─────────────────────────────────────────────────────────────────────

/** Final verdict for a single mail. */
export type ClassificationAction = "blocked" | "passed";

/** Identifies which layer (or which score) produced the verdict. */
export type TriggerSource =
  | "domain"
  | "relay-decoded"
  | "brand+random"
  | "custom-display-name"
  | `score:${number}`
  | "whitelist"
  | "no-signal";

/** The header fields of a mail that the classifier looks at. */
export interface MailInput {
  /** Sender e-mail address (lowercase, as delivered by IMAP). */
  senderEmail: string;
  /** Display name of the sender (may be empty). */
  senderName: string | null;
  /** Subject line. */
  subject: string;
}

/** Outcome of classifying one mail. */
export interface ClassificationResult {
  action: ClassificationAction;
  triggerSource: TriggerSource;
  score: number;
  /** Real domain extracted from a relay address (e.g. gamblezen.com). */
  relayDecodedDomain: string | null;
  /** Score components destined for MailClassificationSample.features. */
  features: ClassificationFeatures;
}

/** Per-signal breakdown recorded alongside the verdict. */
export interface ClassificationFeatures {
  score: number;
  domainBlocked: boolean;
  relayDecoded: boolean;
  brandMatch: boolean;
  randomTokens: boolean;
  keywordHitsSubject: string[];
  keywordHitsDomain: string[];
  keywordHitsName: string[];
  styleFlags: string[];
  whitelistHit: boolean;
}

// ─── Score weights (TS constants, no config-file overhead) ────────────────────

export const SCORE_WEIGHTS = {
  // Domain indicators
  DOMAIN_GAMBLING_KEYWORD: 40, // domain contains a gambling term (bet, casino, slots …)
  DOMAIN_SHORT_RANDOM: 15, // domain root shorter than 6 chars and random-looking (betx, 1win)

  // Subject indicators
  SUBJECT_GAMBLING_KEYWORD: 35, // keyword in the subject (casino, jackpot, freispiel …)
  SUBJECT_MONEY_PATTERN: 20, // currency sign (€, $ or £) next to a number (e.g. "100€ Bonus")
  SUBJECT_URGENCY: 15, // "Nur heute", "Letzte Chance", "Ablaufdatum"
  SUBJECT_ALL_CAPS_WORD: 5, // a single ALL-CAPS word in the subject

  // Display-name indicators
  SENDER_NAME_GAMBLING_KEYWORD: 30, // gambling term in the sender display name
  // NOTE(review): not read anywhere in this file; the brand signal is scored via BRAND_MATCH_NO_RANDOM.
  SENDER_NAME_BRAND_MATCH: 20, // name matches a known gambling brand (normalized)

  // Layer 2.5 score additions (applied when no hard block was triggered)
  BRAND_MATCH_NO_RANDOM: 35, // brand match without random tokens (no hard block)
  RANDOM_TOKENS_NO_BRAND: 10, // random tokens without a brand match
} as const;

// Hard-block threshold: score >= 80 → BLOCK
const SCORE_HARD_BLOCK_THRESHOLD = 80;
// Pass-below: score < 25 → PASS (no-signal)
const SCORE_PASS_BELOW = 25;
// Mid-range block threshold: for scores in [25, 80) → BLOCK from 50 upwards, otherwise PASS
const SCORE_BLOCK_MIDRANGE = 50;

// ─── Known gambling brands (for brand-match normalization) ────────────────────
// Derived from GAMBLING_KEYWORDS plus typical blocklist domains.
// Normalization rule: lowercase, all special characters and whitespace removed.
/** Brand names in normalized form; matched as substrings by matchesGamblingBrand. */
const GAMBLING_BRANDS: string[] = [
  "casino", "bet365", "bwin", "tipico", "unibet", "betway", "888casino",
  "pokerstars", "interwetten", "netbet", "leovegas", "mrgreen", "betsson",
  "neobet", "mybet", "lottoland", "betano", "williamhill", "paddypower",
  "betfair", "stake", "rolletto", "vbet", "1xbet", "melbet", "mostbet",
  "luckyvibe", "spinz", "casinoly", "rabona", "justcasino", "getslots",
  "rocketplay", "freshcasino", "nomnomcasino", "gamblezen", "betandplay",
];

// ─── Address helpers ──────────────────────────────────────────────────────────

/**
 * Splits an e-mail address into local part and domain at the LAST "@".
 *
 * A quoted local part may itself contain "@" (e.g. `"a@b"@casino.com`).
 * Taking the second "@"-separated segment would yield `b"` as the domain and
 * let such a sender slip past the blocklist, so the domain is always the text
 * after the final "@".
 *
 * @param address Sender address; not modified (no trimming, no case folding).
 * @returns `domain` is "" when the address contains no "@"; `localPart` is then the whole input.
 */
function splitSenderAddress(address: string): { localPart: string; domain: string } {
  const at = address.lastIndexOf("@");
  if (at === -1) return { localPart: address, domain: "" };
  return { localPart: address.slice(0, at), domain: address.slice(at + 1) };
}

// ─── Relay decoder ────────────────────────────────────────────────────────────

// `user=domain.tld` (SendGrid, Mailchimp, SES bounces)
const RELAY_EQ_PATTERN = /=([a-z0-9][\w-]*\.[a-z]{2,}(?:\.[a-z]{2,})?)(?:[+_&]|$)/i;
// `_at_domain.tld` (less common, some custom relay setups)
const RELAY_AT_PATTERN = /_at_([a-z0-9][\w-]*\.[a-z]{2,}(?:\.[a-z]{2,})?)(?:[_+]|$)/i;

/**
 * Extracts the real target domain from an e-mail relay address.
 *
 * Known shapes:
 *   bounces+user=example.com@sendgrid.net     → example.com
 *   track.user=gamblezen.com@mailchimp.com    → gamblezen.com
 *   a1b2c3_user_at_betandplay.com@em.em.xyz   → betandplay.com
 *   user=betandplay.com@bounce.em.example     → betandplay.com
 *
 * Looks for `=domain.tld` first, then `_at_domain.tld`, in the text before the
 * first "@". The embedded domain may have at most three labels.
 *
 * @param senderEmail Full sender address.
 * @returns The embedded domain in lowercase, or null when there is no "@" or no pattern matches.
 */
export function extractRelayedDomain(senderEmail: string): string | null {
  if (!senderEmail.includes("@")) return null;
  const localPart = senderEmail.split("@")[0] ?? "";

  const eqMatch = localPart.match(RELAY_EQ_PATTERN);
  if (eqMatch?.[1]) return eqMatch[1].toLowerCase();

  const atMatch = localPart.match(RELAY_AT_PATTERN);
  if (atMatch?.[1]) return atMatch[1].toLowerCase();

  return null;
}

// ─── Brand normalization ──────────────────────────────────────────────────────

/**
 * Normalizes a string for brand comparisons: lowercases it and removes
 * whitespace, hyphens, dots and underscores.
 *
 * "BetandPlay" → "betandplay", "bet-and-play" → "betandplay".
 * A TLD is NOT removed: "bet-and-play.com" → "betandplaycom".
 */
export function normalizeBrand(s: string): string {
  return s.toLowerCase().replace(/[\s\-._]/g, "");
}

/**
 * Checks whether a normalized string contains a known gambling brand.
 *
 * Inputs shorter than 4 characters never match, to avoid false positives
 * ("bet" on its own is too short).
 *
 * @param normalized A string already passed through normalizeBrand.
 */
export function matchesGamblingBrand(normalized: string): boolean {
  if (normalized.length < 4) return false;
  // Substring containment also covers exact equality.
  return GAMBLING_BRANDS.some((brand) => normalized.includes(brand));
}

/**
 * Derives brand candidates from a domain for the match check.
 *
 * Returns the normalized first label and the normalized full domain:
 * "betand-play.com" → ["betandplay", "betandplaycom"].
 */
function domainToBrandCandidates(domain: string): string[] {
  const root = domain.split(".")[0] ?? "";
  return [normalizeBrand(root), normalizeBrand(domain)];
}

// ─── Random-token detection ───────────────────────────────────────────────────

/** Functional mailbox names that should not count as random tokens. */
const FUNCTION_WORDS: ReadonlySet<string> = new Set([
  "info", "admin", "noreply", "no-reply", "support", "hello", "news",
  "marketing", "sales", "contact", "newsletter", "service", "offers",
  "promotions", "promo", "team", "mail", "email", "reply", "bounce",
  "return", "postmaster", "mailer",
]);

/**
 * Detects random-looking tokens in the local part of an e-mail address.
 *
 * The local part is split on runs of `_`, `-`, `.` and `+`. A token counts as
 * random when it has at least 6 characters, contains both an ASCII letter and
 * a digit, and is not a known function word (info, admin, noreply, support …).
 *
 * A local part with two or more such tokens is "random-looking" — typical for
 * bulk mailers that embed trackable user IDs.
 */
export function hasRandomTokens(localPart: string): boolean {
  const tokens = localPart.split(/[_\-.+]+/);
  const randomLooking = tokens.filter((token) => {
    if (token.length < 6) return false;
    if (!/[a-z]/i.test(token) || !/[0-9]/.test(token)) return false; // needs letters AND digits
    // NOTE(review): no entry in FUNCTION_WORDS contains a digit, so this check
    // cannot currently reject a token that passed the test above. Kept so the
    // filter stays in place if alphanumeric entries are added.
    return !FUNCTION_WORDS.has(token.toLowerCase());
  });
  return randomLooking.length >= 2;
}

// ─── Score computation (Layer 3) ──────────────────────────────────────────────

/** Score plus the individual hits that contributed to it. */
interface ScoreResult {
  score: number;
  keywordHitsSubject: string[];
  keywordHitsDomain: string[];
  keywordHitsName: string[];
  styleFlags: string[];
  whitelistHit: boolean;
}

/** Lowercase phrases that mark a subject line as urgent. */
const URGENCY_PATTERNS: readonly string[] = [
  "nur heute", "letzte chance", "läuft ab", "ablaufdatum", "expires",
  "last chance", "limited time", "jetzt einlösen", "sofort", "nur noch",
  "endet heute",
];

/**
 * Computes the deterministic gambling score for one mail.
 *
 * A whitelist term in subject, sender address or sender name short-circuits
 * to score 0 with `whitelistHit: true`. Otherwise each signal group (domain
 * keyword, subject keyword, name keyword, money pattern, urgency, all-caps
 * word, short random domain, Layer 2.5 additions) contributes its weight at
 * most once, and the sum is capped at 100.
 *
 * @param senderEmail Sender address; compared case-insensitively.
 * @param senderName Display name, or null.
 * @param subject Subject line.
 * @param brandMatchFound Result of the Layer 2.5 brand check.
 * @param randomTokensFound Result of the Layer 2.5 random-token check.
 */
export function computeScore(
  senderEmail: string,
  senderName: string | null,
  subject: string,
  brandMatchFound: boolean,
  randomTokensFound: boolean,
): ScoreResult {
  const subjectLower = subject.toLowerCase();
  const senderEmailLower = senderEmail.toLowerCase();
  const senderNameLower = (senderName ?? "").toLowerCase();

  // ── Whitelist check (Layer 1) ──
  const whitelisted = (GAMBLING_WHITELIST as string[]).some(
    (w) =>
      subjectLower.includes(w) ||
      senderEmailLower.includes(w) ||
      senderNameLower.includes(w),
  );
  if (whitelisted) {
    return {
      score: 0,
      keywordHitsSubject: [],
      keywordHitsDomain: [],
      keywordHitsName: [],
      styleFlags: [],
      whitelistHit: true,
    };
  }

  // Domain is the part after the last "@" so quoted local parts cannot spoof it.
  const { domain } = splitSenderAddress(senderEmailLower);
  const domainRoot = domain.split(".")[0] ?? "";
  const keywords = GAMBLING_KEYWORDS as string[];

  let score = 0;
  const keywordHitsSubject: string[] = [];
  const keywordHitsDomain: string[] = [];
  const keywordHitsName: string[] = [];
  const styleFlags: string[] = [];

  // ── Keywords: only the first hit per field is recorded and scored ──
  // The domain root is a prefix of the domain, so checking the domain suffices.
  const domainHit = keywords.find((kw) => domain.includes(kw));
  if (domainHit !== undefined) {
    keywordHitsDomain.push(domainHit);
    score += SCORE_WEIGHTS.DOMAIN_GAMBLING_KEYWORD;
  }

  const subjectHit = keywords.find((kw) => subjectLower.includes(kw));
  if (subjectHit !== undefined) {
    keywordHitsSubject.push(subjectHit);
    score += SCORE_WEIGHTS.SUBJECT_GAMBLING_KEYWORD;
  }

  const nameHit = keywords.find((kw) => senderNameLower.includes(kw));
  if (nameHit !== undefined) {
    keywordHitsName.push(nameHit);
    score += SCORE_WEIGHTS.SENDER_NAME_GAMBLING_KEYWORD;
  }

  // ── Money pattern in the subject (€, $ or £ next to a digit) ──
  if (/[€$£]\s*\d|\d\s*[€$£]/.test(subject)) {
    styleFlags.push("money-pattern");
    score += SCORE_WEIGHTS.SUBJECT_MONEY_PATTERN;
  }

  // ── Urgency wording in the subject ──
  if (URGENCY_PATTERNS.some((p) => subjectLower.includes(p))) {
    styleFlags.push("urgency");
    score += SCORE_WEIGHTS.SUBJECT_URGENCY;
  }

  // ── ALL-CAPS word in the subject (ASCII letters only, 4+ characters) ──
  if (/\b[A-Z]{4,}\b/.test(subject)) {
    styleFlags.push("all-caps");
    score += SCORE_WEIGHTS.SUBJECT_ALL_CAPS_WORD;
  }

  // ── Short random-looking domain root (1–5 chars, letters and digits mixed) ──
  if (
    domainRoot.length > 0 &&
    domainRoot.length <= 5 &&
    /[a-z]/.test(domainRoot) &&
    /[0-9]/.test(domainRoot)
  ) {
    styleFlags.push("short-random-domain");
    score += SCORE_WEIGHTS.DOMAIN_SHORT_RANDOM;
  }

  // ── Layer 2.5 additions (both signals together are a hard block upstream) ──
  if (brandMatchFound && !randomTokensFound) {
    score += SCORE_WEIGHTS.BRAND_MATCH_NO_RANDOM;
  }
  if (!brandMatchFound && randomTokensFound) {
    score += SCORE_WEIGHTS.RANDOM_TOKENS_NO_BRAND;
  }

  return {
    score: Math.min(score, 100),
    keywordHitsSubject,
    keywordHitsDomain,
    keywordHitsName,
    styleFlags,
    whitelistHit: false,
  };
}

// ─── Main pipeline ────────────────────────────────────────────────────────────

export interface ClassifyMailParams {
  mail: MailInput;
  /** Set of blocked domains (from getBlocklistedDomainsSet). */
  blockedDomainSet: Set<string>;
  /**
   * User-specific display-name patterns (from getCustomMailDisplayNames).
   * Layer 2.6: case-insensitive substring match against senderName.
   * Empty array when the user has not configured any display-name patterns.
   *
   * GDPR: no PII — pure heuristic patterns (e.g. ["EXTRASPIN"]).
   */
  customDisplayNames?: string[];
}

/** Boolean layer signals copied into ClassificationFeatures. */
interface LayerSignals {
  domainBlocked: boolean;
  relayDecoded: boolean;
  brandMatch: boolean;
  randomTokens: boolean;
}

/** Assembles a ClassificationResult; hit lists are empty unless score details are given. */
function buildResult(
  action: ClassificationAction,
  triggerSource: TriggerSource,
  score: number,
  relayDecodedDomain: string | null,
  signals: LayerSignals,
  details?: ScoreResult,
): ClassificationResult {
  return {
    action,
    triggerSource,
    score,
    relayDecodedDomain,
    features: {
      score,
      domainBlocked: signals.domainBlocked,
      relayDecoded: signals.relayDecoded,
      brandMatch: signals.brandMatch,
      randomTokens: signals.randomTokens,
      keywordHitsSubject: details?.keywordHitsSubject ?? [],
      keywordHitsDomain: details?.keywordHitsDomain ?? [],
      keywordHitsName: details?.keywordHitsName ?? [],
      styleFlags: details?.styleFlags ?? [],
      whitelistHit: triggerSource === "whitelist",
    },
  };
}

/**
 * Classifies a single mail by running it through all layers.
 * Fully deterministic — no external calls, no PII leaves the server.
 * DB writes (MailBlocked, MailClassificationSample) are the caller's job.
 *
 * Order of evaluation: whitelist → blocklisted sender domain → blocklisted
 * relay-decoded domain → brand + random tokens → custom display-name patterns
 * → score thresholds.
 *
 * @returns A promise of the verdict; the function performs no asynchronous work itself.
 */
export async function classifyMail(params: ClassifyMailParams): Promise<ClassificationResult> {
  const { mail, blockedDomainSet, customDisplayNames } = params;
  const { senderEmail, senderName, subject } = mail;

  const senderEmailLower = senderEmail.toLowerCase();
  // Split at the last "@": a quoted local part containing "@" must not be able
  // to hide the real sender domain from the blocklist.
  const { localPart, domain } = splitSenderAddress(senderEmailLower);

  const noSignals: LayerSignals = {
    domainBlocked: false,
    relayDecoded: false,
    brandMatch: false,
    randomTokens: false,
  };

  // ── Layer 1: whitelist ──────────────────────────────────────────────────────
  const haystack = `${senderEmailLower} ${subject} ${senderName ?? ""}`.toLowerCase();
  if ((GAMBLING_WHITELIST as string[]).some((w) => haystack.includes(w))) {
    return buildResult("passed", "whitelist", 0, null, noSignals);
  }

  // ── Layer 2: domain hard block ──────────────────────────────────────────────
  if (domain && blockedDomainSet.has(domain)) {
    return buildResult("blocked", "domain", 100, null, { ...noSignals, domainBlocked: true });
  }

  // ── Layer 2: relay-decoded domain block ─────────────────────────────────────
  const relayDecodedDomain = extractRelayedDomain(senderEmailLower);
  if (relayDecodedDomain && blockedDomainSet.has(relayDecodedDomain)) {
    return buildResult("blocked", "relay-decoded", 100, relayDecodedDomain, {
      ...noSignals,
      relayDecoded: true,
    });
  }

  // ── Layer 2.5: brand + random-token hard block ──────────────────────────────
  // Candidates: normalized display name, sender domain and relay-decoded domain.
  const brandCandidates = [
    normalizeBrand(senderName ?? ""),
    ...domainToBrandCandidates(domain),
    ...(relayDecodedDomain ? domainToBrandCandidates(relayDecodedDomain) : []),
  ];
  // matchesGamblingBrand already rejects candidates shorter than 4 characters.
  const brandMatch = brandCandidates.some((candidate) => matchesGamblingBrand(candidate));
  const randomTokens = hasRandomTokens(localPart);

  const signals: LayerSignals = {
    domainBlocked: false,
    relayDecoded: !!relayDecodedDomain,
    brandMatch,
    randomTokens,
  };

  if (brandMatch && randomTokens) {
    return buildResult("blocked", "brand+random", 100, relayDecodedDomain, signals);
  }

  // ── Layer 2.6: user-defined display-name hard block ─────────────────────────
  // Display-name patterns disabled in v1.0 — re-enable when display-name input UX ships (v1.1).
  // getCustomMailDisplayNames() returns [] until mail_display_name rows exist,
  // so this block is dead code in practice. Keep logic intact for trivial re-activation.
  //
  // User patterns (e.g. "EXTRASPIN") match case-insensitively as a substring of
  // the sender display name. No score — a match is a direct hard block.
  //
  // Substring (not exact) matching so that "EXTRASPIN Casino" and
  // "ExtraSpin Bonus" are both caught by the pattern "EXTRASPIN".
  //
  // Gambling brands actively rotate capitalization → case-insensitivity is mandatory.
  if (customDisplayNames && customDisplayNames.length > 0 && senderName) {
    const senderNameLower = senderName.toLowerCase();
    const matchedPattern = customDisplayNames.find(
      (pattern) => pattern.length > 0 && senderNameLower.includes(pattern.toLowerCase()),
    );
    if (matchedPattern) {
      return buildResult("blocked", "custom-display-name", 100, relayDecodedDomain, signals);
    }
  }

  // ── Layer 3: score ──────────────────────────────────────────────────────────
  const scoreResult = computeScore(senderEmailLower, senderName, subject, brandMatch, randomTokens);

  if (scoreResult.whitelistHit) {
    return buildResult("passed", "whitelist", 0, relayDecodedDomain, signals, scoreResult);
  }

  const score = scoreResult.score;

  // Score >= 80 → hard block.
  if (score >= SCORE_HARD_BLOCK_THRESHOLD) {
    return buildResult("blocked", `score:${score}`, score, relayDecodedDomain, signals, scoreResult);
  }

  // Score < 25 → PASS without a score trigger.
  if (score < SCORE_PASS_BELOW) {
    return buildResult("passed", "no-signal", score, relayDecodedDomain, signals, scoreResult);
  }

  // Score 25–79 → PASS below 50, BLOCK from 50 upwards (deterministic, no LLM).
  const midAction: ClassificationAction = score >= SCORE_BLOCK_MIDRANGE ? "blocked" : "passed";
  return buildResult(midAction, `score:${score}`, score, relayDecodedDomain, signals, scoreResult);
}