Layer 0–4 Klassifikations-Pipeline in mail-classifier.ts: - Layer 2: Domain-Hard-Block + Relay-Decoder (=domain.tld aus SendGrid/Mailchimp-Bounces) - Layer 2.5: Brand+Random-Token-Hard-Block (Gambling-Brand-Normalisierung + Random-Token-Detection) verhindert LLM-Call für bekannte Gambling-Relayer (Gamblezen, BetandPlay etc.) - Layer 3: Score 0–100 (TS-Gewichte: Domain-Keywords, Subject-Keywords, Name-Match, Geld-Pattern, Urgency, All-Caps, Short-Random-Domain, Brand/Random-Ergänzungen) - Layer 4: Groq Llama 3.3 70B Borderline-Klassifikation (Score 25–75) mit Local-Part-Redaction (DSGVO: nur behalten wenn local-part selbst Keyword enthält) - Layer 5: MailClassificationSample-Insert nach jeder Klassifikation (ML-Phase 3) Migrations: - 20260514_add_mail_blocked_trigger_source: ADD COLUMN trigger_source auf mail_blocked - 20260514_add_mail_classification_sample: CREATE TABLE mail_classification_samples 50 neue Tests (mail-classifier.test.ts): alle Layer, beide Screenshot-Beispiele (Gamblezen + BetandPlay) bestätigt als Layer-2.5-Hard-Block ohne LLM-Call, Whitelist, Score, Redaction. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
263 lines
9.3 KiB
TypeScript
263 lines
9.3 KiB
TypeScript
import { ImapFlow } from "imapflow";
|
||
import {
|
||
getMailConnections,
|
||
deleteOldMailBlocked,
|
||
getAlreadyBlockedUidSet,
|
||
insertMailBlocked,
|
||
upsertMailBlockedStat,
|
||
updateMailConnectionScanStats,
|
||
insertMailClassificationSample,
|
||
} from "../../db/mail";
|
||
import { getBlocklistedDomainsSet } from "../../db/domains";
|
||
import { getProfile } from "../../db/profile";
|
||
import { getPlanLimits } from "../../utils/plan-features";
|
||
import { resolveProviderMeta } from "../../utils/imap-providers";
|
||
import { resolveImapAuth } from "../../utils/mail-auth";
|
||
import { classifyMail } from "../../utils/mail-classifier";
|
||
|
||
/**
 * POST /api/mail/scan-internal
 *
 * Called by cron or the IMAP proxy. Scans ALL selectable mailbox folders of
 * every consented mail connection of the given user.
 * Free: only custom domains + keywords. Pro/Legend: global blocklist + custom.
 *
 * Classification pipeline: layers 0–4 via mail-classifier.ts.
 * Layer 5 (sample capture): after every classification.
 *
 * Request: header `x-admin-secret` plus a JSON body `{ userId }`.
 * Response: `{ ok, scanned, blocked, skippedNoConsent }`.
 *
 * @throws 401 when the admin secret is missing or does not match.
 * @throws 400 when `userId` is missing from the request body.
 */
export default defineEventHandler(async (event) => {
  const secret = getHeader(event, "x-admin-secret");
  const adminSecret = process.env.NUXT_ADMIN_SECRET || process.env.ADMIN_SECRET;
  if (!secret || !adminSecret || secret !== adminSecret) {
    throw createError({ statusCode: 401, message: "Unauthorized" });
  }

  const body = (await readBody(event)) as { userId?: string };
  const userId = body?.userId;
  if (!userId) {
    throw createError({ statusCode: 400, message: "userId missing" });
  }

  const connections = await getMailConnections(userId);
  if (connections.length === 0) {
    return { ok: true, scanned: 0, blocked: 0, skippedNoConsent: 0 };
  }

  // Consent gate (GDPR Art. 9): the cron run is NOT user-initiated, so Art. 9 data
  // must not be processed without explicit consent. Connections without consentAt
  // are skipped.
  const skippedNoConsent = connections.filter((c) => !c.consentAt).length;
  const eligibleConnections = connections.filter((c) => c.consentAt);

  if (skippedNoConsent > 0) {
    console.log(
      `[scan-internal] skipping ${skippedNoConsent} connections — no consent_at (pending re-consent)`,
    );
  }

  if (eligibleConnections.length === 0) {
    return { ok: true, scanned: 0, blocked: 0, skippedNoConsent };
  }

  // Plan-aware blocklist: the global list applies on a "full" plan or while the
  // profile's grace period has not yet expired.
  const profile = await getProfile(userId);
  const limits = getPlanLimits(profile?.plan ?? "free");
  const inGrace =
    profile?.globalBlocklistGraceUntil != null &&
    new Date(profile.globalBlocklistGraceUntil) > new Date();
  const includeGlobal = limits.globalBlocklist === "full" || inGrace;

  await deleteOldMailBlocked(userId);

  // Groq API key and MS OAuth client id: runtimeConfig first, then environment.
  const config = useRuntimeConfig(event);
  const groqApiKey: string = (config.groqApiKey as string) || process.env.GROQ_API_KEY || "";
  const msClientId: string =
    (config.msOauthClientId as string) || process.env.MS_OAUTH_CLIENT_ID || "";

  // Only the newest SCAN_LIMIT messages (by sequence number) of each folder are fetched.
  const SCAN_LIMIT = 200;

  let totalScanned = 0;
  let totalBlocked = 0;

  for (const connection of eligibleConnections) {
    let imapAuth: { user: string; accessToken: string } | { user: string; pass: string };
    try {
      imapAuth = await resolveImapAuth(connection, msClientId);
    } catch (err) {
      // Without credentials this connection cannot be scanned; carry on with the others.
      console.warn(
        `[scan-internal] ${connection.email} auth resolution failed — skipping connection:`,
        describeScanError(err),
      );
      continue;
    }

    const useImplicitTls = !connection.useStarttls;
    const imap = new ImapFlow({
      host: connection.imapHost,
      port: connection.imapPort,
      secure: useImplicitTls,
      ...(connection.useStarttls ? { requireTLS: true } : {}),
      auth: imapAuth,
      logger: false,
      tls: { rejectUnauthorized: connection.rejectUnauthorized ?? true },
    });

    let scanned = 0;
    let newlyBlocked = 0;

    try {
      await imap.connect();

      const mailboxes = await imap.list();
      const scannable = mailboxes.filter(
        (mb: any) => !mb.flags?.has("\\Noselect"),
      );
      console.log(
        `[scan-internal] ${connection.email} scanning ${scannable.length} folders`,
      );

      for (const mb of scannable) {
        let lock: any;
        try {
          lock = await imap.getMailboxLock(mb.path);
        } catch {
          // Folder could not be locked — best effort, move on to the next folder.
          continue;
        }

        try {
          const status = await imap.status(mb.path, { messages: true });
          const msgCount = (status as any).messages ?? 0;
          if (msgCount === 0) continue;

          const fetchRange =
            msgCount > SCAN_LIMIT ? `${msgCount - SCAN_LIMIT + 1}:*` : "1:*";
          const allMessages = await imap.fetchAll(fetchRange, {
            envelope: true,
          });
          scanned += allMessages.length;
          totalScanned += allMessages.length;

          // Dedupe key is "<folder path>:<uid>" (sequence number if the uid is absent).
          const allUids = allMessages.map(
            (m: any) => `${mb.path}:${String(m.uid ?? m.seq)}`,
          );

          // Collect all sender domains for the blocklist lookup.
          const senderDomains = allMessages
            .map((m: any) =>
              ((m.envelope?.from?.[0]?.address ?? "").toLowerCase().split("@")[1] ?? ""),
            )
            .filter(Boolean);

          const [blockedDomainSet, alreadyBlockedSet] = await Promise.all([
            getBlocklistedDomainsSet(senderDomains, userId, includeGlobal),
            getAlreadyBlockedUidSet(allUids, userId),
          ]);

          const toInsert: Parameters<typeof insertMailBlocked>[0] = [];
          const uidsToDelete: string[] = [];
          const sampleInserts: Parameters<typeof insertMailClassificationSample>[0][] = [];

          for (const msg of allMessages) {
            const from = msg.envelope?.from?.[0];
            const senderEmail = (from?.address ?? "").toLowerCase();
            const senderName = from?.name ?? null;
            const subject = (msg.envelope?.subject ?? "").trim();
            const msgDate = msg.envelope?.date ?? new Date();
            const uid = `${mb.path}:${String(msg.uid ?? msg.seq)}`;

            // Layer 0: already blocked → skip, no sample.
            if (alreadyBlockedSet.has(uid)) continue;

            const result = await classifyMail({
              mail: { senderEmail, senderName, subject },
              blockedDomainSet,
              groqApiKey,
            });

            // Layer 5: sample capture (always, except for layer 0 skips).
            const senderDomain = senderEmail.split("@")[1] ?? null;
            sampleInserts.push({
              userId,
              connectionId: connection.id,
              senderName: senderName?.slice(0, 255) ?? null,
              senderDomain: senderDomain?.slice(0, 255) ?? null,
              relayDecodedDomain: result.relayDecodedDomain?.slice(0, 255) ?? null,
              subject: subject.slice(0, 998) || null,
              features: result.features as unknown as Record<string, unknown>,
              finalAction: result.action,
              triggerSource: result.triggerSource,
              groqIsGambling: result.groq?.isGambling ?? null,
              groqConfidence: result.groq?.confidence ?? null,
              groqReason: result.groq?.reason ?? null,
            });

            if (result.action !== "blocked") continue;

            uidsToDelete.push(String(msg.uid));
            toInsert.push({
              userId,
              connectionId: connection.id,
              gmailMessageId: uid,
              senderEmail: senderEmail || "unbekannt",
              senderName,
              subject: subject.slice(0, 200) || "(kein Betreff)",
              receivedAt: msgDate,
              action: "deleted",
              triggerSource: result.triggerSource,
            });
            newlyBlocked++;
          }

          if (uidsToDelete.length > 0) {
            try {
              await imap.messageDelete(uidsToDelete.join(","), { uid: true });
            } catch (err) {
              console.warn(
                `[scan-internal] ${connection.email} | ${mb.path} | bulk delete failed, falling back to \\Deleted flag + expunge:`,
                describeScanError(err),
              );
              // Fallback: flag each message individually, then expunge. Best effort.
              try {
                for (const deleteUid of uidsToDelete) {
                  await imap
                    .messageFlagsAdd(deleteUid, ["\\Deleted"], { uid: true })
                    .catch(() => {});
                }
                await (imap as any).expunge().catch(() => {});
              } catch {
                /* ignore */
              }
            }
            console.log(
              `[scan-internal] ${connection.email} | ${mb.path} | deleted ${uidsToDelete.length} gambling mails`,
            );
          }

          // Skip the DB call entirely when nothing was blocked in this folder.
          if (toInsert.length > 0) {
            await insertMailBlocked(toInsert);
          }

          // Samples are fire-and-forget (the scan result does not depend on them).
          if (sampleInserts.length > 0) {
            void Promise.all(
              sampleInserts.map((s) => insertMailClassificationSample(s)),
            ).catch((err) => {
              console.warn("[scan-internal] sample insert failed (non-fatal):", err);
            });
          }

          if (toInsert.length > 0) {
            const providerMeta = resolveProviderMeta(connection.imapHost);
            await upsertMailBlockedStat({
              userId,
              mailConnectionId: connection.id,
              provider: providerMeta.provider,
              providerLabel: providerMeta.providerLabel,
              count: toInsert.length,
            });
          }
        } finally {
          lock.release();
        }
      }
    } catch (err) {
      // A failure aborts the scan of this connection only; log it so that a broken
      // scan is distinguishable from an empty one, then continue with the next one.
      console.warn(
        `[scan-internal] ${connection.email} scan aborted:`,
        describeScanError(err),
      );
    } finally {
      try {
        await imap.logout();
      } catch {
        /* logout is best effort — the connection may already be gone */
      }
    }

    totalBlocked += newlyBlocked;
    await updateMailConnectionScanStats(
      connection.id,
      scanned,
      newlyBlocked,
      connection.emailsBlocked,
      connection.emailsScanned,
      connection.scanInterval,
    );
  }

  return { ok: true, scanned: totalScanned, blocked: totalBlocked, skippedNoConsent };
});

/** Reduces an unknown thrown value to a short message that is safe to log. */
function describeScanError(err: unknown): string {
  return err instanceof Error ? err.message : String(err);
}
|