rebreak-monorepo/backend/server/api/mail/scan-internal.post.ts
chahinebrini b757486579 fix(mail): forceFullSweep on domain-add + 30s idle tick
Domain/display-name adds now force a full re-scan so newly-added gambling
senders are caught immediately instead of waiting for the incremental UID
window. IMAP-idle NOOP tick lowered 2min -> 30s to close the Junk-folder gap
faster (Outlook drops straight into Junk, which IDLE does not watch).

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-07 00:11:01 +02:00

422 lines
18 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import { ImapFlow } from "imapflow";
import {
getMailConnections,
deleteOldMailBlocked,
getAlreadyBlockedUidSet,
insertMailBlocked,
upsertMailBlockedStat,
updateMailConnectionScanStats,
insertMailClassificationSample,
patchFolderScanState,
markFullSweepDone,
} from "../../db/mail";
import { getBlocklistedDomainsSet, getMailDisplayNamePatterns } from "../../db/domains";
import { getProfile } from "../../db/profile";
import { getPlanLimits } from "../../utils/plan-features";
import { resolveProviderMeta } from "../../utils/imap-providers";
import { resolveImapAuth } from "../../utils/mail-auth";
import { classifyMail } from "../../utils/mail-classifier";
/** Upper bound on messages fetched per folder during a full sweep (the most recent ones). */
const SCAN_LIMIT = 200;

/** A connection is due for a quality full sweep once the previous one is older than this. */
const FULL_SWEEP_INTERVAL_MS = 24 * 3_600_000;

/**
 * Special-use flags of system folders that are excluded from scanning.
 * Junk/Spam is intentionally NOT listed: per the original note, casino mails
 * frequently land directly in the spam folder.
 */
const SKIP_SPECIAL_USE = /^\\(All|Drafts|Sent|Trash|Flagged)$/;

/**
 * POST /api/mail/scan-internal
 *
 * Internal scan endpoint (per the original note: called by cron or the IMAP proxy).
 * For every consented mail connection of the given user it scans all selectable
 * folders except system folders, classifies each message via `classifyMail`,
 * removes messages classified as "blocked" and records them in the database.
 *
 * Plan handling: the global blocklist is included only when the plan limits report
 * `globalBlocklist === "full"` or the profile is inside its grace period
 * (original note: Free = custom domains + keywords only, Pro/Legend = global + custom).
 *
 * Classification pipeline: the layers are implemented in mail-classifier.ts
 * (the original note reads "Layer 04", presumably the range 0-4 — confirm).
 * Layer 0 (skip already-blocked UIDs) and Layer 5 (sample capture after every
 * classification) are handled in this file.
 *
 * Request: header `x-admin-secret`; JSON body `{ userId: string; forceFullSweep?: boolean }`.
 *
 * @returns `{ ok: true, scanned, blocked, skippedNoConsent }` with totals across all connections.
 * @throws 401 when the admin secret is missing or does not match.
 * @throws 400 when `userId` is missing from the body.
 */
export default defineEventHandler(async (event) => {
  // Shared-secret gate: the header must match the configured admin secret.
  const secret = getHeader(event, "x-admin-secret");
  const adminSecret = process.env.NUXT_ADMIN_SECRET || process.env.ADMIN_SECRET;
  if (!secret || !adminSecret || secret !== adminSecret) {
    throw createError({ statusCode: 401, message: "Unauthorized" });
  }

  const body = (await readBody(event)) as { userId?: string; forceFullSweep?: boolean };
  const userId = body?.userId;
  if (!userId) {
    throw createError({ statusCode: 400, message: "userId missing" });
  }

  const connections = await getMailConnections(userId);
  if (connections.length === 0) return { ok: true, scanned: 0, blocked: 0, skippedNoConsent: 0 };

  // Consent gate (GDPR Art. 9): a cron run is NOT user-initiated — Art. 9 data must not
  // be processed without explicit consent. Connections without consent_at are skipped.
  const skippedNoConsent = connections.filter((c) => !c.consentAt).length;
  const eligibleConnections = connections.filter((c) => c.consentAt);
  if (skippedNoConsent > 0) {
    console.log(
      `[scan-internal] skipping ${skippedNoConsent} connections — no consent_at (pending re-consent)`,
    );
  }
  if (eligibleConnections.length === 0) {
    return { ok: true, scanned: 0, blocked: 0, skippedNoConsent };
  }

  // Plan-aware blocklist: global list only for "full" plans or while the grace period runs.
  const profile = await getProfile(userId);
  const limits = getPlanLimits(profile?.plan ?? "free");
  const inGrace =
    profile?.globalBlocklistGraceUntil != null &&
    new Date(profile.globalBlocklistGraceUntil) > new Date();
  const includeGlobal = limits.globalBlocklist === "full" || inGrace;

  await deleteOldMailBlocked(userId);

  const config = useRuntimeConfig(event);
  const oauthClientIds = {
    msClientId: (config.msOauthClientId as string) || process.env.MS_OAUTH_CLIENT_ID || "",
    googleClientId: (config.googleOauthClientId as string) || process.env.GOOGLE_OAUTH_CLIENT_ID || "",
  };

  let totalScanned = 0;
  let totalBlocked = 0;

  for (const connection of eligibleConnections) {
    let imapAuth: { user: string; accessToken: string } | { user: string; pass: string };
    try {
      imapAuth = await resolveImapAuth(connection, oauthClientIds);
    } catch (authErr) {
      // Auth failure affects only this connection; keep scanning the others.
      console.error(`[scan-internal] resolveImapAuth failed for ${connection.email}:`, authErr);
      continue;
    }

    const useImplicitTls = !connection.useStarttls;
    const imap = new ImapFlow({
      host: connection.imapHost,
      port: connection.imapPort,
      secure: useImplicitTls,
      ...(connection.useStarttls ? { requireTLS: true } : {}),
      auth: imapAuth,
      logger: false,
      tls: { rejectUnauthorized: connection.rejectUnauthorized ?? true },
    });

    let scanned = 0;
    let newlyBlocked = 0;

    // ─── Quality full-sweep guard ────────────────────────────────────────────
    // Once per day every folder is treated as lastUid=0 (full sweep) so that
    // blocklist updates also reach older mails. lastUid is only treated as 0
    // temporarily — that 0 is NOT persisted. After the sweep the real maxUid is
    // stored and lastFullSweepAt is updated.
    //
    // forceFullSweep: explicitly requested full sweep (e.g. after a custom domain
    // was added) so old mails of the new domain are caught immediately, regardless
    // of whether the last full sweep was less than 24h ago.
    const needsFullSweep =
      body.forceFullSweep === true ||
      !connection.lastFullSweepAt ||
      Date.now() - new Date(connection.lastFullSweepAt).getTime() > FULL_SWEEP_INTERVAL_MS;

    // Current folder_scan_state from the DB (original note: JSONB, delivered as a plain object).
    const folderScanState = (connection.folderScanState ?? {}) as Record<
      string,
      { lastUid: number; uidvalidity: number }
    >;

    try {
      await imap.connect();
      const mailboxes = await imap.list();

      // Exclude system folders (All Mail, Drafts, Sent, Trash, Flagged); Junk/Spam stays in.
      // Original note: iCloud and GMX often do not report specialUse, so the \Noselect
      // flag is the hard exclusion and the specialUse check is only a soft addition.
      const scannable = mailboxes.filter((mb: any) => {
        if (mb.flags?.has("\\Noselect")) return false;
        if (mb.specialUse && SKIP_SPECIAL_USE.test(mb.specialUse)) return false;
        return true;
      });
      const skippedSystemFolders = mailboxes.length - scannable.length;
      console.log(
        `[scan-internal] ${connection.email} scanning ${scannable.length} folders` +
          (skippedSystemFolders > 0 ? ` (${skippedSystemFolders} system folders skipped)` : "") +
          (needsFullSweep ? " [full-sweep]" : " [incremental]"),
      );

      for (const mb of scannable) {
        let lock: any;
        try {
          lock = await imap.getMailboxLock(mb.path);
        } catch (lockErr) {
          console.warn(`[scan-internal] ${connection.email} | ${mb.path} | getMailboxLock failed, skipping folder:`, lockErr);
          continue;
        }

        try {
          // ─── UID scan strategy ───────────────────────────────────────────
          // 1. Fetch status: messages, uidNext, uidValidity
          const status = await imap.status(mb.path, {
            messages: true,
            uidNext: true,
            uidValidity: true,
          });
          const msgCount = (status as any).messages ?? 0;
          // Original note: imapflow delivers uidValidity as BigInt. UIDVALIDITY is a
          // 32-bit unsigned value, far below Number.MAX_SAFE_INTEGER, so Number() is
          // lossless. Without the conversion JSON serialisation of the scan state
          // would throw on the BigInt.
          const serverUidValidity: number = Number((status as any).uidValidity ?? 0);

          if (msgCount === 0) {
            // Empty folder — still persist state for it so it is not treated as
            // never-scanned on every run.
            if (serverUidValidity > 0) {
              try {
                await patchFolderScanState(connection.id, mb.path, {
                  lastUid: 0,
                  uidvalidity: serverUidValidity,
                });
              } catch (e) {
                console.error(`[scan-internal] persist failed — patchFolderScanState(${mb.path}, empty folder):`, e);
              }
            }
            continue;
          }

          // 2. Read stored state + UIDVALIDITY guard
          const saved = folderScanState[mb.path];
          let lastUid = saved?.lastUid ?? 0;
          if (needsFullSweep) {
            // Quality full sweep: treat as 0 for this run only; the real maxUid is
            // stored after the sweep.
            lastUid = 0;
          } else if (saved && serverUidValidity > 0 && saved.uidvalidity !== serverUidValidity) {
            // UIDVALIDITY changed: the folder was reset server-side, all previously
            // stored UIDs are invalid → full scan for this folder.
            console.log(
              `[scan-internal] ${connection.email} | ${mb.path} | UIDVALIDITY changed ` +
                `(${saved.uidvalidity} -> ${serverUidValidity}) — forcing full sweep for this folder`,
            );
            lastUid = 0;
          }

          // 3. Fetch messages: incremental or full
          let allMessages: any[];
          if (lastUid > 0) {
            // Incremental: search only UIDs > lastUid.
            // IMPORTANT: { uid: true } as second argument is mandatory so the server
            // answers with UIDs instead of sequence numbers; otherwise the following
            // UID fetch would address the wrong (old) messages and miss new mail.
            const found: unknown = await (imap as any).search({ uid: `${lastUid + 1}:*` }, { uid: true });
            // Per RFC 3501 a UID range "n:*" always includes the last message of the
            // mailbox, even when its UID is below n. Without this filter the newest
            // already-scanned message would be re-fetched and re-classified on every
            // run that has no new mail. A non-array result (no match) yields [].
            const newUids: number[] = Array.isArray(found)
              ? found.filter((candidate: number) => candidate > lastUid)
              : [];
            if (newUids.length === 0) {
              // No new messages → skip folder, no fetch needed.
              continue;
            }
            // Fetch only the new UIDs.
            allMessages = await imap.fetchAll(newUids.join(","), { envelope: true }, { uid: true } as any);
          } else {
            // Full sweep (first scan, UIDVALIDITY reset or quality full sweep):
            // the most recent SCAN_LIMIT messages, addressed by sequence number.
            const fetchRange =
              msgCount > SCAN_LIMIT ? `${msgCount - SCAN_LIMIT + 1}:*` : "1:*";
            allMessages = await imap.fetchAll(fetchRange, { envelope: true });
          }
          if (!allMessages || allMessages.length === 0) continue;

          scanned += allMessages.length;
          totalScanned += allMessages.length;

          // Folder-qualified message keys used for the already-blocked lookup.
          const allUids = allMessages.map(
            (m: any) => `${mb.path}:${String(m.uid ?? m.seq)}`,
          );
          // Collect all sender domains for the blocklist lookup.
          const senderDomains = allMessages
            .map((m: any) =>
              ((m.envelope?.from?.[0]?.address ?? "").toLowerCase().split("@")[1] ?? ""),
            )
            .filter(Boolean);

          const [blockedDomainSet, alreadyBlockedSet, customDisplayNames] = await Promise.all([
            getBlocklistedDomainsSet(senderDomains, userId, includeGlobal),
            getAlreadyBlockedUidSet(allUids, userId),
            getMailDisplayNamePatterns(userId),
          ]);

          const toInsert: Parameters<typeof insertMailBlocked>[0] = [];
          const uidsToDelete: string[] = [];
          const sampleInserts: Parameters<typeof insertMailClassificationSample>[0][] = [];

          for (const msg of allMessages) {
            const from = msg.envelope?.from?.[0];
            const senderEmail = (from?.address ?? "").toLowerCase();
            const senderName = from?.name ?? null;
            const subject = (msg.envelope?.subject ?? "").trim();
            const msgDate = msg.envelope?.date ?? new Date();
            const uid = `${mb.path}:${String(msg.uid ?? msg.seq)}`;

            // Layer 0: already blocked → skip, no sample.
            if (alreadyBlockedSet.has(uid)) continue;

            const result = await classifyMail({
              mail: { senderEmail, senderName, subject },
              blockedDomainSet,
              customDisplayNames,
            });

            // Layer 5: sample capture (always, except for Layer 0 skips).
            const senderDomain = senderEmail.split("@")[1] ?? null;
            sampleInserts.push({
              userId,
              connectionId: connection.id,
              senderName: senderName?.slice(0, 255) ?? null,
              senderDomain: senderDomain?.slice(0, 255) ?? null,
              relayDecodedDomain: result.relayDecodedDomain?.slice(0, 255) ?? null,
              subject: subject.slice(0, 998) || null,
              features: result.features as unknown as Record<string, unknown>,
              finalAction: result.action,
              triggerSource: result.triggerSource,
            });

            if (result.action !== "blocked") continue;

            uidsToDelete.push(String(msg.uid));
            toInsert.push({
              userId,
              connectionId: connection.id,
              gmailMessageId: uid,
              senderEmail: senderEmail || "unbekannt",
              senderName,
              subject: subject.slice(0, 200) || "(kein Betreff)",
              receivedAt: msgDate,
              action: "deleted",
              triggerSource: result.triggerSource,
            });
            newlyBlocked++;
          }

          if (uidsToDelete.length > 0) {
            // Gmail detection. Original note: messageDelete() on Gmail does not really
            // delete — the mail stays in "[Gmail]/All Mail". For Gmail the messages are
            // therefore moved to Trash, which Gmail empties on its own schedule.
            const isGmail = connection.imapHost === "imap.gmail.com";
            if (isGmail) {
              // Discover the trash folder via specialUse '\Trash', fallback '[Gmail]/Trash'.
              const trashMailbox = mailboxes.find(
                (mb2: any) => mb2.specialUse === "\\Trash",
              );
              const trashFolder = trashMailbox?.path ?? "[Gmail]/Trash";
              try {
                await imap.messageMove(uidsToDelete.join(","), trashFolder, { uid: true });
                console.log(
                  `[scan-internal] ${connection.email} | ${mb.path} | moved ${uidsToDelete.length} gambling mails to ${trashFolder} (Gmail)`,
                );
              } catch (moveErr) {
                // Move failed — escalate instead of ignoring silently. The run still
                // writes the mail_blocked rows, but the error is logged so that
                // operations/alerting can react.
                console.error(
                  `[scan-internal] Gmail MOVE to ${trashFolder} failed for ${connection.email} | ${mb.path}:`,
                  moveErr,
                );
              }
            } else {
              // Non-Gmail (original note: iCloud, Outlook, IONOS, etc.): regular delete.
              // Failures are logged rather than swallowed, and the success line is
              // printed only when a delete/expunge call actually went through.
              let removed = false;
              try {
                await imap.messageDelete(uidsToDelete.join(","), { uid: true });
                removed = true;
              } catch (deleteErr) {
                console.warn(
                  `[scan-internal] ${connection.email} | ${mb.path} | messageDelete failed, falling back to \\Deleted flag + expunge:`,
                  deleteErr,
                );
                try {
                  for (const uid of uidsToDelete) {
                    await imap
                      .messageFlagsAdd(uid, ["\\Deleted"], { uid: true })
                      .catch((flagErr: unknown) => {
                        console.warn(
                          `[scan-internal] ${connection.email} | ${mb.path} | flagging UID ${uid} as \\Deleted failed:`,
                          flagErr,
                        );
                      });
                  }
                  await (imap as any).expunge();
                  removed = true;
                } catch (expungeErr) {
                  // Best effort: deletion problems must not abort the scan.
                  console.error(
                    `[scan-internal] ${connection.email} | ${mb.path} | fallback expunge failed:`,
                    expungeErr,
                  );
                }
              }
              if (removed) {
                console.log(
                  `[scan-internal] ${connection.email} | ${mb.path} | deleted ${uidsToDelete.length} gambling mails`,
                );
              }
            }
          }

          await insertMailBlocked(toInsert);

          // Samples are fire-and-forget (the scan result does not depend on them).
          if (sampleInserts.length > 0) {
            void Promise.all(sampleInserts.map((s) => insertMailClassificationSample(s))).catch((err) => {
              console.warn("[scan-internal] sample insert failed (non-fatal):", err);
            });
          }

          if (toInsert.length > 0) {
            const providerMeta = resolveProviderMeta(connection.imapHost);
            await upsertMailBlockedStat({
              userId,
              mailConnectionId: connection.id,
              provider: providerMeta.provider,
              providerLabel: providerMeta.providerLabel,
              count: toInsert.length,
            });
          }

          // ─── Persist UID state ───────────────────────────────────────────
          // Highest numeric UID over all fetched messages; persisted only when at
          // least one message carried a UID and the server reported a UIDVALIDITY.
          const maxUid = allMessages.reduce((max: number, m: any) => {
            const u = typeof m.uid === "number" ? m.uid : parseInt(String(m.uid ?? "0"), 10);
            return u > max ? u : max;
          }, 0);
          if (maxUid > 0 && serverUidValidity > 0) {
            try {
              await patchFolderScanState(connection.id, mb.path, {
                lastUid: maxUid,
                uidvalidity: serverUidValidity,
              });
            } catch (e) {
              console.error(`[scan-internal] persist failed — patchFolderScanState(${mb.path}, maxUid=${maxUid}, uidvalidity=${serverUidValidity}):`, e);
            }
          }
        } finally {
          // Always release the mailbox lock, including on `continue` and on errors.
          lock.release();
        }
      }

      // ─── Finish full sweep ─────────────────────────────────────────────────
      if (needsFullSweep) {
        try {
          await markFullSweepDone(connection.id);
          console.log(`[scan-internal] ${connection.email} | full-sweep complete, lastFullSweepAt updated`);
        } catch (e) {
          console.error(`[scan-internal] persist failed — markFullSweepDone(${connection.id}):`, e);
        }
      }

      await imap.logout();
    } catch (connErr) {
      console.error(`[scan-internal] connection-level error for ${connection.email}:`, connErr);
      try {
        await imap.logout();
      } catch {
        // The connection may already be closed; nothing left to clean up.
      }
    }

    totalBlocked += newlyBlocked;
    // Stats are updated even after a connection-level error, with whatever was counted so far.
    await updateMailConnectionScanStats(
      connection.id,
      scanned,
      newlyBlocked,
      connection.emailsBlocked,
      connection.emailsScanned,
      connection.scanInterval,
    );
  }

  return { ok: true, scanned: totalScanned, blocked: totalBlocked, skippedNoConsent };
});