rebreak-monorepo/backend/server/api/mail/scan-internal.post.ts
chahinebrini 00ec716694 fix(mail): skip Gmail system folders in scan + raise subject-keyword score to 50
Fix 1 (scan-internal): filter out \All, \Drafts, \Sent, \Trash, \Flagged via
specialUse — stops [Gmail]/All Mail from consuming the SCAN_LIMIT=200 and
blocking new INBOX mails from reaching fetch range. \Junk/\Spam stay in scope.
Folders without specialUse (iCloud, GMX) pass through untouched — no false
exclusions without confirmed metadata.

Fix 2 (mail-classifier): raise SUBJECT_GAMBLING_KEYWORD from 35 to 50 so a
single unambiguous casino/jackpot/freispiel subject hit alone reaches the
SCORE_BLOCK_MIDRANGE threshold and triggers a block. Previously 35 pts fell
short when sender domain was generic and display name empty.

Tests: 9 new cases added (2 Fix-2 classifier + 4 Fix-1 folder-filter unit +
1 computeScore score=50 exact assertion). All 265 tests green.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-16 05:12:14 +02:00

296 lines
11 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import { ImapFlow } from "imapflow";
import {
getMailConnections,
deleteOldMailBlocked,
getAlreadyBlockedUidSet,
insertMailBlocked,
upsertMailBlockedStat,
updateMailConnectionScanStats,
insertMailClassificationSample,
} from "../../db/mail";
import { getBlocklistedDomainsSet, getCustomMailDisplayNames } from "../../db/domains";
import { getProfile } from "../../db/profile";
import { getPlanLimits } from "../../utils/plan-features";
import { resolveProviderMeta } from "../../utils/imap-providers";
import { resolveImapAuth } from "../../utils/mail-auth";
import { classifyMail } from "../../utils/mail-classifier";
/** IMAP special-use flags of system folders that are excluded from scanning. */
const SKIP_SPECIAL_USE = /^\\(All|Drafts|Sent|Trash|Flagged)$/;

/** Maximum number of most recent messages fetched per folder in one scan run. */
const SCAN_LIMIT = 200;

/**
 * IMAP hosts that are served by Gmail. `imap.googlemail.com` is the legacy
 * alias of `imap.gmail.com`; both need the move-to-Trash deletion path.
 */
const GMAIL_IMAP_HOSTS = new Set(["imap.gmail.com", "imap.googlemail.com"]);

/**
 * POST /api/mail/scan-internal
 * Called by cron or IMAP proxy. Scans ALL mailbox folders (except the system
 * folders matched by SKIP_SPECIAL_USE and folders flagged \Noselect).
 * Free: only custom domains + keywords. Pro/Legend: global blocklist + custom.
 *
 * Classification pipeline: layers 0-4 via mail-classifier.ts.
 * Layer 5 (sample capture): after every classification.
 *
 * Request: header `x-admin-secret` must equal NUXT_ADMIN_SECRET / ADMIN_SECRET;
 * JSON body `{ userId }`.
 * Response: `{ ok, scanned, blocked, skippedNoConsent }`.
 *
 * @throws 401 when the admin secret is missing or wrong.
 * @throws 400 when `userId` is missing from the body.
 */
export default defineEventHandler(async (event) => {
  // Only callers presenting the shared admin secret may trigger a scan.
  const secret = getHeader(event, "x-admin-secret");
  const adminSecret = process.env.NUXT_ADMIN_SECRET || process.env.ADMIN_SECRET;
  if (!secret || !adminSecret || secret !== adminSecret) {
    throw createError({ statusCode: 401, message: "Unauthorized" });
  }

  const body = (await readBody(event)) as { userId?: string };
  const userId = body?.userId;
  if (!userId)
    throw createError({ statusCode: 400, message: "userId missing" });

  const connections = await getMailConnections(userId);
  if (connections.length === 0) return { ok: true, scanned: 0, blocked: 0, skippedNoConsent: 0 };

  // Consent gate (GDPR Art. 9): the cron run is NOT user-initiated — Art. 9 data must
  // not be processed without explicit consent. Skip connections without consent_at.
  const skippedNoConsent = connections.filter((c) => !c.consentAt).length;
  const eligibleConnections = connections.filter((c) => c.consentAt);
  if (skippedNoConsent > 0) {
    console.log(
      `[scan-internal] skipping ${skippedNoConsent} connections — no consent_at (pending re-consent)`,
    );
  }
  if (eligibleConnections.length === 0) {
    return { ok: true, scanned: 0, blocked: 0, skippedNoConsent };
  }

  // Plan-aware blocklist: the global list is used on plans with full access or
  // while the profile's grace period has not expired yet.
  const profile = await getProfile(userId);
  const limits = getPlanLimits(profile?.plan ?? "free");
  const inGrace =
    profile?.globalBlocklistGraceUntil != null &&
    new Date(profile.globalBlocklistGraceUntil) > new Date();
  const includeGlobal = limits.globalBlocklist === "full" || inGrace;

  await deleteOldMailBlocked(userId);

  const config = useRuntimeConfig(event);
  const msClientId: string = (config.msOauthClientId as string) || process.env.MS_OAUTH_CLIENT_ID || "";

  let totalScanned = 0;
  let totalBlocked = 0;

  for (const connection of eligibleConnections) {
    let imapAuth: { user: string; accessToken: string } | { user: string; pass: string };
    try {
      imapAuth = await resolveImapAuth(connection, msClientId);
    } catch (authErr) {
      // Best effort: a connection whose credentials cannot be resolved is skipped,
      // but the reason is logged so a permanently broken connection is visible.
      console.warn(
        `[scan-internal] skipping ${connection.email} — could not resolve IMAP auth:`,
        authErr instanceof Error ? authErr.message : authErr,
      );
      continue;
    }

    const useImplicitTls = !connection.useStarttls;
    const imap = new ImapFlow({
      host: connection.imapHost,
      port: connection.imapPort,
      secure: useImplicitTls,
      ...(connection.useStarttls ? { requireTLS: true } : {}),
      auth: imapAuth,
      logger: false,
      tls: { rejectUnauthorized: connection.rejectUnauthorized ?? true },
    });

    // Host names are case-insensitive; compare normalized against all Gmail hosts.
    const isGmail = GMAIL_IMAP_HOSTS.has(String(connection.imapHost).toLowerCase());

    let scanned = 0;
    let newlyBlocked = 0;

    try {
      await imap.connect();
      const mailboxes = await imap.list();

      // Exclude system folders: All Mail, Drafts, Sent, Trash, Flagged.
      // Junk/Spam STAY in scope — casino mails often land directly in the spam folder.
      // Note: iCloud and GMX often do not provide specialUse → the Noselect flag is
      // the only hard exclusion, the specialUse check is a soft addition.
      const scannable = mailboxes.filter((mb: any) => {
        if (mb.flags?.has("\\Noselect")) return false;
        if (mb.specialUse && SKIP_SPECIAL_USE.test(mb.specialUse)) return false;
        return true;
      });
      const skippedSystemFolders = mailboxes.length - scannable.length;
      console.log(
        `[scan-internal] ${connection.email} scanning ${scannable.length} folders` +
          (skippedSystemFolders > 0 ? ` (${skippedSystemFolders} system folders skipped)` : ""),
      );

      for (const mb of scannable) {
        let lock: any;
        try {
          lock = await imap.getMailboxLock(mb.path);
        } catch (lockErr) {
          // Folder cannot be opened — skip it, but leave a trace in the logs.
          console.warn(
            `[scan-internal] ${connection.email} | ${mb.path} | could not open folder, skipping:`,
            lockErr,
          );
          continue;
        }

        try {
          const status = await imap.status(mb.path, { messages: true });
          const msgCount = (status as any).messages ?? 0;
          // `continue` inside try still runs the finally block, so the lock is released.
          if (msgCount === 0) continue;

          // Only the newest SCAN_LIMIT messages (by sequence number) are fetched.
          const fetchRange =
            msgCount > SCAN_LIMIT ? `${msgCount - SCAN_LIMIT + 1}:*` : "1:*";
          const allMessages = await imap.fetchAll(fetchRange, {
            envelope: true,
          });
          scanned += allMessages.length;
          totalScanned += allMessages.length;

          // Storage key per message: "<folder path>:<uid>" (falls back to seq).
          const allUids = allMessages.map(
            (m: any) => `${mb.path}:${String(m.uid ?? m.seq)}`,
          );

          // Collect all sender domains for the blocklist lookup.
          const senderDomains = allMessages
            .map((m: any) =>
              ((m.envelope?.from?.[0]?.address ?? "").toLowerCase().split("@")[1] ?? ""),
            )
            .filter(Boolean);

          const [blockedDomainSet, alreadyBlockedSet, customDisplayNames] = await Promise.all([
            getBlocklistedDomainsSet(senderDomains, userId, includeGlobal),
            getAlreadyBlockedUidSet(allUids, userId),
            getCustomMailDisplayNames(userId),
          ]);

          const toInsert: Parameters<typeof insertMailBlocked>[0] = [];
          const uidsToDelete: string[] = [];
          const sampleInserts: Parameters<typeof insertMailClassificationSample>[0][] = [];

          for (const msg of allMessages) {
            const from = msg.envelope?.from?.[0];
            const senderEmail = (from?.address ?? "").toLowerCase();
            const senderName = from?.name ?? null;
            const subject = (msg.envelope?.subject ?? "").trim();
            const msgDate = msg.envelope?.date ?? new Date();
            const uid = `${mb.path}:${String(msg.uid ?? msg.seq)}`;

            // Layer 0: already blocked → skip, no sample.
            if (alreadyBlockedSet.has(uid)) continue;

            const result = await classifyMail({
              mail: { senderEmail, senderName, subject },
              blockedDomainSet,
              customDisplayNames,
            });

            // Layer 5: sample capture (always, except for layer 0).
            const senderDomain = senderEmail.split("@")[1] ?? null;
            sampleInserts.push({
              userId,
              connectionId: connection.id,
              senderName: senderName?.slice(0, 255) ?? null,
              senderDomain: senderDomain?.slice(0, 255) ?? null,
              relayDecodedDomain: result.relayDecodedDomain?.slice(0, 255) ?? null,
              subject: subject.slice(0, 998) || null,
              features: result.features as unknown as Record<string, unknown>,
              finalAction: result.action,
              triggerSource: result.triggerSource,
            });

            if (result.action !== "blocked") continue;

            uidsToDelete.push(String(msg.uid));
            toInsert.push({
              userId,
              connectionId: connection.id,
              gmailMessageId: uid,
              senderEmail: senderEmail || "unbekannt",
              senderName,
              subject: subject.slice(0, 200) || "(kein Betreff)",
              receivedAt: msgDate,
              action: "deleted",
              triggerSource: result.triggerSource,
            });
            newlyBlocked++;
          }

          if (uidsToDelete.length > 0) {
            // Gmail detection: imap.messageDelete() on Gmail does not produce a real
            // DELETE — Gmail moves the mail to "[Gmail]/All Mail" instead of removing
            // it. For Gmail we have to move the mail into the Trash via messageMove();
            // the Trash is then emptied automatically after 30 days.
            if (isGmail) {
              // Discover the trash folder via specialUse='\\Trash', fallback: '[Gmail]/Trash'.
              const trashMailbox = mailboxes.find(
                (mb2: any) => mb2.specialUse === "\\Trash",
              );
              const trashFolder = trashMailbox?.path ?? "[Gmail]/Trash";
              try {
                await imap.messageMove(uidsToDelete.join(","), trashFolder, { uid: true });
                console.log(
                  `[scan-internal] ${connection.email} | ${mb.path} | moved ${uidsToDelete.length} gambling mails to ${trashFolder} (Gmail)`,
                );
              } catch (moveErr) {
                // Move failed — escalate instead of silently ignoring. The scan run
                // still writes the DB insert (mail_blocked), but logs the error so
                // operations/alerting can react.
                console.error(
                  `[scan-internal] Gmail MOVE to ${trashFolder} failed for ${connection.email} | ${mb.path}:`,
                  moveErr,
                );
              }
            } else {
              // Non-Gmail (iCloud, Outlook, IONOS, etc.): EXPUNGE works correctly.
              try {
                await imap.messageDelete(uidsToDelete.join(","), { uid: true });
              } catch {
                // Fallback: flag each message as \Deleted individually, then expunge.
                // Deliberately best effort — individual failures are ignored.
                try {
                  for (const uid of uidsToDelete) {
                    await imap
                      .messageFlagsAdd(uid, ["\\Deleted"], { uid: true })
                      .catch(() => {});
                  }
                  await (imap as any).expunge().catch(() => {});
                } catch {
                  /* ignore */
                }
              }
              console.log(
                `[scan-internal] ${connection.email} | ${mb.path} | deleted ${uidsToDelete.length} gambling mails`,
              );
            }
          }

          await insertMailBlocked(toInsert);

          // Samples are fire-and-forget (the scan result does not depend on them).
          if (sampleInserts.length > 0) {
            void Promise.all(sampleInserts.map((s) => insertMailClassificationSample(s))).catch((err) => {
              console.warn("[scan-internal] sample insert failed (non-fatal):", err);
            });
          }

          if (toInsert.length > 0) {
            const providerMeta = resolveProviderMeta(connection.imapHost);
            await upsertMailBlockedStat({
              userId,
              mailConnectionId: connection.id,
              provider: providerMeta.provider,
              providerLabel: providerMeta.providerLabel,
              count: toInsert.length,
            });
          }
        } finally {
          lock.release();
        }
      }

      await imap.logout();
    } catch (scanErr) {
      // A failure on one connection must not abort the remaining connections, but
      // it is logged so that a scan reporting 0 results can be told apart from a
      // scan that never ran.
      console.error(`[scan-internal] scan failed for ${connection.email}:`, scanErr);
      try {
        await imap.logout();
      } catch {
        /* connection may already be closed or was never established */
      }
    }

    totalBlocked += newlyBlocked;
    // Stats are updated even after a failed scan, using whatever was counted so far.
    await updateMailConnectionScanStats(
      connection.id,
      scanned,
      newlyBlocked,
      connection.emailsBlocked,
      connection.emailsScanned,
      connection.scanInterval,
    );
  }

  return { ok: true, scanned: totalScanned, blocked: totalBlocked, skippedNoConsent };
});