rebreak-monorepo/backend/scripts/generate-pir-input.ts
chahinebrini 29bbf23405 feat(protection): iOS NEURLFilter-Spike + PIR-Server-Ops
NEURLFilter-Stack (iOS 26): Extension RebreakURLFilter -> URLFilterExtension
umbenannt, url-filter-provider-Entitlement, Bloom-Prefilter-Extension,
PIR-Client-Config (pirServerURL/pirAuthToken via Build-Env).
PIR-Server-Ops unter ops/pir-server/ (Dockerfile, build-and-deploy, Patches,
DTS-Report). backend/scripts/generate-pir-input.ts erzeugt die PIR-Datenbank.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-21 18:09:42 +02:00

194 lines
6.5 KiB
TypeScript

#!/usr/bin/env npx tsx
/**
* generate-pir-input.ts
*
* Erzeugt input.txtpb für den iOS-NEURLFilter-PIR-Server.
*
* Format (gegen Apples pir_database.proto verifiziert):
* rows: { keyword: "bet365.com" value: "1" }
* rows: { keyword: "poker.de" value: "1" }
* ...
* Eine Zeile pro Domain. KEIN Top-Level-KeywordDatabase-Wrapper.
*
* Modi:
* Default (--source db): liest BlocklistDomain aus PostgreSQL via Prisma.
* DATABASE_URL muss gesetzt sein (via `infisical run`).
* --source hagezi: Fallback — fetcht HaGeZi gambling-blocklist direkt
* ohne DB-Zugriff.
*
* Optionen:
* --source <db|hagezi> Default: db
* --output <pfad> Default: ./input.txtpb (relativ zum CWD)
* --dry-run Nur zählen, nicht schreiben
*
* Aufruf:
* infisical run -- npx tsx scripts/generate-pir-input.ts
* infisical run -- npx tsx scripts/generate-pir-input.ts --output /srv/pir-server/data/input.txtpb
* npx tsx scripts/generate-pir-input.ts --source hagezi --output /tmp/test.txtpb
*/
import { writeFileSync } from "node:fs";
import { resolve } from "node:path";
import { PrismaClient } from "../server/generated/prisma/client.js";
import { PrismaPg } from "@prisma/adapter-pg";
// ─── Normalization ────────────────────────────────────────────────────────────
/**
 * Reduces a URL-ish input to a bare, lower-case host string.
 *
 * Steps, in order: trim surrounding whitespace, lower-case, drop a leading
 * `http://` or `https://`, drop everything from the first `/` onwards, and
 * drop a single leading `www.`.
 *
 * NOTE(review): the original comment states this is identical to
 * server/utils/domainHash.ts — keep both in sync when changing either.
 *
 * @param input Raw domain or URL string.
 * @returns The normalized string (may be empty).
 */
function normalizeDomain(input: string): string {
  const lowered = input.trim().toLowerCase();
  const withoutScheme = lowered.replace(/^https?:\/\//, "");
  const withoutPath = withoutScheme.replace(/\/.*$/, "");
  return withoutPath.replace(/^www\./, "");
}
// ─── CLI argument parsing ─────────────────────────────────────────────────────
/** CLI arguments without the node binary and the script path. */
const args: string[] = process.argv.slice(2);

/** Domain sources this script can read from. */
type Source = "db" | "hagezi";

/**
 * Looks up the value of a string option on the command line.
 *
 * Accepts both `--name value` and `--name=value`. If the option is present
 * but has no usable value (it is the last argument, is followed by another
 * `--option`, or is given as `--name=`), an error is printed and the process
 * exits with code 1 instead of silently falling back to a default.
 *
 * @param name Option name including the leading dashes, e.g. `--output`.
 * @returns The option value, or `undefined` when the option is absent.
 */
function getFlag(name: string): string | undefined {
  const inlinePrefix = `${name}=`;
  const inline = args.find((arg) => arg.startsWith(inlinePrefix));
  const position = args.indexOf(name);
  if (inline === undefined && position === -1) return undefined;

  let value: string | undefined;
  if (inline !== undefined) {
    value = inline.slice(inlinePrefix.length);
  } else {
    // A following token that starts with "--" is the next option, not a value.
    const next = args[position + 1];
    value = next?.startsWith("--") ? undefined : next;
  }

  if (value === undefined || value === "") {
    console.error(`Option ${name} erwartet einen Wert.`);
    return process.exit(1);
  }
  return value;
}

/**
 * Validates the `--source` value; prints an error and exits with code 1 for
 * anything other than `db` or `hagezi`.
 */
function parseSource(value: string): Source {
  if (value !== "db" && value !== "hagezi") {
    console.error(`Unbekannte Source "${value}". Erlaubt: db, hagezi`);
    return process.exit(1);
  }
  return value;
}

const source: Source = parseSource(getFlag("--source") ?? "db");
const outputArg = getFlag("--output") ?? "input.txtpb";
// Relative output paths are resolved against the current working directory.
const outputPath = resolve(process.cwd(), outputArg);
const dryRun = args.includes("--dry-run");
// ─── Domain fetch ─────────────────────────────────────────────────────────────
/**
 * Loads the `domain` column of all `blocklistDomain` rows with
 * `isActive: true` from PostgreSQL via Prisma.
 *
 * Reads the connection string from `DATABASE_URL`; if it is unset, prints an
 * error and exits the process with code 1. The Prisma client is always
 * disconnected again, also when a query fails.
 *
 * @returns The raw domain strings (not normalized, not deduplicated).
 */
async function fetchFromDb(): Promise<string[]> {
  const connectionString = process.env.DATABASE_URL;
  if (!connectionString) {
    console.error(
      "DATABASE_URL nicht gesetzt. Script via `infisical run --` aufrufen.",
    );
    process.exit(1);
  }

  console.log("Verbinde zu PostgreSQL...");
  const client = new PrismaClient({
    adapter: new PrismaPg({ connectionString }),
    log: ["error"],
  });
  const activeOnly = { isActive: true };

  try {
    // The count is only used for the progress message below.
    const total = await client.blocklistDomain.count({ where: activeOnly });
    console.log(`Lade ${total.toLocaleString("de-DE")} aktive Domains aus DB...`);

    const records = await client.blocklistDomain.findMany({
      where: activeOnly,
      select: { domain: true },
    });
    const domains: string[] = [];
    for (const record of records) {
      domains.push(record.domain);
    }
    return domains;
  } finally {
    await client.$disconnect();
  }
}
/** Download URL of the HaGeZi gambling blocklist in adblock syntax. */
const HAGEZI_URL =
  "https://raw.githubusercontent.com/hagezi/dns-blocklists/main/adblock/gambling.txt";

/**
 * Downloads the HaGeZi gambling blocklist and extracts its domains.
 *
 * Only lines of the form `||domain^` are considered (after trimming); the
 * domain is lower-cased and kept when it is non-empty, contains no `/` and
 * contains at least one `.`. Everything else is skipped.
 *
 * @returns The extracted domains in file order (duplicates are not removed).
 * @throws Error when the HTTP response status is not OK.
 */
async function fetchFromHagezi(): Promise<string[]> {
  console.log(`Fetche HaGeZi gambling-blocklist von ${HAGEZI_URL}...`);
  const response = await fetch(HAGEZI_URL);
  if (!response.ok) {
    throw new Error(
      `HaGeZi-Fetch fehlgeschlagen: ${response.status} ${response.statusText}`,
    );
  }

  const body = await response.text();
  const domains: string[] = [];
  for (const rawLine of body.split("\n")) {
    const line = rawLine.trim();
    if (!line.startsWith("||") || !line.endsWith("^")) continue;

    // Strip the leading "||" and the trailing "^".
    const domain = line.slice(2, -1).toLowerCase();
    if (domain.length === 0 || domain.includes("/") || !domain.includes(".")) {
      continue;
    }
    domains.push(domain);
  }
  return domains;
}
// ─── Normalize + dedupe + sort ────────────────────────────────────────────────
/**
 * Normalizes every raw entry, drops results that are empty or contain no dot,
 * removes duplicates and returns the remainder sorted with the default
 * string ordering of `Array.prototype.sort`.
 *
 * @param raw Raw domain strings as delivered by one of the sources.
 * @returns Unique, normalized domains in sorted order.
 */
function processdomains(raw: string[]): string[] {
  const unique = new Set(
    raw
      .map((entry) => normalizeDomain(entry))
      .filter((name) => name.length > 0 && name.includes(".")),
  );
  return [...unique].sort();
}
// ─── .txtpb formatting ────────────────────────────────────────────────────────
/**
 * Escapes a value for use inside a double-quoted protobuf text-format string.
 *
 * Backslash, double quote and ASCII control characters are escaped so that a
 * malformed domain can neither terminate the string literal early nor break
 * the one-row-per-line layout. All other characters pass through unchanged.
 */
function escapeTxtpbString(value: string): string {
  return value.replace(/[\\"\u0000-\u001f\u007f]/g, (ch) => {
    switch (ch) {
      case "\\":
        return "\\\\";
      case '"':
        return '\\"';
      case "\n":
        return "\\n";
      case "\r":
        return "\\r";
      case "\t":
        return "\\t";
      default:
        // Remaining control characters as a three-digit octal escape.
        return `\\${ch.charCodeAt(0).toString(8).padStart(3, "0")}`;
    }
  });
}

/**
 * Renders the domains as protobuf text format, one row per line:
 * `rows: { keyword: "domain.com" value: "1" }`.
 *
 * No wrapper message and no BOM are emitted; the result always ends with a
 * newline (for an empty list it is a single newline).
 *
 * @param domains Domains to render, in output order.
 * @returns The complete file content.
 */
function formatTxtpb(domains: readonly string[]): string {
  const lines = domains.map(
    (d) => `rows: { keyword: "${escapeTxtpbString(d)}" value: "1" }`,
  );
  return lines.join("\n") + "\n";
}
// ─── Main ─────────────────────────────────────────────────────────────────────
/**
 * Runs the generator: loads the raw domains from the selected source,
 * normalizes/dedupes/sorts them, prints a short preview and, unless
 * `--dry-run` was given, writes the text-format file to `outputPath`.
 *
 * @throws Error when no domains remain and a file would have to be written;
 *   an empty database is never written over an existing output file.
 */
async function main(): Promise<void> {
  console.log(`PIR input.txtpb Generator`);
  console.log(`Source : ${source}`);
  console.log(`Output : ${outputPath}`);
  if (dryRun) console.log(`Mode : dry-run (kein Schreibvorgang)`);
  console.log("");

  // 1. Load domains
  const raw = source === "db" ? await fetchFromDb() : await fetchFromHagezi();
  console.log(`Rohe Domains geladen: ${raw.length.toLocaleString("de-DE")}`);

  // 2. Normalize + dedupe + sort
  const domains = processdomains(raw);
  console.log(
    `Nach Normalisierung + Dedupe: ${domains.length.toLocaleString("de-DE")} Domains`,
  );

  // 3. Preview — rendered through formatTxtpb so it shows exactly what is written.
  const printRows = (sample: string[]): void => {
    for (const line of formatTxtpb(sample).split("\n")) {
      if (line.length > 0) console.log(` ${line}`);
    }
  };
  console.log("\nBeispiel (erste 5 Zeilen):");
  printRows(domains.slice(0, 5));
  console.log(" ...");
  console.log("\nBeispiel (letzte 3 Zeilen):");
  printRows(domains.slice(-3));

  if (dryRun) {
    console.log("\ndry-run: Datei wird NICHT geschrieben.");
    return;
  }

  // Refuse to replace the output with an empty database: an empty source
  // result would otherwise silently produce a file without any rows.
  if (domains.length === 0) {
    throw new Error(
      "Keine Domains geladen - leere PIR-Datenbank wird nicht geschrieben.",
    );
  }

  // 4. Write
  const content = formatTxtpb(domains);
  const bytes = Buffer.byteLength(content, "utf8");
  writeFileSync(outputPath, content, { encoding: "utf8" });

  console.log(`\nGeschrieben: ${outputPath}`);
  console.log(
    `Groesse : ${(bytes / 1024 / 1024).toFixed(2)} MB (${bytes.toLocaleString("de-DE")} Bytes)`,
  );
  console.log(`Domains : ${domains.length.toLocaleString("de-DE")}`);
}
/** Reports a fatal error on stderr and terminates the process with exit code 1. */
function reportFatal(error: unknown): void {
  console.error("Fehler:", error);
  process.exit(1);
}

// Entry point: run the generator and route any rejection to the fatal handler.
main().catch(reportFatal);