#!/usr/bin/env -S npx tsx
/**
 * generate-pir-input.ts
 *
 * Generates input.txtpb for the iOS NEURLFilter PIR server.
 *
 * Format (verified against Apple's pir_database.proto):
 *   rows: { keyword: "bet365.com" value: "1" }
 *   rows: { keyword: "poker.de" value: "1" }
 *   ...
 * One line per domain. NO top-level KeywordDatabase wrapper.
 *
 * Modes:
 *   Default (--source db): reads BlocklistDomain from PostgreSQL via Prisma.
 *                          DATABASE_URL must be set (via `infisical run`).
 *   --source hagezi:       fallback — fetches the HaGeZi gambling blocklist
 *                          directly, without DB access.
 *
 * Options:
 *   --source   Default: db
 *   --output   Default: ./input.txtpb (relative to the CWD)
 *   --dry-run  Only count, do not write
 *
 * Usage:
 *   infisical run -- npx tsx scripts/generate-pir-input.ts
 *   infisical run -- npx tsx scripts/generate-pir-input.ts --output /srv/pir-server/data/input.txtpb
 *   npx tsx scripts/generate-pir-input.ts --source hagezi --output /tmp/test.txtpb
 */

import { writeFileSync } from "node:fs";
import { resolve } from "node:path";
import { PrismaClient } from "../server/generated/prisma/client.js";
import { PrismaPg } from "@prisma/adapter-pg";

// ─── Normalization (identical to server/utils/domainHash.ts) ─────────────────

/**
 * Reduces a raw domain/URL string to a bare host name: trims, lowercases,
 * strips a leading http(s):// scheme, everything from the first "/" on, and
 * a leading "www.".
 *
 * Kept deliberately in sync with server/utils/domainHash.ts (per the section
 * header) — do not change the steps here without changing them there.
 */
function normalizeDomain(input: string): string {
  return input
    .trim()
    .toLowerCase()
    .replace(/^https?:\/\//, "")
    .replace(/\/.*$/, "")
    .replace(/^www\./, "");
}

// ─── CLI argument parsing ────────────────────────────────────────────────────

type Source = "db" | "hagezi";

const SOURCES: readonly string[] = ["db", "hagezi"];

const args = process.argv.slice(2);

/**
 * Returns the value following `name` in the CLI arguments, or `undefined`
 * when the flag is absent.
 *
 * Exits with code 1 when the flag is present but has no value (end of the
 * argument list, or the next token is itself a `--flag`); otherwise e.g.
 * `--output --dry-run` would silently write a file called "--dry-run".
 */
function getFlag(name: string): string | undefined {
  const idx = args.indexOf(name);
  if (idx === -1) return undefined;
  const value = args[idx + 1];
  if (value === undefined || value.startsWith("--")) {
    console.error(`Option ${name} erwartet einen Wert.`);
    process.exit(1);
  }
  return value;
}

/** Runtime check backing the `Source` type; narrows instead of asserting. */
function isSource(value: string): value is Source {
  return SOURCES.includes(value);
}

const sourceArg = getFlag("--source") ?? "db";
if (!isSource(sourceArg)) {
  console.error(`Unbekannte Source "${sourceArg}". Erlaubt: db, hagezi`);
  process.exit(1);
}
const source: Source = sourceArg;

const outputArg = getFlag("--output") ?? "input.txtpb";
const outputPath = resolve(process.cwd(), outputArg);
const dryRun = args.includes("--dry-run");

// ─── Domain fetch ────────────────────────────────────────────────────────────

/**
 * Loads the `domain` column of all BlocklistDomain rows with `isActive: true`
 * via Prisma. Exits with code 1 if DATABASE_URL is not set. The client is
 * always disconnected, even when a query throws.
 */
async function fetchFromDb(): Promise<string[]> {
  const dbUrl = process.env.DATABASE_URL;
  if (!dbUrl) {
    console.error(
      "DATABASE_URL nicht gesetzt. Script via `infisical run --` aufrufen.",
    );
    process.exit(1);
  }

  console.log("Verbinde zu PostgreSQL...");
  const adapter = new PrismaPg({ connectionString: dbUrl });
  const prisma = new PrismaClient({ adapter, log: ["error"] });

  try {
    // Separate count query purely for the progress message below.
    const count = await prisma.blocklistDomain.count({
      where: { isActive: true },
    });
    console.log(`Lade ${count.toLocaleString("de-DE")} aktive Domains aus DB...`);

    const rows = await prisma.blocklistDomain.findMany({
      where: { isActive: true },
      select: { domain: true },
    });
    return rows.map((r) => r.domain);
  } finally {
    await prisma.$disconnect();
  }
}

const HAGEZI_URL =
  "https://raw.githubusercontent.com/hagezi/dns-blocklists/main/adblock/gambling.txt";

/**
 * Downloads the HaGeZi gambling blocklist and extracts the host from every
 * line of the form `||host^`. Any other line is ignored.
 *
 * @throws Error when the HTTP response status is not OK.
 */
async function fetchFromHagezi(): Promise<string[]> {
  console.log(`Fetche HaGeZi gambling-blocklist von ${HAGEZI_URL}...`);
  const res = await fetch(HAGEZI_URL);
  if (!res.ok) {
    throw new Error(`HaGeZi-Fetch fehlgeschlagen: ${res.status} ${res.statusText}`);
  }
  const raw = await res.text();

  return raw
    .split("\n")
    .map((l) => l.trim())
    .filter((l) => l.startsWith("||") && l.endsWith("^"))
    .map((l) => l.slice(2, -1).toLowerCase()) // drop leading "||" and trailing "^"
    .filter((d) => d.length > 0 && !d.includes("/") && d.includes("."));
}

// ─── Normalize + dedupe + sort ───────────────────────────────────────────────

/**
 * Normalizes every raw entry, drops entries that are empty or contain no "."
 * after normalization, removes duplicates and returns the result sorted
 * (default string ordering).
 */
function processDomains(raw: readonly string[]): string[] {
  const seen = new Set<string>();
  for (const d of raw) {
    const n = normalizeDomain(d);
    if (n.length > 0 && n.includes(".")) {
      seen.add(n);
    }
  }
  return Array.from(seen).sort();
}

// ─── .txtpb formatting ───────────────────────────────────────────────────────

const TXTPB_ESCAPES: Readonly<Record<string, string>> = {
  "\\": "\\\\",
  '"': '\\"',
  "\n": "\\n",
  "\r": "\\r",
  "\t": "\\t",
};

/**
 * Escapes the characters that would terminate or corrupt a double-quoted
 * string literal in the output file. Clean host names pass through unchanged.
 */
function escapeTxtpbString(value: string): string {
  return value.replace(/[\\"\n\r\t]/g, (ch) => TXTPB_ESCAPES[ch] ?? ch);
}

/** Formats one domain as a single `rows: { ... }` entry (no trailing newline). */
function formatRow(domain: string): string {
  return `rows: { keyword: "${escapeTxtpbString(domain)}" value: "1" }`;
}

/**
 * Renders the full file content: one `rows` entry per domain, joined with
 * "\n" and terminated by a final "\n". No wrapper message, no BOM.
 */
function formatTxtpb(domains: readonly string[]): string {
  return domains.map(formatRow).join("\n") + "\n";
}

// ─── Main ────────────────────────────────────────────────────────────────────

/**
 * Loads domains from the selected source, normalizes them, prints a preview
 * and — unless --dry-run was given — writes the .txtpb file to `outputPath`.
 */
async function main(): Promise<void> {
  console.log(`PIR input.txtpb Generator`);
  console.log(`Source : ${source}`);
  console.log(`Output : ${outputPath}`);
  if (dryRun) console.log(`Mode : dry-run (kein Schreibvorgang)`);
  console.log("");

  // 1. Load domains
  const raw = source === "db" ? await fetchFromDb() : await fetchFromHagezi();
  console.log(`Rohe Domains geladen: ${raw.length.toLocaleString("de-DE")}`);

  // 2. Normalize + dedupe + sort
  const domains = processDomains(raw);
  console.log(
    `Nach Normalisierung + Dedupe: ${domains.length.toLocaleString("de-DE")} Domains`,
  );

  // 3. Preview — uses the same row formatter as the written file
  console.log("\nBeispiel (erste 5 Zeilen):");
  domains.slice(0, 5).forEach((d) => console.log(` ${formatRow(d)}`));
  console.log(" ...");
  console.log("\nBeispiel (letzte 3 Zeilen):");
  domains.slice(-3).forEach((d) => console.log(` ${formatRow(d)}`));

  if (dryRun) {
    console.log("\ndry-run: Datei wird NICHT geschrieben.");
    return;
  }

  // 4. Write
  const content = formatTxtpb(domains);
  const bytes = Buffer.byteLength(content, "utf8");
  writeFileSync(outputPath, content, { encoding: "utf8" });

  console.log(`\nGeschrieben: ${outputPath}`);
  console.log(
    `Groesse : ${(bytes / 1024 / 1024).toFixed(2)} MB (${bytes.toLocaleString("de-DE")} Bytes)`,
  );
  console.log(`Domains : ${domains.length.toLocaleString("de-DE")}`);
}

main().catch((err: unknown) => {
  console.error("Fehler:", err);
  process.exit(1);
});