NEURLFilter-Stack (iOS 26): Extension RebreakURLFilter -> URLFilterExtension umbenannt, url-filter-provider-Entitlement, Bloom-Prefilter-Extension, PIR-Client-Config (pirServerURL/pirAuthToken via Build-Env). PIR-Server-Ops unter ops/pir-server/ (Dockerfile, build-and-deploy, Patches, DTS-Report). backend/scripts/generate-pir-input.ts erzeugt die PIR-Datenbank. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
194 lines
6.5 KiB
TypeScript
194 lines
6.5 KiB
TypeScript
#!/usr/bin/env npx tsx
|
|
/**
|
|
* generate-pir-input.ts
|
|
*
|
|
* Erzeugt input.txtpb für den iOS-NEURLFilter-PIR-Server.
|
|
*
|
|
* Format (gegen Apples pir_database.proto verifiziert):
|
|
* rows: { keyword: "bet365.com" value: "1" }
|
|
* rows: { keyword: "poker.de" value: "1" }
|
|
* ...
|
|
* Eine Zeile pro Domain. KEIN Top-Level-KeywordDatabase-Wrapper.
|
|
*
|
|
* Modi:
|
|
* Default (--source db): liest BlocklistDomain aus PostgreSQL via Prisma.
|
|
* DATABASE_URL muss gesetzt sein (via `infisical run`).
|
|
* --source hagezi: Fallback — fetcht HaGeZi gambling-blocklist direkt
|
|
* ohne DB-Zugriff.
|
|
*
|
|
* Optionen:
|
|
* --source <db|hagezi> Default: db
|
|
* --output <pfad> Default: ./input.txtpb (relativ zum CWD)
|
|
* --dry-run Nur zählen, nicht schreiben
|
|
*
|
|
* Aufruf:
|
|
* infisical run -- npx tsx scripts/generate-pir-input.ts
|
|
* infisical run -- npx tsx scripts/generate-pir-input.ts --output /srv/pir-server/data/input.txtpb
|
|
* npx tsx scripts/generate-pir-input.ts --source hagezi --output /tmp/test.txtpb
|
|
*/
|
|
|
|
import { writeFileSync } from "node:fs";
|
|
import { resolve } from "node:path";
|
|
import { PrismaClient } from "../server/generated/prisma/client.js";
|
|
import { PrismaPg } from "@prisma/adapter-pg";
|
|
|
|
// ─── Normalisierung (identisch zu server/utils/domainHash.ts) ────────────────
|
|
|
|
/**
 * Reduces a URL-ish or domain-ish string to a bare, lower-case host name.
 *
 * Steps, in order: trim surrounding whitespace, lower-case, drop a leading
 * `http://` / `https://`, drop everything from the first `/` onwards, and
 * drop a leading `www.`. Ports, query strings without a path, and trailing
 * dots are left untouched.
 *
 * NOTE(review): the surrounding section comment states this mirrors
 * server/utils/domainHash.ts — keep both in sync when changing either.
 *
 * @param input - Raw domain or URL.
 * @returns The normalized host string (may be empty).
 */
function normalizeDomain(input: string): string {
  const lowered = input.trim().toLowerCase();
  const withoutScheme = lowered.replace(/^https?:\/\//, "");
  const hostOnly = withoutScheme.replace(/\/.*$/, "");
  return hostOnly.replace(/^www\./, "");
}
|
|
|
|
// ─── CLI-Argument-Parsing ─────────────────────────────────────────────────────
|
|
|
|
/** Raw CLI arguments with the node binary and script path stripped. */
const args = process.argv.slice(2);

/**
 * Looks up the value of a `--name <value>` style option in `args`.
 *
 * @param name - Exact flag token to search for, e.g. `"--output"`.
 * @returns The token following the first occurrence of `name`, or `undefined`
 *   when the flag is absent, is the last token, or is directly followed by
 *   another `--flag` (i.e. its value was omitted).
 */
function getFlag(name: string): string | undefined {
  const idx = args.indexOf(name);
  if (idx === -1) return undefined;

  const value = args[idx + 1];
  // A following "--something" token is the next option, not this flag's value.
  // Without this check `--output --dry-run` would yield the value "--dry-run".
  if (value === undefined || value.startsWith("--")) return undefined;
  return value;
}
|
|
|
|
// Resolve all CLI options first, then validate `--source` before narrowing
// it to the literal union used by the rest of the script.
const requestedSource: string = getFlag("--source") ?? "db";
const outputArg = getFlag("--output") ?? "input.txtpb";
// A relative --output is resolved against the current working directory.
const outputPath = resolve(process.cwd(), outputArg);
const dryRun = args.indexOf("--dry-run") !== -1;

// Reject anything other than the two supported sources and exit with code 1.
if (!(requestedSource === "db" || requestedSource === "hagezi")) {
  console.error(`Unbekannte Source "${requestedSource}". Erlaubt: db, hagezi`);
  process.exit(1);
}

const source = requestedSource as "db" | "hagezi";
|
|
|
|
// ─── Domain-Fetch ─────────────────────────────────────────────────────────────
|
|
|
|
/**
 * Loads the `domain` column of every `blocklistDomain` row with
 * `isActive: true` through Prisma (pg adapter).
 *
 * Prints an error and exits the process with code 1 when `DATABASE_URL` is
 * not set. The Prisma client is disconnected in all cases once the queries
 * have finished or failed.
 *
 * @returns The raw (not yet normalized) domain strings.
 */
async function fetchFromDb(): Promise<string[]> {
  const connectionString = process.env.DATABASE_URL;
  if (!connectionString) {
    console.error(
      "DATABASE_URL nicht gesetzt. Script via `infisical run --` aufrufen.",
    );
    process.exit(1);
  }

  console.log("Verbinde zu PostgreSQL...");
  const client = new PrismaClient({
    adapter: new PrismaPg({ connectionString }),
    log: ["error"],
  });
  const onlyActive = { isActive: true };

  try {
    // The count is only used for the progress message below.
    const total = await client.blocklistDomain.count({ where: onlyActive });
    console.log(`Lade ${total.toLocaleString("de-DE")} aktive Domains aus DB...`);

    const records = await client.blocklistDomain.findMany({
      where: onlyActive,
      select: { domain: true },
    });
    return records.map((record) => record.domain);
  } finally {
    await client.$disconnect();
  }
}
|
|
|
|
/** Location of the HaGeZi gambling blocklist (adblock-syntax variant). */
const HAGEZI_URL =
  "https://raw.githubusercontent.com/hagezi/dns-blocklists/main/adblock/gambling.txt";

/**
 * Downloads the HaGeZi list and extracts the domains from its `||domain^`
 * rules.
 *
 * Only lines that (after trimming) start with `||` and end with `^` are
 * considered; the text in between is lower-cased and kept when it is
 * non-empty, contains a dot and contains no `/`. Everything else (comments,
 * headers, other rule shapes) is skipped.
 *
 * @returns Extracted domains in file order; duplicates are not removed here.
 * @throws Error when the HTTP response status is not OK.
 */
async function fetchFromHagezi(): Promise<string[]> {
  console.log(`Fetche HaGeZi gambling-blocklist von ${HAGEZI_URL}...`);
  const response = await fetch(HAGEZI_URL);
  if (!response.ok) {
    throw new Error(`HaGeZi-Fetch fehlgeschlagen: ${response.status} ${response.statusText}`);
  }
  const body = await response.text();

  const domains: string[] = [];
  for (const rawLine of body.split("\n")) {
    const line = rawLine.trim();
    if (!line.startsWith("||") || !line.endsWith("^")) continue;

    // Strip the leading "||" and the trailing "^".
    const domain = line.slice(2, -1).toLowerCase();
    if (domain.length === 0 || domain.includes("/") || !domain.includes(".")) continue;

    domains.push(domain);
  }
  return domains;
}
|
|
|
|
// ─── Normalisieren + Dedupe + Sort ────────────────────────────────────────────
|
|
|
|
/**
 * Normalizes every raw entry, discards results that are empty or contain no
 * dot, removes duplicates and returns the remainder sorted with the default
 * string comparison of `Array.prototype.sort`.
 *
 * @param raw - Domains as loaded from the selected source.
 * @returns Unique normalized domains in ascending order.
 */
function processdomains(raw: string[]): string[] {
  const unique = new Set<string>(
    raw
      .map((entry) => normalizeDomain(entry))
      .filter((name) => name.length > 0 && name.includes(".")),
  );
  return Array.from(unique).sort();
}
|
|
|
|
// ─── .txtpb-Formatierung ──────────────────────────────────────────────────────
|
|
|
|
/**
 * Escapes a value so it can be embedded in a double-quoted protobuf
 * text-format string literal: backslash, double quote and ASCII control
 * characters are replaced by escape sequences; everything else is kept.
 */
function escapeTxtpbString(value: string): string {
  return value.replace(/[\\"\u0000-\u001f\u007f]/g, (ch) => {
    switch (ch) {
      case "\\":
        return "\\\\";
      case '"':
        return '\\"';
      case "\n":
        return "\\n";
      case "\r":
        return "\\r";
      case "\t":
        return "\\t";
      default:
        // Remaining control characters: three-digit octal escape.
        return "\\" + ch.charCodeAt(0).toString(8).padStart(3, "0");
    }
  });
}

/**
 * Renders the domain list as protobuf text format, one row per domain:
 *
 *   rows: { keyword: "domain.com" value: "1" }
 *
 * No wrapper message and no BOM are emitted; rows are joined with `\n` and
 * the result always ends with a single trailing newline (so an empty list
 * yields `"\n"`).
 *
 * Keywords are escaped so that an unexpected quote, backslash or control
 * character in a domain cannot terminate the string literal early and
 * corrupt the generated file. Ordinary domain names are emitted unchanged.
 *
 * @param domains - Normalized domains, in the order they should be written.
 * @returns The complete file content.
 */
function formatTxtpb(domains: string[]): string {
  const lines = domains.map(
    (d) => `rows: { keyword: "${escapeTxtpbString(d)}" value: "1" }`,
  );
  return lines.join("\n") + "\n";
}
|
|
|
|
// ─── Main ─────────────────────────────────────────────────────────────────────
|
|
|
|
/**
 * Script entry point: prints the run configuration, loads the raw domains
 * from the selected source, normalizes/dedupes/sorts them, prints a short
 * preview and — unless `--dry-run` was given — writes the text-proto file to
 * `outputPath` and reports its size.
 */
async function main(): Promise<void> {
  // Small local helpers for the repeated formatting below.
  const fmt = (value: number): string => value.toLocaleString("de-DE");
  const printRows = (names: string[]): void => {
    for (const name of names) {
      console.log(` rows: { keyword: "${name}" value: "1" }`);
    }
  };

  console.log(`PIR input.txtpb Generator`);
  console.log(`Source : ${source}`);
  console.log(`Output : ${outputPath}`);
  if (dryRun) {
    console.log(`Mode : dry-run (kein Schreibvorgang)`);
  }
  console.log("");

  // 1. Load domains from the selected source.
  let raw: string[];
  if (source === "db") {
    raw = await fetchFromDb();
  } else {
    raw = await fetchFromHagezi();
  }
  console.log(`Rohe Domains geladen: ${fmt(raw.length)}`);

  // 2. Normalize, dedupe and sort.
  const domains = processdomains(raw);
  console.log(`Nach Normalisierung + Dedupe: ${fmt(domains.length)} Domains`);

  // 3. Preview of the first five and last three rows.
  console.log("\nBeispiel (erste 5 Zeilen):");
  printRows(domains.slice(0, 5));
  console.log(" ...");
  console.log("\nBeispiel (letzte 3 Zeilen):");
  printRows(domains.slice(-3));

  if (dryRun) {
    console.log("\ndry-run: Datei wird NICHT geschrieben.");
    return;
  }

  // 4. Write the file and report path, size and row count.
  const content = formatTxtpb(domains);
  const bytes = Buffer.byteLength(content, "utf8");
  writeFileSync(outputPath, content, { encoding: "utf8" });

  const megabytes = (bytes / 1024 / 1024).toFixed(2);
  console.log(`\nGeschrieben: ${outputPath}`);
  console.log(`Groesse : ${megabytes} MB (${fmt(bytes)} Bytes)`);
  console.log(`Domains : ${fmt(domains.length)}`);
}
|
|
|
|
/** Logs an error that escaped `main` and terminates with exit code 1. */
function reportFatal(err: unknown): void {
  console.error("Fehler:", err);
  process.exit(1);
}

// Run the script; any rejection from main() is reported and ends the process.
main().catch(reportFatal);
|