// rebreak-monorepo/backend/server/utils/strip-markdown.ts

/**
 * Removes Markdown formatting from LLM responses and returns plain text.
 *
 * Background: even with an explicit "no Markdown" instruction in the system
 * prompt, some models keep emitting **bold**, bullet lists and similar markup.
 * Clients that do not render Markdown would show the raw marker characters.
 *
 * This function is a safety net to apply after the LLM call. It is a
 * regex-based best-effort cleanup, not a full Markdown parser: the content of
 * code spans/blocks is unwrapped but still passes through the later rules.
 *
 * Order of the passes matters:
 *  - fenced code blocks are unwrapped before inline code, otherwise the
 *    inline-code rule eats one backtick of each fence and the fence rule can
 *    no longer match;
 *  - blockquote markers are removed before headings/bullets so that
 *    "> - item" or "> # Title" are recognised at the start of the line;
 *  - horizontal rules are removed before emphasis so that lines such as
 *    "******" are not partially consumed by the bold rule;
 *  - images are handled before links so no stray "!" is left behind.
 *
 * @param input Raw model output, possibly containing Markdown.
 * @returns The text without Markdown markers, trimmed. Falsy input (empty
 *          string) is returned unchanged.
 */
export function stripMarkdown(input: string): string {
  if (!input) return input;

  let out = input;

  // Fenced code blocks: ```lang\n...\n``` → inner content.
  // The info string (language tag etc.) is dropped only when it is followed
  // by a newline, so a single-line ```code``` keeps its content.
  out = out.replace(/```(?:[^\n`]*\n)?([\s\S]*?)```/g, "$1");

  // Inline code: `text` → text
  out = out.replace(/`([^`]+)`/g, "$1");

  // Blockquotes: "> foo" / ">> foo" → "foo"
  out = out.replace(/^(?:>[ \t]?)+/gm, "");

  // Horizontal rules on their own line: --- / *** / ___ (also "* * *")
  out = out.replace(/^[ \t]*[-*_](?:[ \t]*[-*_]){2,}[ \t]*$/gm, "");

  // Bold/italic: ***text*** / ___text___ / **text** / __text__
  out = out.replace(/\*\*\*(.+?)\*\*\*/g, "$1");
  out = out.replace(/___(.+?)___/g, "$1");
  out = out.replace(/\*\*(.+?)\*\*/g, "$1");
  out = out.replace(/__(.+?)__/g, "$1");
  // Single * or _ as italic — only when preceded by start/whitespace and
  // followed by whitespace, end or punctuation; otherwise list bullets or
  // multiplications would be destroyed.
  out = out.replace(/(^|\s)\*([^\s*][^*]*?)\*(?=\s|$|[.,;:!?])/g, "$1$2");
  out = out.replace(/(^|\s)_([^\s_][^_]*?)_(?=\s|$|[.,;:!?])/g, "$1$2");

  // Headings: # / ## / ... / ###### at the start of a line
  out = out.replace(/^#{1,6}\s+/gm, "");

  // Bullet lists: "- foo", "* foo", "+ foo" at the start of a line.
  // The Markdown marker is replaced by a plain bullet: "- Schutz" → "• Schutz".
  out = out.replace(/^[ \t]*[-*+][ \t]+/gm, "• ");

  // Numbered lists ("1. foo") are kept on purpose: the numbers are more
  // informative than a bullet. To strip them as well, enable:
  // out = out.replace(/^[ \t]*\d+\.[ \t]+/gm, "");

  // Images: ![alt](url) → alt (must run before the link rule)
  out = out.replace(/!\[([^\]]*)\]\([^)]*\)/g, "$1");

  // Links: [text](url) → text
  out = out.replace(/\[([^\]]+)\]\([^)]+\)/g, "$1");

  // Collapse runs of blank lines to at most one blank line
  out = out.replace(/\n{3,}/g, "\n\n");

  return out.trim();
}