update

2026-06-06 17:04:06 +02:00
parent 4aa3130807
commit c84fbbb484
11 changed files with 419 additions and 325 deletions
--- a/backend/generator.py
+++ b/backend/generator.py
@@ -71,6 +71,99 @@ async def _fail(guide_id: str, msg: str) -> None:
    await update_guide(guide_id, status="error", progress=None, error_msg=msg, updated_at=now)


+def _norm_titel(s: str) -> str:
+    """Normalize a title for key comparison.
+    Removes backticks, quotes and angle brackets, collapses whitespace runs
+    into one space, trims the ends and lower-cases the result."""
+    ohne_zeichen = re.sub(r"[`'\"<>]", "", s)
+    kompakt = re.sub(r"\s+", " ", ohne_zeichen)
+    return kompakt.strip().lower()
+
+
+def _titel(entry: str) -> str:
+    """Return the title of an entry: the trimmed text before the first " — ".
+    Falls back to the unchanged entry when that part is empty."""
+    kopf, _, _ = entry.partition(" — ")
+    kopf = kopf.strip()
+    return kopf if kopf else entry
+
+
+def _eindeutige_titel(entries: dict[int, str]) -> dict[int, str]:
+    """Make entry titles unique (suffix " (2)", " (3)", …) so they can serve as keys.
+    The first entry with a given normalized title stays unchanged. Later entries get
+    the next suffix whose normalized form is not taken yet, so a generated "Foo (2)"
+    can never clash with an entry literally titled "Foo (2)". A " — description"
+    part is kept. Returns a new dict with the same numbers; the input is not modified."""
+    zaehler: dict[str, int] = {}
+    vergeben: set[str] = set()
+    out: dict[int, str] = {}
+    for num, text in entries.items():
+        titel = _titel(text)
+        basis = _norm_titel(titel)
+        key = basis
+        if key in vergeben:
+            _, trenner, rest = text.partition(" — ")
+            n = zaehler.get(basis, 1)
+            neu = titel
+            # advance past suffixes that are already in use, whether generated or literal
+            while key in vergeben:
+                n += 1
+                neu = f"{titel} ({n})"
+                key = _norm_titel(neu)
+            zaehler[basis] = n
+            text = neu + (f" — {rest}" if trenner else "")
+        vergeben.add(key)
+        out[num] = text
+    return out
+
+
+def _titel_index(entries: dict[int, str]) -> dict[str, int]:
+    """Build the lookup normalized title → entry number.
+    When two entries share a normalized title, the later one wins."""
+    index: dict[str, int] = {}
+    for num, text in entries.items():
+        index[_norm_titel(_titel(text))] = num
+    return index
+
+
+def _json_datei(path: Path):
+    """Read and parse a JSON file, tolerating a surrounding Markdown code fence.
+    Returns the decoded value, or None when the file is missing or cannot be
+    read or parsed."""
+    if not path.exists():
+        return None
+    try:
+        roh = path.read_text(encoding="utf-8").strip()
+        ohne_fence = re.sub(r"^```(?:json)?\s*|\s*```$", "", roh)
+        ergebnis = json.loads(ohne_fence)
+    except Exception:
+        # best effort: any read or decode failure counts as "no usable file"
+        return None
+    return ergebnis
+
+
+def _resolve_kategorien(data, entries: dict[int, str], min_match: float = 0.85):
+    """Turn {"KERN": [titles], …} into {num: category}.
+    Returns None when data is not a dict, a category value is not a list of strings,
+    no title is listed at all, or either the share of resolvable titles or the share
+    of covered entries is below min_match. A title listed several times keeps the
+    first category it appeared under."""
+    if not isinstance(data, dict):
+        return None
+    index = _titel_index(entries)
+    zuordnung: dict[int, str] = {}
+    genannt = 0
+    erkannt = 0
+    for kategorie in _CATEGORIES:
+        titel_liste = data.get(kategorie, [])
+        if not isinstance(titel_liste, list):
+            return None
+        for eintrag in titel_liste:
+            if not isinstance(eintrag, str):
+                return None
+            genannt += 1
+            nummer = _titel_aufloesen(index, eintrag)
+            if nummer is None:
+                continue
+            erkannt += 1
+            zuordnung.setdefault(nummer, kategorie)
+    if genannt == 0:
+        return None
+    # the match ratio is checked first; the coverage ratio divides by len(entries)
+    if erkannt / genannt < min_match:
+        return None
+    if len(zuordnung) / len(entries) < min_match:
+        return None
+    return zuordnung
+
+
+def _resolve_reihenfolge(data, entries: dict[int, str], min_match: float = 0.85):
+    """Like _resolve_kategorien, but return the listed order: {category: [nums]}.
+    Returns None whenever _resolve_kategorien rejects data. Within a category each
+    number appears once, at the position of its first mention; titles that cannot
+    be resolved are skipped."""
+    if _resolve_kategorien(data, entries, min_match) is None:
+        return None
+    index = _titel_index(entries)
+    reihenfolge: dict[str, list[int]] = {}
+    for kategorie in _CATEGORIES:
+        nums: list[int] = []
+        for eintrag in data.get(kategorie, []):
+            if not isinstance(eintrag, str):
+                continue
+            nummer = _titel_aufloesen(index, eintrag)
+            if nummer is None or nummer in nums:
+                continue
+            nums.append(nummer)
+        reihenfolge[kategorie] = nums
+    return reihenfolge
+
+
+def _kategorien_block(mapping: dict[int, str], entries: dict[int, str]) -> str:
+    """Render the classification as text: one "CAT:" heading per category followed
+    by "- title" lines in ascending entry number, or "(leer)" for an empty category."""
+    nums_sortiert = sorted(entries)
+    def abschnitt(kategorie: str) -> str:
+        zeilen = [f"- {_titel(entries[n])}" for n in nums_sortiert if mapping.get(n) == kategorie]
+        rumpf = "\n".join(zeilen) if zeilen else "(leer)"
+        return f"{kategorie}:\n" + rumpf
+    return "\n".join(abschnitt(kategorie) for kategorie in _CATEGORIES)
+
+
 def _timeout(step: str, n: int = 0) -> int:
    base, per = TIMEOUTS[step]
    return base + per * n
@@ -146,7 +239,7 @@ async def _race(topic: str, label: str, slots: list[dict], quorum: int, timeout:
            await asyncio.gather(*tasks.keys(), return_exceptions=True)


-# --- Bausteine-Pipeline: 4x Recherche (3 nötig) → 2x Auswahl (1) → 4x Einordnung (3) → 2x Final (1) ---
+# --- Bausteine-Pipeline: 4x Recherche (3) → 2x Auswahl (1) → Check → 4x Einordnung (3) → Mehrheit+Verifikation → Sortierung ---

 _bausteine_progress: dict[str, str] = {}
 _bausteine_errors: dict[str, str] = {}
@@ -154,6 +247,28 @@ _bausteine_cancelled: set[str] = set()
 _bausteine_step: dict[str, int] = {}

 BAUSTEINE_STEPS = ("Recherche", "Auswahl", "Prüfung", "Einordnung", "Verifikation", "Sortierung")
+# Category keys of a Bausteine classification; the helpers in this file iterate them in this order.
+_CATEGORIES = ("KERN", "WICHTIG", "REST")
+
+
+def _bausteine_files(topic: str) -> dict:
+    """Return all file paths of a topic's Bausteine pipeline.
+    Keys: "final" (the result from bausteine_path), list-valued "recherche",
+    "auswahl" and "einordnung" slots, and the single files "auswahl_check",
+    "final_check" and "sortierung". All slot files sit next to the final file
+    and share its stem."""
+    final_path = bausteine_path(topic)
+    ordner = final_path.parent
+    basis = final_path.stem
+    def slot(endung: str) -> Path:
+        return ordner / f"{basis}.{endung}"
+    return {
+        "final": final_path,
+        "recherche": [slot(f"recherche-{i}.md") for i in range(1, 5)],
+        "auswahl": [slot(f"auswahl-{i}.md") for i in range(1, 3)],
+        "auswahl_check": slot("auswahl-check.json"),
+        "einordnung": [slot(f"einordnung-{i}.json") for i in range(1, 5)],
+        "final_check": slot("final-check.json"),
+        "sortierung": slot("sortierung.json"),
+    }
+
+
+def _alle_slot_dateien(files: dict) -> list[Path]:
+    """Flatten the intermediate (slot) files of a _bausteine_files dict into one list.
+    The "final" file is not included."""
+    dateien: list[Path] = list(files["recherche"])
+    dateien.extend(files["auswahl"])
+    dateien.append(files["auswahl_check"])
+    dateien.extend(files["einordnung"])
+    dateien.append(files["final_check"])
+    dateien.append(files["sortierung"])
+    return dateien


 def cancel_bausteine(topic: str) -> bool:
@@ -163,31 +278,23 @@ def cancel_bausteine(topic: str) -> bool:
    kill_process(f"bausteine-{topic}-")
    return True

-_CATEGORIES = ("KERN", "WICHTIG", "REST")
-

 def _resume_step(topic: str) -> int:
    """Erster noch offener Schritt anhand der persistierten Zwischendateien."""
-    final_path = bausteine_path(topic)
-    stem, parent = final_path.stem, final_path.parent
-    if sum((parent / f"{stem}.recherche-{i}.md").exists() for i in (1, 2, 3, 4)) < 3:
+    files = _bausteine_files(topic)
+    if sum(p.exists() for p in files["recherche"]) < 3:
        return 0
-    if not any((parent / f"{stem}.auswahl-{i}.md").exists() for i in (1, 2)):
+    if not any(p.exists() for p in files["auswahl"]):
        return 1
-    if not (parent / f"{stem}.auswahl-check.md").exists():
+    if not files["auswahl_check"].exists():
        return 2
-    if sum((parent / f"{stem}.einordnung-{i}.md").exists() for i in (1, 2, 3)) < 3:
+    if sum(p.exists() for p in files["einordnung"]) < 3:
        return 3
-    if not (parent / f"{stem}.final-check.md").exists():
+    if not files["final_check"].exists():
        return 4
    return 5


-def _sortierung_path(topic: str):
-    final_path = bausteine_path(topic)
-    return final_path.parent / f"{final_path.stem}.sortierung.md"
-
-
 def bausteine_status(topic: str) -> dict:
    ready = bausteine_path(topic).exists()
    generating = topic in _bausteine_progress
@@ -200,7 +307,7 @@ def bausteine_status(topic: str) -> dict:
        ]
    elif ready:
        states = ["done"] * len(BAUSTEINE_STEPS)
-        if not _sortierung_path(topic).exists():
+        if not _bausteine_files(topic)["sortierung"].exists():
            states[-1] = "pending"
    else:
        nxt = _resume_step(topic)
@@ -221,16 +328,14 @@ def active_bausteine() -> list[dict]:


 def reset_bausteine(topic: str) -> None:
-    final_path = bausteine_path(topic)
-    final_path.unlink(missing_ok=True)
-    for i in (1, 2, 3, 4):
-        (final_path.parent / f"{final_path.stem}.recherche-{i}.md").unlink(missing_ok=True)
-        (final_path.parent / f"{final_path.stem}.einordnung-{i}.md").unlink(missing_ok=True)
-    for i in (1, 2):
-        (final_path.parent / f"{final_path.stem}.auswahl-{i}.md").unlink(missing_ok=True)
-    (final_path.parent / f"{final_path.stem}.auswahl-check.md").unlink(missing_ok=True)
-    (final_path.parent / f"{final_path.stem}.final-check.md").unlink(missing_ok=True)
-    (final_path.parent / f"{final_path.stem}.sortierung.md").unlink(missing_ok=True)
+    files = _bausteine_files(topic)
+    files["final"].unlink(missing_ok=True)
+    for p in _alle_slot_dateien(files):
+        p.unlink(missing_ok=True)
+    # Altlasten früherer Formatversionen
+    stem, parent = files["final"].stem, files["final"].parent
+    for alt in parent.glob(f"{stem}.*.md"):
+        alt.unlink(missing_ok=True)
    _bausteine_errors.pop(topic, None)


@@ -259,25 +364,21 @@ def _parse_auswahl(text: str) -> dict[int, str]:
    return entries


-def _parse_einordnung(text: str) -> dict[int, str]:
-    """Parst eine Einordnung (`KERN:` gefolgt von `N Titel`-Zeilen) zu Nummer→Kategorie."""
+def _majority(mappings: list[dict[int, str]], entries: dict[int, str]) -> tuple[dict[int, str], list[int]]:
+    """Mehrheitsentscheid über die Einordnungen; ohne Mehrheit → Streitfall."""
    mapping: dict[int, str] = {}
-    current = None
-    for line in text.splitlines():
-        s = line.strip().lstrip("-*# ").strip()
-        if not s:
+    disputes: list[int] = []
+    for num in entries:
+        votes = [m[num] for m in mappings if num in m]
+        if not votes:
+            disputes.append(num)
            continue
-        m = re.match(r"(KERN|WICHTIG|REST)\b[:\s]*(.*)$", s, re.IGNORECASE)
-        if m:
-            current = m.group(1).upper()
-            for num in re.findall(r"\b\d+\b", m.group(2)):
-                mapping.setdefault(int(num), current)
-            continue
-        if current:
-            m = re.match(r"(\d+)\b", s)
-            if m:
-                mapping.setdefault(int(m.group(1)), current)
-    return mapping
+        cat, count = Counter(votes).most_common(1)[0]
+        if count >= 2:
+            mapping[num] = cat
+        else:
+            disputes.append(num)
+    return mapping, disputes


 def _build_final_bausteine(topic: str, entries: dict[int, str], mapping: dict[int, str], order: dict[str, list[int]] | None = None) -> str:
@@ -293,9 +394,6 @@ def _build_final_bausteine(topic: str, entries: dict[int, str], mapping: dict[in
            _log(topic, f"Baustein {num} fehlt in finaler Einordnung → REST")
            cat = "REST"
        grouped[cat].append(num)
-    unknown = sorted(set(mapping) - set(entries))
-    if unknown:
-        _log(topic, f"finale Einordnung enthält unbekannte Nummern (ignoriert): {unknown}")
    if order:
        for cat in _CATEGORIES:
            wanted = set(grouped[cat])
@@ -324,79 +422,41 @@ def _auswahl_payload(path: Path):
    return (text, entries) if entries else None


-def _parse_auswahl_check(text: str):
-    """Parst die Auswahl-Prüfung: NACHTRÄGE (neue Einträge) + STREICHEN (Nummern)."""
-    additions: list[str] = []
-    removals: set[int] = set()
-    mode = None
-    seen_marker = False
-    for line in text.splitlines():
-        s = line.strip().lstrip("-*# ").strip()
-        if not s:
-            continue
-        u = s.upper().rstrip(":")
-        if u.startswith("NACHTR"):
-            mode = "add"
-            seen_marker = True
-            continue
-        if u.startswith("STREICH"):
-            mode = "del"
-            seen_marker = True
-            continue
-        if u == "OK":
-            seen_marker = True
-            continue
-        if mode == "add":
-            additions.append(s)
-        elif mode == "del":
-            m = re.match(r"(\d+)\b", s)
-            if m:
-                removals.add(int(m.group(1)))
-    if not seen_marker:
-        return None  # Antwort hat das Format nicht getroffen
-    return {"add": additions, "remove": removals}
+def _auswahl_check_schema(data):
+    """Validate {"nachtraege": [...], "streichen": [...]}.
+    Both keys are optional and default to an empty list; both must be lists of
+    strings. Returns the normalized dict, or None on any schema violation."""
+    if not isinstance(data, dict):
+        return None
+    nachtraege = data.get("nachtraege", [])
+    streichen = data.get("streichen", [])
+    for liste in (nachtraege, streichen):
+        if not isinstance(liste, list):
+            return None
+    for liste in (nachtraege, streichen):
+        for wert in liste:
+            if not isinstance(wert, str):
+                return None
+    return {"nachtraege": nachtraege, "streichen": streichen}


-def _majority(mappings: list[dict[int, str]], entries: dict[int, str]) -> tuple[dict[int, str], list[int]]:
-    """Mehrheitsentscheid über die Einordnungen; ohne Mehrheit → Streitfall."""
-    mapping: dict[int, str] = {}
-    disputes: list[int] = []
-    for num in entries:
-        votes = [m[num] for m in mappings if num in m]
-        if not votes:
-            disputes.append(num)
-            continue
-        cat, count = Counter(votes).most_common(1)[0]
-        if count >= 2:
-            mapping[num] = cat
-        else:
-            disputes.append(num)
-    return mapping, disputes
-
-
-def _einordnung_block(mapping: dict[int, str], entries: dict[int, str]) -> str:
-    parts = []
-    for cat in _CATEGORIES:
-        nums = [n for n in sorted(entries) if mapping.get(n) == cat]
-        lines = "\n".join(f"{n} {_titel(entries[n])}" for n in nums)
-        parts.append(f"{cat}:\n{lines}" if lines else f"{cat}:")
-    return "\n".join(parts)
+def _titel_aufloesen(idx: dict[str, int], t: str) -> int | None:
+    """Resolve a title to its entry number; tolerates a trailing description ("Title — …").
+    Looks up the normalized full text first, then the normalized part before " — ".
+    Returns None for non-string input or when neither form is in idx."""
+    if not isinstance(t, str):
+        return None
+    num = idx.get(_norm_titel(t))
+    if num is None:
+        # explicit None check rather than `or`: entry number 0 is falsy but still a valid hit
+        num = idx.get(_norm_titel(_titel(t)))
+    return num


 async def _run_sortierung(topic: str, entries: dict[int, str], mapping: dict[int, str], provider: str, cancelled) -> dict[str, list[int]] | None:
-    """Sortiert innerhalb der Kategorien; schreibt bei Erfolg den Marker und liefert die Reihenfolge."""
+    """Sortiert innerhalb der Kategorien; die JSON-Datei des Agenten ist zugleich der Marker."""
+    out = _bausteine_files(topic)["sortierung"]
+    out.unlink(missing_ok=True)
    slots = [{
        "key": f"bausteine-{topic}-sortierung-1",
-        "prompt": _prompt("Bausteine-Sortierung", topic=topic, einordnung=_einordnung_block(mapping, entries)),
-        "role": "quick", "capabilities": "none",
-        "payload": (lambda result: (result[1].strip(), _parse_einordnung(result[1])) if _parse_einordnung(result[1]) else None),
+        "prompt": _prompt("Bausteine-Sortierung", topic=topic, einordnung=_kategorien_block(mapping, entries), out_path=out),
+        "role": "quick", "capabilities": "files",
+        "payload": (lambda result: _resolve_reihenfolge(_json_datei(out), entries)),
    }]
    res = await _race(topic, "Sortierung", slots, 1, _timeout("sortierung", len(entries)), provider, cancelled=cancelled)
    if res is None:
+        out.unlink(missing_ok=True)
        return None
-    raw, sort_mapping = res[0]
-    _sortierung_path(topic).write_text(raw, encoding="utf-8")
-    return {cat: [num for num, c in sort_mapping.items() if c == cat] for cat in _CATEGORIES}
+    return res[0]


 async def generate_bausteine(topic: str, instructions: str = "", provider: str = DEFAULT_PROVIDER) -> None:
@@ -405,16 +465,9 @@ async def generate_bausteine(topic: str, instructions: str = "", provider: str =
    _bausteine_progress[topic] = "Wartend…"
    _bausteine_errors.pop(topic, None)

-    final_path = bausteine_path(topic)
+    files = _bausteine_files(topic)
+    final_path = files["final"]
    project = project_dir(topic) if project_dir(topic).is_dir() else None
-    stem = final_path.stem
-    recherche_paths = [final_path.parent / f"{stem}.recherche-{i}.md" for i in (1, 2, 3, 4)]
-    auswahl_paths = [final_path.parent / f"{stem}.auswahl-{i}.md" for i in (1, 2)]
-    einordnung_paths = [final_path.parent / f"{stem}.einordnung-{i}.md" for i in (1, 2, 3)]
-    auswahl_check_path = final_path.parent / f"{stem}.auswahl-check.md"
-    final_check_path = final_path.parent / f"{stem}.final-check.md"
-    sortierung_path = _sortierung_path(topic)
-    slot_files = [*recherche_paths, *auswahl_paths, *einordnung_paths, auswahl_check_path, final_check_path, sortierung_path]

    def set_p(msg: str, step: int | None = None) -> None:
        _bausteine_progress[topic] = msg
@@ -429,16 +482,18 @@ async def generate_bausteine(topic: str, instructions: str = "", provider: str =

    try:
        async with _semaphore:
-            # Fertig, aber ohne Sortier-Marker (ältere Pipeline-Version): nur die Sortierung nachholen.
-            if final_path.exists() and not sortierung_path.exists():
+            # Fertig, aber ohne Sortier-Marker (ältere Version): nur die Sortierung nachholen.
+            if final_path.exists() and not files["sortierung"].exists():
                cats = _parse_kategorien(final_path.read_text(encoding="utf-8"))
-                entries, mapping = {}, {}
+                entries: dict[int, str] = {}
+                mapping: dict[int, str] = {}
                i = 0
                for cat in _CATEGORIES:
                    for text in cats.get(cat, []):
                        i += 1
                        entries[i] = text
                        mapping[i] = cat
+                entries = _eindeutige_titel(entries)
                if entries:
                    set_p("Sortiere Bausteine…", step=5)
                    order = await _run_sortierung(topic, entries, mapping, provider, is_cancelled)
@@ -452,15 +507,15 @@ async def generate_bausteine(topic: str, instructions: str = "", provider: str =
                return

            # „Neu erstellen": fertige (sortierte) Bausteine → kompletter Frischstart.
-            # Sonst sind Slot-Dateien Reste eines Abbruchs/Fehlers → Resume, fertige Schritte überspringen.
+            # Sonst sind Slot-Dateien Reste eines Abbruchs/Fehlers → Resume.
            if final_path.exists():
-                for p_alt in slot_files:
+                for p_alt in _alle_slot_dateien(files):
                    p_alt.unlink(missing_ok=True)

            # Schritt 1: 4 Recherche-Agenten, 3 gültige nötig — vorhandene Slot-Dateien zählen
-            recherchen = []
+            recherchen: list[str] = []
            offen = []
-            for i, path in enumerate(recherche_paths, 1):
+            for i, path in enumerate(files["recherche"], 1):
                text = _file_payload(path)
                if text is not None and len(recherchen) < 3:
                    recherchen.append(text)
@@ -494,12 +549,12 @@ async def generate_bausteine(topic: str, instructions: str = "", provider: str =

            # Schritt 2: 2 Auswahl-Agenten, der erste gewinnt — vorhandene gültige Datei wird übernommen
            n_est = max(len(_parse_auswahl(t)) for t in recherchen)
-            results_block = "\n\n".join(f"### Recherche {i}\n\n{text}" for i, text in enumerate(recherchen, 1))
-            bestehende = next((res for p in auswahl_paths if (res := _auswahl_payload(p)) is not None), None)
+            bestehende = next((res for p in files["auswahl"] if (res := _auswahl_payload(p)) is not None), None)
            if bestehende is not None:
                flat, entries = bestehende
            else:
                set_p("Konsolidiere Recherche…", step=1)
+                results_block = "\n\n".join(f"### Recherche {i}\n\n{text}" for i, text in enumerate(recherchen, 1))
                slots = [
                    {
                        "key": f"bausteine-{topic}-auswahl-{i}",
@@ -507,7 +562,7 @@ async def generate_bausteine(topic: str, instructions: str = "", provider: str =
                        "role": "fast", "capabilities": "files",
                        "payload": (lambda result, p=path: _auswahl_payload(p)),
                    }
-                    for i, path in enumerate(auswahl_paths, 1)
+                    for i, path in enumerate(files["auswahl"], 1)
                ]
                auswahl = await _race(topic, "Auswahl", slots, 1, _timeout("auswahl", n_est), provider, cancelled=is_cancelled)
                if is_cancelled():
@@ -518,16 +573,21 @@ async def generate_bausteine(topic: str, instructions: str = "", provider: str =
                    return
                flat, entries = auswahl[0]

-            # Schritt 2b: Auswahl-Prüfung (nicht fatal) — gespeicherte Antwort wird erneut angewendet
+            # Schritt 2b: Auswahl-Prüfung gegen die Recherche-Titel (JSON, nicht fatal)
            set_p("Prüfe Auswahl…", step=2)
-            raw_check = auswahl_check_path.read_text(encoding="utf-8") if auswahl_check_path.exists() else None
-            patch = _parse_auswahl_check(raw_check) if raw_check is not None else None
+            check_path = files["auswahl_check"]
+            patch = _auswahl_check_schema(_json_datei(check_path))
            if patch is None:
+                check_path.unlink(missing_ok=True)
+                titel_listen = "\n\n".join(
+                    f"### Recherche {i}\n" + "\n".join(f"- {_titel(t)}" for t in _parse_auswahl(text).values())
+                    for i, text in enumerate(recherchen, 1)
+                )
                slots = [{
                    "key": f"bausteine-{topic}-auswahlcheck-1",
-                    "prompt": _prompt("Bausteine-Auswahl-Check", topic=topic, results=results_block, auswahl=flat),
-                    "role": "fast", "capabilities": "none",
-                    "payload": (lambda result: (result[1].strip(), _parse_auswahl_check(result[1])) if _parse_auswahl_check(result[1]) is not None else None),
+                    "prompt": _prompt("Bausteine-Auswahl-Check", topic=topic, results=titel_listen, auswahl=flat, out_path=check_path),
+                    "role": "fast", "capabilities": "files",
+                    "payload": (lambda result: _auswahl_check_schema(_json_datei(check_path))),
                }]
                checks = await _race(topic, "Auswahl-Check", slots, 1, _timeout("auswahl_check", len(entries)), provider, cancelled=is_cancelled)
                if is_cancelled():
@@ -536,40 +596,44 @@ async def generate_bausteine(topic: str, instructions: str = "", provider: str =
                if checks is None:
                    _log(topic, "Auswahl-Check fehlgeschlagen — fahre ohne Korrekturen fort")
                else:
-                    raw_check, patch = checks[0]
-                    auswahl_check_path.write_text(raw_check, encoding="utf-8")
-            if patch is not None:
-                if patch["remove"]:
-                    _log(topic, f"Auswahl-Check streicht Duplikate: {sorted(patch['remove'])}")
-                    entries = {n: t for n, t in entries.items() if n not in patch["remove"]}
-                if patch["add"]:
-                    _log(topic, f"Auswahl-Check ergänzt {len(patch['add'])} Bausteine")
-                if patch["remove"] or patch["add"]:
-                    texts = [t for _, t in sorted(entries.items())] + patch["add"]
-                    entries = {i: t for i, t in enumerate(texts, 1)}
-                    flat = "\n".join(f"{i}. {t}" for i, t in entries.items())
+                    patch = checks[0]
+            if patch is not None and (patch["streichen"] or patch["nachtraege"]):
+                idx = _titel_index(entries)
+                weg = {num for t in patch["streichen"] if (num := _titel_aufloesen(idx, t)) is not None}
+                if weg:
+                    _log(topic, f"Auswahl-Check streicht Duplikate: {sorted(weg)}")
+                    entries = {n: t for n, t in entries.items() if n not in weg}
+                if patch["nachtraege"]:
+                    _log(topic, f"Auswahl-Check ergänzt {len(patch['nachtraege'])} Bausteine")
+                texts = [t for _, t in sorted(entries.items())] + list(patch["nachtraege"])
+                entries = {i: t for i, t in enumerate(texts, 1)}

-            # Schritt 3: 4 Einordnungs-Agenten, 3 gültige nötig — gespeicherte Stimmen einlesen
+            # Ab hier ist der Titel der Schlüssel — eindeutig machen
+            entries = _eindeutige_titel(entries)
+            bausteine_liste = "\n".join(f"- {t}" for t in entries.values())
+
+            # Schritt 3: 4 Einordnungs-Agenten, 3 gültige nötig (JSON-Dateien, Titel-validiert)
            n = len(entries)
-            einordnungen = []
-            for path in einordnung_paths:
-                if path.exists():
-                    text = path.read_text(encoding="utf-8")
-                    parsed = _parse_einordnung(text)
-                    if parsed:
-                        einordnungen.append((text, parsed))
-            einordnungen = einordnungen[:3]
+            einordnungen: list[dict[int, str]] = []
+            offen = []
+            for i, path in enumerate(files["einordnung"], 1):
+                m = _resolve_kategorien(_json_datei(path), entries)
+                if m is not None and len(einordnungen) < 3:
+                    einordnungen.append(m)
+                else:
+                    path.unlink(missing_ok=True)
+                    offen.append((i, path))
            vorhanden = len(einordnungen)
            set_p(f"Einordnung läuft ({vorhanden}/3 gültig)…", step=3)
            if vorhanden < 3:
                slots = [
                    {
                        "key": f"bausteine-{topic}-einordnung-{i}",
-                        "prompt": _prompt("Bausteine-Einordnung", topic=topic, bausteine=flat),
-                        "role": "quick", "capabilities": "none",
-                        "payload": (lambda result: (result[1].strip(), _parse_einordnung(result[1])) if _parse_einordnung(result[1]) else None),
+                        "prompt": _prompt("Bausteine-Einordnung", topic=topic, bausteine=bausteine_liste, out_path=path),
+                        "role": "quick", "capabilities": "files",
+                        "payload": (lambda result, p=path: _resolve_kategorien(_json_datei(p), entries)),
                    }
-                    for i in range(vorhanden + 1, 5)
+                    for i, path in offen
                ]
                neue = await _race(
                    topic, "Einordnung", slots, 3 - vorhanden, _timeout("einordnung", n), provider,
@@ -582,54 +646,59 @@ async def generate_bausteine(topic: str, instructions: str = "", provider: str =
                if neue is None:
                    _bausteine_errors[topic] = "Einordnung fehlgeschlagen (Quorum nicht erreicht)"
                    return
-                for path, (text, _) in zip(einordnung_paths[vorhanden:], neue):
-                    path.write_text(text, encoding="utf-8")
                einordnungen += neue

-            # Schritt 4: Python-Mehrheitsentscheid + Verifikations-Agent — gespeicherte Antwort wird erneut angewendet
+            # Schritt 4: Python-Mehrheitsentscheid + Verifikations-Agent (antwortet nur mit Deltas, JSON)
            set_p("Verifiziere Einordnung…", step=4)
-            mapping, disputes = _majority([m for _, m in einordnungen], entries)
+            mapping, disputes = _majority(einordnungen, entries)
            if disputes:
                _log(topic, f"Keine Mehrheit bei: {disputes}")
-            raw_final = final_check_path.read_text(encoding="utf-8") if final_check_path.exists() else None
-            if raw_final is not None and not (_parse_einordnung(raw_final) or "OK" in raw_final.upper()):
-                raw_final = None
-            if raw_final is None:
-                streit_block = "\n".join(f"{num} {entries[num]}" for num in disputes) or "(keine)"
-                final_prompt = _prompt(
-                    "Bausteine-Einordnung-Final",
-                    topic=topic,
-                    einordnung=_einordnung_block(mapping, entries),
-                    streitfaelle=streit_block,
-                )
-                slots = [
-                    {
-                        "key": f"bausteine-{topic}-final-{i}",
-                        "prompt": final_prompt,
-                        "role": "fast", "capabilities": "none",
-                        "payload": (lambda result: result[1].strip() if (_parse_einordnung(result[1]) or "OK" in result[1].upper()) else None),
-                    }
-                    for i in (1, 2)
-                ]
+
+            def _final_schema(data):
+                if not isinstance(data, dict):
+                    return None
+                idx = _titel_index(entries)
+                out: dict[int, str] = {}
+                for t, cat in data.items():
+                    if not isinstance(t, str) or cat not in _CATEGORIES:
+                        return None
+                    num = _titel_aufloesen(idx, t)
+                    if num is not None:
+                        out[num] = cat
+                return out  # leeres Dict = alles bestätigt
+
+            fc_path = files["final_check"]
+            overrides = _final_schema(_json_datei(fc_path))
+            if overrides is None:
+                fc_path.unlink(missing_ok=True)
+                streit_block = "\n".join(f"- {entries[n]}" for n in disputes) or "(keine)"
+                slots = [{
+                    "key": f"bausteine-{topic}-final-1",
+                    "prompt": _prompt(
+                        "Bausteine-Einordnung-Final",
+                        topic=topic, einordnung=_kategorien_block(mapping, entries),
+                        streitfaelle=streit_block, out_path=fc_path,
+                    ),
+                    "role": "fast", "capabilities": "files",
+                    "payload": (lambda result: _final_schema(_json_datei(fc_path))),
+                }]
                finals = await _race(topic, "Final", slots, 1, _timeout("final", n), provider, cancelled=is_cancelled)
                if is_cancelled():
                    abgebrochen()
                    return
                if finals is None:
                    _log(topic, "Final-Verifikation fehlgeschlagen — Mehrheitsentscheid bleibt unverändert")
+                    overrides = {}
                else:
-                    raw_final = finals[0]
-                    final_check_path.write_text(raw_final, encoding="utf-8")
-            if raw_final is not None:
-                overrides = {num: cat for num, cat in _parse_einordnung(raw_final).items() if num in entries}
-                korrekturen = {num: cat for num, cat in overrides.items() if mapping.get(num) != cat and num not in disputes}
-                if korrekturen:
-                    _log(topic, f"Final-Verifikation korrigiert: {korrekturen}")
-                mapping.update(overrides)
+                    overrides = finals[0]
+            korrekturen = {num: cat for num, cat in overrides.items() if mapping.get(num) != cat and num not in disputes}
+            if korrekturen:
+                _log(topic, f"Final-Verifikation korrigiert: { {_titel(entries[n]): c for n, c in korrekturen.items()} }")
+            mapping.update(overrides)
            for num in disputes:
                if num not in mapping:
-                    _log(topic, f"Streitfall {num} unentschieden → WICHTIG")
-                    mapping[num] = "WICHTIG"
+                    _log(topic, f"Streitfall '{_titel(entries[num])}' unentschieden → REST")
+                    mapping[num] = "REST"

            # Schritt 5: Sortierung innerhalb der Kategorien (einfach → komplex, nicht fatal)
            set_p("Sortiere Bausteine…", step=5)
@@ -644,7 +713,6 @@ async def generate_bausteine(topic: str, instructions: str = "", provider: str =
        _bausteine_errors[topic] = str(e)[:2000]
    finally:
        # Kein Datei-Cleanup: Zwischendateien bleiben für Resume bzw. Nachvollziehbarkeit.
-        # Aufräumen passiert nur explizit über reset_bausteine().
        _bausteine_progress.pop(topic, None)
        _bausteine_step.pop(topic, None)
        _bausteine_cancelled.discard(topic)
@@ -680,33 +748,36 @@ def _parse_kategorien(text: str) -> dict[str, list[str]]:
    return cats


-def _titel(entry: str) -> str:
-    return entry.split(" — ")[0].strip() or entry
-
-
-def _parse_gliederung(text: str, valid: set[int], topic: str) -> list[dict]:
-    """Parst die Gliederung (`KAPITEL: Titel` + `N Titel`-Zeilen) → [{"title", "nums"}]."""
+def _resolve_gliederung(data, entries: dict[int, str]) -> list[dict] | None:
+    """Resolve {"kapitel": [{"titel", "bausteine": [titles]}]} to [{"title", "nums"}]; None on schema errors, <85% known titles or <85% coverage; a null/blank "titel" becomes "Kapitel"."""
+    if not isinstance(data, dict) or not isinstance(data.get("kapitel"), list):
+        return None
+    idx = _titel_index(entries)
    chapters: list[dict] = []
    seen: set[int] = set()
-    for line in text.splitlines():
-        s = line.strip().lstrip("-*# ").strip()
-        if not s:
-            continue
-        m = re.match(r"KAPITEL\s*:\s*(.+)", s, re.IGNORECASE)
-        if m:
-            chapters.append({"title": m.group(1).strip(), "nums": []})
-            continue
-        m = re.match(r"(\d+)\b", s)
-        if m and chapters:
-            num = int(m.group(1))
-            if num in valid and num not in seen:
-                chapters[-1]["nums"].append(num)
+    total = unknown = 0
+    for ch in data["kapitel"]:
+        if not isinstance(ch, dict) or not isinstance(ch.get("bausteine"), list):
+            return None
+        nums = []
+        for t in ch["bausteine"]:
+            total += 1
+            num = _titel_aufloesen(idx, t) if isinstance(t, str) else None
+            if num is None:
+                unknown += 1
+            elif num not in seen:
+                nums.append(num)
                seen.add(num)
-    missing = sorted(valid - seen)
+        if nums:
+            chapters.append({"title": str(ch.get("titel") or "").strip() or "Kapitel", "nums": nums})
+    if not chapters or total == 0:
+        return None
+    if (total - unknown) / total < 0.85 or len(seen) / len(entries) < 0.85:
+        return None
+    missing = sorted(set(entries) - seen)
    if missing:
-        _log(topic, f"Gliederung: Bausteine {missing} fehlen → Kapitel 'Weitere Themen'")
        chapters.append({"title": "Weitere Themen", "nums": missing})
-    return [c for c in chapters if c["nums"]]
+    return chapters


 def _split_chunks(chapters: list[dict], n: int) -> list[list[dict]]:
@@ -735,16 +806,16 @@ def _zuteilung_text(chunk: list[dict], entries: dict[int, str]) -> str:
    lines = []
    for ch in chunk:
        lines.append(f"KAPITEL: {ch['title']}")
-        lines.extend(f"{num} {entries[num]}" for num in ch["nums"])
+        lines.extend(f"- {entries[num]}" for num in ch["nums"])
    return "\n".join(lines)


 _FRAGMENT_KAPITEL_RE = re.compile(r"<!--\s*kapitel\s*:\s*(.*?)\s*-->", re.IGNORECASE)
-_FRAGMENT_SECTION_RE = re.compile(r"<!--\s*section\s*:\s*(\d+)\s*(?:\|\s*(.*?))?\s*-->", re.IGNORECASE)
+_FRAGMENT_SECTION_RE = re.compile(r"<!--\s*section\s*:\s*(.*?)\s*-->", re.IGNORECASE)


 def _parse_fragment(text: str) -> list[dict]:
-    """Parst eine Writer-Datei → [{"kapitel", "num", "title", "md"}] in Datei-Reihenfolge."""
+    """Parst eine Writer-Datei → [{"kapitel", "titel", "md"}] in Datei-Reihenfolge."""
    sections: list[dict] = []
    kapitel = None
    current = None
@@ -757,7 +828,7 @@ def _parse_fragment(text: str) -> list[dict]:
            continue
        m = _FRAGMENT_SECTION_RE.match(s)
        if m:
-            current = {"kapitel": kapitel, "num": int(m.group(1)), "title": (m.group(2) or "").strip(), "md": []}
+            current = {"kapitel": kapitel, "titel": m.group(1), "md": []}
            sections.append(current)
            continue
        if current is not None:
@@ -767,10 +838,6 @@ def _parse_fragment(text: str) -> list[dict]:
    return sections


-def _section_json(sec: dict, entries: dict[int, str]) -> dict:
-    return {"num": sec["num"], "title": sec["title"] or _titel(entries[sec["num"]]), "md": sec["md"]}
-
-
 async def _generate_onepager(
    guide_id: str, topic: str, instructions: str, provider: str,
    project: Path | None, content_path: Path, fragment_paths: list[Path],
@@ -778,6 +845,21 @@ async def _generate_onepager(
    def is_cancelled() -> bool:
        return guide_id in _cancelled

+    def karten_schema(data):
+        if not isinstance(data, dict):
+            return None
+        if data.get("ok") is True:
+            return "ok"
+        karten = data.get("karten")
+        if not isinstance(karten, list) or not karten:
+            return None
+        out = []
+        for k in karten:
+            if not isinstance(k, dict) or not isinstance(k.get("titel"), str) or not isinstance(k.get("merksatz"), str):
+                return None
+            out.append({"titel": k["titel"].strip(), "merksatz": k["merksatz"].strip()})
+        return out
+
    # Schritt 1: Recherche — eigene Faktenbasis, unabhängig von den Bausteinen
    await _set_progress(guide_id, "Recherchiere…")
    recherche_path = content_path.parent / f"{content_path.stem}.recherche.md"
@@ -801,13 +883,16 @@ async def _generate_onepager(
        return None
    recherche = res[0]

-    # Schritt 2: Bauen — Karten nur aus der Faktenbasis
+    # Schritt 2: Bauen — Karten nur aus der Faktenbasis (JSON)
    await _set_progress(guide_id, "Baue OnePager…")
+    karten_path = content_path.parent / f"{content_path.stem}.karten.json"
+    fragment_paths.append(karten_path)
+    karten_path.unlink(missing_ok=True)
    slots = [{
        "key": f"{guide_id}-bauen",
-        "prompt": _prompt("OnePager-Bauen", topic=topic, recherche=recherche, extra=_extra(instructions)),
-        "role": "fast", "capabilities": "none",
-        "payload": (lambda result: _parse_auswahl(result[1]) or None),
+        "prompt": _prompt("OnePager-Bauen", topic=topic, recherche=recherche, out_path=karten_path, extra=_extra(instructions)),
+        "role": "fast", "capabilities": "files",
+        "payload": (lambda result: (k if isinstance(k := karten_schema(_json_datei(karten_path)), list) else None)),
    }]
    res = await _race(topic, "OnePager-Bauen", slots, 1, _timeout("onepager_bauen"), provider, cancelled=is_cancelled)
    if is_cancelled():
@@ -815,31 +900,32 @@ async def _generate_onepager(
    if res is None:
        await _fail(guide_id, "OnePager-Bau fehlgeschlagen")
        return None
-    cards = res[0]
+    karten = res[0]

-    # Schritt 3: Verifizieren — OK oder vollständig korrigierte Liste (nicht fatal)
+    # Schritt 3: Verifizieren — {"ok": true} oder vollständig korrigierte Liste (nicht fatal)
    await _set_progress(guide_id, "Verifiziere OnePager…")
-    karten_block = "\n".join(f"{i}. {t}" for i, t in cards.items())
+    check_path = content_path.parent / f"{content_path.stem}.onepager-check.json"
+    fragment_paths.append(check_path)
+    check_path.unlink(missing_ok=True)
+    karten_block = "\n".join(f"- {k['titel']} — {k['merksatz']}" for k in karten)
    slots = [{
        "key": f"{guide_id}-verify",
-        "prompt": _prompt("OnePager-Verifikation", topic=topic, recherche=recherche, karten=karten_block),
-        "role": "fast", "capabilities": "none",
-        "payload": (lambda result: result[1].strip() if (_parse_auswahl(result[1]) or "OK" in result[1].upper()) else None),
+        "prompt": _prompt("OnePager-Verifikation", topic=topic, recherche=recherche, karten=karten_block, out_path=check_path),
+        "role": "fast", "capabilities": "files",
+        "payload": (lambda result: karten_schema(_json_datei(check_path))),
    }]
    res = await _race(topic, "OnePager-Verifikation", slots, 1, _timeout("onepager_verify"), provider, cancelled=is_cancelled)
    if is_cancelled():
        return None
    if res is None:
        _log(topic, "OnePager-Verifikation fehlgeschlagen — ungeprüfte Version wird verwendet")
-    else:
-        corrected = _parse_auswahl(res[0])
-        if corrected:
-            _log(topic, "OnePager-Verifikation hat Korrekturen geliefert")
-            cards = corrected
+    elif isinstance(res[0], list):
+        _log(topic, "OnePager-Verifikation hat Korrekturen geliefert")
+        karten = res[0]

    sections = [
-        {"num": i, "title": _titel(text), "md": text.split(" — ", 1)[1].strip() if " — " in text else text}
-        for i, text in cards.items()
+        {"num": i, "title": k["titel"], "md": k["merksatz"]}
+        for i, k in enumerate(karten, 1)
    ]
    return [{"title": topic, "sections": sections}]

@@ -849,30 +935,35 @@ async def _generate_sections(
    facts: str, instructions: str, provider: str,
    content_path: Path, fragment_paths: list[Path],
 ) -> list[dict] | None:
+    def is_cancelled() -> bool:
+        return guide_id in _cancelled
+
    spec = (TEMPLATES_DIR / "Format" / "Section.md").read_text(encoding="utf-8")
-    bausteine_block = "\n".join(f"{i}. {t}" for i, t in entries.items())
+    bausteine_liste = "\n".join(f"- {t}" for t in entries.values())

    if format_name == "MiniGuide":
        # Ein Writer, gliedert selbst in Kapitel
        plan = None
-        zuteilungen = [bausteine_block]
+        zuteilungen = [bausteine_liste]
        chunk_sizes = [len(entries)]
    else:
        await _set_progress(guide_id, "Plane Gliederung…")
-        returncode, stdout, stderr = await run_agent(
-            f"{guide_id}-plan",
-            _prompt("Guide-Plan", topic=topic, format_name=format_name, bausteine=bausteine_block, extra=_extra(instructions)),
-            _timeout("plan", len(entries)), provider=provider, role="guide", capabilities="none",
-        )
-        if guide_id in _cancelled:
+        plan_path = content_path.parent / f"{content_path.stem}.gliederung.json"
+        fragment_paths.append(plan_path)
+        plan_path.unlink(missing_ok=True)
+        slots = [{
+            "key": f"{guide_id}-plan",
+            "prompt": _prompt("Guide-Plan", topic=topic, format_name=format_name, bausteine=bausteine_liste, out_path=plan_path, extra=_extra(instructions)),
+            "role": "guide", "capabilities": "files",
+            "payload": (lambda result: _resolve_gliederung(_json_datei(plan_path), entries)),
+        }]
+        res = await _race(topic, "Gliederung", slots, 1, _timeout("plan", len(entries)), provider, cancelled=is_cancelled)
+        if is_cancelled():
            return None
-        if returncode != 0:
-            await _fail(guide_id, _claude_error("Plan-Fehler", returncode, stdout, stderr))
-            return None
-        plan = _parse_gliederung(stdout, set(entries), topic)
-        if not plan:
-            await _fail(guide_id, "Gliederung nicht parsebar")
+        if res is None:
+            await _fail(guide_id, "Gliederung fehlgeschlagen")
            return None
+        plan = res[0]
        chunks = _split_chunks(plan, WRITER_COUNT[format_name])
        zuteilungen = [_zuteilung_text(chunk, entries) for chunk in chunks]
        chunk_sizes = [sum(len(c["nums"]) for c in chunk) for chunk in chunks]
@@ -893,7 +984,7 @@ async def _generate_sections(
        )
        for i, (zuteilung, path, size) in enumerate(zip(zuteilungen, paths, chunk_sizes), 1)
    ], return_exceptions=True)
-    if guide_id in _cancelled:
+    if is_cancelled():
        return None
    for i, (r, p) in enumerate(zip(results, paths), 1):
        if isinstance(r, BaseException):
@@ -911,28 +1002,38 @@ async def _generate_sections(
        return None

    await _set_progress(guide_id, "Setze zusammen…")
+    idx = _titel_index(entries)
+    by_num: dict[int, dict] = {}
+    fragment_order: list[int] = []
+    for sec in fragments:
+        num = _titel_aufloesen(idx, sec["titel"])
+        if num is None:
+            _log(topic, f"Writer lieferte unbekannte Section '{sec['titel'][:40]}' (ignoriert)")
+            continue
+        if num not in by_num:
+            by_num[num] = sec
+            fragment_order.append(num)
+
+    def section_json(num: int) -> dict:
+        sec = by_num[num]
+        return {"num": num, "title": _titel(entries[num]), "md": sec["md"]}
+
    chapters: list[dict] = []
    if plan is None:
        # MiniGuide: Kapitel aus den Fragment-Markern in Datei-Reihenfolge
-        seen: set[int] = set()
-        for sec in fragments:
-            if sec["num"] not in entries or sec["num"] in seen:
-                continue
-            seen.add(sec["num"])
-            title = sec["kapitel"] or topic
+        for num in fragment_order:
+            title = by_num[num]["kapitel"] or topic
            if not chapters or chapters[-1]["title"] != title:
                chapters.append({"title": title, "sections": []})
-            chapters[-1]["sections"].append(_section_json(sec, entries))
-        missing = sorted(set(entries) - seen)
+            chapters[-1]["sections"].append(section_json(num))
    else:
-        by_num = {sec["num"]: sec for sec in fragments if sec["num"] in entries}
        for ch in plan:
-            sections = [_section_json(by_num[num], entries) for num in ch["nums"] if num in by_num]
+            sections = [section_json(num) for num in ch["nums"] if num in by_num]
            if sections:
                chapters.append({"title": ch["title"], "sections": sections})
-        missing = sorted(set(entries) - set(by_num))
+    missing = sorted(set(entries) - set(by_num))
    if missing:
-        _log(topic, f"Sections fehlen in der Writer-Ausgabe: {missing}")
+        _log(topic, f"Sections fehlen in der Writer-Ausgabe: {[_titel(entries[n]) for n in missing]}")
    if not chapters:
        await _fail(guide_id, "Keine Sections in der Writer-Ausgabe gefunden")
        return None
@@ -962,7 +1063,7 @@ async def generate_guide(guide_id: str, topic: str, format_name: str, instructio
                if not selected:
                    await _fail(guide_id, "Keine passenden Bausteine gefunden")
                    return
-                entries = {i: text for i, text in enumerate(selected, 1)}
+                entries = _eindeutige_titel({i: text for i, text in enumerate(selected, 1)})
                facts = _prompt("Guide-Fakten-Projekt", project=project) if project else _prompt("Guide-Fakten-Thema")
                chapters = await _generate_sections(
                    guide_id, topic, format_name, entries,