update

2026-06-06 21:31:03 +02:00
parent 20f42974a5
commit 08e67cb4f1
10 changed files with 111 additions and 178 deletions
--- a/backend/config.py
+++ b/backend/config.py
@@ -15,7 +15,6 @@ TIMEOUTS = {
    "recherche":     (1800, 0),   # fix 30 min
    "auswahl":       (600, 10),
    "auswahl_check": (300, 2),
-    "sortierung":    (300, 5),
    "plan":          (300, 5),
    "writer":        (600, 120),  # pro Section im Chunk
    "onepager_recherche": (900, 0),
@@ -23,11 +22,11 @@ TIMEOUTS = {
    "onepager_verify":    (300, 0),
 }

-# Welcher Anteil der sortierten Baustein-Liste in welches Format fließt: (Anteil, Mindestanzahl).
+# Auswahl-Auftrag je Format: (Mindest-Anteil, Mindestanzahl, Zweck).
 FORMAT_ANTEIL = {
-    "MiniGuide": (0.10, 8),
-    "Guide":     (0.50, 20),
-    "FullGuide": (1.00, 0),
+    "MiniGuide": (0.05, 8, "einen kompakten Anfänger-Guide — der schnelle Einstieg ins Thema"),
+    "Guide":     (0.33, 20, "einen ausführlichen Anfänger-Guide — ein solides Fundament im Thema"),
+    "FullGuide": (0.90, 0, "einen Komplett-Guide — das ganze Thema"),
 }

 # Provider-Stacks: komplett unabhängig, einer kann jederzeit entfernt werden.
--- a/backend/generator.py
+++ b/backend/generator.py
@@ -115,50 +115,6 @@ def _json_datei(path: Path):
        return None


-def _resolve_liste(data, entries: dict[int, str], min_match: float = 0.85) -> list[int] | None:
-    """{"reihenfolge": [Titel, …]} → [nums]; None bei zu vielen unbekannten Titeln
-    oder zu geringer Abdeckung der Einträge."""
-    if not isinstance(data, dict) or not isinstance(data.get("reihenfolge"), list):
-        return None
-    idx = _titel_index(entries)
-    nums: list[int] = []
-    total = unknown = 0
-    for t in data["reihenfolge"]:
-        if not isinstance(t, str):
-            return None
-        total += 1
-        num = _titel_aufloesen(idx, t)
-        if num is None:
-            unknown += 1
-        elif num not in nums:
-            nums.append(num)
-    if total == 0:
-        return None
-    if (total - unknown) / total < min_match or len(nums) / len(entries) < min_match:
-        return None
-    return nums
-
-
-def _merge_sortierungen(topic: str, listen: list[list[int]], entries: dict[int, str]) -> list[int]:
-    """Median-Rang über mehrere Sortierungen; Bausteine ohne Stimmen ans Ende."""
-    raenge: dict[int, list[int]] = {num: [] for num in entries}
-    for liste in listen:
-        for rang, num in enumerate(liste):
-            if num in raenge:
-                raenge[num].append(rang)
-    ohne = [num for num, r in raenge.items() if not r]
-    if ohne:
-        _log(topic, f"Sortierung: keine Stimmen für {[_titel(entries[n]) for n in ohne]} → ans Ende")
-
-    def key(num: int):
-        r = sorted(raenge[num])
-        if not r:
-            return (10**9, 10**9, num)
-        return (r[len(r) // 2], sum(r) / len(r), num)
-
-    return sorted(entries, key=key)
-
-
 def _timeout(step: str, n: int = 0) -> int:
    base, per = TIMEOUTS[step]
    return base + per * n
@@ -234,14 +190,14 @@ async def _race(topic: str, label: str, slots: list[dict], quorum: int, timeout:
            await asyncio.gather(*tasks.keys(), return_exceptions=True)


-# --- Bausteine-Pipeline: 4x Recherche (3) → 2x Auswahl (1) → Check → 3x Sortierung (Median-Rang) ---
+# --- Bausteine-Pipeline: 4x Recherche (3) → 2x Auswahl (1) → Prüfung — reines Inventar, unsortiert ---

 _bausteine_progress: dict[str, str] = {}
 _bausteine_errors: dict[str, str] = {}
 _bausteine_cancelled: set[str] = set()
 _bausteine_step: dict[str, int] = {}

-BAUSTEINE_STEPS = ("Recherche", "Auswahl", "Prüfung", "Sortierung")
+BAUSTEINE_STEPS = ("Recherche", "Auswahl", "Prüfung")
 _CATEGORIES = ("KERN", "WICHTIG", "REST")  # nur noch für den Altformat-Reader


@@ -253,12 +209,11 @@ def _bausteine_files(topic: str) -> dict:
        "recherche": [arbeit / f"recherche-{i}.md" for i in (1, 2, 3, 4)],
        "auswahl": [arbeit / f"auswahl-{i}.md" for i in (1, 2)],
        "auswahl_check": arbeit / "auswahl-check.json",
-        "sortierung": [arbeit / f"sortierung-{i}.json" for i in (1, 2, 3)],
    }


 def _alle_slot_dateien(files: dict) -> list[Path]:
-    return [*files["recherche"], *files["auswahl"], files["auswahl_check"], *files["sortierung"]]
+    return [*files["recherche"], *files["auswahl"], files["auswahl_check"]]


 def cancel_bausteine(topic: str) -> bool:
@@ -531,49 +486,10 @@ async def generate_bausteine(topic: str, instructions: str = "", provider: str =
                texts = [t for _, t in sorted(entries.items())] + list(patch["nachtraege"])
                entries = {i: t for i, t in enumerate(texts, 1)}

-            # Ab hier ist der Titel der Schlüssel — eindeutig machen
+            # Titel eindeutig machen und unsortiertes Inventar schreiben
            entries = _eindeutige_titel(entries)
-            bausteine_liste = "\n".join(f"- {t}" for t in entries.values())
-
-            # Schritt 3: 3 Sortier-Agenten, ALLE nötig — Merge per Median-Rang
-            n = len(entries)
-            sortierungen: list[list[int]] = []
-            offen = []
-            for i, path in enumerate(files["sortierung"], 1):
-                liste = _resolve_liste(_json_datei(path), entries)
-                if liste is not None and len(sortierungen) < 3:
-                    sortierungen.append(liste)
-                else:
-                    path.unlink(missing_ok=True)
-                    offen.append((i, path))
-            vorhanden = len(sortierungen)
-            set_p(f"Sortierung läuft ({vorhanden}/3 gültig)…", step=3)
-            if vorhanden < 3:
-                slots = [
-                    {
-                        "key": f"bausteine-{topic}-sortierung-{i}",
-                        "prompt": _prompt("Bausteine-Sortierung", topic=topic, bausteine=bausteine_liste, out_path=path),
-                        "role": "quick", "capabilities": "files",
-                        "payload": (lambda result, p=path: _resolve_liste(_json_datei(p), entries)),
-                    }
-                    for i, path in offen
-                ]
-                neue = await _race(
-                    topic, "Sortierung", slots, 3 - vorhanden, _timeout("sortierung", n), provider,
-                    on_update=lambda c: set_p(f"Sortierung läuft ({vorhanden + c}/3 gültig)…"),
-                    cancelled=is_cancelled,
-                )
-                if is_cancelled():
-                    abgebrochen()
-                    return
-                if neue is None:
-                    _bausteine_errors[topic] = "Sortierung fehlgeschlagen (Quorum nicht erreicht)"
-                    return
-                sortierungen += neue
-
-            reihenfolge = _merge_sortierungen(topic, sortierungen, entries)
            final_path.write_text(
-                "\n".join(f"{i}. {entries[num]}" for i, num in enumerate(reihenfolge, 1)) + "\n",
+                "\n".join(f"{i}. {t}" for i, t in entries.items()) + "\n",
                encoding="utf-8",
            )
    except Exception as e:
@@ -591,8 +507,11 @@ async def generate_bausteine(topic: str, instructions: str = "", provider: str =
 WRITER_COUNT = {"MiniGuide": 1, "Guide": 2, "FullGuide": 4}


-def _resolve_gliederung(data, entries: dict[int, str]) -> list[dict] | None:
-    """{"kapitel": [{"titel", "bausteine": [Titel]}]} → [{"title", "nums"}]; None bei Schema-/Titel-Fehlern."""
+def _resolve_gliederung(data, entries: dict[int, str], soll: int) -> list[dict] | None:
+    """{"kapitel": [{"titel", "bausteine": [Titel]}]} → [{"title", "nums"}].
+
+    `soll` = Mindest-Anzahl gewählter Bausteine (mit kleiner Toleranz).
+    """
    if not isinstance(data, dict) or not isinstance(data.get("kapitel"), list):
        return None
    idx = _titel_index(entries)
@@ -615,11 +534,10 @@ def _resolve_gliederung(data, entries: dict[int, str]) -> list[dict] | None:
            chapters.append({"title": str(ch.get("titel", "")).strip() or "Kapitel", "nums": nums})
    if not chapters or total == 0:
        return None
-    if (total - unknown) / total < 0.85 or len(seen) / len(entries) < 0.85:
+    if (total - unknown) / total < 0.85:
+        return None
+    if len(seen) < 0.9 * soll:
        return None
-    missing = sorted(set(entries) - seen)
-    if missing:
-        chapters.append({"title": "Weitere Themen", "nums": missing})
    return chapters


@@ -792,33 +710,38 @@ async def _generate_sections(

    spec = (TEMPLATES_DIR / "Format" / "Section.md").read_text(encoding="utf-8")
    bausteine_liste = "\n".join(f"- {t}" for t in entries.values())
+    n = len(entries)
+    anteil, minimum, zweck = FORMAT_ANTEIL[format_name]
+    k = min(n, max(minimum, math.ceil(anteil * n)))
+    auswahl_auftrag = (
+        f"Wähle MINDESTENS {k} der Bausteine und baue daraus {zweck}. "
+        "Wähle, was diesem Zweck dient — lass weg, was dafür nicht nötig ist."
+    )

-    if format_name == "MiniGuide":
-        # Ein Writer, gliedert selbst in Kapitel
-        plan = None
-        zuteilungen = [bausteine_liste]
-        chunk_sizes = [len(entries)]
-    else:
-        await _set_progress(guide_id, "Plane Gliederung…")
-        plan_path = content_path.parent / f"{content_path.stem}.gliederung.json"
-        fragment_paths.append(plan_path)
-        plan_path.unlink(missing_ok=True)
-        slots = [{
-            "key": f"{guide_id}-plan",
-            "prompt": _prompt("Guide-Plan", topic=topic, format_name=format_name, bausteine=bausteine_liste, out_path=plan_path, extra=_extra(instructions)),
-            "role": "guide", "capabilities": "files",
-            "payload": (lambda result: _resolve_gliederung(_json_datei(plan_path), entries)),
-        }]
-        res = await _race(topic, "Gliederung", slots, 1, _timeout("plan", len(entries)), provider, cancelled=is_cancelled)
-        if is_cancelled():
-            return None
-        if res is None:
-            await _fail(guide_id, "Gliederung fehlgeschlagen")
-            return None
-        plan = res[0]
-        chunks = _split_chunks(plan, WRITER_COUNT[format_name])
-        zuteilungen = [_zuteilung_text(chunk, entries) for chunk in chunks]
-        chunk_sizes = [sum(len(c["nums"]) for c in chunk) for chunk in chunks]
+    await _set_progress(guide_id, "Wähle Bausteine & plane Gliederung…")
+    plan_path = content_path.parent / f"{content_path.stem}.gliederung.json"
+    fragment_paths.append(plan_path)
+    plan_path.unlink(missing_ok=True)
+    slots = [{
+        "key": f"{guide_id}-plan",
+        "prompt": _prompt(
+            "Guide-Plan",
+            topic=topic, format_name=format_name, bausteine=bausteine_liste,
+            auswahl_auftrag=auswahl_auftrag, out_path=plan_path, extra=_extra(instructions),
+        ),
+        "role": "guide", "capabilities": "files",
+        "payload": (lambda result: _resolve_gliederung(_json_datei(plan_path), entries, k)),
+    }]
+    res = await _race(topic, "Gliederung", slots, 1, _timeout("plan", n), provider, cancelled=is_cancelled)
+    if is_cancelled():
+        return None
+    if res is None:
+        await _fail(guide_id, "Gliederung fehlgeschlagen")
+        return None
+    plan = res[0]
+    chunks = _split_chunks(plan, WRITER_COUNT[format_name])
+    zuteilungen = [_zuteilung_text(chunk, entries) for chunk in chunks]
+    chunk_sizes = [sum(len(c["nums"]) for c in chunk) for chunk in chunks]

    writer_count = len(zuteilungen)
    await _set_progress(guide_id, f"Schreibe Sections ({writer_count} Writer)…" if writer_count > 1 else "Schreibe Sections…")
@@ -856,7 +779,6 @@ async def _generate_sections(
    await _set_progress(guide_id, "Setze zusammen…")
    idx = _titel_index(entries)
    by_num: dict[int, dict] = {}
-    fragment_order: list[int] = []
    for sec in fragments:
        num = _titel_aufloesen(idx, sec["titel"])
        if num is None:
@@ -864,26 +786,17 @@ async def _generate_sections(
            continue
        if num not in by_num:
            by_num[num] = sec
-            fragment_order.append(num)
-
-    def section_json(num: int) -> dict:
-        sec = by_num[num]
-        return {"num": num, "title": _titel(entries[num]), "md": sec["md"]}

    chapters: list[dict] = []
-    if plan is None:
-        # MiniGuide: Kapitel aus den Fragment-Markern in Datei-Reihenfolge
-        for num in fragment_order:
-            title = by_num[num]["kapitel"] or topic
-            if not chapters or chapters[-1]["title"] != title:
-                chapters.append({"title": title, "sections": []})
-            chapters[-1]["sections"].append(section_json(num))
-    else:
-        for ch in plan:
-            sections = [section_json(num) for num in ch["nums"] if num in by_num]
-            if sections:
-                chapters.append({"title": ch["title"], "sections": sections})
-    missing = sorted(set(entries) - set(by_num))
+    for ch in plan:
+        sections = [
+            {"num": num, "title": _titel(entries[num]), "md": by_num[num]["md"]}
+            for num in ch["nums"] if num in by_num
+        ]
+        if sections:
+            chapters.append({"title": ch["title"], "sections": sections})
+    geplant = {num for ch in plan for num in ch["nums"]}
+    missing = sorted(geplant - set(by_num))
    if missing:
        _log(topic, f"Sections fehlen in der Writer-Ausgabe: {[_titel(entries[n]) for n in missing]}")
    if not chapters:
@@ -913,10 +826,7 @@ async def generate_guide(guide_id: str, topic: str, format_name: str, instructio
                if not alle:
                    await _fail(guide_id, "Keine Bausteine gefunden")
                    return
-                anteil, minimum = FORMAT_ANTEIL[format_name]
-                k = min(len(alle), max(minimum, math.ceil(anteil * len(alle))))
-                selected = [text for _, text in sorted(alle.items())][:k]
-                entries = _eindeutige_titel({i: text for i, text in enumerate(selected, 1)})
+                entries = _eindeutige_titel(alle)
                facts = _prompt("Guide-Fakten-Projekt", project=project) if project else _prompt("Guide-Fakten-Thema")
                chapters = await _generate_sections(
                    guide_id, topic, format_name, entries,