diff --git a/backend/config.py b/backend/config.py index 0a66931..8f2c357 100644 --- a/backend/config.py +++ b/backend/config.py @@ -15,7 +15,6 @@ TIMEOUTS = { "recherche": (1800, 0), # fix 30 min "auswahl": (600, 10), "auswahl_check": (300, 2), - "sortierung": (300, 5), "plan": (300, 5), "writer": (600, 120), # pro Section im Chunk "onepager_recherche": (900, 0), @@ -23,11 +22,11 @@ TIMEOUTS = { "onepager_verify": (300, 0), } -# Welcher Anteil der sortierten Baustein-Liste in welches Format fließt: (Anteil, Mindestanzahl). +# Auswahl-Auftrag je Format: (Mindest-Anteil, Mindestanzahl, Zweck). FORMAT_ANTEIL = { - "MiniGuide": (0.10, 8), - "Guide": (0.50, 20), - "FullGuide": (1.00, 0), + "MiniGuide": (0.05, 8, "einen kompakten Anfänger-Guide — der schnelle Einstieg ins Thema"), + "Guide": (0.33, 20, "einen ausführlichen Anfänger-Guide — ein solides Fundament im Thema"), + "FullGuide": (0.90, 0, "einen Komplett-Guide — das ganze Thema"), } # Provider-Stacks: komplett unabhängig, einer kann jederzeit entfernt werden. diff --git a/backend/generator.py b/backend/generator.py index 0774bf0..15dffcb 100644 --- a/backend/generator.py +++ b/backend/generator.py @@ -115,50 +115,6 @@ def _json_datei(path: Path): return None -def _resolve_liste(data, entries: dict[int, str], min_match: float = 0.85) -> list[int] | None: - """{"reihenfolge": [Titel, …]} → [nums]; None bei zu vielen unbekannten Titeln - oder zu geringer Abdeckung der Einträge.""" - if not isinstance(data, dict) or not isinstance(data.get("reihenfolge"), list): - return None - idx = _titel_index(entries) - nums: list[int] = [] - total = unknown = 0 - for t in data["reihenfolge"]: - if not isinstance(t, str): - return None - total += 1 - num = _titel_aufloesen(idx, t) - if num is None: - unknown += 1 - elif num not in nums: - nums.append(num) - if total == 0: - return None - if (total - unknown) / total < min_match or len(nums) / len(entries) < min_match: - return None - return nums - - -def _merge_sortierungen(topic: str, listen: list[list[int]], entries: dict[int, str]) -> list[int]: - """Median-Rang über mehrere Sortierungen; Bausteine ohne Stimmen ans Ende.""" - raenge: dict[int, list[int]] = {num: [] for num in entries} - for liste in listen: - for rang, num in enumerate(liste): - if num in raenge: - raenge[num].append(rang) - ohne = [num for num, r in raenge.items() if not r] - if ohne: - _log(topic, f"Sortierung: keine Stimmen für {[_titel(entries[n]) for n in ohne]} → ans Ende") - - def key(num: int): - r = sorted(raenge[num]) - if not r: - return (10**9, 10**9, num) - return (r[len(r) // 2], sum(r) / len(r), num) - - return sorted(entries, key=key) - - def _timeout(step: str, n: int = 0) -> int: base, per = TIMEOUTS[step] return base + per * n @@ -234,14 +190,14 @@ async def _race(topic: str, label: str, slots: list[dict], quorum: int, timeout: await asyncio.gather(*tasks.keys(), return_exceptions=True) -# --- Bausteine-Pipeline: 4x Recherche (3) → 2x Auswahl (1) → Check → 3x Sortierung (Median-Rang) --- +# --- Bausteine-Pipeline: 4x Recherche (3) → 2x Auswahl (1) → Prüfung — reines Inventar, unsortiert --- _bausteine_progress: dict[str, str] = {} _bausteine_errors: dict[str, str] = {} _bausteine_cancelled: set[str] = set() _bausteine_step: dict[str, int] = {} -BAUSTEINE_STEPS = ("Recherche", "Auswahl", "Prüfung", "Sortierung") +BAUSTEINE_STEPS = ("Recherche", "Auswahl", "Prüfung") _CATEGORIES = ("KERN", "WICHTIG", "REST") # nur noch für den Altformat-Reader @@ -253,12 +209,11 @@ def _bausteine_files(topic: str) -> dict: "recherche": [arbeit / f"recherche-{i}.md" for i in (1, 2, 3, 4)], "auswahl": [arbeit / f"auswahl-{i}.md" for i in (1, 2)], "auswahl_check": arbeit / "auswahl-check.json", - "sortierung": [arbeit / f"sortierung-{i}.json" for i in (1, 2, 3)], } def _alle_slot_dateien(files: dict) -> list[Path]: - return [*files["recherche"], *files["auswahl"], files["auswahl_check"], *files["sortierung"]] + return [*files["recherche"], *files["auswahl"], files["auswahl_check"]] def cancel_bausteine(topic: str) -> bool: @@ -531,49 +486,10 @@ async def generate_bausteine(topic: str, instructions: str = "", provider: str = texts = [t for _, t in sorted(entries.items())] + list(patch["nachtraege"]) entries = {i: t for i, t in enumerate(texts, 1)} - # Ab hier ist der Titel der Schlüssel — eindeutig machen + # Titel eindeutig machen und unsortiertes Inventar schreiben entries = _eindeutige_titel(entries) - bausteine_liste = "\n".join(f"- {t}" for t in entries.values()) - - # Schritt 3: 3 Sortier-Agenten, ALLE nötig — Merge per Median-Rang - n = len(entries) - sortierungen: list[list[int]] = [] - offen = [] - for i, path in enumerate(files["sortierung"], 1): - liste = _resolve_liste(_json_datei(path), entries) - if liste is not None and len(sortierungen) < 3: - sortierungen.append(liste) - else: - path.unlink(missing_ok=True) - offen.append((i, path)) - vorhanden = len(sortierungen) - set_p(f"Sortierung läuft ({vorhanden}/3 gültig)…", step=3) - if vorhanden < 3: - slots = [ - { - "key": f"bausteine-{topic}-sortierung-{i}", - "prompt": _prompt("Bausteine-Sortierung", topic=topic, bausteine=bausteine_liste, out_path=path), - "role": "quick", "capabilities": "files", - "payload": (lambda result, p=path: _resolve_liste(_json_datei(p), entries)), - } - for i, path in offen - ] - neue = await _race( - topic, "Sortierung", slots, 3 - vorhanden, _timeout("sortierung", n), provider, - on_update=lambda c: set_p(f"Sortierung läuft ({vorhanden + c}/3 gültig)…"), - cancelled=is_cancelled, - ) - if is_cancelled(): - abgebrochen() - return - if neue is None: - _bausteine_errors[topic] = "Sortierung fehlgeschlagen (Quorum nicht erreicht)" - return - sortierungen += neue - - reihenfolge = _merge_sortierungen(topic, sortierungen, entries) final_path.write_text( - "\n".join(f"{i}. {entries[num]}" for i, num in enumerate(reihenfolge, 1)) + "\n", + "\n".join(f"{i}. {t}" for i, t in entries.items()) + "\n", encoding="utf-8", ) except Exception as e: @@ -591,8 +507,11 @@ async def generate_bausteine(topic: str, instructions: str = "", provider: str = WRITER_COUNT = {"MiniGuide": 1, "Guide": 2, "FullGuide": 4} -def _resolve_gliederung(data, entries: dict[int, str]) -> list[dict] | None: - """{"kapitel": [{"titel", "bausteine": [Titel]}]} → [{"title", "nums"}]; None bei Schema-/Titel-Fehlern.""" +def _resolve_gliederung(data, entries: dict[int, str], soll: int) -> list[dict] | None: + """{"kapitel": [{"titel", "bausteine": [Titel]}]} → [{"title", "nums"}]. + + `soll` = Mindest-Anzahl gewählter Bausteine (mit kleiner Toleranz). + """ if not isinstance(data, dict) or not isinstance(data.get("kapitel"), list): return None idx = _titel_index(entries) @@ -615,11 +534,10 @@ def _resolve_gliederung(data, entries: dict[int, str]) -> list[dict] | None: chapters.append({"title": str(ch.get("titel", "")).strip() or "Kapitel", "nums": nums}) if not chapters or total == 0: return None - if (total - unknown) / total < 0.85 or len(seen) / len(entries) < 0.85: + if (total - unknown) / total < 0.85: + return None + if len(seen) < 0.9 * soll: return None - missing = sorted(set(entries) - seen) - if missing: - chapters.append({"title": "Weitere Themen", "nums": missing}) return chapters @@ -792,33 +710,38 @@ async def _generate_sections( spec = (TEMPLATES_DIR / "Format" / "Section.md").read_text(encoding="utf-8") bausteine_liste = "\n".join(f"- {t}" for t in entries.values()) + n = len(entries) + anteil, minimum, zweck = FORMAT_ANTEIL[format_name] + k = min(n, max(minimum, math.ceil(anteil * n))) + auswahl_auftrag = ( + f"Wähle MINDESTENS {k} der Bausteine und baue daraus {zweck}. " + "Wähle, was diesem Zweck dient — lass weg, was dafür nicht nötig ist." + ) - if format_name == "MiniGuide": - # Ein Writer, gliedert selbst in Kapitel - plan = None - zuteilungen = [bausteine_liste] - chunk_sizes = [len(entries)] - else: - await _set_progress(guide_id, "Plane Gliederung…") - plan_path = content_path.parent / f"{content_path.stem}.gliederung.json" - fragment_paths.append(plan_path) - plan_path.unlink(missing_ok=True) - slots = [{ - "key": f"{guide_id}-plan", - "prompt": _prompt("Guide-Plan", topic=topic, format_name=format_name, bausteine=bausteine_liste, out_path=plan_path, extra=_extra(instructions)), - "role": "guide", "capabilities": "files", - "payload": (lambda result: _resolve_gliederung(_json_datei(plan_path), entries)), - }] - res = await _race(topic, "Gliederung", slots, 1, _timeout("plan", len(entries)), provider, cancelled=is_cancelled) - if is_cancelled(): - return None - if res is None: - await _fail(guide_id, "Gliederung fehlgeschlagen") - return None - plan = res[0] - chunks = _split_chunks(plan, WRITER_COUNT[format_name]) - zuteilungen = [_zuteilung_text(chunk, entries) for chunk in chunks] - chunk_sizes = [sum(len(c["nums"]) for c in chunk) for chunk in chunks] + await _set_progress(guide_id, "Wähle Bausteine & plane Gliederung…") + plan_path = content_path.parent / f"{content_path.stem}.gliederung.json" + fragment_paths.append(plan_path) + plan_path.unlink(missing_ok=True) + slots = [{ + "key": f"{guide_id}-plan", + "prompt": _prompt( + "Guide-Plan", + topic=topic, format_name=format_name, bausteine=bausteine_liste, + auswahl_auftrag=auswahl_auftrag, out_path=plan_path, extra=_extra(instructions), + ), + "role": "guide", "capabilities": "files", + "payload": (lambda result: _resolve_gliederung(_json_datei(plan_path), entries, k)), + }] + res = await _race(topic, "Gliederung", slots, 1, _timeout("plan", n), provider, cancelled=is_cancelled) + if is_cancelled(): + return None + if res is None: + await _fail(guide_id, "Gliederung fehlgeschlagen") + return None + plan = res[0] + chunks = _split_chunks(plan, WRITER_COUNT[format_name]) + zuteilungen = [_zuteilung_text(chunk, entries) for chunk in chunks] + chunk_sizes = [sum(len(c["nums"]) for c in chunk) for chunk in chunks] writer_count = len(zuteilungen) await _set_progress(guide_id, f"Schreibe Sections ({writer_count} Writer)…" if writer_count > 1 else "Schreibe Sections…") @@ -856,7 +779,6 @@ async def _generate_sections( await _set_progress(guide_id, "Setze zusammen…") idx = _titel_index(entries) by_num: dict[int, dict] = {} - fragment_order: list[int] = [] for sec in fragments: num = _titel_aufloesen(idx, sec["titel"]) if num is None: @@ -864,26 +786,17 @@ async def _generate_sections( continue if num not in by_num: by_num[num] = sec - fragment_order.append(num) - - def section_json(num: int) -> dict: - sec = by_num[num] - return {"num": num, "title": _titel(entries[num]), "md": sec["md"]} chapters: list[dict] = [] - if plan is None: - # MiniGuide: Kapitel aus den Fragment-Markern in Datei-Reihenfolge - for num in fragment_order: - title = by_num[num]["kapitel"] or topic - if not chapters or chapters[-1]["title"] != title: - chapters.append({"title": title, "sections": []}) - chapters[-1]["sections"].append(section_json(num)) - else: - for ch in plan: - sections = [section_json(num) for num in ch["nums"] if num in by_num] - if sections: - chapters.append({"title": ch["title"], "sections": sections}) - missing = sorted(set(entries) - set(by_num)) + for ch in plan: + sections = [ + {"num": num, "title": _titel(entries[num]), "md": by_num[num]["md"]} + for num in ch["nums"] if num in by_num + ] + if sections: + chapters.append({"title": ch["title"], "sections": sections}) + geplant = {num for ch in plan for num in ch["nums"]} + missing = sorted(geplant - set(by_num)) if missing: _log(topic, f"Sections fehlen in der Writer-Ausgabe: {[_titel(entries[n]) for n in missing]}") if not chapters: @@ -913,10 +826,7 @@ async def generate_guide(guide_id: str, topic: str, format_name: str, instructio if not alle: await _fail(guide_id, "Keine Bausteine gefunden") return - anteil, minimum = FORMAT_ANTEIL[format_name] - k = min(len(alle), max(minimum, math.ceil(anteil * len(alle)))) - selected = [text for _, text in sorted(alle.items())][:k] - entries = _eindeutige_titel({i: text for i, text in enumerate(selected, 1)}) + entries = _eindeutige_titel(alle) facts = _prompt("Guide-Fakten-Projekt", project=project) if project else _prompt("Guide-Fakten-Thema") chapters = await _generate_sections( guide_id, topic, format_name, entries, diff --git a/frontend/package-lock.json b/frontend/package-lock.json index 15b48b7..12f8960 100644 --- a/frontend/package-lock.json +++ b/frontend/package-lock.json @@ -9,7 +9,9 @@ "version": "0.0.0", "dependencies": { "dompurify": "^3.4.7", + "highlight.js": "^11.11.1", "marked": "^18.0.4", + "marked-highlight": "^2.2.4", "vue": "^3.5.32" }, "devDependencies": { @@ -1366,6 +1368,15 @@ "node": ">=6.9.0" } }, + "node_modules/highlight.js": { + "version": "11.11.1", + "resolved": "https://registry.npmjs.org/highlight.js/-/highlight.js-11.11.1.tgz", + "integrity": "sha512-Xwwo44whKBVCYoliBQwaPvtd/2tYFkRQtXDWj1nackaV2JPXx3L0+Jvd8/qCJ2p+ML0/XVkJ2q+Mr+UVdpJK5w==", + "license": "BSD-3-Clause", + "engines": { + "node": ">=12.0.0" + } + }, "node_modules/hookable": { "version": "5.5.3", "resolved": "https://registry.npmjs.org/hookable/-/hookable-5.5.3.tgz", @@ -1756,6 +1767,15 @@ "node": ">= 20" } }, + "node_modules/marked-highlight": { + "version": "2.2.4", + "resolved": "https://registry.npmjs.org/marked-highlight/-/marked-highlight-2.2.4.tgz", + "integrity": "sha512-PZxisNMJDduSjc0q6uvjsnqqHCXc9s0eyzxDO9sB1eNGJnd/H1/Fu+z6g/liC1dfJdFW4SftMwMlLvsBhUPrqQ==", + "license": "MIT", + "peerDependencies": { + "marked": ">=4 <19" + } + }, "node_modules/mrmime": { "version": "2.0.1", "resolved": "https://registry.npmjs.org/mrmime/-/mrmime-2.0.1.tgz", diff --git a/frontend/package.json b/frontend/package.json index 7a89248..9cf4659 100644 --- a/frontend/package.json +++ b/frontend/package.json @@ -10,7 +10,9 @@ }, "dependencies": { "dompurify": "^3.4.7", + "highlight.js": "^11.11.1", "marked": "^18.0.4", + "marked-highlight": "^2.2.4", "vue": "^3.5.32" }, "devDependencies": { diff --git a/frontend/src/components/TopicDetail.vue b/frontend/src/components/TopicDetail.vue index 505d56a..0532aa2 100644 --- a/frontend/src/components/TopicDetail.vue +++ b/frontend/src/components/TopicDetail.vue @@ -1,9 +1,21 @@