update
This commit is contained in:
@@ -1,20 +1,22 @@
|
||||
import asyncio
|
||||
import json
|
||||
import math
|
||||
import shutil
|
||||
import re
|
||||
import uuid
|
||||
from collections import Counter
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
|
||||
from agents import run_agent, kill_process
|
||||
from config import (
|
||||
DEFAULT_PROVIDER,
|
||||
FORMAT_ANTEIL,
|
||||
TEMPLATES_DIR,
|
||||
TIMEOUTS,
|
||||
MAX_CONCURRENT_GENERATIONS,
|
||||
)
|
||||
from database import update_guide
|
||||
from paths import bausteine_path, guide_content_path, project_dir
|
||||
from paths import arbeit_dir, bausteine_path, guide_content_path, project_dir
|
||||
|
||||
_semaphore = asyncio.Semaphore(MAX_CONCURRENT_GENERATIONS)
|
||||
_cancelled: set[str] = set()
|
||||
@@ -113,55 +115,48 @@ def _json_datei(path: Path):
|
||||
return None
|
||||
|
||||
|
||||
def _resolve_kategorien(data, entries: dict[int, str], min_match: float = 0.85):
|
||||
"""{"KERN": [Titel], …} → {num: Kategorie}; None bei zu vielen unbekannten Titeln
|
||||
def _resolve_liste(data, entries: dict[int, str], min_match: float = 0.85) -> list[int] | None:
|
||||
"""{"reihenfolge": [Titel, …]} → [nums]; None bei zu vielen unbekannten Titeln
|
||||
oder zu geringer Abdeckung der Einträge."""
|
||||
if not isinstance(data, dict):
|
||||
if not isinstance(data, dict) or not isinstance(data.get("reihenfolge"), list):
|
||||
return None
|
||||
idx = _titel_index(entries)
|
||||
mapping: dict[int, str] = {}
|
||||
nums: list[int] = []
|
||||
total = unknown = 0
|
||||
for cat in _CATEGORIES:
|
||||
items = data.get(cat, [])
|
||||
if not isinstance(items, list):
|
||||
for t in data["reihenfolge"]:
|
||||
if not isinstance(t, str):
|
||||
return None
|
||||
for t in items:
|
||||
if not isinstance(t, str):
|
||||
return None
|
||||
total += 1
|
||||
num = _titel_aufloesen(idx, t)
|
||||
if num is None:
|
||||
unknown += 1
|
||||
elif num not in mapping:
|
||||
mapping[num] = cat
|
||||
total += 1
|
||||
num = _titel_aufloesen(idx, t)
|
||||
if num is None:
|
||||
unknown += 1
|
||||
elif num not in nums:
|
||||
nums.append(num)
|
||||
if total == 0:
|
||||
return None
|
||||
if (total - unknown) / total < min_match or len(mapping) / len(entries) < min_match:
|
||||
if (total - unknown) / total < min_match or len(nums) / len(entries) < min_match:
|
||||
return None
|
||||
return mapping
|
||||
return nums
|
||||
|
||||
|
||||
def _resolve_reihenfolge(data, entries: dict[int, str], min_match: float = 0.85):
    """Like ``_resolve_kategorien``, but return the order: ``{category: [nums]}``.

    ``data`` is first validated through ``_resolve_kategorien``; if that
    returns None, so does this function. Otherwise each category's titles are
    resolved again and collected in listed order, skipping non-string items,
    unresolvable titles and repeats within the same category.
    """
    mapping = _resolve_kategorien(data, entries, min_match)
    if mapping is None:
        return None
    idx = _titel_index(entries)
    order: dict[str, list[int]] = {c: [] for c in _CATEGORIES}
    for cat in _CATEGORIES:
        seen: set[int] = set()  # per-category duplicate filter
        for t in data.get(cat, []):
            num = _titel_aufloesen(idx, t) if isinstance(t, str) else None
            if num is not None and num not in seen:
                seen.add(num)
                order[cat].append(num)
    return order
|
||||
def _merge_sortierungen(topic: str, listen: list[list[int]], entries: dict[int, str]) -> list[int]:
|
||||
"""Median-Rang über mehrere Sortierungen; Bausteine ohne Stimmen ans Ende."""
|
||||
raenge: dict[int, list[int]] = {num: [] for num in entries}
|
||||
for liste in listen:
|
||||
for rang, num in enumerate(liste):
|
||||
if num in raenge:
|
||||
raenge[num].append(rang)
|
||||
ohne = [num for num, r in raenge.items() if not r]
|
||||
if ohne:
|
||||
_log(topic, f"Sortierung: keine Stimmen für {[_titel(entries[n]) for n in ohne]} → ans Ende")
|
||||
|
||||
def key(num: int):
|
||||
r = sorted(raenge[num])
|
||||
if not r:
|
||||
return (10**9, 10**9, num)
|
||||
return (r[len(r) // 2], sum(r) / len(r), num)
|
||||
|
||||
def _kategorien_block(mapping: dict[int, str], entries: dict[int, str]) -> str:
|
||||
parts = []
|
||||
for cat in _CATEGORIES:
|
||||
titel = [_titel(entries[n]) for n in sorted(entries) if mapping.get(n) == cat]
|
||||
parts.append(f"{cat}:\n" + ("\n".join(f"- {t}" for t in titel) if titel else "(leer)"))
|
||||
return "\n".join(parts)
|
||||
return sorted(entries, key=key)
|
||||
|
||||
|
||||
def _timeout(step: str, n: int = 0) -> int:
|
||||
@@ -239,36 +234,31 @@ async def _race(topic: str, label: str, slots: list[dict], quorum: int, timeout:
|
||||
await asyncio.gather(*tasks.keys(), return_exceptions=True)
|
||||
|
||||
|
||||
# --- Bausteine-Pipeline: 4x Recherche (3) → 2x Auswahl (1) → Check → 4x Einordnung (3) → Mehrheit+Verifikation → Sortierung ---
|
||||
# --- Bausteine-Pipeline: 4x Recherche (3) → 2x Auswahl (1) → Check → 3x Sortierung (Median-Rang) ---
|
||||
|
||||
_bausteine_progress: dict[str, str] = {}
|
||||
_bausteine_errors: dict[str, str] = {}
|
||||
_bausteine_cancelled: set[str] = set()
|
||||
_bausteine_step: dict[str, int] = {}
|
||||
|
||||
BAUSTEINE_STEPS = ("Recherche", "Auswahl", "Prüfung", "Einordnung", "Verifikation", "Sortierung")
|
||||
_CATEGORIES = ("KERN", "WICHTIG", "REST")
|
||||
BAUSTEINE_STEPS = ("Recherche", "Auswahl", "Prüfung", "Sortierung")
|
||||
_CATEGORIES = ("KERN", "WICHTIG", "REST") # nur noch für den Altformat-Reader
|
||||
|
||||
|
||||
def _bausteine_files(topic: str) -> dict:
    """Return all file locations of the building-block pipeline for ``topic``.

    The original dict literal listed several keys twice; in Python the later
    value silently wins, so each key is stated once here with that value.

    Keys: ``final`` (result file), ``arbeit`` (working directory),
    ``recherche`` / ``auswahl`` / ``auswahl_check`` / ``sortierung`` (slot
    files inside the working directory) and the legacy ``einordnung`` /
    ``final_check`` entries next to the result file.
    """
    final_path = bausteine_path(topic)
    stem, parent = final_path.stem, final_path.parent
    arbeit = arbeit_dir(topic)
    return {
        "final": final_path,
        "arbeit": arbeit,
        "recherche": [arbeit / f"recherche-{i}.md" for i in (1, 2, 3, 4)],
        "auswahl": [arbeit / f"auswahl-{i}.md" for i in (1, 2)],
        "auswahl_check": arbeit / "auswahl-check.json",
        "sortierung": [arbeit / f"sortierung-{i}.json" for i in (1, 2, 3)],
        # Legacy slot files of the earlier pipeline, still located beside the result file.
        "einordnung": [parent / f"{stem}.einordnung-{i}.json" for i in (1, 2, 3, 4)],
        "final_check": parent / f"{stem}.final-check.json",
    }
|
||||
|
||||
|
||||
def _alle_slot_dateien(files: dict) -> list[Path]:
|
||||
return [
|
||||
*files["recherche"], *files["auswahl"], files["auswahl_check"],
|
||||
*files["einordnung"], files["final_check"], files["sortierung"],
|
||||
]
|
||||
return [*files["recherche"], *files["auswahl"], files["auswahl_check"], *files["sortierung"]]
|
||||
|
||||
|
||||
def cancel_bausteine(topic: str) -> bool:
|
||||
@@ -288,11 +278,7 @@ def _resume_step(topic: str) -> int:
|
||||
return 1
|
||||
if not files["auswahl_check"].exists():
|
||||
return 2
|
||||
if sum(p.exists() for p in files["einordnung"]) < 3:
|
||||
return 3
|
||||
if not files["final_check"].exists():
|
||||
return 4
|
||||
return 5
|
||||
return 3
|
||||
|
||||
|
||||
def bausteine_status(topic: str) -> dict:
|
||||
@@ -307,8 +293,6 @@ def bausteine_status(topic: str) -> dict:
|
||||
]
|
||||
elif ready:
|
||||
states = ["done"] * len(BAUSTEINE_STEPS)
|
||||
if not _bausteine_files(topic)["sortierung"].exists():
|
||||
states[-1] = "pending"
|
||||
else:
|
||||
nxt = _resume_step(topic)
|
||||
partial = nxt > 0
|
||||
@@ -330,12 +314,7 @@ def active_bausteine() -> list[dict]:
|
||||
def reset_bausteine(topic: str) -> None:
    """Delete all building-block artefacts of ``topic`` and clear its stored error.

    Removes the final file, every slot file, leftover ``<stem>.*.md`` files
    next to the final file, and the working directory. Missing files are not
    an error.
    """
    files = _bausteine_files(topic)
    files["final"].unlink(missing_ok=True)
    for p in _alle_slot_dateien(files):
        p.unlink(missing_ok=True)
    # Leftovers of earlier format versions
    stem, parent = files["final"].stem, files["final"].parent
    for alt in parent.glob(f"{stem}.*.md"):
        alt.unlink(missing_ok=True)
    shutil.rmtree(files["arbeit"], ignore_errors=True)
    _bausteine_errors.pop(topic, None)
|
||||
|
||||
|
||||
@@ -351,7 +330,7 @@ def _build_recherche_prompt(topic: str, out_path: Path, instructions: str = "",
|
||||
|
||||
|
||||
def _parse_auswahl(text: str) -> dict[int, str]:
|
||||
"""Parst die konsolidierte Liste: `N. Titel — Kurzbeschreibung` pro Zeile."""
|
||||
"""Parst eine Baustein-Liste: `N. Titel — Kurzbeschreibung` pro Zeile."""
|
||||
entries: dict[int, str] = {}
|
||||
last = None
|
||||
for line in text.splitlines():
|
||||
@@ -364,46 +343,30 @@ def _parse_auswahl(text: str) -> dict[int, str]:
|
||||
return entries
|
||||
|
||||
|
||||
def _majority(mappings: list[dict[int, str]], entries: dict[int, str]) -> tuple[dict[int, str], list[int]]:
|
||||
"""Mehrheitsentscheid über die Einordnungen; ohne Mehrheit → Streitfall."""
|
||||
mapping: dict[int, str] = {}
|
||||
disputes: list[int] = []
|
||||
for num in entries:
|
||||
votes = [m[num] for m in mappings if num in m]
|
||||
if not votes:
|
||||
disputes.append(num)
|
||||
def _parse_kategorien(text: str) -> dict[str, list[str]]:
|
||||
"""Altformat-Reader: finale Baustein-Datei mit ## KERN/WICHTIG/REST-Abschnitten."""
|
||||
cats: dict[str, list[str]] = {}
|
||||
current = None
|
||||
for line in text.splitlines():
|
||||
s = line.strip()
|
||||
m = re.match(r"#+\s*(KERN|WICHTIG|REST)\b", s, re.IGNORECASE)
|
||||
if m:
|
||||
current = m.group(1).upper()
|
||||
cats.setdefault(current, [])
|
||||
continue
|
||||
cat, count = Counter(votes).most_common(1)[0]
|
||||
if count >= 2:
|
||||
mapping[num] = cat
|
||||
else:
|
||||
disputes.append(num)
|
||||
return mapping, disputes
|
||||
m = re.match(r"(\d+)[.)]\s+(.*\S)", s)
|
||||
if m and current:
|
||||
cats[current].append(m.group(2))
|
||||
return cats
|
||||
|
||||
|
||||
def _build_final_bausteine(topic: str, entries: dict[int, str], mapping: dict[int, str], order: dict[str, list[int]] | None = None) -> str:
    """Build the final building-block file from the consolidated list and final assignment.

    Entries without a category in ``mapping`` are logged and placed in REST.

    ``order`` (category → numbers in learning order) sorts within the
    categories; numbers not listed there are appended in original order.
    Numbers listed more than once in ``order`` are emitted only once.
    """
    grouped: dict[str, list[int]] = {c: [] for c in _CATEGORIES}
    for num in sorted(entries):
        cat = mapping.get(num)
        if cat is None:
            _log(topic, f"Baustein {num} fehlt in finaler Einordnung → REST")
            cat = "REST"
        grouped[cat].append(num)
    if order:
        for cat in _CATEGORIES:
            wanted = set(grouped[cat])
            # dict.fromkeys keeps first-occurrence order and drops repeats.
            seq = list(dict.fromkeys(n for n in order.get(cat, []) if n in wanted))
            placed = set(seq)
            grouped[cat] = seq + [n for n in grouped[cat] if n not in placed]
    parts = []
    for cat in _CATEGORIES:
        lines = "\n".join(f"{i}. {entries[num]}" for i, num in enumerate(grouped[cat], 1))
        parts.append(f"## {cat}\n{lines}")
    return "\n\n".join(parts) + "\n"
|
||||
def _lade_bausteine(text: str) -> dict[int, str]:
    """Load the final building-block file: sorted list (new) or categories (legacy).

    A text containing a ``KERN`` heading is read with ``_parse_kategorien``;
    its entries are concatenated in ``_CATEGORIES`` order and renumbered from
    1. Any other text is handed to ``_parse_auswahl``.
    """
    if re.search(r"^#+\s*KERN\b", text, re.IGNORECASE | re.MULTILINE):
        cats = _parse_kategorien(text)
        texts = [t for cat in _CATEGORIES for t in cats.get(cat, [])]
        return dict(enumerate(texts, 1))
    return _parse_auswahl(text)
|
||||
|
||||
|
||||
def _file_payload(path: Path):
|
||||
@@ -442,23 +405,6 @@ def _titel_aufloesen(idx: dict[str, int], t: str) -> int | None:
|
||||
return idx.get(_norm_titel(t)) or idx.get(_norm_titel(_titel(t)))
|
||||
|
||||
|
||||
async def _run_sortierung(topic: str, entries: dict[int, str], mapping: dict[int, str], provider: str, cancelled) -> dict[str, list[int]] | None:
    """Sort within the categories; the agent's JSON file doubles as the marker.

    Any previous output file is deleted first, then a single slot is raced
    with a quorum of 1. The slot's payload re-reads the output file and
    resolves it with ``_resolve_reihenfolge``.

    Returns the first result of the race, or None when the race yields None
    (in which case the output file is removed again).
    """
    out = _bausteine_files(topic)["sortierung"]
    out.unlink(missing_ok=True)
    slots = [{
        "key": f"bausteine-{topic}-sortierung-1",
        "prompt": _prompt("Bausteine-Sortierung", topic=topic, einordnung=_kategorien_block(mapping, entries), out_path=out),
        "role": "quick", "capabilities": "files",
        "payload": (lambda result: _resolve_reihenfolge(_json_datei(out), entries)),
    }]
    res = await _race(topic, "Sortierung", slots, 1, _timeout("sortierung", len(entries)), provider, cancelled=cancelled)
    if res is None:
        # Do not leave a marker behind for a sort that did not succeed.
        out.unlink(missing_ok=True)
        return None
    return res[0]
|
||||
|
||||
|
||||
async def generate_bausteine(topic: str, instructions: str = "", provider: str = DEFAULT_PROVIDER) -> None:
|
||||
if topic in _bausteine_progress:
|
||||
return
|
||||
@@ -482,31 +428,8 @@ async def generate_bausteine(topic: str, instructions: str = "", provider: str =
|
||||
|
||||
try:
|
||||
async with _semaphore:
|
||||
# Fertig, aber ohne Sortier-Marker (ältere Version): nur die Sortierung nachholen.
|
||||
if final_path.exists() and not files["sortierung"].exists():
|
||||
cats = _parse_kategorien(final_path.read_text(encoding="utf-8"))
|
||||
entries: dict[int, str] = {}
|
||||
mapping: dict[int, str] = {}
|
||||
i = 0
|
||||
for cat in _CATEGORIES:
|
||||
for text in cats.get(cat, []):
|
||||
i += 1
|
||||
entries[i] = text
|
||||
mapping[i] = cat
|
||||
entries = _eindeutige_titel(entries)
|
||||
if entries:
|
||||
set_p("Sortiere Bausteine…", step=5)
|
||||
order = await _run_sortierung(topic, entries, mapping, provider, is_cancelled)
|
||||
if is_cancelled():
|
||||
abgebrochen()
|
||||
return
|
||||
if order is None:
|
||||
_bausteine_errors[topic] = "Sortierung fehlgeschlagen"
|
||||
return
|
||||
final_path.write_text(_build_final_bausteine(topic, entries, mapping, order), encoding="utf-8")
|
||||
return
|
||||
|
||||
# „Neu erstellen": fertige (sortierte) Bausteine → kompletter Frischstart.
|
||||
files["arbeit"].mkdir(parents=True, exist_ok=True)
|
||||
# „Neu erstellen": fertige Bausteine → kompletter Frischstart.
|
||||
# Sonst sind Slot-Dateien Reste eines Abbruchs/Fehlers → Resume.
|
||||
if final_path.exists():
|
||||
for p_alt in _alle_slot_dateien(files):
|
||||
@@ -612,103 +535,47 @@ async def generate_bausteine(topic: str, instructions: str = "", provider: str =
|
||||
entries = _eindeutige_titel(entries)
|
||||
bausteine_liste = "\n".join(f"- {t}" for t in entries.values())
|
||||
|
||||
# Schritt 3: 4 Einordnungs-Agenten, 3 gültige nötig (JSON-Dateien, Titel-validiert)
|
||||
# Schritt 3: 3 Sortier-Agenten, ALLE nötig — Merge per Median-Rang
|
||||
n = len(entries)
|
||||
einordnungen: list[dict[int, str]] = []
|
||||
sortierungen: list[list[int]] = []
|
||||
offen = []
|
||||
for i, path in enumerate(files["einordnung"], 1):
|
||||
m = _resolve_kategorien(_json_datei(path), entries)
|
||||
if m is not None and len(einordnungen) < 3:
|
||||
einordnungen.append(m)
|
||||
for i, path in enumerate(files["sortierung"], 1):
|
||||
liste = _resolve_liste(_json_datei(path), entries)
|
||||
if liste is not None and len(sortierungen) < 3:
|
||||
sortierungen.append(liste)
|
||||
else:
|
||||
path.unlink(missing_ok=True)
|
||||
offen.append((i, path))
|
||||
vorhanden = len(einordnungen)
|
||||
set_p(f"Einordnung läuft ({vorhanden}/3 gültig)…", step=3)
|
||||
vorhanden = len(sortierungen)
|
||||
set_p(f"Sortierung läuft ({vorhanden}/3 gültig)…", step=3)
|
||||
if vorhanden < 3:
|
||||
slots = [
|
||||
{
|
||||
"key": f"bausteine-{topic}-einordnung-{i}",
|
||||
"prompt": _prompt("Bausteine-Einordnung", topic=topic, bausteine=bausteine_liste, out_path=path),
|
||||
"key": f"bausteine-{topic}-sortierung-{i}",
|
||||
"prompt": _prompt("Bausteine-Sortierung", topic=topic, bausteine=bausteine_liste, out_path=path),
|
||||
"role": "quick", "capabilities": "files",
|
||||
"payload": (lambda result, p=path: _resolve_kategorien(_json_datei(p), entries)),
|
||||
"payload": (lambda result, p=path: _resolve_liste(_json_datei(p), entries)),
|
||||
}
|
||||
for i, path in offen
|
||||
]
|
||||
neue = await _race(
|
||||
topic, "Einordnung", slots, 3 - vorhanden, _timeout("einordnung", n), provider,
|
||||
on_update=lambda c: set_p(f"Einordnung läuft ({vorhanden + c}/3 gültig)…"),
|
||||
topic, "Sortierung", slots, 3 - vorhanden, _timeout("sortierung", n), provider,
|
||||
on_update=lambda c: set_p(f"Sortierung läuft ({vorhanden + c}/3 gültig)…"),
|
||||
cancelled=is_cancelled,
|
||||
)
|
||||
if is_cancelled():
|
||||
abgebrochen()
|
||||
return
|
||||
if neue is None:
|
||||
_bausteine_errors[topic] = "Einordnung fehlgeschlagen (Quorum nicht erreicht)"
|
||||
_bausteine_errors[topic] = "Sortierung fehlgeschlagen (Quorum nicht erreicht)"
|
||||
return
|
||||
einordnungen += neue
|
||||
sortierungen += neue
|
||||
|
||||
# Schritt 4: Python-Mehrheitsentscheid + Verifikations-Agent (antwortet nur mit Deltas, JSON)
|
||||
set_p("Verifiziere Einordnung…", step=4)
|
||||
mapping, disputes = _majority(einordnungen, entries)
|
||||
if disputes:
|
||||
_log(topic, f"Keine Mehrheit bei: {disputes}")
|
||||
|
||||
def _final_schema(data):
|
||||
if not isinstance(data, dict):
|
||||
return None
|
||||
idx = _titel_index(entries)
|
||||
out: dict[int, str] = {}
|
||||
for t, cat in data.items():
|
||||
if not isinstance(t, str) or cat not in _CATEGORIES:
|
||||
return None
|
||||
num = _titel_aufloesen(idx, t)
|
||||
if num is not None:
|
||||
out[num] = cat
|
||||
return out # leeres Dict = alles bestätigt
|
||||
|
||||
fc_path = files["final_check"]
|
||||
overrides = _final_schema(_json_datei(fc_path))
|
||||
if overrides is None:
|
||||
fc_path.unlink(missing_ok=True)
|
||||
streit_block = "\n".join(f"- {entries[n]}" for n in disputes) or "(keine)"
|
||||
slots = [{
|
||||
"key": f"bausteine-{topic}-final-1",
|
||||
"prompt": _prompt(
|
||||
"Bausteine-Einordnung-Final",
|
||||
topic=topic, einordnung=_kategorien_block(mapping, entries),
|
||||
streitfaelle=streit_block, out_path=fc_path,
|
||||
),
|
||||
"role": "fast", "capabilities": "files",
|
||||
"payload": (lambda result: _final_schema(_json_datei(fc_path))),
|
||||
}]
|
||||
finals = await _race(topic, "Final", slots, 1, _timeout("final", n), provider, cancelled=is_cancelled)
|
||||
if is_cancelled():
|
||||
abgebrochen()
|
||||
return
|
||||
if finals is None:
|
||||
_log(topic, "Final-Verifikation fehlgeschlagen — Mehrheitsentscheid bleibt unverändert")
|
||||
overrides = {}
|
||||
else:
|
||||
overrides = finals[0]
|
||||
korrekturen = {num: cat for num, cat in overrides.items() if mapping.get(num) != cat and num not in disputes}
|
||||
if korrekturen:
|
||||
_log(topic, f"Final-Verifikation korrigiert: { {_titel(entries[n]): c for n, c in korrekturen.items()} }")
|
||||
mapping.update(overrides)
|
||||
for num in disputes:
|
||||
if num not in mapping:
|
||||
_log(topic, f"Streitfall '{_titel(entries[num])}' unentschieden → REST")
|
||||
mapping[num] = "REST"
|
||||
|
||||
# Schritt 5: Sortierung innerhalb der Kategorien (einfach → komplex, nicht fatal)
|
||||
set_p("Sortiere Bausteine…", step=5)
|
||||
order = await _run_sortierung(topic, entries, mapping, provider, is_cancelled)
|
||||
if is_cancelled():
|
||||
abgebrochen()
|
||||
return
|
||||
if order is None:
|
||||
_log(topic, "Sortierung fehlgeschlagen — Originalreihenfolge bleibt (Nachholen über ▶)")
|
||||
final_path.write_text(_build_final_bausteine(topic, entries, mapping, order), encoding="utf-8")
|
||||
reihenfolge = _merge_sortierungen(topic, sortierungen, entries)
|
||||
final_path.write_text(
|
||||
"\n".join(f"{i}. {entries[num]}" for i, num in enumerate(reihenfolge, 1)) + "\n",
|
||||
encoding="utf-8",
|
||||
)
|
||||
except Exception as e:
|
||||
_bausteine_errors[topic] = str(e)[:2000]
|
||||
finally:
|
||||
@@ -720,34 +587,10 @@ async def generate_bausteine(topic: str, instructions: str = "", provider: str =
|
||||
|
||||
# --- Guide-Generierung: Bausteine → (Plan) → Writer → JSON ---
|
||||
|
||||
# Welche Baustein-Kategorien jedes Format abdeckt.
|
||||
FORMAT_COVERAGE = {
|
||||
"MiniGuide": ("KERN",),
|
||||
"Guide": ("KERN", "WICHTIG"),
|
||||
"FullGuide": ("KERN", "WICHTIG", "REST"),
|
||||
}
|
||||
|
||||
# Parallele Writer pro Format (OnePager hat einen eigenen Weg).
|
||||
WRITER_COUNT = {"MiniGuide": 1, "Guide": 2, "FullGuide": 4}
|
||||
|
||||
|
||||
def _parse_kategorien(text: str) -> dict[str, list[str]]:
|
||||
"""Parst die finale Baustein-Datei (## KERN/WICHTIG/REST mit nummerierten Einträgen)."""
|
||||
cats: dict[str, list[str]] = {}
|
||||
current = None
|
||||
for line in text.splitlines():
|
||||
s = line.strip()
|
||||
m = re.match(r"#+\s*(KERN|WICHTIG|REST)\b", s, re.IGNORECASE)
|
||||
if m:
|
||||
current = m.group(1).upper()
|
||||
cats.setdefault(current, [])
|
||||
continue
|
||||
m = re.match(r"(\d+)[.)]\s+(.*\S)", s)
|
||||
if m and current:
|
||||
cats[current].append(m.group(2))
|
||||
return cats
|
||||
|
||||
|
||||
def _resolve_gliederung(data, entries: dict[int, str]) -> list[dict] | None:
|
||||
"""{"kapitel": [{"titel", "bausteine": [Titel]}]} → [{"title", "nums"}]; None bei Schema-/Titel-Fehlern."""
|
||||
if not isinstance(data, dict) or not isinstance(data.get("kapitel"), list):
|
||||
@@ -1046,6 +889,7 @@ async def generate_guide(guide_id: str, topic: str, format_name: str, instructio
|
||||
await update_guide(guide_id, status="generating", progress="Starte…", updated_at=now)
|
||||
|
||||
content_path = guide_content_path(topic, format_name)
|
||||
content_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
project = project_dir(topic) if project_dir(topic).is_dir() else None
|
||||
fragment_paths: list[Path] = []
|
||||
|
||||
@@ -1056,13 +900,13 @@ async def generate_guide(guide_id: str, topic: str, format_name: str, instructio
|
||||
if format_name == "OnePager":
|
||||
chapters = await _generate_onepager(guide_id, topic, instructions, provider, project, content_path, fragment_paths)
|
||||
else:
|
||||
cats = _parse_kategorien(bausteine_path(topic).read_text(encoding="utf-8"))
|
||||
selected: list[str] = []
|
||||
for cat in FORMAT_COVERAGE[format_name]:
|
||||
selected.extend(cats.get(cat, []))
|
||||
if not selected:
|
||||
await _fail(guide_id, "Keine passenden Bausteine gefunden")
|
||||
alle = _lade_bausteine(bausteine_path(topic).read_text(encoding="utf-8"))
|
||||
if not alle:
|
||||
await _fail(guide_id, "Keine Bausteine gefunden")
|
||||
return
|
||||
anteil, minimum = FORMAT_ANTEIL[format_name]
|
||||
k = min(len(alle), max(minimum, math.ceil(anteil * len(alle))))
|
||||
selected = [text for _, text in sorted(alle.items())][:k]
|
||||
entries = _eindeutige_titel({i: text for i, text in enumerate(selected, 1)})
|
||||
facts = _prompt("Guide-Fakten-Projekt", project=project) if project else _prompt("Guide-Fakten-Thema")
|
||||
chapters = await _generate_sections(
|
||||
|
||||
Reference in New Issue
Block a user