update
This commit is contained in:
@@ -15,7 +15,6 @@ TIMEOUTS = {
|
||||
"recherche": (1800, 0), # fix 30 min
|
||||
"auswahl": (600, 10),
|
||||
"auswahl_check": (300, 2),
|
||||
"sortierung": (300, 5),
|
||||
"plan": (300, 5),
|
||||
"writer": (600, 120), # pro Section im Chunk
|
||||
"onepager_recherche": (900, 0),
|
||||
@@ -23,11 +22,11 @@ TIMEOUTS = {
|
||||
"onepager_verify": (300, 0),
|
||||
}
|
||||
|
||||
# Selection brief per format: (minimum share, minimum count, purpose).
# Share and count together give the lower bound on how many building blocks
# the planner must pick: min(n, max(minimum count, ceil(share * n))).
# The purpose text is interpolated verbatim into the (German) selection prompt,
# so it stays in German.
FORMAT_ANTEIL = {
    "MiniGuide": (0.05, 8, "einen kompakten Anfänger-Guide — der schnelle Einstieg ins Thema"),
    "Guide": (0.33, 20, "einen ausführlichen Anfänger-Guide — ein solides Fundament im Thema"),
    "FullGuide": (0.90, 0, "einen Komplett-Guide — das ganze Thema"),
}
|
||||
|
||||
# Provider-Stacks: komplett unabhängig, einer kann jederzeit entfernt werden.
|
||||
|
||||
@@ -115,50 +115,6 @@ def _json_datei(path: Path):
|
||||
return None
|
||||
|
||||
|
||||
def _resolve_liste(data, entries: dict[int, str], min_match: float = 0.85) -> list[int] | None:
|
||||
"""{"reihenfolge": [Titel, …]} → [nums]; None bei zu vielen unbekannten Titeln
|
||||
oder zu geringer Abdeckung der Einträge."""
|
||||
if not isinstance(data, dict) or not isinstance(data.get("reihenfolge"), list):
|
||||
return None
|
||||
idx = _titel_index(entries)
|
||||
nums: list[int] = []
|
||||
total = unknown = 0
|
||||
for t in data["reihenfolge"]:
|
||||
if not isinstance(t, str):
|
||||
return None
|
||||
total += 1
|
||||
num = _titel_aufloesen(idx, t)
|
||||
if num is None:
|
||||
unknown += 1
|
||||
elif num not in nums:
|
||||
nums.append(num)
|
||||
if total == 0:
|
||||
return None
|
||||
if (total - unknown) / total < min_match or len(nums) / len(entries) < min_match:
|
||||
return None
|
||||
return nums
|
||||
|
||||
|
||||
def _merge_sortierungen(topic: str, listen: list[list[int]], entries: dict[int, str]) -> list[int]:
|
||||
"""Median-Rang über mehrere Sortierungen; Bausteine ohne Stimmen ans Ende."""
|
||||
raenge: dict[int, list[int]] = {num: [] for num in entries}
|
||||
for liste in listen:
|
||||
for rang, num in enumerate(liste):
|
||||
if num in raenge:
|
||||
raenge[num].append(rang)
|
||||
ohne = [num for num, r in raenge.items() if not r]
|
||||
if ohne:
|
||||
_log(topic, f"Sortierung: keine Stimmen für {[_titel(entries[n]) for n in ohne]} → ans Ende")
|
||||
|
||||
def key(num: int):
|
||||
r = sorted(raenge[num])
|
||||
if not r:
|
||||
return (10**9, 10**9, num)
|
||||
return (r[len(r) // 2], sum(r) / len(r), num)
|
||||
|
||||
return sorted(entries, key=key)
|
||||
|
||||
|
||||
def _timeout(step: str, n: int = 0) -> int:
    """Return the timeout for ``step``: its fixed part plus ``n`` times its per-item part.

    Both parts come from the ``TIMEOUTS`` table; an unknown ``step`` raises
    ``KeyError``.
    """
    fixed, per_item = TIMEOUTS[step]
    extra = per_item * n
    return fixed + extra
|
||||
@@ -234,14 +190,14 @@ async def _race(topic: str, label: str, slots: list[dict], quorum: int, timeout:
|
||||
await asyncio.gather(*tasks.keys(), return_exceptions=True)
|
||||
|
||||
|
||||
# --- Bausteine pipeline: 4x research (3) → 2x selection (1) → check — pure inventory, unsorted ---

# Module-level run state. NOTE(review): presumably all keyed by topic, as
# _bausteine_errors is where it is written — confirm for the others.
_bausteine_progress: dict[str, str] = {}
_bausteine_errors: dict[str, str] = {}
_bausteine_cancelled: set[str] = set()
_bausteine_step: dict[str, int] = {}

BAUSTEINE_STEPS = ("Recherche", "Auswahl", "Prüfung")
_CATEGORIES = ("KERN", "WICHTIG", "REST")  # only still used by the legacy-format reader
|
||||
|
||||
|
||||
@@ -253,12 +209,11 @@ def _bausteine_files(topic: str) -> dict:
|
||||
"recherche": [arbeit / f"recherche-{i}.md" for i in (1, 2, 3, 4)],
|
||||
"auswahl": [arbeit / f"auswahl-{i}.md" for i in (1, 2)],
|
||||
"auswahl_check": arbeit / "auswahl-check.json",
|
||||
"sortierung": [arbeit / f"sortierung-{i}.json" for i in (1, 2, 3)],
|
||||
}
|
||||
|
||||
|
||||
def _alle_slot_dateien(files: dict) -> list[Path]:
|
||||
return [*files["recherche"], *files["auswahl"], files["auswahl_check"], *files["sortierung"]]
|
||||
return [*files["recherche"], *files["auswahl"], files["auswahl_check"]]
|
||||
|
||||
|
||||
def cancel_bausteine(topic: str) -> bool:
|
||||
@@ -531,49 +486,10 @@ async def generate_bausteine(topic: str, instructions: str = "", provider: str =
|
||||
texts = [t for _, t in sorted(entries.items())] + list(patch["nachtraege"])
|
||||
entries = {i: t for i, t in enumerate(texts, 1)}
|
||||
|
||||
# Ab hier ist der Titel der Schlüssel — eindeutig machen
|
||||
# Titel eindeutig machen und unsortiertes Inventar schreiben
|
||||
entries = _eindeutige_titel(entries)
|
||||
bausteine_liste = "\n".join(f"- {t}" for t in entries.values())
|
||||
|
||||
# Schritt 3: 3 Sortier-Agenten, ALLE nötig — Merge per Median-Rang
|
||||
n = len(entries)
|
||||
sortierungen: list[list[int]] = []
|
||||
offen = []
|
||||
for i, path in enumerate(files["sortierung"], 1):
|
||||
liste = _resolve_liste(_json_datei(path), entries)
|
||||
if liste is not None and len(sortierungen) < 3:
|
||||
sortierungen.append(liste)
|
||||
else:
|
||||
path.unlink(missing_ok=True)
|
||||
offen.append((i, path))
|
||||
vorhanden = len(sortierungen)
|
||||
set_p(f"Sortierung läuft ({vorhanden}/3 gültig)…", step=3)
|
||||
if vorhanden < 3:
|
||||
slots = [
|
||||
{
|
||||
"key": f"bausteine-{topic}-sortierung-{i}",
|
||||
"prompt": _prompt("Bausteine-Sortierung", topic=topic, bausteine=bausteine_liste, out_path=path),
|
||||
"role": "quick", "capabilities": "files",
|
||||
"payload": (lambda result, p=path: _resolve_liste(_json_datei(p), entries)),
|
||||
}
|
||||
for i, path in offen
|
||||
]
|
||||
neue = await _race(
|
||||
topic, "Sortierung", slots, 3 - vorhanden, _timeout("sortierung", n), provider,
|
||||
on_update=lambda c: set_p(f"Sortierung läuft ({vorhanden + c}/3 gültig)…"),
|
||||
cancelled=is_cancelled,
|
||||
)
|
||||
if is_cancelled():
|
||||
abgebrochen()
|
||||
return
|
||||
if neue is None:
|
||||
_bausteine_errors[topic] = "Sortierung fehlgeschlagen (Quorum nicht erreicht)"
|
||||
return
|
||||
sortierungen += neue
|
||||
|
||||
reihenfolge = _merge_sortierungen(topic, sortierungen, entries)
|
||||
final_path.write_text(
|
||||
"\n".join(f"{i}. {entries[num]}" for i, num in enumerate(reihenfolge, 1)) + "\n",
|
||||
"\n".join(f"{i}. {t}" for i, t in entries.items()) + "\n",
|
||||
encoding="utf-8",
|
||||
)
|
||||
except Exception as e:
|
||||
@@ -591,8 +507,11 @@ async def generate_bausteine(topic: str, instructions: str = "", provider: str =
|
||||
# Per-format count handed to _split_chunks when the plan is divided among writers.
WRITER_COUNT = {"MiniGuide": 1, "Guide": 2, "FullGuide": 4}
|
||||
|
||||
|
||||
def _resolve_gliederung(data, entries: dict[int, str]) -> list[dict] | None:
|
||||
"""{"kapitel": [{"titel", "bausteine": [Titel]}]} → [{"title", "nums"}]; None bei Schema-/Titel-Fehlern."""
|
||||
def _resolve_gliederung(data, entries: dict[int, str], soll: int) -> list[dict] | None:
|
||||
"""{"kapitel": [{"titel", "bausteine": [Titel]}]} → [{"title", "nums"}].
|
||||
|
||||
`soll` = Mindest-Anzahl gewählter Bausteine (mit kleiner Toleranz).
|
||||
"""
|
||||
if not isinstance(data, dict) or not isinstance(data.get("kapitel"), list):
|
||||
return None
|
||||
idx = _titel_index(entries)
|
||||
@@ -615,11 +534,10 @@ def _resolve_gliederung(data, entries: dict[int, str]) -> list[dict] | None:
|
||||
chapters.append({"title": str(ch.get("titel", "")).strip() or "Kapitel", "nums": nums})
|
||||
if not chapters or total == 0:
|
||||
return None
|
||||
if (total - unknown) / total < 0.85 or len(seen) / len(entries) < 0.85:
|
||||
if (total - unknown) / total < 0.85:
|
||||
return None
|
||||
if len(seen) < 0.9 * soll:
|
||||
return None
|
||||
missing = sorted(set(entries) - seen)
|
||||
if missing:
|
||||
chapters.append({"title": "Weitere Themen", "nums": missing})
|
||||
return chapters
|
||||
|
||||
|
||||
@@ -792,33 +710,38 @@ async def _generate_sections(
|
||||
|
||||
spec = (TEMPLATES_DIR / "Format" / "Section.md").read_text(encoding="utf-8")
|
||||
bausteine_liste = "\n".join(f"- {t}" for t in entries.values())
|
||||
n = len(entries)
|
||||
anteil, minimum, zweck = FORMAT_ANTEIL[format_name]
|
||||
k = min(n, max(minimum, math.ceil(anteil * n)))
|
||||
auswahl_auftrag = (
|
||||
f"Wähle MINDESTENS {k} der Bausteine und baue daraus {zweck}. "
|
||||
"Wähle, was diesem Zweck dient — lass weg, was dafür nicht nötig ist."
|
||||
)
|
||||
|
||||
if format_name == "MiniGuide":
|
||||
# Ein Writer, gliedert selbst in Kapitel
|
||||
plan = None
|
||||
zuteilungen = [bausteine_liste]
|
||||
chunk_sizes = [len(entries)]
|
||||
else:
|
||||
await _set_progress(guide_id, "Plane Gliederung…")
|
||||
plan_path = content_path.parent / f"{content_path.stem}.gliederung.json"
|
||||
fragment_paths.append(plan_path)
|
||||
plan_path.unlink(missing_ok=True)
|
||||
slots = [{
|
||||
"key": f"{guide_id}-plan",
|
||||
"prompt": _prompt("Guide-Plan", topic=topic, format_name=format_name, bausteine=bausteine_liste, out_path=plan_path, extra=_extra(instructions)),
|
||||
"role": "guide", "capabilities": "files",
|
||||
"payload": (lambda result: _resolve_gliederung(_json_datei(plan_path), entries)),
|
||||
}]
|
||||
res = await _race(topic, "Gliederung", slots, 1, _timeout("plan", len(entries)), provider, cancelled=is_cancelled)
|
||||
if is_cancelled():
|
||||
return None
|
||||
if res is None:
|
||||
await _fail(guide_id, "Gliederung fehlgeschlagen")
|
||||
return None
|
||||
plan = res[0]
|
||||
chunks = _split_chunks(plan, WRITER_COUNT[format_name])
|
||||
zuteilungen = [_zuteilung_text(chunk, entries) for chunk in chunks]
|
||||
chunk_sizes = [sum(len(c["nums"]) for c in chunk) for chunk in chunks]
|
||||
await _set_progress(guide_id, "Wähle Bausteine & plane Gliederung…")
|
||||
plan_path = content_path.parent / f"{content_path.stem}.gliederung.json"
|
||||
fragment_paths.append(plan_path)
|
||||
plan_path.unlink(missing_ok=True)
|
||||
slots = [{
|
||||
"key": f"{guide_id}-plan",
|
||||
"prompt": _prompt(
|
||||
"Guide-Plan",
|
||||
topic=topic, format_name=format_name, bausteine=bausteine_liste,
|
||||
auswahl_auftrag=auswahl_auftrag, out_path=plan_path, extra=_extra(instructions),
|
||||
),
|
||||
"role": "guide", "capabilities": "files",
|
||||
"payload": (lambda result: _resolve_gliederung(_json_datei(plan_path), entries, k)),
|
||||
}]
|
||||
res = await _race(topic, "Gliederung", slots, 1, _timeout("plan", n), provider, cancelled=is_cancelled)
|
||||
if is_cancelled():
|
||||
return None
|
||||
if res is None:
|
||||
await _fail(guide_id, "Gliederung fehlgeschlagen")
|
||||
return None
|
||||
plan = res[0]
|
||||
chunks = _split_chunks(plan, WRITER_COUNT[format_name])
|
||||
zuteilungen = [_zuteilung_text(chunk, entries) for chunk in chunks]
|
||||
chunk_sizes = [sum(len(c["nums"]) for c in chunk) for chunk in chunks]
|
||||
|
||||
writer_count = len(zuteilungen)
|
||||
await _set_progress(guide_id, f"Schreibe Sections ({writer_count} Writer)…" if writer_count > 1 else "Schreibe Sections…")
|
||||
@@ -856,7 +779,6 @@ async def _generate_sections(
|
||||
await _set_progress(guide_id, "Setze zusammen…")
|
||||
idx = _titel_index(entries)
|
||||
by_num: dict[int, dict] = {}
|
||||
fragment_order: list[int] = []
|
||||
for sec in fragments:
|
||||
num = _titel_aufloesen(idx, sec["titel"])
|
||||
if num is None:
|
||||
@@ -864,26 +786,17 @@ async def _generate_sections(
|
||||
continue
|
||||
if num not in by_num:
|
||||
by_num[num] = sec
|
||||
fragment_order.append(num)
|
||||
|
||||
def section_json(num: int) -> dict:
|
||||
sec = by_num[num]
|
||||
return {"num": num, "title": _titel(entries[num]), "md": sec["md"]}
|
||||
|
||||
chapters: list[dict] = []
|
||||
if plan is None:
|
||||
# MiniGuide: Kapitel aus den Fragment-Markern in Datei-Reihenfolge
|
||||
for num in fragment_order:
|
||||
title = by_num[num]["kapitel"] or topic
|
||||
if not chapters or chapters[-1]["title"] != title:
|
||||
chapters.append({"title": title, "sections": []})
|
||||
chapters[-1]["sections"].append(section_json(num))
|
||||
else:
|
||||
for ch in plan:
|
||||
sections = [section_json(num) for num in ch["nums"] if num in by_num]
|
||||
if sections:
|
||||
chapters.append({"title": ch["title"], "sections": sections})
|
||||
missing = sorted(set(entries) - set(by_num))
|
||||
for ch in plan:
|
||||
sections = [
|
||||
{"num": num, "title": _titel(entries[num]), "md": by_num[num]["md"]}
|
||||
for num in ch["nums"] if num in by_num
|
||||
]
|
||||
if sections:
|
||||
chapters.append({"title": ch["title"], "sections": sections})
|
||||
geplant = {num for ch in plan for num in ch["nums"]}
|
||||
missing = sorted(geplant - set(by_num))
|
||||
if missing:
|
||||
_log(topic, f"Sections fehlen in der Writer-Ausgabe: {[_titel(entries[n]) for n in missing]}")
|
||||
if not chapters:
|
||||
@@ -913,10 +826,7 @@ async def generate_guide(guide_id: str, topic: str, format_name: str, instructio
|
||||
if not alle:
|
||||
await _fail(guide_id, "Keine Bausteine gefunden")
|
||||
return
|
||||
anteil, minimum = FORMAT_ANTEIL[format_name]
|
||||
k = min(len(alle), max(minimum, math.ceil(anteil * len(alle))))
|
||||
selected = [text for _, text in sorted(alle.items())][:k]
|
||||
entries = _eindeutige_titel({i: text for i, text in enumerate(selected, 1)})
|
||||
entries = _eindeutige_titel(alle)
|
||||
facts = _prompt("Guide-Fakten-Projekt", project=project) if project else _prompt("Guide-Fakten-Thema")
|
||||
chapters = await _generate_sections(
|
||||
guide_id, topic, format_name, entries,
|
||||
|
||||
Reference in New Issue
Block a user