From 4aa313080760cfeb27de1703b201417e8f89cb4c Mon Sep 17 00:00:00 2001 From: Team3 Date: Sat, 6 Jun 2026 16:07:04 +0200 Subject: [PATCH] update --- backend/agents.py | 1 - backend/config.py | 28 +- backend/database.py | 33 + backend/generator.py | 688 ++++++++++++++---- backend/models.py | 11 + backend/paths.py | 2 +- backend/routes.py | 39 +- frontend/src/App.vue | 47 +- frontend/src/api.js | 16 + frontend/src/components/TopicSidebar.vue | 84 ++- templates/Prompt/Bausteine-Auswahl-Check.md | 19 + .../Prompt/Bausteine-Einordnung-Final.md | 30 +- templates/Prompt/Bausteine-Sortierung.md | 17 + templates/Prompt/OnePager-Bauen.md | 14 + templates/Prompt/OnePager-Quelle-Projekt.md | 1 + templates/Prompt/OnePager-Quelle-Thema.md | 1 + templates/Prompt/OnePager-Recherche.md | 8 + templates/Prompt/OnePager-Verifikation.md | 17 + templates/Prompt/OnePager.md | 11 - 19 files changed, 861 insertions(+), 206 deletions(-) create mode 100644 templates/Prompt/Bausteine-Auswahl-Check.md create mode 100644 templates/Prompt/Bausteine-Sortierung.md create mode 100644 templates/Prompt/OnePager-Bauen.md create mode 100644 templates/Prompt/OnePager-Quelle-Projekt.md create mode 100644 templates/Prompt/OnePager-Quelle-Thema.md create mode 100644 templates/Prompt/OnePager-Recherche.md create mode 100644 templates/Prompt/OnePager-Verifikation.md delete mode 100644 templates/Prompt/OnePager.md diff --git a/backend/agents.py b/backend/agents.py index 7476f69..4a9ce97 100644 --- a/backend/agents.py +++ b/backend/agents.py @@ -63,7 +63,6 @@ async def run_agent( return 1, "", f"Unbekannter Provider: {provider}" if shutil.which(PROVIDERS[provider]["cli"]) is None: return 1, "", f"CLI '{PROVIDERS[provider]['cli']}' nicht installiert (Provider: {provider})" - timeout = int(timeout * PROVIDERS[provider].get("timeout_factor", 1)) if provider == "minimax": return await _run_opencode(agent_key, prompt, timeout, role, capabilities) return await _run_claude_cli(agent_key, prompt, timeout, role, capabilities) diff --git a/backend/config.py b/backend/config.py index 2dc8fbe..7f350f3 100644 --- a/backend/config.py +++ b/backend/config.py @@ -7,26 +7,42 @@ FRONTEND_DIST = PROJECT_ROOT / "frontend" / "dist" DB_PATH = STORAGE_DIR / "creator.db" PROJECTS_DIR = PROJECT_ROOT / "projects" -AGENT_TIMEOUT = 3600 - MAX_CONCURRENT_GENERATIONS = 10 +# Timeouts pro Agenten-Schritt: (Basis-Sekunden, Sekunden pro Baustein/Section). +# Gilt für alle Provider gleich — wer zu langsam ist, wird neu gestartet bzw. überholt. +TIMEOUTS = { + "recherche": (1800, 0), # fix 30 min + "auswahl": (600, 10), + "auswahl_check": (300, 2), + "einordnung": (300, 5), + "final": (300, 2), # verifiziert nur noch, kleiner Output + "sortierung": (300, 2), + "plan": (300, 5), + "writer": (600, 120), # pro Section im Chunk + "onepager_recherche": (900, 0), + "onepager_bauen": (300, 0), + "onepager_verify": (300, 0), +} + # Provider-Stacks: komplett unabhängig, einer kann jederzeit entfernt werden. -# Rollen: "guide" = große Generierung, "fast" = Baustein-Recherche/Chat. +# Rollen: "quick" = Massenarbeit (Recherche, Einordnung), +# "fast" = Urteilsaufgaben mit kleinem Output (Auswahl, Final, OnePager, Chat), +# "guide" = große Generierung (Plan, Writer). DEFAULT_PROVIDER = "claude" PROVIDERS = { "claude": { "cli": "claude", "guide": "claude-opus-4-8[1m]", "fast": "claude-sonnet-4-6", + "quick": "claude-haiku-4-5", "env_key": None, # Auth via CLAUDE_CODE_OAUTH_TOKEN oder ~/.claude - "timeout_factor": 1, }, "minimax": { "cli": "opencode", "guide": "minimax/MiniMax-M3", - "fast": "minimax/MiniMax-M3", + "fast": "minimax/MiniMax-M2.7-highspeed", + "quick": "minimax/MiniMax-M2.7-highspeed", "env_key": "MINIMAX_API_KEY", - "timeout_factor": 3, # M3 ist bei großen Dokumenten deutlich langsamer }, } diff --git a/backend/database.py b/backend/database.py index 339875b..52dcf85 100644 --- a/backend/database.py +++ b/backend/database.py @@ -24,6 +24,13 @@ CREATE TABLE IF NOT EXISTS guide_progress ( ) """ +CREATE_TOPICS = """ +CREATE TABLE IF NOT EXISTS topics ( + name TEXT PRIMARY KEY, + created_at TEXT NOT NULL +) +""" + _db: aiosqlite.Connection | None = None @@ -39,6 +46,7 @@ async def init_db(): db = await get_db() await db.execute(CREATE_GUIDES) await db.execute(CREATE_PROGRESS) + await db.execute(CREATE_TOPICS) await db.execute( "UPDATE guides SET status = 'error', progress = NULL, error_msg = 'Server-Neustart' " "WHERE status IN ('queued', 'generating')" @@ -100,6 +108,31 @@ async def delete_guide(guide_id: str) -> bool: return cursor.rowcount > 0 +# --- Themen --- + +async def create_topic(name: str) -> None: + from datetime import datetime, timezone + db = await get_db() + await db.execute( + "INSERT OR IGNORE INTO topics (name, created_at) VALUES (?, ?)", + (name, datetime.now(timezone.utc).isoformat()), + ) + await db.commit() + + +async def list_topics() -> list[str]: + db = await get_db() + cursor = await db.execute("SELECT name FROM topics ORDER BY created_at DESC") + rows = await cursor.fetchall() + return [row[0] for row in rows] + + +async def delete_topic(name: str) -> None: + db = await get_db() + await db.execute("DELETE FROM topics WHERE name = ?", (name,)) + await db.commit() + + # --- Kapitel-Fortschritt --- async def list_progress(guide_id: str) -> list[str]: diff --git a/backend/generator.py b/backend/generator.py index c6ec892..4939021 100644 --- a/backend/generator.py +++ b/backend/generator.py @@ -2,14 +2,15 @@ import asyncio import json import re import uuid +from collections import Counter from datetime import datetime, timezone from pathlib import Path from agents import run_agent, kill_process from config import ( - AGENT_TIMEOUT, DEFAULT_PROVIDER, TEMPLATES_DIR, + TIMEOUTS, MAX_CONCURRENT_GENERATIONS, ) from database import update_guide @@ -70,20 +71,148 @@ async def _fail(guide_id: str, msg: str) -> None: await update_guide(guide_id, status="error", progress=None, error_msg=msg, updated_at=now) -# --- Bausteine-Pipeline: 3x Recherche → Auswahl → 2x Einordnung → finale Einordnung --- +def _timeout(step: str, n: int = 0) -> int: + base, per = TIMEOUTS[step] + return base + per * n + + +_MAX_RESTARTS = 2 + + +async def _race(topic: str, label: str, slots: list[dict], quorum: int, timeout: int, provider: str, on_update=None, cancelled=None) -> list | None: + """Startet alle Slots parallel und sammelt `quorum` gültige Ergebnisse. + + Slot-Spec: {key, prompt, role, capabilities, payload}. `payload(result)` + prüft die Gültigkeit und liefert das Slot-Ergebnis oder None. + Fehler/Timeout/ungültig → Slot-Neustart (max. _MAX_RESTARTS). Sobald das + Quorum steht, werden die übrigen Agenten gekillt. None = Quorum verfehlt. + `cancelled()` → True bricht ab (keine Restarts, Rückgabe None). + """ + attempts = {i: 0 for i in range(len(slots))} + tasks: dict[asyncio.Task, int] = {} + + def spawn(i: int) -> None: + slot = slots[i] + task = asyncio.create_task(run_agent( + slot["key"], slot["prompt"], timeout, + provider=provider, role=slot["role"], capabilities=slot["capabilities"], + )) + tasks[task] = i + + for i in range(len(slots)): + spawn(i) + + results: list = [] + try: + while tasks: + if cancelled and cancelled(): + return None + done, _ = await asyncio.wait(tasks.keys(), return_when=asyncio.FIRST_COMPLETED) + for task in done: + i = tasks.pop(task) + payload, err = None, None + try: + result = task.result() + if result[0] != 0: + err = _claude_error("Fehler", *result) + else: + payload = slots[i]["payload"](result) + if payload is None: + err = "Ergebnis ungültig/nicht parsebar" + except asyncio.TimeoutError: + err = f"Timeout nach {timeout}s" + except Exception as e: + err = f"{type(e).__name__}: {e}" + + if payload is not None: + results.append(payload) + if on_update: + on_update(len(results)) + if len(results) >= quorum: + return results + continue + + _log(topic, f"{label} {i + 1} (Versuch {attempts[i] + 1}): {err}") + attempts[i] += 1 + if attempts[i] <= _MAX_RESTARTS and not (cancelled and cancelled()): + spawn(i) + _log(topic, f"{label}: Quorum {quorum} nicht erreicht ({len(results)} gültig)") + return None + finally: + for task, i in tasks.items(): + kill_process(slots[i]["key"]) + task.cancel() + if tasks: + await asyncio.gather(*tasks.keys(), return_exceptions=True) + + +# --- Bausteine-Pipeline: 4x Recherche (3 nötig) → 2x Auswahl (1) → 4x Einordnung (3) → 2x Final (1) --- _bausteine_progress: dict[str, str] = {} _bausteine_errors: dict[str, str] = {} +_bausteine_cancelled: set[str] = set() +_bausteine_step: dict[str, int] = {} + +BAUSTEINE_STEPS = ("Recherche", "Auswahl", "Prüfung", "Einordnung", "Verifikation", "Sortierung") + + +def cancel_bausteine(topic: str) -> bool: + if topic not in _bausteine_progress: + return False + _bausteine_cancelled.add(topic) + kill_process(f"bausteine-{topic}-") + return True _CATEGORIES = ("KERN", "WICHTIG", "REST") +def _resume_step(topic: str) -> int: + """Erster noch offener Schritt anhand der persistierten Zwischendateien.""" + final_path = bausteine_path(topic) + stem, parent = final_path.stem, final_path.parent + if sum((parent / f"{stem}.recherche-{i}.md").exists() for i in (1, 2, 3, 4)) < 3: + return 0 + if not any((parent / f"{stem}.auswahl-{i}.md").exists() for i in (1, 2)): + return 1 + if not (parent / f"{stem}.auswahl-check.md").exists(): + return 2 + if sum((parent / f"{stem}.einordnung-{i}.md").exists() for i in (1, 2, 3)) < 3: + return 3 + if not (parent / f"{stem}.final-check.md").exists(): + return 4 + return 5 + + +def _sortierung_path(topic: str): + final_path = bausteine_path(topic) + return final_path.parent / f"{final_path.stem}.sortierung.md" + + def bausteine_status(topic: str) -> dict: + ready = bausteine_path(topic).exists() + generating = topic in _bausteine_progress + partial = False + if generating: + current = _bausteine_step.get(topic) + states = [ + "pending" if current is None else "done" if i < current else "active" if i == current else "pending" + for i in range(len(BAUSTEINE_STEPS)) + ] + elif ready: + states = ["done"] * len(BAUSTEINE_STEPS) + if not _sortierung_path(topic).exists(): + states[-1] = "pending" + else: + nxt = _resume_step(topic) + partial = nxt > 0 + states = ["done" if i < nxt else "pending" for i in range(len(BAUSTEINE_STEPS))] return { - "ready": bausteine_path(topic).exists(), - "generating": topic in _bausteine_progress, + "ready": ready, + "generating": generating, "progress": _bausteine_progress.get(topic), "error": _bausteine_errors.get(topic), + "partial": partial, + "steps": [{"label": label, "state": s} for label, s in zip(BAUSTEINE_STEPS, states)], } @@ -92,7 +221,16 @@ def active_bausteine() -> list[dict]: def reset_bausteine(topic: str) -> None: - bausteine_path(topic).unlink(missing_ok=True) + final_path = bausteine_path(topic) + final_path.unlink(missing_ok=True) + for i in (1, 2, 3, 4): + (final_path.parent / f"{final_path.stem}.recherche-{i}.md").unlink(missing_ok=True) + (final_path.parent / f"{final_path.stem}.einordnung-{i}.md").unlink(missing_ok=True) + for i in (1, 2): + (final_path.parent / f"{final_path.stem}.auswahl-{i}.md").unlink(missing_ok=True) + (final_path.parent / f"{final_path.stem}.auswahl-check.md").unlink(missing_ok=True) + (final_path.parent / f"{final_path.stem}.final-check.md").unlink(missing_ok=True) + (final_path.parent / f"{final_path.stem}.sortierung.md").unlink(missing_ok=True) _bausteine_errors.pop(topic, None) @@ -142,25 +280,125 @@ def _parse_einordnung(text: str) -> dict[int, str]: return mapping -def _build_final_bausteine(topic: str, entries: dict[int, str], mapping: dict[int, str]) -> str: - """Baut die finale Baustein-Datei aus konsolidierter Liste + finaler Zuordnung.""" - grouped: dict[str, list[str]] = {c: [] for c in _CATEGORIES} +def _build_final_bausteine(topic: str, entries: dict[int, str], mapping: dict[int, str], order: dict[str, list[int]] | None = None) -> str: + """Baut die finale Baustein-Datei aus konsolidierter Liste + finaler Zuordnung. + + `order` (Kategorie → Nummern in Lernreihenfolge) sortiert innerhalb der + Kategorien; nicht gelistete Nummern hängen in Originalreihenfolge hinten an. + """ + grouped: dict[str, list[int]] = {c: [] for c in _CATEGORIES} for num in sorted(entries): cat = mapping.get(num) if cat is None: _log(topic, f"Baustein {num} fehlt in finaler Einordnung → REST") cat = "REST" - grouped[cat].append(entries[num]) + grouped[cat].append(num) unknown = sorted(set(mapping) - set(entries)) if unknown: _log(topic, f"finale Einordnung enthält unbekannte Nummern (ignoriert): {unknown}") + if order: + for cat in _CATEGORIES: + wanted = set(grouped[cat]) + seq = [n for n in order.get(cat, []) if n in wanted] + grouped[cat] = seq + [n for n in grouped[cat] if n not in seq] parts = [] for cat in _CATEGORIES: - lines = "\n".join(f"{i}. {text}" for i, text in enumerate(grouped[cat], 1)) + lines = "\n".join(f"{i}. {entries[num]}" for i, num in enumerate(grouped[cat], 1)) parts.append(f"## {cat}\n{lines}") return "\n\n".join(parts) + "\n" +def _file_payload(path: Path): + """Gültig, wenn die Slot-Datei existiert und nummerierte Einträge enthält.""" + if not path.exists(): + return None + text = path.read_text(encoding="utf-8") + return text if _parse_auswahl(text) else None + + +def _auswahl_payload(path: Path): + if not path.exists(): + return None + text = path.read_text(encoding="utf-8") + entries = _parse_auswahl(text) + return (text, entries) if entries else None + + +def _parse_auswahl_check(text: str): + """Parst die Auswahl-Prüfung: NACHTRÄGE (neue Einträge) + STREICHEN (Nummern).""" + additions: list[str] = [] + removals: set[int] = set() + mode = None + seen_marker = False + for line in text.splitlines(): + s = line.strip().lstrip("-*# ").strip() + if not s: + continue + u = s.upper().rstrip(":") + if u.startswith("NACHTR"): + mode = "add" + seen_marker = True + continue + if u.startswith("STREICH"): + mode = "del" + seen_marker = True + continue + if u == "OK": + seen_marker = True + continue + if mode == "add": + additions.append(s) + elif mode == "del": + m = re.match(r"(\d+)\b", s) + if m: + removals.add(int(m.group(1))) + if not seen_marker: + return None # Antwort hat das Format nicht getroffen + return {"add": additions, "remove": removals} + + +def _majority(mappings: list[dict[int, str]], entries: dict[int, str]) -> tuple[dict[int, str], list[int]]: + """Mehrheitsentscheid über die Einordnungen; ohne Mehrheit → Streitfall.""" + mapping: dict[int, str] = {} + disputes: list[int] = [] + for num in entries: + votes = [m[num] for m in mappings if num in m] + if not votes: + disputes.append(num) + continue + cat, count = Counter(votes).most_common(1)[0] + if count >= 2: + mapping[num] = cat + else: + disputes.append(num) + return mapping, disputes + + +def _einordnung_block(mapping: dict[int, str], entries: dict[int, str]) -> str: + parts = [] + for cat in _CATEGORIES: + nums = [n for n in sorted(entries) if mapping.get(n) == cat] + lines = "\n".join(f"{n} {_titel(entries[n])}" for n in nums) + parts.append(f"{cat}:\n{lines}" if lines else f"{cat}:") + return "\n".join(parts) + + +async def _run_sortierung(topic: str, entries: dict[int, str], mapping: dict[int, str], provider: str, cancelled) -> dict[str, list[int]] | None: + """Sortiert innerhalb der Kategorien; schreibt bei Erfolg den Marker und liefert die Reihenfolge.""" + slots = [{ + "key": f"bausteine-{topic}-sortierung-1", + "prompt": _prompt("Bausteine-Sortierung", topic=topic, einordnung=_einordnung_block(mapping, entries)), + "role": "quick", "capabilities": "none", + "payload": (lambda result: (result[1].strip(), _parse_einordnung(result[1])) if _parse_einordnung(result[1]) else None), + }] + res = await _race(topic, "Sortierung", slots, 1, _timeout("sortierung", len(entries)), provider, cancelled=cancelled) + if res is None: + return None + raw, sort_mapping = res[0] + _sortierung_path(topic).write_text(raw, encoding="utf-8") + return {cat: [num for num, c in sort_mapping.items() if c == cat] for cat in _CATEGORIES} + + async def generate_bausteine(topic: str, instructions: str = "", provider: str = DEFAULT_PROVIDER) -> None: if topic in _bausteine_progress: return @@ -170,107 +408,252 @@ async def generate_bausteine(topic: str, instructions: str = "", provider: str = final_path = bausteine_path(topic) project = project_dir(topic) if project_dir(topic).is_dir() else None stem = final_path.stem - recherche_paths = [final_path.parent / f"{stem}.recherche-{i}.md" for i in (1, 2, 3)] - auswahl_path = final_path.parent / f"{stem}.auswahl.md" + recherche_paths = [final_path.parent / f"{stem}.recherche-{i}.md" for i in (1, 2, 3, 4)] + auswahl_paths = [final_path.parent / f"{stem}.auswahl-{i}.md" for i in (1, 2)] + einordnung_paths = [final_path.parent / f"{stem}.einordnung-{i}.md" for i in (1, 2, 3)] + auswahl_check_path = final_path.parent / f"{stem}.auswahl-check.md" + final_check_path = final_path.parent / f"{stem}.final-check.md" + sortierung_path = _sortierung_path(topic) + slot_files = [*recherche_paths, *auswahl_paths, *einordnung_paths, auswahl_check_path, final_check_path, sortierung_path] + + def set_p(msg: str, step: int | None = None) -> None: + _bausteine_progress[topic] = msg + if step is not None: + _bausteine_step[topic] = step + + def is_cancelled() -> bool: + return topic in _bausteine_cancelled + + def abgebrochen() -> None: + _bausteine_errors[topic] = "Abgebrochen — Fortschritt bleibt erhalten" try: async with _semaphore: - # Schritt 1: 3 Recherche-Agenten parallel (Thema: Websuche, Projekt: Dateien lesen) - _bausteine_progress[topic] = "Recherche läuft (3 Agenten)…" - caps = "files" if project else "full" - results = await asyncio.gather(*[ - run_agent( - f"bausteine-{topic}-recherche-{i}", - _build_recherche_prompt(topic, path, instructions, project), - AGENT_TIMEOUT, provider=provider, role="fast", capabilities=caps, - ) - for i, path in enumerate(recherche_paths, 1) - ], return_exceptions=True) - for i, (r, p) in enumerate(zip(results, recherche_paths), 1): - if isinstance(r, BaseException): - _log(topic, f"Recherche {i}: {type(r).__name__}: {r}") - elif r[0] != 0: - _log(topic, f"Recherche {i}: {_claude_error('Fehler', *r)}") - elif not p.exists(): - _log(topic, f"Recherche {i}: keine Ausgabedatei erstellt") - recherchen = [p.read_text(encoding="utf-8") for p in recherche_paths if p.exists()] - if not recherchen: - _bausteine_errors[topic] = _gather_error("Recherche-Fehler", results) + # Fertig, aber ohne Sortier-Marker (ältere Pipeline-Version): nur die Sortierung nachholen. + if final_path.exists() and not sortierung_path.exists(): + cats = _parse_kategorien(final_path.read_text(encoding="utf-8")) + entries, mapping = {}, {} + i = 0 + for cat in _CATEGORIES: + for text in cats.get(cat, []): + i += 1 + entries[i] = text + mapping[i] = cat + if entries: + set_p("Sortiere Bausteine…", step=5) + order = await _run_sortierung(topic, entries, mapping, provider, is_cancelled) + if is_cancelled(): + abgebrochen() + return + if order is None: + _bausteine_errors[topic] = "Sortierung fehlgeschlagen" + return + final_path.write_text(_build_final_bausteine(topic, entries, mapping, order), encoding="utf-8") return - # Schritt 2: Auswahl-Agent konsolidiert die Ergebnisse (ohne Quellen) - _bausteine_progress[topic] = f"Konsolidiere Recherche ({len(recherchen)}/3 erfolgreich)…" - results_block = "\n\n".join(f"### Recherche {i}\n\n{text}" for i, text in enumerate(recherchen, 1)) - returncode, stdout, stderr = await run_agent( - f"bausteine-{topic}-auswahl", - _prompt("Bausteine-Auswahl", topic=topic, results=results_block, out_path=auswahl_path), - AGENT_TIMEOUT, provider=provider, role="fast", capabilities="files", - ) - if returncode != 0 or not auswahl_path.exists(): - _bausteine_errors[topic] = _claude_error("Auswahl-Fehler", returncode, stdout, stderr) - return - flat = auswahl_path.read_text(encoding="utf-8") - entries = _parse_auswahl(flat) - if not entries: - _bausteine_errors[topic] = "Auswahl-Liste nicht parsebar" - return + # „Neu erstellen": fertige (sortierte) Bausteine → kompletter Frischstart. + # Sonst sind Slot-Dateien Reste eines Abbruchs/Fehlers → Resume, fertige Schritte überspringen. + if final_path.exists(): + for p_alt in slot_files: + p_alt.unlink(missing_ok=True) - # Schritt 3: 2 Einordnungs-Agenten parallel (antworten nur mit Nummer+Titel je Kategorie) - _bausteine_progress[topic] = "Einordnung läuft (2 Agenten)…" - results = await asyncio.gather(*[ - run_agent( - f"bausteine-{topic}-einordnung-{i}", - _prompt("Bausteine-Einordnung", topic=topic, bausteine=flat), - AGENT_TIMEOUT, provider=provider, role="fast", capabilities="none", - ) - for i in (1, 2) - ], return_exceptions=True) - einordnungen = [] - for i, r in enumerate(results, 1): - if isinstance(r, BaseException): - _log(topic, f"Einordnung {i}: {type(r).__name__}: {r}") - elif r[0] != 0: - _log(topic, f"Einordnung {i}: {_claude_error('Fehler', *r)}") - elif not _parse_einordnung(r[1]): - _log(topic, f"Einordnung {i}: Antwort nicht parsebar") + # Schritt 1: 4 Recherche-Agenten, 3 gültige nötig — vorhandene Slot-Dateien zählen + recherchen = [] + offen = [] + for i, path in enumerate(recherche_paths, 1): + text = _file_payload(path) + if text is not None and len(recherchen) < 3: + recherchen.append(text) else: - einordnungen.append(r[1].strip()) - if not einordnungen: - _bausteine_errors[topic] = _gather_error("Einordnungs-Fehler", results) - return + offen.append((i, path)) + vorhanden = len(recherchen) + set_p(f"Recherche läuft ({vorhanden}/3 gültig)…", step=0) + if vorhanden < 3: + caps = "files" if project else "full" + slots = [ + { + "key": f"bausteine-{topic}-recherche-{i}", + "prompt": _build_recherche_prompt(topic, path, instructions, project), + "role": "quick", "capabilities": caps, + "payload": (lambda result, p=path: _file_payload(p)), + } + for i, path in offen + ] + neue = await _race( + topic, "Recherche", slots, 3 - vorhanden, _timeout("recherche"), provider, + on_update=lambda c: set_p(f"Recherche läuft ({vorhanden + c}/3 gültig)…"), + cancelled=is_cancelled, + ) + if is_cancelled(): + abgebrochen() + return + if neue is None: + _bausteine_errors[topic] = "Recherche fehlgeschlagen (Quorum nicht erreicht)" + return + recherchen += neue - # Schritt 4: finale Einordnung — Python validiert und baut die Datei - _bausteine_progress[topic] = f"Finale Einordnung ({len(einordnungen)}/2 erfolgreich)…" - returncode, stdout, stderr = await run_agent( - f"bausteine-{topic}-final", - _prompt( + # Schritt 2: 2 Auswahl-Agenten, der erste gewinnt — vorhandene gültige Datei wird übernommen + n_est = max(len(_parse_auswahl(t)) for t in recherchen) + results_block = "\n\n".join(f"### Recherche {i}\n\n{text}" for i, text in enumerate(recherchen, 1)) + bestehende = next((res for p in auswahl_paths if (res := _auswahl_payload(p)) is not None), None) + if bestehende is not None: + flat, entries = bestehende + else: + set_p("Konsolidiere Recherche…", step=1) + slots = [ + { + "key": f"bausteine-{topic}-auswahl-{i}", + "prompt": _prompt("Bausteine-Auswahl", topic=topic, results=results_block, out_path=path), + "role": "fast", "capabilities": "files", + "payload": (lambda result, p=path: _auswahl_payload(p)), + } + for i, path in enumerate(auswahl_paths, 1) + ] + auswahl = await _race(topic, "Auswahl", slots, 1, _timeout("auswahl", n_est), provider, cancelled=is_cancelled) + if is_cancelled(): + abgebrochen() + return + if auswahl is None: + _bausteine_errors[topic] = "Auswahl fehlgeschlagen (kein gültiges Ergebnis)" + return + flat, entries = auswahl[0] + + # Schritt 2b: Auswahl-Prüfung (nicht fatal) — gespeicherte Antwort wird erneut angewendet + set_p("Prüfe Auswahl…", step=2) + raw_check = auswahl_check_path.read_text(encoding="utf-8") if auswahl_check_path.exists() else None + patch = _parse_auswahl_check(raw_check) if raw_check is not None else None + if patch is None: + slots = [{ + "key": f"bausteine-{topic}-auswahlcheck-1", + "prompt": _prompt("Bausteine-Auswahl-Check", topic=topic, results=results_block, auswahl=flat), + "role": "fast", "capabilities": "none", + "payload": (lambda result: (result[1].strip(), _parse_auswahl_check(result[1])) if _parse_auswahl_check(result[1]) is not None else None), + }] + checks = await _race(topic, "Auswahl-Check", slots, 1, _timeout("auswahl_check", len(entries)), provider, cancelled=is_cancelled) + if is_cancelled(): + abgebrochen() + return + if checks is None: + _log(topic, "Auswahl-Check fehlgeschlagen — fahre ohne Korrekturen fort") + else: + raw_check, patch = checks[0] + auswahl_check_path.write_text(raw_check, encoding="utf-8") + if patch is not None: + if patch["remove"]: + _log(topic, f"Auswahl-Check streicht Duplikate: {sorted(patch['remove'])}") + entries = {n: t for n, t in entries.items() if n not in patch["remove"]} + if patch["add"]: + _log(topic, f"Auswahl-Check ergänzt {len(patch['add'])} Bausteine") + if patch["remove"] or patch["add"]: + texts = [t for _, t in sorted(entries.items())] + patch["add"] + entries = {i: t for i, t in enumerate(texts, 1)} + flat = "\n".join(f"{i}. {t}" for i, t in entries.items()) + + # Schritt 3: 4 Einordnungs-Agenten, 3 gültige nötig — gespeicherte Stimmen einlesen + n = len(entries) + einordnungen = [] + for path in einordnung_paths: + if path.exists(): + text = path.read_text(encoding="utf-8") + parsed = _parse_einordnung(text) + if parsed: + einordnungen.append((text, parsed)) + einordnungen = einordnungen[:3] + vorhanden = len(einordnungen) + set_p(f"Einordnung läuft ({vorhanden}/3 gültig)…", step=3) + if vorhanden < 3: + slots = [ + { + "key": f"bausteine-{topic}-einordnung-{i}", + "prompt": _prompt("Bausteine-Einordnung", topic=topic, bausteine=flat), + "role": "quick", "capabilities": "none", + "payload": (lambda result: (result[1].strip(), _parse_einordnung(result[1])) if _parse_einordnung(result[1]) else None), + } + for i in range(vorhanden + 1, 5) + ] + neue = await _race( + topic, "Einordnung", slots, 3 - vorhanden, _timeout("einordnung", n), provider, + on_update=lambda c: set_p(f"Einordnung läuft ({vorhanden + c}/3 gültig)…"), + cancelled=is_cancelled, + ) + if is_cancelled(): + abgebrochen() + return + if neue is None: + _bausteine_errors[topic] = "Einordnung fehlgeschlagen (Quorum nicht erreicht)" + return + for path, (text, _) in zip(einordnung_paths[vorhanden:], neue): + path.write_text(text, encoding="utf-8") + einordnungen += neue + + # Schritt 4: Python-Mehrheitsentscheid + Verifikations-Agent — gespeicherte Antwort wird erneut angewendet + set_p("Verifiziere Einordnung…", step=4) + mapping, disputes = _majority([m for _, m in einordnungen], entries) + if disputes: + _log(topic, f"Keine Mehrheit bei: {disputes}") + raw_final = final_check_path.read_text(encoding="utf-8") if final_check_path.exists() else None + if raw_final is not None and not (_parse_einordnung(raw_final) or "OK" in raw_final.upper()): + raw_final = None + if raw_final is None: + streit_block = "\n".join(f"{num} {entries[num]}" for num in disputes) or "(keine)" + final_prompt = _prompt( "Bausteine-Einordnung-Final", - topic=topic, bausteine=flat, - einordnung_1=einordnungen[0], einordnung_2=einordnungen[-1], - ), - AGENT_TIMEOUT, provider=provider, role="fast", capabilities="none", - ) - if returncode != 0: - _bausteine_errors[topic] = _claude_error("Finale-Einordnungs-Fehler", returncode, stdout, stderr) + topic=topic, + einordnung=_einordnung_block(mapping, entries), + streitfaelle=streit_block, + ) + slots = [ + { + "key": f"bausteine-{topic}-final-{i}", + "prompt": final_prompt, + "role": "fast", "capabilities": "none", + "payload": (lambda result: result[1].strip() if (_parse_einordnung(result[1]) or "OK" in result[1].upper()) else None), + } + for i in (1, 2) + ] + finals = await _race(topic, "Final", slots, 1, _timeout("final", n), provider, cancelled=is_cancelled) + if is_cancelled(): + abgebrochen() + return + if finals is None: + _log(topic, "Final-Verifikation fehlgeschlagen — Mehrheitsentscheid bleibt unverändert") + else: + raw_final = finals[0] + final_check_path.write_text(raw_final, encoding="utf-8") + if raw_final is not None: + overrides = {num: cat for num, cat in _parse_einordnung(raw_final).items() if num in entries} + korrekturen = {num: cat for num, cat in overrides.items() if mapping.get(num) != cat and num not in disputes} + if korrekturen: + _log(topic, f"Final-Verifikation korrigiert: {korrekturen}") + mapping.update(overrides) + for num in disputes: + if num not in mapping: + _log(topic, f"Streitfall {num} unentschieden → WICHTIG") + mapping[num] = "WICHTIG" + + # Schritt 5: Sortierung innerhalb der Kategorien (einfach → komplex, nicht fatal) + set_p("Sortiere Bausteine…", step=5) + order = await _run_sortierung(topic, entries, mapping, provider, is_cancelled) + if is_cancelled(): + abgebrochen() return - mapping = _parse_einordnung(stdout) - if not mapping: - _bausteine_errors[topic] = "Finale Einordnung nicht parsebar" - return - final_path.write_text(_build_final_bausteine(topic, entries, mapping), encoding="utf-8") + if order is None: + _log(topic, "Sortierung fehlgeschlagen — Originalreihenfolge bleibt (Nachholen über ▶)") + final_path.write_text(_build_final_bausteine(topic, entries, mapping, order), encoding="utf-8") except Exception as e: _bausteine_errors[topic] = str(e)[:2000] finally: + # Kein Datei-Cleanup: Zwischendateien bleiben für Resume bzw. Nachvollziehbarkeit. + # Aufräumen passiert nur explizit über reset_bausteine(). _bausteine_progress.pop(topic, None) - for p in [*recherche_paths, auswahl_path]: - p.unlink(missing_ok=True) + _bausteine_step.pop(topic, None) + _bausteine_cancelled.discard(topic) # --- Guide-Generierung: Bausteine → (Plan) → Writer → JSON --- # Welche Baustein-Kategorien jedes Format abdeckt. FORMAT_COVERAGE = { - "OnePager": ("KERN",), "MiniGuide": ("KERN",), "Guide": ("KERN", "WICHTIG"), "FullGuide": ("KERN", "WICHTIG", "REST"), @@ -388,34 +771,76 @@ def _section_json(sec: dict, entries: dict[int, str]) -> dict: return {"num": sec["num"], "title": sec["title"] or _titel(entries[sec["num"]]), "md": sec["md"]} -async def _generate_onepager(guide_id: str, topic: str, entries: dict[int, str], instructions: str, provider: str) -> list[dict] | None: - await _set_progress(guide_id, "Generiere OnePager…") - bausteine_block = "\n".join(f"{i}. {t}" for i, t in entries.items()) - returncode, stdout, stderr = await run_agent( - f"{guide_id}-onepager", - _prompt("OnePager", topic=topic, bausteine=bausteine_block, extra=_extra(instructions)), - AGENT_TIMEOUT, provider=provider, role="fast", capabilities="none", - ) - if guide_id in _cancelled: +async def _generate_onepager( + guide_id: str, topic: str, instructions: str, provider: str, + project: Path | None, content_path: Path, fragment_paths: list[Path], +) -> list[dict] | None: + def is_cancelled() -> bool: + return guide_id in _cancelled + + # Schritt 1: Recherche — eigene Faktenbasis, unabhängig von den Bausteinen + await _set_progress(guide_id, "Recherchiere…") + recherche_path = content_path.parent / f"{content_path.stem}.recherche.md" + fragment_paths.append(recherche_path) + recherche_path.unlink(missing_ok=True) + if project: + source = _prompt("OnePager-Quelle-Projekt", project=project) + else: + source = _prompt("OnePager-Quelle-Thema", topic=topic) + slots = [{ + "key": f"{guide_id}-recherche", + "prompt": _prompt("OnePager-Recherche", topic=topic, source=source, out_path=recherche_path, extra=_extra(instructions)), + "role": "quick", "capabilities": "files" if project else "full", + "payload": (lambda result: recherche_path.read_text(encoding="utf-8") if recherche_path.exists() else None), + }] + res = await _race(topic, "OnePager-Recherche", slots, 1, _timeout("onepager_recherche"), provider, cancelled=is_cancelled) + if is_cancelled(): return None - if returncode != 0: - await _fail(guide_id, _claude_error("OnePager-Fehler", returncode, stdout, stderr)) + if res is None: + await _fail(guide_id, "OnePager-Recherche fehlgeschlagen") return None - merksaetze: dict[int, str] = {} - for line in stdout.splitlines(): - m = re.match(r"\s*(\d+)\s*[:.\-–—]\s*(.*\S)", line) - if m: - merksaetze.setdefault(int(m.group(1)), m.group(2)) - sections = [] - for num, entry in entries.items(): - md = merksaetze.get(num) - if md is None: - _log(topic, f"OnePager: Merksatz für Baustein {num} fehlt") - continue - sections.append({"num": num, "title": _titel(entry), "md": md}) - if not sections: - await _fail(guide_id, "OnePager-Antwort nicht parsebar") + recherche = res[0] + + # Schritt 2: Bauen — Karten nur aus der Faktenbasis + await _set_progress(guide_id, "Baue OnePager…") + slots = [{ + "key": f"{guide_id}-bauen", + "prompt": _prompt("OnePager-Bauen", topic=topic, recherche=recherche, extra=_extra(instructions)), + "role": "fast", "capabilities": "none", + "payload": (lambda result: _parse_auswahl(result[1]) or None), + }] + res = await _race(topic, "OnePager-Bauen", slots, 1, _timeout("onepager_bauen"), provider, cancelled=is_cancelled) + if is_cancelled(): return None + if res is None: + await _fail(guide_id, "OnePager-Bau fehlgeschlagen") + return None + cards = res[0] + + # Schritt 3: Verifizieren — OK oder vollständig korrigierte Liste (nicht fatal) + await _set_progress(guide_id, "Verifiziere OnePager…") + karten_block = "\n".join(f"{i}. {t}" for i, t in cards.items()) + slots = [{ + "key": f"{guide_id}-verify", + "prompt": _prompt("OnePager-Verifikation", topic=topic, recherche=recherche, karten=karten_block), + "role": "fast", "capabilities": "none", + "payload": (lambda result: result[1].strip() if (_parse_auswahl(result[1]) or "OK" in result[1].upper()) else None), + }] + res = await _race(topic, "OnePager-Verifikation", slots, 1, _timeout("onepager_verify"), provider, cancelled=is_cancelled) + if is_cancelled(): + return None + if res is None: + _log(topic, "OnePager-Verifikation fehlgeschlagen — ungeprüfte Version wird verwendet") + else: + corrected = _parse_auswahl(res[0]) + if corrected: + _log(topic, "OnePager-Verifikation hat Korrekturen geliefert") + cards = corrected + + sections = [ + {"num": i, "title": _titel(text), "md": text.split(" — ", 1)[1].strip() if " — " in text else text} + for i, text in cards.items() + ] return [{"title": topic, "sections": sections}] @@ -431,12 +856,13 @@ async def _generate_sections( # Ein Writer, gliedert selbst in Kapitel plan = None zuteilungen = [bausteine_block] + chunk_sizes = [len(entries)] else: await _set_progress(guide_id, "Plane Gliederung…") returncode, stdout, stderr = await run_agent( f"{guide_id}-plan", _prompt("Guide-Plan", topic=topic, format_name=format_name, bausteine=bausteine_block, extra=_extra(instructions)), - AGENT_TIMEOUT, provider=provider, role="fast", capabilities="none", + _timeout("plan", len(entries)), provider=provider, role="guide", capabilities="none", ) if guide_id in _cancelled: return None @@ -449,6 +875,7 @@ async def _generate_sections( return None chunks = _split_chunks(plan, WRITER_COUNT[format_name]) zuteilungen = [_zuteilung_text(chunk, entries) for chunk in chunks] + chunk_sizes = [sum(len(c["nums"]) for c in chunk) for chunk in chunks] writer_count = len(zuteilungen) await _set_progress(guide_id, f"Schreibe Sections ({writer_count} Writer)…" if writer_count > 1 else "Schreibe Sections…") @@ -462,9 +889,9 @@ async def _generate_sections( topic=topic, format_name=format_name, zuteilung=zuteilung, facts=facts, spec=spec, out_path=path, extra=_extra(instructions), ), - AGENT_TIMEOUT, provider=provider, role="guide", capabilities="full", + _timeout("writer", size), provider=provider, role="guide", capabilities="full", ) - for i, (zuteilung, path) in enumerate(zip(zuteilungen, paths), 1) + for i, (zuteilung, path, size) in enumerate(zip(zuteilungen, paths, chunk_sizes), 1) ], return_exceptions=True) if guide_id in _cancelled: return None @@ -515,7 +942,7 @@ async def _generate_sections( async def generate_guide(guide_id: str, topic: str, format_name: str, instructions: str = "", provider: str = DEFAULT_PROVIDER) -> None: async with _semaphore: now = datetime.now(timezone.utc).isoformat() - await update_guide(guide_id, status="generating", progress="Lese Bausteine…", updated_at=now) + await update_guide(guide_id, status="generating", progress="Starte…", updated_at=now) content_path = guide_content_path(topic, format_name) project = project_dir(topic) if project_dir(topic).is_dir() else None @@ -525,18 +952,17 @@ async def generate_guide(guide_id: str, topic: str, format_name: str, instructio if guide_id in _cancelled: return - cats = _parse_kategorien(bausteine_path(topic).read_text(encoding="utf-8")) - selected: list[str] = [] - for cat in FORMAT_COVERAGE[format_name]: - selected.extend(cats.get(cat, [])) - if not selected: - await _fail(guide_id, "Keine passenden Bausteine gefunden") - return - entries = {i: text for i, text in enumerate(selected, 1)} - if format_name == "OnePager": - chapters = await _generate_onepager(guide_id, topic, entries, instructions, provider) + chapters = await _generate_onepager(guide_id, topic, instructions, provider, project, content_path, fragment_paths) else: + cats = _parse_kategorien(bausteine_path(topic).read_text(encoding="utf-8")) + selected: list[str] = [] + for cat in FORMAT_COVERAGE[format_name]: + selected.extend(cats.get(cat, [])) + if not selected: + await _fail(guide_id, "Keine passenden Bausteine gefunden") + return + entries = {i: text for i, text in enumerate(selected, 1)} facts = _prompt("Guide-Fakten-Projekt", project=project) if project else _prompt("Guide-Fakten-Thema") chapters = await _generate_sections( guide_id, topic, format_name, entries, @@ -554,7 +980,7 @@ async def generate_guide(guide_id: str, topic: str, format_name: str, instructio await update_guide(guide_id, status="done", progress=None, updated_at=now) except asyncio.TimeoutError: - await _fail(guide_id, f"Timeout bei Generierung nach {AGENT_TIMEOUT}s") + await _fail(guide_id, "Timeout bei der Generierung") except FileNotFoundError: await _fail(guide_id, "Bausteine fehlen") except Exception as e: diff --git a/backend/models.py b/backend/models.py index 09e5913..a768982 100644 --- a/backend/models.py +++ b/backend/models.py @@ -18,17 +18,28 @@ class GuideCreateRequest(BaseModel): provider: ProviderType = "claude" +class TopicCreateRequest(BaseModel): + name: str = Field(min_length=1, max_length=100) + + class BausteineCreateRequest(BaseModel): topic: str = Field(min_length=1, max_length=100) instructions: str = Field(default="", max_length=2000) provider: ProviderType = "claude" +class BausteineStep(BaseModel): + label: str + state: Literal["done", "active", "pending"] + + class BausteineStatusResponse(BaseModel): ready: bool generating: bool progress: str | None = None error: str | None = None + partial: bool = False + steps: list[BausteineStep] = [] class ProjectResponse(BaseModel): diff --git a/backend/paths.py b/backend/paths.py index a5d02e2..a52c033 100644 --- a/backend/paths.py +++ b/backend/paths.py @@ -23,7 +23,7 @@ def bausteine_topics() -> list[str]: return [] return [ p.stem for p in bdir.glob("*.md") - if not re.search(r"\.(recherche-\d+|auswahl)$", p.stem) + if not re.search(r"\.(recherche-\d+|auswahl(-\d+|-check)?|einordnung-\d+|final-check|sortierung)$", p.stem) ] diff --git a/backend/routes.py b/backend/routes.py index 37f4d3c..541f00c 100644 --- a/backend/routes.py +++ b/backend/routes.py @@ -10,14 +10,16 @@ from agents import provider_available from config import PROJECTS_DIR, PROVIDERS from database import ( create_guide, delete_guide, get_guide, list_guides, + create_topic, list_topics as db_list_topics, delete_topic, list_progress, set_progress, delete_progress, ) from generator import ( generate_guide, cancel_guide, chat_with_guide, - generate_bausteine, bausteine_status, active_bausteine, reset_bausteine, + generate_bausteine, cancel_bausteine, bausteine_status, active_bausteine, reset_bausteine, ) from models import ( GuideCreateRequest, GuideResponse, + TopicCreateRequest, BausteineCreateRequest, BausteineStatusResponse, GuideChatRequest, GuideChatResponse, ProgressUpdate, ProgressResponse, ProjectResponse, ProviderInfo, @@ -33,12 +35,26 @@ async def get_providers(): @router.get("/topics") -async def list_topics(): +async def get_topics(): + db_topics = await db_list_topics() guides = await list_guides() - topics = {g["topic"] for g in guides} - topics.update(bausteine_topics()) - topics.update(job["topic"] for job in active_bausteine()) - return sorted(topics) + derived = {g["topic"] for g in guides} + derived.update(bausteine_topics()) + derived.update(job["topic"] for job in active_bausteine()) + # DB ist führend (Reihenfolge: neueste zuerst); Abgeleitetes ohne DB-Eintrag hinten anhängen + return db_topics + sorted(derived - set(db_topics)) + + +@router.post("/topics") +async def add_topic(req: TopicCreateRequest): + await create_topic(req.name.strip()) + return {"ok": True} + + +@router.delete("/topics") +async def remove_topic(topic: str): + await delete_topic(topic) + return {"ok": True} def _safe_project_name(name: str) -> str: @@ -81,10 +97,18 @@ async def create_bausteine(req: BausteineCreateRequest): topic = req.topic.strip() if bausteine_status(topic)["generating"]: return {"ok": True, "status": "already_generating"} + await create_topic(topic) asyncio.create_task(generate_bausteine(topic, req.instructions.strip(), req.provider)) return {"ok": True} +@router.post("/bausteine/cancel") +async def cancel_bausteine_route(topic: str): + if not cancel_bausteine(topic): + raise HTTPException(404, "Keine laufende Generierung") + return {"ok": True} + + @router.delete("/bausteine") async def remove_bausteine(topic: str): reset_bausteine(topic) @@ -95,8 +119,9 @@ async def remove_bausteine(topic: str): @router.post("/guides", response_model=GuideResponse) async def create(req: GuideCreateRequest): - if not bausteine_path(req.topic.strip()).exists(): + if req.format != "OnePager" and not bausteine_path(req.topic.strip()).exists(): raise HTTPException(400, "Erst Bausteine erstellen") + await create_topic(req.topic.strip()) now = datetime.now(timezone.utc).isoformat() guide = { "id": str(uuid.uuid4()), diff --git a/frontend/src/App.vue b/frontend/src/App.vue index 5ce4e74..0031a76 100644 --- a/frontend/src/App.vue +++ b/frontend/src/App.vue @@ -1,17 +1,12 @@