diff --git a/.gitignore b/.gitignore index 1d0fe65..4989913 100644 --- a/.gitignore +++ b/.gitignore @@ -7,3 +7,4 @@ __pycache__/ *.pyc .claude-data/ .env +.staging/ diff --git a/Makefile b/Makefile index 1f6e18b..5571352 100644 --- a/Makefile +++ b/Makefile @@ -48,7 +48,8 @@ remove: stop @echo "Fertig." sync: - @mkdir -p storage/html + @mkdir -p storage/guides storage/bausteine rsync -avz --progress root@178.104.67.87:/var/www/creator/storage/creator.db storage/ - rsync -avz --progress --delete root@178.104.67.87:/var/www/creator/storage/html/ storage/html/ + rsync -avz --progress --delete root@178.104.67.87:/var/www/creator/storage/guides/ storage/guides/ + rsync -avz --progress --delete root@178.104.67.87:/var/www/creator/storage/bausteine/ storage/bausteine/ @echo "Sync abgeschlossen." diff --git a/backend/agents.py b/backend/agents.py index 411893c..7476f69 100644 --- a/backend/agents.py +++ b/backend/agents.py @@ -44,10 +44,11 @@ def provider_available(provider: str) -> bool: return True -def kill_process(agent_key: str) -> None: - process = _active_processes.get(agent_key) - if process and process.returncode is None: - process.kill() +def kill_process(agent_key_prefix: str) -> None: + """Killt alle aktiven Prozesse, deren Key mit dem Prefix beginnt (deckt -plan/-w1… ab).""" + for key, process in list(_active_processes.items()): + if key.startswith(agent_key_prefix) and process.returncode is None: + process.kill() async def run_agent( diff --git a/backend/config.py b/backend/config.py index e79bc53..2dc8fbe 100644 --- a/backend/config.py +++ b/backend/config.py @@ -7,12 +7,6 @@ FRONTEND_DIST = PROJECT_ROOT / "frontend" / "dist" DB_PATH = STORAGE_DIR / "creator.db" PROJECTS_DIR = PROJECT_ROOT / "projects" -FORMAT_META = { - "OnePager": {"pages": "1 Seite", "time": "~5 Min"}, - "MiniGuide": {"pages": "3-5 Seiten", "time": "~15-25 Min"}, - "Guide": {"pages": "10-30 Seiten", "time": "variabel"}, -} - AGENT_TIMEOUT = 3600 MAX_CONCURRENT_GENERATIONS = 10 diff --git a/backend/generator.py b/backend/generator.py index cd70208..c6ec892 100644 --- a/backend/generator.py +++ b/backend/generator.py @@ -1,4 +1,6 @@ import asyncio +import json +import re import uuid from datetime import datetime, timezone from pathlib import Path @@ -11,7 +13,7 @@ from config import ( MAX_CONCURRENT_GENERATIONS, ) from database import update_guide -from paths import final_html_path, project_dir +from paths import bausteine_path, guide_content_path, project_dir _semaphore = asyncio.Semaphore(MAX_CONCURRENT_GENERATIONS) _cancelled: set[str] = set() @@ -30,14 +32,6 @@ async def _set_progress(guide_id: str, progress: str) -> None: await update_guide(guide_id, progress=progress, updated_at=now) -# Welche Baustein-Kategorien jedes Format abdeckt. -FORMAT_COVERAGE = { - "OnePager": "NUR die KERN-Bausteine, maximal verdichtet", - "MiniGuide": "NUR die KERN-Bausteine", - "Guide": "die KERN- und WICHTIG-Bausteine", -} - - def _prompt(name: str, **kwargs) -> str: template = (TEMPLATES_DIR / "Prompt" / f"{name}.md").read_text(encoding="utf-8") return template.format(**kwargs) @@ -47,92 +41,8 @@ def _extra(instructions: str) -> str: return f"\n\nZUSÄTZLICHE ANWEISUNGEN VOM NUTZER:\n{instructions}\n" if instructions else "" -def _build_bausteine_prompt(topic: str, bausteine_path: Path, instructions: str = "", project: Path | None = None) -> str: - if project: - source = _prompt("Bausteine-Quelle-Projekt", project=project) - else: - source = _prompt("Bausteine-Quelle-Thema", topic=topic) - return _prompt( - "Bausteine", - topic=topic, source=source, bausteine_path=bausteine_path, extra=_extra(instructions), - ) - - -def _build_guide_prompt(topic: str, format_name: str, html_path: Path, bausteine: str, instructions: str = "", project: Path | None = None) -> str: - spec = (TEMPLATES_DIR / "Format" / f"{format_name}.md").read_text(encoding="utf-8") - reference = (TEMPLATES_DIR / "Referenz" / f"{format_name}.md").read_text(encoding="utf-8") - - if project: - facts = _prompt("Guide-Fakten-Projekt", project=project) - else: - facts = _prompt("Guide-Fakten-Thema") - - return _prompt( - "Guide", - topic=topic, format_name=format_name, html_path=html_path, - bausteine=bausteine, coverage=FORMAT_COVERAGE[format_name], - facts=facts, spec=spec, reference=reference, extra=_extra(instructions), - ) - - -async def generate_guide(guide_id: str, topic: str, format_name: str, instructions: str = "", provider: str = DEFAULT_PROVIDER) -> None: - async with _semaphore: - now = datetime.now(timezone.utc).isoformat() - await update_guide(guide_id, status="generating", progress="Ermittle Bausteine…", updated_at=now) - - html_path = final_html_path(topic, format_name) - bausteine_path = html_path.with_suffix(".bausteine.md") - project = project_dir(topic) if project_dir(topic).is_dir() else None - - try: - if guide_id in _cancelled: - return - - # Step 1: Bausteine ermitteln (Thema: Websuche, Projekt: Dateien lesen) - current_step = "Bausteine" - bs_prompt = _build_bausteine_prompt(topic, bausteine_path, instructions, project) - returncode, bs_out, bs_err = await run_agent( - guide_id, bs_prompt, AGENT_TIMEOUT, - provider=provider, role="fast", capabilities="files" if project else "full", - ) - - if guide_id in _cancelled: - return - if returncode != 0: - await _fail(guide_id, _claude_error("Baustein-Fehler", returncode, bs_out, bs_err)) - return - if not bausteine_path.exists(): - await _fail(guide_id, "Baustein-Datei wurde nicht erstellt") - return - bausteine = bausteine_path.read_text(encoding="utf-8") - - # Step 2: Generator-Agent erstellt HTML nach Bausteinen - await _set_progress(guide_id, "Generiere HTML…") - current_step = "Generierung" - gen_prompt = _build_guide_prompt(topic, format_name, html_path, bausteine, instructions, project) - returncode, stdout, stderr = await run_agent(guide_id, gen_prompt, AGENT_TIMEOUT, provider=provider, role="guide", capabilities="full") - - if guide_id in _cancelled: - return - if returncode != 0: - await _fail(guide_id, _claude_error("Generator-Fehler", returncode, stdout, stderr)) - return - - if not html_path.exists(): - await _fail(guide_id, "HTML-Datei wurde nicht erstellt") - return - - now = datetime.now(timezone.utc).isoformat() - await update_guide( - guide_id, status="done", progress=None, updated_at=now, - ) - - except asyncio.TimeoutError: - await _fail(guide_id, f"Timeout bei {current_step} nach {AGENT_TIMEOUT}s") - except Exception as e: - await _fail(guide_id, str(e)[:2000]) - finally: - _cancelled.discard(guide_id) +def _log(topic: str, msg: str) -> None: + print(f"[generator] {topic}: {msg}", flush=True) def _claude_error(label: str, returncode: int, stdout: str, stderr: str) -> str: @@ -145,11 +55,518 @@ def _claude_error(label: str, returncode: int, stdout: str, stderr: str) -> str: return f"{label} (exit {returncode}, ohne Ausgabe)" +def _gather_error(label: str, results: list) -> str: + for r in results: + if isinstance(r, BaseException): + return f"{label}: {type(r).__name__}: {r}" + returncode, stdout, stderr = r + if returncode != 0: + return _claude_error(label, returncode, stdout, stderr) + return f"{label}: kein verwertbares Ergebnis" + + async def _fail(guide_id: str, msg: str) -> None: now = datetime.now(timezone.utc).isoformat() await update_guide(guide_id, status="error", progress=None, error_msg=msg, updated_at=now) +# --- Bausteine-Pipeline: 3x Recherche → Auswahl → 2x Einordnung → finale Einordnung --- + +_bausteine_progress: dict[str, str] = {} +_bausteine_errors: dict[str, str] = {} + +_CATEGORIES = ("KERN", "WICHTIG", "REST") + + +def bausteine_status(topic: str) -> dict: + return { + "ready": bausteine_path(topic).exists(), + "generating": topic in _bausteine_progress, + "progress": _bausteine_progress.get(topic), + "error": _bausteine_errors.get(topic), + } + + +def active_bausteine() -> list[dict]: + return [{"topic": t, "progress": p} for t, p in _bausteine_progress.items()] + + +def reset_bausteine(topic: str) -> None: + bausteine_path(topic).unlink(missing_ok=True) + _bausteine_errors.pop(topic, None) + + +def _build_recherche_prompt(topic: str, out_path: Path, instructions: str = "", project: Path | None = None) -> str: + if project: + source = _prompt("Bausteine-Quelle-Projekt", project=project) + else: + source = _prompt("Bausteine-Quelle-Thema", topic=topic) + return _prompt( + "Bausteine-Recherche", + topic=topic, source=source, bausteine_path=out_path, extra=_extra(instructions), + ) + + +def _parse_auswahl(text: str) -> dict[int, str]: + """Parst die konsolidierte Liste: `N. Titel — Kurzbeschreibung` pro Zeile.""" + entries: dict[int, str] = {} + last = None + for line in text.splitlines(): + m = re.match(r"\s*(\d+)[.)]\s+(.*\S)", line) + if m: + last = int(m.group(1)) + entries[last] = m.group(2) + elif last is not None and line.strip(): + entries[last] += " " + line.strip() + return entries + + +def _parse_einordnung(text: str) -> dict[int, str]: + """Parst eine Einordnung (`KERN:` gefolgt von `N Titel`-Zeilen) zu Nummer→Kategorie.""" + mapping: dict[int, str] = {} + current = None + for line in text.splitlines(): + s = line.strip().lstrip("-*# ").strip() + if not s: + continue + m = re.match(r"(KERN|WICHTIG|REST)\b[:\s]*(.*)$", s, re.IGNORECASE) + if m: + current = m.group(1).upper() + for num in re.findall(r"\b\d+\b", m.group(2)): + mapping.setdefault(int(num), current) + continue + if current: + m = re.match(r"(\d+)\b", s) + if m: + mapping.setdefault(int(m.group(1)), current) + return mapping + + +def _build_final_bausteine(topic: str, entries: dict[int, str], mapping: dict[int, str]) -> str: + """Baut die finale Baustein-Datei aus konsolidierter Liste + finaler Zuordnung.""" + grouped: dict[str, list[str]] = {c: [] for c in _CATEGORIES} + for num in sorted(entries): + cat = mapping.get(num) + if cat is None: + _log(topic, f"Baustein {num} fehlt in finaler Einordnung → REST") + cat = "REST" + grouped[cat].append(entries[num]) + unknown = sorted(set(mapping) - set(entries)) + if unknown: + _log(topic, f"finale Einordnung enthält unbekannte Nummern (ignoriert): {unknown}") + parts = [] + for cat in _CATEGORIES: + lines = "\n".join(f"{i}. {text}" for i, text in enumerate(grouped[cat], 1)) + parts.append(f"## {cat}\n{lines}") + return "\n\n".join(parts) + "\n" + + +async def generate_bausteine(topic: str, instructions: str = "", provider: str = DEFAULT_PROVIDER) -> None: + if topic in _bausteine_progress: + return + _bausteine_progress[topic] = "Wartend…" + _bausteine_errors.pop(topic, None) + + final_path = bausteine_path(topic) + project = project_dir(topic) if project_dir(topic).is_dir() else None + stem = final_path.stem + recherche_paths = [final_path.parent / f"{stem}.recherche-{i}.md" for i in (1, 2, 3)] + auswahl_path = final_path.parent / f"{stem}.auswahl.md" + + try: + async with _semaphore: + # Schritt 1: 3 Recherche-Agenten parallel (Thema: Websuche, Projekt: Dateien lesen) + _bausteine_progress[topic] = "Recherche läuft (3 Agenten)…" + caps = "files" if project else "full" + results = await asyncio.gather(*[ + run_agent( + f"bausteine-{topic}-recherche-{i}", + _build_recherche_prompt(topic, path, instructions, project), + AGENT_TIMEOUT, provider=provider, role="fast", capabilities=caps, + ) + for i, path in enumerate(recherche_paths, 1) + ], return_exceptions=True) + for i, (r, p) in enumerate(zip(results, recherche_paths), 1): + if isinstance(r, BaseException): + _log(topic, f"Recherche {i}: {type(r).__name__}: {r}") + elif r[0] != 0: + _log(topic, f"Recherche {i}: {_claude_error('Fehler', *r)}") + elif not p.exists(): + _log(topic, f"Recherche {i}: keine Ausgabedatei erstellt") + recherchen = [p.read_text(encoding="utf-8") for p in recherche_paths if p.exists()] + if not recherchen: + _bausteine_errors[topic] = _gather_error("Recherche-Fehler", results) + return + + # Schritt 2: Auswahl-Agent konsolidiert die Ergebnisse (ohne Quellen) + _bausteine_progress[topic] = f"Konsolidiere Recherche ({len(recherchen)}/3 erfolgreich)…" + results_block = "\n\n".join(f"### Recherche {i}\n\n{text}" for i, text in enumerate(recherchen, 1)) + returncode, stdout, stderr = await run_agent( + f"bausteine-{topic}-auswahl", + _prompt("Bausteine-Auswahl", topic=topic, results=results_block, out_path=auswahl_path), + AGENT_TIMEOUT, provider=provider, role="fast", capabilities="files", + ) + if returncode != 0 or not auswahl_path.exists(): + _bausteine_errors[topic] = _claude_error("Auswahl-Fehler", returncode, stdout, stderr) + return + flat = auswahl_path.read_text(encoding="utf-8") + entries = _parse_auswahl(flat) + if not entries: + _bausteine_errors[topic] = "Auswahl-Liste nicht parsebar" + return + + # Schritt 3: 2 Einordnungs-Agenten parallel (antworten nur mit Nummer+Titel je Kategorie) + _bausteine_progress[topic] = "Einordnung läuft (2 Agenten)…" + results = await asyncio.gather(*[ + run_agent( + f"bausteine-{topic}-einordnung-{i}", + _prompt("Bausteine-Einordnung", topic=topic, bausteine=flat), + AGENT_TIMEOUT, provider=provider, role="fast", capabilities="none", + ) + for i in (1, 2) + ], return_exceptions=True) + einordnungen = [] + for i, r in enumerate(results, 1): + if isinstance(r, BaseException): + _log(topic, f"Einordnung {i}: {type(r).__name__}: {r}") + elif r[0] != 0: + _log(topic, f"Einordnung {i}: {_claude_error('Fehler', *r)}") + elif not _parse_einordnung(r[1]): + _log(topic, f"Einordnung {i}: Antwort nicht parsebar") + else: + einordnungen.append(r[1].strip()) + if not einordnungen: + _bausteine_errors[topic] = _gather_error("Einordnungs-Fehler", results) + return + + # Schritt 4: finale Einordnung — Python validiert und baut die Datei + _bausteine_progress[topic] = f"Finale Einordnung ({len(einordnungen)}/2 erfolgreich)…" + returncode, stdout, stderr = await run_agent( + f"bausteine-{topic}-final", + _prompt( + "Bausteine-Einordnung-Final", + topic=topic, bausteine=flat, + einordnung_1=einordnungen[0], einordnung_2=einordnungen[-1], + ), + AGENT_TIMEOUT, provider=provider, role="fast", capabilities="none", + ) + if returncode != 0: + _bausteine_errors[topic] = _claude_error("Finale-Einordnungs-Fehler", returncode, stdout, stderr) + return + mapping = _parse_einordnung(stdout) + if not mapping: + _bausteine_errors[topic] = "Finale Einordnung nicht parsebar" + return + final_path.write_text(_build_final_bausteine(topic, entries, mapping), encoding="utf-8") + except Exception as e: + _bausteine_errors[topic] = str(e)[:2000] + finally: + _bausteine_progress.pop(topic, None) + for p in [*recherche_paths, auswahl_path]: + p.unlink(missing_ok=True) + + +# --- Guide-Generierung: Bausteine → (Plan) → Writer → JSON --- + +# Welche Baustein-Kategorien jedes Format abdeckt. +FORMAT_COVERAGE = { + "OnePager": ("KERN",), + "MiniGuide": ("KERN",), + "Guide": ("KERN", "WICHTIG"), + "FullGuide": ("KERN", "WICHTIG", "REST"), +} + +# Parallele Writer pro Format (OnePager hat einen eigenen Weg). +WRITER_COUNT = {"MiniGuide": 1, "Guide": 2, "FullGuide": 4} + + +def _parse_kategorien(text: str) -> dict[str, list[str]]: + """Parst die finale Baustein-Datei (## KERN/WICHTIG/REST mit nummerierten Einträgen).""" + cats: dict[str, list[str]] = {} + current = None + for line in text.splitlines(): + s = line.strip() + m = re.match(r"#+\s*(KERN|WICHTIG|REST)\b", s, re.IGNORECASE) + if m: + current = m.group(1).upper() + cats.setdefault(current, []) + continue + m = re.match(r"(\d+)[.)]\s+(.*\S)", s) + if m and current: + cats[current].append(m.group(2)) + return cats + + +def _titel(entry: str) -> str: + return entry.split(" — ")[0].strip() or entry + + +def _parse_gliederung(text: str, valid: set[int], topic: str) -> list[dict]: + """Parst die Gliederung (`KAPITEL: Titel` + `N Titel`-Zeilen) → [{"title", "nums"}].""" + chapters: list[dict] = [] + seen: set[int] = set() + for line in text.splitlines(): + s = line.strip().lstrip("-*# ").strip() + if not s: + continue + m = re.match(r"KAPITEL\s*:\s*(.+)", s, re.IGNORECASE) + if m: + chapters.append({"title": m.group(1).strip(), "nums": []}) + continue + m = re.match(r"(\d+)\b", s) + if m and chapters: + num = int(m.group(1)) + if num in valid and num not in seen: + chapters[-1]["nums"].append(num) + seen.add(num) + missing = sorted(valid - seen) + if missing: + _log(topic, f"Gliederung: Bausteine {missing} fehlen → Kapitel 'Weitere Themen'") + chapters.append({"title": "Weitere Themen", "nums": missing}) + return [c for c in chapters if c["nums"]] + + +def _split_chunks(chapters: list[dict], n: int) -> list[list[dict]]: + """Teilt Kapitel in bis zu n zusammenhängende Chunks, balanciert nach Section-Anzahl.""" + n = max(1, min(n, len(chapters))) + chunks: list[list[dict]] = [] + current: list[dict] = [] + count = 0 + remaining_total = sum(len(c["nums"]) for c in chapters) + remaining_chunks = n + for ch in chapters: + current.append(ch) + count += len(ch["nums"]) + if remaining_chunks > 1 and count >= remaining_total / remaining_chunks: + chunks.append(current) + remaining_total -= count + remaining_chunks -= 1 + current = [] + count = 0 + if current: + chunks.append(current) + return chunks + + +def _zuteilung_text(chunk: list[dict], entries: dict[int, str]) -> str: + lines = [] + for ch in chunk: + lines.append(f"KAPITEL: {ch['title']}") + lines.extend(f"{num} {entries[num]}" for num in ch["nums"]) + return "\n".join(lines) + + +_FRAGMENT_KAPITEL_RE = re.compile(r"", re.IGNORECASE) +_FRAGMENT_SECTION_RE = re.compile(r"", re.IGNORECASE) + + +def _parse_fragment(text: str) -> list[dict]: + """Parst eine Writer-Datei → [{"kapitel", "num", "title", "md"}] in Datei-Reihenfolge.""" + sections: list[dict] = [] + kapitel = None + current = None + for line in text.splitlines(): + s = line.strip() + m = _FRAGMENT_KAPITEL_RE.match(s) + if m: + kapitel = m.group(1) + current = None + continue + m = _FRAGMENT_SECTION_RE.match(s) + if m: + current = {"kapitel": kapitel, "num": int(m.group(1)), "title": (m.group(2) or "").strip(), "md": []} + sections.append(current) + continue + if current is not None: + current["md"].append(line) + for sec in sections: + sec["md"] = "\n".join(sec["md"]).strip() + return sections + + +def _section_json(sec: dict, entries: dict[int, str]) -> dict: + return {"num": sec["num"], "title": sec["title"] or _titel(entries[sec["num"]]), "md": sec["md"]} + + +async def _generate_onepager(guide_id: str, topic: str, entries: dict[int, str], instructions: str, provider: str) -> list[dict] | None: + await _set_progress(guide_id, "Generiere OnePager…") + bausteine_block = "\n".join(f"{i}. {t}" for i, t in entries.items()) + returncode, stdout, stderr = await run_agent( + f"{guide_id}-onepager", + _prompt("OnePager", topic=topic, bausteine=bausteine_block, extra=_extra(instructions)), + AGENT_TIMEOUT, provider=provider, role="fast", capabilities="none", + ) + if guide_id in _cancelled: + return None + if returncode != 0: + await _fail(guide_id, _claude_error("OnePager-Fehler", returncode, stdout, stderr)) + return None + merksaetze: dict[int, str] = {} + for line in stdout.splitlines(): + m = re.match(r"\s*(\d+)\s*[:.\-–—]\s*(.*\S)", line) + if m: + merksaetze.setdefault(int(m.group(1)), m.group(2)) + sections = [] + for num, entry in entries.items(): + md = merksaetze.get(num) + if md is None: + _log(topic, f"OnePager: Merksatz für Baustein {num} fehlt") + continue + sections.append({"num": num, "title": _titel(entry), "md": md}) + if not sections: + await _fail(guide_id, "OnePager-Antwort nicht parsebar") + return None + return [{"title": topic, "sections": sections}] + + +async def _generate_sections( + guide_id: str, topic: str, format_name: str, entries: dict[int, str], + facts: str, instructions: str, provider: str, + content_path: Path, fragment_paths: list[Path], +) -> list[dict] | None: + spec = (TEMPLATES_DIR / "Format" / "Section.md").read_text(encoding="utf-8") + bausteine_block = "\n".join(f"{i}. {t}" for i, t in entries.items()) + + if format_name == "MiniGuide": + # Ein Writer, gliedert selbst in Kapitel + plan = None + zuteilungen = [bausteine_block] + else: + await _set_progress(guide_id, "Plane Gliederung…") + returncode, stdout, stderr = await run_agent( + f"{guide_id}-plan", + _prompt("Guide-Plan", topic=topic, format_name=format_name, bausteine=bausteine_block, extra=_extra(instructions)), + AGENT_TIMEOUT, provider=provider, role="fast", capabilities="none", + ) + if guide_id in _cancelled: + return None + if returncode != 0: + await _fail(guide_id, _claude_error("Plan-Fehler", returncode, stdout, stderr)) + return None + plan = _parse_gliederung(stdout, set(entries), topic) + if not plan: + await _fail(guide_id, "Gliederung nicht parsebar") + return None + chunks = _split_chunks(plan, WRITER_COUNT[format_name]) + zuteilungen = [_zuteilung_text(chunk, entries) for chunk in chunks] + + writer_count = len(zuteilungen) + await _set_progress(guide_id, f"Schreibe Sections ({writer_count} Writer)…" if writer_count > 1 else "Schreibe Sections…") + paths = [content_path.parent / f"{content_path.stem}.chunk-{i}.md" for i in range(1, writer_count + 1)] + fragment_paths.extend(paths) + results = await asyncio.gather(*[ + run_agent( + f"{guide_id}-w{i}", + _prompt( + "Guide-Writer", + topic=topic, format_name=format_name, zuteilung=zuteilung, + facts=facts, spec=spec, out_path=path, extra=_extra(instructions), + ), + AGENT_TIMEOUT, provider=provider, role="guide", capabilities="full", + ) + for i, (zuteilung, path) in enumerate(zip(zuteilungen, paths), 1) + ], return_exceptions=True) + if guide_id in _cancelled: + return None + for i, (r, p) in enumerate(zip(results, paths), 1): + if isinstance(r, BaseException): + _log(topic, f"Writer {i}: {type(r).__name__}: {r}") + elif r[0] != 0: + _log(topic, f"Writer {i}: {_claude_error('Fehler', *r)}") + elif not p.exists(): + _log(topic, f"Writer {i}: keine Ausgabedatei erstellt") + fragments: list[dict] = [] + for p in paths: + if p.exists(): + fragments.extend(_parse_fragment(p.read_text(encoding="utf-8"))) + if not fragments: + await _fail(guide_id, _gather_error("Writer-Fehler", list(results))) + return None + + await _set_progress(guide_id, "Setze zusammen…") + chapters: list[dict] = [] + if plan is None: + # MiniGuide: Kapitel aus den Fragment-Markern in Datei-Reihenfolge + seen: set[int] = set() + for sec in fragments: + if sec["num"] not in entries or sec["num"] in seen: + continue + seen.add(sec["num"]) + title = sec["kapitel"] or topic + if not chapters or chapters[-1]["title"] != title: + chapters.append({"title": title, "sections": []}) + chapters[-1]["sections"].append(_section_json(sec, entries)) + missing = sorted(set(entries) - seen) + else: + by_num = {sec["num"]: sec for sec in fragments if sec["num"] in entries} + for ch in plan: + sections = [_section_json(by_num[num], entries) for num in ch["nums"] if num in by_num] + if sections: + chapters.append({"title": ch["title"], "sections": sections}) + missing = sorted(set(entries) - set(by_num)) + if missing: + _log(topic, f"Sections fehlen in der Writer-Ausgabe: {missing}") + if not chapters: + await _fail(guide_id, "Keine Sections in der Writer-Ausgabe gefunden") + return None + return chapters + + +async def generate_guide(guide_id: str, topic: str, format_name: str, instructions: str = "", provider: str = DEFAULT_PROVIDER) -> None: + async with _semaphore: + now = datetime.now(timezone.utc).isoformat() + await update_guide(guide_id, status="generating", progress="Lese Bausteine…", updated_at=now) + + content_path = guide_content_path(topic, format_name) + project = project_dir(topic) if project_dir(topic).is_dir() else None + fragment_paths: list[Path] = [] + + try: + if guide_id in _cancelled: + return + + cats = _parse_kategorien(bausteine_path(topic).read_text(encoding="utf-8")) + selected: list[str] = [] + for cat in FORMAT_COVERAGE[format_name]: + selected.extend(cats.get(cat, [])) + if not selected: + await _fail(guide_id, "Keine passenden Bausteine gefunden") + return + entries = {i: text for i, text in enumerate(selected, 1)} + + if format_name == "OnePager": + chapters = await _generate_onepager(guide_id, topic, entries, instructions, provider) + else: + facts = _prompt("Guide-Fakten-Projekt", project=project) if project else _prompt("Guide-Fakten-Thema") + chapters = await _generate_sections( + guide_id, topic, format_name, entries, + facts, instructions, provider, content_path, fragment_paths, + ) + if chapters is None or guide_id in _cancelled: + return + + content_path.write_text( + json.dumps({"topic": topic, "format": format_name, "chapters": chapters}, ensure_ascii=False, indent=1), + encoding="utf-8", + ) + + now = datetime.now(timezone.utc).isoformat() + await update_guide(guide_id, status="done", progress=None, updated_at=now) + + except asyncio.TimeoutError: + await _fail(guide_id, f"Timeout bei Generierung nach {AGENT_TIMEOUT}s") + except FileNotFoundError: + await _fail(guide_id, "Bausteine fehlen") + except Exception as e: + await _fail(guide_id, str(e)[:2000]) + finally: + _cancelled.discard(guide_id) + for p in fragment_paths: + p.unlink(missing_ok=True) + + +# --- Tutor-Chat --- + def _build_guide_chat_prompt(topic: str, format_name: str, section: str, outline: str, messages: list[dict]) -> str: transcript = "\n".join( f"{'Nutzer' if m.get('role') == 'user' else 'Assistent'}: {m.get('content', '')}" diff --git a/backend/main.py b/backend/main.py index ef17940..0a53b2d 100644 --- a/backend/main.py +++ b/backend/main.py @@ -10,7 +10,8 @@ from routes import router @asynccontextmanager async def lifespan(app: FastAPI): - (STORAGE_DIR / "html").mkdir(parents=True, exist_ok=True) + (STORAGE_DIR / "guides").mkdir(parents=True, exist_ok=True) + (STORAGE_DIR / "bausteine").mkdir(parents=True, exist_ok=True) await init_db() yield await close_db() diff --git a/backend/models.py b/backend/models.py index 3aa8421..09e5913 100644 --- a/backend/models.py +++ b/backend/models.py @@ -5,6 +5,7 @@ FormatType = Literal[ "OnePager", "MiniGuide", "Guide", + "FullGuide", ] ProviderType = Literal["claude", "minimax"] @@ -17,6 +18,19 @@ class GuideCreateRequest(BaseModel): provider: ProviderType = "claude" +class BausteineCreateRequest(BaseModel): + topic: str = Field(min_length=1, max_length=100) + instructions: str = Field(default="", max_length=2000) + provider: ProviderType = "claude" + + +class BausteineStatusResponse(BaseModel): + ready: bool + generating: bool + progress: str | None = None + error: str | None = None + + class ProjectResponse(BaseModel): name: str diff --git a/backend/paths.py b/backend/paths.py index b22125b..a5d02e2 100644 --- a/backend/paths.py +++ b/backend/paths.py @@ -1,15 +1,30 @@ +import re from pathlib import Path from config import STORAGE_DIR, PROJECTS_DIR -def safe_basename(topic: str, format_name: str) -> str: - clean = topic.replace("/", "_").replace("\x00", "") - return f"{clean} - {format_name}" +def _safe(name: str) -> str: + return name.replace("/", "_").replace("\x00", "") -def final_html_path(topic: str, format_name: str) -> Path: - return STORAGE_DIR / "html" / f"{safe_basename(topic, format_name)}.html" +def guide_content_path(topic: str, format_name: str) -> Path: + return STORAGE_DIR / "guides" / f"{_safe(topic)} - {format_name}.json" + + +def bausteine_path(topic: str) -> Path: + return STORAGE_DIR / "bausteine" / f"{_safe(topic)}.md" + + +def bausteine_topics() -> list[str]: + """Themen, für die eine finale Baustein-Datei existiert (ohne Zwischendateien).""" + bdir = STORAGE_DIR / "bausteine" + if not bdir.is_dir(): + return [] + return [ + p.stem for p in bdir.glob("*.md") + if not re.search(r"\.(recherche-\d+|auswahl)$", p.stem) + ] def project_dir(name: str) -> Path: diff --git a/backend/routes.py b/backend/routes.py index da0578f..37f4d3c 100644 --- a/backend/routes.py +++ b/backend/routes.py @@ -7,32 +7,40 @@ from fastapi import APIRouter, HTTPException from fastapi.responses import FileResponse from agents import provider_available -from config import FORMAT_META, PROJECTS_DIR, PROVIDERS +from config import PROJECTS_DIR, PROVIDERS from database import ( create_guide, delete_guide, get_guide, list_guides, list_progress, set_progress, delete_progress, ) -from generator import generate_guide, cancel_guide, chat_with_guide +from generator import ( + generate_guide, cancel_guide, chat_with_guide, + generate_bausteine, bausteine_status, active_bausteine, reset_bausteine, +) from models import ( GuideCreateRequest, GuideResponse, + BausteineCreateRequest, BausteineStatusResponse, GuideChatRequest, GuideChatResponse, ProgressUpdate, ProgressResponse, ProjectResponse, ProviderInfo, ) -from paths import final_html_path, project_dir +from paths import bausteine_path, bausteine_topics, guide_content_path, project_dir router = APIRouter(prefix="/api") -@router.get("/formats") -async def get_formats(): - return FORMAT_META - - @router.get("/providers", response_model=list[ProviderInfo]) async def get_providers(): return [{"id": pid, "available": provider_available(pid)} for pid in PROVIDERS] +@router.get("/topics") +async def list_topics(): + guides = await list_guides() + topics = {g["topic"] for g in guides} + topics.update(bausteine_topics()) + topics.update(job["topic"] for job in active_bausteine()) + return sorted(topics) + + def _safe_project_name(name: str) -> str: if not name or "/" in name or "\\" in name or ".." in name or "\x00" in name: raise HTTPException(400, "Ungültiger Projektname") @@ -56,8 +64,39 @@ async def remove_project(name: str): return {"ok": True} +# --- Bausteine --- + +@router.get("/bausteine/status", response_model=BausteineStatusResponse) +async def get_bausteine_status(topic: str): + return bausteine_status(topic) + + +@router.get("/bausteine/active") +async def get_active_bausteine(): + return active_bausteine() + + +@router.post("/bausteine") +async def create_bausteine(req: BausteineCreateRequest): + topic = req.topic.strip() + if bausteine_status(topic)["generating"]: + return {"ok": True, "status": "already_generating"} + asyncio.create_task(generate_bausteine(topic, req.instructions.strip(), req.provider)) + return {"ok": True} + + +@router.delete("/bausteine") +async def remove_bausteine(topic: str): + reset_bausteine(topic) + return {"ok": True} + + +# --- Guides --- + @router.post("/guides", response_model=GuideResponse) async def create(req: GuideCreateRequest): + if not bausteine_path(req.topic.strip()).exists(): + raise HTTPException(400, "Erst Bausteine erstellen") now = datetime.now(timezone.utc).isoformat() guide = { "id": str(uuid.uuid4()), @@ -87,17 +126,17 @@ async def get_one(guide_id: str): return guide -@router.get("/guides/{guide_id}/html") -async def download_html(guide_id: str): +@router.get("/guides/{guide_id}/content") +async def guide_content(guide_id: str): guide = await get_guide(guide_id) if guide is None: raise HTTPException(404, "Guide nicht gefunden") if guide["status"] != "done": - raise HTTPException(404, "HTML nicht verfügbar") - html_path = final_html_path(guide["topic"], guide["format"]) - if not html_path.exists(): + raise HTTPException(404, "Inhalt nicht verfügbar") + path = guide_content_path(guide["topic"], guide["format"]) + if not path.exists(): raise HTTPException(404, "Datei nicht gefunden") - return FileResponse(html_path, media_type="text/html", content_disposition_type="inline") + return FileResponse(path, media_type="application/json") @router.post("/guides/{guide_id}/chat", response_model=GuideChatResponse) @@ -126,9 +165,7 @@ async def remove(guide_id: str): guide = await get_guide(guide_id) if guide is None: raise HTTPException(404, "Guide nicht gefunden") - html_path = final_html_path(guide["topic"], guide["format"]) - html_path.unlink(missing_ok=True) - html_path.with_suffix(".bausteine.md").unlink(missing_ok=True) + guide_content_path(guide["topic"], guide["format"]).unlink(missing_ok=True) await delete_progress(guide_id) await delete_guide(guide_id) return {"ok": True} diff --git a/frontend/src/App.vue b/frontend/src/App.vue index cdcbf19..5ce4e74 100644 --- a/frontend/src/App.vue +++ b/frontend/src/App.vue @@ -1,12 +1,17 @@