diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..7affc33 --- /dev/null +++ b/.gitignore @@ -0,0 +1,6 @@ +storage/ +guides.db +node_modules/ +frontend/dist/ +__pycache__/ +*.pyc diff --git a/backend/__pycache__/config.cpython-312.pyc b/backend/__pycache__/config.cpython-312.pyc deleted file mode 100644 index 997e394..0000000 Binary files a/backend/__pycache__/config.cpython-312.pyc and /dev/null differ diff --git a/backend/__pycache__/database.cpython-312.pyc b/backend/__pycache__/database.cpython-312.pyc deleted file mode 100644 index d1682c0..0000000 Binary files a/backend/__pycache__/database.cpython-312.pyc and /dev/null differ diff --git a/backend/__pycache__/generator.cpython-312.pyc b/backend/__pycache__/generator.cpython-312.pyc deleted file mode 100644 index d276111..0000000 Binary files a/backend/__pycache__/generator.cpython-312.pyc and /dev/null differ diff --git a/backend/__pycache__/main.cpython-312.pyc b/backend/__pycache__/main.cpython-312.pyc deleted file mode 100644 index ca895a8..0000000 Binary files a/backend/__pycache__/main.cpython-312.pyc and /dev/null differ diff --git a/backend/__pycache__/models.cpython-312.pyc b/backend/__pycache__/models.cpython-312.pyc deleted file mode 100644 index 1882a78..0000000 Binary files a/backend/__pycache__/models.cpython-312.pyc and /dev/null differ diff --git a/backend/__pycache__/routes.cpython-312.pyc b/backend/__pycache__/routes.cpython-312.pyc deleted file mode 100644 index b525073..0000000 Binary files a/backend/__pycache__/routes.cpython-312.pyc and /dev/null differ diff --git a/backend/config.py b/backend/config.py index 951a25e..b29a9c9 100644 --- a/backend/config.py +++ b/backend/config.py @@ -33,4 +33,12 @@ GENERATION_TIMEOUTS = { } MAX_CONCURRENT_GENERATIONS = 10 +MAX_ITERATIONS = { + "OnePager": 3, + "Cheatsheet": 3, + "MiniGuide": 3, + "BeginnerGuide": 5, + "IntermediateGuide": 5, + "ExtendedGuide": 5, +} CLAUDE_CLI = "claude" diff --git a/backend/generator.py b/backend/generator.py index ad3738f..7c3e9e6 100644 --- a/backend/generator.py +++ b/backend/generator.py @@ -1,4 +1,5 @@ import asyncio +import subprocess import tempfile from datetime import datetime, timezone from pathlib import Path @@ -8,6 +9,7 @@ from config import ( DOC_DIR, GENERATION_TIMEOUTS, MAX_CONCURRENT_GENERATIONS, + MAX_ITERATIONS, STORAGE_DIR, ) from database import update_guide @@ -26,7 +28,58 @@ async def cancel_guide(guide_id: str) -> bool: return False -def _build_prompt(topic: str, format_name: str, html_path: Path, pdf_path: Path) -> str: +async def _set_progress(guide_id: str, progress: str) -> None: + now = datetime.now(timezone.utc).isoformat() + await update_guide(guide_id, progress=progress, updated_at=now) + + +async def _run_claude(guide_id: str, prompt: str, timeout: int) -> tuple[int, str, str]: + process = await asyncio.create_subprocess_exec( + CLAUDE_CLI, + "-p", + "--allowedTools", "Write,Bash,Read,WebSearch,WebFetch", + "--dangerously-skip-permissions", + stdin=asyncio.subprocess.PIPE, + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.PIPE, + ) + _active_processes[guide_id] = process + try: + stdout, stderr = await asyncio.wait_for( + process.communicate(input=prompt.encode("utf-8")), + timeout=timeout, + ) + return process.returncode, stdout.decode("utf-8", errors="replace"), stderr.decode("utf-8", errors="replace") + finally: + _active_processes.pop(guide_id, None) + + +async def _render_pdf(html_path: Path, pdf_path: Path) -> tuple[bool, str]: + proc = await asyncio.create_subprocess_exec( + "weasyprint", str(html_path), str(pdf_path), + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.PIPE, + ) + _, stderr = await asyncio.wait_for(proc.communicate(), timeout=120) + if proc.returncode != 0: + return False, stderr.decode("utf-8", errors="replace")[:1000] + return True, "" + + +async def _render_pngs(pdf_path: Path, preview_dir: Path) -> list[Path]: + preview_dir.mkdir(parents=True, exist_ok=True) + proc = await asyncio.create_subprocess_exec( + "python3", "-c", + f"from pdf2image import convert_from_path; pages = convert_from_path('{pdf_path}', dpi=120); [p.save('{preview_dir}/page_{{i}}.png') for i, p in enumerate(pages)]; print(len(pages))", + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.PIPE, + ) + stdout, _ = await asyncio.wait_for(proc.communicate(), timeout=60) + pngs = sorted(preview_dir.glob("page_*.png")) + return pngs + + +def _build_generator_prompt(topic: str, format_name: str, html_path: Path) -> str: spec = (DOC_DIR / "Format" / f"{format_name}.md").read_text(encoding="utf-8") reference = (DOC_DIR / "Referenz" / f"{format_name}.md").read_text(encoding="utf-8") @@ -35,7 +88,8 @@ def _build_prompt(topic: str, format_name: str, html_path: Path, pdf_path: Path) Recherchiere zuerst die aktuelle Version und aktuelle Fakten zu "{topic}" per Websuche, damit Versionsnummern und Angaben stimmen. Schreibe die HTML-Datei nach: {html_path} -Erstelle die PDF-Datei nach: {pdf_path} + +Schreibe NUR die HTML-Datei. Führe KEIN weasyprint aus, erzeuge KEINE PDF. Das übernimmt ein anderer Prozess. FORMAT-SPEZIFIKATION: {spec} @@ -45,84 +99,114 @@ REFERENZ-IMPLEMENTIERUNG (Stil-Vorlage, adaptiere für "{topic}"): """ -async def _set_progress(guide_id: str, progress: str) -> None: - now = datetime.now(timezone.utc).isoformat() - await update_guide(guide_id, progress=progress, updated_at=now) +def _build_fix_prompt(topic: str, format_name: str, html_path: Path, feedback: str) -> str: + return f"""Die HTML-Datei {html_path} für den "{format_name}" zum Thema "{topic}" hat Probleme. + +FEEDBACK VOM PRÜFER: +{feedback} + +Behebe die Probleme in der HTML-Datei {html_path}. Schreibe die korrigierte Version in dieselbe Datei. +Führe KEIN weasyprint aus, erzeuge KEINE PDF. +""" -async def _watch_files(guide_id: str, html_path: Path, pdf_path: Path, stop_event: asyncio.Event) -> None: - html_seen = False - pdf_mtime = 0.0 - iteration = 0 +def _build_review_prompt(format_name: str, png_paths: list[Path], page_count: int) -> str: + spec = (DOC_DIR / "Format" / f"{format_name}.md").read_text(encoding="utf-8") - while not stop_event.is_set(): - await asyncio.sleep(2) + png_list = "\n".join(str(p) for p in png_paths) - if not html_seen and html_path.exists(): - html_seen = True - await _set_progress(guide_id, "HTML generiert…") + return f"""Prüfe die folgenden Preview-Bilder eines generierten "{format_name}" Guides. - if pdf_path.exists(): - current_mtime = pdf_path.stat().st_mtime - if current_mtime > pdf_mtime: - pdf_mtime = current_mtime - iteration += 1 - await _set_progress(guide_id, f"Iteration {iteration}…") +Das PDF hat {page_count} Seite(n). Lies die Preview-Bilder und prüfe sie: +{png_list} + +FORMAT-SPEZIFIKATION (Prüfkriterien): +{spec} + +Prüfe anhand der Spezifikation: +- Stimmt die Seitenanzahl? (OnePager/Cheatsheet = exakt 1 Seite) +- Sind Elemente abgeschnitten oder überlappend? +- Fehlen Pflicht-Elemente (Cover, TOC, Recall-Boxen, Callouts, etc.)? +- Sind Code-Blöcke über Seitenumbrüche zerrissen? +- Ist das Layout korrekt (Spalten, Grid, Footer)? + +Antworte mit GENAU EINEM der folgenden Formate: + +Bei Bestehen: +PASS + +Bei Nicht-Bestehen: +FAIL +- Problem 1 +- Problem 2 +- ... +""" async def generate_guide(guide_id: str, topic: str, format_name: str) -> None: async with _semaphore: now = datetime.now(timezone.utc).isoformat() - await update_guide(guide_id, status="generating", progress="Lesen…", updated_at=now) + await update_guide(guide_id, status="generating", progress="Recherche…", updated_at=now) html_path = STORAGE_DIR / "html" / f"{guide_id}.html" pdf_path = STORAGE_DIR / "pdf" / f"{guide_id}.pdf" - - prompt = _build_prompt(topic, format_name, html_path, pdf_path) - await _set_progress(guide_id, "Generiere HTML…") - - with tempfile.NamedTemporaryFile(mode="w", suffix=".txt", delete=False, encoding="utf-8") as f: - f.write(prompt) - prompt_file = f.name - - stop_event = asyncio.Event() - watcher = asyncio.create_task(_watch_files(guide_id, html_path, pdf_path, stop_event)) + preview_dir = STORAGE_DIR / "preview" / guide_id + timeout = GENERATION_TIMEOUTS.get(format_name, 600) + max_iter = MAX_ITERATIONS.get(format_name, 3) try: - timeout = GENERATION_TIMEOUTS.get(format_name, 600) - process = await asyncio.create_subprocess_exec( - CLAUDE_CLI, - "-p", - "--allowedTools", "Write,Bash,Read,WebSearch,WebFetch", - "--dangerously-skip-permissions", - stdin=asyncio.subprocess.PIPE, - stdout=asyncio.subprocess.PIPE, - stderr=asyncio.subprocess.PIPE, - ) - _active_processes[guide_id] = process - stdout, stderr = await asyncio.wait_for( - process.communicate(input=prompt.encode("utf-8")), - timeout=timeout, - ) + # Step 1: Generator-Agent erstellt HTML + await _set_progress(guide_id, "Generiere HTML…") + gen_prompt = _build_generator_prompt(topic, format_name, html_path) + returncode, stdout, stderr = await _run_claude(guide_id, gen_prompt, timeout) - stop_event.set() - await watcher - - now = datetime.now(timezone.utc).isoformat() - - if process.returncode != 0: - error = stderr.decode("utf-8", errors="replace")[:2000] - await update_guide(guide_id, status="error", progress=None, error_msg=error, updated_at=now) + if returncode != 0: + await _fail(guide_id, f"Generator-Fehler: {stderr[:1000]}") return if not html_path.exists(): - await update_guide(guide_id, status="error", progress=None, error_msg="HTML-Datei wurde nicht erstellt", updated_at=now) + await _fail(guide_id, "HTML-Datei wurde nicht erstellt") return - if not pdf_path.exists(): - await update_guide(guide_id, status="error", progress=None, error_msg="PDF-Datei wurde nicht erstellt", updated_at=now) - return + # Step 2-N: Render → Review → Fix Loop + for iteration in range(1, max_iter + 1): + await _set_progress(guide_id, f"Rendere PDF… (Iteration {iteration})") + ok, err = await _render_pdf(html_path, pdf_path) + if not ok: + await _fail(guide_id, f"WeasyPrint-Fehler: {err}") + return + await _set_progress(guide_id, f"Prüfe… (Iteration {iteration})") + pngs = await _render_pngs(pdf_path, preview_dir) + page_count = len(pngs) + + review_prompt = _build_review_prompt(format_name, pngs, page_count) + returncode, review_out, review_err = await _run_claude(guide_id, review_prompt, 120) + + if returncode != 0: + await _fail(guide_id, f"Review-Fehler: {review_err[:1000]}") + return + + review_text = review_out.strip() + + if review_text.startswith("PASS"): + break + + if iteration == max_iter: + break + + # Fix-Agent + feedback = review_text.replace("FAIL", "").strip() + await _set_progress(guide_id, f"Korrigiere… (Iteration {iteration})") + fix_prompt = _build_fix_prompt(topic, format_name, html_path, feedback) + returncode, _, fix_err = await _run_claude(guide_id, fix_prompt, timeout) + + if returncode != 0: + await _fail(guide_id, f"Fix-Fehler: {fix_err[:1000]}") + return + + # Final: PDF existiert bereits vom letzten Render + now = datetime.now(timezone.utc).isoformat() await update_guide( guide_id, status="done", @@ -133,15 +217,18 @@ async def generate_guide(guide_id: str, topic: str, format_name: str) -> None: ) except asyncio.TimeoutError: - stop_event.set() - await watcher - now = datetime.now(timezone.utc).isoformat() - await update_guide(guide_id, status="error", progress=None, error_msg=f"Timeout nach {timeout}s", updated_at=now) + await _fail(guide_id, f"Timeout nach {timeout}s") except Exception as e: - stop_event.set() - await watcher - now = datetime.now(timezone.utc).isoformat() - await update_guide(guide_id, status="error", progress=None, error_msg=str(e)[:2000], updated_at=now) + await _fail(guide_id, str(e)[:2000]) finally: _active_processes.pop(guide_id, None) - Path(prompt_file).unlink(missing_ok=True) + # Preview-PNGs aufräumen + if preview_dir.exists(): + for f in preview_dir.glob("*"): + f.unlink() + preview_dir.rmdir() + + +async def _fail(guide_id: str, msg: str) -> None: + now = datetime.now(timezone.utc).isoformat() + await update_guide(guide_id, status="error", progress=None, error_msg=msg, updated_at=now) diff --git a/backend/main.py b/backend/main.py index 283fc46..b0ff769 100644 --- a/backend/main.py +++ b/backend/main.py @@ -12,6 +12,7 @@ from routes import router async def lifespan(app: FastAPI): (STORAGE_DIR / "html").mkdir(parents=True, exist_ok=True) (STORAGE_DIR / "pdf").mkdir(parents=True, exist_ok=True) + (STORAGE_DIR / "preview").mkdir(parents=True, exist_ok=True) await init_db() yield diff --git a/guides.db b/guides.db deleted file mode 100644 index faab46e..0000000 Binary files a/guides.db and /dev/null differ