"""Guide generation pipeline.

Drives the Claude CLI to write a guide as HTML, renders that HTML to PDF with
WeasyPrint, converts the PDF pages to PNG previews and lets a reviewer run
inspect them. Fix runs are repeated until the review passes or the iteration
budget of the format is used up.
"""
import asyncio
import shutil
import subprocess
import tempfile
from collections.abc import Callable
from datetime import datetime, timezone
from pathlib import Path

from config import (
    CLAUDE_CLI,
    TEMPLATES_DIR,
    GENERATION_TIMEOUTS,
    MAX_CONCURRENT_GENERATIONS,
    MAX_ITERATIONS,
    STORAGE_DIR,
)
from database import update_guide

# Seconds granted to the individual helper subprocesses.
_REVIEW_TIMEOUT = 120
_PDF_RENDER_TIMEOUT = 120
_PNG_RENDER_TIMEOUT = 60
# Seconds to wait for a killed subprocess to be reaped before giving up.
_KILL_GRACE = 5

# Executed via ``python3 -c``. The PDF path and the output directory arrive as
# argv instead of being interpolated into the source, so quotes or backslashes
# in a path cannot break (or inject into) the script.
_PDF_TO_PNG_SCRIPT = """\
import os
import sys
from pdf2image import convert_from_path

pdf_path, out_dir = sys.argv[1], sys.argv[2]
pages = convert_from_path(pdf_path, dpi=120)
for i, page in enumerate(pages):
    page.save(os.path.join(out_dir, 'page_' + str(i) + '.png'))
print(len(pages))
"""

# Limits how many pipelines run at the same time.
_semaphore = asyncio.Semaphore(MAX_CONCURRENT_GENERATIONS)
# Claude CLI process currently running for a guide, keyed by guide id.
_active_processes: dict[str, asyncio.subprocess.Process] = {}
# Guide ids for which a cancellation was requested.
_cancelled: set[str] = set()


def _utc_now_iso() -> str:
    """Return the current UTC time as an ISO-8601 string."""
    return datetime.now(timezone.utc).isoformat()


def _kill_quietly(process: asyncio.subprocess.Process) -> None:
    """Kill *process* if it is still running; tolerate it having just exited."""
    if process.returncode is not None:
        return
    try:
        process.kill()
    except ProcessLookupError:
        # The process exited between the returncode check and the kill.
        pass


async def _kill_and_reap(process: asyncio.subprocess.Process) -> None:
    """Kill *process* and wait up to ``_KILL_GRACE`` seconds for it to exit."""
    _kill_quietly(process)
    try:
        await asyncio.wait_for(process.wait(), timeout=_KILL_GRACE)
    except asyncio.TimeoutError:
        # Best effort: do not block the pipeline on a process that won't die.
        pass


async def cancel_guide(guide_id: str) -> bool:
    """Request cancellation of the pipeline for *guide_id*.

    Marks the guide as cancelled, kills its running Claude CLI process (if
    any) and stores the error state "Abgebrochen" for the guide.

    Returns:
        Always ``True``.
    """
    # NOTE(review): the id is only removed again by a pipeline's ``finally``.
    # If no pipeline is queued or running for this guide the entry stays in
    # the set and would abort a later run for the same id - confirm that
    # callers only cancel guides that are in flight.
    _cancelled.add(guide_id)
    process = _active_processes.get(guide_id)
    if process is not None:
        _kill_quietly(process)
    await update_guide(
        guide_id,
        status="error",
        progress=None,
        error_msg="Abgebrochen",
        updated_at=_utc_now_iso(),
    )
    return True


async def _set_progress(guide_id: str, progress: str) -> None:
    """Store a new progress text for the guide."""
    await update_guide(guide_id, progress=progress, updated_at=_utc_now_iso())


async def _fail(guide_id: str, msg: str) -> None:
    """Store the error state with message *msg* for the guide."""
    await update_guide(
        guide_id,
        status="error",
        progress=None,
        error_msg=msg,
        updated_at=_utc_now_iso(),
    )


async def _run_claude(guide_id: str, prompt: str, timeout: int) -> tuple[int, str, str]:
    """Run the Claude CLI with *prompt* on stdin.

    The process is registered in ``_active_processes`` for the duration of the
    run so that ``cancel_guide`` can kill it.

    Returns:
        ``(returncode, stdout, stderr)`` with both streams decoded as UTF-8
        (undecodable bytes replaced).

    Raises:
        asyncio.TimeoutError: the run exceeded *timeout* seconds; the process
            has been killed before the exception propagates.
    """
    process = await asyncio.create_subprocess_exec(
        CLAUDE_CLI,
        "-p",
        "--allowedTools",
        "Write,Bash,Read,WebSearch,WebFetch",
        "--dangerously-skip-permissions",
        stdin=asyncio.subprocess.PIPE,
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.PIPE,
    )
    _active_processes[guide_id] = process
    try:
        stdout, stderr = await asyncio.wait_for(
            process.communicate(input=prompt.encode("utf-8")),
            timeout=timeout,
        )
    except asyncio.TimeoutError:
        await _kill_and_reap(process)
        raise
    finally:
        _active_processes.pop(guide_id, None)
    return (
        process.returncode,
        stdout.decode("utf-8", errors="replace"),
        stderr.decode("utf-8", errors="replace"),
    )


async def _render_pdf(html_path: Path, pdf_path: Path) -> tuple[bool, str]:
    """Render *html_path* to *pdf_path* with the ``weasyprint`` CLI.

    Returns:
        ``(True, "")`` on success, otherwise ``(False, stderr)`` with stderr
        truncated to 1000 characters.

    Raises:
        asyncio.TimeoutError: rendering exceeded ``_PDF_RENDER_TIMEOUT``
            seconds; the process has been killed.
    """
    proc = await asyncio.create_subprocess_exec(
        "weasyprint",
        str(html_path),
        str(pdf_path),
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.PIPE,
    )
    try:
        _, stderr = await asyncio.wait_for(proc.communicate(), timeout=_PDF_RENDER_TIMEOUT)
    except asyncio.TimeoutError:
        # Without this the renderer would keep running after we gave up on it.
        await _kill_and_reap(proc)
        raise
    if proc.returncode != 0:
        return False, stderr.decode("utf-8", errors="replace")[:1000]
    return True, ""


def _page_index(png_path: Path) -> int:
    """Return the page number encoded in a ``page_<n>.png`` file name."""
    suffix = png_path.stem.removeprefix("page_")
    return int(suffix) if suffix.isdigit() else -1


async def _render_pngs(pdf_path: Path, preview_dir: Path) -> list[Path]:
    """Convert every page of *pdf_path* to ``preview_dir/page_<n>.png``.

    Returns:
        The PNG paths in page order.

    Raises:
        RuntimeError: the converter process exited with a non-zero code.
        asyncio.TimeoutError: conversion exceeded ``_PNG_RENDER_TIMEOUT``
            seconds; the process has been killed.
    """
    preview_dir.mkdir(parents=True, exist_ok=True)
    # Drop previews of an earlier iteration; a PDF that got shorter would
    # otherwise still be reported with the old, larger page count.
    for stale in preview_dir.glob("page_*.png"):
        stale.unlink()

    proc = await asyncio.create_subprocess_exec(
        "python3",
        "-c",
        _PDF_TO_PNG_SCRIPT,
        str(pdf_path),
        str(preview_dir),
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.PIPE,
    )
    try:
        _, stderr = await asyncio.wait_for(proc.communicate(), timeout=_PNG_RENDER_TIMEOUT)
    except asyncio.TimeoutError:
        await _kill_and_reap(proc)
        raise
    if proc.returncode != 0:
        detail = stderr.decode("utf-8", errors="replace")[:1000]
        raise RuntimeError(f"PNG-Rendering fehlgeschlagen: {detail}")

    # Numeric sort: a plain string sort would put page_10 before page_2.
    return sorted(preview_dir.glob("page_*.png"), key=_page_index)


def _read_format_spec(format_name: str) -> str:
    """Return the format specification text for *format_name*."""
    return (TEMPLATES_DIR / "Format" / f"{format_name}.md").read_text(encoding="utf-8")


def _build_generator_prompt(topic: str, format_name: str, html_path: Path, instructions: str = "") -> str:
    """Build the prompt that makes the generator write the guide HTML to *html_path*."""
    spec = _read_format_spec(format_name)
    reference = (TEMPLATES_DIR / "Referenz" / f"{format_name}.md").read_text(encoding="utf-8")
    extra = f"\n\nZUSÄTZLICHE ANWEISUNGEN VOM NUTZER:\n{instructions}\n" if instructions else ""
    return f"""Erstelle einen Lern-Guide zum Thema "{topic}" im Format "{format_name}".

Recherchiere zuerst die aktuelle Version und aktuelle Fakten zu "{topic}" per Websuche,
damit Versionsnummern und Angaben stimmen.

Schreibe die HTML-Datei nach: {html_path}

Schreibe NUR die HTML-Datei. Führe KEIN weasyprint aus, erzeuge KEINE PDF.
Das übernimmt ein anderer Prozess.

FORMAT-SPEZIFIKATION:
{spec}

REFERENZ-IMPLEMENTIERUNG (Stil-Vorlage, adaptiere für "{topic}"):
{reference}
{extra}"""


def _build_rework_prompt(topic: str, format_name: str, html_path: Path, instructions: str) -> str:
    """Build the prompt that makes the agent rework the existing HTML per user instructions."""
    spec = _read_format_spec(format_name)
    return f"""Überarbeite die bestehende HTML-Datei {html_path} für den "{format_name}" zum Thema "{topic}".

Lies zuerst die aktuelle HTML-Datei mit dem Read-Tool.

ANWEISUNGEN VOM NUTZER:
{instructions}

FORMAT-SPEZIFIKATION (muss weiterhin eingehalten werden):
{spec}

Schreibe die überarbeitete Version in dieselbe Datei: {html_path}
Führe KEIN weasyprint aus, erzeuge KEINE PDF.
"""


def _build_fix_prompt(topic: str, format_name: str, html_path: Path, feedback: str) -> str:
    """Build the prompt that makes the agent fix the HTML based on reviewer feedback."""
    return f"""Die HTML-Datei {html_path} für den "{format_name}" zum Thema "{topic}" hat Probleme.

FEEDBACK VOM PRÜFER:
{feedback}

Behebe die Probleme in der HTML-Datei {html_path}.
Schreibe die korrigierte Version in dieselbe Datei.
Führe KEIN weasyprint aus, erzeuge KEINE PDF.
"""


def _build_review_prompt(format_name: str, png_paths: list[Path], page_count: int) -> str:
    """Build the prompt for the visual review of the rendered page screenshots.

    The reviewer is told to answer with ``PASS`` or with ``FAIL`` followed by
    a list of problems.
    """
    spec = _read_format_spec(format_name)
    read_instructions = "\n".join(f"- Öffne mit dem Read-Tool: {p}" for p in png_paths)
    return f"""Prüfe visuell einen generierten "{format_name}" Guide.

SCHRITT 1 — Bilder laden:
Das PDF hat {page_count} Seite(n), gerendert als PNG-Screenshots.
Nutze das Read-Tool, um JEDE der folgenden Dateien zu öffnen und visuell zu inspizieren:
{read_instructions}

SCHRITT 2 — Visuell prüfen anhand dieser Spezifikation:
{spec}

Prüfkriterien (basierend auf dem, was du in den Bildern SIEHST):
- Stimmt die Seitenanzahl? (OnePager/Cheatsheet = exakt 1 Seite)
- Ist Text abgeschnitten, überlappt oder läuft aus dem sichtbaren Bereich?
- Fehlen Pflicht-Elemente (Cover, TOC, Recall-Boxen, Callouts, etc.)?
- Sind Code-Blöcke über Seitenumbrüche zerrissen?
- Ist das Layout korrekt (Spalten, Grid, Footer)?

SCHRITT 3 — Antworte mit GENAU EINEM der folgenden Formate:

Bei Bestehen:
PASS

Bei Nicht-Bestehen:
FAIL
- Problem 1
- Problem 2
- ...
"""


async def _render_review_fix_loop(
    guide_id: str,
    topic: str,
    format_name: str,
    html_path: Path,
    pdf_path: Path,
    preview_dir: Path,
    timeout: int,
    max_iter: int,
) -> bool:
    """Render, review and fix the guide for at most *max_iter* iterations.

    Every iteration renders the PDF and its PNG previews and runs the review.
    A failed review triggers a fix run followed by another iteration, except
    on the last iteration, where the most recent render is accepted as is.

    Returns:
        ``True`` when the guide may be marked as done. ``False`` when the
        pipeline has to stop: either the guide was cancelled, or an error
        state has already been stored via ``_fail``.
    """
    for iteration in range(1, max_iter + 1):
        if guide_id in _cancelled:
            return False

        await _set_progress(guide_id, f"Rendere PDF… (Iteration {iteration})")
        ok, err = await _render_pdf(html_path, pdf_path)
        if not ok:
            await _fail(guide_id, f"WeasyPrint-Fehler: {err}")
            return False

        await _set_progress(guide_id, f"Prüfe… (Iteration {iteration})")
        pngs = await _render_pngs(pdf_path, preview_dir)
        if guide_id in _cancelled:
            return False
        review_prompt = _build_review_prompt(format_name, pngs, len(pngs))
        returncode, review_out, review_err = await _run_claude(guide_id, review_prompt, _REVIEW_TIMEOUT)
        # A cancel kills the process, which then exits non-zero. Check for the
        # cancel first so "Abgebrochen" is not overwritten by a review error.
        if guide_id in _cancelled:
            return False
        if returncode != 0:
            await _fail(guide_id, f"Review-Fehler: {review_err[:1000]}")
            return False

        review_text = review_out.strip()
        if review_text.startswith("PASS") or iteration == max_iter:
            return True

        # Strip only the leading verdict; replacing every "FAIL" would also
        # mangle the reviewer's wording (e.g. "FAILED").
        feedback = review_text.removeprefix("FAIL").strip()
        await _set_progress(guide_id, f"Korrigiere… (Iteration {iteration})")
        fix_prompt = _build_fix_prompt(topic, format_name, html_path, feedback)
        returncode, _, fix_err = await _run_claude(guide_id, fix_prompt, timeout)
        if guide_id in _cancelled:
            return False
        if returncode != 0:
            await _fail(guide_id, f"Fix-Fehler: {fix_err[:1000]}")
            return False
    return True


async def _run_pipeline(
    guide_id: str,
    topic: str,
    format_name: str,
    *,
    initial_progress: str,
    build_prompt: Callable[[Path], str],
    error_label: str,
    start_progress: str | None = None,
) -> None:
    """Shared body of ``generate_guide`` and ``rework_guide``.

    Args:
        initial_progress: progress text stored together with status
            "generating".
        build_prompt: receives the HTML path and returns the prompt for the
            first Claude run.
        error_label: prefix of the error message when that first run fails.
        start_progress: optional progress text stored right before the first
            Claude run.
    """
    async with _semaphore:
        html_path = STORAGE_DIR / "html" / f"{guide_id}.html"
        pdf_path = STORAGE_DIR / "pdf" / f"{guide_id}.pdf"
        preview_dir = STORAGE_DIR / "preview" / guide_id
        timeout = GENERATION_TIMEOUTS.get(format_name, 600)
        max_iter = MAX_ITERATIONS.get(format_name, 3)

        try:
            # Check before touching the status: a guide cancelled while it was
            # queued must keep its "Abgebrochen" state instead of being set
            # back to "generating" and then left there.
            if guide_id in _cancelled:
                return
            await update_guide(
                guide_id,
                status="generating",
                progress=initial_progress,
                updated_at=_utc_now_iso(),
            )
            if start_progress is not None:
                await _set_progress(guide_id, start_progress)

            returncode, _, stderr = await _run_claude(guide_id, build_prompt(html_path), timeout)
            if guide_id in _cancelled:
                return
            if returncode != 0:
                await _fail(guide_id, f"{error_label}: {stderr[:1000]}")
                return
            if not html_path.exists():
                await _fail(guide_id, "HTML-Datei wurde nicht erstellt")
                return

            proceed = await _render_review_fix_loop(
                guide_id, topic, format_name, html_path, pdf_path, preview_dir, timeout, max_iter
            )
            if not proceed or guide_id in _cancelled:
                return

            # The PDF already exists from the last render of the loop.
            await update_guide(
                guide_id,
                status="done",
                progress=None,
                html_path=str(html_path),
                pdf_path=str(pdf_path),
                updated_at=_utc_now_iso(),
            )
        except asyncio.TimeoutError:
            await _fail(guide_id, f"Timeout nach {timeout}s")
        except Exception as e:
            # Top-level boundary of the background job: record the failure
            # for the guide instead of losing it with the task.
            await _fail(guide_id, (str(e) or type(e).__name__)[:2000])
        finally:
            _active_processes.pop(guide_id, None)
            _cancelled.discard(guide_id)
            # Remove the preview PNGs; must never raise out of ``finally``.
            shutil.rmtree(preview_dir, ignore_errors=True)


async def generate_guide(guide_id: str, topic: str, format_name: str, instructions: str = "") -> None:
    """Generate a new guide: write the HTML, then render, review and fix it.

    The outcome is stored via ``update_guide`` (status "done" or "error");
    nothing is returned.
    """
    await _run_pipeline(
        guide_id,
        topic,
        format_name,
        initial_progress="Recherche…",
        start_progress="Generiere HTML…",
        build_prompt=lambda html_path: _build_generator_prompt(topic, format_name, html_path, instructions),
        error_label="Generator-Fehler",
    )


async def rework_guide(guide_id: str, topic: str, format_name: str, instructions: str) -> None:
    """Rework an existing guide per *instructions*, then render, review and fix it.

    The outcome is stored via ``update_guide`` (status "done" or "error");
    nothing is returned.
    """
    await _run_pipeline(
        guide_id,
        topic,
        format_name,
        initial_progress="Überarbeite…",
        build_prompt=lambda html_path: _build_rework_prompt(topic, format_name, html_path, instructions),
        error_label="Rework-Fehler",
    )