From c0b7d236bb5a55b3dcde69b9b9f36c655c0de9e3 Mon Sep 17 00:00:00 2001 From: team3 Date: Fri, 12 Jun 2026 07:54:57 +0200 Subject: [PATCH] =?UTF-8?q?Backend:=20WAL+busy=5Ftimeout,=20DB=E2=86=94Dat?= =?UTF-8?q?ei-Reconcile=20beim=20Start,=20zentraler=20JSON-Parser=20(jsoni?= =?UTF-8?q?o)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-Authored-By: Claude Opus 4.8 (1M context) --- backend/database.py | 3 +++ backend/generator.py | 55 ++++++++++++-------------------------------- backend/jsonio.py | 49 +++++++++++++++++++++++++++++++++++++++ backend/main.py | 2 ++ 4 files changed, 69 insertions(+), 40 deletions(-) create mode 100644 backend/jsonio.py diff --git a/backend/database.py b/backend/database.py index 151acdf..abf4bd0 100644 --- a/backend/database.py +++ b/backend/database.py @@ -60,6 +60,9 @@ async def get_db() -> aiosqlite.Connection: async def init_db(): db = await get_db() + # WAL übersteht Crashes deutlich besser; busy_timeout fängt kurze Locks ab. + await db.execute("PRAGMA journal_mode=WAL") + await db.execute("PRAGMA busy_timeout=5000") await db.execute(CREATE_GUIDES) await db.execute(CREATE_PROGRESS) await db.execute(CREATE_TOPICS) diff --git a/backend/generator.py b/backend/generator.py index 917e073..1e7c133 100644 --- a/backend/generator.py +++ b/backend/generator.py @@ -17,8 +17,9 @@ from config import ( TIMEOUTS, MAX_CONCURRENT_GENERATIONS, ) -from database import update_guide +from database import list_guides, update_guide from fsutil import atomic_write_json, atomic_write_text +from jsonio import parse_json_text as _parse_json_text, read_json_file as _json_datei from paths import arbeit_dir, bausteine_path, guide_content_path, project_dir _semaphore = asyncio.Semaphore(MAX_CONCURRENT_GENERATIONS) @@ -109,19 +110,6 @@ def _titel_index(entries: dict[int, str]) -> dict[str, int]: return {_norm_titel(_titel(text)): num for num, text in entries.items()} -def _json_datei(path: Path): - """Liest eine JSON-Datei (Code-Fences tolerant); None bei fehlend/ungültig.""" - if not path.exists(): - return None - try: - text = path.read_text(encoding="utf-8").strip() - text = re.sub(r"^```(?:json)?\s*|\s*```$", "", text) - return json.loads(text) - except Exception as e: - log.debug("JSON-Datei ungültig: %s (%s)", path, e) - return None - - def _timeout(step: str, n: int = 0) -> int: base, per = TIMEOUTS[step] return base + per * n @@ -1267,6 +1255,19 @@ async def _generate_sections( return chapters +async def reconcile_guides() -> None: + """DB↔Dateisystem abgleichen: status=done ohne Content-Datei → error. + + Läuft beim Server-Start (nach init_db) — fängt Crashes zwischen + Datei-Write und Status-Update ab. + """ + for g in await list_guides(): + if g["status"] == "done" and not guide_content_path(g["topic"], g["format"]).exists(): + log.warning("[%s] Guide %s: done ohne Content-Datei — auf error gesetzt", g["topic"], g["id"]) + now = datetime.now(timezone.utc).isoformat() + await update_guide(g["id"], status="error", error_msg="Inhalt fehlt — neu generieren", updated_at=now) + + async def generate_guide(guide_id: str, topic: str, format_name: str, instructions: str = "", provider: str = DEFAULT_PROVIDER) -> None: async with _semaphore: now = datetime.now(timezone.utc).isoformat() @@ -1354,32 +1355,6 @@ async def chat_with_guide(topic: str, format_name: str, section: str, outline: s # --- Elemente (persönliche Zusammenfassung) --- -def _parse_json_text(text: str): - """Parst JSON aus KI-Output (Code-Fences und Drumherum-Text tolerant). - - Repariert unescapte Anführungszeichen in Strings (z. B. MiniMax: "Titel „p" geändert"): - das letzte `"` vor der Fehlerstelle escapen und erneut parsen. - """ - text = re.sub(r"^```(?:json)?\s*|\s*```$", "", (text or "").strip()) - start, end = text.find("{"), text.rfind("}") - if start == -1 or end <= start: - return None - candidate = text[start:end + 1] - for _ in range(20): - try: - return json.loads(candidate) - except json.JSONDecodeError as e: - if not e.msg.startswith(("Expecting ',' delimiter", "Expecting ':' delimiter")): - return None - q = candidate.rfind('"', 0, e.pos) - if q <= 0: - return None - candidate = candidate[:q] + '\\"' + candidate[q + 1:] - except Exception: - return None - return None - - def _element_fields(data: dict) -> dict | None: """Validiert KI-Element-JSON und normalisiert auf die DB-Felder.""" if not isinstance(data, dict): diff --git a/backend/jsonio.py b/backend/jsonio.py new file mode 100644 index 0000000..3a2ea61 --- /dev/null +++ b/backend/jsonio.py @@ -0,0 +1,49 @@ +"""Toleranter JSON-Parser für KI-Output — als Text oder aus Dateien. + +Verkraftet Code-Fences, Drumherum-Text und unescapte Anführungszeichen in +Strings (z. B. MiniMax: "Titel „p" geändert"): das letzte `"` vor der +Fehlerstelle wird escapet und erneut geparst. +""" + +import json +import logging +import re +from pathlib import Path + +log = logging.getLogger("creator.jsonio") + + +def parse_json_text(text: str): + """Parst JSON aus KI-Output; None bei nicht reparierbarem Input.""" + text = re.sub(r"^```(?:json)?\s*|\s*```$", "", (text or "").strip()) + start, end = text.find("{"), text.rfind("}") + if start == -1 or end <= start: + return None + candidate = text[start:end + 1] + for _ in range(20): + try: + return json.loads(candidate) + except json.JSONDecodeError as e: + if not e.msg.startswith(("Expecting ',' delimiter", "Expecting ':' delimiter")): + return None + q = candidate.rfind('"', 0, e.pos) + if q <= 0: + return None + candidate = candidate[:q] + '\\"' + candidate[q + 1:] + except Exception: + return None + return None + + +def read_json_file(path: Path): + """Liest eine JSON-Datei mit derselben Toleranz; None bei fehlend/ungültig.""" + if not path.exists(): + return None + try: + data = parse_json_text(path.read_text(encoding="utf-8")) + except Exception as e: + log.debug("JSON-Datei nicht lesbar: %s (%s)", path, e) + return None + if data is None: + log.debug("JSON-Datei ungültig: %s", path) + return data diff --git a/backend/main.py b/backend/main.py index cae9709..1d9f4f6 100644 --- a/backend/main.py +++ b/backend/main.py @@ -9,6 +9,7 @@ setup_logging() from config import FRONTEND_DIST, STORAGE_DIR from database import init_db, close_db +from generator import reconcile_guides from routes import router @@ -16,6 +17,7 @@ from routes import router async def lifespan(app: FastAPI): (STORAGE_DIR / "themen").mkdir(parents=True, exist_ok=True) await init_db() + await reconcile_guides() yield await close_db()