From b2486a73a1eec604160dac54553fce4f697672b6 Mon Sep 17 00:00:00 2001 From: Team3 Date: Sat, 6 Jun 2026 17:40:06 +0200 Subject: [PATCH] update --- Makefile | 17 +- backend/agents.py | 15 +- backend/config.py | 25 +- backend/generator.py | 352 +++++------------- backend/main.py | 3 +- backend/models.py | 2 +- backend/paths.py | 27 +- backend/routes.py | 3 +- dev-ops/opencode.json | 31 +- frontend/src/components/TopicSidebar.vue | 8 +- .../Prompt/Bausteine-Einordnung-Final.md | 23 -- templates/Prompt/Bausteine-Einordnung.md | 20 - templates/Prompt/Bausteine-Sortierung.md | 19 +- 13 files changed, 203 insertions(+), 342 deletions(-) delete mode 100644 templates/Prompt/Bausteine-Einordnung-Final.md delete mode 100644 templates/Prompt/Bausteine-Einordnung.md diff --git a/Makefile b/Makefile index 5571352..b862bbc 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -.PHONY: install dev prod stop logs remove auth sync +.PHONY: install dev prod stop logs remove auth sync searxng ollama COMPOSE = docker compose @@ -47,9 +47,18 @@ remove: stop rm -rf storage/* @echo "Fertig." +searxng: + docker run -d --name searxng --restart unless-stopped -p 8888:8080 searxng/searxng + @echo "SearxNG läuft auf http://localhost:8888 (Websuche für den Lokal-Provider)." + +ollama: + @which ollama >/dev/null 2>&1 || curl -fsSL https://ollama.com/install.sh | sh + ollama pull qwen3:14b + ollama pull qwen3:8b + @echo "Ollama bereit — Provider 'Lokal' ist aktiv (Modelle anpassen: backend/config.py + dev-ops/opencode.json)." + sync: - @mkdir -p storage/guides storage/bausteine + @mkdir -p storage/themen rsync -avz --progress root@178.104.67.87:/var/www/creator/storage/creator.db storage/ - rsync -avz --progress --delete root@178.104.67.87:/var/www/creator/storage/guides/ storage/guides/ - rsync -avz --progress --delete root@178.104.67.87:/var/www/creator/storage/bausteine/ storage/bausteine/ + rsync -avz --progress --delete root@178.104.67.87:/var/www/creator/storage/themen/ storage/themen/ @echo "Sync abgeschlossen." diff --git a/backend/agents.py b/backend/agents.py index 4a9ce97..d81ff7c 100644 --- a/backend/agents.py +++ b/backend/agents.py @@ -9,6 +9,7 @@ import os import re import shutil import tempfile +import urllib.request from pathlib import Path from config import PROVIDERS, DEFAULT_PROVIDER @@ -41,6 +42,12 @@ def provider_available(provider: str) -> bool: env_key = cfg.get("env_key") if env_key and not os.environ.get(env_key): return False + check_url = cfg.get("check_url") + if check_url: + try: + urllib.request.urlopen(check_url, timeout=1) + except Exception: + return False return True @@ -63,8 +70,8 @@ async def run_agent( return 1, "", f"Unbekannter Provider: {provider}" if shutil.which(PROVIDERS[provider]["cli"]) is None: return 1, "", f"CLI '{PROVIDERS[provider]['cli']}' nicht installiert (Provider: {provider})" - if provider == "minimax": - return await _run_opencode(agent_key, prompt, timeout, role, capabilities) + if PROVIDERS[provider]["cli"] == "opencode": + return await _run_opencode(agent_key, prompt, timeout, provider, role, capabilities) return await _run_claude_cli(agent_key, prompt, timeout, role, capabilities) @@ -104,8 +111,8 @@ async def _run_claude_cli(agent_key: str, prompt: str, timeout: int, role: str, return await _communicate(agent_key, cmd, prompt.encode("utf-8"), timeout) -async def _run_opencode(agent_key: str, prompt: str, timeout: int, role: str, capabilities: str) -> tuple[int, str, str]: - cfg = PROVIDERS["minimax"] +async def _run_opencode(agent_key: str, prompt: str, timeout: int, provider: str, role: str, capabilities: str) -> tuple[int, str, str]: + cfg = PROVIDERS[provider] # Prompt über Tempdatei statt argv (ARG_MAX-Schutz bei großen Projekt-Prompts) with tempfile.NamedTemporaryFile("w", suffix=".md", delete=False, encoding="utf-8", dir=tempfile.gettempdir()) as f: f.write(prompt) diff --git a/backend/config.py b/backend/config.py index 7f350f3..fad3734 100644 --- a/backend/config.py +++ b/backend/config.py @@ -15,9 +15,7 @@ TIMEOUTS = { "recherche": (1800, 0), # fix 30 min "auswahl": (600, 10), "auswahl_check": (300, 2), - "einordnung": (300, 5), - "final": (300, 2), # verifiziert nur noch, kleiner Output - "sortierung": (300, 2), + "sortierung": (300, 5), "plan": (300, 5), "writer": (600, 120), # pro Section im Chunk "onepager_recherche": (900, 0), @@ -25,6 +23,13 @@ TIMEOUTS = { "onepager_verify": (300, 0), } +# Welcher Anteil der sortierten Baustein-Liste in welches Format fließt: (Anteil, Mindestanzahl). +FORMAT_ANTEIL = { + "MiniGuide": (0.10, 8), + "Guide": (0.50, 20), + "FullGuide": (1.00, 0), +} + # Provider-Stacks: komplett unabhängig, einer kann jederzeit entfernt werden. # Rollen: "quick" = Massenarbeit (Recherche, Einordnung), # "fast" = Urteilsaufgaben mit kleinem Output (Auswahl, Final, OnePager, Chat), @@ -35,14 +40,22 @@ PROVIDERS = { "cli": "claude", "guide": "claude-opus-4-8[1m]", "fast": "claude-sonnet-4-6", - "quick": "claude-haiku-4-5", + "quick": "claude-sonnet-4-6", "env_key": None, # Auth via CLAUDE_CODE_OAUTH_TOKEN oder ~/.claude }, "minimax": { "cli": "opencode", "guide": "minimax/MiniMax-M3", - "fast": "minimax/MiniMax-M2.7-highspeed", - "quick": "minimax/MiniMax-M2.7-highspeed", + "fast": "minimax/MiniMax-M3", + "quick": "minimax/MiniMax-M3", "env_key": "MINIMAX_API_KEY", }, + "lokal": { + "cli": "opencode", + "guide": "ollama/qwen3:14b", + "fast": "ollama/qwen3:8b", + "quick": "ollama/qwen3:8b", + "env_key": None, + "check_url": "http://localhost:11434/api/tags", # Ollama erreichbar? + }, } diff --git a/backend/generator.py b/backend/generator.py index 39da7d5..870197e 100644 --- a/backend/generator.py +++ b/backend/generator.py @@ -1,20 +1,22 @@ import asyncio import json +import math +import shutil import re import uuid -from collections import Counter from datetime import datetime, timezone from pathlib import Path from agents import run_agent, kill_process from config import ( DEFAULT_PROVIDER, + FORMAT_ANTEIL, TEMPLATES_DIR, TIMEOUTS, MAX_CONCURRENT_GENERATIONS, ) from database import update_guide -from paths import bausteine_path, guide_content_path, project_dir +from paths import arbeit_dir, bausteine_path, guide_content_path, project_dir _semaphore = asyncio.Semaphore(MAX_CONCURRENT_GENERATIONS) _cancelled: set[str] = set() @@ -113,55 +115,48 @@ def _json_datei(path: Path): return None -def _resolve_kategorien(data, entries: dict[int, str], min_match: float = 0.85): - """{"KERN": [Titel], …} → {num: Kategorie}; None bei zu vielen unbekannten Titeln +def _resolve_liste(data, entries: dict[int, str], min_match: float = 0.85) -> list[int] | None: + """{"reihenfolge": [Titel, …]} → [nums]; None bei zu vielen unbekannten Titeln oder zu geringer Abdeckung der Einträge.""" - if not isinstance(data, dict): + if not isinstance(data, dict) or not isinstance(data.get("reihenfolge"), list): return None idx = _titel_index(entries) - mapping: dict[int, str] = {} + nums: list[int] = [] total = unknown = 0 - for cat in _CATEGORIES: - items = data.get(cat, []) - if not isinstance(items, list): + for t in data["reihenfolge"]: + if not isinstance(t, str): return None - for t in items: - if not isinstance(t, str): - return None - total += 1 - num = _titel_aufloesen(idx, t) - if num is None: - unknown += 1 - elif num not in mapping: - mapping[num] = cat + total += 1 + num = _titel_aufloesen(idx, t) + if num is None: + unknown += 1 + elif num not in nums: + nums.append(num) if total == 0: return None - if (total - unknown) / total < min_match or len(mapping) / len(entries) < min_match: + if (total - unknown) / total < min_match or len(nums) / len(entries) < min_match: return None - return mapping + return nums -def _resolve_reihenfolge(data, entries: dict[int, str], min_match: float = 0.85): - """Wie _resolve_kategorien, aber liefert die Reihenfolge: {Kategorie: [nums]}.""" - mapping = _resolve_kategorien(data, entries, min_match) - if mapping is None: - return None - idx = _titel_index(entries) - order: dict[str, list[int]] = {c: [] for c in _CATEGORIES} - for cat in _CATEGORIES: - for t in data.get(cat, []): - num = _titel_aufloesen(idx, t) if isinstance(t, str) else None - if num is not None and num not in order[cat]: - order[cat].append(num) - return order +def _merge_sortierungen(topic: str, listen: list[list[int]], entries: dict[int, str]) -> list[int]: + """Median-Rang über mehrere Sortierungen; Bausteine ohne Stimmen ans Ende.""" + raenge: dict[int, list[int]] = {num: [] for num in entries} + for liste in listen: + for rang, num in enumerate(liste): + if num in raenge: + raenge[num].append(rang) + ohne = [num for num, r in raenge.items() if not r] + if ohne: + _log(topic, f"Sortierung: keine Stimmen für {[_titel(entries[n]) for n in ohne]} → ans Ende") + def key(num: int): + r = sorted(raenge[num]) + if not r: + return (10**9, 10**9, num) + return (r[len(r) // 2], sum(r) / len(r), num) -def _kategorien_block(mapping: dict[int, str], entries: dict[int, str]) -> str: - parts = [] - for cat in _CATEGORIES: - titel = [_titel(entries[n]) for n in sorted(entries) if mapping.get(n) == cat] - parts.append(f"{cat}:\n" + ("\n".join(f"- {t}" for t in titel) if titel else "(leer)")) - return "\n".join(parts) + return sorted(entries, key=key) def _timeout(step: str, n: int = 0) -> int: @@ -239,36 +234,31 @@ async def _race(topic: str, label: str, slots: list[dict], quorum: int, timeout: await asyncio.gather(*tasks.keys(), return_exceptions=True) -# --- Bausteine-Pipeline: 4x Recherche (3) → 2x Auswahl (1) → Check → 4x Einordnung (3) → Mehrheit+Verifikation → Sortierung --- +# --- Bausteine-Pipeline: 4x Recherche (3) → 2x Auswahl (1) → Check → 3x Sortierung (Median-Rang) --- _bausteine_progress: dict[str, str] = {} _bausteine_errors: dict[str, str] = {} _bausteine_cancelled: set[str] = set() _bausteine_step: dict[str, int] = {} -BAUSTEINE_STEPS = ("Recherche", "Auswahl", "Prüfung", "Einordnung", "Verifikation", "Sortierung") -_CATEGORIES = ("KERN", "WICHTIG", "REST") +BAUSTEINE_STEPS = ("Recherche", "Auswahl", "Prüfung", "Sortierung") +_CATEGORIES = ("KERN", "WICHTIG", "REST") # nur noch für den Altformat-Reader def _bausteine_files(topic: str) -> dict: - final_path = bausteine_path(topic) - stem, parent = final_path.stem, final_path.parent + arbeit = arbeit_dir(topic) return { - "final": final_path, - "recherche": [parent / f"{stem}.recherche-{i}.md" for i in (1, 2, 3, 4)], - "auswahl": [parent / f"{stem}.auswahl-{i}.md" for i in (1, 2)], - "auswahl_check": parent / f"{stem}.auswahl-check.json", - "einordnung": [parent / f"{stem}.einordnung-{i}.json" for i in (1, 2, 3, 4)], - "final_check": parent / f"{stem}.final-check.json", - "sortierung": parent / f"{stem}.sortierung.json", + "final": bausteine_path(topic), + "arbeit": arbeit, + "recherche": [arbeit / f"recherche-{i}.md" for i in (1, 2, 3, 4)], + "auswahl": [arbeit / f"auswahl-{i}.md" for i in (1, 2)], + "auswahl_check": arbeit / "auswahl-check.json", + "sortierung": [arbeit / f"sortierung-{i}.json" for i in (1, 2, 3)], } def _alle_slot_dateien(files: dict) -> list[Path]: - return [ - *files["recherche"], *files["auswahl"], files["auswahl_check"], - *files["einordnung"], files["final_check"], files["sortierung"], - ] + return [*files["recherche"], *files["auswahl"], files["auswahl_check"], *files["sortierung"]] def cancel_bausteine(topic: str) -> bool: @@ -288,11 +278,7 @@ def _resume_step(topic: str) -> int: return 1 if not files["auswahl_check"].exists(): return 2 - if sum(p.exists() for p in files["einordnung"]) < 3: - return 3 - if not files["final_check"].exists(): - return 4 - return 5 + return 3 def bausteine_status(topic: str) -> dict: @@ -307,8 +293,6 @@ def bausteine_status(topic: str) -> dict: ] elif ready: states = ["done"] * len(BAUSTEINE_STEPS) - if not _bausteine_files(topic)["sortierung"].exists(): - states[-1] = "pending" else: nxt = _resume_step(topic) partial = nxt > 0 @@ -330,12 +314,7 @@ def active_bausteine() -> list[dict]: def reset_bausteine(topic: str) -> None: files = _bausteine_files(topic) files["final"].unlink(missing_ok=True) - for p in _alle_slot_dateien(files): - p.unlink(missing_ok=True) - # Altlasten früherer Formatversionen - stem, parent = files["final"].stem, files["final"].parent - for alt in parent.glob(f"{stem}.*.md"): - alt.unlink(missing_ok=True) + shutil.rmtree(files["arbeit"], ignore_errors=True) _bausteine_errors.pop(topic, None) @@ -351,7 +330,7 @@ def _build_recherche_prompt(topic: str, out_path: Path, instructions: str = "", def _parse_auswahl(text: str) -> dict[int, str]: - """Parst die konsolidierte Liste: `N. Titel — Kurzbeschreibung` pro Zeile.""" + """Parst eine Baustein-Liste: `N. Titel — Kurzbeschreibung` pro Zeile.""" entries: dict[int, str] = {} last = None for line in text.splitlines(): @@ -364,46 +343,30 @@ def _parse_auswahl(text: str) -> dict[int, str]: return entries -def _majority(mappings: list[dict[int, str]], entries: dict[int, str]) -> tuple[dict[int, str], list[int]]: - """Mehrheitsentscheid über die Einordnungen; ohne Mehrheit → Streitfall.""" - mapping: dict[int, str] = {} - disputes: list[int] = [] - for num in entries: - votes = [m[num] for m in mappings if num in m] - if not votes: - disputes.append(num) +def _parse_kategorien(text: str) -> dict[str, list[str]]: + """Altformat-Reader: finale Baustein-Datei mit ## KERN/WICHTIG/REST-Abschnitten.""" + cats: dict[str, list[str]] = {} + current = None + for line in text.splitlines(): + s = line.strip() + m = re.match(r"#+\s*(KERN|WICHTIG|REST)\b", s, re.IGNORECASE) + if m: + current = m.group(1).upper() + cats.setdefault(current, []) continue - cat, count = Counter(votes).most_common(1)[0] - if count >= 2: - mapping[num] = cat - else: - disputes.append(num) - return mapping, disputes + m = re.match(r"(\d+)[.)]\s+(.*\S)", s) + if m and current: + cats[current].append(m.group(2)) + return cats -def _build_final_bausteine(topic: str, entries: dict[int, str], mapping: dict[int, str], order: dict[str, list[int]] | None = None) -> str: - """Baut die finale Baustein-Datei aus konsolidierter Liste + finaler Zuordnung. - - `order` (Kategorie → Nummern in Lernreihenfolge) sortiert innerhalb der - Kategorien; nicht gelistete Nummern hängen in Originalreihenfolge hinten an. - """ - grouped: dict[str, list[int]] = {c: [] for c in _CATEGORIES} - for num in sorted(entries): - cat = mapping.get(num) - if cat is None: - _log(topic, f"Baustein {num} fehlt in finaler Einordnung → REST") - cat = "REST" - grouped[cat].append(num) - if order: - for cat in _CATEGORIES: - wanted = set(grouped[cat]) - seq = [n for n in order.get(cat, []) if n in wanted] - grouped[cat] = seq + [n for n in grouped[cat] if n not in seq] - parts = [] - for cat in _CATEGORIES: - lines = "\n".join(f"{i}. {entries[num]}" for i, num in enumerate(grouped[cat], 1)) - parts.append(f"## {cat}\n{lines}") - return "\n\n".join(parts) + "\n" +def _lade_bausteine(text: str) -> dict[int, str]: + """Lädt die finale Baustein-Datei — sortierte Liste (neu) oder Kategorien (Altformat).""" + if re.search(r"^#+\s*KERN\b", text, re.IGNORECASE | re.MULTILINE): + cats = _parse_kategorien(text) + texts = [t for cat in _CATEGORIES for t in cats.get(cat, [])] + return {i: t for i, t in enumerate(texts, 1)} + return _parse_auswahl(text) def _file_payload(path: Path): @@ -442,23 +405,6 @@ def _titel_aufloesen(idx: dict[str, int], t: str) -> int | None: return idx.get(_norm_titel(t)) or idx.get(_norm_titel(_titel(t))) -async def _run_sortierung(topic: str, entries: dict[int, str], mapping: dict[int, str], provider: str, cancelled) -> dict[str, list[int]] | None: - """Sortiert innerhalb der Kategorien; die JSON-Datei des Agenten ist zugleich der Marker.""" - out = _bausteine_files(topic)["sortierung"] - out.unlink(missing_ok=True) - slots = [{ - "key": f"bausteine-{topic}-sortierung-1", - "prompt": _prompt("Bausteine-Sortierung", topic=topic, einordnung=_kategorien_block(mapping, entries), out_path=out), - "role": "quick", "capabilities": "files", - "payload": (lambda result: _resolve_reihenfolge(_json_datei(out), entries)), - }] - res = await _race(topic, "Sortierung", slots, 1, _timeout("sortierung", len(entries)), provider, cancelled=cancelled) - if res is None: - out.unlink(missing_ok=True) - return None - return res[0] - - async def generate_bausteine(topic: str, instructions: str = "", provider: str = DEFAULT_PROVIDER) -> None: if topic in _bausteine_progress: return @@ -482,31 +428,8 @@ async def generate_bausteine(topic: str, instructions: str = "", provider: str = try: async with _semaphore: - # Fertig, aber ohne Sortier-Marker (ältere Version): nur die Sortierung nachholen. - if final_path.exists() and not files["sortierung"].exists(): - cats = _parse_kategorien(final_path.read_text(encoding="utf-8")) - entries: dict[int, str] = {} - mapping: dict[int, str] = {} - i = 0 - for cat in _CATEGORIES: - for text in cats.get(cat, []): - i += 1 - entries[i] = text - mapping[i] = cat - entries = _eindeutige_titel(entries) - if entries: - set_p("Sortiere Bausteine…", step=5) - order = await _run_sortierung(topic, entries, mapping, provider, is_cancelled) - if is_cancelled(): - abgebrochen() - return - if order is None: - _bausteine_errors[topic] = "Sortierung fehlgeschlagen" - return - final_path.write_text(_build_final_bausteine(topic, entries, mapping, order), encoding="utf-8") - return - - # „Neu erstellen": fertige (sortierte) Bausteine → kompletter Frischstart. + files["arbeit"].mkdir(parents=True, exist_ok=True) + # „Neu erstellen": fertige Bausteine → kompletter Frischstart. # Sonst sind Slot-Dateien Reste eines Abbruchs/Fehlers → Resume. if final_path.exists(): for p_alt in _alle_slot_dateien(files): @@ -612,103 +535,47 @@ async def generate_bausteine(topic: str, instructions: str = "", provider: str = entries = _eindeutige_titel(entries) bausteine_liste = "\n".join(f"- {t}" for t in entries.values()) - # Schritt 3: 4 Einordnungs-Agenten, 3 gültige nötig (JSON-Dateien, Titel-validiert) + # Schritt 3: 3 Sortier-Agenten, ALLE nötig — Merge per Median-Rang n = len(entries) - einordnungen: list[dict[int, str]] = [] + sortierungen: list[list[int]] = [] offen = [] - for i, path in enumerate(files["einordnung"], 1): - m = _resolve_kategorien(_json_datei(path), entries) - if m is not None and len(einordnungen) < 3: - einordnungen.append(m) + for i, path in enumerate(files["sortierung"], 1): + liste = _resolve_liste(_json_datei(path), entries) + if liste is not None and len(sortierungen) < 3: + sortierungen.append(liste) else: path.unlink(missing_ok=True) offen.append((i, path)) - vorhanden = len(einordnungen) - set_p(f"Einordnung läuft ({vorhanden}/3 gültig)…", step=3) + vorhanden = len(sortierungen) + set_p(f"Sortierung läuft ({vorhanden}/3 gültig)…", step=3) if vorhanden < 3: slots = [ { - "key": f"bausteine-{topic}-einordnung-{i}", - "prompt": _prompt("Bausteine-Einordnung", topic=topic, bausteine=bausteine_liste, out_path=path), + "key": f"bausteine-{topic}-sortierung-{i}", + "prompt": _prompt("Bausteine-Sortierung", topic=topic, bausteine=bausteine_liste, out_path=path), "role": "quick", "capabilities": "files", - "payload": (lambda result, p=path: _resolve_kategorien(_json_datei(p), entries)), + "payload": (lambda result, p=path: _resolve_liste(_json_datei(p), entries)), } for i, path in offen ] neue = await _race( - topic, "Einordnung", slots, 3 - vorhanden, _timeout("einordnung", n), provider, - on_update=lambda c: set_p(f"Einordnung läuft ({vorhanden + c}/3 gültig)…"), + topic, "Sortierung", slots, 3 - vorhanden, _timeout("sortierung", n), provider, + on_update=lambda c: set_p(f"Sortierung läuft ({vorhanden + c}/3 gültig)…"), cancelled=is_cancelled, ) if is_cancelled(): abgebrochen() return if neue is None: - _bausteine_errors[topic] = "Einordnung fehlgeschlagen (Quorum nicht erreicht)" + _bausteine_errors[topic] = "Sortierung fehlgeschlagen (Quorum nicht erreicht)" return - einordnungen += neue + sortierungen += neue - # Schritt 4: Python-Mehrheitsentscheid + Verifikations-Agent (antwortet nur mit Deltas, JSON) - set_p("Verifiziere Einordnung…", step=4) - mapping, disputes = _majority(einordnungen, entries) - if disputes: - _log(topic, f"Keine Mehrheit bei: {disputes}") - - def _final_schema(data): - if not isinstance(data, dict): - return None - idx = _titel_index(entries) - out: dict[int, str] = {} - for t, cat in data.items(): - if not isinstance(t, str) or cat not in _CATEGORIES: - return None - num = _titel_aufloesen(idx, t) - if num is not None: - out[num] = cat - return out # leeres Dict = alles bestätigt - - fc_path = files["final_check"] - overrides = _final_schema(_json_datei(fc_path)) - if overrides is None: - fc_path.unlink(missing_ok=True) - streit_block = "\n".join(f"- {entries[n]}" for n in disputes) or "(keine)" - slots = [{ - "key": f"bausteine-{topic}-final-1", - "prompt": _prompt( - "Bausteine-Einordnung-Final", - topic=topic, einordnung=_kategorien_block(mapping, entries), - streitfaelle=streit_block, out_path=fc_path, - ), - "role": "fast", "capabilities": "files", - "payload": (lambda result: _final_schema(_json_datei(fc_path))), - }] - finals = await _race(topic, "Final", slots, 1, _timeout("final", n), provider, cancelled=is_cancelled) - if is_cancelled(): - abgebrochen() - return - if finals is None: - _log(topic, "Final-Verifikation fehlgeschlagen — Mehrheitsentscheid bleibt unverändert") - overrides = {} - else: - overrides = finals[0] - korrekturen = {num: cat for num, cat in overrides.items() if mapping.get(num) != cat and num not in disputes} - if korrekturen: - _log(topic, f"Final-Verifikation korrigiert: { {_titel(entries[n]): c for n, c in korrekturen.items()} }") - mapping.update(overrides) - for num in disputes: - if num not in mapping: - _log(topic, f"Streitfall '{_titel(entries[num])}' unentschieden → REST") - mapping[num] = "REST" - - # Schritt 5: Sortierung innerhalb der Kategorien (einfach → komplex, nicht fatal) - set_p("Sortiere Bausteine…", step=5) - order = await _run_sortierung(topic, entries, mapping, provider, is_cancelled) - if is_cancelled(): - abgebrochen() - return - if order is None: - _log(topic, "Sortierung fehlgeschlagen — Originalreihenfolge bleibt (Nachholen über ▶)") - final_path.write_text(_build_final_bausteine(topic, entries, mapping, order), encoding="utf-8") + reihenfolge = _merge_sortierungen(topic, sortierungen, entries) + final_path.write_text( + "\n".join(f"{i}. {entries[num]}" for i, num in enumerate(reihenfolge, 1)) + "\n", + encoding="utf-8", + ) except Exception as e: _bausteine_errors[topic] = str(e)[:2000] finally: @@ -720,34 +587,10 @@ async def generate_bausteine(topic: str, instructions: str = "", provider: str = # --- Guide-Generierung: Bausteine → (Plan) → Writer → JSON --- -# Welche Baustein-Kategorien jedes Format abdeckt. -FORMAT_COVERAGE = { - "MiniGuide": ("KERN",), - "Guide": ("KERN", "WICHTIG"), - "FullGuide": ("KERN", "WICHTIG", "REST"), -} - # Parallele Writer pro Format (OnePager hat einen eigenen Weg). WRITER_COUNT = {"MiniGuide": 1, "Guide": 2, "FullGuide": 4} -def _parse_kategorien(text: str) -> dict[str, list[str]]: - """Parst die finale Baustein-Datei (## KERN/WICHTIG/REST mit nummerierten Einträgen).""" - cats: dict[str, list[str]] = {} - current = None - for line in text.splitlines(): - s = line.strip() - m = re.match(r"#+\s*(KERN|WICHTIG|REST)\b", s, re.IGNORECASE) - if m: - current = m.group(1).upper() - cats.setdefault(current, []) - continue - m = re.match(r"(\d+)[.)]\s+(.*\S)", s) - if m and current: - cats[current].append(m.group(2)) - return cats - - def _resolve_gliederung(data, entries: dict[int, str]) -> list[dict] | None: """{"kapitel": [{"titel", "bausteine": [Titel]}]} → [{"title", "nums"}]; None bei Schema-/Titel-Fehlern.""" if not isinstance(data, dict) or not isinstance(data.get("kapitel"), list): @@ -1046,6 +889,7 @@ async def generate_guide(guide_id: str, topic: str, format_name: str, instructio await update_guide(guide_id, status="generating", progress="Starte…", updated_at=now) content_path = guide_content_path(topic, format_name) + content_path.parent.mkdir(parents=True, exist_ok=True) project = project_dir(topic) if project_dir(topic).is_dir() else None fragment_paths: list[Path] = [] @@ -1056,13 +900,13 @@ async def generate_guide(guide_id: str, topic: str, format_name: str, instructio if format_name == "OnePager": chapters = await _generate_onepager(guide_id, topic, instructions, provider, project, content_path, fragment_paths) else: - cats = _parse_kategorien(bausteine_path(topic).read_text(encoding="utf-8")) - selected: list[str] = [] - for cat in FORMAT_COVERAGE[format_name]: - selected.extend(cats.get(cat, [])) - if not selected: - await _fail(guide_id, "Keine passenden Bausteine gefunden") + alle = _lade_bausteine(bausteine_path(topic).read_text(encoding="utf-8")) + if not alle: + await _fail(guide_id, "Keine Bausteine gefunden") return + anteil, minimum = FORMAT_ANTEIL[format_name] + k = min(len(alle), max(minimum, math.ceil(anteil * len(alle)))) + selected = [text for _, text in sorted(alle.items())][:k] entries = _eindeutige_titel({i: text for i, text in enumerate(selected, 1)}) facts = _prompt("Guide-Fakten-Projekt", project=project) if project else _prompt("Guide-Fakten-Thema") chapters = await _generate_sections( diff --git a/backend/main.py b/backend/main.py index 0a53b2d..bd63594 100644 --- a/backend/main.py +++ b/backend/main.py @@ -10,8 +10,7 @@ from routes import router @asynccontextmanager async def lifespan(app: FastAPI): - (STORAGE_DIR / "guides").mkdir(parents=True, exist_ok=True) - (STORAGE_DIR / "bausteine").mkdir(parents=True, exist_ok=True) + (STORAGE_DIR / "themen").mkdir(parents=True, exist_ok=True) await init_db() yield await close_db() diff --git a/backend/models.py b/backend/models.py index a768982..ee03b10 100644 --- a/backend/models.py +++ b/backend/models.py @@ -8,7 +8,7 @@ FormatType = Literal[ "FullGuide", ] -ProviderType = Literal["claude", "minimax"] +ProviderType = Literal["claude", "minimax", "lokal"] class GuideCreateRequest(BaseModel): diff --git a/backend/paths.py b/backend/paths.py index a52c033..e84a9c1 100644 --- a/backend/paths.py +++ b/backend/paths.py @@ -1,30 +1,35 @@ -import re from pathlib import Path from config import STORAGE_DIR, PROJECTS_DIR +THEMEN_DIR = STORAGE_DIR / "themen" + def _safe(name: str) -> str: return name.replace("/", "_").replace("\x00", "") -def guide_content_path(topic: str, format_name: str) -> Path: - return STORAGE_DIR / "guides" / f"{_safe(topic)} - {format_name}.json" +def topic_dir(topic: str) -> Path: + return THEMEN_DIR / _safe(topic) + + +def arbeit_dir(topic: str) -> Path: + return topic_dir(topic) / "arbeit" def bausteine_path(topic: str) -> Path: - return STORAGE_DIR / "bausteine" / f"{_safe(topic)}.md" + return topic_dir(topic) / "bausteine.md" + + +def guide_content_path(topic: str, format_name: str) -> Path: + return topic_dir(topic) / "guides" / f"{format_name}.json" def bausteine_topics() -> list[str]: - """Themen, für die eine finale Baustein-Datei existiert (ohne Zwischendateien).""" - bdir = STORAGE_DIR / "bausteine" - if not bdir.is_dir(): + """Themen, für die ein Themen-Ordner existiert.""" + if not THEMEN_DIR.is_dir(): return [] - return [ - p.stem for p in bdir.glob("*.md") - if not re.search(r"\.(recherche-\d+|auswahl(-\d+|-check)?|einordnung-\d+|final-check|sortierung)$", p.stem) - ] + return [d.name for d in THEMEN_DIR.iterdir() if d.is_dir()] def project_dir(name: str) -> Path: diff --git a/backend/routes.py b/backend/routes.py index 541f00c..0a29feb 100644 --- a/backend/routes.py +++ b/backend/routes.py @@ -24,7 +24,7 @@ from models import ( GuideChatRequest, GuideChatResponse, ProgressUpdate, ProgressResponse, ProjectResponse, ProviderInfo, ) -from paths import bausteine_path, bausteine_topics, guide_content_path, project_dir +from paths import bausteine_path, bausteine_topics, guide_content_path, project_dir, topic_dir router = APIRouter(prefix="/api") @@ -54,6 +54,7 @@ async def add_topic(req: TopicCreateRequest): @router.delete("/topics") async def remove_topic(topic: str): await delete_topic(topic) + shutil.rmtree(topic_dir(topic), ignore_errors=True) return {"ok": True} diff --git a/dev-ops/opencode.json b/dev-ops/opencode.json index 5e4f819..97bccb5 100644 --- a/dev-ops/opencode.json +++ b/dev-ops/opencode.json @@ -10,6 +10,21 @@ "name": "MiniMax M3" } } + }, + "ollama": { + "npm": "@ai-sdk/openai-compatible", + "name": "Ollama (lokal)", + "options": { + "baseURL": "http://localhost:11434/v1" + }, + "models": { + "qwen3:14b": { + "name": "Qwen3 14B" + }, + "qwen3:8b": { + "name": "Qwen3 8B" + } + } } }, "mcp": { @@ -20,6 +35,13 @@ "MINIMAX_API_KEY": "{env:MINIMAX_API_KEY}", "MINIMAX_API_HOST": "https://api.minimax.io" } + }, + "searxng": { + "type": "local", + "command": ["npx", "-y", "mcp-searxng"], + "environment": { + "SEARXNG_URL": "http://localhost:8888" + } } }, "agent": { @@ -39,7 +61,8 @@ "webfetch": "deny" }, "tools": { - "minimax-search*": false + "minimax-search*": false, + "searxng*": false } }, "readonly": { @@ -53,7 +76,8 @@ "write": false, "edit": false, "bash": false, - "minimax-search*": false + "minimax-search*": false, + "searxng*": false } }, "text": { @@ -70,7 +94,8 @@ "read": false, "glob": false, "grep": false, - "minimax-search*": false + "minimax-search*": false, + "searxng*": false } } } diff --git a/frontend/src/components/TopicSidebar.vue b/frontend/src/components/TopicSidebar.vue index c659db9..c87ca9b 100644 --- a/frontend/src/components/TopicSidebar.vue +++ b/frontend/src/components/TopicSidebar.vue @@ -23,7 +23,7 @@ function providerAvailable(id) { return p ? p.available : true } -const PROVIDER_LABELS = { claude: 'Claude', minimax: 'MiniMax' } +const PROVIDER_LABELS = { claude: 'Claude', minimax: 'MiniMax', lokal: 'Lokal' } const formats = [ { key: 'OnePager', label: 'OnePager' }, @@ -34,10 +34,6 @@ const formats = [ const BAUSTEINE_KEY = '__bausteine__' -const bausteineUnsortiert = computed( - () => props.bausteine.ready && props.bausteine.steps?.at(-1)?.state === 'pending', -) - const bausteineState = computed(() => { if (props.bausteine.generating) return 'generating' return props.bausteine.ready ? 'done' : 'none' @@ -214,7 +210,7 @@ function confirmDeleteProject(name) {