update

2026-06-06 17:40:06 +02:00
parent c84fbbb484
commit b2486a73a1
13 changed files with 203 additions and 342 deletions
--- a/backend/agents.py
+++ b/backend/agents.py
@@ -9,6 +9,7 @@ import os
 import re
 import shutil
 import tempfile
+import urllib.request
 from pathlib import Path

 from config import PROVIDERS, DEFAULT_PROVIDER
@@ -41,6 +42,12 @@ def provider_available(provider: str) -> bool:
    env_key = cfg.get("env_key")
    if env_key and not os.environ.get(env_key):
        return False
+    check_url = cfg.get("check_url")
+    if check_url:
+        # Optional reachability probe (1 s timeout): any failure — refused
+        # connection, timeout, HTTP error status, malformed URL — marks the
+        # provider as unavailable instead of raising.
+        try:
+            # Close the response explicitly so the socket is released right
+            # away instead of whenever the object gets garbage-collected.
+            urllib.request.urlopen(check_url, timeout=1).close()
+        except Exception:
+            return False
    return True


@@ -63,8 +70,8 @@ async def run_agent(
        return 1, "", f"Unbekannter Provider: {provider}"
    if shutil.which(PROVIDERS[provider]["cli"]) is None:
        return 1, "", f"CLI '{PROVIDERS[provider]['cli']}' nicht installiert (Provider: {provider})"
-    if provider == "minimax":
-        return await _run_opencode(agent_key, prompt, timeout, role, capabilities)
+    if PROVIDERS[provider]["cli"] == "opencode":
+        return await _run_opencode(agent_key, prompt, timeout, provider, role, capabilities)
    return await _run_claude_cli(agent_key, prompt, timeout, role, capabilities)


@@ -104,8 +111,8 @@ async def _run_claude_cli(agent_key: str, prompt: str, timeout: int, role: str,
    return await _communicate(agent_key, cmd, prompt.encode("utf-8"), timeout)


-async def _run_opencode(agent_key: str, prompt: str, timeout: int, role: str, capabilities: str) -> tuple[int, str, str]:
-    cfg = PROVIDERS["minimax"]
+async def _run_opencode(agent_key: str, prompt: str, timeout: int, provider: str, role: str, capabilities: str) -> tuple[int, str, str]:
+    cfg = PROVIDERS[provider]
    # Prompt über Tempdatei statt argv (ARG_MAX-Schutz bei großen Projekt-Prompts)
    with tempfile.NamedTemporaryFile("w", suffix=".md", delete=False, encoding="utf-8", dir=tempfile.gettempdir()) as f:
        f.write(prompt)
--- a/backend/config.py
+++ b/backend/config.py
@@ -15,9 +15,7 @@ TIMEOUTS = {
    "recherche":     (1800, 0),   # fix 30 min
    "auswahl":       (600, 10),
    "auswahl_check": (300, 2),
-    "einordnung":    (300, 5),
-    "final":         (300, 2),    # verifiziert nur noch, kleiner Output
-    "sortierung":    (300, 2),
+    "sortierung":    (300, 5),
    "plan":          (300, 5),
    "writer":        (600, 120),  # pro Section im Chunk
    "onepager_recherche": (900, 0),
@@ -25,6 +23,13 @@ TIMEOUTS = {
    "onepager_verify":    (300, 0),
 }

+# Welcher Anteil der sortierten Baustein-Liste in welches Format fließt: (Anteil, Mindestanzahl).
+FORMAT_ANTEIL = {
+    "MiniGuide": (0.10, 8),
+    "Guide":     (0.50, 20),
+    "FullGuide": (1.00, 0),
+}
+
 # Provider-Stacks: komplett unabhängig, einer kann jederzeit entfernt werden.
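-# Rollen: "quick" = Massenarbeit (Recherche, Einordnung),
-#         "fast"  = Urteilsaufgaben mit kleinem Output (Auswahl, Final, OnePager, Chat),
+# Roles:  "quick" = bulk work (Recherche, Sortierung),
+#         "fast"  = judgement tasks with small output (Auswahl, OnePager, Chat),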
@@ -35,14 +40,22 @@ PROVIDERS = {
        "cli": "claude",
        "guide": "claude-opus-4-8[1m]",
        "fast": "claude-sonnet-4-6",
-        "quick": "claude-haiku-4-5",
+        "quick": "claude-sonnet-4-6",
        "env_key": None,  # Auth via CLAUDE_CODE_OAUTH_TOKEN oder ~/.claude
    },
    "minimax": {
        "cli": "opencode",
        "guide": "minimax/MiniMax-M3",
-        "fast": "minimax/MiniMax-M2.7-highspeed",
-        "quick": "minimax/MiniMax-M2.7-highspeed",
+        "fast": "minimax/MiniMax-M3",
+        "quick": "minimax/MiniMax-M3",
        "env_key": "MINIMAX_API_KEY",
    },
+    "lokal": {
+        "cli": "opencode",
+        "guide": "ollama/qwen3:14b",
+        "fast": "ollama/qwen3:8b",
+        "quick": "ollama/qwen3:8b",
+        "env_key": None,
+        "check_url": "http://localhost:11434/api/tags",  # Ollama erreichbar?
+    },
 }
--- a/backend/generator.py
+++ b/backend/generator.py
@@ -1,20 +1,22 @@
 import asyncio
 import json
+import math
+import shutil
 import re
 import uuid
-from collections import Counter
 from datetime import datetime, timezone
 from pathlib import Path

 from agents import run_agent, kill_process
 from config import (
    DEFAULT_PROVIDER,
+    FORMAT_ANTEIL,
    TEMPLATES_DIR,
    TIMEOUTS,
    MAX_CONCURRENT_GENERATIONS,
 )
 from database import update_guide
-from paths import bausteine_path, guide_content_path, project_dir
+from paths import arbeit_dir, bausteine_path, guide_content_path, project_dir

 _semaphore = asyncio.Semaphore(MAX_CONCURRENT_GENERATIONS)
 _cancelled: set[str] = set()
@@ -113,55 +115,48 @@ def _json_datei(path: Path):
        return None


-def _resolve_kategorien(data, entries: dict[int, str], min_match: float = 0.85):
-    """{"KERN": [Titel], …} → {num: Kategorie}; None bei zu vielen unbekannten Titeln
+def _resolve_liste(data, entries: dict[int, str], min_match: float = 0.85) -> list[int] | None:
+    """{"reihenfolge": [Titel, …]} → [nums]; None bei zu vielen unbekannten Titeln
    oder zu geringer Abdeckung der Einträge."""
-    if not isinstance(data, dict):
+    if not isinstance(data, dict) or not isinstance(data.get("reihenfolge"), list):
        return None
    idx = _titel_index(entries)
-    mapping: dict[int, str] = {}
+    nums: list[int] = []
    total = unknown = 0
-    for cat in _CATEGORIES:
-        items = data.get(cat, [])
-        if not isinstance(items, list):
+    for t in data["reihenfolge"]:
+        if not isinstance(t, str):
            return None
-        for t in items:
-            if not isinstance(t, str):
-                return None
-            total += 1
-            num = _titel_aufloesen(idx, t)
-            if num is None:
-                unknown += 1
-            elif num not in mapping:
-                mapping[num] = cat
+        total += 1
+        num = _titel_aufloesen(idx, t)
+        if num is None:
+            unknown += 1
+        elif num not in nums:
+            nums.append(num)
    if total == 0:
        return None
-    if (total - unknown) / total < min_match or len(mapping) / len(entries) < min_match:
+    if (total - unknown) / total < min_match or len(nums) / len(entries) < min_match:
        return None
-    return mapping
+    return nums


-def _resolve_reihenfolge(data, entries: dict[int, str], min_match: float = 0.85):
-    """Wie _resolve_kategorien, aber liefert die Reihenfolge: {Kategorie: [nums]}."""
-    mapping = _resolve_kategorien(data, entries, min_match)
-    if mapping is None:
-        return None
-    idx = _titel_index(entries)
-    order: dict[str, list[int]] = {c: [] for c in _CATEGORIES}
-    for cat in _CATEGORIES:
-        for t in data.get(cat, []):
-            num = _titel_aufloesen(idx, t) if isinstance(t, str) else None
-            if num is not None and num not in order[cat]:
-                order[cat].append(num)
-    return order
+def _merge_sortierungen(topic: str, listen: list[list[int]], entries: dict[int, str]) -> list[int]:
+    """Median-Rang über mehrere Sortierungen; Bausteine ohne Stimmen ans Ende."""
+    raenge: dict[int, list[int]] = {num: [] for num in entries}
+    for liste in listen:
+        for rang, num in enumerate(liste):
+            if num in raenge:
+                raenge[num].append(rang)
+    ohne = [num for num, r in raenge.items() if not r]
+    if ohne:
+        _log(topic, f"Sortierung: keine Stimmen für {[_titel(entries[n]) for n in ohne]} → ans Ende")

+    def key(num: int):
+        r = sorted(raenge[num])
+        if not r:
+            return (10**9, 10**9, num)
+        return (r[len(r) // 2], sum(r) / len(r), num)

-def _kategorien_block(mapping: dict[int, str], entries: dict[int, str]) -> str:
-    parts = []
-    for cat in _CATEGORIES:
-        titel = [_titel(entries[n]) for n in sorted(entries) if mapping.get(n) == cat]
-        parts.append(f"{cat}:\n" + ("\n".join(f"- {t}" for t in titel) if titel else "(leer)"))
-    return "\n".join(parts)
+    return sorted(entries, key=key)


 def _timeout(step: str, n: int = 0) -> int:
@@ -239,36 +234,31 @@ async def _race(topic: str, label: str, slots: list[dict], quorum: int, timeout:
            await asyncio.gather(*tasks.keys(), return_exceptions=True)


-# --- Bausteine-Pipeline: 4x Recherche (3) → 2x Auswahl (1) → Check → 4x Einordnung (3) → Mehrheit+Verifikation → Sortierung ---
+# --- Bausteine-Pipeline: 4x Recherche (3) → 2x Auswahl (1) → Check → 3x Sortierung (Median-Rang) ---

 _bausteine_progress: dict[str, str] = {}
 _bausteine_errors: dict[str, str] = {}
 _bausteine_cancelled: set[str] = set()
 _bausteine_step: dict[str, int] = {}

-BAUSTEINE_STEPS = ("Recherche", "Auswahl", "Prüfung", "Einordnung", "Verifikation", "Sortierung")
-_CATEGORIES = ("KERN", "WICHTIG", "REST")
+BAUSTEINE_STEPS = ("Recherche", "Auswahl", "Prüfung", "Sortierung")
+_CATEGORIES = ("KERN", "WICHTIG", "REST")  # nur noch für den Altformat-Reader


 def _bausteine_files(topic: str) -> dict:
-    final_path = bausteine_path(topic)
-    stem, parent = final_path.stem, final_path.parent
+    arbeit = arbeit_dir(topic)
    return {
-        "final": final_path,
-        "recherche": [parent / f"{stem}.recherche-{i}.md" for i in (1, 2, 3, 4)],
-        "auswahl": [parent / f"{stem}.auswahl-{i}.md" for i in (1, 2)],
-        "auswahl_check": parent / f"{stem}.auswahl-check.json",
-        "einordnung": [parent / f"{stem}.einordnung-{i}.json" for i in (1, 2, 3, 4)],
-        "final_check": parent / f"{stem}.final-check.json",
-        "sortierung": parent / f"{stem}.sortierung.json",
+        "final": bausteine_path(topic),
+        "arbeit": arbeit,
+        "recherche": [arbeit / f"recherche-{i}.md" for i in (1, 2, 3, 4)],
+        "auswahl": [arbeit / f"auswahl-{i}.md" for i in (1, 2)],
+        "auswahl_check": arbeit / "auswahl-check.json",
+        "sortierung": [arbeit / f"sortierung-{i}.json" for i in (1, 2, 3)],
    }


 def _alle_slot_dateien(files: dict) -> list[Path]:
-    return [
-        *files["recherche"], *files["auswahl"], files["auswahl_check"],
-        *files["einordnung"], files["final_check"], files["sortierung"],
-    ]
+    return [*files["recherche"], *files["auswahl"], files["auswahl_check"], *files["sortierung"]]


 def cancel_bausteine(topic: str) -> bool:
@@ -288,11 +278,7 @@ def _resume_step(topic: str) -> int:
        return 1
    if not files["auswahl_check"].exists():
        return 2
-    if sum(p.exists() for p in files["einordnung"]) < 3:
-        return 3
-    if not files["final_check"].exists():
-        return 4
-    return 5
+    return 3


 def bausteine_status(topic: str) -> dict:
@@ -307,8 +293,6 @@ def bausteine_status(topic: str) -> dict:
        ]
    elif ready:
        states = ["done"] * len(BAUSTEINE_STEPS)
-        if not _bausteine_files(topic)["sortierung"].exists():
-            states[-1] = "pending"
    else:
        nxt = _resume_step(topic)
        partial = nxt > 0
@@ -330,12 +314,7 @@ def active_bausteine() -> list[dict]:
 def reset_bausteine(topic: str) -> None:
    files = _bausteine_files(topic)
    files["final"].unlink(missing_ok=True)
-    for p in _alle_slot_dateien(files):
-        p.unlink(missing_ok=True)
-    # Altlasten früherer Formatversionen
-    stem, parent = files["final"].stem, files["final"].parent
-    for alt in parent.glob(f"{stem}.*.md"):
-        alt.unlink(missing_ok=True)
+    shutil.rmtree(files["arbeit"], ignore_errors=True)
    _bausteine_errors.pop(topic, None)


@@ -351,7 +330,7 @@ def _build_recherche_prompt(topic: str, out_path: Path, instructions: str = "",


 def _parse_auswahl(text: str) -> dict[int, str]:
-    """Parst die konsolidierte Liste: `N. Titel — Kurzbeschreibung` pro Zeile."""
+    """Parst eine Baustein-Liste: `N. Titel — Kurzbeschreibung` pro Zeile."""
    entries: dict[int, str] = {}
    last = None
    for line in text.splitlines():
@@ -364,46 +343,30 @@ def _parse_auswahl(text: str) -> dict[int, str]:
    return entries


-def _majority(mappings: list[dict[int, str]], entries: dict[int, str]) -> tuple[dict[int, str], list[int]]:
-    """Mehrheitsentscheid über die Einordnungen; ohne Mehrheit → Streitfall."""
-    mapping: dict[int, str] = {}
-    disputes: list[int] = []
-    for num in entries:
-        votes = [m[num] for m in mappings if num in m]
-        if not votes:
-            disputes.append(num)
+def _parse_kategorien(text: str) -> dict[str, list[str]]:
+    """Altformat-Reader: finale Baustein-Datei mit ## KERN/WICHTIG/REST-Abschnitten."""
+    cats: dict[str, list[str]] = {}
+    current = None
+    for line in text.splitlines():
+        s = line.strip()
+        m = re.match(r"#+\s*(KERN|WICHTIG|REST)\b", s, re.IGNORECASE)
+        if m:
+            current = m.group(1).upper()
+            cats.setdefault(current, [])
            continue
-        cat, count = Counter(votes).most_common(1)[0]
-        if count >= 2:
-            mapping[num] = cat
-        else:
-            disputes.append(num)
-    return mapping, disputes
+        m = re.match(r"(\d+)[.)]\s+(.*\S)", s)
+        if m and current:
+            cats[current].append(m.group(2))
+    return cats


-def _build_final_bausteine(topic: str, entries: dict[int, str], mapping: dict[int, str], order: dict[str, list[int]] | None = None) -> str:
-    """Baut die finale Baustein-Datei aus konsolidierter Liste + finaler Zuordnung.
-
-    `order` (Kategorie → Nummern in Lernreihenfolge) sortiert innerhalb der
-    Kategorien; nicht gelistete Nummern hängen in Originalreihenfolge hinten an.
-    """
-    grouped: dict[str, list[int]] = {c: [] for c in _CATEGORIES}
-    for num in sorted(entries):
-        cat = mapping.get(num)
-        if cat is None:
-            _log(topic, f"Baustein {num} fehlt in finaler Einordnung → REST")
-            cat = "REST"
-        grouped[cat].append(num)
-    if order:
-        for cat in _CATEGORIES:
-            wanted = set(grouped[cat])
-            seq = [n for n in order.get(cat, []) if n in wanted]
-            grouped[cat] = seq + [n for n in grouped[cat] if n not in seq]
-    parts = []
-    for cat in _CATEGORIES:
-        lines = "\n".join(f"{i}. {entries[num]}" for i, num in enumerate(grouped[cat], 1))
-        parts.append(f"## {cat}\n{lines}")
-    return "\n\n".join(parts) + "\n"
+def _lade_bausteine(text: str) -> dict[int, str]:
+    """Load the final building-block file as {position: entry text}.
+
+    Two layouts are accepted: the current flat, already sorted numbered list
+    (parsed by `_parse_auswahl`) and the legacy layout with KERN/WICHTIG/REST
+    heading sections. A legacy file is recognised by a heading line for KERN
+    and is flattened in `_CATEGORIES` order, renumbered from 1.
+    """
+    ist_altformat = re.search(r"^#+\s*KERN\b", text, re.IGNORECASE | re.MULTILINE)
+    if ist_altformat is None:
+        return _parse_auswahl(text)
+    abschnitte = _parse_kategorien(text)
+    flach: list[str] = []
+    for cat in _CATEGORIES:
+        flach.extend(abschnitte.get(cat, []))
+    return dict(enumerate(flach, 1))


 def _file_payload(path: Path):
@@ -442,23 +405,6 @@ def _titel_aufloesen(idx: dict[str, int], t: str) -> int | None:
    return idx.get(_norm_titel(t)) or idx.get(_norm_titel(_titel(t)))


-async def _run_sortierung(topic: str, entries: dict[int, str], mapping: dict[int, str], provider: str, cancelled) -> dict[str, list[int]] | None:
-    """Sortiert innerhalb der Kategorien; die JSON-Datei des Agenten ist zugleich der Marker."""
-    out = _bausteine_files(topic)["sortierung"]
-    out.unlink(missing_ok=True)
-    slots = [{
-        "key": f"bausteine-{topic}-sortierung-1",
-        "prompt": _prompt("Bausteine-Sortierung", topic=topic, einordnung=_kategorien_block(mapping, entries), out_path=out),
-        "role": "quick", "capabilities": "files",
-        "payload": (lambda result: _resolve_reihenfolge(_json_datei(out), entries)),
-    }]
-    res = await _race(topic, "Sortierung", slots, 1, _timeout("sortierung", len(entries)), provider, cancelled=cancelled)
-    if res is None:
-        out.unlink(missing_ok=True)
-        return None
-    return res[0]
-
-
 async def generate_bausteine(topic: str, instructions: str = "", provider: str = DEFAULT_PROVIDER) -> None:
    if topic in _bausteine_progress:
        return
@@ -482,31 +428,8 @@ async def generate_bausteine(topic: str, instructions: str = "", provider: str =

    try:
        async with _semaphore:
-            # Fertig, aber ohne Sortier-Marker (ältere Version): nur die Sortierung nachholen.
-            if final_path.exists() and not files["sortierung"].exists():
-                cats = _parse_kategorien(final_path.read_text(encoding="utf-8"))
-                entries: dict[int, str] = {}
-                mapping: dict[int, str] = {}
-                i = 0
-                for cat in _CATEGORIES:
-                    for text in cats.get(cat, []):
-                        i += 1
-                        entries[i] = text
-                        mapping[i] = cat
-                entries = _eindeutige_titel(entries)
-                if entries:
-                    set_p("Sortiere Bausteine…", step=5)
-                    order = await _run_sortierung(topic, entries, mapping, provider, is_cancelled)
-                    if is_cancelled():
-                        abgebrochen()
-                        return
-                    if order is None:
-                        _bausteine_errors[topic] = "Sortierung fehlgeschlagen"
-                        return
-                    final_path.write_text(_build_final_bausteine(topic, entries, mapping, order), encoding="utf-8")
-                return
-
-            # „Neu erstellen": fertige (sortierte) Bausteine → kompletter Frischstart.
+            files["arbeit"].mkdir(parents=True, exist_ok=True)
+            # „Neu erstellen": fertige Bausteine → kompletter Frischstart.
            # Sonst sind Slot-Dateien Reste eines Abbruchs/Fehlers → Resume.
            if final_path.exists():
                for p_alt in _alle_slot_dateien(files):
@@ -612,103 +535,47 @@ async def generate_bausteine(topic: str, instructions: str = "", provider: str =
            entries = _eindeutige_titel(entries)
            bausteine_liste = "\n".join(f"- {t}" for t in entries.values())

-            # Schritt 3: 4 Einordnungs-Agenten, 3 gültige nötig (JSON-Dateien, Titel-validiert)
+            # Schritt 3: 3 Sortier-Agenten, ALLE nötig — Merge per Median-Rang
            n = len(entries)
-            einordnungen: list[dict[int, str]] = []
+            sortierungen: list[list[int]] = []
            offen = []
-            for i, path in enumerate(files["einordnung"], 1):
-                m = _resolve_kategorien(_json_datei(path), entries)
-                if m is not None and len(einordnungen) < 3:
-                    einordnungen.append(m)
+            for i, path in enumerate(files["sortierung"], 1):
+                liste = _resolve_liste(_json_datei(path), entries)
+                if liste is not None and len(sortierungen) < 3:
+                    sortierungen.append(liste)
                else:
                    path.unlink(missing_ok=True)
                    offen.append((i, path))
-            vorhanden = len(einordnungen)
-            set_p(f"Einordnung läuft ({vorhanden}/3 gültig)…", step=3)
+            vorhanden = len(sortierungen)
+            set_p(f"Sortierung läuft ({vorhanden}/3 gültig)…", step=3)
            if vorhanden < 3:
                slots = [
                    {
-                        "key": f"bausteine-{topic}-einordnung-{i}",
-                        "prompt": _prompt("Bausteine-Einordnung", topic=topic, bausteine=bausteine_liste, out_path=path),
+                        "key": f"bausteine-{topic}-sortierung-{i}",
+                        "prompt": _prompt("Bausteine-Sortierung", topic=topic, bausteine=bausteine_liste, out_path=path),
                        "role": "quick", "capabilities": "files",
-                        "payload": (lambda result, p=path: _resolve_kategorien(_json_datei(p), entries)),
+                        "payload": (lambda result, p=path: _resolve_liste(_json_datei(p), entries)),
                    }
                    for i, path in offen
                ]
                neue = await _race(
-                    topic, "Einordnung", slots, 3 - vorhanden, _timeout("einordnung", n), provider,
-                    on_update=lambda c: set_p(f"Einordnung läuft ({vorhanden + c}/3 gültig)…"),
+                    topic, "Sortierung", slots, 3 - vorhanden, _timeout("sortierung", n), provider,
+                    on_update=lambda c: set_p(f"Sortierung läuft ({vorhanden + c}/3 gültig)…"),
                    cancelled=is_cancelled,
                )
                if is_cancelled():
                    abgebrochen()
                    return
                if neue is None:
-                    _bausteine_errors[topic] = "Einordnung fehlgeschlagen (Quorum nicht erreicht)"
+                    _bausteine_errors[topic] = "Sortierung fehlgeschlagen (Quorum nicht erreicht)"
                    return
-                einordnungen += neue
+                sortierungen += neue

-            # Schritt 4: Python-Mehrheitsentscheid + Verifikations-Agent (antwortet nur mit Deltas, JSON)
-            set_p("Verifiziere Einordnung…", step=4)
-            mapping, disputes = _majority(einordnungen, entries)
-            if disputes:
-                _log(topic, f"Keine Mehrheit bei: {disputes}")
-
-            def _final_schema(data):
-                if not isinstance(data, dict):
-                    return None
-                idx = _titel_index(entries)
-                out: dict[int, str] = {}
-                for t, cat in data.items():
-                    if not isinstance(t, str) or cat not in _CATEGORIES:
-                        return None
-                    num = _titel_aufloesen(idx, t)
-                    if num is not None:
-                        out[num] = cat
-                return out  # leeres Dict = alles bestätigt
-
-            fc_path = files["final_check"]
-            overrides = _final_schema(_json_datei(fc_path))
-            if overrides is None:
-                fc_path.unlink(missing_ok=True)
-                streit_block = "\n".join(f"- {entries[n]}" for n in disputes) or "(keine)"
-                slots = [{
-                    "key": f"bausteine-{topic}-final-1",
-                    "prompt": _prompt(
-                        "Bausteine-Einordnung-Final",
-                        topic=topic, einordnung=_kategorien_block(mapping, entries),
-                        streitfaelle=streit_block, out_path=fc_path,
-                    ),
-                    "role": "fast", "capabilities": "files",
-                    "payload": (lambda result: _final_schema(_json_datei(fc_path))),
-                }]
-                finals = await _race(topic, "Final", slots, 1, _timeout("final", n), provider, cancelled=is_cancelled)
-                if is_cancelled():
-                    abgebrochen()
-                    return
-                if finals is None:
-                    _log(topic, "Final-Verifikation fehlgeschlagen — Mehrheitsentscheid bleibt unverändert")
-                    overrides = {}
-                else:
-                    overrides = finals[0]
-            korrekturen = {num: cat for num, cat in overrides.items() if mapping.get(num) != cat and num not in disputes}
-            if korrekturen:
-                _log(topic, f"Final-Verifikation korrigiert: { {_titel(entries[n]): c for n, c in korrekturen.items()} }")
-            mapping.update(overrides)
-            for num in disputes:
-                if num not in mapping:
-                    _log(topic, f"Streitfall '{_titel(entries[num])}' unentschieden → REST")
-                    mapping[num] = "REST"
-
-            # Schritt 5: Sortierung innerhalb der Kategorien (einfach → komplex, nicht fatal)
-            set_p("Sortiere Bausteine…", step=5)
-            order = await _run_sortierung(topic, entries, mapping, provider, is_cancelled)
-            if is_cancelled():
-                abgebrochen()
-                return
-            if order is None:
-                _log(topic, "Sortierung fehlgeschlagen — Originalreihenfolge bleibt (Nachholen über ▶)")
-            final_path.write_text(_build_final_bausteine(topic, entries, mapping, order), encoding="utf-8")
+            reihenfolge = _merge_sortierungen(topic, sortierungen, entries)
+            final_path.write_text(
+                "\n".join(f"{i}. {entries[num]}" for i, num in enumerate(reihenfolge, 1)) + "\n",
+                encoding="utf-8",
+            )
    except Exception as e:
        _bausteine_errors[topic] = str(e)[:2000]
    finally:
@@ -720,34 +587,10 @@ async def generate_bausteine(topic: str, instructions: str = "", provider: str =

 # --- Guide-Generierung: Bausteine → (Plan) → Writer → JSON ---

-# Welche Baustein-Kategorien jedes Format abdeckt.
-FORMAT_COVERAGE = {
-    "MiniGuide": ("KERN",),
-    "Guide": ("KERN", "WICHTIG"),
-    "FullGuide": ("KERN", "WICHTIG", "REST"),
-}
-
 # Parallele Writer pro Format (OnePager hat einen eigenen Weg).
 WRITER_COUNT = {"MiniGuide": 1, "Guide": 2, "FullGuide": 4}


-def _parse_kategorien(text: str) -> dict[str, list[str]]:
-    """Parst die finale Baustein-Datei (## KERN/WICHTIG/REST mit nummerierten Einträgen)."""
-    cats: dict[str, list[str]] = {}
-    current = None
-    for line in text.splitlines():
-        s = line.strip()
-        m = re.match(r"#+\s*(KERN|WICHTIG|REST)\b", s, re.IGNORECASE)
-        if m:
-            current = m.group(1).upper()
-            cats.setdefault(current, [])
-            continue
-        m = re.match(r"(\d+)[.)]\s+(.*\S)", s)
-        if m and current:
-            cats[current].append(m.group(2))
-    return cats
-
-
 def _resolve_gliederung(data, entries: dict[int, str]) -> list[dict] | None:
    """{"kapitel": [{"titel", "bausteine": [Titel]}]} → [{"title", "nums"}]; None bei Schema-/Titel-Fehlern."""
    if not isinstance(data, dict) or not isinstance(data.get("kapitel"), list):
@@ -1046,6 +889,7 @@ async def generate_guide(guide_id: str, topic: str, format_name: str, instructio
        await update_guide(guide_id, status="generating", progress="Starte…", updated_at=now)

        content_path = guide_content_path(topic, format_name)
+        content_path.parent.mkdir(parents=True, exist_ok=True)
        project = project_dir(topic) if project_dir(topic).is_dir() else None
        fragment_paths: list[Path] = []

@@ -1056,13 +900,13 @@ async def generate_guide(guide_id: str, topic: str, format_name: str, instructio
            if format_name == "OnePager":
                chapters = await _generate_onepager(guide_id, topic, instructions, provider, project, content_path, fragment_paths)
            else:
-                cats = _parse_kategorien(bausteine_path(topic).read_text(encoding="utf-8"))
-                selected: list[str] = []
-                for cat in FORMAT_COVERAGE[format_name]:
-                    selected.extend(cats.get(cat, []))
-                if not selected:
-                    await _fail(guide_id, "Keine passenden Bausteine gefunden")
+                alle = _lade_bausteine(bausteine_path(topic).read_text(encoding="utf-8"))
+                if not alle:
+                    await _fail(guide_id, "Keine Bausteine gefunden")
                    return
+                anteil, minimum = FORMAT_ANTEIL[format_name]
+                k = min(len(alle), max(minimum, math.ceil(anteil * len(alle))))
+                selected = [text for _, text in sorted(alle.items())][:k]
                entries = _eindeutige_titel({i: text for i, text in enumerate(selected, 1)})
                facts = _prompt("Guide-Fakten-Projekt", project=project) if project else _prompt("Guide-Fakten-Thema")
                chapters = await _generate_sections(
--- a/backend/main.py
+++ b/backend/main.py
@@ -10,8 +10,7 @@ from routes import router

@asynccontextmanager
 async def lifespan(app: FastAPI):
-    (STORAGE_DIR / "guides").mkdir(parents=True, exist_ok=True)
-    (STORAGE_DIR / "bausteine").mkdir(parents=True, exist_ok=True)
+    (STORAGE_DIR / "themen").mkdir(parents=True, exist_ok=True)
    await init_db()
    yield
    await close_db()
--- a/backend/models.py
+++ b/backend/models.py
@@ -8,7 +8,7 @@ FormatType = Literal[
    "FullGuide",
 ]

-ProviderType = Literal["claude", "minimax"]
+ProviderType = Literal["claude", "minimax", "lokal"]


 class GuideCreateRequest(BaseModel):
--- a/backend/paths.py
+++ b/backend/paths.py
@@ -1,30 +1,35 @@
-import re
 from pathlib import Path

 from config import STORAGE_DIR, PROJECTS_DIR

+THEMEN_DIR = STORAGE_DIR / "themen"
+

 def _safe(name: str) -> str:
    return name.replace("/", "_").replace("\x00", "")


-def guide_content_path(topic: str, format_name: str) -> Path:
-    return STORAGE_DIR / "guides" / f"{_safe(topic)} - {format_name}.json"
+def topic_dir(topic: str) -> Path:
+    """Return the per-topic directory below THEMEN_DIR.
+
+    `_safe` only replaces "/" and strips NUL, so the names "", "." and ".."
+    would resolve to THEMEN_DIR itself or to its parent directory. Callers
+    delete this directory recursively, so those names are prefixed with "_"
+    to guarantee the result is always a real child of THEMEN_DIR.
+    """
+    name = _safe(topic)
+    if name in ("", ".", ".."):
+        name = f"_{name}"
+    return THEMEN_DIR / name
+
+
+def arbeit_dir(topic: str) -> Path:
+    return topic_dir(topic) / "arbeit"


 def bausteine_path(topic: str) -> Path:
-    return STORAGE_DIR / "bausteine" / f"{_safe(topic)}.md"
+    return topic_dir(topic) / "bausteine.md"
+
+
+def guide_content_path(topic: str, format_name: str) -> Path:
+    return topic_dir(topic) / "guides" / f"{format_name}.json"


 def bausteine_topics() -> list[str]:
-    """Themen, für die eine finale Baustein-Datei existiert (ohne Zwischendateien)."""
-    bdir = STORAGE_DIR / "bausteine"
-    if not bdir.is_dir():
+    """Themen, für die ein Themen-Ordner existiert."""
+    if not THEMEN_DIR.is_dir():
        return []
-    return [
-        p.stem for p in bdir.glob("*.md")
-        if not re.search(r"\.(recherche-\d+|auswahl(-\d+|-check)?|einordnung-\d+|final-check|sortierung)$", p.stem)
-    ]
+    return [d.name for d in THEMEN_DIR.iterdir() if d.is_dir()]


 def project_dir(name: str) -> Path:
--- a/backend/routes.py
+++ b/backend/routes.py
@@ -24,7 +24,7 @@ from models import (
    GuideChatRequest, GuideChatResponse,
    ProgressUpdate, ProgressResponse, ProjectResponse, ProviderInfo,
 )
-from paths import bausteine_path, bausteine_topics, guide_content_path, project_dir
+from paths import bausteine_path, bausteine_topics, guide_content_path, project_dir, topic_dir

 router = APIRouter(prefix="/api")

@@ -54,6 +54,7 @@ async def add_topic(req: TopicCreateRequest):
@router.delete("/topics")
 async def remove_topic(topic: str):
    await delete_topic(topic)
+    # NOTE(review): recursively deletes whatever topic_dir() resolves to, with
+    # errors ignored — confirm that topic values such as "", "." or ".." can
+    # never resolve outside the topic's own folder, and that `shutil` is
+    # imported in this module (the import is not visible in this hunk).
+    shutil.rmtree(topic_dir(topic), ignore_errors=True)
    return {"ok": True}