VectifyAI · gwokhou · Jun 18, 2026 · Jun 18, 2026 · Jun 19, 2026 · Jun 19, 2026
diff --git a/README.md b/README.md
@@ -358,6 +358,7 @@ OpenKB settings are initialized by `openkb init` and stored in `.openkb/config.y
 model: gpt-5.4                   # LLM model (any LiteLLM-supported provider)
 language: en                     # Wiki output language
 pageindex_threshold: 20          # PDF pages threshold for PageIndex
+file_processing_jobs: 2          # Files to prepare concurrently during `openkb add <dir>`
 ```
 
 Model names use `provider/model` LiteLLM [format](https://docs.litellm.ai/docs/providers) (OpenAI models can omit the prefix):
@@ -372,6 +373,8 @@ Model names use `provider/model` LiteLLM [format](https://docs.litellm.ai/docs/p
 <summary><i>Advanced options (<code>entity_types</code>, OAuth):</i></summary>
 <br>
 
+`file_processing_jobs` (default `2`): number of files prepared concurrently during `openkb add <dir>`. Only the preparation stage is parallelized (hashing, duplicate prefiltering, raw/source staging, conversion); live-KB mutation stays serialized under the mutation lock, so raising this value helps mainly when conversion is the bottleneck.
+
 `entity_types` (optional): a YAML list overriding the entity-type vocabulary used for entity pages; omit it to use the default `person`, `organization`, `place`, `product`, `work`, `event`, `other`.
 
 Subscription-based providers that authenticate via OAuth device flow (e.g. `chatgpt/*`, `github_copilot/*`) need no API key; OpenKB skips the missing-key warning for them.

diff --git a/config.yaml.example b/config.yaml.example
@@ -1,6 +1,10 @@
 model: gpt-5.4                   # LLM model (any LiteLLM-supported provider)
 language: en                     # Wiki output language
 pageindex_threshold: 20          # PDF pages threshold for PageIndex
+file_processing_jobs: 2          # Number of files to prepare concurrently during `openkb add <dir>`
+# Note: this parallelizes hashing/conversion/staging only. Live KB publish,
+# PageIndex indexing, LLM compilation, registry updates, and log writes remain
+# serialized under the KB mutation lock.
 
 # Optional: override the entity-type vocabulary used for entity pages.
 # Omit this key to use the default 7 types

diff --git a/openkb/agent/compiler.py b/openkb/agent/compiler.py
@@ -36,6 +36,7 @@
     resolve_entity_types,
 )
 from openkb.lint import list_existing_wiki_targets, strip_ghost_wikilinks
+from openkb.locks import atomic_write_text
 from openkb.schema import INDEX_SEED, get_agents_md
 
 logger = logging.getLogger(__name__)
@@ -779,7 +780,7 @@ def _write_summary(wiki_dir: Path, doc_name: str, summary: str,
     fm_lines.append(f"doc_type: {doc_type}")
     fm_lines.append(_yaml_kv_line("full_text", f"sources/{doc_name}.{ext}"))
     fm_block = "---\n" + "\n".join(fm_lines) + "\n---\n\n"
-    (summaries_dir / f"{doc_name}.md").write_text(fm_block + summary, encoding="utf-8")
+    atomic_write_text(summaries_dir / f"{doc_name}.md", fm_block + summary)
 
 
 _SAFE_NAME_RE = re.compile(r'[^\w\-]')
@@ -839,7 +840,7 @@ def _write_concept(wiki_dir: Path, name: str, content: str, source_file: str, is
             if brief:
                 fm_lines.append(_yaml_kv_line("description", brief))
             existing = frontmatter.block(fm_lines) + clean
-            path.write_text(existing, encoding="utf-8")
+            atomic_write_text(path, existing)
             return
         # Guarantee type + refresh description on update; remove legacy brief:.
         ex_parts2 = frontmatter.split(existing)
@@ -851,7 +852,7 @@ def _write_concept(wiki_dir: Path, name: str, content: str, source_file: str, is
             # Drop legacy brief: lines (migrated to description:).
             fm_block = frontmatter.drop_line(fm_block, "brief")
             existing = fm_block + body
-        path.write_text(existing, encoding="utf-8")
+        atomic_write_text(path, existing)
     else:
         clean_parts = frontmatter.split(content)
         if clean_parts is not None:
@@ -863,7 +864,7 @@ def _write_concept(wiki_dir: Path, name: str, content: str, source_file: str, is
         if brief:
             fm_lines.append(_yaml_kv_line("description", brief))
         fm_block = "---\n" + "\n".join(fm_lines) + "\n---\n\n"
-        path.write_text(fm_block + content, encoding="utf-8")
+        atomic_write_text(path, fm_block + content)
 
 
 def _write_entity(
@@ -927,10 +928,10 @@ def _build_entity_frontmatter(sources: list[str]) -> str:
                     break
             merged = [source_file] + [s for s in recovered if s != source_file]
             existing = _build_entity_frontmatter(merged) + clean
-        path.write_text(existing, encoding="utf-8")
+        atomic_write_text(path, existing)
         return
 
-    path.write_text(_build_entity_frontmatter([source_file]) + clean, encoding="utf-8")
+    atomic_write_text(path, _build_entity_frontmatter([source_file]) + clean)
 
 
 _set_fm_line = frontmatter.set_line
@@ -1041,7 +1042,7 @@ def _add_related_link(
         text = _prepend_source_to_frontmatter(text, source_file)
 
     text += f"\n\nSee also: {link}"
-    path.write_text(text, encoding="utf-8")
+    atomic_write_text(path, text)
     return True
 
 
@@ -1068,7 +1069,7 @@ def _backlink_summary_pages(
     _ensure_h2_section(lines, section, quiet=True)
     for slug in reversed(missing):
         _insert_section_entry(lines, section, f"- [[{page_dir}/{slug}]]")
-    summary_path.write_text("\n".join(lines), encoding="utf-8")
+    atomic_write_text(summary_path, "\n".join(lines))
 
 
 def _backlink_pages(
@@ -1089,7 +1090,7 @@ def _backlink_pages(
         lines = text.split("\n")
         _ensure_h2_section(lines, "## Related Documents", quiet=True)
         _insert_section_entry(lines, "## Related Documents", f"- {link}")
-        path.write_text("\n".join(lines), encoding="utf-8")
+        atomic_write_text(path, "\n".join(lines))
 
 
 def _backlink_summary(wiki_dir: Path, doc_name: str, concept_slugs: list[str]) -> None:
@@ -1195,7 +1196,7 @@ def _remove_doc_from_pages(
             path.unlink()
             deleted.append(path.stem)
         elif new_text != text:
-            path.write_text(new_text, encoding="utf-8")
+            atomic_write_text(path, new_text)
             modified.append(path.stem)
 
     return {"modified": modified, "deleted": deleted}
@@ -1291,7 +1292,7 @@ def remove_doc_from_index(wiki_dir: Path, doc_name: str, concept_slugs_deleted:
         while _remove_section_entry(lines, "## Entities", entity_link):
             pass
 
-    index_path.write_text("\n".join(lines), encoding="utf-8")
+    atomic_write_text(index_path, "\n".join(lines))
 
 
 def _update_index(
@@ -1315,7 +1316,7 @@ def _update_index(
 
     index_path = wiki_dir / "index.md"
     if not index_path.exists():
-        index_path.write_text(INDEX_SEED, encoding="utf-8")
+        atomic_write_text(index_path, INDEX_SEED)
 
     lines = index_path.read_text(encoding="utf-8").split("\n")
 
@@ -1361,7 +1362,7 @@ def _update_index(
         else:
             _insert_section_entry(lines, "## Entities", entry)
 
-    index_path.write_text("\n".join(lines), encoding="utf-8")
+    atomic_write_text(index_path, "\n".join(lines))
 
 
 # ---------------------------------------------------------------------------
@@ -2035,7 +2036,7 @@ async def compile_long_doc(
         updated = fm_block + body
         if updated != summary_content:
             summary_content = updated
-            summary_path.write_text(summary_content, encoding="utf-8")
+            atomic_write_text(summary_path, summary_content)
 
     # Base context A. cache_control marker on the doc message creates a
     # cache breakpoint covering (system + doc) for every concept call.