Skip to content

feat: add bytehouse as graph database backend #1509

Open
white-cc wants to merge 1 commit into MemTensor:main from
white-cc:main
Open

feat: add bytehouse as graph database backend #1509
white-cc wants to merge 1 commit into MemTensor:main from
white-cc:main

Conversation

@white-cc
Copy link
Copy Markdown

@white-cc white-cc commented Apr 21, 2026

Description

Add ByteHouse as a graph database backend.

Type of change

Please delete options that are not relevant.

  • New feature (non-breaking change which adds functionality)

How Has This Been Tested?

  • Unit Test

Copilot AI review requested due to automatic review settings April 21, 2026 11:34
Copy link
Copy Markdown
Contributor

Copilot AI left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Pull request overview

Adds ByteHouse (ClickHouse-compatible) as an additional memos.graph_dbs backend, wiring it into the config system and backend factory so it can be selected via API/env configuration.

Changes:

  • Introduce ByteHouseGraphDB implementation (DDL, node/edge ops, vector search, counts, export/import).
  • Add ByteHouseGraphDBConfig and register "bytehouse" in graph DB config + factory maps.
  • Add API config builder + .env.example variables and a new ByteHouse test module.

Reviewed changes

Copilot reviewed 7 out of 7 changed files in this pull request and generated 14 comments.

Show a summary per file
File Description
tests/graph_dbs/test_bytehouse.py Adds ByteHouse backend tests (currently unguarded integration-style).
src/memos/graph_dbs/factory.py Registers "bytehouse" backend in GraphStoreFactory.
src/memos/graph_dbs/bytehouse.py Implements ByteHouse graph DB backend (schema, CRUD, search, utilities).
src/memos/configs/graph_db.py Adds ByteHouseGraphDBConfig and registers it in config factory.
src/memos/api/handlers/config_builders.py Exposes ByteHouse config in API graph DB config builder.
src/memos/api/config.py Adds APIConfig.get_bytehouse_config() and includes ByteHouse in backend map.
docker/.env.example Documents ByteHouse-related environment variables.

💡 Add Copilot custom instructions for smarter, more guided reviews. Learn how to get started.

Comment thread src/memos/api/config.py
Comment on lines +864 to +886
def get_bytehouse_config(user_id: str | None = None) -> dict[str, Any]:
"""Get ByteHouse configuration."""
use_multi_db = os.getenv("BYTEHOUSE_USE_MULTI_DB", "false").lower() == "true"

if use_multi_db:
db_name = f"memos{user_id.replace('-', '')}" if user_id else "memos_default"
else:
db_name = os.getenv("BYTEHOUSE_DB_NAME", "shared_memos_db")
user_name = (
f"memos{user_id.replace('-', '')}" if user_id else "memos_default"
)

return {
"host": os.getenv("BYTEHOUSE_HOST", "localhost"),
"port": int(os.getenv("BYTEHOUSE_PORT", "9000")),
"user": os.getenv("BYTEHOUSE_USER", "default"),
"password": os.getenv("BYTEHOUSE_PASSWORD", ""),
"db_name": db_name,
"user_name": user_name,
"use_multi_db": use_multi_db,
"auto_create": True,
"embedding_dimension": int(os.getenv("EMBEDDING_DIMENSION", "1024")),
}
Comment on lines +49 to +58
# Create ClickHouse client
self.client = clickhouse_connect.get_client(
host=config.host,
port=config.port,
username="bytehouse",
password=config.password,
secure=True,
compress=False,
autogenerate_session_id=False,
)
Comment on lines +142 to +149
def _build_user_name_and_kb_ids_condition(
self, user_name: str, knowledgebase_ids: list[str]
) -> str:
"""Build ClickHouse condition for user_name and knowledgebase_ids."""
if not knowledgebase_ids:
return f"user_name = '{user_name}'"
else:
return f"user_name IN ['{'\',\''.join(knowledgebase_ids)}','{user_name}']"
Comment on lines +338 to +346
# Delete edges using INSERT with _delete_flag_ = 1 (only unique key needed)
if edge_result:
edge_values = [(edge_id, user_name, 1) for (edge_id) in edge_result]
self.client.insert(
f"{self.db_name}.edges",
edge_values,
column_names=["id", "user_name", "_delete_flag_"],
column_type_names=["String", "String", "UInt8"],
)
for row in result:
result_set.add(row[0])
result_set.add(row[1])
result_set.remove(id)
Comment on lines +681 to +709
def get_by_metadata(
self,
filters: list[dict[str, Any]],
status: str | None = None,
user_name: str | None = None,
filter: dict | None = None,
knowledgebase_ids: list[str] | None = None,
user_name_flag: bool = True,
) -> list[str]:
"""Get node IDs matching metadata filters."""
user_name = user_name or self.user_name

conditions = []
params = {}

conditions.append(
self._build_user_name_and_kb_ids_condition(user_name, knowledgebase_ids)
)

if status:
conditions.append(f"JSONExtractString(properties, 'status') = '{status}'")

where_clause = " AND ".join(conditions)

try:
result = self.client.query(
f"SELECT id FROM {self.db_name}.memories WHERE {where_clause}"
).result_set
return [row[0] for row in result]
Comment on lines +358 to +363
cols = "id, memory, properties, created_at, updated_at"
if include_embedding:
cols += ", embedding"
result = self.client.query(
f"SELECT {cols} FROM {self.db_name}.memories WHERE id = '{id}' AND user_name = '{user_name}'",
).result_set
Comment on lines +7 to +11
from datetime import datetime
from dotenv import load_dotenv

from memos.configs.graph_db import ByteHouseGraphDBConfig
from memos.graph_dbs.bytehouse import ByteHouseGraphDB, _prepare_node_metadata
Comment on lines +103 to +111
graph_db.add_node(node_id, memory, metadata, user_name="test_delete")

result = graph_db.get_node(node_id, user_name="test_delete")
assert result is not None

graph_db.delete_node(node_id, user_name="test_delete")

result = graph_db.get_node(node_id, user_name="test_delete")
assert result is None
Comment on lines +249 to +250
- memos_memories: Main table for memory nodes (id, memory, properties JSONB, embedding vector)
- memos_edges: Edge table for relationships (source_id, target_id, type)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

None yet

Projects

None yet

Development

Successfully merging this pull request may close these issues.

2 participants