Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .github/workflows/benchmark-nightly.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ jobs:

- uses: astral-sh/setup-uv@v4

+ - uses: extractions/setup-just@v2

- name: Install dependencies
run: uv sync --group dev --extra judge

Expand Down
2 changes: 2 additions & 0 deletions .github/workflows/benchmark-smoke.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ jobs:

- uses: astral-sh/setup-uv@v4

+ - uses: extractions/setup-just@v2

- name: Install dependencies
run: uv sync --group dev

Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ Local override:

```bash
uv run bm-bench run retrieval \
- --bm-local-path /Users/phernandez/dev/basicmachines/basic-memory
+ --bm-local-path /path/to/basic-memory
```

## Mem0 local requirements
Expand Down
18 changes: 9 additions & 9 deletions docs/benchmarks.md
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,8 @@ It covers:

### Repositories and paths

- - benchmark repo: `/Users/phernandez/dev/basicmachines/basic-memory-benchmarks`
- - BM local repo (default in `justfile`): `/Users/phernandez/dev/basicmachines/basic-memory`
+ - benchmark repo: clone of `basicmachines-co/basic-memory-benchmarks`
+ - BM local repo: set `BM_LOCAL_PATH` env var (or in `.env`) to your local `basic-memory` checkout

### Environment

Expand All @@ -53,7 +53,7 @@ It covers:
### One-time setup

```bash
- cd /Users/phernandez/dev/basicmachines/basic-memory-benchmarks
+ cd /path/to/basic-memory-benchmarks
just sync
```

Expand Down Expand Up @@ -107,7 +107,7 @@ Top-level commands:
### One-command full retrieval run

```bash
- cd /Users/phernandez/dev/basicmachines/basic-memory-benchmarks
+ cd /path/to/basic-memory-benchmarks
just bench-full
```

Expand All @@ -119,7 +119,7 @@ This runs:
### One-command full retrieval + judge

```bash
- cd /Users/phernandez/dev/basicmachines/basic-memory-benchmarks
+ cd /path/to/basic-memory-benchmarks
just bench-full-judge
```

Expand Down Expand Up @@ -235,8 +235,8 @@ Use this workflow today to compare BM revisions while keeping benchmark tooling
### Step 1: Create BM worktrees for target refs

```bash
- BM_REPO=/Users/phernandez/dev/basicmachines/basic-memory
- WT_ROOT=/Users/phernandez/dev/basicmachines/basic-memory-benchmarks/benchmarks/worktrees/basic-memory
+ BM_REPO=/path/to/basic-memory
+ WT_ROOT=/path/to/basic-memory-benchmarks/benchmarks/worktrees/basic-memory

mkdir -p "$WT_ROOT"

Expand All @@ -250,7 +250,7 @@ git -C "$BM_REPO" worktree add "$WT_ROOT/current" HEAD
### Step 2: Prepare benchmark datasets once

```bash
- cd /Users/phernandez/dev/basicmachines/basic-memory-benchmarks
+ cd /path/to/basic-memory-benchmarks
just sync
just bench-prepare-short
just bench-prepare-long
Expand Down Expand Up @@ -377,7 +377,7 @@ Planned command shape:

```bash
uv run bm-bench run revision-matrix \
- --bm-repo-path /Users/phernandez/dev/basicmachines/basic-memory \
+ --bm-repo-path /path/to/basic-memory \
--revisions pre_fusion=f5a0e942^ \
--revisions fusion=f5a0e942 \
--revisions context_step1=f9b2a075 \
Expand Down
17 changes: 9 additions & 8 deletions justfile
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@ set dotenv-load := true

# --- Paths and defaults ---

- bm_local_path := "/Users/phernandez/dev/basicmachines/basic-memory"
+ bm_local_path := env_var_or_default("BM_LOCAL_PATH", "")
+ bm_local_path_flag := if bm_local_path != "" { "--bm-local-path " + bm_local_path } else { "" }
locomo_dataset_path := "benchmarks/datasets/locomo/locomo10.json"
locomo_output_dir := "benchmarks/generated/locomo"
locomo_c1_output_dir := "benchmarks/generated/locomo-c1"
Expand Down Expand Up @@ -84,7 +85,7 @@ bench-run-short:
--corpus-dir benchmarks/generated/locomo-c1/docs \
--queries-path benchmarks/generated/locomo-c1/queries.quick25.json \
--providers bm-local,mem0-local \
- --bm-local-path {{bm_local_path}} \
+ {{bm_local_path_flag}} \
--allow-provider-skip

bench-run-short-strict:
Expand All @@ -94,7 +95,7 @@ bench-run-short-strict:
--corpus-dir benchmarks/generated/locomo-c1/docs \
--queries-path benchmarks/generated/locomo-c1/queries.quick25.json \
--providers bm-local,mem0-local \
- --bm-local-path {{bm_local_path}} \
+ {{bm_local_path_flag}} \
--strict-providers

# Long benchmark: full LoCoMo query set
Expand All @@ -105,7 +106,7 @@ bench-run-long:
--corpus-dir benchmarks/generated/locomo/docs \
--queries-path benchmarks/generated/locomo/queries.json \
--providers bm-local,mem0-local \
- --bm-local-path {{bm_local_path}} \
+ {{bm_local_path_flag}} \
--allow-provider-skip

bench-run-long-strict:
Expand All @@ -115,7 +116,7 @@ bench-run-long-strict:
--corpus-dir benchmarks/generated/locomo/docs \
--queries-path benchmarks/generated/locomo/queries.json \
--providers bm-local,mem0-local \
- --bm-local-path {{bm_local_path}} \
+ {{bm_local_path_flag}} \
--strict-providers

bench-run-bm-local:
Expand All @@ -125,7 +126,7 @@ bench-run-bm-local:
--dataset-path {{locomo_dataset_path}} \
--corpus-dir benchmarks/generated/locomo/docs \
--queries-path benchmarks/generated/locomo/queries.json \
- --bm-local-path {{bm_local_path}}
+ {{bm_local_path_flag}}

bench-run-mem0-local:
uv run bm-bench run retrieval \
Expand All @@ -143,7 +144,7 @@ bench-run-full:
--corpus-dir benchmarks/generated/locomo/docs \
--queries-path benchmarks/generated/locomo/queries.json \
--providers bm-local,mem0-local \
- --bm-local-path {{bm_local_path}} \
+ {{bm_local_path_flag}} \
--allow-provider-skip

bench-run-full-judge model="gpt-4o-mini":
Expand All @@ -153,7 +154,7 @@ bench-run-full-judge model="gpt-4o-mini":
--corpus-dir benchmarks/generated/locomo/docs \
--queries-path benchmarks/generated/locomo/queries.json \
--providers bm-local,mem0-local \
- --bm-local-path {{bm_local_path}} \
+ {{bm_local_path_flag}} \
--allow-provider-skip \
--judge \
--judge-model "{{model}}"
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ authors = [
]
requires-python = ">=3.12"
dependencies = [
- "basic-memory @ git+https://github.com/basicmachines-co/basic-memory@main",
+ "basic-memory>=0.20.2",
"httpx>=0.28.0",
"mcp>=1.23.1",
"mem0ai==1.0.0",
Expand Down
16 changes: 13 additions & 3 deletions uv.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading