Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 23 additions & 1 deletion .env.template
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,12 @@ COSMOS_DB__accountEndpoint=https://<your-account>.documents.azure.com:443/
# letting you create databases/containers without RBAC role assignments.
# Leave blank to use DefaultAzureCredential / managed identity.
COSMOS_DB_KEY=
# Database name. Must match what the Function App reads (COSMOS_DB_DATABASE there).
# Default is "ai_memory"; change only if your infra deploys a different name.
COSMOS_DB_DATABASE=ai_memory
COSMOS_DB_CONTAINER=memories
COSMOS_DB_MEMORIES_CONTAINER="memories"
COSMOS_DB_SUMMARIES_CONTAINER="memories_summaries"
COSMOS_DB_TURNS_CONTAINER="memories_turns"
COSMOS_DB_COUNTERS_CONTAINER=counter
COSMOS_DB_LEASE_CONTAINER=leases
# Throughput mode for all required Cosmos DB containers created by the toolkit
Expand All @@ -29,6 +33,21 @@ THREAD_SUMMARY_EVERY_N=10
FACT_EXTRACTION_EVERY_N=1
USER_SUMMARY_EVERY_N=20

# ---- Processor ownership (in-process SDK vs. Function App / Durable) ----
# Controls which side runs the auto-trigger to avoid double-firing when both
# the SDK and the Function App (FA) are deployed against the same database.
#
# * Unset / blank -> SDK auto-trigger fires; FA change-feed SKIPS.
# (Pure SDK deployments — no env config needed.)
# * "inprocess" -> SDK auto-trigger fires; FA change-feed SKIPS.
# * "durable" -> SDK auto-trigger SKIPS; FA change-feed fires.
#
# The contract is asymmetric by design: the FA defaults to skip (default-deny)
# so a fresh FA deploy next to an existing SDK install does not race. If you
# run the Function App, you MUST set this to "durable" or the Function App's
# change-feed processing will silently no-op.
MEMORY_PROCESSOR_OWNER=

# ---- AI Foundry / Azure OpenAI ----
AI_FOUNDRY_ENDPOINT=https://<your-account>.openai.azure.com/
AI_FOUNDRY_API_KEY=
Expand All @@ -39,3 +58,6 @@ AI_FOUNDRY_EMBEDDING_DISTANCE_FUNCTION=cosine
COSMOS_DB_FULL_TEXT_LANGUAGE=en-US

AI_FOUNDRY_CHAT_DEPLOYMENT_NAME=<your-model-deployment>
# Optional. Pin the Azure OpenAI REST API version used by chat and embeddings
# clients. Leave blank to use the toolkit default ("2024-12-01-preview").
AZURE_OPENAI_API_VERSION=
22 changes: 11 additions & 11 deletions Docs/azure_testing.md
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ az functionapp config appsettings set \
--settings \
COSMOS_DB_ENDPOINT="https://<cosmos-account-name>.documents.azure.com:443/" \
COSMOS_DB_DATABASE="ai_memory" \
COSMOS_DB_CONTAINER="memories" \
COSMOS_DB_MEMORIES_CONTAINER="memories" \
COSMOS_DB_COUNTERS_CONTAINER="counter" \
COSMOS_DB_LEASE_CONTAINER="leases" \
COSMOS_DB_THROUGHPUT_MODE="serverless" \
Expand All @@ -120,7 +120,7 @@ az functionapp config appsettings set \
MEMORY_PROCESSOR_OWNER="durable"
```

`COSMOS_DB_THROUGHPUT_MODE=serverless` is the default and creates the `memories`, `counter`, and `leases` containers without specifying RU/s. Set `COSMOS_DB_THROUGHPUT_MODE=autoscale` to apply the shared `COSMOS_DB_AUTOSCALE_MAX_RU` cap to all required containers.
`COSMOS_DB_THROUGHPUT_MODE=serverless` is the default and creates the `memories`, `memories_turns`, `memories_summaries`, `counter`, and `leases` containers without specifying RU/s. Set `COSMOS_DB_THROUGHPUT_MODE=autoscale` to apply the shared `COSMOS_DB_AUTOSCALE_MAX_RU` cap to all required containers.

`MEMORY_PROCESSOR_OWNER=durable` tells the SDK that the deployed Function App owns processing, so any `CosmosMemoryClient` pointed at the same database will skip its in-process auto-trigger and avoid double-extraction. See the README's processor-ownership table for details.

Expand Down Expand Up @@ -178,7 +178,7 @@ Update `.env` to point at Azure instead of localhost:
```env
COSMOS_DB_ENDPOINT=https://<cosmos-account-name>.documents.azure.com:443/
COSMOS_DB_DATABASE=ai_memory
COSMOS_DB_CONTAINER=memories
COSMOS_DB_MEMORIES_CONTAINER=memories
COSMOS_DB_COUNTERS_CONTAINER=counter
COSMOS_DB_LEASE_CONTAINER=leases
COSMOS_DB_THROUGHPUT_MODE=serverless
Expand Down Expand Up @@ -213,7 +213,7 @@ load_dotenv()
memory = CosmosMemoryClient(
cosmos_endpoint=os.getenv("COSMOS_DB_ENDPOINT"),
cosmos_database=os.getenv("COSMOS_DB_DATABASE", "ai_memory"),
cosmos_container=os.getenv("COSMOS_DB_CONTAINER", "memories"),
cosmos_container=os.getenv("COSMOS_DB_MEMORIES_CONTAINER", "memories"),
cosmos_counter_container=os.getenv("COSMOS_DB_COUNTERS_CONTAINER", "counter"),
cosmos_lease_container=os.getenv("COSMOS_DB_LEASE_CONTAINER", "leases"),
cosmos_throughput_mode=os.getenv("COSMOS_DB_THROUGHPUT_MODE", "serverless"),
Expand Down Expand Up @@ -242,7 +242,7 @@ load_dotenv()
memory = AsyncCosmosMemoryClient(
cosmos_endpoint=os.getenv("COSMOS_DB_ENDPOINT"),
cosmos_database=os.getenv("COSMOS_DB_DATABASE", "ai_memory"),
cosmos_container=os.getenv("COSMOS_DB_CONTAINER", "memories"),
cosmos_container=os.getenv("COSMOS_DB_MEMORIES_CONTAINER", "memories"),
cosmos_counter_container=os.getenv("COSMOS_DB_COUNTERS_CONTAINER", "counter"),
cosmos_lease_container=os.getenv("COSMOS_DB_LEASE_CONTAINER", "leases"),
cosmos_throughput_mode=os.getenv("COSMOS_DB_THROUGHPUT_MODE", "serverless"),
Expand All @@ -258,7 +258,7 @@ await memory.connect_cosmos()
await memory.create_memory_store()
```

This provisions the `memories`, `counter`, and `leases` containers. `serverless` is the default throughput mode; if you set `COSMOS_DB_THROUGHPUT_MODE=autoscale`, the shared `COSMOS_DB_AUTOSCALE_MAX_RU` value is applied to all three containers.
This provisions the `memories`, `memories_turns`, `memories_summaries`, `counter`, and `leases` containers. `serverless` is the default throughput mode; if you set `COSMOS_DB_THROUGHPUT_MODE=autoscale`, the shared `COSMOS_DB_AUTOSCALE_MAX_RU` value is applied to all five containers.

---

Expand Down Expand Up @@ -314,9 +314,9 @@ for i in range(10):

# Wait for the change-feed processor to catch up, then read derived memories.
import time; time.sleep(15)
print(memory.get_memories(user_id="user-1", thread_id="thread-1", memory_types=["summary"]))
print(memory.get_thread_summary(user_id="user-1", thread_id="thread-1"))
print(memory.get_memories(user_id="user-1", memory_types=["fact"]))
print(memory.get_memories(user_id="user-1", memory_types=["user_summary"]))
print(memory.get_user_summary(user_id="user-1"))
```

### Change feed auto-processing
Expand All @@ -339,7 +339,7 @@ for i in range(3):
# Wait a few seconds for the change feed to trigger, then check:
import time
time.sleep(10)
results = memory.get_memories(user_id="user-1", thread_id=thread_id, memory_types=["summary"])
results = memory.get_thread_summary(user_id="user-1", thread_id=thread_id)
print(results) # Should contain an auto-generated summary
```

Expand All @@ -348,9 +348,9 @@ Check the Function App logs to confirm the `on_memory_change` trigger fired and
### Verify stored results

```python
print(memory.get_memories(user_id="user-1", memory_types=["summary"]))
print(memory.get_thread_summary(user_id="user-1", thread_id="thread-1"))
print(memory.get_memories(user_id="user-1", memory_types=["fact"]))
print(memory.get_memories(user_id="user-1", memory_types=["user_summary"]))
print(memory.get_user_summary(user_id="user-1"))
```

---
Expand Down
8 changes: 5 additions & 3 deletions Docs/concepts.md
Original file line number Diff line number Diff line change
Expand Up @@ -179,16 +179,18 @@ Set any value to `0` to disable that processing type. For example, setting `THRE

| Container | Partition Key | Purpose |
|-----------|---------------|---------|
| `memories` | `/user_id`, `/thread_id` (hierarchical) | Existing memory store |
| `memories` | `/user_id`, `/thread_id` (hierarchical) | Durable derived memories (`fact`, `episodic`, `procedural`) |
| `memories_turns` | `/user_id`, `/thread_id` (hierarchical) | Raw conversation turns (`turn`) — append-only, TTL-pruned |
| `memories_summaries` | `/user_id`, `/thread_id` (hierarchical) | Thread + user summaries (`thread_summary`, `user_summary`) |
| `counter` | `/user_id`, `/thread_id` (hierarchical) | Message count tracking for automatic processing |
| `leases` | `/id` | Change feed checkpointing container created by `create_memory_store()` |

### Throughput configuration

The toolkit provisions all required Cosmos containers under one shared throughput mode:

- `serverless` is the default. The toolkit creates the `memories`, `counter`, and `leases` containers without specifying RU/s.
- `autoscale` applies the shared `COSMOS_DB_AUTOSCALE_MAX_RU` cap to all three containers.
- `serverless` is the default. The toolkit creates the `memories`, `memories_turns`, `memories_summaries`, `counter`, and `leases` containers without specifying RU/s.
- `autoscale` applies the shared `COSMOS_DB_AUTOSCALE_MAX_RU` cap to all five containers.

This keeps the change feed dependencies aligned with the main memory store instead of letting the Functions trigger create the lease container independently.

Expand Down
22 changes: 11 additions & 11 deletions Docs/local_testing.md
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ Minimum `.env` values:
```env
COSMOS_DB_ENDPOINT=https://<your-account>.documents.azure.com:443/
COSMOS_DB_DATABASE=ai_memory
COSMOS_DB_CONTAINER=memories
COSMOS_DB_MEMORIES_CONTAINER=memories
COSMOS_DB_COUNTERS_CONTAINER=counter
COSMOS_DB_LEASE_CONTAINER=leases
COSMOS_DB_THROUGHPUT_MODE=serverless
Expand Down Expand Up @@ -115,9 +115,9 @@ No Azure resources are required for local in-memory operations.

```python
import uuid
from agent_memory_toolkit import AgentMemory
from agent_memory_toolkit import CosmosMemoryClient

memory = AgentMemory(use_default_credential=False)
memory = CosmosMemoryClient(use_default_credential=False)

THREAD_ID = str(uuid.uuid4())

Expand All @@ -133,7 +133,7 @@ memory.delete_local(mem_id)
print(f"Remaining: {len(memory.get_local())}")
```

`AsyncAgentMemory` works the same way for local operations (local methods are synchronous).
`AsyncCosmosMemoryClient` works the same way for local operations (local methods are synchronous).

---

Expand All @@ -153,14 +153,14 @@ Then run a minimal smoke test:
import os, uuid
from dotenv import load_dotenv
from azure.identity import DefaultAzureCredential
from agent_memory_toolkit import AgentMemory
from agent_memory_toolkit import CosmosMemoryClient

load_dotenv()

memory = AgentMemory(
memory = CosmosMemoryClient(
cosmos_endpoint=os.getenv("COSMOS_DB_ENDPOINT"),
cosmos_database=os.getenv("COSMOS_DB_DATABASE"),
cosmos_container=os.getenv("COSMOS_DB_CONTAINER"),
cosmos_container=os.getenv("COSMOS_DB_MEMORIES_CONTAINER"),
cosmos_counter_container=os.getenv("COSMOS_DB_COUNTERS_CONTAINER", "counter"),
cosmos_lease_container=os.getenv("COSMOS_DB_LEASE_CONTAINER", "leases"),
cosmos_throughput_mode=os.getenv("COSMOS_DB_THROUGHPUT_MODE", "serverless"),
Expand Down Expand Up @@ -196,14 +196,14 @@ for r in results:
import os, uuid
from dotenv import load_dotenv
from azure.identity.aio import DefaultAzureCredential as AsyncDefaultAzureCredential
from agent_memory_toolkit.aio import AsyncAgentMemory
from agent_memory_toolkit.aio import AsyncCosmosMemoryClient

load_dotenv()

memory = AsyncAgentMemory(
memory = AsyncCosmosMemoryClient(
cosmos_endpoint=os.getenv("COSMOS_DB_ENDPOINT"),
cosmos_database=os.getenv("COSMOS_DB_DATABASE"),
cosmos_container=os.getenv("COSMOS_DB_CONTAINER"),
cosmos_container=os.getenv("COSMOS_DB_MEMORIES_CONTAINER"),
cosmos_counter_container=os.getenv("COSMOS_DB_COUNTERS_CONTAINER", "counter"),
cosmos_lease_container=os.getenv("COSMOS_DB_LEASE_CONTAINER", "leases"),
cosmos_throughput_mode=os.getenv("COSMOS_DB_THROUGHPUT_MODE", "serverless"),
Expand All @@ -219,7 +219,7 @@ memory = AsyncAgentMemory(
await memory.connect_cosmos(
endpoint=os.getenv("COSMOS_DB_ENDPOINT"),
database=os.getenv("COSMOS_DB_DATABASE"),
container=os.getenv("COSMOS_DB_CONTAINER"),
container=os.getenv("COSMOS_DB_MEMORIES_CONTAINER"),
credential=AsyncDefaultAzureCredential(),
)
await memory.create_memory_store()
Expand Down
Loading
Loading