AzureCosmosDB · aayush3011 · May 31, 2026 · May 30, 2026 · May 30, 2026 · May 30, 2026
diff --git a/.env.template b/.env.template
@@ -10,8 +10,12 @@ COSMOS_DB__accountEndpoint=https://<your-account>.documents.azure.com:443/
 # letting you create databases/containers without RBAC role assignments.
 # Leave blank to use DefaultAzureCredential / managed identity.
 COSMOS_DB_KEY=
+# Database name. Must match the COSMOS_DB_DATABASE setting read by the Function App.
+# Default is "ai_memory"; change only if your infra deploys a different name.
 COSMOS_DB_DATABASE=ai_memory
-COSMOS_DB_CONTAINER=memories
+COSMOS_DB_MEMORIES_CONTAINER=memories
+COSMOS_DB_SUMMARIES_CONTAINER=memories_summaries
+COSMOS_DB_TURNS_CONTAINER=memories_turns
 COSMOS_DB_COUNTERS_CONTAINER=counter
 COSMOS_DB_LEASE_CONTAINER=leases
 # Throughput mode for all required Cosmos DB containers created by the toolkit
@@ -29,6 +33,21 @@ THREAD_SUMMARY_EVERY_N=10
 FACT_EXTRACTION_EVERY_N=1
 USER_SUMMARY_EVERY_N=20
 
+# ---- Processor ownership (in-process SDK vs. Function App / Durable) ----
+# Controls which side runs the auto-trigger to avoid double-firing when both
+# the SDK and the Function App are deployed against the same database.
+#
+#   * Unset / blank        -> SDK auto-trigger fires; FA change-feed SKIPS.
+#                             (Pure SDK deployments — no env config needed.)
+#   * "inprocess"          -> SDK auto-trigger fires; FA change-feed SKIPS.
+#   * "durable"            -> SDK auto-trigger SKIPS; FA change-feed fires.
+#
+# The contract is asymmetric by design: the FA defaults to skipping
+# (default-deny), so a fresh FA deployment next to an existing SDK install does
+# not race with it. If you run the Function App, you MUST set this to "durable";
+# otherwise the FA's change-feed processing will silently do nothing.
+MEMORY_PROCESSOR_OWNER=
+
 # ---- AI Foundry / Azure OpenAI ----
 AI_FOUNDRY_ENDPOINT=https://<your-account>.openai.azure.com/
 AI_FOUNDRY_API_KEY=
@@ -39,3 +58,6 @@ AI_FOUNDRY_EMBEDDING_DISTANCE_FUNCTION=cosine
 COSMOS_DB_FULL_TEXT_LANGUAGE=en-US
 
 AI_FOUNDRY_CHAT_DEPLOYMENT_NAME=<your-model-deployment>
+# Optional. Pins the Azure OpenAI REST API version used by the chat and
+# embeddings clients. Leave blank to use the toolkit default ("2024-12-01-preview").
+AZURE_OPENAI_API_VERSION=
diff --git a/Docs/azure_testing.md b/Docs/azure_testing.md
@@ -105,7 +105,7 @@ az functionapp config appsettings set \
   --settings \
     COSMOS_DB_ENDPOINT="https://<cosmos-account-name>.documents.azure.com:443/" \
     COSMOS_DB_DATABASE="ai_memory" \
-    COSMOS_DB_CONTAINER="memories" \
+    COSMOS_DB_MEMORIES_CONTAINER="memories" \
     COSMOS_DB_COUNTERS_CONTAINER="counter" \
     COSMOS_DB_LEASE_CONTAINER="leases" \
     COSMOS_DB_THROUGHPUT_MODE="serverless" \
@@ -120,7 +120,7 @@ az functionapp config appsettings set \
     MEMORY_PROCESSOR_OWNER="durable"
 ```
 
-`COSMOS_DB_THROUGHPUT_MODE=serverless` is the default and creates the `memories`, `counter`, and `leases` containers without specifying RU/s. Set `COSMOS_DB_THROUGHPUT_MODE=autoscale` to apply the shared `COSMOS_DB_AUTOSCALE_MAX_RU` cap to all required containers.
+`COSMOS_DB_THROUGHPUT_MODE=serverless` is the default and creates the `memories`, `memories_turns`, `memories_summaries`, `counter`, and `leases` containers without specifying RU/s. Set `COSMOS_DB_THROUGHPUT_MODE=autoscale` to apply the shared `COSMOS_DB_AUTOSCALE_MAX_RU` cap to all required containers.
 
 `MEMORY_PROCESSOR_OWNER=durable` tells the SDK that the deployed Function App owns processing, so any `CosmosMemoryClient` pointed at the same container will skip its in-process auto-trigger and avoid double-extraction. See the README's processor-ownership table for details.
 
@@ -178,7 +178,7 @@ Update `.env` to point at Azure instead of localhost:
 ```env
 COSMOS_DB_ENDPOINT=https://<cosmos-account-name>.documents.azure.com:443/
 COSMOS_DB_DATABASE=ai_memory
-COSMOS_DB_CONTAINER=memories
+COSMOS_DB_MEMORIES_CONTAINER=memories
 COSMOS_DB_COUNTERS_CONTAINER=counter
 COSMOS_DB_LEASE_CONTAINER=leases
 COSMOS_DB_THROUGHPUT_MODE=serverless
@@ -213,7 +213,7 @@ load_dotenv()
 memory = CosmosMemoryClient(
     cosmos_endpoint=os.getenv("COSMOS_DB_ENDPOINT"),
     cosmos_database=os.getenv("COSMOS_DB_DATABASE", "ai_memory"),
-    cosmos_container=os.getenv("COSMOS_DB_CONTAINER", "memories"),
+    cosmos_container=os.getenv("COSMOS_DB_MEMORIES_CONTAINER", "memories"),
     cosmos_counter_container=os.getenv("COSMOS_DB_COUNTERS_CONTAINER", "counter"),
     cosmos_lease_container=os.getenv("COSMOS_DB_LEASE_CONTAINER", "leases"),
     cosmos_throughput_mode=os.getenv("COSMOS_DB_THROUGHPUT_MODE", "serverless"),
@@ -242,7 +242,7 @@ load_dotenv()
 memory = AsyncCosmosMemoryClient(
     cosmos_endpoint=os.getenv("COSMOS_DB_ENDPOINT"),
     cosmos_database=os.getenv("COSMOS_DB_DATABASE", "ai_memory"),
-    cosmos_container=os.getenv("COSMOS_DB_CONTAINER", "memories"),
+    cosmos_container=os.getenv("COSMOS_DB_MEMORIES_CONTAINER", "memories"),
     cosmos_counter_container=os.getenv("COSMOS_DB_COUNTERS_CONTAINER", "counter"),
     cosmos_lease_container=os.getenv("COSMOS_DB_LEASE_CONTAINER", "leases"),
     cosmos_throughput_mode=os.getenv("COSMOS_DB_THROUGHPUT_MODE", "serverless"),
@@ -258,7 +258,7 @@ await memory.connect_cosmos()
 await memory.create_memory_store()
 ```
 
-This provisions the `memories`, `counter`, and `leases` containers. `serverless` is the default throughput mode; if you set `COSMOS_DB_THROUGHPUT_MODE=autoscale`, the shared `COSMOS_DB_AUTOSCALE_MAX_RU` value is applied to all three containers.
+This provisions the `memories`, `memories_turns`, `memories_summaries`, `counter`, and `leases` containers. `serverless` is the default throughput mode; if you set `COSMOS_DB_THROUGHPUT_MODE=autoscale`, the shared `COSMOS_DB_AUTOSCALE_MAX_RU` value is applied to all five containers.
 
 ---
 
@@ -314,9 +314,9 @@ for i in range(10):
 
 # Wait for the change-feed processor to catch up, then read derived memories.
 import time; time.sleep(15)
-print(memory.get_memories(user_id="user-1", thread_id="thread-1", memory_types=["summary"]))
+print(memory.get_thread_summary(user_id="user-1", thread_id="thread-1"))
 print(memory.get_memories(user_id="user-1", memory_types=["fact"]))
-print(memory.get_memories(user_id="user-1", memory_types=["user_summary"]))
+print(memory.get_user_summary(user_id="user-1"))
 ```
 
 ### Change feed auto-processing
@@ -339,7 +339,7 @@ for i in range(3):
 # Wait a few seconds for the change feed to trigger, then check:
 import time
 time.sleep(10)
-results = memory.get_memories(user_id="user-1", thread_id=thread_id, memory_types=["summary"])
+results = memory.get_thread_summary(user_id="user-1", thread_id=thread_id)
 print(results)  # Should contain an auto-generated summary
 ```
 
@@ -348,9 +348,9 @@ Check the Function App logs to confirm the `on_memory_change` trigger fired and
 ### Verify stored results
 
 ```python
-print(memory.get_memories(user_id="user-1", memory_types=["summary"]))
+print(memory.get_thread_summary(user_id="user-1", thread_id="thread-1"))
 print(memory.get_memories(user_id="user-1", memory_types=["fact"]))
-print(memory.get_memories(user_id="user-1", memory_types=["user_summary"]))
+print(memory.get_user_summary(user_id="user-1"))
 ```
 
 ---

diff --git a/Docs/concepts.md b/Docs/concepts.md
@@ -179,16 +179,18 @@ Set any value to `0` to disable that processing type. For example, setting `THRE
 
 | Container | Partition Key | Purpose |
 |-----------|---------------|---------|
-| `memories` | `/user_id`, `/thread_id` (hierarchical) | Existing memory store |
+| `memories` | `/user_id`, `/thread_id` (hierarchical) | Durable derived memories (`fact`, `episodic`, `procedural`) |
+| `memories_turns` | `/user_id`, `/thread_id` (hierarchical) | Raw conversation turns (`turn`) — append-only, TTL-pruned |
+| `memories_summaries` | `/user_id`, `/thread_id` (hierarchical) | Thread and user summaries (`thread_summary`, `user_summary`) |
 | `counter` | `/user_id`, `/thread_id` (hierarchical) | Message count tracking for automatic processing |
 | `leases` | `/id` | Change feed checkpointing container created by `create_memory_store()` |
 
 ### Throughput configuration
 
 The toolkit provisions all required Cosmos containers under one shared throughput mode:
 
-- `serverless` is the default. The toolkit creates the `memories`, `counter`, and `leases` containers without specifying RU/s.
-- `autoscale` applies the shared `COSMOS_DB_AUTOSCALE_MAX_RU` cap to all three containers.
+- `serverless` is the default. The toolkit creates the `memories`, `memories_turns`, `memories_summaries`, `counter`, and `leases` containers without specifying RU/s.
+- `autoscale` applies the shared `COSMOS_DB_AUTOSCALE_MAX_RU` cap to all five containers.
 
 This keeps the change feed dependencies aligned with the main memory store instead of letting the Functions trigger create the lease container independently.
 

diff --git a/Docs/local_testing.md b/Docs/local_testing.md
@@ -71,7 +71,7 @@ Minimum `.env` values:
 ```env
 COSMOS_DB_ENDPOINT=https://<your-account>.documents.azure.com:443/
 COSMOS_DB_DATABASE=ai_memory
-COSMOS_DB_CONTAINER=memories
+COSMOS_DB_MEMORIES_CONTAINER=memories
 COSMOS_DB_COUNTERS_CONTAINER=counter
 COSMOS_DB_LEASE_CONTAINER=leases
 COSMOS_DB_THROUGHPUT_MODE=serverless
@@ -115,9 +115,9 @@ No Azure resources are required for local in-memory operations.
 
 ```python
 import uuid
-from agent_memory_toolkit import AgentMemory
+from agent_memory_toolkit import CosmosMemoryClient
 
-memory = AgentMemory(use_default_credential=False)
+memory = CosmosMemoryClient(use_default_credential=False)
 
 THREAD_ID = str(uuid.uuid4())
 
@@ -133,7 +133,7 @@ memory.delete_local(mem_id)
 print(f"Remaining: {len(memory.get_local())}")
 ```
 
-`AsyncAgentMemory` works the same way for local operations (local methods are synchronous).
+`AsyncCosmosMemoryClient` works the same way for local operations (local methods are synchronous).
 
 ---
 
@@ -153,14 +153,14 @@ Then run a minimal smoke test:
 import os, uuid
 from dotenv import load_dotenv
 from azure.identity import DefaultAzureCredential
-from agent_memory_toolkit import AgentMemory
+from agent_memory_toolkit import CosmosMemoryClient
 
 load_dotenv()
 
-memory = AgentMemory(
+memory = CosmosMemoryClient(
     cosmos_endpoint=os.getenv("COSMOS_DB_ENDPOINT"),
     cosmos_database=os.getenv("COSMOS_DB_DATABASE"),
-    cosmos_container=os.getenv("COSMOS_DB_CONTAINER"),
+    cosmos_container=os.getenv("COSMOS_DB_MEMORIES_CONTAINER"),
     cosmos_counter_container=os.getenv("COSMOS_DB_COUNTERS_CONTAINER", "counter"),
     cosmos_lease_container=os.getenv("COSMOS_DB_LEASE_CONTAINER", "leases"),
     cosmos_throughput_mode=os.getenv("COSMOS_DB_THROUGHPUT_MODE", "serverless"),
@@ -196,14 +196,14 @@ for r in results:
 import os, uuid
 from dotenv import load_dotenv
 from azure.identity.aio import DefaultAzureCredential as AsyncDefaultAzureCredential
-from agent_memory_toolkit.aio import AsyncAgentMemory
+from agent_memory_toolkit.aio import AsyncCosmosMemoryClient
 
 load_dotenv()
 
-memory = AsyncAgentMemory(
+memory = AsyncCosmosMemoryClient(
     cosmos_endpoint=os.getenv("COSMOS_DB_ENDPOINT"),
     cosmos_database=os.getenv("COSMOS_DB_DATABASE"),
-    cosmos_container=os.getenv("COSMOS_DB_CONTAINER"),
+    cosmos_container=os.getenv("COSMOS_DB_MEMORIES_CONTAINER"),
     cosmos_counter_container=os.getenv("COSMOS_DB_COUNTERS_CONTAINER", "counter"),
     cosmos_lease_container=os.getenv("COSMOS_DB_LEASE_CONTAINER", "leases"),
     cosmos_throughput_mode=os.getenv("COSMOS_DB_THROUGHPUT_MODE", "serverless"),
@@ -219,7 +219,7 @@ memory = AsyncAgentMemory(
 await memory.connect_cosmos(
     endpoint=os.getenv("COSMOS_DB_ENDPOINT"),
     database=os.getenv("COSMOS_DB_DATABASE"),
-    container=os.getenv("COSMOS_DB_CONTAINER"),
+    container=os.getenv("COSMOS_DB_MEMORIES_CONTAINER"),
     credential=AsyncDefaultAzureCredential(),
 )
 await memory.create_memory_store()