Skip to content
87 changes: 87 additions & 0 deletions benchmarks/bench_credential_cache.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
"""
Benchmark: Credential Instance Caching for Azure AD Authentication

Measures the performance difference between:
1. Creating a new DefaultAzureCredential + get_token() each call (old behavior)
2. Reusing a cached DefaultAzureCredential instance (new behavior)

Prerequisites:
- pip install azure-identity azure-core
- az login (for AzureCliCredential to work)

Usage:
python benchmarks/bench_credential_cache.py
"""

from __future__ import annotations

import time
import statistics


def bench_no_cache(n: int) -> list[float]:
    """Time ``n`` token requests, building a brand-new credential for each.

    This mirrors the old behavior: every call constructs its own
    ``DefaultAzureCredential`` and then asks it for a token. Both the
    construction and the ``get_token()`` call are inside the timed region.

    Args:
        n: Number of sequential acquisitions to perform.

    Returns:
        One elapsed time in seconds per acquisition, in call order.
    """
    # Imported lazily so a missing azure-identity surfaces when the benchmark
    # runs rather than when the module is imported.
    from azure.identity import DefaultAzureCredential

    durations = []
    iteration = 0
    while iteration < n:
        began = time.perf_counter()
        fresh_credential = DefaultAzureCredential()
        fresh_credential.get_token("https://database.windows.net/.default")
        elapsed = time.perf_counter() - began
        durations.append(elapsed)
        iteration += 1
    return durations


def bench_with_cache(n: int) -> list[float]:
    """Time ``n`` token requests made through one shared credential.

    This mirrors the new behavior: a single ``DefaultAzureCredential`` is
    built once, outside the timed region, and every iteration only times
    the ``get_token()`` call on that same instance.

    Args:
        n: Number of sequential acquisitions to perform.

    Returns:
        One elapsed time in seconds per acquisition, in call order.
    """
    from azure.identity import DefaultAzureCredential

    shared_credential = DefaultAzureCredential()
    durations = []
    iteration = 0
    while iteration < n:
        began = time.perf_counter()
        shared_credential.get_token("https://database.windows.net/.default")
        elapsed = time.perf_counter() - began
        durations.append(elapsed)
        iteration += 1
    return durations


def report(label: str, times: list[float]) -> None:
    """Print a summary of timing samples under a banner heading.

    Args:
        label: Heading printed between the two banner rules.
        times: Per-call durations in seconds. May be empty or hold a
            single sample.

    The call count and total are always printed. Mean, median, min and max
    are printed only when there is at least one sample, and the Stdev row
    only when there are at least two, because ``statistics.stdev`` needs
    two data points. Rows that cannot be computed are omitted entirely
    rather than printed as blank lines.
    """
    rule = "=" * 50
    print(f"\n{rule}")
    print(f" {label}")
    print(rule)
    print(f" Calls: {len(times)}")
    print(f" Total: {sum(times):.3f}s")

    if not times:
        # mean/median/min/max are undefined for an empty sample set.
        return

    print(f" Mean: {statistics.mean(times) * 1000:.1f}ms")
    print(f" Median: {statistics.median(times) * 1000:.1f}ms")
    if len(times) > 1:
        print(f" Stdev: {statistics.stdev(times) * 1000:.1f}ms")
    print(f" Min: {min(times) * 1000:.1f}ms")
    print(f" Max: {max(times) * 1000:.1f}ms")


def main() -> None:
    """Run both benchmark scenarios and print their statistics and speedup.

    Each scenario performs the same fixed number of sequential token
    acquisitions. Any exception raised while benchmarking or reporting is
    caught here, printed, and followed by a setup hint, so the script ends
    with a readable message instead of a traceback.
    """
    call_count = 10  # number of calls to benchmark

    print("Credential Instance Cache Benchmark")
    print(f"Running {call_count} sequential token acquisitions for each scenario...\n")

    try:
        print(">>> Without cache (new credential each call)...")
        uncached_times = bench_no_cache(call_count)
        report("WITHOUT credential cache (old behavior)", uncached_times)

        print("\n>>> With cache (reuse credential instance)...")
        cached_times = bench_with_cache(call_count)
        report("WITH credential cache (new behavior)", cached_times)

        # Compute each mean once and derive both comparison figures from them.
        uncached_mean = statistics.mean(uncached_times)
        cached_mean = statistics.mean(cached_times)
        speedup = uncached_mean / cached_mean
        saved = (uncached_mean - cached_mean) * 1000
        print(f"\n{'=' * 50}")
        print(f" SPEEDUP: {speedup:.1f}x ({saved:.0f}ms saved per call)")
        print(f"{'=' * 50}")
    except Exception as exc:
        print(f"\nBenchmark failed: {exc}")
        print("Make sure you are logged in via 'az login' and have azure-identity installed.")


if __name__ == "__main__":
    main()
31 changes: 25 additions & 6 deletions mssql_python/auth.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,19 @@

import platform
import struct
import threading
from typing import Tuple, Dict, Optional, List

from mssql_python.logging import logger
from mssql_python.constants import AuthType, ConstantsDDBC

# Module-level credential instance cache.
# Reusing credential objects allows the Azure Identity SDK's built-in
# in-memory token cache to work, avoiding redundant token acquisitions.
# See: https://github.com/Azure/azure-sdk-for-python/blob/main/sdk/identity/azure-identity/TOKEN_CACHING.md
#
# Keyed by the auth_type string; each value is the credential instance that
# was created the first time that auth_type was requested.
_credential_cache: Dict[str, object] = {}
# Held while looking up or inserting a cache entry, so the check for an
# existing credential and the creation of a new one happen as one step.
_credential_cache_lock = threading.Lock()


class AADAuth:
"""Handles Azure Active Directory authentication"""
Expand All @@ -36,12 +44,11 @@ def get_token(auth_type: str) -> bytes:

@staticmethod
def get_raw_token(auth_type: str) -> str:
    """Acquire a raw JWT for the mssql-py-core connection (bulk copy).

    Uses the cached credential instance so the Azure Identity SDK's
    built-in token cache can serve a valid token without a round-trip
    when the previous token has not yet expired.

    Args:
        auth_type: Authentication type, passed through unchanged to
            ``_acquire_token``.

    Returns:
        The raw token string, i.e. the second element of the tuple
        returned by ``_acquire_token``.
    """
    _, raw_token = AADAuth._acquire_token(auth_type)
    return raw_token
Expand Down Expand Up @@ -83,7 +90,19 @@ def _acquire_token(auth_type: str) -> Tuple[bytes, str]:
)

try:
credential = credential_class()
with _credential_cache_lock:
if auth_type not in _credential_cache:
logger.debug(
"get_token: Creating new credential instance for auth_type=%s",
auth_type,
)
_credential_cache[auth_type] = credential_class()
else:
logger.debug(
"get_token: Reusing cached credential instance for auth_type=%s",
auth_type,
)
credential = _credential_cache[auth_type]
raw_token = credential.get_token("https://database.windows.net/.default").token
logger.info(
"get_token: Azure AD token acquired successfully - token_length=%d chars",
Expand Down
Loading
Loading