Skip to content

Commit bbda2a2

Browse files
bewithgauravCopilot
andcommitted
Drop brace-aware splitter; keep MSI extraction at parity with rest of parser
Reverts 825d902's _split_top_level_params helper and the test_msi_braced_value_cannot_forge_uid test. The brace-aware fix prevents a Database value containing ';uid=<guid>;' from spoofing a top-level UID, but: - The same naive split(';') exists in process_connection_string and extract_auth_type for ALL Entra ID auth types (Default, DeviceCode, Interactive, MSI). Hardening only the MSI helper is inconsistent. - A Database name with embedded semicolons + a syntactically valid GUID after ';uid=' is vanishingly unlikely in practice. - The fix belongs in its own PR scoped to the connection-string parser, not in the MSI feature PR. What stays: - H1 fix: persist _credential_kwargs on Connection so cursor.bulkcopy() acquires its fresh token with the right user-assigned client_id - Real cursor.bulkcopy() regression test (mocks mssql_py_core, captures AADAuth.get_raw_token args) - Connection.__init__ persistence tests - Black formatting Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
1 parent 825d902 commit bbda2a2

2 files changed

Lines changed: 6 additions & 86 deletions

File tree

mssql_python/auth.py

Lines changed: 6 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -150,65 +150,13 @@ def _acquire_token(
150150
raise RuntimeError(f"Failed to create {credential_class.__name__}: {e}") from e
151151

152152

153-
def _split_top_level_params(connection_string: str) -> List[str]:
154-
"""Split an ODBC connection string on top-level ``;`` separators.
155-
156-
Honors braced values: ``;`` inside ``{...}`` is part of the value, not a
157-
separator. ``}}`` inside braces is the ODBC escape for a literal ``}``
158-
and does not close the brace.
159-
160-
This exists so that auth-related extraction (Authentication, UID for MSI)
161-
cannot be forged by a semicolon inside a legitimate braced value such as
162-
``Database={foo;uid=<guid>;bar}``. Naive ``split(';')`` would let that
163-
value spoof a top-level ``UID=`` and silently switch the bulkcopy path
164-
to a wrong identity.
165-
166-
Returns chunks with surrounding whitespace preserved (callers strip).
167-
"""
168-
parts: List[str] = []
169-
current: List[str] = []
170-
depth = 0
171-
i = 0
172-
n = len(connection_string)
173-
while i < n:
174-
c = connection_string[i]
175-
if c == "{":
176-
depth += 1
177-
current.append(c)
178-
elif c == "}":
179-
# ``}}`` inside braces is an escaped literal ``}``.
180-
if depth > 0 and i + 1 < n and connection_string[i + 1] == "}":
181-
current.append("}}")
182-
i += 2
183-
continue
184-
if depth > 0:
185-
depth -= 1
186-
current.append(c)
187-
elif c == ";" and depth == 0:
188-
parts.append("".join(current))
189-
current = []
190-
else:
191-
current.append(c)
192-
i += 1
193-
parts.append("".join(current))
194-
return parts
195-
196-
197-
def _extract_msi_client_id(connection_string: str) -> Optional[str]:
198-
"""Pull UID out of a connection string for user-assigned MSI.
153+
def _extract_msi_client_id(parameters: List[str]) -> Optional[str]:
154+
"""Pull UID out of connection parameters for user-assigned MSI.
199155
200156
For ActiveDirectoryMSI, UID (when present) carries the user-assigned
201-
identity's ``client_id``. Returns None for system-assigned MSI.
202-
203-
Uses brace-aware splitting so a semicolon inside a legitimate braced
204-
ODBC value (e.g. ``Database={foo;uid=<guid>;bar}``) cannot spoof a
205-
top-level ``UID=`` and silently switch the bulkcopy path to a wrong
206-
identity. The pre-existing naive ``split(';')`` in
207-
``process_connection_string`` / ``extract_auth_type`` has the same
208-
class of issue for ``Authentication=`` detection across all auth
209-
types; that is out of scope for this PR and tracked separately.
157+
identity's client_id. Returns None for system-assigned MSI.
210158
"""
211-
for param in _split_top_level_params(connection_string):
159+
for param in parameters:
212160
key, _, value = param.strip().partition("=")
213161
if key.strip().lower() == "uid":
214162
value = value.strip()
@@ -400,13 +348,10 @@ def process_connection_string(
400348
modified_parameters, auth_type = process_auth_parameters(parameters)
401349

402350
# Capture credential kwargs (e.g. user-assigned MSI client_id) before
403-
# remove_sensitive_params strips UID from the parameter list. Pass the
404-
# original connection_string (not modified_parameters) so the helper can
405-
# do its own brace-aware split — a Database value containing ';uid=...'
406-
# in braces must not spoof a top-level UID.
351+
# remove_sensitive_params strips UID from the parameter list.
407352
credential_kwargs: Dict[str, str] = {}
408353
if auth_type == "msi":
409-
client_id = _extract_msi_client_id(connection_string)
354+
client_id = _extract_msi_client_id(modified_parameters)
410355
if client_id:
411356
credential_kwargs["client_id"] = client_id
412357

tests/test_008_auth.py

Lines changed: 0 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -573,31 +573,6 @@ def fake_get_raw_token(auth_type, credential_kwargs=None):
573573
f"the cursor likely re-parses the (UID-stripped) connection_str."
574574
)
575575

576-
def test_msi_braced_value_cannot_forge_uid(self):
577-
"""A semicolon inside a legitimate braced ODBC value (e.g. a Database
578-
name) must not spoof a top-level UID= and silently switch the bulkcopy
579-
path to a wrong identity. The MSI client_id extraction is brace-aware
580-
for this reason."""
581-
from mssql_python.connection_string_parser import _ConnectionStringParser
582-
from mssql_python.connection_string_builder import _ConnectionStringBuilder
583-
584-
attacker_id = "00000000-0000-0000-0000-000000000bad"
585-
raw = (
586-
"Server=test;Authentication=ActiveDirectoryMSI;"
587-
f"Database={{foo;uid={attacker_id};bar}}"
588-
)
589-
parsed = _ConnectionStringParser(validate_keywords=True)._parse(raw)
590-
normalized = _ConnectionStringParser._normalize_params(parsed, warn_rejected=False)
591-
built = _ConnectionStringBuilder(normalized).build()
592-
593-
_, _, auth_type, credential_kwargs = process_connection_string(built)
594-
595-
assert auth_type == "msi" # legit Authentication= was top-level
596-
assert credential_kwargs is None, (
597-
f"braced Database value forged UID and produced credential_kwargs={credential_kwargs!r}; "
598-
"_extract_msi_client_id is no longer brace-aware"
599-
)
600-
601576

602577
class TestCredentialInstanceCache:
603578
"""Tests for the credential instance caching behavior."""

0 commit comments

Comments
 (0)