Skip to content

Commit 83ab8ea

Browse files
authored
FEAT: varcharmax streaming support in execute (#206)
### Work Item / Issue Reference

<!--
IMPORTANT: Please follow the PR template guidelines below.
For mssql-python maintainers: Insert your ADO Work Item ID below (e.g. AB#37452)
For external contributors: Insert GitHub Issue number below (e.g. #149)
Only one reference is required - either GitHub issue OR ADO Work Item.
-->

<!-- mssql-python maintainers: ADO Work Item -->
> [AB#33395](https://sqlclientdrivers.visualstudio.com/c6d89619-62de-46a0-8b46-70b92a84d85e/_workitems/edit/33395)

<!-- External contributors: GitHub Issue -->
> GitHub Issue: #<ISSUE_NUMBER>

-------------------------------------------------------------------

### Summary

<!-- Insert your summary of changes below. Minimum 10 characters required. -->

This pull request focuses on improving type safety and error handling for parameter binding and execution in the MSSQL Python driver, especially for string and binary types. The changes ensure that only supported Python types are bound to SQL parameters and that errors are raised for unsupported types, preventing silent failures. Additionally, the data-at-execution (DAE) streaming path for strings is now more robust and explicit.

**Parameter type safety and error handling:**

* Changed `_map_sql_type` in `cursor.py` to raise a `TypeError` for unsupported parameter types instead of defaulting to `SQL_VARCHAR`, preventing silent type mismatches.
* In `ddbc_bindings.cpp`, added explicit type checks for `SQL_C_CHAR` and `SQL_C_BINARY` bindings, raising an error if the Python object is not a string or bytes-like object.

**DAE (Data At Execution) streaming improvements:**

* Enhanced the DAE streaming logic for `SQL_C_CHAR` in `ddbc_bindings.cpp` to handle string parameters more robustly, including chunked streaming via `SQLPutData`.
* Added support for chunked DAE streaming of Python `str` objects mapped to `SQL_C_CHAR`, ensuring large strings are sent in manageable pieces.
**Platform-specific encoding for wide strings:**

* Ensured that wide string (`SQL_C_WCHAR`) parameters are correctly converted to the platform-specific encoding before streaming.

<!--
### PR Title Guide

> For feature requests
FEAT: (short-description)

> For non-feature requests like test case updates, config updates, dependency updates, etc.
CHORE: (short-description)

> For Fix requests
FIX: (short-description)

> For doc update requests
DOC: (short-description)

> For Formatting, indentation, or styling update
STYLE: (short-description)

> For Refactor, without any feature changes
REFACTOR: (short-description)

> For release related changes, without any feature changes
RELEASE: #<RELEASE_VERSION> (short-description)

### Contribution Guidelines

External contributors:
- Create a GitHub issue first: https://github.com/microsoft/mssql-python/issues/new
- Link the GitHub issue in the "GitHub Issue" section above
- Follow the PR title format and provide a meaningful summary

mssql-python maintainers:
- Create an ADO Work Item following internal processes
- Link the ADO Work Item in the "ADO Work Item" section above
- Follow the PR title format and provide a meaningful summary
-->
1 parent 88fb651 commit 83ab8ea

File tree

3 files changed

+252
-20
lines changed

3 files changed

+252
-20
lines changed

mssql_python/cursor.py

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -342,12 +342,15 @@ def _map_sql_type(self, param, parameters_list, i):
342342

343343
# String mapping logic here
344344
is_unicode = self._is_unicode_string(param)
345-
if len(param) > MAX_INLINE_CHAR: # Long strings
345+
346+
# Computes UTF-16 code units (handles surrogate pairs)
347+
utf16_len = sum(2 if ord(c) > 0xFFFF else 1 for c in param)
348+
if utf16_len > MAX_INLINE_CHAR: # Long strings -> DAE
346349
if is_unicode:
347350
return (
348351
ddbc_sql_const.SQL_WLONGVARCHAR.value,
349352
ddbc_sql_const.SQL_C_WCHAR.value,
350-
len(param),
353+
utf16_len,
351354
0,
352355
True,
353356
)
@@ -358,8 +361,9 @@ def _map_sql_type(self, param, parameters_list, i):
358361
0,
359362
True,
360363
)
361-
if is_unicode: # Short Unicode strings
362-
utf16_len = len(param.encode("utf-16-le")) // 2
364+
365+
# Short strings
366+
if is_unicode:
363367
return (
364368
ddbc_sql_const.SQL_WVARCHAR.value,
365369
ddbc_sql_const.SQL_C_WCHAR.value,
@@ -374,7 +378,7 @@ def _map_sql_type(self, param, parameters_list, i):
374378
0,
375379
False,
376380
)
377-
381+
378382
if isinstance(param, bytes):
379383
if len(param) > 8000: # Assuming VARBINARY(MAX) for long byte arrays
380384
return (

mssql_python/pybind/ddbc_bindings.cpp

Lines changed: 62 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -227,7 +227,27 @@ SQLRETURN BindParameters(SQLHANDLE hStmt, const py::list& params,
227227

228228
// TODO: Add more data types like money, guid, interval, TVPs etc.
229229
switch (paramInfo.paramCType) {
230-
case SQL_C_CHAR:
230+
case SQL_C_CHAR: {
231+
if (!py::isinstance<py::str>(param) && !py::isinstance<py::bytearray>(param) &&
232+
!py::isinstance<py::bytes>(param)) {
233+
ThrowStdException(MakeParamMismatchErrorStr(paramInfo.paramCType, paramIndex));
234+
}
235+
if (paramInfo.isDAE) {
236+
LOG("Parameter[{}] is marked for DAE streaming", paramIndex);
237+
dataPtr = const_cast<void*>(reinterpret_cast<const void*>(&paramInfos[paramIndex]));
238+
strLenOrIndPtr = AllocateParamBuffer<SQLLEN>(paramBuffers);
239+
*strLenOrIndPtr = SQL_LEN_DATA_AT_EXEC(0);
240+
bufferLength = 0;
241+
} else {
242+
std::string* strParam =
243+
AllocateParamBuffer<std::string>(paramBuffers, param.cast<std::string>());
244+
dataPtr = const_cast<void*>(static_cast<const void*>(strParam->c_str()));
245+
bufferLength = strParam->size() + 1;
246+
strLenOrIndPtr = AllocateParamBuffer<SQLLEN>(paramBuffers);
247+
*strLenOrIndPtr = SQL_NTS;
248+
}
249+
break;
250+
}
231251
case SQL_C_BINARY: {
232252
if (!py::isinstance<py::str>(param) && !py::isinstance<py::bytearray>(param) &&
233253
!py::isinstance<py::bytes>(param)) {
@@ -1203,23 +1223,51 @@ SQLRETURN SQLExecute_wrap(const SqlHandlePtr statementHandle,
12031223
continue;
12041224
}
12051225
if (py::isinstance<py::str>(pyObj)) {
1206-
std::wstring wstr = pyObj.cast<std::wstring>();
1226+
if (matchedInfo->paramCType == SQL_C_WCHAR) {
1227+
std::wstring wstr = pyObj.cast<std::wstring>();
1228+
const SQLWCHAR* dataPtr = nullptr;
1229+
size_t totalChars = 0;
12071230
#if defined(__APPLE__) || defined(__linux__)
1208-
auto utf16Buf = WStringToSQLWCHAR(wstr);
1209-
const char* dataPtr = reinterpret_cast<const char*>(utf16Buf.data());
1210-
size_t totalBytes = (utf16Buf.size() - 1) * sizeof(SQLWCHAR);
1231+
std::vector<SQLWCHAR> sqlwStr = WStringToSQLWCHAR(wstr);
1232+
totalChars = sqlwStr.size() - 1;
1233+
dataPtr = sqlwStr.data();
12111234
#else
1212-
const char* dataPtr = reinterpret_cast<const char*>(wstr.data());
1213-
size_t totalBytes = wstr.size() * sizeof(wchar_t);
1235+
dataPtr = wstr.c_str();
1236+
totalChars = wstr.size();
12141237
#endif
1215-
const size_t chunkSize = DAE_CHUNK_SIZE;
1216-
for (size_t offset = 0; offset < totalBytes; offset += chunkSize) {
1217-
size_t len = std::min(chunkSize, totalBytes - offset);
1218-
rc = SQLPutData_ptr(hStmt, (SQLPOINTER)(dataPtr + offset), static_cast<SQLLEN>(len));
1219-
if (!SQL_SUCCEEDED(rc)) {
1220-
LOG("SQLPutData failed at offset {} of {}", offset, totalBytes);
1221-
return rc;
1238+
size_t offset = 0;
1239+
size_t chunkChars = DAE_CHUNK_SIZE / sizeof(SQLWCHAR);
1240+
while (offset < totalChars) {
1241+
size_t len = std::min(chunkChars, totalChars - offset);
1242+
size_t lenBytes = len * sizeof(SQLWCHAR);
1243+
if (lenBytes > static_cast<size_t>(std::numeric_limits<SQLLEN>::max())) {
1244+
ThrowStdException("Chunk size exceeds maximum allowed by SQLLEN");
1245+
}
1246+
rc = SQLPutData_ptr(hStmt, (SQLPOINTER)(dataPtr + offset), static_cast<SQLLEN>(lenBytes));
1247+
if (!SQL_SUCCEEDED(rc)) {
1248+
LOG("SQLPutData failed at offset {} of {}", offset, totalChars);
1249+
return rc;
1250+
}
1251+
offset += len;
12221252
}
1253+
} else if (matchedInfo->paramCType == SQL_C_CHAR) {
1254+
std::string s = pyObj.cast<std::string>();
1255+
size_t totalBytes = s.size();
1256+
const char* dataPtr = s.data();
1257+
size_t offset = 0;
1258+
size_t chunkBytes = DAE_CHUNK_SIZE;
1259+
while (offset < totalBytes) {
1260+
size_t len = std::min(chunkBytes, totalBytes - offset);
1261+
1262+
rc = SQLPutData_ptr(hStmt, (SQLPOINTER)(dataPtr + offset), static_cast<SQLLEN>(len));
1263+
if (!SQL_SUCCEEDED(rc)) {
1264+
LOG("SQLPutData failed at offset {} of {}", offset, totalBytes);
1265+
return rc;
1266+
}
1267+
offset += len;
1268+
}
1269+
} else {
1270+
ThrowStdException("Unsupported C type for str in DAE");
12231271
}
12241272
} else {
12251273
ThrowStdException("DAE only supported for str or bytes");

tests/test_004_cursor.py

Lines changed: 181 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
from datetime import datetime, date, time
1414
import decimal
1515
from contextlib import closing
16-
from mssql_python import Connection
16+
from mssql_python import Connection, row
1717

1818
# Setup test table
1919
TEST_TABLE = """
@@ -5124,6 +5124,186 @@ def test_emoji_round_trip(cursor, db_connection):
51245124
except Exception as e:
51255125
pytest.fail(f"Error for input {repr(text)}: {e}")
51265126

5127+
def test_varchar_max_insert_non_lob(cursor, db_connection):
    """Test small VARCHAR(MAX) inserts (non-LOB path): short string, empty string, and None.

    Only the insert side is exercised: each statement must execute and commit
    without raising. Read-back verification is still disabled (see below).
    """
    try:
        # Drop first in case an earlier run on this connection left the table behind;
        # otherwise CREATE TABLE would fail before anything is tested.
        cursor.execute("DROP TABLE IF EXISTS #pytest_varchar_nonlob")
        cursor.execute("CREATE TABLE #pytest_varchar_nonlob (col VARCHAR(MAX))")
        db_connection.commit()

        small_str = "Hello, world!"  # small, non-LOB
        empty_str = ""

        # Same INSERT for every value, committing after each one.
        for value in (small_str, empty_str, None):
            cursor.execute(
                "INSERT INTO #pytest_varchar_nonlob (col) VALUES (?)",
                [value]
            )
            db_connection.commit()

        # Fetch commented for now
        # cursor.execute("SELECT col FROM #pytest_varchar_nonlob")
        # rows = cursor.fetchall()
        # assert rows == [[small_str], [empty_str], [None]]

    finally:
        # Always remove the temp table, matching the cleanup in test_empty_string_chunk.
        cursor.execute("DROP TABLE IF EXISTS #pytest_varchar_nonlob")
        db_connection.commit()
5161+
5162+
5163+
def test_varchar_max_insert_lob(cursor, db_connection):
    """Test large VARCHAR(MAX) insert (LOB path) with a 100,000-character ASCII string.

    Only the insert side is exercised: the statement must execute and commit
    without raising. Read-back verification is still disabled (see below).
    """
    try:
        # Drop first in case an earlier run on this connection left the table behind;
        # otherwise CREATE TABLE would fail before anything is tested.
        cursor.execute("DROP TABLE IF EXISTS #pytest_varchar_lob")
        cursor.execute("CREATE TABLE #pytest_varchar_lob (col VARCHAR(MAX))")
        db_connection.commit()

        large_str = "A" * 100_000  # > 8k to trigger LOB
        cursor.execute(
            "INSERT INTO #pytest_varchar_lob (col) VALUES (?)",
            [large_str]
        )
        db_connection.commit()

        # Fetch commented for now
        # cursor.execute("SELECT col FROM #pytest_varchar_lob")
        # rows = cursor.fetchall()
        # assert rows == [[large_str]]

    finally:
        # Always remove the temp table, matching the cleanup in test_empty_string_chunk.
        cursor.execute("DROP TABLE IF EXISTS #pytest_varchar_lob")
        db_connection.commit()
5183+
5184+
5185+
def test_nvarchar_max_insert_non_lob(cursor, db_connection):
    """Test small NVARCHAR(MAX) inserts (non-LOB path): short Unicode string, empty string, and None.

    Only the insert side is exercised: each statement must execute and commit
    without raising. Read-back verification is still disabled (see below).
    """
    try:
        # Drop first in case an earlier run on this connection left the table behind;
        # otherwise CREATE TABLE would fail before anything is tested.
        cursor.execute("DROP TABLE IF EXISTS #pytest_nvarchar_nonlob")
        cursor.execute("CREATE TABLE #pytest_nvarchar_nonlob (col NVARCHAR(MAX))")
        db_connection.commit()

        small_str = "Unicode ✨ test"
        empty_str = ""

        # Same INSERT for every value, committing after each one.
        for value in (small_str, empty_str, None):
            cursor.execute(
                "INSERT INTO #pytest_nvarchar_nonlob (col) VALUES (?)",
                [value]
            )
            db_connection.commit()

        # Fetch commented for now
        # cursor.execute("SELECT col FROM #pytest_nvarchar_nonlob")
        # rows = cursor.fetchall()
        # assert rows == [[small_str], [empty_str], [None]]

    finally:
        # Always remove the temp table, matching the cleanup in test_empty_string_chunk.
        cursor.execute("DROP TABLE IF EXISTS #pytest_nvarchar_nonlob")
        db_connection.commit()
5218+
5219+
5220+
def test_nvarchar_max_insert_lob(cursor, db_connection):
    """Test large NVARCHAR(MAX) insert (LOB path) using 50,000 non-BMP emoji characters.

    Only the insert side is exercised: the statement must execute and commit
    without raising. Read-back verification is still disabled (see below).
    """
    try:
        # Drop first in case an earlier run on this connection left the table behind;
        # otherwise CREATE TABLE would fail before anything is tested.
        cursor.execute("DROP TABLE IF EXISTS #pytest_nvarchar_lob")
        cursor.execute("CREATE TABLE #pytest_nvarchar_lob (col NVARCHAR(MAX))")
        db_connection.commit()

        # Each emoji is a surrogate pair: 2 UTF-16 code units (4 bytes),
        # so the payload is 100,000 code units / 200,000 bytes.
        large_str = "📝" * 50_000
        cursor.execute(
            "INSERT INTO #pytest_nvarchar_lob (col) VALUES (?)",
            [large_str]
        )
        db_connection.commit()

        # Fetch commented for now
        # cursor.execute("SELECT col FROM #pytest_nvarchar_lob")
        # rows = cursor.fetchall()
        # assert rows == [[large_str]]

    finally:
        # Always remove the temp table, matching the cleanup in test_empty_string_chunk.
        cursor.execute("DROP TABLE IF EXISTS #pytest_nvarchar_lob")
        db_connection.commit()
5240+
5241+
def test_nvarchar_max_boundary(cursor, db_connection):
    """Test NVARCHAR(MAX) at LOB boundary sizes.

    Inserts one 4096-character BMP string and one 4096-emoji string. Only the
    insert side is exercised; read-back verification is still disabled (see below).
    """
    try:
        cursor.execute("DROP TABLE IF EXISTS #pytest_nvarchar_boundary")
        cursor.execute("CREATE TABLE #pytest_nvarchar_boundary (col NVARCHAR(MAX))")
        db_connection.commit()

        boundary_values = (
            "A" * 4096,   # 4k BMP chars = 8k bytes
            "📝" * 4096,  # 4k emojis = 8k UTF-16 code units (16k bytes)
        )
        # Both inserts share a single commit.
        for value in boundary_values:
            cursor.execute("INSERT INTO #pytest_nvarchar_boundary (col) VALUES (?)", [value])
        db_connection.commit()

        # Fetch commented for now
        # cursor.execute("SELECT col FROM #pytest_nvarchar_boundary")
        # rows = cursor.fetchall()
        # assert rows == [["A" * 4096], ["📝" * 4096]]
    finally:
        # Always remove the temp table, matching the cleanup in test_empty_string_chunk.
        cursor.execute("DROP TABLE IF EXISTS #pytest_nvarchar_boundary")
        db_connection.commit()
5260+
5261+
5262+
def test_nvarchar_max_chunk_edge(cursor, db_connection):
    """Test NVARCHAR(MAX) insert slightly larger than a chunk.

    Only the insert side is exercised; read-back verification is still
    disabled (see below).
    """
    try:
        cursor.execute("DROP TABLE IF EXISTS #pytest_nvarchar_chunk")
        cursor.execute("CREATE TABLE #pytest_nvarchar_chunk (col NVARCHAR(MAX))")
        db_connection.commit()

        # NOTE(review): 8192 is assumed to match the driver's DAE chunk size - confirm.
        chunk_size = 8192  # bytes
        # Each emoji takes 4 bytes in UTF-16, so this is 12 bytes past one chunk.
        test_str = "📝" * ((chunk_size // 4) + 3)  # slightly > 1 chunk
        cursor.execute("INSERT INTO #pytest_nvarchar_chunk (col) VALUES (?)", [test_str])
        db_connection.commit()

        # Fetch commented for now
        # cursor.execute("SELECT col FROM #pytest_nvarchar_chunk")
        # row = cursor.fetchone()
        # assert row[0] == test_str
    finally:
        # Always remove the temp table, matching the cleanup in test_empty_string_chunk.
        cursor.execute("DROP TABLE IF EXISTS #pytest_nvarchar_chunk")
        db_connection.commit()
5280+
5281+
def test_empty_string_chunk(cursor, db_connection):
    """Test inserting empty strings into VARCHAR(MAX) and NVARCHAR(MAX).

    Inserts "" into both columns of a temp table, then checks that LEN()
    reports 0 for each. The table is dropped again on the way out.
    """
    try:
        # Start from a clean slate, then build the two-column temp table.
        cursor.execute("DROP TABLE IF EXISTS #pytest_empty_string")
        cursor.execute("""
            CREATE TABLE #pytest_empty_string (
                varchar_col VARCHAR(MAX),
                nvarchar_col NVARCHAR(MAX)
            )
        """)
        db_connection.commit()

        # One empty value per column.
        empty_values = ["", ""]
        cursor.execute(
            "INSERT INTO #pytest_empty_string (varchar_col, nvarchar_col) VALUES (?, ?)",
            empty_values
        )
        db_connection.commit()

        cursor.execute("SELECT LEN(varchar_col), LEN(nvarchar_col) FROM #pytest_empty_string")
        fetched = cursor.fetchone()
        # Normalise whatever numeric type comes back to plain ints before comparing.
        row = tuple(int(x) for x in fetched)
        assert row == (0, 0), f"Expected lengths (0,0), got {row}"
    finally:
        cursor.execute("DROP TABLE IF EXISTS #pytest_empty_string")
        db_connection.commit()
51275307

51285308
def test_close(db_connection):
51295309
"""Test closing the cursor"""

0 commit comments

Comments
 (0)