From 3cc826ed05f81d4e245afb5e66d6e889319b6fb6 Mon Sep 17 00:00:00 2001 From: Jim Blythe Date: Fri, 20 Feb 2026 11:56:03 -0800 Subject: [PATCH 1/4] Modify SqlStreamingXml XmlWriter to internally use a MemoryStream instead of a StringBuilder. (#1877) Note: UTF8Encoding(false) addition in s_writerSettings is consistent with prior default used within StringWriter/StringBuilder --- .../src/Microsoft/Data/SqlClient/SqlStream.cs | 94 ++++++++++--------- ...icrosoft.Data.SqlClient.ManualTests.csproj | 1 + .../SqlStreamingXmlTest.cs | 91 ++++++++++++++++++ 3 files changed, 140 insertions(+), 46 deletions(-) create mode 100644 src/Microsoft.Data.SqlClient/tests/ManualTests/SQL/SqlStreamingXmlTest/SqlStreamingXmlTest.cs diff --git a/src/Microsoft.Data.SqlClient/src/Microsoft/Data/SqlClient/SqlStream.cs b/src/Microsoft.Data.SqlClient/src/Microsoft/Data/SqlClient/SqlStream.cs index 653faf7213..b317d0e238 100644 --- a/src/Microsoft.Data.SqlClient/src/Microsoft/Data/SqlClient/SqlStream.cs +++ b/src/Microsoft.Data.SqlClient/src/Microsoft/Data/SqlClient/SqlStream.cs @@ -224,9 +224,9 @@ private int ReadBytes(byte[] buffer, int offset, int count) // we are guaranteed that cb is < Int32.Max since we always pass in count which is of type Int32 to // our getbytes interface - count -= (int)cb; - offset += (int)cb; - intCount += (int)cb; + count -= cb; + offset += cb; + intCount += cb; } else { @@ -387,9 +387,9 @@ public override int Read(byte[] buffer, int offset, int count) Buffer.BlockCopy(_cachedBytes[_currentArrayIndex], _currentPosition, buffer, offset, cb); _currentPosition += cb; - count -= (int)cb; - offset += (int)cb; - intCount += (int)cb; + count -= cb; + offset += cb; + intCount += cb; } return intCount; @@ -477,13 +477,14 @@ private long TotalLength sealed internal class SqlStreamingXml { - private static readonly XmlWriterSettings s_writerSettings = new() { CloseOutput = true, ConformanceLevel = ConformanceLevel.Fragment }; + private static readonly XmlWriterSettings s_writerSettings = new() { CloseOutput = true, ConformanceLevel = ConformanceLevel.Fragment, Encoding = new UTF8Encoding(false) }; private readonly int _columnOrdinal; private SqlDataReader _reader; private XmlReader _xmlReader; + private bool _canReadChunk; private XmlWriter _xmlWriter; - private StringWriter _strWriter; + private MemoryStream _memoryStream; private long _charsRemoved; public SqlStreamingXml(int i, SqlDataReader reader) @@ -495,11 +496,12 @@ public SqlStreamingXml(int i, SqlDataReader reader) public void Close() { ((IDisposable)_xmlWriter).Dispose(); + ((IDisposable)_memoryStream).Dispose(); ((IDisposable)_xmlReader).Dispose(); _reader = null; _xmlReader = null; _xmlWriter = null; - _strWriter = null; + _memoryStream = null; } public int ColumnOrdinal => _columnOrdinal; @@ -508,14 +510,15 @@ public long GetChars(long dataIndex, char[] buffer, int bufferIndex, int length) { if (_xmlReader == null) { - SqlStream sqlStream = new(_columnOrdinal, _reader, addByteOrderMark: true, processAllRows:false, advanceReader:false); + SqlStream sqlStream = new(_columnOrdinal, _reader, addByteOrderMark: true, processAllRows: false, advanceReader: false); _xmlReader = sqlStream.ToXmlReader(); - _strWriter = new StringWriter((System.IFormatProvider)null); - _xmlWriter = XmlWriter.Create(_strWriter, s_writerSettings); + _canReadChunk = _xmlReader.CanReadValueChunk; + _memoryStream = new MemoryStream(); + _xmlWriter = XmlWriter.Create(_memoryStream, s_writerSettings); } - int charsToSkip = 0; - int cnt = 0; + long charsToSkip = 0; + long cnt = 0; if (dataIndex < _charsRemoved) { throw ADP.NonSeqByteAccess(dataIndex, _charsRemoved, nameof(GetChars)); @@ -529,72 +532,73 @@ public long GetChars(long dataIndex, char[] buffer, int bufferIndex, int length) // total size up front without reading and converting the XML. if (buffer == null) { - return (long)(-1); + return -1; } - StringBuilder strBldr = _strWriter.GetStringBuilder(); - while (!_xmlReader.EOF) + long memoryStreamRemaining = _memoryStream.Length - _memoryStream.Position; + while (memoryStreamRemaining < (length + charsToSkip) && !_xmlReader.EOF) { - if (strBldr.Length >= (length + charsToSkip)) + // Check whether the MemoryStream has been fully read. + // If so, reset the MemoryStream for reuse and to avoid growing size too much. + if (_memoryStream.Length > 0 && memoryStreamRemaining == 0) { - break; + // This also sets the Position back to 0. + _memoryStream.SetLength(0); } // Can't call _xmlWriter.WriteNode here, since it reads all of the data in before returning the first char. // Do own implementation of WriteNode instead that reads just enough data to return the required number of chars //_xmlWriter.WriteNode(_xmlReader, true); // _xmlWriter.Flush(); WriteXmlElement(); + // Update memoryStreamRemaining based on the number of chars just written to the MemoryStream + memoryStreamRemaining = _memoryStream.Length - _memoryStream.Position; if (charsToSkip > 0) { - // Aggressively remove the characters we want to skip to avoid growing StringBuilder size too much - cnt = strBldr.Length < charsToSkip ? strBldr.Length : charsToSkip; - strBldr.Remove(0, cnt); + cnt = memoryStreamRemaining < charsToSkip ? memoryStreamRemaining : charsToSkip; + // Move the Position forward + _memoryStream.Seek(cnt, SeekOrigin.Current); + memoryStreamRemaining -= cnt; charsToSkip -= cnt; - _charsRemoved += (long)cnt; + _charsRemoved += cnt; } } if (charsToSkip > 0) { - cnt = strBldr.Length < charsToSkip ? strBldr.Length : charsToSkip; - strBldr.Remove(0, cnt); + cnt = memoryStreamRemaining < charsToSkip ? memoryStreamRemaining : charsToSkip; + // Move the Position forward + _memoryStream.Seek(cnt, SeekOrigin.Current); + memoryStreamRemaining -= cnt; charsToSkip -= cnt; - _charsRemoved += (long)cnt; + _charsRemoved += cnt; } - if (strBldr.Length == 0) + if (memoryStreamRemaining == 0) { return 0; } // At this point charsToSkip must be 0 Debug.Assert(charsToSkip == 0); - cnt = strBldr.Length < length ? strBldr.Length : length; + cnt = memoryStreamRemaining < length ? memoryStreamRemaining : length; for (int i = 0; i < cnt; i++) { - buffer[bufferIndex + i] = strBldr[i]; + buffer[bufferIndex + i] = (char)_memoryStream.ReadByte(); } - // Remove the characters we have already returned - strBldr.Remove(0, cnt); - _charsRemoved += (long)cnt; - return (long)cnt; + _charsRemoved += cnt; + return cnt; } // This method duplicates the work of XmlWriter.WriteNode except that it reads one element at a time // instead of reading the entire node like XmlWriter. + // Caller already ensures !_xmlReader.EOF private void WriteXmlElement() { - if (_xmlReader.EOF) - { - return; - } - - bool canReadChunk = _xmlReader.CanReadValueChunk; - char[] writeNodeBuffer = null; - // Constants const int WriteNodeBufferSize = 1024; + long memoryStreamPosition = _memoryStream.Position; + _xmlReader.Read(); switch (_xmlReader.NodeType) { @@ -608,12 +612,9 @@ private void WriteXmlElement() } break; case XmlNodeType.Text: - if (canReadChunk) + if (_canReadChunk) { - if (writeNodeBuffer == null) - { - writeNodeBuffer = new char[WriteNodeBufferSize]; - } + char[] writeNodeBuffer = new char[WriteNodeBufferSize]; int read; while ((read = _xmlReader.ReadValueChunk(writeNodeBuffer, 0, WriteNodeBufferSize)) > 0) { @@ -650,6 +651,7 @@ private void WriteXmlElement() break; } _xmlWriter.Flush(); + _memoryStream.Position = memoryStreamPosition; } } } diff --git a/src/Microsoft.Data.SqlClient/tests/ManualTests/Microsoft.Data.SqlClient.ManualTests.csproj b/src/Microsoft.Data.SqlClient/tests/ManualTests/Microsoft.Data.SqlClient.ManualTests.csproj index 44bb79cbc9..b68f2847a0 100644 --- a/src/Microsoft.Data.SqlClient/tests/ManualTests/Microsoft.Data.SqlClient.ManualTests.csproj +++ b/src/Microsoft.Data.SqlClient/tests/ManualTests/Microsoft.Data.SqlClient.ManualTests.csproj @@ -218,6 +218,7 @@ + diff --git a/src/Microsoft.Data.SqlClient/tests/ManualTests/SQL/SqlStreamingXmlTest/SqlStreamingXmlTest.cs b/src/Microsoft.Data.SqlClient/tests/ManualTests/SQL/SqlStreamingXmlTest/SqlStreamingXmlTest.cs new file mode 100644 index 0000000000..610023f18d --- /dev/null +++ b/src/Microsoft.Data.SqlClient/tests/ManualTests/SQL/SqlStreamingXmlTest/SqlStreamingXmlTest.cs @@ -0,0 +1,91 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System; +using System.Data; +using System.Diagnostics; +using System.Globalization; +using Xunit; + +namespace Microsoft.Data.SqlClient.ManualTesting.Tests +{ + public static class SqlStreamingXmlTest + { + [ConditionalFact(typeof(DataTestUtility), nameof(DataTestUtility.AreConnStringsSetup))] + public static void LinearSingleNode() + { + SqlConnection connection = new(DataTestUtility.TCPConnectionString); + // Use a literal XML column of the specified size. The XML is constructed by replicating a string of 'B' characters to reach the desired size, and wrapping it in XML tags. + const string commandTextBase = "SELECT Convert(xml, N'' + REPLICATE(CAST('' AS nvarchar(max)) +N'B', ({0} * 1024 * 1024) - 11) + N'')"; + + TimeSpan time1 = TimedExecution(commandTextBase, 1); + TimeSpan time5 = TimedExecution(commandTextBase, 5); + + // Compare linear time for 1MB vs 5MB. We expect the time to be at most 6 times higher for 5MB, which permits additional 20% for any noise in the measurements. + Assert.True(time5.TotalMilliseconds <= (time1.TotalMilliseconds * 6), $"Execution time did not follow linear scale: 1MB={time1.TotalMilliseconds}ms vs. 5MB={time5.TotalMilliseconds}ms"); + } + + [ConditionalFact(typeof(DataTestUtility), nameof(DataTestUtility.AreConnStringsSetup))] + public static void LinearMultipleNodes() + { + SqlConnection connection = new(DataTestUtility.TCPConnectionString); + // Use a literal XML column with the specified number of 1MB elements. The XML is constructed by replicating a string of 'B' characters to reach 1MB, then replicating to the desired number of elements. + const string commandTextBase = "SELECT Convert(xml, REPLICATE(N'' + REPLICATE(CAST('' AS nvarchar(max)) + N'B', (1024 * 1024) - 11) + N'', {0}))"; + + TimeSpan time1 = TimedExecution(commandTextBase, 1); + TimeSpan time5 = TimedExecution(commandTextBase, 5); + + // Compare linear time for 1MB vs 5MB. We expect the time to be at most 6 times higher for 5MB, which permits additional 20% for any noise in the measurements. + Assert.True(time5.TotalMilliseconds <= (time1.TotalMilliseconds * 6), $"Execution time did not follow linear scale: 1x={time1.TotalMilliseconds}ms vs. 5x={time5.TotalMilliseconds}ms"); + } + + private static TimeSpan TimedExecution(string commandTextBase, int scale) + { + SqlConnection connection = new(DataTestUtility.TCPConnectionString); + var stopwatch = new Stopwatch(); + + using (SqlCommand command = connection.CreateCommand()) + { + connection.Open(); + command.CommandText = string.Format(CultureInfo.InvariantCulture, commandTextBase, scale); + + SqlDataReader sqlDataReader = command.ExecuteReader(CommandBehavior.SequentialAccess); + if (sqlDataReader.Read()) + { + stopwatch.Start(); + ReadAllChars(sqlDataReader, scale); + stopwatch.Stop(); + } + connection.Close(); + } + + return stopwatch.Elapsed; + } + + /// + /// Replicate the reading approach used with issue #1877 + /// + private static void ReadAllChars(SqlDataReader sqlDataReader, int expectedMB) + { + var expectedSize = expectedMB * 1024 * 1024; + var text = new char[expectedSize]; + var buffer = new char[1]; + + long position = 0; + long numCharsRead; + do + { + numCharsRead = sqlDataReader.GetChars(0, position, buffer, 0, 1); + if (numCharsRead > 0) + { + text[position] = buffer[0]; + position += numCharsRead; + } + } + while (numCharsRead > 0); + + Assert.Equal(expectedSize, position); + } + } +} From e2dec42ec756d9cca0c242f0d566da3e9d69b471 Mon Sep 17 00:00:00 2001 From: Jim Blythe Date: Thu, 26 Feb 2026 16:14:11 -0800 Subject: [PATCH 2/4] Fix MemoryStream to allow appending when GetChars request spans multiple elements Enhance comments within SqlStreamingXml Extend Manual tests to fully cover GetChars WriteXmlElement includes uncovered paths not accessible for SQL XML column types which normalize Whitespace, CDATA, EntityReference, XmlDeclaration, ProcessingInstruction, DocumentType, and Comment node types --- .../src/Microsoft/Data/SqlClient/SqlStream.cs | 19 +- .../SqlStreamingXmlTest.cs | 259 +++++++++++++++++- 2 files changed, 262 insertions(+), 16 deletions(-) diff --git a/src/Microsoft.Data.SqlClient/src/Microsoft/Data/SqlClient/SqlStream.cs b/src/Microsoft.Data.SqlClient/src/Microsoft/Data/SqlClient/SqlStream.cs index b317d0e238..33f88b4ebf 100644 --- a/src/Microsoft.Data.SqlClient/src/Microsoft/Data/SqlClient/SqlStream.cs +++ b/src/Microsoft.Data.SqlClient/src/Microsoft/Data/SqlClient/SqlStream.cs @@ -477,7 +477,12 @@ private long TotalLength sealed internal class SqlStreamingXml { - private static readonly XmlWriterSettings s_writerSettings = new() { CloseOutput = true, ConformanceLevel = ConformanceLevel.Fragment, Encoding = new UTF8Encoding(false) }; + private static readonly XmlWriterSettings s_writerSettings = new() { + CloseOutput = true, + ConformanceLevel = ConformanceLevel.Fragment, + // Potentially limits XML to not supporting UTF-16 characters, but this is required to avoid writing + // a byte order mark and is consistent with prior default used within StringWriter/StringBuilder. + Encoding = new UTF8Encoding(false) }; private readonly int _columnOrdinal; private SqlDataReader _reader; @@ -525,7 +530,8 @@ public long GetChars(long dataIndex, char[] buffer, int bufferIndex, int length) } else if (dataIndex > _charsRemoved) { - charsToSkip = (int)(dataIndex - _charsRemoved); + //dataIndex is zero-based, but _charsRemoved is one-based, so the difference is the number of chars to skip in the MemoryStream before we start copying data to the buffer + charsToSkip = dataIndex - _charsRemoved; } // If buffer parameter is null, we have to return -1 since there is no way for us to know the @@ -550,7 +556,7 @@ public long GetChars(long dataIndex, char[] buffer, int bufferIndex, int length) //_xmlWriter.WriteNode(_xmlReader, true); // _xmlWriter.Flush(); WriteXmlElement(); - // Update memoryStreamRemaining based on the number of chars just written to the MemoryStream + // Update memoryStreamRemaining based on the number of bytes/chars just written to the MemoryStream memoryStreamRemaining = _memoryStream.Length - _memoryStream.Position; if (charsToSkip > 0) { @@ -583,6 +589,7 @@ public long GetChars(long dataIndex, char[] buffer, int bufferIndex, int length) cnt = memoryStreamRemaining < length ? memoryStreamRemaining : length; for (int i = 0; i < cnt; i++) { + // ReadByte moves the Position forward buffer[bufferIndex + i] = (char)_memoryStream.ReadByte(); } _charsRemoved += cnt; @@ -598,10 +605,15 @@ private void WriteXmlElement() const int WriteNodeBufferSize = 1024; long memoryStreamPosition = _memoryStream.Position; + // Move the Position to the end of the MemoryStream since we are always appending. + _memoryStream.Seek(0, SeekOrigin.End); _xmlReader.Read(); switch (_xmlReader.NodeType) { + // Note: Whitespace, CDATA, EntityReference, XmlDeclaration, ProcessingInstruction, DocumentType, and Comment node types + // are not expected in the XML returned from SQL Server as it normalizes them out, but handle them just in case. + // SignificantWhitespace will occur when used with xml:space="preserve" case XmlNodeType.Element: _xmlWriter.WriteStartElement(_xmlReader.Prefix, _xmlReader.LocalName, _xmlReader.NamespaceURI); _xmlWriter.WriteAttributes(_xmlReader, true); @@ -651,6 +663,7 @@ private void WriteXmlElement() break; } _xmlWriter.Flush(); + // Reset the Position back to where it was before writing this element so that the caller can continue reading from the expected position. _memoryStream.Position = memoryStreamPosition; } } diff --git a/src/Microsoft.Data.SqlClient/tests/ManualTests/SQL/SqlStreamingXmlTest/SqlStreamingXmlTest.cs b/src/Microsoft.Data.SqlClient/tests/ManualTests/SQL/SqlStreamingXmlTest/SqlStreamingXmlTest.cs index 610023f18d..f3831761d9 100644 --- a/src/Microsoft.Data.SqlClient/tests/ManualTests/SQL/SqlStreamingXmlTest/SqlStreamingXmlTest.cs +++ b/src/Microsoft.Data.SqlClient/tests/ManualTests/SQL/SqlStreamingXmlTest/SqlStreamingXmlTest.cs @@ -6,6 +6,7 @@ using System.Data; using System.Diagnostics; using System.Globalization; +using System.Xml.Linq; using Xunit; namespace Microsoft.Data.SqlClient.ManualTesting.Tests @@ -13,10 +14,9 @@ namespace Microsoft.Data.SqlClient.ManualTesting.Tests public static class SqlStreamingXmlTest { [ConditionalFact(typeof(DataTestUtility), nameof(DataTestUtility.AreConnStringsSetup))] - public static void LinearSingleNode() + public static void Linear_SingleNode() { - SqlConnection connection = new(DataTestUtility.TCPConnectionString); - // Use a literal XML column of the specified size. The XML is constructed by replicating a string of 'B' characters to reach the desired size, and wrapping it in XML tags. + // Use literal XML column constructed by replicating a string of 'B' characters to reach the desired size, and wrapping it in XML tags. const string commandTextBase = "SELECT Convert(xml, N'' + REPLICATE(CAST('' AS nvarchar(max)) +N'B', ({0} * 1024 * 1024) - 11) + N'')"; TimeSpan time1 = TimedExecution(commandTextBase, 1); @@ -27,10 +27,9 @@ public static void LinearSingleNode() } [ConditionalFact(typeof(DataTestUtility), nameof(DataTestUtility.AreConnStringsSetup))] - public static void LinearMultipleNodes() + public static void Linear_MultipleNodes() { - SqlConnection connection = new(DataTestUtility.TCPConnectionString); - // Use a literal XML column with the specified number of 1MB elements. The XML is constructed by replicating a string of 'B' characters to reach 1MB, then replicating to the desired number of elements. + // Use literal XML column constructed by replicating a string of 'B' characters to reach 1MB, then replicating to the desired number of elements. const string commandTextBase = "SELECT Convert(xml, REPLICATE(N'' + REPLICATE(CAST('' AS nvarchar(max)) + N'B', (1024 * 1024) - 11) + N'', {0}))"; TimeSpan time1 = TimedExecution(commandTextBase, 1); @@ -40,10 +39,244 @@ public static void LinearMultipleNodes() Assert.True(time5.TotalMilliseconds <= (time1.TotalMilliseconds * 6), $"Execution time did not follow linear scale: 1x={time1.TotalMilliseconds}ms vs. 5x={time5.TotalMilliseconds}ms"); } + [ConditionalFact(typeof(DataTestUtility), nameof(DataTestUtility.AreConnStringsSetup))] + public static void GetChars_RequiresBuffer() + { + SqlConnection connection = new(DataTestUtility.TCPConnectionString); + const string commandText = "SELECT Convert(xml, N'bar')"; + long charCount = 0; + + using (SqlCommand command = connection.CreateCommand()) + { + connection.Open(); + command.CommandText = commandText; + + SqlDataReader sqlDataReader = command.ExecuteReader(CommandBehavior.SequentialAccess); + if (sqlDataReader.Read()) + { + charCount = sqlDataReader.GetChars(0, 0, null, 0, 1); + } + connection.Close(); + } + + //verify -1 is returned since buffer was not provided + Assert.Equal(-1, charCount); + } + + [ConditionalTheory(typeof(DataTestUtility), nameof(DataTestUtility.AreConnStringsSetup))] + [InlineData(true)] + [InlineData(false)] + public static void GetChars_SequentialDataIndex(bool backwards) + { + SqlConnection connection = new(DataTestUtility.TCPConnectionString); + const string commandText = "SELECT Convert(xml, N'bar')"; + char[] buffer = new char[2]; + + using (SqlCommand command = connection.CreateCommand()) + { + connection.Open(); + command.CommandText = commandText; + + SqlDataReader sqlDataReader = command.ExecuteReader(CommandBehavior.SequentialAccess); + if (sqlDataReader.Read()) + { + sqlDataReader.GetChars(0, 0, buffer, 0, 2); + // Verify that providing the same or lower index than the previous call results in an exception. + // When backwards is true we test providing an index that is one less than the previous call, + // otherwise we test providing the same index as the previous call - both should not be allowed. + Assert.Throws(() => sqlDataReader.GetChars(0, backwards ? 0 : 1, buffer, 0, 2)); + } + connection.Close(); + } + } + + [ConditionalFact(typeof(DataTestUtility), nameof(DataTestUtility.AreConnStringsSetup))] + public static void GetChars_PartialSingleElement() + { + SqlConnection connection = new(DataTestUtility.TCPConnectionString); + const string commandText = "SELECT Convert(xml, N'_bar_baz')"; + long charCount = 0; + char[] buffer = new char[3]; + + using (SqlCommand command = connection.CreateCommand()) + { + connection.Open(); + command.CommandText = commandText; + + SqlDataReader sqlDataReader = command.ExecuteReader(CommandBehavior.SequentialAccess); + if (sqlDataReader.Read()) + { + // Read just the 'bar' characters from the XML by specifying the offset, and the length of 3. + // The offset is 6 to skip the entire first element '' and the initial '_' part of text. + charCount = sqlDataReader.GetChars(0, 6, buffer, 0, 3); + } + connection.Close(); + } + + Assert.Equal(3, charCount); + Assert.Equal("bar", new string(buffer)); + } + + [ConditionalTheory(typeof(DataTestUtility), nameof(DataTestUtility.AreConnStringsSetup))] + [InlineData(true)] + [InlineData(false)] + public static void GetChars_PartialAcrossElements(bool initialRead) + { + SqlConnection connection = new(DataTestUtility.TCPConnectionString); + const string commandText = "SELECT Convert(xml, N'baz')"; + long charCount = 0; + char[] buffer = new char[8]; + + using (SqlCommand command = connection.CreateCommand()) + { + connection.Open(); + command.CommandText = commandText; + + SqlDataReader sqlDataReader = command.ExecuteReader(CommandBehavior.SequentialAccess); + if (sqlDataReader.Read()) + { + if (initialRead) + { + // When initialRead is true, we verify continuation after a previous read, + // otherwise we just verify that we can read across XML elements in a single call. + char[] initialBuffer = new char[2]; + sqlDataReader.GetChars(0, 0, initialBuffer, 0, 2); + Assert.Equal("bazbaz_bar_baz"""; + int expectedSize = xml.Length; + string commandText = $"SELECT Convert(xml, N'{xml}')"; + + using (SqlCommand command = connection.CreateCommand()) + { + connection.Open(); + command.CommandText = commandText; + + SqlDataReader sqlDataReader = command.ExecuteReader(CommandBehavior.SequentialAccess); + if (sqlDataReader.Read()) + { + if (initialRead) + { + // When initialRead is true, we verify continuation after a previous read, + // otherwise we just verify that we can read everything in a single call. + char[] initialBuffer = new char[2]; + long initialLength = sqlDataReader.GetChars(0, 0, initialBuffer, 0, 2); + char[] remainingBuffer = new char[98]; + long remainingLength = sqlDataReader.GetChars(0, 2, remainingBuffer, 0, 98); + string combined = new string(initialBuffer) + new string(remainingBuffer); + + Assert.Equal(expectedSize, initialLength + remainingLength); + Assert.Equal(xml, combined.Substring(0, expectedSize)); + } + else + { + // Try to read more characters than the actual XML to verify that the method returns only the actual number of characters. + (long length, string text) = ReadAllChars(sqlDataReader, 100); + + Assert.Equal(expectedSize, length); + Assert.Equal(xml, text.Substring(0, expectedSize)); + } + } + connection.Close(); + } + } + + [ConditionalTheory(typeof(DataTestUtility), nameof(DataTestUtility.AreConnStringsSetup))] + [InlineData(true)] + [InlineData(false)] + public static void GetChars_ExcessiveDataIndex(bool initialRead) + { + SqlConnection connection = new(DataTestUtility.TCPConnectionString); + string xml = """_bar_baz"""; + string commandText = $"SELECT Convert(xml, N'{xml}')"; + + using (SqlCommand command = connection.CreateCommand()) + { + connection.Open(); + command.CommandText = commandText; + + SqlDataReader sqlDataReader = command.ExecuteReader(CommandBehavior.SequentialAccess); + if (sqlDataReader.Read()) + { + if (initialRead) + { + // When initialRead is true, we verify continuation after a previous read, + // otherwise we just verify the large DataIndex in a single call. + char[] initialBuffer = new char[2]; + long initialLength = sqlDataReader.GetChars(0, 0, initialBuffer, 0, 2); + Assert.Equal(2, initialLength); + } + + // buffer will not be touched since the DataIndex is beyond the end of the XML, but a suitable buffer must still be provided. + char[] buffer = new char[100]; + long length = sqlDataReader.GetChars(0, 100, buffer, 0, 2); + Assert.Equal(0, length); + } + connection.Close(); + } + } + + [ConditionalFact(typeof(DataTestUtility), nameof(DataTestUtility.AreConnStringsSetup))] + public static void GetChars_AsXDocument() + { + SqlConnection connection = new(DataTestUtility.TCPConnectionString); + // Use a more complex XML column verify through XDocument. + string xml = """John """; + XDocument expect = XDocument.Parse(xml); + int expectedSize = xml.Length; + string commandText = $"SELECT Convert(xml, N'{xml}')"; + + using (SqlCommand command = connection.CreateCommand()) + { + connection.Open(); + command.CommandText = commandText; + + SqlDataReader sqlDataReader = command.ExecuteReader(CommandBehavior.SequentialAccess); + if (sqlDataReader.Read()) + { + (long length, string xmlString) = ReadAllChars(sqlDataReader, expectedSize); + + Assert.Equal(expectedSize, length); + XDocument actual = XDocument.Parse(xmlString); + Assert.Equal((int)expect.Root.Attribute("Id"), (int)actual.Root.Attribute("Id")); + Assert.Equal((string)expect.Root.Attribute("Role"), (string)actual.Root.Attribute("Role")); + Assert.NotNull(expect.Root.Element("Name")?.Value); + Assert.Equal(expect.Root.Element("Name")!.Value, actual.Root.Element("Name")!.Value); + Assert.NotNull(expect.Root.Element("Children")?.HasElements); + Assert.Equal(expect.Root.Element("Children")!.HasElements, actual.Root.Element("Children")?.HasElements); + Assert.NotNull(expect.Root.Element("PreservedWhitespace")?.Value); + Assert.Equal(expect.Root.Element("PreservedWhitespace")!.Value, actual.Root.Element("PreservedWhitespace")!.Value); + } + connection.Close(); + } + } + private static TimeSpan TimedExecution(string commandTextBase, int scale) { SqlConnection connection = new(DataTestUtility.TCPConnectionString); - var stopwatch = new Stopwatch(); + Stopwatch stopwatch = new Stopwatch(); + int expectedSize = scale * 1024 * 1024; + using (SqlCommand command = connection.CreateCommand()) { @@ -54,8 +287,9 @@ private static TimeSpan TimedExecution(string commandTextBase, int scale) if (sqlDataReader.Read()) { stopwatch.Start(); - ReadAllChars(sqlDataReader, scale); + (long length, string _) = ReadAllChars(sqlDataReader, expectedSize); stopwatch.Stop(); + Assert.Equal(expectedSize, length); } connection.Close(); } @@ -66,11 +300,10 @@ private static TimeSpan TimedExecution(string commandTextBase, int scale) /// /// Replicate the reading approach used with issue #1877 /// - private static void ReadAllChars(SqlDataReader sqlDataReader, int expectedMB) + private static (long, string) ReadAllChars(SqlDataReader sqlDataReader, int expectedSize) { - var expectedSize = expectedMB * 1024 * 1024; - var text = new char[expectedSize]; - var buffer = new char[1]; + char[] text = new char[expectedSize]; + char[] buffer = new char[1]; long position = 0; long numCharsRead; @@ -85,7 +318,7 @@ private static void ReadAllChars(SqlDataReader sqlDataReader, int expectedMB) } while (numCharsRead > 0); - Assert.Equal(expectedSize, position); + return (position, new string(text)); } } } From 9f44ae87735d12ec7957a88265ea8d9470c85a86 Mon Sep 17 00:00:00 2001 From: Jim Blythe Date: Fri, 27 Feb 2026 11:51:41 -0800 Subject: [PATCH 3/4] On Close, reset _canReadChunk & _charsRemoved --- .../src/Microsoft/Data/SqlClient/SqlStream.cs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/Microsoft.Data.SqlClient/src/Microsoft/Data/SqlClient/SqlStream.cs b/src/Microsoft.Data.SqlClient/src/Microsoft/Data/SqlClient/SqlStream.cs index 33f88b4ebf..c98cccede4 100644 --- a/src/Microsoft.Data.SqlClient/src/Microsoft/Data/SqlClient/SqlStream.cs +++ b/src/Microsoft.Data.SqlClient/src/Microsoft/Data/SqlClient/SqlStream.cs @@ -505,8 +505,10 @@ public void Close() ((IDisposable)_xmlReader).Dispose(); _reader = null; _xmlReader = null; + _canReadChunk = false; _xmlWriter = null; _memoryStream = null; + _charsRemoved = 0; } public int ColumnOrdinal => _columnOrdinal; From 9e62ba5d80fb16da6560f91a8698957825f542c3 Mon Sep 17 00:00:00 2001 From: Jim Blythe Date: Sun, 8 Mar 2026 15:06:04 -0700 Subject: [PATCH 4/4] Replace XmlWriter/MemoryStream with direct XmlReader parsing in SqlStreamingXml, reconstructing XML fragments as strings and streaming them char-by-char. Improves efficiency, reduces allocations, and fixes non-ASCII and surrogate pairs. Add comprehensive unit tests for XML edge cases (non-ASCII, surrogate pairs, comments, CDATA, attributes, namespaces, etc.). Refactor existing tests for clarity and better handling of disposables. (AI assist using ChatGPT to better consider edge cases) --- .../src/Microsoft/Data/SqlClient/SqlStream.cs | 438 ++++++--- .../SqlStreamingXmlTest.cs | 853 ++++++++++++++---- 2 files changed, 991 insertions(+), 300 deletions(-) diff --git a/src/Microsoft.Data.SqlClient/src/Microsoft/Data/SqlClient/SqlStream.cs b/src/Microsoft.Data.SqlClient/src/Microsoft/Data/SqlClient/SqlStream.cs index c98cccede4..648ab45b67 100644 --- a/src/Microsoft.Data.SqlClient/src/Microsoft/Data/SqlClient/SqlStream.cs +++ b/src/Microsoft.Data.SqlClient/src/Microsoft/Data/SqlClient/SqlStream.cs @@ -102,7 +102,7 @@ public override int Read(byte[] buffer, int offset, int count) // Read and buffer the first two bytes _bufferedData = new byte[2]; cBufferedData = ReadBytes(_bufferedData, 0, 2); - // Check to se if we should add the byte order mark + // Check to see if we should add the byte order mark if ((cBufferedData < 2) || ((_bufferedData[0] == 0xDF) && (_bufferedData[1] == 0xFF))) { _bom = 0; @@ -477,196 +477,346 @@ private long TotalLength sealed internal class SqlStreamingXml { - private static readonly XmlWriterSettings s_writerSettings = new() { - CloseOutput = true, - ConformanceLevel = ConformanceLevel.Fragment, - // Potentially limits XML to not supporting UTF-16 characters, but this is required to avoid writing - // a byte order mark and is consistent with prior default used within StringWriter/StringBuilder. - Encoding = new UTF8Encoding(false) }; + private readonly int _columnOrdinal; // changing this is only done through the ctor, so it is safe to be readonly + private SqlDataReader _reader; // reader we will stream off, becomes null when closed + private XmlReader _xmlReader; // XmlReader over the current column, becomes null when closed - private readonly int _columnOrdinal; - private SqlDataReader _reader; - private XmlReader _xmlReader; - private bool _canReadChunk; - private XmlWriter _xmlWriter; - private MemoryStream _memoryStream; - private long _charsRemoved; - - public SqlStreamingXml(int i, SqlDataReader reader) + private string _currentTextNode; // rolling buffer of text to deliver + private int _textNodeIndex; // index in _currentTextNode + private char? _pendingHighSurrogate; // pending high surrogate for split surrogate pairs + private long _charsReturned; // total chars returned + private bool _canReadChunk; // XmlReader.CanReadValueChunk + + public SqlStreamingXml(int columnOrdinal, SqlDataReader reader) { - _columnOrdinal = i; + _columnOrdinal = columnOrdinal; _reader = reader; } + public int ColumnOrdinal => _columnOrdinal; + public void Close() { - ((IDisposable)_xmlWriter).Dispose(); - ((IDisposable)_memoryStream).Dispose(); - ((IDisposable)_xmlReader).Dispose(); - _reader = null; + _xmlReader?.Dispose(); _xmlReader = null; + _reader = null; + + _currentTextNode = null; + _textNodeIndex = 0; + _pendingHighSurrogate = null; + _charsReturned = 0; _canReadChunk = false; - _xmlWriter = null; - _memoryStream = null; - _charsRemoved = 0; } - public int ColumnOrdinal => _columnOrdinal; - public long GetChars(long dataIndex, char[] buffer, int bufferIndex, int length) { - if (_xmlReader == null) + if (_reader == null) { - SqlStream sqlStream = new(_columnOrdinal, _reader, addByteOrderMark: true, processAllRows: false, advanceReader: false); - _xmlReader = sqlStream.ToXmlReader(); - _canReadChunk = _xmlReader.CanReadValueChunk; - _memoryStream = new MemoryStream(); - _xmlWriter = XmlWriter.Create(_memoryStream, s_writerSettings); + throw new ObjectDisposedException(nameof(SqlStreamingXml)); } - long charsToSkip = 0; - long cnt = 0; - if (dataIndex < _charsRemoved) + if (buffer == null) { - throw ADP.NonSeqByteAccess(dataIndex, _charsRemoved, nameof(GetChars)); + return -1; } - else if (dataIndex > _charsRemoved) + + if (length == 0) { - //dataIndex is zero-based, but _charsRemoved is one-based, so the difference is the number of chars to skip in the MemoryStream before we start copying data to the buffer - charsToSkip = dataIndex - _charsRemoved; + return 0; } - // If buffer parameter is null, we have to return -1 since there is no way for us to know the - // total size up front without reading and converting the XML. - if (buffer == null) + if (dataIndex < _charsReturned) { - return -1; + throw new InvalidOperationException($"Non-sequential read: requested {dataIndex}, already returned {_charsReturned}"); } - long memoryStreamRemaining = _memoryStream.Length - _memoryStream.Position; - while (memoryStreamRemaining < (length + charsToSkip) && !_xmlReader.EOF) + EnsureReaderInitialized(); + + // Skip to requested dataIndex + long skip = dataIndex - _charsReturned; + while (skip > 0) { - // Check whether the MemoryStream has been fully read. - // If so, reset the MemoryStream for reuse and to avoid growing size too much. - if (_memoryStream.Length > 0 && memoryStreamRemaining == 0) + char discard; + if (!TryReadNextChar(out discard)) { - // This also sets the Position back to 0. - _memoryStream.SetLength(0); + return 0; // EOF } - // Can't call _xmlWriter.WriteNode here, since it reads all of the data in before returning the first char. - // Do own implementation of WriteNode instead that reads just enough data to return the required number of chars - //_xmlWriter.WriteNode(_xmlReader, true); - // _xmlWriter.Flush(); - WriteXmlElement(); - // Update memoryStreamRemaining based on the number of bytes/chars just written to the MemoryStream - memoryStreamRemaining = _memoryStream.Length - _memoryStream.Position; - if (charsToSkip > 0) + + skip--; + _charsReturned++; + } + + // Read chars into buffer + int copied = 0; + while (copied < length) + { + char c; + if (!TryReadNextChar(out c)) { - cnt = memoryStreamRemaining < charsToSkip ? memoryStreamRemaining : charsToSkip; - // Move the Position forward - _memoryStream.Seek(cnt, SeekOrigin.Current); - memoryStreamRemaining -= cnt; - charsToSkip -= cnt; - _charsRemoved += cnt; + break; } + + buffer[bufferIndex + copied] = c; + copied++; + _charsReturned++; + } + + return copied; + } + + /// + /// Initializes the XML reader if it has not already been initialized, ensuring it is ready for reading + /// operations. + /// + /// + /// This method prepares the XML reader for use by creating and assigning a new instance + /// if necessary. It should be called before attempting to read XML data to guarantee that the reader is + /// available and properly configured. + /// + private void EnsureReaderInitialized() + { + if (_xmlReader != null) + { + return; } - if (charsToSkip > 0) + var sqlStream = new SqlStream(_columnOrdinal, _reader, addByteOrderMark: true, processAllRows: false, advanceReader: false); + _xmlReader = sqlStream.ToXmlReader(); + _canReadChunk = _xmlReader.CanReadValueChunk; + } + + /// + /// Progressively fetches the next char from the XmlReader, filling the current text node buffer as necessary. + /// Handles surrogate pairs that may be split across text nodes. + /// + private bool TryReadNextChar(out char c) + { + // Deliver pending high surrogate first + if (_pendingHighSurrogate.HasValue) { - cnt = memoryStreamRemaining < charsToSkip ? memoryStreamRemaining : charsToSkip; - // Move the Position forward - _memoryStream.Seek(cnt, SeekOrigin.Current); - memoryStreamRemaining -= cnt; - charsToSkip -= cnt; - _charsRemoved += cnt; + c = _pendingHighSurrogate.Value; + _pendingHighSurrogate = null; + return true; } - if (memoryStreamRemaining == 0) + // Deliver from current text node + if (_currentTextNode != null && _textNodeIndex < _currentTextNode.Length) { - return 0; + char next = _currentTextNode[_textNodeIndex++]; + if (char.IsHighSurrogate(next)) + { + // Surrogate Pairs could not be split across text nodes + c = next; + _pendingHighSurrogate = _currentTextNode[_textNodeIndex++]; + return true; + } + else + { + c = next; + return true; + } } - // At this point charsToSkip must be 0 - Debug.Assert(charsToSkip == 0); - cnt = memoryStreamRemaining < length ? memoryStreamRemaining : length; - for (int i = 0; i < cnt; i++) + // Fill/Refill current text node, then recurse to deliver the next char from one single node at a time; + // will not read entire xml column if requested substring is met. + while (_xmlReader.Read()) { - // ReadByte moves the Position forward - buffer[bufferIndex + i] = (char)_memoryStream.ReadByte(); + // Not using XmlWriter since this maintains better control of allocations and prevents an intermediate buffer copy. + switch (_xmlReader.NodeType) + { + case XmlNodeType.Element: + _currentTextNode = BuildStartOrEmptyTag(); + _textNodeIndex = 0; + return TryReadNextChar(out c); + + case XmlNodeType.Text: + case XmlNodeType.CDATA: + case XmlNodeType.Whitespace: + case XmlNodeType.SignificantWhitespace: + _currentTextNode = ReadAllText(); + _textNodeIndex = 0; + return TryReadNextChar(out c); + + case XmlNodeType.ProcessingInstruction: + _currentTextNode = $""; + _textNodeIndex = 0; + return TryReadNextChar(out c); + + case XmlNodeType.Comment: + _currentTextNode = $""; + _textNodeIndex = 0; + return TryReadNextChar(out c); + + case XmlNodeType.EndElement: + _currentTextNode = BuildEndTag(); + _textNodeIndex = 0; + return TryReadNextChar(out c); + + default: + // Skip EntityReference, DocumentType, XmlDeclaration which are normalized out by SQL Server + continue; + } } - _charsRemoved += cnt; - return cnt; + + // Ensure we don't return any stale chars after EOF + c = '\0'; + return false; // EOF } - // This method duplicates the work of XmlWriter.WriteNode except that it reads one element at a time - // instead of reading the entire node like XmlWriter. - // Caller already ensures !_xmlReader.EOF - private void WriteXmlElement() + /// + /// Reads all text content from the current node of the underlying XML reader and returns it as a string. + /// + /// + /// If the XML reader supports reading in chunks, this method reads the text in segments + /// to improve performance. Otherwise, it retrieves the value directly from the XML reader. + /// + /// A string containing all text read from the XML reader. Returns an empty string if no text is available. + private string ReadAllText() { - // Constants - const int WriteNodeBufferSize = 1024; - - long memoryStreamPosition = _memoryStream.Position; - // Move the Position to the end of the MemoryStream since we are always appending. - _memoryStream.Seek(0, SeekOrigin.End); - - _xmlReader.Read(); - switch (_xmlReader.NodeType) - { - // Note: Whitespace, CDATA, EntityReference, XmlDeclaration, ProcessingInstruction, DocumentType, and Comment node types - // are not expected in the XML returned from SQL Server as it normalizes them out, but handle them just in case. - // SignificantWhitespace will occur when used with xml:space="preserve" - case XmlNodeType.Element: - _xmlWriter.WriteStartElement(_xmlReader.Prefix, _xmlReader.LocalName, _xmlReader.NamespaceURI); - _xmlWriter.WriteAttributes(_xmlReader, true); - if (_xmlReader.IsEmptyElement) - { - _xmlWriter.WriteEndElement(); - break; - } - break; - case XmlNodeType.Text: - if (_canReadChunk) + if (_canReadChunk) + { + char[] buffer = new char[8192]; + int read; + StringBuilder stringBuilder = new StringBuilder(); + while ((read = _xmlReader.ReadValueChunk(buffer, 0, buffer.Length)) > 0) + { + stringBuilder.Append(buffer, 0, read); // only valid chars + } + return stringBuilder.ToString(); + } + else + { + return _xmlReader.Value ?? string.Empty; // never null -> avoids trailing \0 + } + } + + /// + /// Constructs an XML start tag or an empty element tag for the current node of the underlying XML reader, + /// including the namespace prefix and any attributes if present. + /// + /// + /// If the current XML node contains attributes, they are included in the generated tag. + /// If the node is an empty element, a self-closing tag is returned; otherwise, a standard opening tag is + /// produced. The method does not advance the position of the XML reader. + /// + /// A string that represents the XML start tag or a self-closing empty element tag, including all attributes of + /// the current node. + private string BuildStartOrEmptyTag() + { + string prefix = _xmlReader.Prefix; + string tagName = string.IsNullOrEmpty(prefix) ? _xmlReader.LocalName : $"{prefix}:{_xmlReader.LocalName}"; + StringBuilder stringBuilder = new StringBuilder(); + stringBuilder.Append('<').Append(tagName); + + if (_xmlReader.HasAttributes) + { + for (int i = 0; i < _xmlReader.AttributeCount; i++) + { + _xmlReader.MoveToAttribute(i); + string attrPrefix = _xmlReader.Prefix; + string attrName = string.IsNullOrEmpty(attrPrefix) ? _xmlReader.LocalName : $"{attrPrefix}:{_xmlReader.LocalName}"; + stringBuilder.Append(' ').Append(attrName).Append("=\"").Append(EscapeAttribute(_xmlReader.Value)).Append('"'); + } + _xmlReader.MoveToElement(); + } + + if (_xmlReader.IsEmptyElement) + { + stringBuilder.Append(" />"); + } + else + { + stringBuilder.Append('>'); + } + + return stringBuilder.ToString(); + } + + /// + /// Builds the closing XML tag for the current element, including the namespace prefix if present. + /// + /// + /// The returned tag is constructed using the prefix and local name from the underlying + /// XML reader. If the element has no namespace prefix, only the local name is used in the tag. + /// + /// A string that represents the closing tag of the current XML element, formatted with the appropriate + /// namespace prefix if one exists. + private string BuildEndTag() + { + string prefix = _xmlReader.Prefix; + string tagName = string.IsNullOrEmpty(prefix) ? _xmlReader.LocalName : $"{prefix}:{_xmlReader.LocalName}"; + return $""; + } + + /// + /// Escapes special characters in the provided string to ensure it is safe for use in XML attributes. + /// + /// ', and '"'. It does not + /// escape single quotes as they are not required for SQL Server attributes. The method uses a StringBuilder for + /// efficient string manipulation. + /// ]]> + /// The string to be escaped. This string may contain special XML characters that need to be replaced with their + /// corresponding entity references. + /// A string with special XML characters replaced by their corresponding entity references. If the input string + /// is null or empty, an empty string is returned. + private string EscapeAttribute(string value) + { + if (string.IsNullOrEmpty(value)) + { + return string.Empty; + } + + // Only create a StringBuilder if we find a character that needs escaping, to avoid unnecessary allocations + StringBuilder sb = null; + + for (int i = 0; i < value.Length; i++) + { + char c = value[i]; + string replacement = c switch + { + '&' => "&", + '<' => "<", + '>' => ">", + '"' => """, + //'\'' => "'", SQL Server does not escape single quotes in attributes + _ => null + }; + + if (replacement != null) + { + sb ??= new StringBuilder(value.Length + 8); + sb.Append(value, 0, i); + sb.Append(replacement); + + for (i = i + 1; i < value.Length; i++) { - char[] writeNodeBuffer = new char[WriteNodeBufferSize]; - int read; - while ((read = _xmlReader.ReadValueChunk(writeNodeBuffer, 0, WriteNodeBufferSize)) > 0) + c = value[i]; + replacement = c switch { - _xmlWriter.WriteChars(writeNodeBuffer, 0, read); + '&' => "&", + '<' => "<", + '>' => ">", + '"' => """, + //'\'' => "'", SQL Server does not escape single quotes in attributes + _ => null + }; + + if (replacement != null) + { + sb.Append(replacement); + } + else + { + sb.Append(c); } } - else - { - _xmlWriter.WriteString(_xmlReader.Value); - } - break; - case XmlNodeType.Whitespace: - case XmlNodeType.SignificantWhitespace: - _xmlWriter.WriteWhitespace(_xmlReader.Value); - break; - case XmlNodeType.CDATA: - _xmlWriter.WriteCData(_xmlReader.Value); - break; - case XmlNodeType.EntityReference: - _xmlWriter.WriteEntityRef(_xmlReader.Name); - break; - case XmlNodeType.XmlDeclaration: - case XmlNodeType.ProcessingInstruction: - _xmlWriter.WriteProcessingInstruction(_xmlReader.Name, _xmlReader.Value); - break; - case XmlNodeType.DocumentType: - _xmlWriter.WriteDocType(_xmlReader.Name, _xmlReader.GetAttribute("PUBLIC"), _xmlReader.GetAttribute("SYSTEM"), _xmlReader.Value); - break; - case XmlNodeType.Comment: - _xmlWriter.WriteComment(_xmlReader.Value); - break; - case XmlNodeType.EndElement: - _xmlWriter.WriteFullEndElement(); - break; + + return sb.ToString(); + } } - _xmlWriter.Flush(); - // Reset the Position back to where it was before writing this element so that the caller can continue reading from the expected position. - _memoryStream.Position = memoryStreamPosition; + + return value; } } } diff --git a/src/Microsoft.Data.SqlClient/tests/ManualTests/SQL/SqlStreamingXmlTest/SqlStreamingXmlTest.cs b/src/Microsoft.Data.SqlClient/tests/ManualTests/SQL/SqlStreamingXmlTest/SqlStreamingXmlTest.cs index f3831761d9..cef075e369 100644 --- a/src/Microsoft.Data.SqlClient/tests/ManualTests/SQL/SqlStreamingXmlTest/SqlStreamingXmlTest.cs +++ b/src/Microsoft.Data.SqlClient/tests/ManualTests/SQL/SqlStreamingXmlTest/SqlStreamingXmlTest.cs @@ -13,6 +13,138 @@ namespace Microsoft.Data.SqlClient.ManualTesting.Tests { public static class SqlStreamingXmlTest { + [ConditionalFact(typeof(DataTestUtility), nameof(DataTestUtility.AreConnStringsSetup))] + public static void GetChars_NonAsciiContent() + { + using SqlConnection connection = new(DataTestUtility.TCPConnectionString); + // XML containing non-ASCII characters: + // - \u00E9 (e-acute) - 2 bytes in UTF-8 + // - \u00F1 (n-tilde) - 2 bytes in UTF-8 + // - \u00FC (u-umlaut) - 2 bytes in UTF-8 + string xml = "caf\u00E9 se\u00F1or \u00FCber"; + int expectedLength = xml.Length; + string commandText = $"SELECT Convert(xml, N'{xml}')"; + + using SqlCommand command = connection.CreateCommand(); + connection.Open(); + command.CommandText = commandText; + + using SqlDataReader sqlDataReader = command.ExecuteReader(CommandBehavior.SequentialAccess); + Assert.True(sqlDataReader.Read(), "Expected to read a row"); + + (long length, string result) = ReadAllChars(sqlDataReader, expectedLength); + + Assert.Equal(expectedLength, length); + Assert.Equal(xml, result.Substring(0, (int)length)); + } + + [ConditionalFact(typeof(DataTestUtility), nameof(DataTestUtility.AreConnStringsSetup))] + public static void GetChars_NonAsciiContent_BulkRead() + { + using SqlConnection connection = new(DataTestUtility.TCPConnectionString); + // Same non-ASCII XML but read in a single bulk GetChars call + string xml = "Jos\u00E9 Garc\u00EDa"; + int expectedLength = xml.Length; + string commandText = $"SELECT Convert(xml, N'{xml}')"; + + using SqlCommand command = connection.CreateCommand(); + connection.Open(); + command.CommandText = commandText; + + using SqlDataReader sqlDataReader = command.ExecuteReader(CommandBehavior.SequentialAccess); + Assert.True(sqlDataReader.Read(), "Expected to read a row"); + + char[] buffer = new char[expectedLength + 10]; + long charsRead = sqlDataReader.GetChars(0, 0, buffer, 0, buffer.Length); + + Assert.Equal(expectedLength, charsRead); + string result = new(buffer, 0, (int)charsRead); + Assert.Equal(xml, result); + } + + [ConditionalFact(typeof(DataTestUtility), nameof(DataTestUtility.AreConnStringsSetup))] + public static void GetChars_CjkContent() + { + using SqlConnection connection = new(DataTestUtility.TCPConnectionString); + // CJK characters: 3 bytes each in UTF-8 + string xml = "\u65E5\u672C\u8A9E\u30C6\u30B9\u30C8"; + int expectedLength = xml.Length; + string commandText = $"SELECT Convert(xml, N'{xml}')"; + + using SqlCommand command = connection.CreateCommand(); + connection.Open(); + command.CommandText = commandText; + + using SqlDataReader sqlDataReader = command.ExecuteReader(CommandBehavior.SequentialAccess); + Assert.True(sqlDataReader.Read(), "Expected to read a row"); + + (long length, string result) = ReadAllChars(sqlDataReader, expectedLength); + + Assert.Equal(expectedLength, length); + Assert.Equal(xml, result.Substring(0, (int)length)); + } + + [ConditionalFact(typeof(DataTestUtility), nameof(DataTestUtility.AreConnStringsSetup))] + public static void GetChars_SurrogatePairContent() + { + using SqlConnection connection = new(DataTestUtility.TCPConnectionString); + // Surrogate Pair characters: 4 bytes each in UTF-8 + string xml = "\U0001F600\U0001F525\U0001F680"; + int expectedLength = xml.Length; + string commandText = $"SELECT Convert(xml, N'{xml}')"; + + using SqlCommand command = connection.CreateCommand(); + connection.Open(); + command.CommandText = commandText; + + using SqlDataReader sqlDataReader = command.ExecuteReader(CommandBehavior.SequentialAccess); + Assert.True(sqlDataReader.Read(), "Expected to read a row"); + + (long length, string result) = ReadAllChars(sqlDataReader, expectedLength); + + Assert.Equal(expectedLength, length); + Assert.Equal(xml, result.Substring(0, (int)length)); + } + + [ConditionalFact(typeof(DataTestUtility), nameof(DataTestUtility.AreConnStringsSetup))] + public static void GetChars_SurrogatePair_ReadIndividually() + { + using SqlConnection connection = new(DataTestUtility.TCPConnectionString); + // Surrogate Pair character: 4 bytes in UTF-8 + string xml = "\U0001F600"; + const string commandText = "SELECT @xmlParam"; + + using SqlCommand command = connection.CreateCommand(); + connection.Open(); + command.CommandText = commandText; + command.Parameters.Add(new SqlParameter("@xmlParam", SqlDbType.Xml) { Value = xml }); + + using SqlDataReader sqlDataReader = command.ExecuteReader(CommandBehavior.SequentialAccess); + Assert.True(sqlDataReader.Read(), "Expected to read a row"); + + // Find the surrogate pair location in the original string + int highIndex = xml.IndexOf('\uD83D'); + Assert.True(highIndex >= 0); + + int lowIndex = highIndex + 1; + + char[] buffer = new char[1]; + + // Read the high surrogate + long read = sqlDataReader.GetChars(0, highIndex, buffer, 0, 1); + Assert.Equal(1, read); + Assert.True(char.IsHighSurrogate(buffer[0])); + + // Read the low surrogate + read = sqlDataReader.GetChars(0, lowIndex, buffer, 0, 1); + Assert.Equal(1, read); + Assert.True(char.IsLowSurrogate(buffer[0])); + + // Reconstruct pair + string reconstructed = new string(new[] { xml[highIndex], xml[lowIndex] }); + Assert.Equal("\U0001F600", reconstructed); + } + [ConditionalFact(typeof(DataTestUtility), nameof(DataTestUtility.AreConnStringsSetup))] public static void Linear_SingleNode() { @@ -42,22 +174,18 @@ public static void Linear_MultipleNodes() [ConditionalFact(typeof(DataTestUtility), nameof(DataTestUtility.AreConnStringsSetup))] public static void GetChars_RequiresBuffer() { - SqlConnection connection = new(DataTestUtility.TCPConnectionString); + using SqlConnection connection = new(DataTestUtility.TCPConnectionString); const string commandText = "SELECT Convert(xml, N'bar')"; long charCount = 0; - using (SqlCommand command = connection.CreateCommand()) - { - connection.Open(); - command.CommandText = commandText; + using SqlCommand command = connection.CreateCommand(); + connection.Open(); + command.CommandText = commandText; - SqlDataReader sqlDataReader = command.ExecuteReader(CommandBehavior.SequentialAccess); - if (sqlDataReader.Read()) - { - charCount = sqlDataReader.GetChars(0, 0, null, 0, 1); - } - connection.Close(); - } + using SqlDataReader sqlDataReader = command.ExecuteReader(CommandBehavior.SequentialAccess); + Assert.True(sqlDataReader.Read(), "Expected to read a row"); + + charCount = sqlDataReader.GetChars(0, 0, null, 0, 1); //verify -1 is returned since buffer was not provided Assert.Equal(-1, charCount); @@ -66,52 +194,44 @@ public static void GetChars_RequiresBuffer() [ConditionalTheory(typeof(DataTestUtility), nameof(DataTestUtility.AreConnStringsSetup))] [InlineData(true)] [InlineData(false)] - public static void GetChars_SequentialDataIndex(bool backwards) + public static void GetChars_SequentialDataIndex(bool overlapByOne) { - SqlConnection connection = new(DataTestUtility.TCPConnectionString); + using SqlConnection connection = new(DataTestUtility.TCPConnectionString); const string commandText = "SELECT Convert(xml, N'bar')"; char[] buffer = new char[2]; - using (SqlCommand command = connection.CreateCommand()) - { - connection.Open(); - command.CommandText = commandText; + using SqlCommand command = connection.CreateCommand(); + connection.Open(); + command.CommandText = commandText; - SqlDataReader sqlDataReader = command.ExecuteReader(CommandBehavior.SequentialAccess); - if (sqlDataReader.Read()) - { - sqlDataReader.GetChars(0, 0, buffer, 0, 2); - // Verify that providing the same or lower index than the previous call results in an exception. - // When backwards is true we test providing an index that is one less than the previous call, - // otherwise we test providing the same index as the previous call - both should not be allowed. - Assert.Throws(() => sqlDataReader.GetChars(0, backwards ? 0 : 1, buffer, 0, 2)); - } - connection.Close(); - } + using SqlDataReader sqlDataReader = command.ExecuteReader(CommandBehavior.SequentialAccess); + Assert.True(sqlDataReader.Read(), "Expected to read a row"); + + sqlDataReader.GetChars(0, 0, buffer, 0, 2); + // Verify that providing the same or lower index than the previous call results in an exception. + // When overlapByOne is true we test providing an index that is one less than the previous call, + // otherwise we test providing the same index as the previous call - both should not be allowed. + Assert.Throws(() => sqlDataReader.GetChars(0, overlapByOne ? 0 : 1, buffer, 0, 2)); } [ConditionalFact(typeof(DataTestUtility), nameof(DataTestUtility.AreConnStringsSetup))] public static void GetChars_PartialSingleElement() { - SqlConnection connection = new(DataTestUtility.TCPConnectionString); + using SqlConnection connection = new(DataTestUtility.TCPConnectionString); const string commandText = "SELECT Convert(xml, N'_bar_baz')"; long charCount = 0; char[] buffer = new char[3]; - using (SqlCommand command = connection.CreateCommand()) - { - connection.Open(); - command.CommandText = commandText; + using SqlCommand command = connection.CreateCommand(); + connection.Open(); + command.CommandText = commandText; - SqlDataReader sqlDataReader = command.ExecuteReader(CommandBehavior.SequentialAccess); - if (sqlDataReader.Read()) - { - // Read just the 'bar' characters from the XML by specifying the offset, and the length of 3. - // The offset is 6 to skip the entire first element '' and the initial '_' part of text. - charCount = sqlDataReader.GetChars(0, 6, buffer, 0, 3); - } - connection.Close(); - } + using SqlDataReader sqlDataReader = command.ExecuteReader(CommandBehavior.SequentialAccess); + Assert.True(sqlDataReader.Read(), "Expected to read a row"); + + // Read just the 'bar' characters from the XML by specifying the offset, and the length of 3. + // The offset is 6 to skip the entire first element '' and the initial '_' part of text. + charCount = sqlDataReader.GetChars(0, 6, buffer, 0, 3); Assert.Equal(3, charCount); Assert.Equal("bar", new string(buffer)); @@ -122,36 +242,32 @@ public static void GetChars_PartialSingleElement() [InlineData(false)] public static void GetChars_PartialAcrossElements(bool initialRead) { - SqlConnection connection = new(DataTestUtility.TCPConnectionString); + using SqlConnection connection = new(DataTestUtility.TCPConnectionString); const string commandText = "SELECT Convert(xml, N'baz')"; long charCount = 0; char[] buffer = new char[8]; - using (SqlCommand command = connection.CreateCommand()) - { - connection.Open(); - command.CommandText = commandText; + using SqlCommand command = connection.CreateCommand(); + connection.Open(); + command.CommandText = commandText; - SqlDataReader sqlDataReader = command.ExecuteReader(CommandBehavior.SequentialAccess); - if (sqlDataReader.Read()) - { - if (initialRead) - { - // When initialRead is true, we verify continuation after a previous read, - // otherwise we just verify that we can read across XML elements in a single call. - char[] initialBuffer = new char[2]; - sqlDataReader.GetChars(0, 0, initialBuffer, 0, 2); - Assert.Equal("bazbazbaz_bar_baz"""; int expectedSize = xml.Length; string commandText = $"SELECT Convert(xml, N'{xml}')"; - using (SqlCommand command = connection.CreateCommand()) - { - connection.Open(); - command.CommandText = commandText; + using SqlCommand command = connection.CreateCommand(); + connection.Open(); + command.CommandText = commandText; - SqlDataReader sqlDataReader = command.ExecuteReader(CommandBehavior.SequentialAccess); - if (sqlDataReader.Read()) - { - if (initialRead) - { - // When initialRead is true, we verify continuation after a previous read, - // otherwise we just verify that we can read everything in a single call. - char[] initialBuffer = new char[2]; - long initialLength = sqlDataReader.GetChars(0, 0, initialBuffer, 0, 2); - char[] remainingBuffer = new char[98]; - long remainingLength = sqlDataReader.GetChars(0, 2, remainingBuffer, 0, 98); - string combined = new string(initialBuffer) + new string(remainingBuffer); - - Assert.Equal(expectedSize, initialLength + remainingLength); - Assert.Equal(xml, combined.Substring(0, expectedSize)); - } - else - { - // Try to read more characters than the actual XML to verify that the method returns only the actual number of characters. - (long length, string text) = ReadAllChars(sqlDataReader, 100); + using SqlDataReader sqlDataReader = command.ExecuteReader(CommandBehavior.SequentialAccess); + Assert.True(sqlDataReader.Read(), "Expected to read a row"); - Assert.Equal(expectedSize, length); - Assert.Equal(xml, text.Substring(0, expectedSize)); - } - } - connection.Close(); + if (initialRead) + { + // When initialRead is true, we verify continuation after a previous read, + // otherwise we just verify that we can read everything in a single call. + char[] initialBuffer = new char[2]; + long initialLength = sqlDataReader.GetChars(0, 0, initialBuffer, 0, 2); + char[] remainingBuffer = new char[98]; + long remainingLength = sqlDataReader.GetChars(0, 2, remainingBuffer, 0, 98); + string combined = new string(initialBuffer) + new string(remainingBuffer); + + Assert.Equal(expectedSize, initialLength + remainingLength); + Assert.Equal(xml, combined.Substring(0, expectedSize)); + } + else + { + // Try to read more characters than the actual XML to verify that the method returns only the actual number of characters. + (long length, string text) = ReadAllChars(sqlDataReader, 100); + + Assert.Equal(expectedSize, length); + Assert.Equal(xml, text.Substring(0, expectedSize)); } } @@ -206,93 +318,522 @@ public static void GetChars_ExcessiveLength(bool initialRead) [InlineData(false)] public static void GetChars_ExcessiveDataIndex(bool initialRead) { - SqlConnection connection = new(DataTestUtility.TCPConnectionString); + using SqlConnection connection = new(DataTestUtility.TCPConnectionString); string xml = """_bar_baz"""; string commandText = $"SELECT Convert(xml, N'{xml}')"; - using (SqlCommand command = connection.CreateCommand()) - { - connection.Open(); - command.CommandText = commandText; + using SqlCommand command = connection.CreateCommand(); + connection.Open(); + command.CommandText = commandText; - SqlDataReader sqlDataReader = command.ExecuteReader(CommandBehavior.SequentialAccess); - if (sqlDataReader.Read()) - { - if (initialRead) - { - // When initialRead is true, we verify continuation after a previous read, - // otherwise we just verify the large DataIndex in a single call. - char[] initialBuffer = new char[2]; - long initialLength = sqlDataReader.GetChars(0, 0, initialBuffer, 0, 2); - Assert.Equal(2, initialLength); - } + using SqlDataReader sqlDataReader = command.ExecuteReader(CommandBehavior.SequentialAccess); + Assert.True(sqlDataReader.Read(), "Expected to read a row"); - // buffer will not be touched since the DataIndex is beyond the end of the XML, but a suitable buffer must still be provided. - char[] buffer = new char[100]; - long length = sqlDataReader.GetChars(0, 100, buffer, 0, 2); - Assert.Equal(0, length); - } - connection.Close(); + if (initialRead) + { + // When initialRead is true, we verify continuation after a previous read, + // otherwise we just verify the large DataIndex in a single call. + char[] initialBuffer = new char[2]; + long initialLength = sqlDataReader.GetChars(0, 0, initialBuffer, 0, 2); + Assert.Equal(2, initialLength); } + + // buffer will not be touched since the DataIndex is beyond the end of the XML, but a suitable buffer must still be provided. + char[] buffer = new char[100]; + long length = sqlDataReader.GetChars(0, 100, buffer, 0, 2); + Assert.Equal(0, length); } [ConditionalFact(typeof(DataTestUtility), nameof(DataTestUtility.AreConnStringsSetup))] public static void GetChars_AsXDocument() { - SqlConnection connection = new(DataTestUtility.TCPConnectionString); + using SqlConnection connection = new(DataTestUtility.TCPConnectionString); // Use a more complex XML column verify through XDocument. string xml = """John """; XDocument expect = XDocument.Parse(xml); int expectedSize = xml.Length; string commandText = $"SELECT Convert(xml, N'{xml}')"; - using (SqlCommand command = connection.CreateCommand()) + using SqlCommand command = connection.CreateCommand(); + connection.Open(); + command.CommandText = commandText; + + using SqlDataReader sqlDataReader = command.ExecuteReader(CommandBehavior.SequentialAccess); + Assert.True(sqlDataReader.Read(), "Expected to read a row"); + + (long length, string xmlString) = ReadAllChars(sqlDataReader, expectedSize); + + Assert.Equal(expectedSize, length); + XDocument actual = XDocument.Parse(xmlString); + Assert.Equal((int)expect.Root.Attribute("Id"), (int)actual.Root.Attribute("Id")); + Assert.Equal((string)expect.Root.Attribute("Role"), (string)actual.Root.Attribute("Role")); + Assert.NotNull(expect.Root.Element("Name")?.Value); + Assert.Equal(expect.Root.Element("Name")!.Value, actual.Root.Element("Name")!.Value); + Assert.NotNull(expect.Root.Element("Children")?.HasElements); + Assert.Equal(expect.Root.Element("Children")!.HasElements, actual.Root.Element("Children")?.HasElements); + Assert.NotNull(expect.Root.Element("PreservedWhitespace")?.Value); + Assert.Equal(expect.Root.Element("PreservedWhitespace")!.Value, actual.Root.Element("PreservedWhitespace")!.Value); + } + + [ConditionalFact(typeof(DataTestUtility), nameof(DataTestUtility.AreConnStringsSetup))] + public static void GetChars_ProcessingInstructionOnly() + { + using SqlConnection connection = new(DataTestUtility.TCPConnectionString); + string xml = ""; + int expectedLength = xml.Length; + string commandText = $"SELECT Convert(xml, N'{xml}')"; + + using SqlCommand command = connection.CreateCommand(); + connection.Open(); + command.CommandText = commandText; + + using SqlDataReader sqlDataReader = command.ExecuteReader(CommandBehavior.SequentialAccess); + Assert.True(sqlDataReader.Read(), "Expected to read a row"); + + (long length, string result) = ReadAllChars(sqlDataReader, expectedLength); + + Assert.Equal(expectedLength, length); + Assert.Equal(xml, result.Substring(0, (int)length)); + } + + [ConditionalFact(typeof(DataTestUtility), nameof(DataTestUtility.AreConnStringsSetup))] + public static void GetChars_ZeroLength() + { + using SqlConnection connection = new(DataTestUtility.TCPConnectionString); + const string commandText = "SELECT Convert(xml, N'bar')"; + long charCount = 0; + + using SqlCommand command = connection.CreateCommand(); + connection.Open(); + command.CommandText = commandText; + + using SqlDataReader sqlDataReader = command.ExecuteReader(CommandBehavior.SequentialAccess); + Assert.True(sqlDataReader.Read(), "Expected to read a row"); + + // While not used, cannot pass an empty buffer to GetChars, so provide a buffer of size 1 but request 0 characters to read. + char[] buffer = new char[1]; + charCount = sqlDataReader.GetChars(0, 0, buffer, 0, 0); + + //verify 0 is returned since nothing was requested + Assert.Equal(0, charCount); + } + + [ConditionalFact(typeof(DataTestUtility), nameof(DataTestUtility.AreConnStringsSetup))] + public static void GetChars_CommentAndProcessingInstructionMixed() + { + using SqlConnection connection = new(DataTestUtility.TCPConnectionString); + string xml = ""; + int expectedLength = xml.Length; + string commandText = $"SELECT Convert(xml, N'{xml}')"; + + using SqlCommand command = connection.CreateCommand(); + connection.Open(); + command.CommandText = commandText; + + using SqlDataReader sqlDataReader = command.ExecuteReader(CommandBehavior.SequentialAccess); + Assert.True(sqlDataReader.Read(), "Expected to read a row"); + + (long length, string result) = ReadAllChars(sqlDataReader, expectedLength); + + Assert.Equal(expectedLength, length); + Assert.Equal(xml, result.Substring(0, (int)length)); + } + + [ConditionalFact(typeof(DataTestUtility), nameof(DataTestUtility.AreConnStringsSetup))] + public static void GetChars_EmptyElementWithAttributes() + { + using SqlConnection connection = new(DataTestUtility.TCPConnectionString); + // Use an empty element with various attributes, including empty attribute value, normal attribute value, and attributes with escaped characters to verify that all are preserved correctly. + string xml = ""; + int expectedLength = xml.Length; + const string commandText = "SELECT @xmlParam"; + + using SqlCommand command = connection.CreateCommand(); + connection.Open(); + command.CommandText = commandText; + command.Parameters.Add(new SqlParameter("@xmlParam", SqlDbType.Xml) { Value = xml }); + + using SqlDataReader sqlDataReader = command.ExecuteReader(CommandBehavior.SequentialAccess); + Assert.True(sqlDataReader.Read(), "Expected to read a row"); + + (long length, string result) = ReadAllChars(sqlDataReader, expectedLength); + + Assert.Equal(expectedLength, length); + Assert.Equal(xml, result.Substring(0, (int)length)); + } + + [ConditionalFact(typeof(DataTestUtility), nameof(DataTestUtility.AreConnStringsSetup))] + public static void GetChars_EmptyElementWithAttribute_Apos() + { + using SqlConnection connection = new(DataTestUtility.TCPConnectionString); + // ' is normalized by SQL Server and converts to simply ' + string xml = ""; + string expected = ""; + int expectedLength = expected.Length; + const string commandText = "SELECT @xmlParam"; + + using SqlCommand command = connection.CreateCommand(); + connection.Open(); + command.CommandText = commandText; + command.Parameters.Add(new SqlParameter("@xmlParam", SqlDbType.Xml) { Value = xml }); + + using SqlDataReader sqlDataReader = command.ExecuteReader(CommandBehavior.SequentialAccess); + Assert.True(sqlDataReader.Read(), "Expected to read a row"); + + (long length, string result) = ReadAllChars(sqlDataReader, expectedLength); + + Assert.Equal(expectedLength, length); + Assert.Equal(expected, result.Substring(0, (int)length)); + } + + [ConditionalFact(typeof(DataTestUtility), nameof(DataTestUtility.AreConnStringsSetup))] + public static void GetChars_ElementWithNamespacePrefix() + { + using SqlConnection connection = new(DataTestUtility.TCPConnectionString); + string xml = "content"; + int expectedLength = xml.Length; + const string commandText = "SELECT @xmlParam"; + + using SqlCommand command = connection.CreateCommand(); + connection.Open(); + command.CommandText = commandText; + command.Parameters.Add(new SqlParameter("@xmlParam", SqlDbType.Xml) { Value = xml }); + + using SqlDataReader sqlDataReader = command.ExecuteReader(CommandBehavior.SequentialAccess); + Assert.True(sqlDataReader.Read(), "Expected to read a row"); + + (long length, string result) = ReadAllChars(sqlDataReader, expectedLength); + + Assert.Equal(expectedLength, length); + Assert.Equal(xml, result.Substring(0, (int)length)); + } + + [ConditionalFact(typeof(DataTestUtility), nameof(DataTestUtility.AreConnStringsSetup))] + public static void GetChars_MixedContent() + { + using SqlConnection connection = new(DataTestUtility.TCPConnectionString); + string xml = "textinnermore"; + int expectedLength = xml.Length; + const string commandText = "SELECT @xmlParam"; + + using SqlCommand command = connection.CreateCommand(); + connection.Open(); + command.CommandText = commandText; + command.Parameters.Add(new SqlParameter("@xmlParam", SqlDbType.Xml) { Value = xml }); + + using SqlDataReader sqlDataReader = command.ExecuteReader(CommandBehavior.SequentialAccess); + Assert.True(sqlDataReader.Read(), "Expected to read a row"); + + (long length, string result) = ReadAllChars(sqlDataReader, expectedLength); + + Assert.Equal(expectedLength, length); + Assert.Equal(xml, result.Substring(0, (int)length)); + } + + [ConditionalFact(typeof(DataTestUtility), nameof(DataTestUtility.AreConnStringsSetup))] + public static void GetChars_CDATASection() + { + using SqlConnection connection = new(DataTestUtility.TCPConnectionString); + string xml = " content]]>"; + string expected = "some content"; + int expectedLength = expected.Length; + string commandText = $"SELECT Convert(xml, N'{xml}')"; + + using SqlCommand command = connection.CreateCommand(); + connection.Open(); + command.CommandText = commandText; + + using SqlDataReader sqlDataReader = command.ExecuteReader(CommandBehavior.SequentialAccess); + Assert.True(sqlDataReader.Read(), "Expected to read a row"); + + (long length, string result) = ReadAllChars(sqlDataReader, expectedLength); + + Assert.Equal(expectedLength, length); + Assert.Equal(expected, result.Substring(0, (int)length)); + } + + [ConditionalFact(typeof(DataTestUtility), nameof(DataTestUtility.AreConnStringsSetup))] + public static void GetChars_WhitespaceAndSignificantWhitespace() + { + using SqlConnection connection = new(DataTestUtility.TCPConnectionString); + string xml = " \t\n "; + int expectedLength = xml.Length; + const string commandText = "SELECT @xmlParam"; + + using SqlCommand command = connection.CreateCommand(); + connection.Open(); + command.CommandText = commandText; + command.Parameters.Add(new SqlParameter("@xmlParam", SqlDbType.Xml) { Value = xml }); + + using SqlDataReader sqlDataReader = command.ExecuteReader(CommandBehavior.SequentialAccess); + Assert.True(sqlDataReader.Read(), "Expected to read a row"); + + (long length, string result) = ReadAllChars(sqlDataReader, expectedLength); + + Assert.Equal(expectedLength, length); + Assert.Equal(xml, result.Substring(0, (int)length)); + } + + [ConditionalFact(typeof(DataTestUtility), nameof(DataTestUtility.AreConnStringsSetup))] + public static void GetChars_EntityReferences_Normalized() + { + using SqlConnection connection = new(DataTestUtility.TCPConnectionString); + string xml = "<>&"'"; + const string expected = "<>&\"'"; + int expectedLength = expected.Length; + string commandText = $"SELECT Convert(xml, N'{xml}')"; + + using SqlCommand command = connection.CreateCommand(); + connection.Open(); + command.CommandText = commandText; + + using SqlDataReader sqlDataReader = command.ExecuteReader(CommandBehavior.SequentialAccess); + Assert.True(sqlDataReader.Read()); + + char[] buffer = new char[expectedLength]; + // Use 6 for dataIndex to skip "" + long charsRead = sqlDataReader.GetChars(0, 6, buffer, 0, buffer.Length); + + Assert.Equal(expectedLength, charsRead); + string text = new(buffer, 0, (int)charsRead); + Assert.Equal(expected, text); + } + + [ConditionalFact(typeof(DataTestUtility), nameof(DataTestUtility.AreConnStringsSetup))] + public static void GetChars_ProcessingInstructions() + { + using SqlConnection connection = new(DataTestUtility.TCPConnectionString); + string xml = ""; + int expectedLength = xml.Length; + const string commandText = "SELECT @xmlParam"; + + using SqlCommand command = connection.CreateCommand(); + connection.Open(); + command.CommandText = commandText; + command.Parameters.Add(new SqlParameter("@xmlParam", SqlDbType.Xml) { Value = xml }); + + using SqlDataReader sqlDataReader = command.ExecuteReader(CommandBehavior.SequentialAccess); + Assert.True(sqlDataReader.Read(), "Expected to read a row"); + + (long length, string result) = ReadAllChars(sqlDataReader, expectedLength); + + Assert.Equal(expectedLength, length); + Assert.Equal(xml, result.Substring(0, (int)length)); + } + + [ConditionalFact(typeof(DataTestUtility), nameof(DataTestUtility.AreConnStringsSetup))] + public static void GetChars_XmlDeclaration_Normalized() + { + using SqlConnection connection = new(DataTestUtility.TCPConnectionString); + string xml = ""; + string expected = ""; + int expectedLength = expected.Length; + const string commandText = "SELECT @xmlParam"; + + using SqlCommand command = connection.CreateCommand(); + connection.Open(); + command.CommandText = commandText; + command.Parameters.Add(new SqlParameter("@xmlParam", SqlDbType.Xml) { Value = xml }); + + using SqlDataReader sqlDataReader = command.ExecuteReader(CommandBehavior.SequentialAccess); + Assert.True(sqlDataReader.Read(), "Expected to read a row"); + + (long length, string result) = ReadAllChars(sqlDataReader, expectedLength); + + Assert.Equal(expectedLength, length); + Assert.Equal(expected, result.Substring(0, (int)length)); + } + + [ConditionalFact(typeof(DataTestUtility), nameof(DataTestUtility.AreConnStringsSetup))] + public static void GetChars_CommentNode() + { + using SqlConnection connection = new(DataTestUtility.TCPConnectionString); + string xml = ""; + int expectedLength = xml.Length; + string commandText = $"SELECT Convert(xml, N'{xml}')"; + + using SqlCommand command = connection.CreateCommand(); + connection.Open(); + command.CommandText = commandText; + + using SqlDataReader sqlDataReader = command.ExecuteReader(CommandBehavior.SequentialAccess); + Assert.True(sqlDataReader.Read(), "Expected to read a row"); + + (long length, string result) = ReadAllChars(sqlDataReader, expectedLength); + + Assert.Equal(expectedLength, length); + Assert.Equal(xml, result.Substring(0, (int)length)); + } + [ConditionalFact(typeof(DataTestUtility), nameof(DataTestUtility.AreConnStringsSetup))] + public static void GetChars_MultipleComments() + { + using SqlConnection connection = new(DataTestUtility.TCPConnectionString); + string xml = ""; + int expectedLength = xml.Length; + string commandText = $"SELECT Convert(xml, N'{xml}')"; + + using SqlCommand command = connection.CreateCommand(); + connection.Open(); + command.CommandText = commandText; + + using SqlDataReader sqlDataReader = command.ExecuteReader(CommandBehavior.SequentialAccess); + Assert.True(sqlDataReader.Read(), "Expected to read a row"); + + (long length, string result) = ReadAllChars(sqlDataReader, expectedLength); + + Assert.Equal(expectedLength, length); + Assert.Equal(xml, result.Substring(0, (int)length)); + } + + [ConditionalFact(typeof(DataTestUtility), nameof(DataTestUtility.AreConnStringsSetup))] + public static void GetChars_CommentWithSpecialChars() + { + using SqlConnection connection = new(DataTestUtility.TCPConnectionString); + string xml = ""; + int expectedLength = xml.Length; + const string commandText = "SELECT @xmlParam"; + + using SqlCommand command = connection.CreateCommand(); + connection.Open(); + command.CommandText = commandText; + command.Parameters.Add(new SqlParameter("@xmlParam", SqlDbType.Xml) { Value = xml }); + + using SqlDataReader sqlDataReader = command.ExecuteReader(CommandBehavior.SequentialAccess); + Assert.True(sqlDataReader.Read(), "Expected to read a row"); + + (long length, string result) = ReadAllChars(sqlDataReader, expectedLength); + + Assert.Equal(expectedLength, length); + Assert.Equal(xml, result.Substring(0, (int)length)); + } + + [ConditionalFact(typeof(DataTestUtility), nameof(DataTestUtility.AreConnStringsSetup))] + public static void GetChars_EntityReferencesInsideComment() + { + using SqlConnection connection = new(DataTestUtility.TCPConnectionString); + string xml = ""; + int expectedLength = xml.Length; + string commandText = $"SELECT Convert(xml, N'{xml}')"; + + using SqlCommand command = connection.CreateCommand(); + connection.Open(); + command.CommandText = commandText; + + using SqlDataReader sqlDataReader = command.ExecuteReader(CommandBehavior.SequentialAccess); + Assert.True(sqlDataReader.Read(), "Expected to read a row"); + + (long length, string result) = ReadAllChars(sqlDataReader, expectedLength); + + Assert.Equal(expectedLength, length); + Assert.Equal(xml, result.Substring(0, (int)length)); + } + + [ConditionalFact(typeof(DataTestUtility), nameof(DataTestUtility.AreConnStringsSetup))] + public static void GetChars_SingleCharReadsVsBulk() + { + using SqlConnection connection = new(DataTestUtility.TCPConnectionString); + string xml = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"; + int expectedLength = xml.Length; + string commandText = $"SELECT Convert(xml, N'{xml}')"; + + using SqlCommand command = connection.CreateCommand(); + connection.Open(); + command.CommandText = commandText; + + // ---- single char reads ---- + using (SqlDataReader sqlDataReader = command.ExecuteReader(CommandBehavior.SequentialAccess)) { - connection.Open(); - command.CommandText = commandText; + Assert.True(sqlDataReader.Read(), "Expected to read a row"); - SqlDataReader sqlDataReader = command.ExecuteReader(CommandBehavior.SequentialAccess); - if (sqlDataReader.Read()) + long position = 0; + string singleReadResult = string.Empty; + char[] buffer = new char[1]; + position = 0; + + while (true) { - (long length, string xmlString) = ReadAllChars(sqlDataReader, expectedSize); - - Assert.Equal(expectedSize, length); - XDocument actual = XDocument.Parse(xmlString); - Assert.Equal((int)expect.Root.Attribute("Id"), (int)actual.Root.Attribute("Id")); - Assert.Equal((string)expect.Root.Attribute("Role"), (string)actual.Root.Attribute("Role")); - Assert.NotNull(expect.Root.Element("Name")?.Value); - Assert.Equal(expect.Root.Element("Name")!.Value, actual.Root.Element("Name")!.Value); - Assert.NotNull(expect.Root.Element("Children")?.HasElements); - Assert.Equal(expect.Root.Element("Children")!.HasElements, actual.Root.Element("Children")?.HasElements); - Assert.NotNull(expect.Root.Element("PreservedWhitespace")?.Value); - Assert.Equal(expect.Root.Element("PreservedWhitespace")!.Value, actual.Root.Element("PreservedWhitespace")!.Value); + long read = sqlDataReader.GetChars(0, position, buffer, 0, 1); + if (read == 0) + { + break; + } + + singleReadResult += buffer[0]; + position += read; } - connection.Close(); + + Assert.Equal(expectedLength, position); + Assert.Equal(xml, singleReadResult); + } + + // Reuse the same command to verify that bulk read returns the same result, and that the two approaches can be used interchangeably. + // ---- bulk read ---- + using (SqlDataReader sqlDataReader = command.ExecuteReader(CommandBehavior.SequentialAccess)) + { + Assert.True(sqlDataReader.Read(), "Expected to read a row"); + + char[] buffer = new char[expectedLength]; + long bulkRead = sqlDataReader.GetChars(0, 0, buffer, 0, buffer.Length); + string bulkResult = new(buffer, 0, (int)bulkRead); + + Assert.Equal(expectedLength, bulkRead); + Assert.Equal(xml, bulkResult); } } + [ConditionalFact(typeof(DataTestUtility), nameof(DataTestUtility.AreConnStringsSetup))] + public static void GetChars_TwoXmlColumns() + { + using SqlConnection connection = new(DataTestUtility.TCPConnectionString); + string xml1 = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"; + int expectedLength1 = xml1.Length; + string xml2 = "0123456789"; + int expectedLength2 = xml2.Length; + string commandText = $"SELECT Convert(xml, N'{xml1}'), Convert(xml, N'{xml2}')"; + + using SqlCommand command = connection.CreateCommand(); + connection.Open(); + command.CommandText = commandText; + + using SqlDataReader sqlDataReader = command.ExecuteReader(CommandBehavior.SequentialAccess); + Assert.True(sqlDataReader.Read(), "Expected to read a row"); + + // Bulk read the first column + char[] buffer1 = new char[expectedLength1]; + long column1Count = sqlDataReader.GetChars(0, 0, buffer1, 0, buffer1.Length); + string column1 = new(buffer1, 0, (int)column1Count); + + Assert.Equal(expectedLength1, column1Count); + Assert.Equal(xml1, column1); + + // Bulk read the second column + char[] buffer2 = new char[expectedLength2]; + // Change the column index to 1 to read from the second column, and verify that we get the expected result for the second column. + long column2Count = sqlDataReader.GetChars(1, 0, buffer2, 0, buffer2.Length); + string column2 = new(buffer2, 0, (int)column2Count); + + Assert.Equal(expectedLength2, column2Count); + Assert.Equal(xml2, column2); + } + private static TimeSpan TimedExecution(string commandTextBase, int scale) { - SqlConnection connection = new(DataTestUtility.TCPConnectionString); - Stopwatch stopwatch = new Stopwatch(); + using SqlConnection connection = new(DataTestUtility.TCPConnectionString); + Stopwatch stopwatch = new(); int expectedSize = scale * 1024 * 1024; - using (SqlCommand command = connection.CreateCommand()) - { - connection.Open(); - command.CommandText = string.Format(CultureInfo.InvariantCulture, commandTextBase, scale); + using SqlCommand command = connection.CreateCommand(); + connection.Open(); + command.CommandText = string.Format(CultureInfo.InvariantCulture, commandTextBase, scale); - SqlDataReader sqlDataReader = command.ExecuteReader(CommandBehavior.SequentialAccess); - if (sqlDataReader.Read()) - { - stopwatch.Start(); - (long length, string _) = ReadAllChars(sqlDataReader, expectedSize); - stopwatch.Stop(); - Assert.Equal(expectedSize, length); - } - connection.Close(); - } + using SqlDataReader sqlDataReader = command.ExecuteReader(CommandBehavior.SequentialAccess); + Assert.True(sqlDataReader.Read(), "Expected to read a row"); + + stopwatch.Start(); + (long length, string _) = ReadAllChars(sqlDataReader, expectedSize); + stopwatch.Stop(); + Assert.Equal(expectedSize, length); return stopwatch.Elapsed; } @@ -300,7 +841,7 @@ private static TimeSpan TimedExecution(string commandTextBase, int scale) /// /// Replicate the reading approach used with issue #1877 /// - private static (long, string) ReadAllChars(SqlDataReader sqlDataReader, int expectedSize) + private static (long, string) ReadAllChars(SqlDataReader sqlDataReader, long expectedSize) { char[] text = new char[expectedSize]; char[] buffer = new char[1]; @@ -316,7 +857,7 @@ private static (long, string) ReadAllChars(SqlDataReader sqlDataReader, int expe position += numCharsRead; } } - while (numCharsRead > 0); + while (numCharsRead > 0 && position < expectedSize); return (position, new string(text)); }