Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
using System.IO;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.Extensions.AI;
using Microsoft.Shared.Diagnostics;
using ModelContextProtocol.Client;
using ModelContextProtocol.Protocol;
Expand Down Expand Up @@ -42,19 +43,21 @@ public override async Task<IngestionDocument> ReadAsync(FileInfo source, string
throw new FileNotFoundException("The specified file does not exist.", source.FullName);
}

// Read file content as base64 data URI
// Read file content and create data URI using DataContent
#if NET
byte[] fileBytes = await File.ReadAllBytesAsync(source.FullName, cancellationToken).ConfigureAwait(false);
ReadOnlyMemory<byte> fileBytes = await File.ReadAllBytesAsync(source.FullName, cancellationToken).ConfigureAwait(false);
#else
byte[] fileBytes;
ReadOnlyMemory<byte> fileBytes;
using (FileStream fs = new(source.FullName, FileMode.Open, FileAccess.Read, FileShare.Read, 1, FileOptions.Asynchronous))
{
using MemoryStream ms = new();
using MemoryStream ms = new((int)Math.Min(int.MaxValue, fs.Length));
await fs.CopyToAsync(ms).ConfigureAwait(false);
fileBytes = ms.ToArray();
fileBytes = ms.GetBuffer().AsMemory(0, (int)ms.Length);
}
#endif
string dataUri = CreateDataUri(fileBytes, mediaType);
string dataUri = new DataContent(
fileBytes,
string.IsNullOrEmpty(mediaType) ? "application/octet-stream" : mediaType!).Uri;

string markdown = await ConvertToMarkdownAsync(dataUri, cancellationToken).ConfigureAwait(false);

Expand All @@ -67,30 +70,22 @@ public override async Task<IngestionDocument> ReadAsync(Stream source, string id
_ = Throw.IfNull(source);
_ = Throw.IfNullOrEmpty(identifier);

// Read stream content as base64 data URI
using MemoryStream ms = new();
// Read stream content and create data URI using DataContent
using MemoryStream ms = source.CanSeek ? new((int)Math.Min(int.MaxValue, source.Length)) : new();
#if NET
await source.CopyToAsync(ms, cancellationToken).ConfigureAwait(false);
#else
await source.CopyToAsync(ms).ConfigureAwait(false);
#endif
byte[] fileBytes = ms.ToArray();
string dataUri = CreateDataUri(fileBytes, mediaType);
string dataUri = new DataContent(
ms.GetBuffer().AsMemory(0, (int)ms.Length),
string.IsNullOrEmpty(mediaType) ? "application/octet-stream" : mediaType).Uri;

string markdown = await ConvertToMarkdownAsync(dataUri, cancellationToken).ConfigureAwait(false);

return MarkdownParser.Parse(markdown, identifier);
}

#pragma warning disable S3995 // URI return values should not be strings
/// <summary>
/// Builds a base64-encoded <c>data:</c> URI string from raw bytes.
/// </summary>
/// <param name="fileBytes">The bytes to encode as the URI payload.</param>
/// <param name="mediaType">
/// The media type to embed in the URI. When <see langword="null"/> or empty,
/// <c>application/octet-stream</c> is used instead.
/// </param>
/// <returns>A string of the form <c>data:{mediaType};base64,{payload}</c>.</returns>
private static string CreateDataUri(byte[] fileBytes, string? mediaType)
#pragma warning restore S3995 // URI return values should not be strings
{
    // Encode the payload first so an invalid byte array fails before anything else is computed.
    string payload = Convert.ToBase64String(fileBytes);

    // Fall back to the generic binary media type when the caller supplied none.
    string effectiveMediaType;
    if (string.IsNullOrEmpty(mediaType))
    {
        effectiveMediaType = "application/octet-stream";
    }
    else
    {
        effectiveMediaType = mediaType!;
    }

    return string.Concat("data:", effectiveMediaType, ";base64,", payload);
}

private async Task<string> ConvertToMarkdownAsync(string dataUri, CancellationToken cancellationToken)
{
// Create HTTP client transport for MCP
Expand Down
Loading