diff --git a/src/Elastic.Documentation.ServiceDefaults/Extensions.cs b/src/Elastic.Documentation.ServiceDefaults/Extensions.cs index 3bfaa382c..3ba8f44e4 100644 --- a/src/Elastic.Documentation.ServiceDefaults/Extensions.cs +++ b/src/Elastic.Documentation.ServiceDefaults/Extensions.cs @@ -2,6 +2,7 @@ // Elasticsearch B.V licenses this file to you under the Apache 2.0 License. // See the LICENSE file in the project root for more information +using Elastic.Documentation.ServiceDefaults.Telemetry; using Microsoft.AspNetCore.Builder; using Microsoft.AspNetCore.Diagnostics.HealthChecks; using Microsoft.Extensions.DependencyInjection; @@ -51,11 +52,13 @@ public static TBuilder ConfigureOpenTelemetry(this TBuilder builder) w { _ = metrics.AddAspNetCoreInstrumentation() .AddHttpClientInstrumentation() - .AddRuntimeInstrumentation(); + .AddRuntimeInstrumentation() + .AddMeter(TelemetryConstants.AssemblerSyncInstrumentationName); }) .WithTracing(tracing => { _ = tracing.AddSource(builder.Environment.ApplicationName) + .AddSource(TelemetryConstants.AssemblerSyncInstrumentationName) .AddAspNetCoreInstrumentation(instrumentation => // Exclude health check requests from tracing instrumentation.Filter = context => diff --git a/src/Elastic.Documentation.ServiceDefaults/Telemetry/TelemetryConstants.cs b/src/Elastic.Documentation.ServiceDefaults/Telemetry/TelemetryConstants.cs new file mode 100644 index 000000000..e4a90b864 --- /dev/null +++ b/src/Elastic.Documentation.ServiceDefaults/Telemetry/TelemetryConstants.cs @@ -0,0 +1,14 @@ +// Licensed to Elasticsearch B.V under one or more agreements. +// Elasticsearch B.V licenses this file to you under the Apache 2.0 License. +// See the LICENSE file in the project root for more information + +namespace Elastic.Documentation.ServiceDefaults.Telemetry; + +/// +/// Centralized constants for OpenTelemetry instrumentation names. +/// These ensure consistency between source/meter creation and registration. +/// +public static class TelemetryConstants +{ + public const string AssemblerSyncInstrumentationName = "Elastic.Documentation.Assembler.Sync"; +} diff --git a/src/services/Elastic.Documentation.Assembler/Deploying/Synchronization/AwsS3SyncApplyStrategy.cs b/src/services/Elastic.Documentation.Assembler/Deploying/Synchronization/AwsS3SyncApplyStrategy.cs index 6158c9a3f..298c9fb82 100644 --- a/src/services/Elastic.Documentation.Assembler/Deploying/Synchronization/AwsS3SyncApplyStrategy.cs +++ b/src/services/Elastic.Documentation.Assembler/Deploying/Synchronization/AwsS3SyncApplyStrategy.cs @@ -2,15 +2,18 @@ // Elasticsearch B.V licenses this file to you under the Apache 2.0 License. // See the LICENSE file in the project root for more information +using System.Diagnostics; +using System.Diagnostics.Metrics; using Amazon.S3; using Amazon.S3.Model; using Amazon.S3.Transfer; using Elastic.Documentation.Diagnostics; +using Elastic.Documentation.ServiceDefaults.Telemetry; using Microsoft.Extensions.Logging; namespace Elastic.Documentation.Assembler.Deploying.Synchronization; -public class AwsS3SyncApplyStrategy( +public partial class AwsS3SyncApplyStrategy( ILoggerFactory logFactory, IAmazonS3 s3Client, ITransferUtility transferUtility, @@ -19,19 +22,117 @@ public class AwsS3SyncApplyStrategy( IDiagnosticsCollector collector ) : IDocsSyncApplyStrategy { + private static readonly ActivitySource ApplyStrategyActivitySource = new(TelemetryConstants.AssemblerSyncInstrumentationName); + + // Meter for OpenTelemetry metrics + private static readonly Meter SyncMeter = new(TelemetryConstants.AssemblerSyncInstrumentationName); + + // Deployment-level metrics (low cardinality) + private static readonly Histogram FilesPerDeploymentHistogram = SyncMeter.CreateHistogram( + "docs.deployment.files.count", + "files", + "Number of files synced per deployment operation"); + + private static readonly Counter FilesAddedCounter = SyncMeter.CreateCounter( + "docs.sync.files.added.total", + "files", + "Total number of files added to S3"); + + private static readonly Counter FilesUpdatedCounter = SyncMeter.CreateCounter( + "docs.sync.files.updated.total", + "files", + "Total number of files updated in S3"); + + private static readonly Counter FilesDeletedCounter = SyncMeter.CreateCounter( + "docs.sync.files.deleted.total", + "files", + "Total number of files deleted from S3"); + + private static readonly Histogram FileSizeHistogram = SyncMeter.CreateHistogram( + "docs.sync.file.size", + "By", + "Distribution of file sizes synced to S3"); + + private static readonly Counter FilesByExtensionCounter = SyncMeter.CreateCounter( + "docs.sync.files.by_extension", + "files", + "File operations grouped by extension"); + + private static readonly Histogram SyncDurationHistogram = SyncMeter.CreateHistogram( + "docs.sync.duration", + "s", + "Duration of sync operations"); + private readonly ILogger _logger = logFactory.CreateLogger(); - private void DisplayProgress(object? sender, UploadDirectoryProgressArgs args) => LogProgress(_logger, args, null); + private void DisplayProgress(object? sender, UploadDirectoryProgressArgs args) => LogProgress(_logger, args); - private static readonly Action LogProgress = LoggerMessage.Define( - LogLevel.Information, - new EventId(2, nameof(LogProgress)), - "{Args}"); + [LoggerMessage( + EventId = 2, + Level = LogLevel.Debug, + Message = "{Args}")] + private static partial void LogProgress(ILogger logger, UploadDirectoryProgressArgs args); + + [LoggerMessage( + EventId = 3, + Level = LogLevel.Information, + Message = "File operation: {Operation} | Path: {FilePath} | Size: {FileSize} bytes")] + private static partial void LogFileOperation(ILogger logger, string operation, string filePath, long fileSize); public async Task Apply(SyncPlan plan, Cancel ctx = default) { + var sw = Stopwatch.StartNew(); + + using var applyActivity = ApplyStrategyActivitySource.StartActivity("sync apply", ActivityKind.Client); + if (Environment.GetEnvironmentVariable("GITHUB_ACTIONS") == "true") + { + _ = applyActivity?.SetTag("cicd.pipeline.name", Environment.GetEnvironmentVariable("GITHUB_WORKFLOW") ?? "unknown"); + _ = applyActivity?.SetTag("cicd.pipeline.run.id", Environment.GetEnvironmentVariable("GITHUB_RUN_ID") ?? "unknown"); + _ = applyActivity?.SetTag("cicd.pipeline.run.attempt", Environment.GetEnvironmentVariable("GITHUB_RUN_ATTEMPT") ?? "unknown"); + } + + var addCount = plan.AddRequests.Count; + var updateCount = plan.UpdateRequests.Count; + var deleteCount = plan.DeleteRequests.Count; + var totalFiles = addCount + updateCount + deleteCount; + + // Add aggregate metrics to span + _ = applyActivity?.SetTag("docs.sync.files.added", addCount); + _ = applyActivity?.SetTag("docs.sync.files.updated", updateCount); + _ = applyActivity?.SetTag("docs.sync.files.deleted", deleteCount); + _ = applyActivity?.SetTag("docs.sync.files.total", totalFiles); + + // Record deployment-level metrics + FilesPerDeploymentHistogram.Record(totalFiles); + + if (addCount > 0) + { + FilesPerDeploymentHistogram.Record(addCount, + [new("operation", "add")]); + } + + if (updateCount > 0) + { + FilesPerDeploymentHistogram.Record(updateCount, + [new("operation", "update")]); + } + + if (deleteCount > 0) + { + FilesPerDeploymentHistogram.Record(deleteCount, + [new("operation", "delete")]); + } + + _logger.LogInformation( + "Deployment sync: {TotalFiles} files ({AddCount} added, {UpdateCount} updated, {DeleteCount} deleted) in {Environment}", + totalFiles, addCount, updateCount, deleteCount, context.Environment.Name); + await Upload(plan, ctx); await Delete(plan, ctx); + + // Record sync duration + SyncDurationHistogram.Record(sw.Elapsed.TotalSeconds, + [new("operation", "sync")]); } private async Task Upload(SyncPlan plan, Cancel ctx) @@ -39,7 +140,42 @@ private async Task Upload(SyncPlan plan, Cancel ctx) var uploadRequests = plan.AddRequests.Cast().Concat(plan.UpdateRequests).ToList(); if (uploadRequests.Count > 0) { - _logger.LogInformation("Starting to process {Count} uploads using directory upload", uploadRequests.Count); + using var uploadActivity = ApplyStrategyActivitySource.StartActivity("upload files", ActivityKind.Client); + _ = uploadActivity?.SetTag("docs.sync.upload.count", uploadRequests.Count); + + var addCount = plan.AddRequests.Count; + var updateCount = plan.UpdateRequests.Count; + + _logger.LogInformation("Starting to process {AddCount} new files and {UpdateCount} updated files", addCount, updateCount); + + // Emit file-level metrics (low cardinality) and logs for each file + foreach (var upload in uploadRequests) + { + var operation = plan.AddRequests.Contains(upload) ? "add" : "update"; + var fileSize = context.WriteFileSystem.FileInfo.New(upload.LocalPath).Length; + var extension = Path.GetExtension(upload.DestinationPath).ToLowerInvariant(); + + // Record counters + if (operation == "add") + FilesAddedCounter.Add(1); + else + FilesUpdatedCounter.Add(1); + + // Record file size distribution + FileSizeHistogram.Record(fileSize, [new("operation", operation)]); + + // Record by extension (low cardinality) + if (!string.IsNullOrEmpty(extension)) + { + FilesByExtensionCounter.Add(1, + new("operation", operation), + new("extension", extension)); + } + + // Log individual file operations for detailed analysis + LogFileOperation(_logger, operation, upload.DestinationPath, fileSize); + } + var tempDir = Path.Combine(context.WriteFileSystem.Path.GetTempPath(), context.WriteFileSystem.Path.GetRandomFileName()); _ = context.WriteFileSystem.Directory.CreateDirectory(tempDir); try @@ -61,10 +197,11 @@ private async Task Upload(SyncPlan plan, Cancel ctx) UploadFilesConcurrently = true }; directoryRequest.UploadDirectoryProgressEvent += DisplayProgress; - _logger.LogInformation("Uploading {Count} files to S3", uploadRequests.Count); + _logger.LogInformation("Uploading {Count} files to S3 bucket {BucketName}", uploadRequests.Count, bucketName); _logger.LogDebug("Starting directory upload from {TempDir}", tempDir); await transferUtility.UploadDirectoryAsync(directoryRequest, ctx); - _logger.LogDebug("Directory upload completed"); + _logger.LogInformation("Successfully uploaded {Count} files ({AddCount} added, {UpdateCount} updated)", + uploadRequests.Count, addCount, updateCount); } finally { @@ -81,6 +218,31 @@ private async Task Delete(SyncPlan plan, Cancel ctx) var deleteRequests = plan.DeleteRequests.ToList(); if (deleteRequests.Count > 0) { + using var deleteActivity = ApplyStrategyActivitySource.StartActivity("delete files", ActivityKind.Client); + _ = deleteActivity?.SetTag("docs.sync.delete.count", deleteRequests.Count); + + _logger.LogInformation("Starting to delete {Count} files from S3 bucket {BucketName}", deleteRequests.Count, bucketName); + + // Emit file-level metrics (low cardinality) and logs for each file + foreach (var delete in deleteRequests) + { + var extension = Path.GetExtension(delete.DestinationPath).ToLowerInvariant(); + + // Record counter + FilesDeletedCounter.Add(1); + + // Record by extension (low cardinality) + if (!string.IsNullOrEmpty(extension)) + { + FilesByExtensionCounter.Add(1, + new("operation", "delete"), + new("extension", extension)); + } + + // Log individual file operations for detailed analysis + LogFileOperation(_logger, "delete", delete.DestinationPath, 0); + } + // Process deletes in batches of 1000 (AWS S3 limit) foreach (var batch in deleteRequests.Chunk(1000)) { @@ -95,16 +257,22 @@ private async Task Delete(SyncPlan plan, Cancel ctx) var response = await s3Client.DeleteObjectsAsync(deleteObjectsRequest, ctx); if (response.HttpStatusCode != System.Net.HttpStatusCode.OK) { + _logger.LogError("Delete batch failed with status code {StatusCode}", response.HttpStatusCode); foreach (var error in response.DeleteErrors) + { + _logger.LogError("Failed to delete {Key}: {Message}", error.Key, error.Message); collector.EmitError(error.Key, $"Failed to delete: {error.Message}"); + } } else { var newCount = Interlocked.Add(ref deleteCount, batch.Length); - _logger.LogInformation("Deleted {Count} objects ({DeleteCount}/{TotalDeleteCount})", + _logger.LogInformation("Deleted {BatchCount} files ({CurrentCount}/{TotalCount})", batch.Length, newCount, deleteRequests.Count); } } + + _logger.LogInformation("Successfully deleted {Count} files", deleteCount); } } } diff --git a/src/services/Elastic.Documentation.Assembler/Elastic.Documentation.Assembler.csproj b/src/services/Elastic.Documentation.Assembler/Elastic.Documentation.Assembler.csproj index 412e5276b..b9518dc9a 100644 --- a/src/services/Elastic.Documentation.Assembler/Elastic.Documentation.Assembler.csproj +++ b/src/services/Elastic.Documentation.Assembler/Elastic.Documentation.Assembler.csproj @@ -13,6 +13,7 @@ +