Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions BotSharp.sln
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,8 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "BotSharp.Plugin.ExcelHandle
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "BotSharp.Plugin.ImageHandler", "src\Plugins\BotSharp.Plugin.ImageHandler\BotSharp.Plugin.ImageHandler.csproj", "{242F2D93-FCCE-4982-8075-F3052ECCA92C}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "BotSharp.Plugin.FuzzySharp", "src\Plugins\BotSharp.Plugin.FuzzySharp\BotSharp.Plugin.FuzzySharp.csproj", "{E7C243B9-E751-B3B4-8F16-95C76CA90D31}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
Expand Down Expand Up @@ -629,6 +631,14 @@ Global
{242F2D93-FCCE-4982-8075-F3052ECCA92C}.Release|Any CPU.Build.0 = Release|Any CPU
{242F2D93-FCCE-4982-8075-F3052ECCA92C}.Release|x64.ActiveCfg = Release|Any CPU
{242F2D93-FCCE-4982-8075-F3052ECCA92C}.Release|x64.Build.0 = Release|Any CPU
{E7C243B9-E751-B3B4-8F16-95C76CA90D31}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{E7C243B9-E751-B3B4-8F16-95C76CA90D31}.Debug|Any CPU.Build.0 = Debug|Any CPU
{E7C243B9-E751-B3B4-8F16-95C76CA90D31}.Debug|x64.ActiveCfg = Debug|Any CPU
{E7C243B9-E751-B3B4-8F16-95C76CA90D31}.Debug|x64.Build.0 = Debug|Any CPU
{E7C243B9-E751-B3B4-8F16-95C76CA90D31}.Release|Any CPU.ActiveCfg = Release|Any CPU
{E7C243B9-E751-B3B4-8F16-95C76CA90D31}.Release|Any CPU.Build.0 = Release|Any CPU
{E7C243B9-E751-B3B4-8F16-95C76CA90D31}.Release|x64.ActiveCfg = Release|Any CPU
{E7C243B9-E751-B3B4-8F16-95C76CA90D31}.Release|x64.Build.0 = Release|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
Expand Down Expand Up @@ -701,6 +711,7 @@ Global
{0428DEAA-E4FE-4259-A6D8-6EDD1A9D0702} = {51AFE054-AE99-497D-A593-69BAEFB5106F}
{FC63C875-E880-D8BB-B8B5-978AB7B62983} = {51AFE054-AE99-497D-A593-69BAEFB5106F}
{242F2D93-FCCE-4982-8075-F3052ECCA92C} = {51AFE054-AE99-497D-A593-69BAEFB5106F}
{E7C243B9-E751-B3B4-8F16-95C76CA90D31} = {51AFE054-AE99-497D-A593-69BAEFB5106F}
EndGlobalSection
GlobalSection(ExtensibilityGlobals) = postSolution
SolutionGuid = {A9969D89-C98B-40A5-A12B-FC87E55B3A19}
Expand Down
3 changes: 3 additions & 0 deletions Directory.Packages.props
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
<ManagePackageVersionsCentrally>true</ManagePackageVersionsCentrally>
</PropertyGroup>
<ItemGroup>
<PackageVersion Include="CsvHelper" Version="33.1.0" />
<PackageVersion Include="FuzzySharp" Version="2.0.2" />
<PackageVersion Include="EntityFramework" Version="6.4.4" />
<PackageVersion Include="Google_GenerativeAI" Version="3.4.1" />
<PackageVersion Include="Google_GenerativeAI.Live" Version="3.4.1" />
Expand All @@ -18,6 +20,7 @@
<PackageVersion Include="Microsoft.Extensions.Logging" Version="10.0.0" />
<PackageVersion Include="Microsoft.Extensions.Caching.Memory" Version="8.0.1" />
<PackageVersion Include="Newtonsoft.Json" Version="13.0.3" />
<PackageVersion Include="SharpFuzz" Version="2.2.0" />
<PackageVersion Include="SharpHook" Version="5.3.9" />
<PackageVersion Include="SixLabors.ImageSharp" Version="3.1.12" />
<PackageVersion Include="System.ClientModel" Version="1.3.0" />
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
namespace BotSharp.Abstraction.Knowledges;

/// <summary>
/// Provider of the phrase data consumed by phrase matching: a vocabulary and a synonym mapping.
/// </summary>
public interface IPhraseCollection
{
/// <summary>
/// Asynchronously loads the vocabulary.
/// </summary>
/// <returns>
/// A dictionary from a string key to a set of strings.
/// NOTE(review): presumably source name -> vocabulary terms of that source; confirm against the implementation.
/// </returns>
Task<Dictionary<string, HashSet<string>>> LoadVocabularyAsync();
/// <summary>
/// Asynchronously loads the synonym mapping.
/// </summary>
/// <returns>
/// A dictionary from a string key to a (DbPath, CanonicalForm) tuple.
/// NOTE(review): presumably keyed by the synonym term; confirm against the implementation.
/// </returns>
Task<Dictionary<string, (string DbPath, string CanonicalForm)>> LoadSynonymMappingAsync();
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
namespace BotSharp.Abstraction.Knowledges;

/// <summary>
/// Service that searches for phrases related to a given term.
/// </summary>
public interface IPhraseService
{
/// <summary>
/// Asynchronously searches phrases for the given term.
/// </summary>
/// <param name="term">Text to search for</param>
/// <returns>List of <see cref="SearchPhrasesResult"/> entries produced for the term</returns>
Task<List<SearchPhrasesResult>> SearchPhrasesAsync(string term);
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@

namespace BotSharp.Abstraction.Knowledges.Models;

/// <summary>
/// A single entry returned by a phrase search.
/// </summary>
public class SearchPhrasesResult
{
    /// <summary>Token text of this entry; empty string until assigned.</summary>
    public string Token { get; set; } = "";

    /// <summary>Sources associated with this entry; an empty list until populated.</summary>
    public List<string> Sources { get; set; } = new List<string>();

    /// <summary>Canonical form reported for the token; empty string until assigned.</summary>
    public string CanonicalForm { get; set; } = "";

    /// <summary>Kind of match that produced this entry; empty string until assigned.</summary>
    public string MatchType { get; set; } = "";

    /// <summary>Confidence score of the match; 0 until assigned.</summary>
    public double Confidence { get; set; }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
<!-- Project file of the FuzzySharp plugin. -->
<Project Sdk="Microsoft.NET.Sdk">

<!-- NOTE(review): the $(...) values below are not defined in this file; presumably they come from a shared props file - confirm. -->
<PropertyGroup>
<TargetFramework>$(TargetFramework)</TargetFramework>
<Nullable>enable</Nullable>
<LangVersion>$(LangVersion)</LangVersion>
<VersionPrefix>$(BotSharpVersion)</VersionPrefix>
<GeneratePackageOnBuild>$(GeneratePackageOnBuild)</GeneratePackageOnBuild>
<GenerateDocumentationFile>$(GenerateDocumentationFile)</GenerateDocumentationFile>
<OutputPath>$(SolutionDir)packages</OutputPath>
</PropertyGroup>

<!-- Package references carry no Version attribute; versions are expected to be supplied centrally. -->
<ItemGroup>
<PackageReference Include="CsvHelper" />
<PackageReference Include="FuzzySharp" />
</ItemGroup>

<!-- The plugin builds against the shared abstraction project. -->
<ItemGroup>
<ProjectReference Include="..\..\Infrastructure\BotSharp.Abstraction\BotSharp.Abstraction.csproj" />
</ItemGroup>
</Project>
20 changes: 20 additions & 0 deletions src/Plugins/BotSharp.Plugin.FuzzySharp/Constants/MatchReason.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@

namespace BotSharp.Plugin.FuzzySharp.Constants;

/// <summary>
/// String constants naming the reason a token was matched.
/// </summary>
public static class MatchReason
{
/// <summary>
/// Token matched a synonym term (e.g., HVAC -> Air Conditioning/Heating)
/// </summary>
public const string SynonymMatch = "synonym_match";

/// <summary>
/// Token exactly matched a vocabulary entry
/// </summary>
public const string ExactMatch = "exact_match";

/// <summary>
/// Token was flagged as a potential typo and a correction was suggested
/// </summary>
public const string TypoCorrection = "typo_correction";
}
29 changes: 29 additions & 0 deletions src/Plugins/BotSharp.Plugin.FuzzySharp/Constants/TextConstants.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@

namespace BotSharp.Plugin.FuzzySharp.Constants;

/// <summary>
/// Character tables used when tokenizing text.
/// </summary>
public static class TextConstants
{
    /// <summary>
    /// Characters that are set apart as their own tokens during tokenization
    /// (by surrounding them with spaces), so that e.g. "(IH)" becomes "(", "IH", ")".
    /// Covers brackets of all kinds, punctuation marks and special symbols.
    /// </summary>
    public static readonly char[] SeparatorChars = (
        // Parentheses and brackets
        "()[]{}" +
        // Punctuation marks
        ",.;:!?" +
        // Special symbols
        "=@#$%^&*+-\\|<>~`").ToCharArray();

    /// <summary>
    /// Whitespace characters that delimit tokens during tokenization:
    /// space, tab, newline and carriage return.
    /// </summary>
    public static readonly char[] TokenSeparators = " \t\n\r".ToCharArray();
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
using BotSharp.Abstraction.Knowledges;
using BotSharp.Abstraction.Knowledges.Models;
using Microsoft.AspNetCore.Http;
using Microsoft.AspNetCore.Mvc;
using Microsoft.Extensions.Logging;

namespace BotSharp.Plugin.FuzzySharp.Controllers;

/// <summary>
/// HTTP entry point of the FuzzySharp plugin; forwards text to <see cref="IPhraseService"/>.
/// </summary>
[ApiController]
public class FuzzySharpController : ControllerBase
{
    private readonly IPhraseService _phraseService;
    private readonly ILogger<FuzzySharpController> _logger;

    /// <summary>
    /// Creates the controller with its injected dependencies.
    /// </summary>
    /// <param name="phraseService">Service that performs the phrase search</param>
    /// <param name="logger">Logger used to record failures</param>
    public FuzzySharpController(
        IPhraseService phraseService,
        ILogger<FuzzySharpController> logger)
    {
        _phraseService = phraseService;
        _logger = logger;
    }

    /// <summary>
    /// Analyze text for typos and entities using vocabulary.
    ///
    /// Returns a list of <see cref="SearchPhrasesResult"/> items. Each item carries a match type:
    /// - `synonym_match` - Business abbreviations (confidence=1.0)
    /// - `exact_match` - Exact vocabulary matches (confidence=1.0)
    /// - `typo_correction` - Spelling corrections (confidence less than 1.0)
    /// </summary>
    /// <param name="text">Text to analyze, read from the request body</param>
    /// <returns>
    /// 200 with the list of matches; 400 when <paramref name="text"/> is null, empty or whitespace;
    /// 500 when the search throws.
    /// </returns>
    [HttpPost("fuzzy-sharp/analyze-text")]
    [ProducesResponseType(typeof(List<SearchPhrasesResult>), StatusCodes.Status200OK)]
    [ProducesResponseType(StatusCodes.Status400BadRequest)]
    [ProducesResponseType(StatusCodes.Status500InternalServerError)]
    public async Task<IActionResult> AnalyzeText([FromBody] string text)
    {
        // Input validation cannot throw, so it is handled as a guard clause outside the try block.
        if (string.IsNullOrWhiteSpace(text))
        {
            return BadRequest(new { error = "Text is required" });
        }

        try
        {
            var result = await _phraseService.SearchPhrasesAsync(text);
            return Ok(result);
        }
        catch (Exception ex)
        {
            // Full exception details go to the log only; echoing ex.Message to the caller
            // would expose internal information (paths, stack details) to API clients.
            _logger.LogError(ex, "Error analyzing and searching entities");
            return StatusCode(
                StatusCodes.Status500InternalServerError,
                new { error = "Error analyzing and searching entities" });
        }
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@

namespace BotSharp.Plugin.FuzzySharp.FuzzSharp.Arguments;

/// <summary>
/// Request payload describing a text-analysis run and its tuning options.
/// </summary>
public class TextAnalysisRequest
{
    /// <summary>Text to analyze; empty string by default.</summary>
    public string Text { get; set; } = "";

    /// <summary>Optional vocabulary folder name; null when not supplied.</summary>
    public string? VocabularyFolderName { get; set; }

    /// <summary>Optional synonym mapping file; null when not supplied.</summary>
    public string? SynonymMappingFile { get; set; }

    /// <summary>
    /// Cutoff value, 0.82 by default.
    /// NOTE(review): presumably the minimum confidence for fuzzy matches - confirm against the consumer.
    /// </summary>
    public double Cutoff { get; set; } = 0.82;

    /// <summary>Top-K limit, 5 by default.</summary>
    public int TopK { get; set; } = 5;

    /// <summary>Maximum n-gram length, 5 by default.</summary>
    public int MaxNgram { get; set; } = 5;

    /// <summary>Whether tokens should be included; off by default.</summary>
    public bool IncludeTokens { get; set; }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
using BotSharp.Plugin.FuzzySharp.FuzzSharp.Models;

namespace BotSharp.Plugin.FuzzySharp.FuzzSharp;

/// <summary>
/// N-gram processor interface
/// Responsible for turning a token list into flagged match results
/// </summary>
public interface INgramProcessor
{
/// <summary>
/// Process tokens and generate all possible n-gram match results
/// </summary>
/// <param name="tokens">List of tokens to process</param>
/// <param name="vocabulary">Vocabulary (source -> vocabulary set)</param>
/// <param name="synonymMapping">Synonym term mapping (term -> (DbPath, CanonicalForm))</param>
/// <param name="lookup">Lookup table (lowercase vocabulary -> (canonical form, source list))</param>
/// <param name="maxNgram">Maximum n-gram length</param>
/// <param name="cutoff">Minimum confidence threshold for fuzzy matching</param>
/// <param name="topK">Maximum number of matches to return</param>
/// <returns>List of flagged items</returns>
List<FlaggedItem> ProcessNgrams(
List<string> tokens,
Dictionary<string, HashSet<string>> vocabulary,
Dictionary<string, (string DbPath, string CanonicalForm)> synonymMapping,
Dictionary<string, (string CanonicalForm, List<string> Sources)> lookup,
int maxNgram,
double cutoff,
int topK);
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
using BotSharp.Plugin.FuzzySharp.FuzzSharp.Models;

namespace BotSharp.Plugin.FuzzySharp.FuzzSharp;

/// <summary>
/// Result processor interface
/// Responsible for post-processing match results, including deduplication and sorting
/// </summary>
public interface IResultProcessor
{
/// <summary>
/// Process a list of flagged items, removing overlapping duplicates and sorting.
/// The sort key and the overlap rule are defined by the implementation.
/// </summary>
/// <param name="flagged">List of flagged items to process</param>
/// <returns>Processed list of flagged items (deduplicated and sorted)</returns>
List<FlaggedItem> ProcessResults(List<FlaggedItem> flagged);
}
39 changes: 39 additions & 0 deletions src/Plugins/BotSharp.Plugin.FuzzySharp/FuzzSharp/ITokenMatcher.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
namespace BotSharp.Plugin.FuzzySharp.FuzzSharp;

/// <summary>
/// Token matcher interface
/// A single matching strategy that inspects a content span and may produce a match result
/// </summary>
public interface ITokenMatcher
{
/// <summary>
/// Try to match a content span and return a match result
/// </summary>
/// <param name="context">The matching context containing all necessary information</param>
/// <returns>Match result if found, null otherwise</returns>
MatchResult? TryMatch(MatchContext context);

/// <summary>
/// Priority of this matcher (higher priority matchers are tried first)
/// NOTE(review): whether a larger number means higher priority is decided by the caller that orders matchers - confirm there.
/// </summary>
int Priority { get; }
}

/// <summary>
/// Context information for token matching
/// </summary>
/// <param name="ContentSpan">The content span being matched</param>
/// <param name="ContentLow">NOTE(review): presumably the lowercased form of <paramref name="ContentSpan"/> - confirm against the caller</param>
/// <param name="StartIndex">Start index of the span (presumably a token index - confirm against the caller)</param>
/// <param name="NgramLength">Length of the n-gram the span was built from</param>
/// <param name="Vocabulary">Vocabulary (source -> vocabulary set)</param>
/// <param name="SynonymMapping">Synonym term mapping (term -> (DbPath, CanonicalForm))</param>
/// <param name="Lookup">Lookup table (lowercase vocabulary -> (canonical form, source list))</param>
/// <param name="Cutoff">Minimum confidence threshold for fuzzy matching</param>
/// <param name="TopK">Maximum number of matches to return</param>
public record MatchContext(
string ContentSpan,
string ContentLow,
int StartIndex,
int NgramLength,
Dictionary<string, HashSet<string>> Vocabulary,
Dictionary<string, (string DbPath, string CanonicalForm)> SynonymMapping,
Dictionary<string, (string CanonicalForm, List<string> Sources)> Lookup,
double Cutoff,
int TopK);

/// <summary>
/// Result of a token match
/// </summary>
/// <param name="CanonicalForm">Canonical form the span was matched to</param>
/// <param name="Sources">Sources associated with the match</param>
/// <param name="MatchType">Kind of match (presumably one of the MatchReason constants - confirm against the matchers)</param>
/// <param name="Confidence">Confidence score of the match</param>
public record MatchResult(
string CanonicalForm,
List<string> Sources,
string MatchType,
double Confidence);
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@

namespace BotSharp.Plugin.FuzzySharp.FuzzSharp.Models;

/// <summary>
/// A token (or n-gram) flagged during text analysis, together with its match details.
/// </summary>
public class FlaggedItem
{
    /// <summary>Index of the flagged item; 0 until assigned.</summary>
    public int Index { get; set; }

    /// <summary>Flagged token text; empty string until assigned.</summary>
    public string Token { get; set; } = "";

    /// <summary>Sources associated with the match; an empty list until populated.</summary>
    public List<string> Sources { get; set; } = new List<string>();

    /// <summary>Kind of match; empty string until assigned.</summary>
    public string MatchType { get; set; } = "";

    /// <summary>Canonical form of the match; empty string until assigned.</summary>
    public string CanonicalForm { get; set; } = "";

    /// <summary>Confidence score of the match; 0 until assigned.</summary>
    public double Confidence { get; set; }

    /// <summary>Length of the n-gram that produced this item; 0 until assigned.</summary>
    public int NgramLength { get; set; }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@

namespace BotSharp.Plugin.FuzzySharp.FuzzSharp.Models;

/// <summary>
/// Response payload of a text-analysis run.
/// </summary>
public class TextAnalysisResponse
{
    /// <summary>Original input text; empty string until assigned.</summary>
    public string Original { get; set; } = "";

    /// <summary>Token list; null unless it has been set.</summary>
    public List<string>? Tokens { get; set; }

    /// <summary>Flagged items; an empty list until populated.</summary>
    public List<FlaggedItem> Flagged { get; set; } = new List<FlaggedItem>();

    /// <summary>Processing time in milliseconds; 0 until assigned.</summary>
    public double ProcessingTimeMs { get; set; }
}
29 changes: 29 additions & 0 deletions src/Plugins/BotSharp.Plugin.FuzzySharp/FuzzySharpPlugin.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
using BotSharp.Plugin.FuzzySharp.FuzzSharp;
using BotSharp.Abstraction.Knowledges;
using BotSharp.Abstraction.Plugins;
using BotSharp.Plugin.FuzzySharp.Services;
using BotSharp.Plugin.FuzzySharp.Services.Matching;
using BotSharp.Plugin.FuzzySharp.Services.Processors;
using Microsoft.Extensions.Configuration;
using Microsoft.Extensions.DependencyInjection;

namespace BotSharp.Plugin.FuzzySharp;

/// <summary>
/// Plugin descriptor for FuzzySharp: exposes the plugin metadata and wires its services into DI.
/// </summary>
public class FuzzySharpPlugin : IBotSharpPlugin
{
    /// <summary>Fixed identifier of this plugin.</summary>
    public string Id
    {
        get { return "379e6f7b-c58c-458b-b8cd-0374e5830711"; }
    }

    /// <summary>Display name of this plugin.</summary>
    public string Name
    {
        get { return "Fuzzy Sharp"; }
    }

    /// <summary>Short description of what the plugin does.</summary>
    public string Description
    {
        get { return "Analyze text for typos and entities using domain-specific vocabulary."; }
    }

    /// <summary>URL of the plugin icon.</summary>
    public string IconUrl
    {
        get { return "https://cdn-icons-png.flaticon.com/512/9592/9592995.png"; }
    }

    /// <summary>
    /// Registers the plugin's services with scoped lifetime.
    /// Three <see cref="ITokenMatcher"/> implementations are registered under the same
    /// service type, in the order exact, synonym, fuzzy.
    /// </summary>
    /// <param name="services">Service collection to add the registrations to</param>
    /// <param name="config">Application configuration (not read by this method)</param>
    public void RegisterDI(IServiceCollection services, IConfiguration config)
    {
        services
            .AddScoped<INgramProcessor, NgramProcessor>()
            .AddScoped<IResultProcessor, ResultProcessor>()
            .AddScoped<IPhraseService, PhraseService>()
            .AddScoped<IPhraseCollection, CsvPhraseCollectionLoader>()
            .AddScoped<ITokenMatcher, ExactMatcher>()
            .AddScoped<ITokenMatcher, SynonymMatcher>()
            .AddScoped<ITokenMatcher, FuzzyMatcher>();
    }
}
Loading
Loading