From 226259b267b155147d09c89c7e69c5cd85acad61 Mon Sep 17 00:00:00 2001 From: Ahmed Yasin Koculu Date: Mon, 8 Jun 2026 04:34:30 +0200 Subject: [PATCH 1/3] Update packages --- .editorconfig | 14 + .gitattributes | 1 + src/.editorconfig | 4 - .../ZoneTree.FullTextSearch.Playground.csproj | 2 +- .../HashedSearchEngineTests.cs | 2 +- .../RecordTableTests.cs | 2 +- .../ZoneTree.FullTextSearch.UnitTests.csproj | 12 +- .../Directory.Build.props | 4 +- .../Index/IndexOfTokenRecordPreviousToken.cs | 765 +++++++++--------- .../CompositeKeyOfRecordTokenComparer.cs | 2 +- ...mpositeKeyOfTokenRecordPreviousComparer.cs | 2 +- ...SearchOnIndexOfTokenRecordPreviousToken.cs | 4 +- ...SearchOnIndexOfTokenRecordPreviousToken.cs | 4 +- .../SearchEngines/AdvancedZoneTreeOptions.cs | 4 +- .../SearchEngines/HashedSearchEngine.cs | 8 +- .../Storage/RecordTable.cs | 4 +- .../ZoneTree.FullTextSearch.csproj | 6 +- 17 files changed, 424 insertions(+), 416 deletions(-) create mode 100644 .editorconfig create mode 100644 .gitattributes delete mode 100644 src/.editorconfig diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 0000000..2f153d1 --- /dev/null +++ b/.editorconfig @@ -0,0 +1,14 @@ +root = true + +[*] +indent_style = space +indent_size = 2 +end_of_line = lf +charset = utf-8 +insert_final_newline = true +trim_trailing_whitespace = true + +[*.cs] + +# CA1051: Do not declare visible instance fields +dotnet_diagnostic.CA1051.severity = none diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..6313b56 --- /dev/null +++ b/.gitattributes @@ -0,0 +1 @@ +* text=auto eol=lf diff --git a/src/.editorconfig b/src/.editorconfig deleted file mode 100644 index 7d99bd6..0000000 --- a/src/.editorconfig +++ /dev/null @@ -1,4 +0,0 @@ -[*.cs] - -# CA1051: Do not declare visible instance fields -dotnet_diagnostic.CA1051.severity = none diff --git a/src/ZoneTree.FullTextSearch.Playground/ZoneTree.FullTextSearch.Playground.csproj 
b/src/ZoneTree.FullTextSearch.Playground/ZoneTree.FullTextSearch.Playground.csproj index 99c8a9e..c4e0008 100644 --- a/src/ZoneTree.FullTextSearch.Playground/ZoneTree.FullTextSearch.Playground.csproj +++ b/src/ZoneTree.FullTextSearch.Playground/ZoneTree.FullTextSearch.Playground.csproj @@ -2,7 +2,7 @@ Exe - net9.0 + net10.0 enable disable diff --git a/src/ZoneTree.FullTextSearch.UnitTests/HashedSearchEngineTests.cs b/src/ZoneTree.FullTextSearch.UnitTests/HashedSearchEngineTests.cs index bcba331..3db8bc6 100644 --- a/src/ZoneTree.FullTextSearch.UnitTests/HashedSearchEngineTests.cs +++ b/src/ZoneTree.FullTextSearch.UnitTests/HashedSearchEngineTests.cs @@ -1,4 +1,4 @@ -using Tenray.ZoneTree.Exceptions; +using ZoneTree.Exceptions; using ZoneTree.FullTextSearch.SearchEngines; namespace ZoneTree.FullTextSearch.UnitTests; diff --git a/src/ZoneTree.FullTextSearch.UnitTests/RecordTableTests.cs b/src/ZoneTree.FullTextSearch.UnitTests/RecordTableTests.cs index 9e82ef8..fc1c6e4 100644 --- a/src/ZoneTree.FullTextSearch.UnitTests/RecordTableTests.cs +++ b/src/ZoneTree.FullTextSearch.UnitTests/RecordTableTests.cs @@ -1,4 +1,4 @@ -using Tenray.ZoneTree.Exceptions; +using ZoneTree.Exceptions; namespace ZoneTree.FullTextSearch.UnitTests; diff --git a/src/ZoneTree.FullTextSearch.UnitTests/ZoneTree.FullTextSearch.UnitTests.csproj b/src/ZoneTree.FullTextSearch.UnitTests/ZoneTree.FullTextSearch.UnitTests.csproj index f04296c..928dd81 100644 --- a/src/ZoneTree.FullTextSearch.UnitTests/ZoneTree.FullTextSearch.UnitTests.csproj +++ b/src/ZoneTree.FullTextSearch.UnitTests/ZoneTree.FullTextSearch.UnitTests.csproj @@ -1,7 +1,7 @@ - net9.0 + net10.0 enable disable @@ -10,17 +10,17 @@ - + all runtime; build; native; contentfiles; analyzers; buildtransitive - - - + + + all runtime; build; native; contentfiles; analyzers; buildtransitive - + diff --git a/src/ZoneTree.FullTextSearch/Directory.Build.props b/src/ZoneTree.FullTextSearch/Directory.Build.props index a73696d..b6cd47e 100644 --- 
a/src/ZoneTree.FullTextSearch/Directory.Build.props +++ b/src/ZoneTree.FullTextSearch/Directory.Build.props @@ -5,8 +5,8 @@ Ahmed Yasin Koculu ZoneTree.FullTextSearch ZoneTree.FullTextSearch - 1.0.8.0 - 1.0.8.0 + 1.0.9.0 + 1.0.9.0 Ahmed Yasin Koculu ZoneTree.FullTextSearch ZoneTree.FullTextSearch is an open-source library that extends ZoneTree to provide efficient full-text search capabilities. It offers a fast, embedded search engine suitable for applications that require high performance and do not rely on external databases. diff --git a/src/ZoneTree.FullTextSearch/Index/IndexOfTokenRecordPreviousToken.cs b/src/ZoneTree.FullTextSearch/Index/IndexOfTokenRecordPreviousToken.cs index 156a083..0d11171 100644 --- a/src/ZoneTree.FullTextSearch/Index/IndexOfTokenRecordPreviousToken.cs +++ b/src/ZoneTree.FullTextSearch/Index/IndexOfTokenRecordPreviousToken.cs @@ -1,12 +1,9 @@ -using System.Threading; -using Tenray.ZoneTree; -using Tenray.ZoneTree.Comparers; -using Tenray.ZoneTree.PresetTypes; -using Tenray.ZoneTree.Serializers; +using ZoneTree.Comparers; using ZoneTree.FullTextSearch.Model; -using ZoneTree.FullTextSearch.QueryLanguage; using ZoneTree.FullTextSearch.Search; using ZoneTree.FullTextSearch.SearchEngines; +using ZoneTree.PresetTypes; +using ZoneTree.Serializers; namespace ZoneTree.FullTextSearch.Index; @@ -21,399 +18,399 @@ public sealed class IndexOfTokenRecordPreviousToken where TRecord : unmanaged where TToken : unmanaged { - readonly bool useSecondaryIndex; - - bool isDropped; - - bool isDisposed; - - readonly SearchOnIndexOfTokenRecordPreviousToken - searchAlgorithm; - - readonly AdvancedSearchOnIndexOfTokenRecordPreviousToken - advancedSearchAlgorithm; - - /// - /// Gets the primary zone tree used to store and retrieve records by token and previous token. - /// - public readonly IZoneTree< - CompositeKeyOfTokenRecordPrevious, - byte> ZoneTree1; - - /// - /// Gets the maintainer for managing the primary zone tree, including background tasks. 
- /// - public readonly IMaintainer Maintainer1; - - /// - /// Gets the secondary zone tree used to store and retrieve records by record and token, - /// if a secondary index is enabled. - /// - public readonly IZoneTree< - CompositeKeyOfRecordToken, - byte> ZoneTree2; - - /// - /// Gets the maintainer for managing the secondary zone tree, including background tasks. - /// - public readonly IMaintainer Maintainer2; - - /// - /// Gets the ref comparer of record. - /// - public IRefComparer RecordComparer { get; } - - /// - /// Gets the ref comparer of token. - /// - public IRefComparer TokenComparer { get; } - - /// - /// Gets or sets a value indicating whether the index is read-only. - /// When set to true, both the primary and secondary zone trees (if applicable) become read-only. - /// - public bool IsReadOnly + readonly bool useSecondaryIndex; + + bool isDropped; + + bool isDisposed; + + readonly SearchOnIndexOfTokenRecordPreviousToken + searchAlgorithm; + + readonly AdvancedSearchOnIndexOfTokenRecordPreviousToken + advancedSearchAlgorithm; + + /// + /// Gets the primary zone tree used to store and retrieve records by token and previous token. + /// + public readonly IZoneTree< + CompositeKeyOfTokenRecordPrevious, + byte> ZoneTree1; + + /// + /// Gets the maintainer for managing the primary zone tree, including background tasks. + /// + public readonly IMaintainer Maintainer1; + + /// + /// Gets the secondary zone tree used to store and retrieve records by record and token, + /// if a secondary index is enabled. + /// + public readonly IZoneTree< + CompositeKeyOfRecordToken, + byte> ZoneTree2; + + /// + /// Gets the maintainer for managing the secondary zone tree, including background tasks. + /// + public readonly IMaintainer Maintainer2; + + /// + /// Gets the ref comparer of record. + /// + public IRefComparer RecordComparer { get; } + + /// + /// Gets the ref comparer of token. 
+ /// + public IRefComparer TokenComparer { get; } + + /// + /// Gets or sets a value indicating whether the index is read-only. + /// When set to true, both the primary and secondary zone trees (if applicable) become read-only. + /// + public bool IsReadOnly + { + get => ZoneTree1.IsReadOnly || (ZoneTree2 != null && ZoneTree2.IsReadOnly); + set { - get => ZoneTree1.IsReadOnly || (ZoneTree2 != null && ZoneTree2.IsReadOnly); - set - { - ZoneTree1.IsReadOnly = value; - if (ZoneTree2 != null) - ZoneTree2.IsReadOnly = value; - } + ZoneTree1.IsReadOnly = value; + if (ZoneTree2 != null) + ZoneTree2.IsReadOnly = value; } - - /// - /// Returns true if the index is dropped, otherwise false. - /// - public bool IsIndexDropped { get => isDropped; } - - /// - /// Initializes a new instance of the class, - /// with the option to configure primary and secondary zone trees. - /// - /// The path to the data storage, defaulting to "data". - /// The comparer of record. - /// The comparer of token. - /// Indicates whether a secondary index should be used to perform faster deletion. - /// Defines the life time of cached blocks. Default is 1 minute. - /// /// Advanced ZoneTree Options enabling customization of underlying ZoneTree instances. 
- public IndexOfTokenRecordPreviousToken( - string dataPath = "data", - IRefComparer recordComparer = null, - IRefComparer tokenComparer = null, - bool useSecondaryIndex = false, - long blockCacheLifeTimeInMilliseconds = 60_000, - AdvancedZoneTreeOptions advancedOptions = null) - { - if (recordComparer == null) - recordComparer = ComponentsForKnownTypes.GetComparer(); - if (tokenComparer == null) - tokenComparer = ComponentsForKnownTypes.GetComparer(); - var fileStreamProvider = advancedOptions?.FileStreamProvider; - var factory1 = new ZoneTreeFactory, byte>(fileStreamProvider) - .SetDataDirectory($"{dataPath}/index1") - .SetIsDeletedDelegate( - (in CompositeKeyOfTokenRecordPrevious key, in byte value) => value == 1) - .SetMarkValueDeletedDelegate((ref byte x) => x = 1) - .SetKeySerializer(new StructSerializer>()) - .SetComparer( - new CompositeKeyOfTokenRecordPreviousComparer( - recordComparer, - tokenComparer)); - - advancedOptions?.FactoryConfigurator1?.Invoke(factory1); - - ZoneTree1 = factory1.OpenOrCreate(); - Maintainer1 = ZoneTree1.CreateMaintainer(); - Maintainer1.InactiveBlockCacheCleanupInterval = TimeSpan.FromSeconds(30); - Maintainer1.BlockCacheLifeTime = - TimeSpan.FromMilliseconds(blockCacheLifeTimeInMilliseconds); - Maintainer1.EnableJobForCleaningInactiveCaches = true; - RecordComparer = recordComparer; - TokenComparer = tokenComparer; - this.useSecondaryIndex = useSecondaryIndex; - if (useSecondaryIndex) - { - var factory2 = new ZoneTreeFactory, byte>(fileStreamProvider) - .SetDataDirectory($"{dataPath}/index2") - .SetIsDeletedDelegate( - (in CompositeKeyOfRecordToken key, in byte value) => value == 1) - .SetMarkValueDeletedDelegate((ref byte x) => x = 1) - .SetKeySerializer(new StructSerializer>()) - .SetComparer( - new CompositeKeyOfRecordTokenComparer( - recordComparer, - tokenComparer)); - - advancedOptions?.FactoryConfigurator2?.Invoke(factory2); - - ZoneTree2 = factory2.OpenOrCreate(); - Maintainer2 = ZoneTree2.CreateMaintainer(); - 
Maintainer2.InactiveBlockCacheCleanupInterval = TimeSpan.FromSeconds(30); - Maintainer2.BlockCacheLifeTime = - TimeSpan.FromMilliseconds(blockCacheLifeTimeInMilliseconds); - Maintainer2.EnableJobForCleaningInactiveCaches = true; - } - searchAlgorithm = new(this); - advancedSearchAlgorithm = new(this); - } - - /// - /// Throws an exception if the index has been dropped, preventing further operations on a dropped index. - /// - public void ThrowIfIndexIsDropped() - { - if (isDropped) throw new Exception($"{nameof( - IndexOfTokenRecordPreviousToken)} is dropped."); - } - - /// - /// Evicts data from memory to disk in both primary and secondary zone trees. - /// - public void EvictToDisk() - { - ThrowIfIndexIsDropped(); - Maintainer1.EvictToDisk(); - Maintainer2?.EvictToDisk(); - } - - /// - /// Attempts to cancel any background threads associated with maintenance tasks for both zone trees. - /// - public void TryCancelBackgroundThreads() + } + + /// + /// Returns true if the index is dropped, otherwise false. + /// + public bool IsIndexDropped { get => isDropped; } + + /// + /// Initializes a new instance of the class, + /// with the option to configure primary and secondary zone trees. + /// + /// The path to the data storage, defaulting to "data". + /// The comparer of record. + /// The comparer of token. + /// Indicates whether a secondary index should be used to perform faster deletion. + /// Defines the life time of cached blocks. Default is 1 minute. + /// /// Advanced ZoneTree Options enabling customization of underlying ZoneTree instances. 
+ public IndexOfTokenRecordPreviousToken( + string dataPath = "data", + IRefComparer recordComparer = null, + IRefComparer tokenComparer = null, + bool useSecondaryIndex = false, + long blockCacheLifeTimeInMilliseconds = 60_000, + AdvancedZoneTreeOptions advancedOptions = null) + { + if (recordComparer == null) + recordComparer = ComponentsForKnownTypes.GetComparer(); + if (tokenComparer == null) + tokenComparer = ComponentsForKnownTypes.GetComparer(); + var fileStreamProvider = advancedOptions?.FileStreamProvider; + var factory1 = new ZoneTreeFactory, byte>(fileStreamProvider) + .SetDataDirectory($"{dataPath}/index1") + .SetIsDeletedDelegate( + (in CompositeKeyOfTokenRecordPrevious key, in byte value) => value == 1) + .SetMarkValueDeletedDelegate((ref byte x) => x = 1) + .SetKeySerializer(new StructSerializer>()) + .SetComparer( + new CompositeKeyOfTokenRecordPreviousComparer( + recordComparer, + tokenComparer)); + + advancedOptions?.FactoryConfigurator1?.Invoke(factory1); + + ZoneTree1 = factory1.OpenOrCreate(); + Maintainer1 = ZoneTree1.CreateMaintainer(); + Maintainer1.InactiveBlockCacheCleanupInterval = TimeSpan.FromSeconds(30); + Maintainer1.BlockCacheLifeTime = + TimeSpan.FromMilliseconds(blockCacheLifeTimeInMilliseconds); + Maintainer1.EnableJobForCleaningInactiveCaches = true; + RecordComparer = recordComparer; + TokenComparer = tokenComparer; + this.useSecondaryIndex = useSecondaryIndex; + if (useSecondaryIndex) { - ThrowIfIndexIsDropped(); - Maintainer1.TryCancelBackgroundThreads(); - Maintainer2?.TryCancelBackgroundThreads(); + var factory2 = new ZoneTreeFactory, byte>(fileStreamProvider) + .SetDataDirectory($"{dataPath}/index2") + .SetIsDeletedDelegate( + (in CompositeKeyOfRecordToken key, in byte value) => value == 1) + .SetMarkValueDeletedDelegate((ref byte x) => x = 1) + .SetKeySerializer(new StructSerializer>()) + .SetComparer( + new CompositeKeyOfRecordTokenComparer( + recordComparer, + tokenComparer)); + + 
advancedOptions?.FactoryConfigurator2?.Invoke(factory2); + + ZoneTree2 = factory2.OpenOrCreate(); + Maintainer2 = ZoneTree2.CreateMaintainer(); + Maintainer2.InactiveBlockCacheCleanupInterval = TimeSpan.FromSeconds(30); + Maintainer2.BlockCacheLifeTime = + TimeSpan.FromMilliseconds(blockCacheLifeTimeInMilliseconds); + Maintainer2.EnableJobForCleaningInactiveCaches = true; } - - /// - /// Waits for all background threads associated with maintenance tasks to complete for both zone trees. - /// - public void WaitForBackgroundThreads() + searchAlgorithm = new(this); + advancedSearchAlgorithm = new(this); + } + + /// + /// Throws an exception if the index has been dropped, preventing further operations on a dropped index. + /// + public void ThrowIfIndexIsDropped() + { + if (isDropped) throw new Exception($"{nameof( + IndexOfTokenRecordPreviousToken)} is dropped."); + } + + /// + /// Evicts data from memory to disk in both primary and secondary zone trees. + /// + public void EvictToDisk() + { + ThrowIfIndexIsDropped(); + Maintainer1.EvictToDisk(); + Maintainer2?.EvictToDisk(); + } + + /// + /// Attempts to cancel any background threads associated with maintenance tasks for both zone trees. + /// + public void TryCancelBackgroundThreads() + { + ThrowIfIndexIsDropped(); + Maintainer1.TryCancelBackgroundThreads(); + Maintainer2?.TryCancelBackgroundThreads(); + } + + /// + /// Waits for all background threads associated with maintenance tasks to complete for both zone trees. + /// + public void WaitForBackgroundThreads() + { + ThrowIfIndexIsDropped(); + Maintainer1.WaitForBackgroundThreads(); + Maintainer2?.WaitForBackgroundThreads(); + } + + /// + /// Drops the index by canceling and waiting for background threads, and then destroying the zone trees. 
+ /// + public void Drop() + { + ThrowIfIndexIsDropped(); + TryCancelBackgroundThreads(); + WaitForBackgroundThreads(); + isDropped = true; + IsReadOnly = true; + ZoneTree1.Maintenance.Drop(); + ZoneTree2?.Maintenance.Drop(); + ZoneTree1.Dispose(); + ZoneTree2?.Dispose(); + } + + /// + /// Upserts a record in the primary zone tree, and optionally in the secondary zone tree if enabled. + /// + /// The token associated with the record. + /// The record to be upserted. + /// The token that precedes the current token in the record. + public void UpsertRecord(TToken token, TRecord record, TToken previousToken) + { + ThrowIfIndexIsDropped(); + ZoneTree1.Upsert(new CompositeKeyOfTokenRecordPrevious() { - ThrowIfIndexIsDropped(); - Maintainer1.WaitForBackgroundThreads(); - Maintainer2?.WaitForBackgroundThreads(); - } - - /// - /// Drops the index by canceling and waiting for background threads, and then destroying the zone trees. - /// - public void Drop() + Token = token, + Record = record, + PreviousToken = previousToken + }, 0); + if (!useSecondaryIndex) return; + var key = new CompositeKeyOfRecordToken() { - ThrowIfIndexIsDropped(); - TryCancelBackgroundThreads(); - WaitForBackgroundThreads(); - isDropped = true; - IsReadOnly = true; - ZoneTree1.Maintenance.Drop(); - ZoneTree2?.Maintenance.Drop(); - ZoneTree1.Dispose(); - ZoneTree2?.Dispose(); - } - - /// - /// Upserts a record in the primary zone tree, and optionally in the secondary zone tree if enabled. - /// - /// The token associated with the record. - /// The record to be upserted. - /// The token that precedes the current token in the record. - public void UpsertRecord(TToken token, TRecord record, TToken previousToken) + Record = record, + Token = token, + }; + ZoneTree2.TryAdd(key, new(), out _); + } + + /// + /// Deletes a record from the primary zone tree, and optionally from the secondary zone tree if a secondary index is enabled. + /// + /// The token associated with the record to delete. 
+ /// The record to be deleted. + /// The token that precedes the current token in the record. + public void DeleteRecord(TToken token, TRecord record, TToken previousToken) + { + ThrowIfIndexIsDropped(); + ZoneTree1.ForceDelete(new CompositeKeyOfTokenRecordPrevious() { - ThrowIfIndexIsDropped(); - ZoneTree1.Upsert(new CompositeKeyOfTokenRecordPrevious() - { - Token = token, - Record = record, - PreviousToken = previousToken - }, 0); - if (!useSecondaryIndex) return; - var key = new CompositeKeyOfRecordToken() - { - Record = record, - Token = token, - }; - ZoneTree2.TryAdd(key, new(), out _); - } + Token = token, + Record = record, + PreviousToken = previousToken + }); - /// - /// Deletes a record from the primary zone tree, and optionally from the secondary zone tree if a secondary index is enabled. - /// - /// The token associated with the record to delete. - /// The record to be deleted. - /// The token that precedes the current token in the record. - public void DeleteRecord(TToken token, TRecord record, TToken previousToken) - { - ThrowIfIndexIsDropped(); - ZoneTree1.ForceDelete(new CompositeKeyOfTokenRecordPrevious() - { - Token = token, - Record = record, - PreviousToken = previousToken - }); - - if (!useSecondaryIndex) - return; - - ZoneTree2.ForceDelete(new CompositeKeyOfRecordToken() - { - Record = record, - Token = token, - }); - } + if (!useSecondaryIndex) + return; - /// - /// Deletes a record from the index without using the secondary index. - /// - /// The record to delete. - /// The number of entries deleted. 
- long DeleteRecordWithoutInvertedIndex(TRecord record) + ZoneTree2.ForceDelete(new CompositeKeyOfRecordToken() { - using var iterator1 = ZoneTree1.CreateIterator( - IteratorType.NoRefresh, - contributeToTheBlockCache: false); - var deletedEntries = 0L; - var recordComparer = RecordComparer; - while (iterator1.Next()) - { - var key = iterator1.CurrentKey; - if (recordComparer.AreNotEqual(key.Record, record)) continue; - ZoneTree1.ForceDelete(key); - ++deletedEntries; - } - return deletedEntries; - } - - /// - /// Deletes a record from the index, including from the secondary index if enabled. - /// - /// The record to delete. - /// The number of entries deleted. - public long DeleteRecord(TRecord record) + Record = record, + Token = token, + }); + } + + /// + /// Deletes a record from the index without using the secondary index. + /// + /// The record to delete. + /// The number of entries deleted. + long DeleteRecordWithoutInvertedIndex(TRecord record) + { + using var iterator1 = ZoneTree1.CreateIterator( + IteratorType.NoRefresh, + contributeToTheBlockCache: false); + var deletedEntries = 0L; + var recordComparer = RecordComparer; + while (iterator1.Next()) { - ThrowIfIndexIsDropped(); - if (!useSecondaryIndex) return DeleteRecordWithoutInvertedIndex(record); - using var iterator1 = ZoneTree1.CreateIterator( - IteratorType.NoRefresh, - contributeToTheBlockCache: false); - using var iterator2 = ZoneTree2.CreateIterator( - IteratorType.NoRefresh, - contributeToTheBlockCache: false); - iterator2.Seek(new() - { - Record = record - }); - var recordComparer = RecordComparer; - var deletedEntries = 0L; - while (iterator2.Next()) - { - var reverseKey = iterator2.CurrentKey; - if (recordComparer.AreNotEqual(reverseKey.Record, record)) break; - var reverseKeyToken = reverseKey.Token; - iterator1.Seek(new() - { - Token = reverseKeyToken, - Record = record - }); - while (iterator1.Next()) - { - var ftkey = iterator1.CurrentKey; - if (TokenComparer.AreNotEqual(ftkey.Token, 
reverseKeyToken)) - break; - if (recordComparer.AreNotEqual(ftkey.Record, record)) - break; - ZoneTree1 - .ForceDelete(ftkey); - ++deletedEntries; - } - ZoneTree2?.ForceDelete(reverseKey); - } - return deletedEntries; + var key = iterator1.CurrentKey; + if (recordComparer.AreNotEqual(key.Record, record)) continue; + ZoneTree1.ForceDelete(key); + ++deletedEntries; } - - /// - /// Searches the index for records that match the specified tokens, with optional support for facets, token order respect, and pagination. - /// - /// - /// A read-only span of tokens that the records must contain. This parameter is mandatory unless facets are provided. - /// The tokens are logically grouped using "AND", meaning all tokens must be present in the matching records. - /// If both the tokens span and the facets span are empty, the result will be an empty array, as searching without tokens and facets is not supported. - /// Tokens can be empty if facets are provided; in this case, the search will be based solely on the facets. - /// To retrieve records without specific search tokens or facets, consider fetching them from the actual record source instead of using the search index. - /// - /// - /// An optional token that the search will prioritize when searching. - /// If not specified, the first token in the tokens span is used. - /// - /// - /// A boolean indicating whether the search should respect the order of tokens in the record. - /// If true, the records must contain the tokens in the specified order. - /// - /// - /// An optional read-only span of tokens that can be used to filter the search results. - /// If any facets are provided, records must contain at least one of these facet tokens to be included in the results. - /// If the span is empty or not provided, no facet filtering is applied, and all matching records are returned regardless of facet values. - /// - /// - /// The number of matching records to skip in the result set, useful for pagination. - /// Defaults to 0. 
- /// - /// - /// The maximum number of records to return, useful for limiting the result set size. - /// Defaults to 0, which indicates no limit. - /// - /// - /// A token to monitor for cancellation requests. This allows the search operation to be canceled if necessary. - /// - /// - /// An array of records that match the specified tokens and facets, respecting the token order if specified. - /// The array may be empty if no matching records are found. - /// - public TRecord[] SimpleSearch( - ReadOnlySpan tokens, - TToken? firstLookAt = null, - bool respectTokenOrder = true, - ReadOnlySpan facets = default, - int skip = 0, - int limit = 0, - CancellationToken cancellationToken = default) + return deletedEntries; + } + + /// + /// Deletes a record from the index, including from the secondary index if enabled. + /// + /// The record to delete. + /// The number of entries deleted. + public long DeleteRecord(TRecord record) + { + ThrowIfIndexIsDropped(); + if (!useSecondaryIndex) return DeleteRecordWithoutInvertedIndex(record); + using var iterator1 = ZoneTree1.CreateIterator( + IteratorType.NoRefresh, + contributeToTheBlockCache: false); + using var iterator2 = ZoneTree2.CreateIterator( + IteratorType.NoRefresh, + contributeToTheBlockCache: false); + iterator2.Seek(new() { - return searchAlgorithm - .Search(tokens, firstLookAt, respectTokenOrder, facets, skip, limit, cancellationToken); - } - - /// - /// Performs a search based on the specified query and returns the matching records. - /// - /// The search query to execute. - /// - /// A token to monitor for cancellation requests. This allows the search operation to be canceled if necessary. - /// - /// An array of records that match the search criteria. 
- public TRecord[] Search( - SearchQuery query, - CancellationToken cancellationToken = default) + Record = record + }); + var recordComparer = RecordComparer; + var deletedEntries = 0L; + while (iterator2.Next()) { - return advancedSearchAlgorithm.Search(query, cancellationToken); + var reverseKey = iterator2.CurrentKey; + if (recordComparer.AreNotEqual(reverseKey.Record, record)) break; + var reverseKeyToken = reverseKey.Token; + iterator1.Seek(new() + { + Token = reverseKeyToken, + Record = record + }); + while (iterator1.Next()) + { + var ftkey = iterator1.CurrentKey; + if (TokenComparer.AreNotEqual(ftkey.Token, reverseKeyToken)) + break; + if (recordComparer.AreNotEqual(ftkey.Record, record)) + break; + ZoneTree1 + .ForceDelete(ftkey); + ++deletedEntries; + } + ZoneTree2?.ForceDelete(reverseKey); } - - /// - /// Disposes the resources used by the index. - /// - public void Dispose() + return deletedEntries; + } + + /// + /// Searches the index for records that match the specified tokens, with optional support for facets, token order respect, and pagination. + /// + /// + /// A read-only span of tokens that the records must contain. This parameter is mandatory unless facets are provided. + /// The tokens are logically grouped using "AND", meaning all tokens must be present in the matching records. + /// If both the tokens span and the facets span are empty, the result will be an empty array, as searching without tokens and facets is not supported. + /// Tokens can be empty if facets are provided; in this case, the search will be based solely on the facets. + /// To retrieve records without specific search tokens or facets, consider fetching them from the actual record source instead of using the search index. + /// + /// + /// An optional token that the search will prioritize when searching. + /// If not specified, the first token in the tokens span is used. 
+ /// + /// + /// A boolean indicating whether the search should respect the order of tokens in the record. + /// If true, the records must contain the tokens in the specified order. + /// + /// + /// An optional read-only span of tokens that can be used to filter the search results. + /// If any facets are provided, records must contain at least one of these facet tokens to be included in the results. + /// If the span is empty or not provided, no facet filtering is applied, and all matching records are returned regardless of facet values. + /// + /// + /// The number of matching records to skip in the result set, useful for pagination. + /// Defaults to 0. + /// + /// + /// The maximum number of records to return, useful for limiting the result set size. + /// Defaults to 0, which indicates no limit. + /// + /// + /// A token to monitor for cancellation requests. This allows the search operation to be canceled if necessary. + /// + /// + /// An array of records that match the specified tokens and facets, respecting the token order if specified. + /// The array may be empty if no matching records are found. + /// + public TRecord[] SimpleSearch( + ReadOnlySpan tokens, + TToken? firstLookAt = null, + bool respectTokenOrder = true, + ReadOnlySpan facets = default, + int skip = 0, + int limit = 0, + CancellationToken cancellationToken = default) + { + return searchAlgorithm + .Search(tokens, firstLookAt, respectTokenOrder, facets, skip, limit, cancellationToken); + } + + /// + /// Performs a search based on the specified query and returns the matching records. + /// + /// The search query to execute. + /// + /// A token to monitor for cancellation requests. This allows the search operation to be canceled if necessary. + /// + /// An array of records that match the search criteria. 
+ public TRecord[] Search( + SearchQuery query, + CancellationToken cancellationToken = default) + { + return advancedSearchAlgorithm.Search(query, cancellationToken); + } + + /// + /// Disposes the resources used by the index. + /// + public void Dispose() + { + if (isDisposed) return; + isDisposed = true; + Maintainer1.WaitForBackgroundThreads(); + Maintainer1.Dispose(); + ZoneTree1.Dispose(); + if (useSecondaryIndex) { - if (isDisposed) return; - isDisposed = true; - Maintainer1.WaitForBackgroundThreads(); - Maintainer1.Dispose(); - ZoneTree1.Dispose(); - if (useSecondaryIndex) - { - Maintainer2?.WaitForBackgroundThreads(); - Maintainer2?.Dispose(); - ZoneTree2?.Dispose(); - } + Maintainer2?.WaitForBackgroundThreads(); + Maintainer2?.Dispose(); + ZoneTree2?.Dispose(); } + } } diff --git a/src/ZoneTree.FullTextSearch/Model/CompositeKeyOfRecordTokenComparer.cs b/src/ZoneTree.FullTextSearch/Model/CompositeKeyOfRecordTokenComparer.cs index bd4f90c..7400ff4 100644 --- a/src/ZoneTree.FullTextSearch/Model/CompositeKeyOfRecordTokenComparer.cs +++ b/src/ZoneTree.FullTextSearch/Model/CompositeKeyOfRecordTokenComparer.cs @@ -1,4 +1,4 @@ -using Tenray.ZoneTree.Comparers; +using ZoneTree.Comparers; namespace ZoneTree.FullTextSearch.Model; diff --git a/src/ZoneTree.FullTextSearch/Model/CompositeKeyOfTokenRecordPreviousComparer.cs b/src/ZoneTree.FullTextSearch/Model/CompositeKeyOfTokenRecordPreviousComparer.cs index 7d2314e..e3194ff 100644 --- a/src/ZoneTree.FullTextSearch/Model/CompositeKeyOfTokenRecordPreviousComparer.cs +++ b/src/ZoneTree.FullTextSearch/Model/CompositeKeyOfTokenRecordPreviousComparer.cs @@ -1,4 +1,4 @@ -using Tenray.ZoneTree.Comparers; +using ZoneTree.Comparers; namespace ZoneTree.FullTextSearch.Model; diff --git a/src/ZoneTree.FullTextSearch/Search/AdvancedSearchOnIndexOfTokenRecordPreviousToken.cs b/src/ZoneTree.FullTextSearch/Search/AdvancedSearchOnIndexOfTokenRecordPreviousToken.cs index 703332a..fa50b11 100644 --- 
a/src/ZoneTree.FullTextSearch/Search/AdvancedSearchOnIndexOfTokenRecordPreviousToken.cs +++ b/src/ZoneTree.FullTextSearch/Search/AdvancedSearchOnIndexOfTokenRecordPreviousToken.cs @@ -1,6 +1,6 @@ using System.Threading; -using Tenray.ZoneTree; -using Tenray.ZoneTree.Comparers; +using ZoneTree; +using ZoneTree.Comparers; using ZoneTree.FullTextSearch.Index; namespace ZoneTree.FullTextSearch.Search; diff --git a/src/ZoneTree.FullTextSearch/Search/SearchOnIndexOfTokenRecordPreviousToken.cs b/src/ZoneTree.FullTextSearch/Search/SearchOnIndexOfTokenRecordPreviousToken.cs index 4da5f0e..fbece98 100644 --- a/src/ZoneTree.FullTextSearch/Search/SearchOnIndexOfTokenRecordPreviousToken.cs +++ b/src/ZoneTree.FullTextSearch/Search/SearchOnIndexOfTokenRecordPreviousToken.cs @@ -1,5 +1,5 @@ -using Tenray.ZoneTree; -using Tenray.ZoneTree.Comparers; +using ZoneTree; +using ZoneTree.Comparers; using ZoneTree.FullTextSearch.Index; namespace ZoneTree.FullTextSearch.Search; diff --git a/src/ZoneTree.FullTextSearch/SearchEngines/AdvancedZoneTreeOptions.cs b/src/ZoneTree.FullTextSearch/SearchEngines/AdvancedZoneTreeOptions.cs index 773ae62..8ec221f 100644 --- a/src/ZoneTree.FullTextSearch/SearchEngines/AdvancedZoneTreeOptions.cs +++ b/src/ZoneTree.FullTextSearch/SearchEngines/AdvancedZoneTreeOptions.cs @@ -1,6 +1,6 @@ -using Tenray.ZoneTree; +using ZoneTree; using ZoneTree.FullTextSearch.Model; -using Tenray.ZoneTree.AbstractFileStream; +using ZoneTree.AbstractFileStream; namespace ZoneTree.FullTextSearch.SearchEngines; diff --git a/src/ZoneTree.FullTextSearch/SearchEngines/HashedSearchEngine.cs b/src/ZoneTree.FullTextSearch/SearchEngines/HashedSearchEngine.cs index 2206088..fdb7a60 100644 --- a/src/ZoneTree.FullTextSearch/SearchEngines/HashedSearchEngine.cs +++ b/src/ZoneTree.FullTextSearch/SearchEngines/HashedSearchEngine.cs @@ -1,8 +1,8 @@ using System; using System.Drawing; -using Tenray.ZoneTree; -using Tenray.ZoneTree.Comparers; -using Tenray.ZoneTree.Core; +using ZoneTree; +using 
ZoneTree.Comparers; +using ZoneTree.Core; using ZoneTree.FullTextSearch; using ZoneTree.FullTextSearch.Index; using ZoneTree.FullTextSearch.QueryLanguage; @@ -11,7 +11,7 @@ using ZoneTree.FullTextSearch.Hashing; using System.Security.Cryptography; using ZoneTree.FullTextSearch.Model; -using Tenray.ZoneTree.AbstractFileStream; +using ZoneTree.AbstractFileStream; namespace ZoneTree.FullTextSearch.SearchEngines; diff --git a/src/ZoneTree.FullTextSearch/Storage/RecordTable.cs b/src/ZoneTree.FullTextSearch/Storage/RecordTable.cs index eab35e3..9f3942a 100644 --- a/src/ZoneTree.FullTextSearch/Storage/RecordTable.cs +++ b/src/ZoneTree.FullTextSearch/Storage/RecordTable.cs @@ -1,5 +1,5 @@ -using Tenray.ZoneTree; -using Tenray.ZoneTree.AbstractFileStream; +using ZoneTree; +using ZoneTree.AbstractFileStream; namespace ZoneTree.FullTextSearch; diff --git a/src/ZoneTree.FullTextSearch/ZoneTree.FullTextSearch.csproj b/src/ZoneTree.FullTextSearch/ZoneTree.FullTextSearch.csproj index 25351b8..e04ddb4 100644 --- a/src/ZoneTree.FullTextSearch/ZoneTree.FullTextSearch.csproj +++ b/src/ZoneTree.FullTextSearch/ZoneTree.FullTextSearch.csproj @@ -1,7 +1,7 @@  - net9.0;net8.0;net7.0;net6.0 + net10.0;net9.0;net8.0;net7.0;net6.0 true en-US https://github.com/koculu/ZoneTree.FullTextSearch @@ -34,11 +34,11 @@ - + all runtime; build; native; contentfiles; analyzers; buildtransitive - + From 2e3d0a03700ace3629fe7d3aaecf8f702a103db9 Mon Sep 17 00:00:00 2001 From: Ahmed Yasin Koculu Date: Mon, 8 Jun 2026 04:40:36 +0200 Subject: [PATCH 2/3] dotnet format repo --- .../Program.cs | 42 +- .../SearchEngineApp.cs | 600 ++--- .../ExecuteParsedQueries.cs | 2226 ++++++++--------- .../ExecuteSearchQueryTests.cs | 150 +- .../FacetTests.cs | 88 +- .../HashedSearchEngineTests.cs | 258 +- .../RecordTableTests.cs | 228 +- .../SearchQueryTests.cs | 442 ++-- .../sampleData/ProductList.cs | 62 +- .../Hashing/DefaultHashCodeGenerator.cs | 74 +- .../Hashing/IHashCodeGenerator.cs | 40 +- 
.../Hashing/NormalizableHashCodeGenerator.cs | 132 +- .../Index/IndexOfTokenRecordPreviousToken.cs | 2 +- .../Misc/FolderIterator.cs | 106 +- .../Model/CompositeKeyOfRecordToken.cs | 74 +- .../CompositeKeyOfRecordTokenComparer.cs | 94 +- .../CompositeKeyOfTokenRecordPrevious.cs | 90 +- ...mpositeKeyOfTokenRecordPreviousComparer.cs | 106 +- .../Model/NGramToken4.cs | 80 +- .../Model/TokenPair.cs | 72 +- .../Normalizers/DiacriticNormalizer.cs | 152 +- .../Normalizers/ICharNormalizer.cs | 14 +- .../Normalizers/IStringNormalizer.cs | 16 +- .../QueryLanguage/Parser.cs | 772 +++--- .../QueryLanguage/Token.cs | 68 +- .../QueryLanguage/TokenType.cs | 26 +- .../QueryLanguage/Tokenizer.cs | 354 +-- .../QueryLanguage/UnexpectedTokenException.cs | 22 +- ...SearchOnIndexOfTokenRecordPreviousToken.cs | 604 ++--- .../Search/HashedSearchQueryFactory.cs | 378 +-- .../Search/QueryNode.cs | 300 +-- .../Search/QueryNodeType.cs | 8 +- ...SearchOnIndexOfTokenRecordPreviousToken.cs | 468 ++-- .../Search/SearchQuery.cs | 126 +- .../SearchEngines/AdvancedZoneTreeOptions.cs | 88 +- .../SearchEngines/HashedSearchEngine.cs | 758 +++--- .../Storage/RecordTable.cs | 368 +-- .../Tokenizer/IWordTokenizer.cs | 26 +- .../Tokenizer/Slice.cs | 2 +- .../Tokenizer/SliceExtension.cs | 48 +- .../Tokenizer/WordTokenizer.cs | 320 +-- 41 files changed, 4959 insertions(+), 4925 deletions(-) diff --git a/src/ZoneTree.FullTextSearch.Playground/Program.cs b/src/ZoneTree.FullTextSearch.Playground/Program.cs index 51779e1..294fa09 100644 --- a/src/ZoneTree.FullTextSearch.Playground/Program.cs +++ b/src/ZoneTree.FullTextSearch.Playground/Program.cs @@ -1,28 +1,28 @@ -namespace ZoneTree.FullTextSearch.Playground; +namespace ZoneTree.FullTextSearch.Playground; public sealed class Program { - static void Main(string[] args) + static void Main(string[] args) + { + using var app = new SearchEngineApp(); + if (args.Length > 0) { - using var app = new SearchEngineApp(); - if (args.Length > 0) - { - switch (args[0]) - { - 
case "create": - { - app.CreateIndex(app.DefaultIndexPath, app.DefaultFilePattern, false); - break; - } - case "drop": - { - app.DropIndex(false); - break; - } - } - return; - } - app.Run(); + switch (args[0]) + { + case "create": + { + app.CreateIndex(app.DefaultIndexPath, app.DefaultFilePattern, false); + break; + } + case "drop": + { + app.DropIndex(false); + break; + } + } + return; } + app.Run(); + } } diff --git a/src/ZoneTree.FullTextSearch.Playground/SearchEngineApp.cs b/src/ZoneTree.FullTextSearch.Playground/SearchEngineApp.cs index 0f5197d..61a2e44 100644 --- a/src/ZoneTree.FullTextSearch.Playground/SearchEngineApp.cs +++ b/src/ZoneTree.FullTextSearch.Playground/SearchEngineApp.cs @@ -1,4 +1,4 @@ -using System.Diagnostics; +using System.Diagnostics; using System.Runtime; using ZoneTree.FullTextSearch.SearchEngines; using ZoneTree.FullTextSearch.Normalizers; @@ -10,332 +10,332 @@ namespace ZoneTree.FullTextSearch.Playground; public sealed class SearchEngineApp : IDisposable { - readonly string DataPath = "data"; + readonly string DataPath = "data"; - public readonly string DefaultIndexPath = @"D:\code"; + public readonly string DefaultIndexPath = @"D:\code"; - public readonly string DefaultFilePattern = "*.cs"; + public readonly string DefaultFilePattern = "*.cs"; - readonly bool UseSecondaryIndex = false; + readonly bool UseSecondaryIndex = false; - readonly bool UseDiacriticNormalizer = false; + readonly bool UseDiacriticNormalizer = false; - readonly bool IndexInBackground = false; + readonly bool IndexInBackground = false; - readonly HashedSearchEngine SearchEngine; + readonly HashedSearchEngine SearchEngine; - readonly RecordTable RecordTable; + readonly RecordTable RecordTable; - public SearchEngineApp() - { - var hashGenerator = UseDiacriticNormalizer ? 
new NormalizableHashCodeGenerator( - new DiacriticNormalizer(), false) : null; - - SearchEngine = new HashedSearchEngine( - DataPath, - UseSecondaryIndex, - new WordTokenizer(3), - hashCodeGenerator: hashGenerator); - RecordTable = new RecordTable(DataPath); - } + public SearchEngineApp() + { + var hashGenerator = UseDiacriticNormalizer ? new NormalizableHashCodeGenerator( + new DiacriticNormalizer(), false) : null; - public void Run() - { - MainMenu(); - } + SearchEngine = new HashedSearchEngine( + DataPath, + UseSecondaryIndex, + new WordTokenizer(3), + hashCodeGenerator: hashGenerator); + RecordTable = new RecordTable(DataPath); + } + + public void Run() + { + MainMenu(); + } - void MainMenu() + void MainMenu() + { + while (true) { - while (true) + Console.Clear(); + Console.WriteLine("ZoneTree.FullTextSearch - HashedSearchEngine"); + + if (!UseSecondaryIndex) + Console.WriteLine("SecondaryIndex is disabled. Deleting records might be slow."); + + Console.WriteLine("1. Create Index"); + Console.WriteLine("2. Search"); + Console.WriteLine("3. Show Stats"); + Console.WriteLine("4. Drop Index"); + Console.WriteLine("5. Collect GC"); + Console.WriteLine("6. Exit"); + Console.Write("Select an option: "); + var input = Console.ReadLine(); + try + { + switch (input) { - Console.Clear(); - Console.WriteLine("ZoneTree.FullTextSearch - HashedSearchEngine"); - - if (!UseSecondaryIndex) - Console.WriteLine("SecondaryIndex is disabled. Deleting records might be slow."); - - Console.WriteLine("1. Create Index"); - Console.WriteLine("2. Search"); - Console.WriteLine("3. Show Stats"); - Console.WriteLine("4. Drop Index"); - Console.WriteLine("5. Collect GC"); - Console.WriteLine("6. 
Exit"); - Console.Write("Select an option: "); - var input = Console.ReadLine(); - try - { - switch (input) - { - case "1": - var o = ConfigureIndex(); - if (IndexInBackground) - Task.Run(() => CreateIndex(o.indexPath, o.pattern, false)); - else - CreateIndex(o.indexPath, o.pattern, true); - break; - case "2": - Search(); - break; - case "3": - ShowStats(); - break; - case "4": - DropIndex(true); - return; - case "5": - CollectGC(); - break; - case "6": - case "q": - case "Q": - return; - default: - Console.WriteLine("Invalid option. Please try again."); - break; - } - } - catch (Exception e) - { - Console.WriteLine(e.ToString()); - PressAnyKeyToContinue(); - } + case "1": + var o = ConfigureIndex(); + if (IndexInBackground) + Task.Run(() => CreateIndex(o.indexPath, o.pattern, false)); + else + CreateIndex(o.indexPath, o.pattern, true); + break; + case "2": + Search(); + break; + case "3": + ShowStats(); + break; + case "4": + DropIndex(true); + return; + case "5": + CollectGC(); + break; + case "6": + case "q": + case "Q": + return; + default: + Console.WriteLine("Invalid option. 
Please try again."); + break; } - } - - static void ShowMemoryUsage(string label) - { - var currentProcess = Process.GetCurrentProcess(); - var physicalMemory = currentProcess.WorkingSet64 / (1024 * 1024); - var virtualMemory = currentProcess.PrivateMemorySize64 / (1024 * 1024); - var pagedMemory = currentProcess.PagedMemorySize64 / (1024 * 1024); - var peakPagedMemorySize = currentProcess.PeakPagedMemorySize64 / (1024 * 1024); - var gcMemory = GC.GetTotalMemory(forceFullCollection: false) / (1024 * 1024); - - var separator = "+----------------------------------+--------------+"; - var header = "| Metric | Value (MB) |"; - - Console.WriteLine(separator); - Console.WriteLine($"| {label.PadRight(32)}| |"); - Console.WriteLine(separator); - Console.WriteLine(header); - Console.WriteLine(separator); - Console.WriteLine($"| Total Physical Memory Usage | {physicalMemory,12} |"); - Console.WriteLine($"| Total Virtual Memory Usage | {virtualMemory,12} |"); - Console.WriteLine($"| Paged Memory Size | {pagedMemory,12} |"); - Console.WriteLine($"| Peak Paged Memory Size | {peakPagedMemorySize,12} |"); - Console.WriteLine($"| Total GC Memory | {gcMemory,12} |"); - Console.WriteLine(separator); - Console.WriteLine(); - } - - static void CollectGC() - { - ShowMemoryUsage("Before GC"); - GC.Collect(); - GC.WaitForPendingFinalizers(); - GC.Collect(); - ShowMemoryUsage("Before LOH GC"); - GCSettings.LargeObjectHeapCompactionMode = - GCLargeObjectHeapCompactionMode.CompactOnce; - GC.Collect(); - ShowMemoryUsage("After LOH GC"); + } + catch (Exception e) + { + Console.WriteLine(e.ToString()); PressAnyKeyToContinue(); + } } - - static void PressAnyKeyToContinue() + } + + static void ShowMemoryUsage(string label) + { + var currentProcess = Process.GetCurrentProcess(); + var physicalMemory = currentProcess.WorkingSet64 / (1024 * 1024); + var virtualMemory = currentProcess.PrivateMemorySize64 / (1024 * 1024); + var pagedMemory = currentProcess.PagedMemorySize64 / (1024 * 1024); + var 
peakPagedMemorySize = currentProcess.PeakPagedMemorySize64 / (1024 * 1024); + var gcMemory = GC.GetTotalMemory(forceFullCollection: false) / (1024 * 1024); + + var separator = "+----------------------------------+--------------+"; + var header = "| Metric | Value (MB) |"; + + Console.WriteLine(separator); + Console.WriteLine($"| {label.PadRight(32)}| |"); + Console.WriteLine(separator); + Console.WriteLine(header); + Console.WriteLine(separator); + Console.WriteLine($"| Total Physical Memory Usage | {physicalMemory,12} |"); + Console.WriteLine($"| Total Virtual Memory Usage | {virtualMemory,12} |"); + Console.WriteLine($"| Paged Memory Size | {pagedMemory,12} |"); + Console.WriteLine($"| Peak Paged Memory Size | {peakPagedMemorySize,12} |"); + Console.WriteLine($"| Total GC Memory | {gcMemory,12} |"); + Console.WriteLine(separator); + Console.WriteLine(); + } + + static void CollectGC() + { + ShowMemoryUsage("Before GC"); + GC.Collect(); + GC.WaitForPendingFinalizers(); + GC.Collect(); + ShowMemoryUsage("Before LOH GC"); + GCSettings.LargeObjectHeapCompactionMode = + GCLargeObjectHeapCompactionMode.CompactOnce; + GC.Collect(); + ShowMemoryUsage("After LOH GC"); + PressAnyKeyToContinue(); + } + + static void PressAnyKeyToContinue() + { + Console.WriteLine("Press any key to continue..."); + Console.ReadKey(); + } + + public void DropIndex(bool isInteractive) + { + RecordTable.Drop(); + SearchEngine.Drop(); + if (isInteractive) { - Console.WriteLine("Press any key to continue..."); - Console.ReadKey(); + Console.WriteLine("Dropped the index. 
Press any key to continue..."); + Console.ReadKey(); } - - public void DropIndex(bool isInteractive) + } + + void ShowStats() + { + Console.WriteLine("Counting tokens and records..."); + var sw = Stopwatch.StartNew(); + var tokenCount = SearchEngine.Index.ZoneTree1.Count(); + var recordCount = RecordTable.ZoneTree1.Count(); + var elapsedMilliseconds = sw.ElapsedMilliseconds; + + // Prepare the data + string[] headers = { "Metric", "Value" }; + string[] tokenCountRow = { "Token Count", tokenCount.ToString() }; + string[] recordCountRow = { "Record Count", recordCount.ToString() }; + string[] elapsedTimeRow = { "Elapsed Time (ms)", elapsedMilliseconds.ToString() }; + + // Determine the width of each column + int columnWidth = Math.Max(headers[0].Length, Math.Max(tokenCountRow[0].Length, Math.Max(recordCountRow[0].Length, elapsedTimeRow[0].Length))) + 2; + int valueWidth = Math.Max(headers[1].Length, Math.Max(tokenCountRow[1].Length, Math.Max(recordCountRow[1].Length, elapsedTimeRow[1].Length))) + 2; + + // Print the table + string separator = $"+{new string('-', columnWidth)}+{new string('-', valueWidth)}+"; + + Console.Clear(); + Console.WriteLine(separator); + Console.WriteLine($"| {"Metric".PadRight(columnWidth - 1)}| {"Value".PadRight(valueWidth - 1)}|"); + Console.WriteLine(separator); + Console.WriteLine($"| {tokenCountRow[0].PadRight(columnWidth - 1)}| {tokenCountRow[1].PadRight(valueWidth - 1)}|"); + Console.WriteLine($"| {recordCountRow[0].PadRight(columnWidth - 1)}| {recordCountRow[1].PadRight(valueWidth - 1)}|"); + Console.WriteLine($"| {elapsedTimeRow[0].PadRight(columnWidth - 1)}| {elapsedTimeRow[1].PadRight(valueWidth - 1)}|"); + Console.WriteLine(separator); + + PressAnyKeyToContinue(); + } + + public (string indexPath, string pattern) ConfigureIndex() + { + Console.WriteLine($"Enter path to index (default: {DefaultIndexPath}):"); + var indexPath = Console.ReadLine(); + if (string.IsNullOrEmpty(indexPath)) indexPath = DefaultIndexPath; + 
Console.WriteLine("Index path:" + indexPath); + Console.WriteLine($"Enter pattern to index (default: {DefaultFilePattern}):"); + var pattern = Console.ReadLine(); + if (string.IsNullOrEmpty(pattern)) pattern = DefaultFilePattern; + return (indexPath, pattern); + } + + public void CreateIndex(string indexPath, string pattern, bool isInteractive) + { + var sw = Stopwatch.StartNew(); + var folderIterator = new FolderIterator(indexPath, pattern, true); + var nextRecord = RecordTable.GetLastRecord() ?? 1; + Console.WriteLine("nextRecord: " + nextRecord); + var totalRecordUpserted = 0; + + var cancellationTokenSource = new CancellationTokenSource(); + var task = Task.Run(() => { - RecordTable.Drop(); - SearchEngine.Drop(); - if (isInteractive) - { - Console.WriteLine("Dropped the index. Press any key to continue..."); - Console.ReadKey(); - } - } - - void ShowStats() - { - Console.WriteLine("Counting tokens and records..."); - var sw = Stopwatch.StartNew(); - var tokenCount = SearchEngine.Index.ZoneTree1.Count(); - var recordCount = RecordTable.ZoneTree1.Count(); - var elapsedMilliseconds = sw.ElapsedMilliseconds; - - // Prepare the data - string[] headers = { "Metric", "Value" }; - string[] tokenCountRow = { "Token Count", tokenCount.ToString() }; - string[] recordCountRow = { "Record Count", recordCount.ToString() }; - string[] elapsedTimeRow = { "Elapsed Time (ms)", elapsedMilliseconds.ToString() }; - - // Determine the width of each column - int columnWidth = Math.Max(headers[0].Length, Math.Max(tokenCountRow[0].Length, Math.Max(recordCountRow[0].Length, elapsedTimeRow[0].Length))) + 2; - int valueWidth = Math.Max(headers[1].Length, Math.Max(tokenCountRow[1].Length, Math.Max(recordCountRow[1].Length, elapsedTimeRow[1].Length))) + 2; - - // Print the table - string separator = $"+{new string('-', columnWidth)}+{new string('-', valueWidth)}+"; - - Console.Clear(); - Console.WriteLine(separator); - Console.WriteLine($"| {"Metric".PadRight(columnWidth - 1)}| 
{"Value".PadRight(valueWidth - 1)}|"); - Console.WriteLine(separator); - Console.WriteLine($"| {tokenCountRow[0].PadRight(columnWidth - 1)}| {tokenCountRow[1].PadRight(valueWidth - 1)}|"); - Console.WriteLine($"| {recordCountRow[0].PadRight(columnWidth - 1)}| {recordCountRow[1].PadRight(valueWidth - 1)}|"); - Console.WriteLine($"| {elapsedTimeRow[0].PadRight(columnWidth - 1)}| {elapsedTimeRow[1].PadRight(valueWidth - 1)}|"); - Console.WriteLine(separator); - - PressAnyKeyToContinue(); - } - - public (string indexPath, string pattern) ConfigureIndex() + var iteratorTask = folderIterator.IterateAll( + (path) => + { + if (cancellationTokenSource.IsCancellationRequested) + return Task.CompletedTask; + return Task.Run(async () => + { + try + { + if (cancellationTokenSource.IsCancellationRequested) return; + if (!RecordTable.TryGetRecord(path, out var record)) + record = Interlocked.Increment(ref nextRecord); + + var text = await File.ReadAllTextAsync(path); + RecordTable.UpsertRecord(record, path); + SearchEngine.AddRecord(record, text); + Interlocked.Increment(ref totalRecordUpserted); + } + catch (Exception ex) + { + Console.WriteLine(ex.ToString()); + throw; + } + }); + }, + cancellationTokenSource.Token); + iteratorTask.Wait(); + sw.Stop(); + Console.WriteLine($"Created {totalRecordUpserted} records in: " + sw.ElapsedMilliseconds + " ms"); + + sw.Restart(); + SearchEngine.Index.EvictToDisk(); + RecordTable.EvictToDisk(); + Console.WriteLine("Waiting for background threads..."); + SearchEngine.Index.WaitForBackgroundThreads(); + RecordTable.WaitForBackgroundThreads(); + Console.WriteLine("Merging completed in: " + sw.ElapsedMilliseconds + " ms"); + if (cancellationTokenSource.IsCancellationRequested && isInteractive) + { + Console.WriteLine("Press any key to return to the main menu..."); + Console.ReadKey(); + } + }); + Console.WriteLine("Creating the index..."); + if (isInteractive) { - Console.WriteLine($"Enter path to index (default: {DefaultIndexPath}):"); - var 
indexPath = Console.ReadLine(); - if (string.IsNullOrEmpty(indexPath)) indexPath = DefaultIndexPath; - Console.WriteLine("Index path:" + indexPath); - Console.WriteLine($"Enter pattern to index (default: {DefaultFilePattern}):"); - var pattern = Console.ReadLine(); - if (string.IsNullOrEmpty(pattern)) pattern = DefaultFilePattern; - return (indexPath, pattern); + Console.WriteLine("Press any key to quit the index creation..."); + Console.ReadKey(); + cancellationTokenSource.Cancel(); } + task.Wait(); + } - public void CreateIndex(string indexPath, string pattern, bool isInteractive) + void Search() + { + while (true) { - var sw = Stopwatch.StartNew(); - var folderIterator = new FolderIterator(indexPath, pattern, true); - var nextRecord = RecordTable.GetLastRecord() ?? 1; - Console.WriteLine("nextRecord: " + nextRecord); - var totalRecordUpserted = 0; - - var cancellationTokenSource = new CancellationTokenSource(); - var task = Task.Run(() => + Console.Clear(); + Console.WriteLine("Start with '[DEL]' to delete the search results."); + Console.WriteLine("Enter search query (or 'q' to return to main menu):"); + var text = Console.ReadLine(); + if (text.Equals("q", StringComparison.InvariantCultureIgnoreCase)) break; + + var isDeleteRequest = false; + if (text.StartsWith("[DEL]", StringComparison.InvariantCultureIgnoreCase)) + { + text = text.Substring(5); + isDeleteRequest = true; + } + + var sw = Stopwatch.StartNew(); + var pageLimit = 100; + var result = SearchEngine.Search(text, 0, pageLimit); + var elapsed = sw.ElapsedMilliseconds; + Console.WriteLine($"Found {result.Length} records in {elapsed} ms. 
(Search limited to {pageLimit} records.)"); + + if (isDeleteRequest) + { + sw.Restart(); + var sum = 0L; + Parallel.ForEach(result, record => { - var iteratorTask = folderIterator.IterateAll( - (path) => - { - if (cancellationTokenSource.IsCancellationRequested) - return Task.CompletedTask; - return Task.Run(async () => - { - try - { - if (cancellationTokenSource.IsCancellationRequested) return; - if (!RecordTable.TryGetRecord(path, out var record)) - record = Interlocked.Increment(ref nextRecord); - - var text = await File.ReadAllTextAsync(path); - RecordTable.UpsertRecord(record, path); - SearchEngine.AddRecord(record, text); - Interlocked.Increment(ref totalRecordUpserted); - } - catch (Exception ex) - { - Console.WriteLine(ex.ToString()); - throw; - } - }); - }, - cancellationTokenSource.Token); - iteratorTask.Wait(); - sw.Stop(); - Console.WriteLine($"Created {totalRecordUpserted} records in: " + sw.ElapsedMilliseconds + " ms"); - - sw.Restart(); - SearchEngine.Index.EvictToDisk(); - RecordTable.EvictToDisk(); - Console.WriteLine("Waiting for background threads..."); - SearchEngine.Index.WaitForBackgroundThreads(); - RecordTable.WaitForBackgroundThreads(); - Console.WriteLine("Merging completed in: " + sw.ElapsedMilliseconds + " ms"); - if (cancellationTokenSource.IsCancellationRequested && isInteractive) - { - Console.WriteLine("Press any key to return to the main menu..."); - Console.ReadKey(); - } + var a = SearchEngine.DeleteRecord(record); + Interlocked.Add(ref sum, a); }); - Console.WriteLine("Creating the index..."); - if (isInteractive) + elapsed = sw.ElapsedMilliseconds; + Console.WriteLine($"Deleted {result.Length} / ({sum}) records in {elapsed} ms."); + } + + var i = 1; + var continueLoop = false; + foreach (var record in result) + { + RecordTable.TryGetValue(record, out var path); + Console.WriteLine($"{i}. 
{path}"); + + if (i % 10 == 0) { - Console.WriteLine("Press any key to quit the index creation..."); - Console.ReadKey(); - cancellationTokenSource.Cancel(); + Console.WriteLine("Press 'Enter' to continue viewing the next set of records, or 'q' to return to the search."); + var key = Console.ReadKey().KeyChar; + if (key == 'q' || key == 'Q') + { + continueLoop = true; + break; + } } - task.Wait(); - } - - void Search() - { - while (true) - { - Console.Clear(); - Console.WriteLine("Start with '[DEL]' to delete the search results."); - Console.WriteLine("Enter search query (or 'q' to return to main menu):"); - var text = Console.ReadLine(); - if (text.Equals("q", StringComparison.InvariantCultureIgnoreCase)) break; - - var isDeleteRequest = false; - if (text.StartsWith("[DEL]", StringComparison.InvariantCultureIgnoreCase)) - { - text = text.Substring(5); - isDeleteRequest = true; - } - - var sw = Stopwatch.StartNew(); - var pageLimit = 100; - var result = SearchEngine.Search(text, 0, pageLimit); - var elapsed = sw.ElapsedMilliseconds; - Console.WriteLine($"Found {result.Length} records in {elapsed} ms. (Search limited to {pageLimit} records.)"); - - if (isDeleteRequest) - { - sw.Restart(); - var sum = 0L; - Parallel.ForEach(result, record => - { - var a = SearchEngine.DeleteRecord(record); - Interlocked.Add(ref sum, a); - }); - elapsed = sw.ElapsedMilliseconds; - Console.WriteLine($"Deleted {result.Length} / ({sum}) records in {elapsed} ms."); - } - - var i = 1; - var continueLoop = false; - foreach (var record in result) - { - RecordTable.TryGetValue(record, out var path); - Console.WriteLine($"{i}. {path}"); - - if (i % 10 == 0) - { - Console.WriteLine("Press 'Enter' to continue viewing the next set of records, or 'q' to return to the search."); - var key = Console.ReadKey().KeyChar; - if (key == 'q' || key == 'Q') - { - continueLoop = true; - break; - } - } - ++i; - } - if (continueLoop) continue; - - Console.WriteLine("End of results. 
Press any key to perform another search or 'q' to return to the main menu..."); - if (Console.ReadKey().KeyChar == 'q') break; - } - } + ++i; + } + if (continueLoop) continue; - public void Dispose() - { - if (!SearchEngine.Index.IsIndexDropped) - SearchEngine.Dispose(); - if (!RecordTable.IsDropped) - RecordTable.Dispose(); + Console.WriteLine("End of results. Press any key to perform another search or 'q' to return to the main menu..."); + if (Console.ReadKey().KeyChar == 'q') break; } + } + + public void Dispose() + { + if (!SearchEngine.Index.IsIndexDropped) + SearchEngine.Dispose(); + if (!RecordTable.IsDropped) + RecordTable.Dispose(); + } } diff --git a/src/ZoneTree.FullTextSearch.UnitTests/ExecuteParsedQueries.cs b/src/ZoneTree.FullTextSearch.UnitTests/ExecuteParsedQueries.cs index ee3bac1..a4c4f9e 100644 --- a/src/ZoneTree.FullTextSearch.UnitTests/ExecuteParsedQueries.cs +++ b/src/ZoneTree.FullTextSearch.UnitTests/ExecuteParsedQueries.cs @@ -1,4 +1,4 @@ -using ZoneTree.FullTextSearch.QueryLanguage; +using ZoneTree.FullTextSearch.QueryLanguage; using ZoneTree.FullTextSearch.SearchEngines; using ZoneTree.FullTextSearch.Tokenizer; @@ -6,1115 +6,1115 @@ namespace ZoneTree.FullTextSearch.UnitTests; public sealed class ExecuteParsedQueries { - [Test] - public void SimpleQueries() - { - var dataPath = "data/SingleTokenAndQuery2"; - if (Directory.Exists(dataPath)) - Directory.Delete(dataPath, true); - using var searchEngine = new HashedSearchEngine(dataPath); - searchEngine.AddRecord(1, "fox"); - searchEngine.AddRecord(2, "fox cow cat"); - searchEngine.AddRecord(3, "fox cat cow"); - searchEngine.AddFacet(3, "category", "red"); - - var parser = new Parser("(cat OR cow) AND NOT category:tear"); - var query = parser.Parse(); - var result = searchEngine.Search(query).Order(); - Assert.That(result, Is.EqualTo(new int[] { 2, 3 })); - - parser = new Parser("cat cow AND NOT category:red"); - query = parser.Parse(); - result = searchEngine.Search(query).Order(); - 
Assert.That(result, Is.EqualTo(new int[] { 2 })); - - parser = new Parser("'cat cow' AND NOT category:red"); - query = parser.Parse(); - result = searchEngine.Search(query).Order(); - Assert.That(result, Is.EqualTo(new int[] { })); - - parser = new Parser("'cat cow' AND NOT category:blue"); - query = parser.Parse(); - result = searchEngine.Search(query).Order(); - Assert.That(result, Is.EqualTo(new int[] { 3 })); - - parser = new Parser("\"cat cow\" AND NOT category:\"blue\""); - query = parser.Parse(); - result = searchEngine.Search(query).Order(); - Assert.That(result, Is.EqualTo(new int[] { 3 })); - - parser = new Parser("\"cat cow\" AND NOT category:\'blue\'"); - query = parser.Parse(); - result = searchEngine.Search(query).Order(); - Assert.That(result, Is.EqualTo(new int[] { 3 })); - } - - [Test] - public void PhraseQueryTest() - { - var dataPath = "data/PhraseQueryTest"; - if (Directory.Exists(dataPath)) - Directory.Delete(dataPath, true); - using var searchEngine = new HashedSearchEngine(dataPath); - searchEngine.AddRecord(1, "quick brown fox"); - searchEngine.AddRecord(2, "lazy dog"); - searchEngine.AddRecord(3, "quick brown cat"); - - var parser = new Parser("\"quick brown fox\""); - var query = parser.Parse(); - var result = searchEngine.Search(query).Order(); - Assert.That(result, Is.EqualTo(new int[] { 1 })); - - parser = new Parser("'quick brown fox'"); - query = parser.Parse(); - result = searchEngine.Search(query).Order(); - Assert.That(result, Is.EqualTo(new int[] { 1 })); - } - - [Test] - public void FacetQueryTest() - { - var dataPath = "data/FacetQueryTest"; - if (Directory.Exists(dataPath)) - Directory.Delete(dataPath, true); - using var searchEngine = new HashedSearchEngine(dataPath); - searchEngine.AddRecord(1, "electronics"); - searchEngine.AddRecord(2, "electronics Samsung"); - searchEngine.AddFacet(2, "category", "electronics"); - - var parser = new Parser("category:electronics"); - var query = parser.Parse(); - var result = 
searchEngine.Search(query).Order(); - Assert.That(result, Is.EqualTo(new int[] { 2 })); - } - - [Test] - public void FacetInQueryTest() - { - var dataPath = "data/FacetInQueryTest"; - if (Directory.Exists(dataPath)) - Directory.Delete(dataPath, true); - using var searchEngine = new HashedSearchEngine(dataPath); - searchEngine.AddRecord(1, "electronics"); - searchEngine.AddRecord(2, "books"); - searchEngine.AddRecord(3, "furniture"); - searchEngine.AddFacet(1, "category", "electronics"); - searchEngine.AddFacet(2, "category", "books"); - searchEngine.AddFacet(3, "category", "furniture"); - - var parser = new Parser("category IN [\"books\", \"electronics\"]"); - var query = parser.Parse(); - var result = searchEngine.Search(query).Order(); - Assert.That(result, Is.EqualTo(new int[] { 1, 2 })); - } - - [Test] - public void NotInQueryTest() - { - var dataPath = "data/NotInQueryTest"; - if (Directory.Exists(dataPath)) - Directory.Delete(dataPath, true); - using var searchEngine = new HashedSearchEngine(dataPath); - searchEngine.AddRecord(1, "fox"); - searchEngine.AddRecord(2, "lazy dog"); - searchEngine.AddRecord(3, "cat"); - - var parser = new Parser("NOT IN [\"lazy dog\", \"cat\"]"); - var query = parser.Parse(); - var result = searchEngine.Search(query).Order(); - Assert.That(result, Is.EqualTo(new int[] { 1 })); - } - - [Test] - public void ComplexAndOrQueryTest() - { - var dataPath = "data/ComplexAndOrQueryTest"; - if (Directory.Exists(dataPath)) - Directory.Delete(dataPath, true); - using var searchEngine = new HashedSearchEngine(dataPath); - searchEngine.AddRecord(1, "cat dog"); - searchEngine.AddRecord(2, "cat fox"); - searchEngine.AddRecord(3, "dog fox"); - - var parser = new Parser("cat AND (dog OR fox)"); - var query = parser.Parse(); - var result = searchEngine.Search(query).Order(); - Assert.That(result, Is.EqualTo(new int[] { 1, 2 })); - } - - [Test] - public void NegationQueryWithAndTest() - { - var dataPath = "data/NegationQueryTest"; - if 
(Directory.Exists(dataPath)) - Directory.Delete(dataPath, true); - using var searchEngine = new HashedSearchEngine(dataPath); - searchEngine.AddRecord(1, "black cat"); - searchEngine.AddRecord(2, "dog"); - searchEngine.AddRecord(3, "fox"); - - var parser = new Parser("(dog or fox or black) and NOT cat"); - var query = parser.Parse(); - var result = searchEngine.Search(query).Order(); - Assert.That(result, Is.EqualTo(new int[] { 2, 3 })); - } - - [Test] - public void NegationQueryTest() - { - var dataPath = "data/NegationQueryTest"; - if (Directory.Exists(dataPath)) - Directory.Delete(dataPath, true); - using var searchEngine = new HashedSearchEngine(dataPath); - searchEngine.AddRecord(1, "black cat"); - searchEngine.AddRecord(2, "dog"); - searchEngine.AddRecord(3, "fox"); - - var parser = new Parser("NOT cat"); - var query = parser.Parse(); - var result = searchEngine.Search(query).Order(); - Assert.That(result, Is.EqualTo(new int[] { 2, 3 })); - - parser = new Parser("NOT cat or abc"); - query = parser.Parse(); - result = searchEngine.Search(query).Order(); - Assert.That(result, Is.EqualTo(new int[] { 2, 3 })); - } - - [Test] - public void FacetWithAndQueryTest() - { - var dataPath = "data/FacetWithAndQueryTest"; - if (Directory.Exists(dataPath)) - Directory.Delete(dataPath, true); - using var searchEngine = new HashedSearchEngine(dataPath); - searchEngine.AddRecord(1, "books"); - searchEngine.AddFacet(1, "author", "F. Scott Fitzgerald"); - searchEngine.AddRecord(2, "books"); - searchEngine.AddFacet(2, "author", "Bar"); - - var parser = new Parser("books AND author:\"F. 
Scott Fitzgerald\""); - var query = parser.Parse(); - var result = searchEngine.Search(query).Order(); - Assert.That(result, Is.EqualTo(new int[] { 1 })); - } - - [Test] - public void FacetWithOrQueryTest() - { - var dataPath = "data/FacetWithOrQueryTest"; - if (Directory.Exists(dataPath)) - Directory.Delete(dataPath, true); - using var searchEngine = new HashedSearchEngine(dataPath); - searchEngine.AddRecord(1, "books"); - searchEngine.AddFacet(1, "author", "J.K. Rowling"); - searchEngine.AddFacet(1, "category", "books"); - searchEngine.AddRecord(2, "electronics"); - searchEngine.AddFacet(2, "brand", "Sony"); - searchEngine.AddFacet(2, "category", "electronics"); - - var parser = new Parser("(category:books OR category:electronics) AND (author:\"J.K. Rowling\" OR brand:Sony)"); - var query = parser.Parse(); - var result = searchEngine.Search(query).Order(); - Assert.That(result, Is.EqualTo(new int[] { 1, 2 })); - } - - [Test] - public void FacetNotInQueryTest() - { - var dataPath = "data/FacetNotInQueryTest"; - if (Directory.Exists(dataPath)) - Directory.Delete(dataPath, true); - using var searchEngine = new HashedSearchEngine(dataPath); - searchEngine.AddRecord(1, "books"); - searchEngine.AddFacet(1, "category", "books"); - searchEngine.AddRecord(2, "electronics"); - searchEngine.AddFacet(2, "category", "electronics"); - searchEngine.AddRecord(3, "furniture"); - searchEngine.AddFacet(3, "category", "furniture"); - - var parser = new Parser("category NOT IN [\"books\", \"furniture\"]"); - var query = parser.Parse(); - var result = searchEngine.Search(query).Order(); - Assert.That(result, Is.EqualTo(new int[] { 2 })); - } - - [Test] - public void InKeywordListQueryTest() - { - var dataPath = "data/InKeywordListQueryTest"; - if (Directory.Exists(dataPath)) - Directory.Delete(dataPath, true); - using var searchEngine = new HashedSearchEngine(dataPath); - searchEngine.AddRecord(1, "cat"); - searchEngine.AddRecord(2, "dog"); - searchEngine.AddRecord(3, "fox"); - 
searchEngine.AddRecord(4, "cow"); - - var parser = new Parser("IN [\"cat\", \"dog\", \"fox\"]"); - var query = parser.Parse(); - var result = searchEngine.Search(query).Order(); - Assert.That(result, Is.EqualTo(new int[] { 1, 2, 3 })); - } - - [Test] - public void FacetExpressionWithNotInTest() - { - var dataPath = "data/FacetExpressionWithNotInTest"; - if (Directory.Exists(dataPath)) - Directory.Delete(dataPath, true); - using var searchEngine = new HashedSearchEngine(dataPath); - searchEngine.AddRecord(1, "Samsung"); - searchEngine.AddFacet(1, "brand", "Samsung"); - searchEngine.AddRecord(2, "Nokia"); - searchEngine.AddFacet(2, "brand", "Nokia"); - searchEngine.AddRecord(3, "Sony"); - searchEngine.AddFacet(3, "brand", "Sony"); - - var parser = new Parser("NOT brand IN [\"Samsung\", \"Nokia\"]"); - var query = parser.Parse(); - var result = searchEngine.Search(query).Order(); - Assert.That(result, Is.EqualTo(new int[] { 3 })); - } - - [Test] - public void MultipleFacetsInComplexQueryTest() - { - var dataPath = "data/MultipleFacetsInComplexQueryTest"; - if (Directory.Exists(dataPath)) - Directory.Delete(dataPath, true); - using var searchEngine = new HashedSearchEngine(dataPath); - searchEngine.AddRecord(1, "Harry Potter"); - searchEngine.AddFacet(1, "author", "J.K. Rowling"); - searchEngine.AddFacet(1, "category", "books"); - searchEngine.AddRecord(2, "Sony TV"); - searchEngine.AddFacet(2, "brand", "Sony"); - searchEngine.AddFacet(2, "category", "electronics"); - searchEngine.AddRecord(3, "Samsung Galaxy"); - searchEngine.AddFacet(3, "brand", "Samsung"); - searchEngine.AddFacet(3, "category", "electronics"); - - var parser = new Parser("(category:books OR category:electronics) AND (author:\"J.K. 
Rowling\" OR brand:Sony)"); - var query = parser.Parse(); - var result = searchEngine.Search(query).Order(); - Assert.That(result, Is.EqualTo(new int[] { 1, 2 })); - } - - [Test] - public void SimpleKeywordQueryTest() - { - var dataPath = "data/SimpleKeywordQueryTest"; - if (Directory.Exists(dataPath)) - Directory.Delete(dataPath, true); - using var searchEngine = new HashedSearchEngine(dataPath); - searchEngine.AddRecord(1, "cat"); - searchEngine.AddRecord(2, "dog"); - searchEngine.AddRecord(3, "cat dog"); - searchEngine.AddRecord(4, "fox"); - - var parser = new Parser("cat dog"); - var query = parser.Parse(); - var result = searchEngine.Search(query).Order(); - Assert.That(result, Is.EqualTo(new int[] { 3 })); - } - - [Test] - public void SimpleFacetQueryTest() - { - var dataPath = "data/SimpleFacetQueryTest"; - if (Directory.Exists(dataPath)) - Directory.Delete(dataPath, true); - using var searchEngine = new HashedSearchEngine(dataPath); - searchEngine.AddRecord(1, "electronics"); - searchEngine.AddFacet(1, "category", "electronics"); - searchEngine.AddRecord(2, "appliances"); - searchEngine.AddFacet(2, "category", "appliances"); - - var parser = new Parser("category:electronics"); - var query = parser.Parse(); - var result = searchEngine.Search(query).Order(); - Assert.That(result, Is.EqualTo(new int[] { 1 })); - } - - [Test] - public void MultipleFacetExpressionsTest() - { - var dataPath = "data/MultipleFacetExpressionsTest"; - if (Directory.Exists(dataPath)) - Directory.Delete(dataPath, true); - using var searchEngine = new HashedSearchEngine(dataPath); - searchEngine.AddRecord(1, "electronics"); - searchEngine.AddFacet(1, "category", "electronics"); - searchEngine.AddFacet(1, "brand", "Samsung"); - searchEngine.AddRecord(2, "electronics"); - searchEngine.AddFacet(2, "category", "electronics"); - searchEngine.AddFacet(2, "brand", "Sony"); - searchEngine.AddRecord(3, "device"); - searchEngine.AddFacet(2, "category", "analog"); - searchEngine.AddFacet(2, 
"brand", "Dell"); - - var parser = new Parser("category:electronics brand:Samsung"); - var query = parser.Parse(); - var result = searchEngine.Search(query).Order(); - Assert.That(result, Is.EqualTo(new int[] { 1 })); - } - - [Test] - public void FacetWithPhraseTest() - { - var dataPath = "data/FacetWithPhraseTest"; - if (Directory.Exists(dataPath)) - Directory.Delete(dataPath, true); - using var searchEngine = new HashedSearchEngine(dataPath); - searchEngine.AddRecord(1, "The Great Gatsby"); - searchEngine.AddFacet(1, "title", "The Great Gatsby"); - searchEngine.AddRecord(2, "To Kill a Mockingbird"); - searchEngine.AddFacet(2, "title", "To Kill a Mockingbird"); - - var parser = new Parser("title:\"The Great Gatsby\""); - var query = parser.Parse(); - var result = searchEngine.Search(query).Order(); - Assert.That(result, Is.EqualTo(new int[] { 1 })); - } - - [Test] - public void FacetInWithAndQueryTest() - { - var dataPath = "data/FacetInWithAndQueryTest"; - if (Directory.Exists(dataPath)) - Directory.Delete(dataPath, true); - using var searchEngine = new HashedSearchEngine(dataPath); - searchEngine.AddRecord(1, "Samsung"); - searchEngine.AddFacet(1, "brand", "Samsung"); - searchEngine.AddRecord(2, "Sony"); - searchEngine.AddFacet(2, "brand", "Sony"); - searchEngine.AddRecord(3, "Samsung"); - searchEngine.AddFacet(3, "brand", "Samsung"); - searchEngine.AddFacet(3, "category", "electronics"); - - var parser = new Parser("brand IN [\"Samsung\", \"Sony\"] AND category:electronics"); - var query = parser.Parse(); - var result = searchEngine.Search(query).Order(); - Assert.That(result, Is.EqualTo(new int[] { 3 })); - } - - [Test] - public void EmptyQueryTest() - { - var dataPath = "data/CombinedAndQueryTest"; - if (Directory.Exists(dataPath)) - Directory.Delete(dataPath, true); - using var searchEngine = new HashedSearchEngine(dataPath); - searchEngine.AddRecord(1, "cat"); - searchEngine.AddRecord(2, "dog"); - searchEngine.AddRecord(3, "cat dog"); - 
searchEngine.AddRecord(4, "fox"); - - var parser = new Parser(""); - var query = parser.Parse(); - var result = searchEngine.Search(query).Order(); - Assert.That(result, Is.EqualTo(Array.Empty())); - } - - [Test] - public void CombinedAndQueryTest() - { - var dataPath = "data/CombinedAndQueryTest"; - if (Directory.Exists(dataPath)) - Directory.Delete(dataPath, true); - using var searchEngine = new HashedSearchEngine(dataPath); - searchEngine.AddRecord(1, "cat"); - searchEngine.AddRecord(2, "dog"); - searchEngine.AddRecord(3, "cat dog"); - searchEngine.AddRecord(4, "fox"); - - var parser = new Parser("cat AND dog"); - var query = parser.Parse(); - var result = searchEngine.Search(query).Order(); - Assert.That(result, Is.EqualTo(new int[] { 3 })); - } - - [Test] - public void CombinedOrQueryTest() - { - var dataPath = "data/CombinedOrQueryTest"; - if (Directory.Exists(dataPath)) - Directory.Delete(dataPath, true); - using var searchEngine = new HashedSearchEngine(dataPath); - searchEngine.AddRecord(1, "cat"); - searchEngine.AddRecord(2, "dog"); - searchEngine.AddRecord(3, "cat dog"); - searchEngine.AddRecord(4, "fox"); - - var parser = new Parser("cat OR dog"); - var query = parser.Parse(); - var result = searchEngine.Search(query).Order(); - Assert.That(result, Is.EqualTo(new int[] { 1, 2, 3 })); - } - - [Test] - public void NegationWithOrQueryTest() - { - var dataPath = "data/NegationWithOrQueryTest"; - if (Directory.Exists(dataPath)) - Directory.Delete(dataPath, true); - using var searchEngine = new HashedSearchEngine(dataPath); - searchEngine.AddRecord(1, "cat"); - searchEngine.AddRecord(2, "dog"); - searchEngine.AddRecord(3, "fox"); - - var parser = new Parser("NOT cat OR dog"); - var query = parser.Parse(); - var result = searchEngine.Search(query).Order(); - Assert.That(result, Is.EqualTo(new int[] { 2, 3 })); - } - - [Test] - public void ComplexAndOrQueryWithNegationTest() - { - var dataPath = "data/ComplexAndOrQueryWithNegationTest"; - if 
(Directory.Exists(dataPath)) - Directory.Delete(dataPath, true); - using var searchEngine = new HashedSearchEngine(dataPath); - searchEngine.AddRecord(1, "cat"); - searchEngine.AddRecord(2, "dog"); - searchEngine.AddRecord(3, "lazy dog"); - searchEngine.AddRecord(4, "fox"); - - var parser = new Parser("(cat OR dog) AND (NOT \"lazy dog\" OR fox)"); - var query = parser.Parse(); - var result = searchEngine.Search(query).Order(); - Assert.That(result, Is.EqualTo(new int[] { 1, 2 })); - } - - [Test] - public void FacetWithNotInQueryTest() - { - var dataPath = "data/FacetWithNotInQueryTest"; - if (Directory.Exists(dataPath)) - Directory.Delete(dataPath, true); - using var searchEngine = new HashedSearchEngine(dataPath); - searchEngine.AddRecord(1, "electronics"); - searchEngine.AddFacet(1, "category", "electronics"); - searchEngine.AddFacet(1, "brand", "Samsung"); - searchEngine.AddRecord(2, "books"); - searchEngine.AddFacet(2, "category", "books"); - searchEngine.AddFacet(2, "brand", "Sony"); - - var parser = new Parser("NOT category IN [\"books\", \"furniture\"] AND brand:Samsung"); - var query = parser.Parse(); - var result = searchEngine.Search(query).Order(); - Assert.That(result, Is.EqualTo(new int[] { 1 })); - } - - [Test] - public void MultipleKeywordsWithoutOperatorsTest() - { - var dataPath = "data/MultipleKeywordsWithoutOperatorsTest"; - if (Directory.Exists(dataPath)) - Directory.Delete(dataPath, true); - using var searchEngine = new HashedSearchEngine(dataPath); - searchEngine.AddRecord(1, "quick"); - searchEngine.AddRecord(2, "brown"); - searchEngine.AddRecord(3, "fox"); - searchEngine.AddRecord(4, "quick brown fox"); - - var parser = new Parser("quick brown fox"); - var query = parser.Parse(); - var result = searchEngine.Search(query).Order(); - Assert.That(result, Is.EqualTo(new int[] { 4 })); - } - - [Test] - public void ComplexMultiFacetAndOrQueryTest() - { - var dataPath = "data/ComplexMultiFacetAndOrQueryTest"; - if (Directory.Exists(dataPath)) - 
Directory.Delete(dataPath, true); - using var searchEngine = new HashedSearchEngine(dataPath); - - // Adding multiple records with complex facets - searchEngine.AddRecord(1, "Harry Potter and the Sorcerer's Stone"); - searchEngine.AddFacet(1, "author", "J.K. Rowling"); - searchEngine.AddFacet(1, "category", "books"); - searchEngine.AddFacet(1, "publisher", "Bloomsbury"); - - searchEngine.AddRecord(2, "Harry Potter and the Chamber of Secrets"); - searchEngine.AddFacet(2, "author", "J.K. Rowling"); - searchEngine.AddFacet(2, "category", "books"); - searchEngine.AddFacet(2, "publisher", "Scholastic"); - - searchEngine.AddRecord(3, "The Fellowship of the Ring"); - searchEngine.AddFacet(3, "author", "J.R.R. Tolkien"); - searchEngine.AddFacet(3, "category", "books"); - searchEngine.AddFacet(3, "publisher", "Allen & Unwin"); - - searchEngine.AddRecord(4, "The Two Towers"); - searchEngine.AddFacet(4, "author", "J.R.R. Tolkien"); - searchEngine.AddFacet(4, "category", "books"); - searchEngine.AddFacet(4, "publisher", "Allen & Unwin"); - - searchEngine.AddRecord(5, "The Return of the King"); - searchEngine.AddFacet(5, "author", "J.R.R. Tolkien"); - searchEngine.AddFacet(5, "category", "books"); - searchEngine.AddFacet(5, "publisher", "Allen & Unwin"); - - searchEngine.AddRecord(6, "The Hobbit"); - searchEngine.AddFacet(6, "author", "J.R.R. Tolkien"); - searchEngine.AddFacet(6, "category", "books"); - searchEngine.AddFacet(6, "publisher", "George Allen & Unwin"); - - searchEngine.AddRecord(7, "The Hobbit: An Unexpected Journey"); - searchEngine.AddFacet(7, "author", "Peter Jackson"); - searchEngine.AddFacet(7, "category", "movies"); - - // Complex query combining multiple facets and conditions - var parser = new Parser("(author:\"J.K. Rowling\" AND publisher:Scholastic) OR (author:\"J.R.R. 
Tolkien\" AND publisher:\"Allen & Unwin\")"); - var query = parser.Parse(); - var result = searchEngine.Search(query).Order(); - Assert.That(result, Is.EqualTo(new int[] { 2, 3, 4, 5 })); - } - - [Test] - public void MultiLayeredNestedQueryTest() - { - var dataPath = "data/MultiLayeredNestedQueryTest"; - if (Directory.Exists(dataPath)) - Directory.Delete(dataPath, true); - using var searchEngine = new HashedSearchEngine(dataPath); - - // Adding data with multiple categories and facets - searchEngine.AddRecord(1, "MacBook Pro"); - searchEngine.AddFacet(1, "brand", "Apple"); - searchEngine.AddFacet(1, "category", "laptops"); - searchEngine.AddFacet(1, "processor", "M1"); - - searchEngine.AddRecord(2, "MacBook Air"); - searchEngine.AddFacet(2, "brand", "Apple"); - searchEngine.AddFacet(2, "category", "laptops"); - searchEngine.AddFacet(2, "processor", "M1"); - - searchEngine.AddRecord(3, "Surface Laptop 4"); - searchEngine.AddFacet(3, "brand", "Microsoft"); - searchEngine.AddFacet(3, "category", "laptops"); - searchEngine.AddFacet(3, "processor", "Intel"); - - searchEngine.AddRecord(4, "Dell XPS 13"); - searchEngine.AddFacet(4, "brand", "Dell"); - searchEngine.AddFacet(4, "category", "laptops"); - searchEngine.AddFacet(4, "processor", "Intel"); - - searchEngine.AddRecord(5, "iPhone 12"); - searchEngine.AddFacet(5, "brand", "Apple"); - searchEngine.AddFacet(5, "category", "smartphones"); - searchEngine.AddFacet(5, "processor", "A14"); - - searchEngine.AddRecord(6, "Galaxy S21"); - searchEngine.AddFacet(6, "brand", "Samsung"); - searchEngine.AddFacet(6, "category", "smartphones"); - searchEngine.AddFacet(6, "processor", "Exynos"); - - searchEngine.AddRecord(7, "Surface Pro 7"); - searchEngine.AddFacet(7, "brand", "Microsoft"); - searchEngine.AddFacet(7, "category", "tablets"); - searchEngine.AddFacet(7, "processor", "Intel"); - - // Multi-layered nested query combining facets, categories, and processors - var parser = new Parser("((brand:Apple AND category:laptops AND 
processor:M1) OR (brand:Microsoft AND category:laptops)) AND NOT brand:Dell"); - var query = parser.Parse(); - var result = searchEngine.Search(query).Order(); - Assert.That(result, Is.EqualTo(new int[] { 1, 2, 3 })); - } - - [Test] - public void QueryWithMultipleNegationsTest() - { - var dataPath = "data/QueryWithMultipleNegationsTest"; - if (Directory.Exists(dataPath)) - Directory.Delete(dataPath, true); - using var searchEngine = new HashedSearchEngine(dataPath); - - // Adding more records with different categories and facets - searchEngine.AddRecord(1, "Harry Potter and the Philosopher's Stone"); - searchEngine.AddFacet(1, "author", "J.K. Rowling"); - searchEngine.AddFacet(1, "category", "books"); - searchEngine.AddFacet(1, "publisher", "Bloomsbury"); - - searchEngine.AddRecord(2, "Harry Potter and the Chamber of Secrets"); - searchEngine.AddFacet(2, "author", "J.K. Rowling"); - searchEngine.AddFacet(2, "category", "books"); - searchEngine.AddFacet(2, "publisher", "Scholastic"); - - searchEngine.AddRecord(3, "The Hobbit"); - searchEngine.AddFacet(3, "author", "J.R.R. Tolkien"); - searchEngine.AddFacet(3, "category", "books"); - searchEngine.AddFacet(3, "publisher", "Allen & Unwin"); - - searchEngine.AddRecord(4, "The Hobbit: An Unexpected Journey"); - searchEngine.AddFacet(4, "author", "Peter Jackson"); - searchEngine.AddFacet(4, "category", "movies"); - - searchEngine.AddRecord(5, "Inception"); - searchEngine.AddFacet(5, "director", "Christopher Nolan"); - searchEngine.AddFacet(5, "category", "movies"); - - searchEngine.AddRecord(6, "Interstellar"); - searchEngine.AddFacet(6, "director", "Christopher Nolan"); - searchEngine.AddFacet(6, "category", "movies"); - - searchEngine.AddRecord(7, "Dunkirk"); - searchEngine.AddFacet(7, "director", "Christopher Nolan"); - searchEngine.AddFacet(7, "category", "movies"); - - // Complex query with multiple negations and AND/OR combinations - var parser = new Parser("(category:books AND NOT author:\"J.K. 
Rowling\") OR (category:movies AND NOT director:\"Christopher Nolan\")"); - var query = parser.Parse(); - var result = searchEngine.Search(query).Order(); - Assert.That(result, Is.EqualTo(new int[] { 3, 4 })); - } - - [Test] - public void DeeplyNestedComplexQueryTest() - { - var dataPath = "data/DeeplyNestedComplexQueryTest"; - if (Directory.Exists(dataPath)) - Directory.Delete(dataPath, true); - using var searchEngine = new HashedSearchEngine(dataPath); - - // Adding a variety of records with deeply nested facets and categories - searchEngine.AddRecord(1, "iPhone 13"); - searchEngine.AddFacet(1, "brand", "Apple"); - searchEngine.AddFacet(1, "category", "smartphones"); - searchEngine.AddFacet(1, "processor", "A15"); - - searchEngine.AddRecord(2, "iPhone 12"); - searchEngine.AddFacet(2, "brand", "Apple"); - searchEngine.AddFacet(2, "category", "smartphones"); - searchEngine.AddFacet(2, "processor", "A14"); - - searchEngine.AddRecord(3, "MacBook Air"); - searchEngine.AddFacet(3, "brand", "Apple"); - searchEngine.AddFacet(3, "category", "laptops"); - searchEngine.AddFacet(3, "processor", "M1"); - - searchEngine.AddRecord(4, "Galaxy S21"); - searchEngine.AddFacet(4, "brand", "Samsung"); - searchEngine.AddFacet(4, "category", "smartphones"); - searchEngine.AddFacet(4, "processor", "Exynos"); - - searchEngine.AddRecord(5, "Surface Laptop 4"); - searchEngine.AddFacet(5, "brand", "Microsoft"); - searchEngine.AddFacet(5, "category", "laptops"); - searchEngine.AddFacet(5, "processor", "Intel"); - - searchEngine.AddRecord(6, "Surface Pro 7"); - searchEngine.AddFacet(6, "brand", "Microsoft"); - searchEngine.AddFacet(6, "category", "tablets"); - searchEngine.AddFacet(6, "processor", "Intel"); - - searchEngine.AddRecord(7, "Dell XPS 13"); - searchEngine.AddFacet(7, "brand", "Dell"); - searchEngine.AddFacet(7, "category", "laptops"); - searchEngine.AddFacet(7, "processor", "Intel"); - - searchEngine.AddRecord(8, "ThinkPad X1 Carbon"); - searchEngine.AddFacet(8, "brand", 
"Lenovo"); - searchEngine.AddFacet(8, "category", "laptops"); - searchEngine.AddFacet(8, "processor", "Intel"); - - // Deeply nested query combining AND, OR, and NOT across different categories and facets - var parser = new Parser("((brand:Apple AND category:smartphones) OR " + - "(brand:Microsoft AND category:laptops)) AND " + - "(processor:Intel OR NOT category:tablets)"); - var query = parser.Parse(); - var result = searchEngine.Search(query).Order(); - Assert.That(result, Is.EqualTo(new int[] { 1, 2, 5 })); - } - - [Test] - public void OperatorPrecedenceBetweenAndOrTest() - { - var dataPath = "data/OperatorPrecedenceBetweenAndOrTest"; - if (Directory.Exists(dataPath)) - Directory.Delete(dataPath, true); - using var searchEngine = new HashedSearchEngine(dataPath); - - // Adding records - searchEngine.AddRecord(1, "cat dog fox"); - searchEngine.AddRecord(2, "cat fox"); - searchEngine.AddRecord(3, "dog fox"); - searchEngine.AddRecord(4, "dog"); - searchEngine.AddRecord(5, "fox"); - searchEngine.AddRecord(6, "cat"); - - // Query: cat AND dog OR fox - // Expected interpretation: (cat AND dog) OR fox - // Matches records: 1, 2, 3, 5 - var parser = new Parser("cat AND dog OR fox"); - var query = parser.Parse(); - var result = searchEngine.Search(query).Order(); - Assert.That(result, Is.EqualTo(new int[] { 1, 2, 3, 5 })); - - - // Query: cat AND dog OR fox - // Expected interpretation: fox OR (cat AND dog) - // Matches records: 1, 2, 3, 5 - parser = new Parser("fox OR cat AND dog"); - query = parser.Parse(); - result = searchEngine.Search(query).Order(); - Assert.That(result, Is.EqualTo(new int[] { 1, 2, 3, 5 })); - - // Query: cat AND (dog OR fox) - // Explicit precedence given by parentheses - // Matches records: 1, 2 - parser = new Parser("cat AND (dog OR fox)"); - query = parser.Parse(); - result = searchEngine.Search(query).Order(); - Assert.That(result, Is.EqualTo(new int[] { 1, 2 })); - } - - [Test] - public void BasicOperatorPrecedenceTest() - { - var dataPath 
= "data/BasicOperatorPrecedenceTest"; - if (Directory.Exists(dataPath)) - Directory.Delete(dataPath, true); - using var searchEngine = new HashedSearchEngine(dataPath); - - // Adding records - searchEngine.AddRecord(1, "cat dog"); - searchEngine.AddRecord(2, "cat"); - searchEngine.AddRecord(3, "dog"); - searchEngine.AddRecord(4, "fox"); - searchEngine.AddRecord(5, "dog fox"); - - // Query: cat OR dog AND NOT fox - // Expected interpretation: cat OR (dog AND (NOT fox)) - // Matches records: 1, 2, 3 - var parser = new Parser("cat OR dog AND NOT fox"); - var query = parser.Parse(); - var result = searchEngine.Search(query).Order(); - Assert.That(result, Is.EqualTo(new int[] { 1, 2, 3 })); - } - - [Test] - public void NestedOperatorPrecedenceTest() - { - var dataPath = "data/NestedOperatorPrecedenceTest"; - if (Directory.Exists(dataPath)) - Directory.Delete(dataPath, true); - using var searchEngine = new HashedSearchEngine(dataPath); - - // Adding records - searchEngine.AddRecord(1, "cat dog"); - searchEngine.AddRecord(2, "cat fox"); - searchEngine.AddRecord(3, "dog fox"); - searchEngine.AddRecord(4, "dog"); - searchEngine.AddRecord(5, "fox"); - searchEngine.AddRecord(6, "cat"); - - // Query: (cat OR dog) AND NOT (fox OR dog) - // Expected interpretation: (cat OR dog) AND NOT (fox OR dog) - // Matches records: 6 - var parser = new Parser("(cat OR dog) AND NOT (fox OR dog)"); - var query = parser.Parse(); - var result = searchEngine.Search(query).Order(); - Assert.That(result, Is.EqualTo(new int[] { 6 })); - - // Query: NOT cat OR (dog AND fox) - // Expected interpretation: (NOT cat) OR (dog AND fox) - // Matches records: 3, 4, 5 - parser = new Parser("NOT cat OR (dog AND fox)"); - query = parser.Parse(); - result = searchEngine.Search(query).Order(); - Assert.That(result, Is.EqualTo(new int[] { 3, 4, 5 })); - } - - [Test] - public void ComplexOperatorPrecedenceWithFacetsTest() - { - var dataPath = "data/ComplexOperatorPrecedenceWithFacetsTest"; - if 
(Directory.Exists(dataPath)) - Directory.Delete(dataPath, true); - using var searchEngine = new HashedSearchEngine(dataPath); - - // Adding records - searchEngine.AddRecord(1, "book"); - searchEngine.AddRecord(2, "book"); - searchEngine.AddRecord(3, "electronics"); - searchEngine.AddFacet(1, "author", "George Orwell"); - searchEngine.AddFacet(2, "author", "F. Scott Fitzgerald"); - searchEngine.AddFacet(3, "brand", "Sony"); - - // Query: author:"George Orwell" OR brand:Sony AND NOT author:"F. Scott Fitzgerald" - // Expected interpretation: (author:"George Orwell") OR (brand:Sony AND NOT author:"F. Scott Fitzgerald") - // Matches records: 1, 3 - var parser = new Parser("author:\"George Orwell\" OR brand:Sony AND NOT author:\"F. Scott Fitzgerald\""); - var query = parser.Parse(); - var result = searchEngine.Search(query).Order(); - Assert.That(result, Is.EqualTo(new int[] { 1, 3 })); - } - - [Test] - public void MixedOperationsWithParenthesesTest() - { - var dataPath = "data/MixedOperationsWithParenthesesTest"; - if (Directory.Exists(dataPath)) - Directory.Delete(dataPath, true); - using var searchEngine = new HashedSearchEngine(dataPath); - - // Adding records - searchEngine.AddRecord(1, "cat dog fox"); - searchEngine.AddRecord(2, "cat dog"); - searchEngine.AddRecord(3, "cat fox"); - searchEngine.AddRecord(4, "dog fox"); - searchEngine.AddRecord(5, "fox"); - searchEngine.AddRecord(6, "dog"); - - // Query: (cat OR dog) AND fox - // Expected interpretation: ((cat OR dog) AND fox) - // Matches records: 1, 3, 4 - var parser = new Parser("(cat OR dog) AND fox"); - var query = parser.Parse(); - var result = searchEngine.Search(query).Order(); - Assert.That(result, Is.EqualTo(new int[] { 1, 3, 4 })); - - // Query: NOT (cat OR fox) AND dog - // Expected interpretation: (NOT (cat OR fox)) AND dog - // Matches records: 6 - parser = new Parser("NOT (cat OR fox) AND dog"); - query = parser.Parse(); - result = searchEngine.Search(query).Order(); - Assert.That(result, 
Is.EqualTo(new int[] { 6 })); - } - - [Test] - public void UnicodeSupportTest() - { - var dataPath = "data/UnicodeSupportTest"; - if (Directory.Exists(dataPath)) - Directory.Delete(dataPath, true); - using var searchEngine = new HashedSearchEngine( - dataPath, - wordTokenizer: new WordTokenizer(1)); - - // Adding records with Unicode characters - searchEngine.AddRecord(1, "こんにちは 世界"); // Japanese for "Hello World" - searchEngine.AddRecord(2, "Привет мир"); // Russian for "Hello World" - searchEngine.AddRecord(3, "你好 世界"); // Chinese for "Hello World" - searchEngine.AddRecord(4, "안녕하세요 세계"); // Korean for "Hello World" - searchEngine.AddRecord(5, "Hello World"); // English - searchEngine.AddRecord(6, "مرحبا بالعالم"); // Arabic for "Hello World" - - // Query: "こんにちは" - // Should match record 1 - var parser = new Parser("こんにちは"); - var query = parser.Parse(); - var result = searchEngine.Search(query).Order(); - Assert.That(result, Is.EqualTo(new int[] { 1 })); - - // Query: "Привет" - // Should match record 2 - parser = new Parser("Привет"); - query = parser.Parse(); - result = searchEngine.Search(query).Order(); - Assert.That(result, Is.EqualTo(new int[] { 2 })); - - // Query: "世界" (common in Japanese and Chinese) - // Should match records 1 and 3 - parser = new Parser("世界"); - query = parser.Parse(); - result = searchEngine.Search(query).Order(); - Assert.That(result, Is.EqualTo(new int[] { 1, 3 })); - - // Query: "안녕하세요" - // Should match record 4 - parser = new Parser("안녕하세요"); - query = parser.Parse(); - result = searchEngine.Search(query).Order(); - Assert.That(result, Is.EqualTo(new int[] { 4 })); - - // Query: "Hello" - // Should match record 5 - parser = new Parser("Hello"); - query = parser.Parse(); - result = searchEngine.Search(query).Order(); - Assert.That(result, Is.EqualTo(new int[] { 5 })); - - // Query: "World" - // Should match record 5 - parser = new Parser("World"); - query = parser.Parse(); - result = searchEngine.Search(query).Order(); - 
Assert.That(result, Is.EqualTo(new int[] { 5 })); - - // Query: "مرحبا" - // Should match record 6 - parser = new Parser("مرحبا"); - var queryArabic = parser.Parse(); - var resultArabic = searchEngine.Search(queryArabic).Order(); - Assert.That(resultArabic, Is.EqualTo(new int[] { 6 })); - - // Query: "بالعالم" - // Should match record 6 - parser = new Parser("بالعالم"); - queryArabic = parser.Parse(); - resultArabic = searchEngine.Search(queryArabic).Order(); - Assert.That(resultArabic, Is.EqualTo(new int[] { 6 })); - - // Query: "مرحبا بالعالم" - // Should match record 6 - parser = new Parser("\"مرحبا بالعالم\""); - queryArabic = parser.Parse(); - resultArabic = searchEngine.Search(queryArabic).Order(); - Assert.That(resultArabic, Is.EqualTo(new int[] { 6 })); - - // Query: "Hello World" - // Should match record 5 (exact match for English phrase) - parser = new Parser("\"Hello World\""); - query = parser.Parse(); - result = searchEngine.Search(query).Order(); - Assert.That(result, Is.EqualTo(new int[] { 5 })); - } - - [Test] - public void AliasAndOrNotTest() - { - var dataPath = "data/AliasAndOrNotTest"; - if (Directory.Exists(dataPath)) - Directory.Delete(dataPath, true); - using var searchEngine = new HashedSearchEngine(dataPath); - searchEngine.AddRecord(1, "cat dog fox"); - searchEngine.AddRecord(2, "cat fox"); - searchEngine.AddRecord(3, "dog fox"); - searchEngine.AddRecord(4, "dog"); - searchEngine.AddRecord(5, "fox"); - searchEngine.AddRecord(6, "cat"); - - // Test alias for AND (&) - var parser = new Parser("cat & dog"); - var query = parser.Parse(); - var result = searchEngine.Search(query).Order(); - Assert.That(result, Is.EqualTo(new int[] { 1 })); - - // Test alias for OR (|) - parser = new Parser("cat | dog"); - query = parser.Parse(); - result = searchEngine.Search(query).Order(); - Assert.That(result, Is.EqualTo(new int[] { 1, 2, 3, 4, 6 })); - - // Test alias for NOT (-) - parser = new Parser("-fox"); - query = parser.Parse(); - result = 
searchEngine.Search(query).Order(); - Assert.That(result, Is.EqualTo(new int[] { 4, 6 })); - - // Combined aliases test: "cat & (dog | -fox)" - parser = new Parser("cat & (dog | -fox)"); - query = parser.Parse(); - result = searchEngine.Search(query).Order(); - Assert.That(result, Is.EqualTo(new int[] { 1, 6 })); - - - // Test aliases without whitespace - parser = new Parser("cat&(dog|-fox)"); - query = parser.Parse(); - result = searchEngine.Search(query).Order(); - Assert.That(result, Is.EqualTo(new int[] { 1, 6 })); - - // More complex case with all three aliases: "cat & -dog | fox" - parser = new Parser("cat & -dog | fox"); - query = parser.Parse(); - result = searchEngine.Search(query).Order(); - Assert.That(result, Is.EqualTo(new int[] { 1, 2, 3, 5, 6 })); - } - - [Test] - public void DeleteRecordTest() - { - var dataPath = "data/DeleteRecordTest"; - if (Directory.Exists(dataPath)) - Directory.Delete(dataPath, true); - using var searchEngine = new HashedSearchEngine(dataPath, useSecondaryIndex: false); - searchEngine.AddRecord(1, "The quick brown fox jumps over the lazy dog."); - searchEngine.AddRecord(2, "The quick brown fox."); - - // Delete the first record - searchEngine.DeleteRecord(1); - - // Search to confirm deletion - var results = searchEngine.Search("quick brown"); - Assert.That(results.Order(), Is.EqualTo(new int[] { 2 })); - } - - [Test] - public void DeleteTokensTest() - { - var dataPath = "data/DeleteTokensTest"; - if (Directory.Exists(dataPath)) - Directory.Delete(dataPath, true); - using var searchEngine = new HashedSearchEngine(dataPath, useSecondaryIndex: false); - searchEngine.AddRecord(1, "The quick brown fox jumps over the lazy dog."); - searchEngine.AddRecord(2, "The quick brown fox."); - - // Delete tokens associated with the first record - searchEngine.DeleteTokens(1, "The quick brown fox jumps over the lazy dog."); - - // Search to confirm deletion - var results = searchEngine.Search("quick brown or lazy"); - 
Assert.That(results.Order(), Is.EqualTo(new int[] { 2 })); - } - - [Test] - public void UpdateRecordTest() - { - var dataPath = "data/UpdateRecordTest"; - if (Directory.Exists(dataPath)) - Directory.Delete(dataPath, true); - using var searchEngine = new HashedSearchEngine(dataPath); - searchEngine.AddRecord(1, "The quick brown fox jumps over the lazy dog."); - - // Update the record with new text - searchEngine.UpdateRecord( - 1, - "The quick brown fox jumps over the lazy dog.", - "The quick brown fox leaps over the lazy dog."); - - // Confirm the old text is no longer found - var oldResults = searchEngine.Search("jumps"); - Assert.That(oldResults, Is.Empty); - - // Confirm the new text is found - var newResults = searchEngine.Search("leaps"); - Assert.That(newResults.Order(), Is.EqualTo(new int[] { 1 })); - } - - [Test] - public void DeleteAndUpdateMixedTest() - { - var dataPath = "data/DeleteAndUpdateMixedTest"; - if (Directory.Exists(dataPath)) - Directory.Delete(dataPath, true); - using var searchEngine = new HashedSearchEngine(dataPath); - searchEngine.AddRecord(1, "The quick brown fox."); - searchEngine.AddRecord(2, "The lazy dog."); - - // Update the first record - searchEngine.UpdateRecord(1, "The quick brown fox.", "The quick brown bear."); - - // Delete the second record - searchEngine.DeleteRecord(2); - - // Confirm that the old text for the first record is no longer found - var oldResults = searchEngine.Search("fox"); - Assert.That(oldResults, Is.Empty); - - // Confirm that the new text for the first record is found - var newResults = searchEngine.Search("bear"); - Assert.That(newResults.Order(), Is.EqualTo(new int[] { 1 })); - - // Confirm that the second record has been deleted - var deletedResults = searchEngine.Search("dog"); - Assert.That(deletedResults, Is.Empty); - } -} \ No newline at end of file + [Test] + public void SimpleQueries() + { + var dataPath = "data/SingleTokenAndQuery2"; + if (Directory.Exists(dataPath)) + Directory.Delete(dataPath, 
true); + using var searchEngine = new HashedSearchEngine(dataPath); + searchEngine.AddRecord(1, "fox"); + searchEngine.AddRecord(2, "fox cow cat"); + searchEngine.AddRecord(3, "fox cat cow"); + searchEngine.AddFacet(3, "category", "red"); + + var parser = new Parser("(cat OR cow) AND NOT category:tear"); + var query = parser.Parse(); + var result = searchEngine.Search(query).Order(); + Assert.That(result, Is.EqualTo(new int[] { 2, 3 })); + + parser = new Parser("cat cow AND NOT category:red"); + query = parser.Parse(); + result = searchEngine.Search(query).Order(); + Assert.That(result, Is.EqualTo(new int[] { 2 })); + + parser = new Parser("'cat cow' AND NOT category:red"); + query = parser.Parse(); + result = searchEngine.Search(query).Order(); + Assert.That(result, Is.EqualTo(new int[] { })); + + parser = new Parser("'cat cow' AND NOT category:blue"); + query = parser.Parse(); + result = searchEngine.Search(query).Order(); + Assert.That(result, Is.EqualTo(new int[] { 3 })); + + parser = new Parser("\"cat cow\" AND NOT category:\"blue\""); + query = parser.Parse(); + result = searchEngine.Search(query).Order(); + Assert.That(result, Is.EqualTo(new int[] { 3 })); + + parser = new Parser("\"cat cow\" AND NOT category:\'blue\'"); + query = parser.Parse(); + result = searchEngine.Search(query).Order(); + Assert.That(result, Is.EqualTo(new int[] { 3 })); + } + + [Test] + public void PhraseQueryTest() + { + var dataPath = "data/PhraseQueryTest"; + if (Directory.Exists(dataPath)) + Directory.Delete(dataPath, true); + using var searchEngine = new HashedSearchEngine(dataPath); + searchEngine.AddRecord(1, "quick brown fox"); + searchEngine.AddRecord(2, "lazy dog"); + searchEngine.AddRecord(3, "quick brown cat"); + + var parser = new Parser("\"quick brown fox\""); + var query = parser.Parse(); + var result = searchEngine.Search(query).Order(); + Assert.That(result, Is.EqualTo(new int[] { 1 })); + + parser = new Parser("'quick brown fox'"); + query = parser.Parse(); + result 
= searchEngine.Search(query).Order(); + Assert.That(result, Is.EqualTo(new int[] { 1 })); + } + + [Test] + public void FacetQueryTest() + { + var dataPath = "data/FacetQueryTest"; + if (Directory.Exists(dataPath)) + Directory.Delete(dataPath, true); + using var searchEngine = new HashedSearchEngine(dataPath); + searchEngine.AddRecord(1, "electronics"); + searchEngine.AddRecord(2, "electronics Samsung"); + searchEngine.AddFacet(2, "category", "electronics"); + + var parser = new Parser("category:electronics"); + var query = parser.Parse(); + var result = searchEngine.Search(query).Order(); + Assert.That(result, Is.EqualTo(new int[] { 2 })); + } + + [Test] + public void FacetInQueryTest() + { + var dataPath = "data/FacetInQueryTest"; + if (Directory.Exists(dataPath)) + Directory.Delete(dataPath, true); + using var searchEngine = new HashedSearchEngine(dataPath); + searchEngine.AddRecord(1, "electronics"); + searchEngine.AddRecord(2, "books"); + searchEngine.AddRecord(3, "furniture"); + searchEngine.AddFacet(1, "category", "electronics"); + searchEngine.AddFacet(2, "category", "books"); + searchEngine.AddFacet(3, "category", "furniture"); + + var parser = new Parser("category IN [\"books\", \"electronics\"]"); + var query = parser.Parse(); + var result = searchEngine.Search(query).Order(); + Assert.That(result, Is.EqualTo(new int[] { 1, 2 })); + } + + [Test] + public void NotInQueryTest() + { + var dataPath = "data/NotInQueryTest"; + if (Directory.Exists(dataPath)) + Directory.Delete(dataPath, true); + using var searchEngine = new HashedSearchEngine(dataPath); + searchEngine.AddRecord(1, "fox"); + searchEngine.AddRecord(2, "lazy dog"); + searchEngine.AddRecord(3, "cat"); + + var parser = new Parser("NOT IN [\"lazy dog\", \"cat\"]"); + var query = parser.Parse(); + var result = searchEngine.Search(query).Order(); + Assert.That(result, Is.EqualTo(new int[] { 1 })); + } + + [Test] + public void ComplexAndOrQueryTest() + { + var dataPath = "data/ComplexAndOrQueryTest"; + 
if (Directory.Exists(dataPath)) + Directory.Delete(dataPath, true); + using var searchEngine = new HashedSearchEngine(dataPath); + searchEngine.AddRecord(1, "cat dog"); + searchEngine.AddRecord(2, "cat fox"); + searchEngine.AddRecord(3, "dog fox"); + + var parser = new Parser("cat AND (dog OR fox)"); + var query = parser.Parse(); + var result = searchEngine.Search(query).Order(); + Assert.That(result, Is.EqualTo(new int[] { 1, 2 })); + } + + [Test] + public void NegationQueryWithAndTest() + { + // Dedicated directory; NegationQueryTest owns "data/NegationQueryTest". + var dataPath = "data/NegationQueryWithAndTest"; + if (Directory.Exists(dataPath)) + Directory.Delete(dataPath, true); + using var searchEngine = new HashedSearchEngine(dataPath); + searchEngine.AddRecord(1, "black cat"); + searchEngine.AddRecord(2, "dog"); + searchEngine.AddRecord(3, "fox"); + + var parser = new Parser("(dog or fox or black) and NOT cat"); + var query = parser.Parse(); + var result = searchEngine.Search(query).Order(); + Assert.That(result, Is.EqualTo(new int[] { 2, 3 })); + } + + [Test] + public void NegationQueryTest() + { + var dataPath = "data/NegationQueryTest"; + if (Directory.Exists(dataPath)) + Directory.Delete(dataPath, true); + using var searchEngine = new HashedSearchEngine(dataPath); + searchEngine.AddRecord(1, "black cat"); + searchEngine.AddRecord(2, "dog"); + searchEngine.AddRecord(3, "fox"); + + var parser = new Parser("NOT cat"); + var query = parser.Parse(); + var result = searchEngine.Search(query).Order(); + Assert.That(result, Is.EqualTo(new int[] { 2, 3 })); + + parser = new Parser("NOT cat or abc"); + query = parser.Parse(); + result = searchEngine.Search(query).Order(); + Assert.That(result, Is.EqualTo(new int[] { 2, 3 })); + } + + [Test] + public void FacetWithAndQueryTest() + { + var dataPath = "data/FacetWithAndQueryTest"; + if (Directory.Exists(dataPath)) + Directory.Delete(dataPath, true); + using var searchEngine = new HashedSearchEngine(dataPath); + searchEngine.AddRecord(1, "books"); + searchEngine.AddFacet(1, "author", "F.
Scott Fitzgerald"); + searchEngine.AddRecord(2, "books"); + searchEngine.AddFacet(2, "author", "Bar"); + + var parser = new Parser("books AND author:\"F. Scott Fitzgerald\""); + var query = parser.Parse(); + var result = searchEngine.Search(query).Order(); + Assert.That(result, Is.EqualTo(new int[] { 1 })); + } + + [Test] + public void FacetWithOrQueryTest() + { + var dataPath = "data/FacetWithOrQueryTest"; + if (Directory.Exists(dataPath)) + Directory.Delete(dataPath, true); + using var searchEngine = new HashedSearchEngine(dataPath); + searchEngine.AddRecord(1, "books"); + searchEngine.AddFacet(1, "author", "J.K. Rowling"); + searchEngine.AddFacet(1, "category", "books"); + searchEngine.AddRecord(2, "electronics"); + searchEngine.AddFacet(2, "brand", "Sony"); + searchEngine.AddFacet(2, "category", "electronics"); + + var parser = new Parser("(category:books OR category:electronics) AND (author:\"J.K. Rowling\" OR brand:Sony)"); + var query = parser.Parse(); + var result = searchEngine.Search(query).Order(); + Assert.That(result, Is.EqualTo(new int[] { 1, 2 })); + } + + [Test] + public void FacetNotInQueryTest() + { + var dataPath = "data/FacetNotInQueryTest"; + if (Directory.Exists(dataPath)) + Directory.Delete(dataPath, true); + using var searchEngine = new HashedSearchEngine(dataPath); + searchEngine.AddRecord(1, "books"); + searchEngine.AddFacet(1, "category", "books"); + searchEngine.AddRecord(2, "electronics"); + searchEngine.AddFacet(2, "category", "electronics"); + searchEngine.AddRecord(3, "furniture"); + searchEngine.AddFacet(3, "category", "furniture"); + + var parser = new Parser("category NOT IN [\"books\", \"furniture\"]"); + var query = parser.Parse(); + var result = searchEngine.Search(query).Order(); + Assert.That(result, Is.EqualTo(new int[] { 2 })); + } + + [Test] + public void InKeywordListQueryTest() + { + var dataPath = "data/InKeywordListQueryTest"; + if (Directory.Exists(dataPath)) + Directory.Delete(dataPath, true); + using var 
searchEngine = new HashedSearchEngine(dataPath); + searchEngine.AddRecord(1, "cat"); + searchEngine.AddRecord(2, "dog"); + searchEngine.AddRecord(3, "fox"); + searchEngine.AddRecord(4, "cow"); + + var parser = new Parser("IN [\"cat\", \"dog\", \"fox\"]"); + var query = parser.Parse(); + var result = searchEngine.Search(query).Order(); + Assert.That(result, Is.EqualTo(new int[] { 1, 2, 3 })); + } + + [Test] + public void FacetExpressionWithNotInTest() + { + var dataPath = "data/FacetExpressionWithNotInTest"; + if (Directory.Exists(dataPath)) + Directory.Delete(dataPath, true); + using var searchEngine = new HashedSearchEngine(dataPath); + searchEngine.AddRecord(1, "Samsung"); + searchEngine.AddFacet(1, "brand", "Samsung"); + searchEngine.AddRecord(2, "Nokia"); + searchEngine.AddFacet(2, "brand", "Nokia"); + searchEngine.AddRecord(3, "Sony"); + searchEngine.AddFacet(3, "brand", "Sony"); + + var parser = new Parser("NOT brand IN [\"Samsung\", \"Nokia\"]"); + var query = parser.Parse(); + var result = searchEngine.Search(query).Order(); + Assert.That(result, Is.EqualTo(new int[] { 3 })); + } + + [Test] + public void MultipleFacetsInComplexQueryTest() + { + var dataPath = "data/MultipleFacetsInComplexQueryTest"; + if (Directory.Exists(dataPath)) + Directory.Delete(dataPath, true); + using var searchEngine = new HashedSearchEngine(dataPath); + searchEngine.AddRecord(1, "Harry Potter"); + searchEngine.AddFacet(1, "author", "J.K. Rowling"); + searchEngine.AddFacet(1, "category", "books"); + searchEngine.AddRecord(2, "Sony TV"); + searchEngine.AddFacet(2, "brand", "Sony"); + searchEngine.AddFacet(2, "category", "electronics"); + searchEngine.AddRecord(3, "Samsung Galaxy"); + searchEngine.AddFacet(3, "brand", "Samsung"); + searchEngine.AddFacet(3, "category", "electronics"); + + var parser = new Parser("(category:books OR category:electronics) AND (author:\"J.K. 
Rowling\" OR brand:Sony)"); + var query = parser.Parse(); + var result = searchEngine.Search(query).Order(); + Assert.That(result, Is.EqualTo(new int[] { 1, 2 })); + } + + [Test] + public void SimpleKeywordQueryTest() + { + var dataPath = "data/SimpleKeywordQueryTest"; + if (Directory.Exists(dataPath)) + Directory.Delete(dataPath, true); + using var searchEngine = new HashedSearchEngine(dataPath); + searchEngine.AddRecord(1, "cat"); + searchEngine.AddRecord(2, "dog"); + searchEngine.AddRecord(3, "cat dog"); + searchEngine.AddRecord(4, "fox"); + + var parser = new Parser("cat dog"); + var query = parser.Parse(); + var result = searchEngine.Search(query).Order(); + Assert.That(result, Is.EqualTo(new int[] { 3 })); + } + + [Test] + public void SimpleFacetQueryTest() + { + var dataPath = "data/SimpleFacetQueryTest"; + if (Directory.Exists(dataPath)) + Directory.Delete(dataPath, true); + using var searchEngine = new HashedSearchEngine(dataPath); + searchEngine.AddRecord(1, "electronics"); + searchEngine.AddFacet(1, "category", "electronics"); + searchEngine.AddRecord(2, "appliances"); + searchEngine.AddFacet(2, "category", "appliances"); + + var parser = new Parser("category:electronics"); + var query = parser.Parse(); + var result = searchEngine.Search(query).Order(); + Assert.That(result, Is.EqualTo(new int[] { 1 })); + } + + [Test] + public void MultipleFacetExpressionsTest() + { + var dataPath = "data/MultipleFacetExpressionsTest"; + if (Directory.Exists(dataPath)) + Directory.Delete(dataPath, true); + using var searchEngine = new HashedSearchEngine(dataPath); + searchEngine.AddRecord(1, "electronics"); + searchEngine.AddFacet(1, "category", "electronics"); + searchEngine.AddFacet(1, "brand", "Samsung"); + searchEngine.AddRecord(2, "electronics"); + searchEngine.AddFacet(2, "category", "electronics"); + searchEngine.AddFacet(2, "brand", "Sony"); + // Record 3 carries facets that must not match the query below. + searchEngine.AddRecord(3, "device"); + searchEngine.AddFacet(3, "category", "analog"); + searchEngine.AddFacet(3,
"brand", "Dell"); + + var parser = new Parser("category:electronics brand:Samsung"); + var query = parser.Parse(); + var result = searchEngine.Search(query).Order(); + Assert.That(result, Is.EqualTo(new int[] { 1 })); + } + + [Test] + public void FacetWithPhraseTest() + { + var dataPath = "data/FacetWithPhraseTest"; + if (Directory.Exists(dataPath)) + Directory.Delete(dataPath, true); + using var searchEngine = new HashedSearchEngine(dataPath); + searchEngine.AddRecord(1, "The Great Gatsby"); + searchEngine.AddFacet(1, "title", "The Great Gatsby"); + searchEngine.AddRecord(2, "To Kill a Mockingbird"); + searchEngine.AddFacet(2, "title", "To Kill a Mockingbird"); + + var parser = new Parser("title:\"The Great Gatsby\""); + var query = parser.Parse(); + var result = searchEngine.Search(query).Order(); + Assert.That(result, Is.EqualTo(new int[] { 1 })); + } + + [Test] + public void FacetInWithAndQueryTest() + { + var dataPath = "data/FacetInWithAndQueryTest"; + if (Directory.Exists(dataPath)) + Directory.Delete(dataPath, true); + using var searchEngine = new HashedSearchEngine(dataPath); + searchEngine.AddRecord(1, "Samsung"); + searchEngine.AddFacet(1, "brand", "Samsung"); + searchEngine.AddRecord(2, "Sony"); + searchEngine.AddFacet(2, "brand", "Sony"); + searchEngine.AddRecord(3, "Samsung"); + searchEngine.AddFacet(3, "brand", "Samsung"); + searchEngine.AddFacet(3, "category", "electronics"); + + var parser = new Parser("brand IN [\"Samsung\", \"Sony\"] AND category:electronics"); + var query = parser.Parse(); + var result = searchEngine.Search(query).Order(); + Assert.That(result, Is.EqualTo(new int[] { 3 })); + } + + [Test] + public void EmptyQueryTest() + { + // Dedicated directory; CombinedAndQueryTest owns "data/CombinedAndQueryTest". + var dataPath = "data/EmptyQueryTest"; + if (Directory.Exists(dataPath)) + Directory.Delete(dataPath, true); + using var searchEngine = new HashedSearchEngine(dataPath); + searchEngine.AddRecord(1, "cat"); + searchEngine.AddRecord(2, "dog"); + searchEngine.AddRecord(3, "cat dog"); +
searchEngine.AddRecord(4, "fox"); + + var parser = new Parser(""); + var query = parser.Parse(); + var result = searchEngine.Search(query).Order(); + Assert.That(result, Is.EqualTo(Array.Empty())); + } + + [Test] + public void CombinedAndQueryTest() + { + var dataPath = "data/CombinedAndQueryTest"; + if (Directory.Exists(dataPath)) + Directory.Delete(dataPath, true); + using var searchEngine = new HashedSearchEngine(dataPath); + searchEngine.AddRecord(1, "cat"); + searchEngine.AddRecord(2, "dog"); + searchEngine.AddRecord(3, "cat dog"); + searchEngine.AddRecord(4, "fox"); + + var parser = new Parser("cat AND dog"); + var query = parser.Parse(); + var result = searchEngine.Search(query).Order(); + Assert.That(result, Is.EqualTo(new int[] { 3 })); + } + + [Test] + public void CombinedOrQueryTest() + { + var dataPath = "data/CombinedOrQueryTest"; + if (Directory.Exists(dataPath)) + Directory.Delete(dataPath, true); + using var searchEngine = new HashedSearchEngine(dataPath); + searchEngine.AddRecord(1, "cat"); + searchEngine.AddRecord(2, "dog"); + searchEngine.AddRecord(3, "cat dog"); + searchEngine.AddRecord(4, "fox"); + + var parser = new Parser("cat OR dog"); + var query = parser.Parse(); + var result = searchEngine.Search(query).Order(); + Assert.That(result, Is.EqualTo(new int[] { 1, 2, 3 })); + } + + [Test] + public void NegationWithOrQueryTest() + { + var dataPath = "data/NegationWithOrQueryTest"; + if (Directory.Exists(dataPath)) + Directory.Delete(dataPath, true); + using var searchEngine = new HashedSearchEngine(dataPath); + searchEngine.AddRecord(1, "cat"); + searchEngine.AddRecord(2, "dog"); + searchEngine.AddRecord(3, "fox"); + + var parser = new Parser("NOT cat OR dog"); + var query = parser.Parse(); + var result = searchEngine.Search(query).Order(); + Assert.That(result, Is.EqualTo(new int[] { 2, 3 })); + } + + [Test] + public void ComplexAndOrQueryWithNegationTest() + { + var dataPath = "data/ComplexAndOrQueryWithNegationTest"; + if 
(Directory.Exists(dataPath)) + Directory.Delete(dataPath, true); + using var searchEngine = new HashedSearchEngine(dataPath); + searchEngine.AddRecord(1, "cat"); + searchEngine.AddRecord(2, "dog"); + searchEngine.AddRecord(3, "lazy dog"); + searchEngine.AddRecord(4, "fox"); + + var parser = new Parser("(cat OR dog) AND (NOT \"lazy dog\" OR fox)"); + var query = parser.Parse(); + var result = searchEngine.Search(query).Order(); + Assert.That(result, Is.EqualTo(new int[] { 1, 2 })); + } + + [Test] + public void FacetWithNotInQueryTest() + { + var dataPath = "data/FacetWithNotInQueryTest"; + if (Directory.Exists(dataPath)) + Directory.Delete(dataPath, true); + using var searchEngine = new HashedSearchEngine(dataPath); + searchEngine.AddRecord(1, "electronics"); + searchEngine.AddFacet(1, "category", "electronics"); + searchEngine.AddFacet(1, "brand", "Samsung"); + searchEngine.AddRecord(2, "books"); + searchEngine.AddFacet(2, "category", "books"); + searchEngine.AddFacet(2, "brand", "Sony"); + + var parser = new Parser("NOT category IN [\"books\", \"furniture\"] AND brand:Samsung"); + var query = parser.Parse(); + var result = searchEngine.Search(query).Order(); + Assert.That(result, Is.EqualTo(new int[] { 1 })); + } + + [Test] + public void MultipleKeywordsWithoutOperatorsTest() + { + var dataPath = "data/MultipleKeywordsWithoutOperatorsTest"; + if (Directory.Exists(dataPath)) + Directory.Delete(dataPath, true); + using var searchEngine = new HashedSearchEngine(dataPath); + searchEngine.AddRecord(1, "quick"); + searchEngine.AddRecord(2, "brown"); + searchEngine.AddRecord(3, "fox"); + searchEngine.AddRecord(4, "quick brown fox"); + + var parser = new Parser("quick brown fox"); + var query = parser.Parse(); + var result = searchEngine.Search(query).Order(); + Assert.That(result, Is.EqualTo(new int[] { 4 })); + } + + [Test] + public void ComplexMultiFacetAndOrQueryTest() + { + var dataPath = "data/ComplexMultiFacetAndOrQueryTest"; + if (Directory.Exists(dataPath)) + 
Directory.Delete(dataPath, true); + using var searchEngine = new HashedSearchEngine(dataPath); + + // Adding multiple records with complex facets + searchEngine.AddRecord(1, "Harry Potter and the Sorcerer's Stone"); + searchEngine.AddFacet(1, "author", "J.K. Rowling"); + searchEngine.AddFacet(1, "category", "books"); + searchEngine.AddFacet(1, "publisher", "Bloomsbury"); + + searchEngine.AddRecord(2, "Harry Potter and the Chamber of Secrets"); + searchEngine.AddFacet(2, "author", "J.K. Rowling"); + searchEngine.AddFacet(2, "category", "books"); + searchEngine.AddFacet(2, "publisher", "Scholastic"); + + searchEngine.AddRecord(3, "The Fellowship of the Ring"); + searchEngine.AddFacet(3, "author", "J.R.R. Tolkien"); + searchEngine.AddFacet(3, "category", "books"); + searchEngine.AddFacet(3, "publisher", "Allen & Unwin"); + + searchEngine.AddRecord(4, "The Two Towers"); + searchEngine.AddFacet(4, "author", "J.R.R. Tolkien"); + searchEngine.AddFacet(4, "category", "books"); + searchEngine.AddFacet(4, "publisher", "Allen & Unwin"); + + searchEngine.AddRecord(5, "The Return of the King"); + searchEngine.AddFacet(5, "author", "J.R.R. Tolkien"); + searchEngine.AddFacet(5, "category", "books"); + searchEngine.AddFacet(5, "publisher", "Allen & Unwin"); + + searchEngine.AddRecord(6, "The Hobbit"); + searchEngine.AddFacet(6, "author", "J.R.R. Tolkien"); + searchEngine.AddFacet(6, "category", "books"); + searchEngine.AddFacet(6, "publisher", "George Allen & Unwin"); + + searchEngine.AddRecord(7, "The Hobbit: An Unexpected Journey"); + searchEngine.AddFacet(7, "author", "Peter Jackson"); + searchEngine.AddFacet(7, "category", "movies"); + + // Complex query combining multiple facets and conditions + var parser = new Parser("(author:\"J.K. Rowling\" AND publisher:Scholastic) OR (author:\"J.R.R. 
Tolkien\" AND publisher:\"Allen & Unwin\")"); + var query = parser.Parse(); + var result = searchEngine.Search(query).Order(); + Assert.That(result, Is.EqualTo(new int[] { 2, 3, 4, 5 })); + } + + [Test] + public void MultiLayeredNestedQueryTest() + { + var dataPath = "data/MultiLayeredNestedQueryTest"; + if (Directory.Exists(dataPath)) + Directory.Delete(dataPath, true); + using var searchEngine = new HashedSearchEngine(dataPath); + + // Adding data with multiple categories and facets + searchEngine.AddRecord(1, "MacBook Pro"); + searchEngine.AddFacet(1, "brand", "Apple"); + searchEngine.AddFacet(1, "category", "laptops"); + searchEngine.AddFacet(1, "processor", "M1"); + + searchEngine.AddRecord(2, "MacBook Air"); + searchEngine.AddFacet(2, "brand", "Apple"); + searchEngine.AddFacet(2, "category", "laptops"); + searchEngine.AddFacet(2, "processor", "M1"); + + searchEngine.AddRecord(3, "Surface Laptop 4"); + searchEngine.AddFacet(3, "brand", "Microsoft"); + searchEngine.AddFacet(3, "category", "laptops"); + searchEngine.AddFacet(3, "processor", "Intel"); + + searchEngine.AddRecord(4, "Dell XPS 13"); + searchEngine.AddFacet(4, "brand", "Dell"); + searchEngine.AddFacet(4, "category", "laptops"); + searchEngine.AddFacet(4, "processor", "Intel"); + + searchEngine.AddRecord(5, "iPhone 12"); + searchEngine.AddFacet(5, "brand", "Apple"); + searchEngine.AddFacet(5, "category", "smartphones"); + searchEngine.AddFacet(5, "processor", "A14"); + + searchEngine.AddRecord(6, "Galaxy S21"); + searchEngine.AddFacet(6, "brand", "Samsung"); + searchEngine.AddFacet(6, "category", "smartphones"); + searchEngine.AddFacet(6, "processor", "Exynos"); + + searchEngine.AddRecord(7, "Surface Pro 7"); + searchEngine.AddFacet(7, "brand", "Microsoft"); + searchEngine.AddFacet(7, "category", "tablets"); + searchEngine.AddFacet(7, "processor", "Intel"); + + // Multi-layered nested query combining facets, categories, and processors + var parser = new Parser("((brand:Apple AND category:laptops AND 
processor:M1) OR (brand:Microsoft AND category:laptops)) AND NOT brand:Dell"); + var query = parser.Parse(); + var result = searchEngine.Search(query).Order(); + Assert.That(result, Is.EqualTo(new int[] { 1, 2, 3 })); + } + + [Test] + public void QueryWithMultipleNegationsTest() + { + var dataPath = "data/QueryWithMultipleNegationsTest"; + if (Directory.Exists(dataPath)) + Directory.Delete(dataPath, true); + using var searchEngine = new HashedSearchEngine(dataPath); + + // Adding more records with different categories and facets + searchEngine.AddRecord(1, "Harry Potter and the Philosopher's Stone"); + searchEngine.AddFacet(1, "author", "J.K. Rowling"); + searchEngine.AddFacet(1, "category", "books"); + searchEngine.AddFacet(1, "publisher", "Bloomsbury"); + + searchEngine.AddRecord(2, "Harry Potter and the Chamber of Secrets"); + searchEngine.AddFacet(2, "author", "J.K. Rowling"); + searchEngine.AddFacet(2, "category", "books"); + searchEngine.AddFacet(2, "publisher", "Scholastic"); + + searchEngine.AddRecord(3, "The Hobbit"); + searchEngine.AddFacet(3, "author", "J.R.R. Tolkien"); + searchEngine.AddFacet(3, "category", "books"); + searchEngine.AddFacet(3, "publisher", "Allen & Unwin"); + + searchEngine.AddRecord(4, "The Hobbit: An Unexpected Journey"); + searchEngine.AddFacet(4, "author", "Peter Jackson"); + searchEngine.AddFacet(4, "category", "movies"); + + searchEngine.AddRecord(5, "Inception"); + searchEngine.AddFacet(5, "director", "Christopher Nolan"); + searchEngine.AddFacet(5, "category", "movies"); + + searchEngine.AddRecord(6, "Interstellar"); + searchEngine.AddFacet(6, "director", "Christopher Nolan"); + searchEngine.AddFacet(6, "category", "movies"); + + searchEngine.AddRecord(7, "Dunkirk"); + searchEngine.AddFacet(7, "director", "Christopher Nolan"); + searchEngine.AddFacet(7, "category", "movies"); + + // Complex query with multiple negations and AND/OR combinations + var parser = new Parser("(category:books AND NOT author:\"J.K. 
Rowling\") OR (category:movies AND NOT director:\"Christopher Nolan\")"); + var query = parser.Parse(); + var result = searchEngine.Search(query).Order(); + Assert.That(result, Is.EqualTo(new int[] { 3, 4 })); + } + + [Test] + public void DeeplyNestedComplexQueryTest() + { + var dataPath = "data/DeeplyNestedComplexQueryTest"; + if (Directory.Exists(dataPath)) + Directory.Delete(dataPath, true); + using var searchEngine = new HashedSearchEngine(dataPath); + + // Adding a variety of records with deeply nested facets and categories + searchEngine.AddRecord(1, "iPhone 13"); + searchEngine.AddFacet(1, "brand", "Apple"); + searchEngine.AddFacet(1, "category", "smartphones"); + searchEngine.AddFacet(1, "processor", "A15"); + + searchEngine.AddRecord(2, "iPhone 12"); + searchEngine.AddFacet(2, "brand", "Apple"); + searchEngine.AddFacet(2, "category", "smartphones"); + searchEngine.AddFacet(2, "processor", "A14"); + + searchEngine.AddRecord(3, "MacBook Air"); + searchEngine.AddFacet(3, "brand", "Apple"); + searchEngine.AddFacet(3, "category", "laptops"); + searchEngine.AddFacet(3, "processor", "M1"); + + searchEngine.AddRecord(4, "Galaxy S21"); + searchEngine.AddFacet(4, "brand", "Samsung"); + searchEngine.AddFacet(4, "category", "smartphones"); + searchEngine.AddFacet(4, "processor", "Exynos"); + + searchEngine.AddRecord(5, "Surface Laptop 4"); + searchEngine.AddFacet(5, "brand", "Microsoft"); + searchEngine.AddFacet(5, "category", "laptops"); + searchEngine.AddFacet(5, "processor", "Intel"); + + searchEngine.AddRecord(6, "Surface Pro 7"); + searchEngine.AddFacet(6, "brand", "Microsoft"); + searchEngine.AddFacet(6, "category", "tablets"); + searchEngine.AddFacet(6, "processor", "Intel"); + + searchEngine.AddRecord(7, "Dell XPS 13"); + searchEngine.AddFacet(7, "brand", "Dell"); + searchEngine.AddFacet(7, "category", "laptops"); + searchEngine.AddFacet(7, "processor", "Intel"); + + searchEngine.AddRecord(8, "ThinkPad X1 Carbon"); + searchEngine.AddFacet(8, "brand",
"Lenovo"); + searchEngine.AddFacet(8, "category", "laptops"); + searchEngine.AddFacet(8, "processor", "Intel"); + + // Deeply nested query combining AND, OR, and NOT across different categories and facets + var parser = new Parser("((brand:Apple AND category:smartphones) OR " + + "(brand:Microsoft AND category:laptops)) AND " + + "(processor:Intel OR NOT category:tablets)"); + var query = parser.Parse(); + var result = searchEngine.Search(query).Order(); + Assert.That(result, Is.EqualTo(new int[] { 1, 2, 5 })); + } + + [Test] + public void OperatorPrecedenceBetweenAndOrTest() + { + var dataPath = "data/OperatorPrecedenceBetweenAndOrTest"; + if (Directory.Exists(dataPath)) + Directory.Delete(dataPath, true); + using var searchEngine = new HashedSearchEngine(dataPath); + + // Adding records + searchEngine.AddRecord(1, "cat dog fox"); + searchEngine.AddRecord(2, "cat fox"); + searchEngine.AddRecord(3, "dog fox"); + searchEngine.AddRecord(4, "dog"); + searchEngine.AddRecord(5, "fox"); + searchEngine.AddRecord(6, "cat"); + + // Query: cat AND dog OR fox + // Expected interpretation: (cat AND dog) OR fox + // Matches records: 1, 2, 3, 5 + var parser = new Parser("cat AND dog OR fox"); + var query = parser.Parse(); + var result = searchEngine.Search(query).Order(); + Assert.That(result, Is.EqualTo(new int[] { 1, 2, 3, 5 })); + + + // Query: fox OR cat AND dog + // Expected interpretation: fox OR (cat AND dog) + // Matches records: 1, 2, 3, 5 + parser = new Parser("fox OR cat AND dog"); + query = parser.Parse(); + result = searchEngine.Search(query).Order(); + Assert.That(result, Is.EqualTo(new int[] { 1, 2, 3, 5 })); + + // Query: cat AND (dog OR fox) + // Explicit precedence given by parentheses + // Matches records: 1, 2 + parser = new Parser("cat AND (dog OR fox)"); + query = parser.Parse(); + result = searchEngine.Search(query).Order(); + Assert.That(result, Is.EqualTo(new int[] { 1, 2 })); + } + + [Test] + public void BasicOperatorPrecedenceTest() + { + var dataPath
= "data/BasicOperatorPrecedenceTest"; + if (Directory.Exists(dataPath)) + Directory.Delete(dataPath, true); + using var searchEngine = new HashedSearchEngine(dataPath); + + // Adding records + searchEngine.AddRecord(1, "cat dog"); + searchEngine.AddRecord(2, "cat"); + searchEngine.AddRecord(3, "dog"); + searchEngine.AddRecord(4, "fox"); + searchEngine.AddRecord(5, "dog fox"); + + // Query: cat OR dog AND NOT fox + // Expected interpretation: cat OR (dog AND (NOT fox)) + // Matches records: 1, 2, 3 + var parser = new Parser("cat OR dog AND NOT fox"); + var query = parser.Parse(); + var result = searchEngine.Search(query).Order(); + Assert.That(result, Is.EqualTo(new int[] { 1, 2, 3 })); + } + + [Test] + public void NestedOperatorPrecedenceTest() + { + var dataPath = "data/NestedOperatorPrecedenceTest"; + if (Directory.Exists(dataPath)) + Directory.Delete(dataPath, true); + using var searchEngine = new HashedSearchEngine(dataPath); + + // Adding records + searchEngine.AddRecord(1, "cat dog"); + searchEngine.AddRecord(2, "cat fox"); + searchEngine.AddRecord(3, "dog fox"); + searchEngine.AddRecord(4, "dog"); + searchEngine.AddRecord(5, "fox"); + searchEngine.AddRecord(6, "cat"); + + // Query: (cat OR dog) AND NOT (fox OR dog) + // Expected interpretation: (cat OR dog) AND NOT (fox OR dog) + // Matches records: 6 + var parser = new Parser("(cat OR dog) AND NOT (fox OR dog)"); + var query = parser.Parse(); + var result = searchEngine.Search(query).Order(); + Assert.That(result, Is.EqualTo(new int[] { 6 })); + + // Query: NOT cat OR (dog AND fox) + // Expected interpretation: (NOT cat) OR (dog AND fox) + // Matches records: 3, 4, 5 + parser = new Parser("NOT cat OR (dog AND fox)"); + query = parser.Parse(); + result = searchEngine.Search(query).Order(); + Assert.That(result, Is.EqualTo(new int[] { 3, 4, 5 })); + } + + [Test] + public void ComplexOperatorPrecedenceWithFacetsTest() + { + var dataPath = "data/ComplexOperatorPrecedenceWithFacetsTest"; + if 
(Directory.Exists(dataPath)) + Directory.Delete(dataPath, true); + using var searchEngine = new HashedSearchEngine(dataPath); + + // Adding records + searchEngine.AddRecord(1, "book"); + searchEngine.AddRecord(2, "book"); + searchEngine.AddRecord(3, "electronics"); + searchEngine.AddFacet(1, "author", "George Orwell"); + searchEngine.AddFacet(2, "author", "F. Scott Fitzgerald"); + searchEngine.AddFacet(3, "brand", "Sony"); + + // Query: author:"George Orwell" OR brand:Sony AND NOT author:"F. Scott Fitzgerald" + // Expected interpretation: (author:"George Orwell") OR (brand:Sony AND NOT author:"F. Scott Fitzgerald") + // Matches records: 1, 3 + var parser = new Parser("author:\"George Orwell\" OR brand:Sony AND NOT author:\"F. Scott Fitzgerald\""); + var query = parser.Parse(); + var result = searchEngine.Search(query).Order(); + Assert.That(result, Is.EqualTo(new int[] { 1, 3 })); + } + + [Test] + public void MixedOperationsWithParenthesesTest() + { + var dataPath = "data/MixedOperationsWithParenthesesTest"; + if (Directory.Exists(dataPath)) + Directory.Delete(dataPath, true); + using var searchEngine = new HashedSearchEngine(dataPath); + + // Adding records + searchEngine.AddRecord(1, "cat dog fox"); + searchEngine.AddRecord(2, "cat dog"); + searchEngine.AddRecord(3, "cat fox"); + searchEngine.AddRecord(4, "dog fox"); + searchEngine.AddRecord(5, "fox"); + searchEngine.AddRecord(6, "dog"); + + // Query: (cat OR dog) AND fox + // Expected interpretation: ((cat OR dog) AND fox) + // Matches records: 1, 3, 4 + var parser = new Parser("(cat OR dog) AND fox"); + var query = parser.Parse(); + var result = searchEngine.Search(query).Order(); + Assert.That(result, Is.EqualTo(new int[] { 1, 3, 4 })); + + // Query: NOT (cat OR fox) AND dog + // Expected interpretation: (NOT (cat OR fox)) AND dog + // Matches records: 6 + parser = new Parser("NOT (cat OR fox) AND dog"); + query = parser.Parse(); + result = searchEngine.Search(query).Order(); + Assert.That(result, 
Is.EqualTo(new int[] { 6 })); + } + + [Test] + public void UnicodeSupportTest() + { + var dataPath = "data/UnicodeSupportTest"; + if (Directory.Exists(dataPath)) + Directory.Delete(dataPath, true); + using var searchEngine = new HashedSearchEngine( + dataPath, + wordTokenizer: new WordTokenizer(1)); + + // Adding records with Unicode characters + searchEngine.AddRecord(1, "こんにちは 世界"); // Japanese for "Hello World" + searchEngine.AddRecord(2, "Привет мир"); // Russian for "Hello World" + searchEngine.AddRecord(3, "你好 世界"); // Chinese for "Hello World" + searchEngine.AddRecord(4, "안녕하세요 세계"); // Korean for "Hello World" + searchEngine.AddRecord(5, "Hello World"); // English + searchEngine.AddRecord(6, "مرحبا بالعالم"); // Arabic for "Hello World" + + // Query: "こんにちは" + // Should match record 1 + var parser = new Parser("こんにちは"); + var query = parser.Parse(); + var result = searchEngine.Search(query).Order(); + Assert.That(result, Is.EqualTo(new int[] { 1 })); + + // Query: "Привет" + // Should match record 2 + parser = new Parser("Привет"); + query = parser.Parse(); + result = searchEngine.Search(query).Order(); + Assert.That(result, Is.EqualTo(new int[] { 2 })); + + // Query: "世界" (common in Japanese and Chinese) + // Should match records 1 and 3 + parser = new Parser("世界"); + query = parser.Parse(); + result = searchEngine.Search(query).Order(); + Assert.That(result, Is.EqualTo(new int[] { 1, 3 })); + + // Query: "안녕하세요" + // Should match record 4 + parser = new Parser("안녕하세요"); + query = parser.Parse(); + result = searchEngine.Search(query).Order(); + Assert.That(result, Is.EqualTo(new int[] { 4 })); + + // Query: "Hello" + // Should match record 5 + parser = new Parser("Hello"); + query = parser.Parse(); + result = searchEngine.Search(query).Order(); + Assert.That(result, Is.EqualTo(new int[] { 5 })); + + // Query: "World" + // Should match record 5 + parser = new Parser("World"); + query = parser.Parse(); + result = searchEngine.Search(query).Order(); + 
Assert.That(result, Is.EqualTo(new int[] { 5 })); + + // Query: "مرحبا" + // Should match record 6 + parser = new Parser("مرحبا"); + var queryArabic = parser.Parse(); + var resultArabic = searchEngine.Search(queryArabic).Order(); + Assert.That(resultArabic, Is.EqualTo(new int[] { 6 })); + + // Query: "بالعالم" + // Should match record 6 + parser = new Parser("بالعالم"); + queryArabic = parser.Parse(); + resultArabic = searchEngine.Search(queryArabic).Order(); + Assert.That(resultArabic, Is.EqualTo(new int[] { 6 })); + + // Query: "مرحبا بالعالم" + // Should match record 6 + parser = new Parser("\"مرحبا بالعالم\""); + queryArabic = parser.Parse(); + resultArabic = searchEngine.Search(queryArabic).Order(); + Assert.That(resultArabic, Is.EqualTo(new int[] { 6 })); + + // Query: "Hello World" + // Should match record 5 (exact match for English phrase) + parser = new Parser("\"Hello World\""); + query = parser.Parse(); + result = searchEngine.Search(query).Order(); + Assert.That(result, Is.EqualTo(new int[] { 5 })); + } + + [Test] + public void AliasAndOrNotTest() + { + var dataPath = "data/AliasAndOrNotTest"; + if (Directory.Exists(dataPath)) + Directory.Delete(dataPath, true); + using var searchEngine = new HashedSearchEngine(dataPath); + searchEngine.AddRecord(1, "cat dog fox"); + searchEngine.AddRecord(2, "cat fox"); + searchEngine.AddRecord(3, "dog fox"); + searchEngine.AddRecord(4, "dog"); + searchEngine.AddRecord(5, "fox"); + searchEngine.AddRecord(6, "cat"); + + // Test alias for AND (&) + var parser = new Parser("cat & dog"); + var query = parser.Parse(); + var result = searchEngine.Search(query).Order(); + Assert.That(result, Is.EqualTo(new int[] { 1 })); + + // Test alias for OR (|) + parser = new Parser("cat | dog"); + query = parser.Parse(); + result = searchEngine.Search(query).Order(); + Assert.That(result, Is.EqualTo(new int[] { 1, 2, 3, 4, 6 })); + + // Test alias for NOT (-) + parser = new Parser("-fox"); + query = parser.Parse(); + result = 
searchEngine.Search(query).Order(); + Assert.That(result, Is.EqualTo(new int[] { 4, 6 })); + + // Combined aliases test: "cat & (dog | -fox)" + parser = new Parser("cat & (dog | -fox)"); + query = parser.Parse(); + result = searchEngine.Search(query).Order(); + Assert.That(result, Is.EqualTo(new int[] { 1, 6 })); + + + // Test aliases without whitespace + parser = new Parser("cat&(dog|-fox)"); + query = parser.Parse(); + result = searchEngine.Search(query).Order(); + Assert.That(result, Is.EqualTo(new int[] { 1, 6 })); + + // More complex case with all three aliases: "cat & -dog | fox" + parser = new Parser("cat & -dog | fox"); + query = parser.Parse(); + result = searchEngine.Search(query).Order(); + Assert.That(result, Is.EqualTo(new int[] { 1, 2, 3, 5, 6 })); + } + + [Test] + public void DeleteRecordTest() + { + var dataPath = "data/DeleteRecordTest"; + if (Directory.Exists(dataPath)) + Directory.Delete(dataPath, true); + using var searchEngine = new HashedSearchEngine(dataPath, useSecondaryIndex: false); + searchEngine.AddRecord(1, "The quick brown fox jumps over the lazy dog."); + searchEngine.AddRecord(2, "The quick brown fox."); + + // Delete the first record + searchEngine.DeleteRecord(1); + + // Search to confirm deletion + var results = searchEngine.Search("quick brown"); + Assert.That(results.Order(), Is.EqualTo(new int[] { 2 })); + } + + [Test] + public void DeleteTokensTest() + { + var dataPath = "data/DeleteTokensTest"; + if (Directory.Exists(dataPath)) + Directory.Delete(dataPath, true); + using var searchEngine = new HashedSearchEngine(dataPath, useSecondaryIndex: false); + searchEngine.AddRecord(1, "The quick brown fox jumps over the lazy dog."); + searchEngine.AddRecord(2, "The quick brown fox."); + + // Delete tokens associated with the first record + searchEngine.DeleteTokens(1, "The quick brown fox jumps over the lazy dog."); + + // Search to confirm deletion + var results = searchEngine.Search("quick brown or lazy"); + 
Assert.That(results.Order(), Is.EqualTo(new int[] { 2 })); + } + + [Test] + public void UpdateRecordTest() + { + var dataPath = "data/UpdateRecordTest"; + if (Directory.Exists(dataPath)) + Directory.Delete(dataPath, true); + using var searchEngine = new HashedSearchEngine(dataPath); + searchEngine.AddRecord(1, "The quick brown fox jumps over the lazy dog."); + + // Update the record with new text + searchEngine.UpdateRecord( + 1, + "The quick brown fox jumps over the lazy dog.", + "The quick brown fox leaps over the lazy dog."); + + // Confirm the old text is no longer found + var oldResults = searchEngine.Search("jumps"); + Assert.That(oldResults, Is.Empty); + + // Confirm the new text is found + var newResults = searchEngine.Search("leaps"); + Assert.That(newResults.Order(), Is.EqualTo(new int[] { 1 })); + } + + [Test] + public void DeleteAndUpdateMixedTest() + { + var dataPath = "data/DeleteAndUpdateMixedTest"; + if (Directory.Exists(dataPath)) + Directory.Delete(dataPath, true); + using var searchEngine = new HashedSearchEngine(dataPath); + searchEngine.AddRecord(1, "The quick brown fox."); + searchEngine.AddRecord(2, "The lazy dog."); + + // Update the first record + searchEngine.UpdateRecord(1, "The quick brown fox.", "The quick brown bear."); + + // Delete the second record + searchEngine.DeleteRecord(2); + + // Confirm that the old text for the first record is no longer found + var oldResults = searchEngine.Search("fox"); + Assert.That(oldResults, Is.Empty); + + // Confirm that the new text for the first record is found + var newResults = searchEngine.Search("bear"); + Assert.That(newResults.Order(), Is.EqualTo(new int[] { 1 })); + + // Confirm that the second record has been deleted + var deletedResults = searchEngine.Search("dog"); + Assert.That(deletedResults, Is.Empty); + } +} diff --git a/src/ZoneTree.FullTextSearch.UnitTests/ExecuteSearchQueryTests.cs b/src/ZoneTree.FullTextSearch.UnitTests/ExecuteSearchQueryTests.cs index 4aa8adf..59ed430 100644 --- 
a/src/ZoneTree.FullTextSearch.UnitTests/ExecuteSearchQueryTests.cs +++ b/src/ZoneTree.FullTextSearch.UnitTests/ExecuteSearchQueryTests.cs @@ -1,75 +1,75 @@ -using ZoneTree.FullTextSearch.Search; +using ZoneTree.FullTextSearch.Search; using ZoneTree.FullTextSearch.SearchEngines; namespace ZoneTree.FullTextSearch.UnitTests; public sealed class ExecuteSearchQueryTests { - [Test] - public void SingleTokenAndQuery() - { - var dataPath = "data/SingleTokenAndQuery"; - if (Directory.Exists(dataPath)) - Directory.Delete(dataPath, true); - using var searchEngine = new HashedSearchEngine(dataPath); - searchEngine.AddRecord(1, "fox"); - searchEngine.AddRecord(2, "fox cow cat"); - searchEngine.AddRecord(3, "fox cat cow"); + [Test] + public void SingleTokenAndQuery() + { + var dataPath = "data/SingleTokenAndQuery"; + if (Directory.Exists(dataPath)) + Directory.Delete(dataPath, true); + using var searchEngine = new HashedSearchEngine(dataPath); + searchEngine.AddRecord(1, "fox"); + searchEngine.AddRecord(2, "fox cow cat"); + searchEngine.AddRecord(3, "fox cat cow"); - var node = new QueryNode(QueryNodeType.And) + var node = new QueryNode(QueryNodeType.And) + { + Tokens = ["fox cat cow"], + RespectTokenOrder = true + }; + var query = new SearchQuery( + new(QueryNodeType.And) { - Tokens = ["fox cat cow"], - RespectTokenOrder = true - }; - var query = new SearchQuery( - new(QueryNodeType.And) - { - Tokens = ["fox cat cow"], - RespectTokenOrder = true - } - ); - Assert.That(searchEngine.Search(query), Is.EqualTo(new int[] { 3 })); + Tokens = ["fox cat cow"], + RespectTokenOrder = true + } + ); + Assert.That(searchEngine.Search(query), Is.EqualTo(new int[] { 3 })); - query = new SearchQuery( - new(QueryNodeType.And) - { - Tokens = ["fox", "cat", "cow"], - RespectTokenOrder = false - } - ); - var result = searchEngine.Search(query).Order(); - Assert.That(result, Is.EqualTo(new int[] { 2, 3 })); + query = new SearchQuery( + new(QueryNodeType.And) + { + Tokens = ["fox", "cat", "cow"], + 
RespectTokenOrder = false + } + ); + var result = searchEngine.Search(query).Order(); + Assert.That(result, Is.EqualTo(new int[] { 2, 3 })); - query = new SearchQuery( - new(QueryNodeType.Or) - { - Tokens = ["fox", "cat", "cow"], - RespectTokenOrder = false - } - ); + query = new SearchQuery( + new(QueryNodeType.Or) + { + Tokens = ["fox", "cat", "cow"], + RespectTokenOrder = false + } + ); - result = searchEngine.Search(query).Order(); - Assert.That(result, Is.EqualTo(new int[] { 1, 2, 3 })); + result = searchEngine.Search(query).Order(); + Assert.That(result, Is.EqualTo(new int[] { 1, 2, 3 })); - query = new SearchQuery( - new(QueryNodeType.Not) - { - Tokens = ["abc"], - RespectTokenOrder = false - } - ); + query = new SearchQuery( + new(QueryNodeType.Not) + { + Tokens = ["abc"], + RespectTokenOrder = false + } + ); - result = searchEngine.Search(query).Order(); - Assert.That(query.HasAnyPositiveCriteria, Is.False); - Assert.That(result, Is.EqualTo((new int[] { 1, 2, 3 }))); + result = searchEngine.Search(query).Order(); + Assert.That(query.HasAnyPositiveCriteria, Is.False); + Assert.That(result, Is.EqualTo((new int[] { 1, 2, 3 }))); - query = new SearchQuery( - new(QueryNodeType.And) - { - Children = - [ - new (QueryNodeType.And) + query = new SearchQuery( + new(QueryNodeType.And) + { + Children = + [ + new (QueryNodeType.And) { Tokens = ["fox"], }, @@ -77,20 +77,20 @@ public void SingleTokenAndQuery() { Tokens = ["cow"], } - ], - RespectTokenOrder = false - } - ); + ], + RespectTokenOrder = false + } + ); - result = searchEngine.Search(query).Order(); - Assert.That(result, Is.EqualTo(new int[] { 1 })); + result = searchEngine.Search(query).Order(); + Assert.That(result, Is.EqualTo(new int[] { 1 })); - query = new SearchQuery( - new(QueryNodeType.Or) - { - Children = - [ - new (QueryNodeType.And) + query = new SearchQuery( + new(QueryNodeType.Or) + { + Children = + [ + new (QueryNodeType.And) { Tokens = ["fox"], }, @@ -98,11 +98,11 @@ public void 
SingleTokenAndQuery() { Tokens = ["cow"], } - ] - } - ); + ] + } + ); - result = searchEngine.Search(query).Order(); - Assert.That(result, Is.EqualTo(new int[] { 1, 2, 3 })); - } + result = searchEngine.Search(query).Order(); + Assert.That(result, Is.EqualTo(new int[] { 1, 2, 3 })); + } } diff --git a/src/ZoneTree.FullTextSearch.UnitTests/FacetTests.cs b/src/ZoneTree.FullTextSearch.UnitTests/FacetTests.cs index 31b95dd..3830c65 100644 --- a/src/ZoneTree.FullTextSearch.UnitTests/FacetTests.cs +++ b/src/ZoneTree.FullTextSearch.UnitTests/FacetTests.cs @@ -1,57 +1,57 @@ -using ZoneTree.FullTextSearch.SearchEngines; +using ZoneTree.FullTextSearch.SearchEngines; using ZoneTree.FullTextSearch.UnitTests.sampleData; namespace ZoneTree.FullTextSearch.UnitTests; public sealed class FacetTests { - [Test] - public void TestFacetSearch() - { - var dataPath = "data/TestFacetSearch"; - if (Directory.Exists(dataPath)) - Directory.Delete(dataPath, true); - using var searchEngine = new HashedSearchEngine(dataPath); + [Test] + public void TestFacetSearch() + { + var dataPath = "data/TestFacetSearch"; + if (Directory.Exists(dataPath)) + Directory.Delete(dataPath, true); + using var searchEngine = new HashedSearchEngine(dataPath); - foreach (var product in ProductList.Products) + foreach (var product in ProductList.Products) + { + searchEngine.AddRecord(product.Id, product.ToString()); + foreach (var prop in typeof(Facets).GetProperties()) + { + var value = prop.GetValue(product.Facets); + if (value is string strValue) + { + searchEngine.AddFacet(product.Id, prop.Name, strValue); + } + else if (value is string[] values) { - searchEngine.AddRecord(product.Id, product.ToString()); - foreach (var prop in typeof(Facets).GetProperties()) - { - var value = prop.GetValue(product.Facets); - if (value is string strValue) - { - searchEngine.AddFacet(product.Id, prop.Name, strValue); - } - else if (value is string[] values) - { - foreach (var str in values) - searchEngine.AddFacet(product.Id, 
prop.Name, str); - } - } + foreach (var str in values) + searchEngine.AddFacet(product.Id, prop.Name, str); } + } + } - var result = searchEngine.SimpleSearch("wireless", new Dictionary + var result = searchEngine.SimpleSearch("wireless", new Dictionary { { "connectivity", "bluetooth"} }); - Assert.That(result, Has.Length.EqualTo(1)); - Assert.That(result[0], Is.EqualTo(1)); + Assert.That(result, Has.Length.EqualTo(1)); + Assert.That(result[0], Is.EqualTo(1)); - result = searchEngine.SimpleSearch("wireless", new Dictionary()); - Assert.That(result, Has.Length.EqualTo(1)); + result = searchEngine.SimpleSearch("wireless", new Dictionary()); + Assert.That(result, Has.Length.EqualTo(1)); - result = searchEngine.SimpleSearch("home", new Dictionary()); - Assert.That(result, Has.Length.EqualTo(3)); + result = searchEngine.SimpleSearch("home", new Dictionary()); + Assert.That(result, Has.Length.EqualTo(3)); - result = searchEngine.SimpleSearch("home", new Dictionary + result = searchEngine.SimpleSearch("home", new Dictionary { { "Resolution", "4K UHD"}, { "EnergyEfficiency", "A+"}, }); - Assert.That(result, Has.Length.EqualTo(2)); + Assert.That(result, Has.Length.EqualTo(2)); - result = searchEngine.SimpleSearch("product", new Dictionary + result = searchEngine.SimpleSearch("product", new Dictionary { { "Resolution", "4K UHD"}, { "EnergyEfficiency", "A+"}, @@ -59,9 +59,9 @@ public void TestFacetSearch() { "Features", "Milk Frother" }, }); - Assert.That(result, Has.Length.EqualTo(4)); + Assert.That(result, Has.Length.EqualTo(4)); - result = searchEngine.SimpleSearch("", new Dictionary + result = searchEngine.SimpleSearch("", new Dictionary { { "Resolution", "4K UHD"}, { "EnergyEfficiency", "A+"}, @@ -69,16 +69,16 @@ public void TestFacetSearch() { "Features", "Milk Frother" }, }); - Assert.That(result, Has.Length.EqualTo(4)); + Assert.That(result, Has.Length.EqualTo(4)); - // Returning all records without providing any criteria is not supported. 
- result = searchEngine.SimpleSearch(null, new Dictionary()); - Assert.That(result, Is.Empty); + // Returning all records without providing any criteria is not supported. + result = searchEngine.SimpleSearch(null, new Dictionary()); + Assert.That(result, Is.Empty); - result = searchEngine.SimpleSearch("", new Dictionary()); - Assert.That(result, Is.Empty); + result = searchEngine.SimpleSearch("", new Dictionary()); + Assert.That(result, Is.Empty); - result = searchEngine.SimpleSearch(""); - Assert.That(result, Is.Empty); - } + result = searchEngine.SimpleSearch(""); + Assert.That(result, Is.Empty); + } } diff --git a/src/ZoneTree.FullTextSearch.UnitTests/HashedSearchEngineTests.cs b/src/ZoneTree.FullTextSearch.UnitTests/HashedSearchEngineTests.cs index 3db8bc6..9cd767a 100644 --- a/src/ZoneTree.FullTextSearch.UnitTests/HashedSearchEngineTests.cs +++ b/src/ZoneTree.FullTextSearch.UnitTests/HashedSearchEngineTests.cs @@ -5,133 +5,133 @@ namespace ZoneTree.FullTextSearch.UnitTests; public sealed class HashedSearchEngineTests { - [Test] - public void AddRecord_ShouldAddRecordToIndex() - { - var dataPath = "data/AddRecord_ShouldAddRecordToIndex"; - if (Directory.Exists(dataPath)) - Directory.Delete(dataPath, true); - using var searchEngine = new HashedSearchEngine( - dataPath); - - // Arrange - int record = 1; - string text = "sample text"; - - // Act - searchEngine.AddRecord(record, text); - - // Assert - var searchResult = searchEngine.SimpleSearch("sample"); - Assert.That(searchResult, Is.Not.Empty); - Assert.That(searchResult, Contains.Item(record)); - } - - [Test] - public void DeleteRecord_ShouldRemoveRecordFromIndex() - { - var dataPath = "data/DeleteRecord_ShouldRemoveRecordFromIndex"; - if (Directory.Exists(dataPath)) - Directory.Delete(dataPath, true); - using var searchEngine = new HashedSearchEngine(dataPath); - - // Arrange - int record = 1; - string text = "sample text"; - - // Act - searchEngine.AddRecord(record, text); - long deletedCount = 
searchEngine.DeleteRecord(record); - - // Assert - Assert.That(deletedCount, Is.EqualTo(2)); - var searchResult = searchEngine.SimpleSearch("sample"); - Assert.That(searchResult, Is.Empty); - } - - [Test] - public void Search_WithRespectTokenOrderTrue_ShouldReturnRecords() - { - var dataPath = "data/Search_WithRespectTokenOrderTrue_ShouldReturnRecords"; - if (Directory.Exists(dataPath)) - Directory.Delete(dataPath, true); - using var searchEngine = new HashedSearchEngine(dataPath); - - // Arrange - int record1 = 1; - int record2 = 2; - searchEngine.AddRecord(record1, "quick brown fox"); - searchEngine.AddRecord(record2, "brown fox jumps"); - - // Act - var searchResult = searchEngine.SimpleSearch("brown fox"); - - // Assert - Assert.That(searchResult.Length, Is.EqualTo(2)); - Assert.That(searchResult, Contains.Item(record1)); - Assert.That(searchResult, Contains.Item(record2)); - } - - [Test] - public void Search_WithRespectTokenOrderFalse_ShouldReturnRecordsRegardlessOfOrder() - { - var dataPath = "data/Search_WithRespectTokenOrderFalse_ShouldReturnRecordsRegardlessOfOrder"; - if (Directory.Exists(dataPath)) - Directory.Delete(dataPath, true); - using var searchEngine = new HashedSearchEngine(dataPath); - - // Arrange - int record1 = 1; - int record2 = 2; - searchEngine.AddRecord(record1, "quick brown fox"); - searchEngine.AddRecord(record2, "fox brown jumps"); - - // Act - var searchResult = searchEngine.SimpleSearch("brown fox", respectTokenOrder: false); - - // Assert - Assert.That(searchResult.Length, Is.EqualTo(2)); - Assert.That(searchResult, Contains.Item(record1)); - Assert.That(searchResult, Contains.Item(record2)); - } - - [Test] - public void Search_WithSkipAndLimit_ShouldReturnLimitedRecords() - { - var dataPath = "data/Search_WithSkipAndLimit_ShouldReturnLimitedRecords"; - if (Directory.Exists(dataPath)) - Directory.Delete(dataPath, true); - using var searchEngine = new HashedSearchEngine(dataPath); - - // Arrange - searchEngine.AddRecord(1, "record 
one"); - searchEngine.AddRecord(2, "record two"); - searchEngine.AddRecord(3, "record three"); - - // Act - var searchResult = searchEngine.SimpleSearch("record", skip: 1, limit: 1); - - // Assert - Assert.That(searchResult.Length, Is.EqualTo(1)); - Assert.That(searchResult[0], Is.EqualTo(2)); - } - - [Test] - public void Dispose_ShouldDisposeIndexProperly() - { - // Arrange - var dataPath = "data/Dispose_ShouldDisposeIndexProperly"; - if (Directory.Exists(dataPath)) - Directory.Delete(dataPath, true); - - using var searchEngine = new HashedSearchEngine(dataPath); - searchEngine.AddRecord(1, "sample"); - - // Act - searchEngine.Dispose(); - - // Assert - Assert.That(searchEngine.Index.IsReadOnly, Is.True); - Assert.Throws(() => searchEngine.AddRecord(2, "abc")); - } + [Test] + public void AddRecord_ShouldAddRecordToIndex() + { + var dataPath = "data/AddRecord_ShouldAddRecordToIndex"; + if (Directory.Exists(dataPath)) + Directory.Delete(dataPath, true); + using var searchEngine = new HashedSearchEngine( + dataPath); + + // Arrange + int record = 1; + string text = "sample text"; + + // Act + searchEngine.AddRecord(record, text); + + // Assert + var searchResult = searchEngine.SimpleSearch("sample"); + Assert.That(searchResult, Is.Not.Empty); + Assert.That(searchResult, Contains.Item(record)); + } + + [Test] + public void DeleteRecord_ShouldRemoveRecordFromIndex() + { + var dataPath = "data/DeleteRecord_ShouldRemoveRecordFromIndex"; + if (Directory.Exists(dataPath)) + Directory.Delete(dataPath, true); + using var searchEngine = new HashedSearchEngine(dataPath); + + // Arrange + int record = 1; + string text = "sample text"; + + // Act + searchEngine.AddRecord(record, text); + long deletedCount = searchEngine.DeleteRecord(record); + + // Assert + Assert.That(deletedCount, Is.EqualTo(2)); + var searchResult = searchEngine.SimpleSearch("sample"); + Assert.That(searchResult, Is.Empty); + } + + [Test] + public void Search_WithRespectTokenOrderTrue_ShouldReturnRecords() + 
{ + var dataPath = "data/Search_WithRespectTokenOrderTrue_ShouldReturnRecords"; + if (Directory.Exists(dataPath)) + Directory.Delete(dataPath, true); + using var searchEngine = new HashedSearchEngine(dataPath); + + // Arrange + int record1 = 1; + int record2 = 2; + searchEngine.AddRecord(record1, "quick brown fox"); + searchEngine.AddRecord(record2, "brown fox jumps"); + + // Act + var searchResult = searchEngine.SimpleSearch("brown fox"); + + // Assert + Assert.That(searchResult.Length, Is.EqualTo(2)); + Assert.That(searchResult, Contains.Item(record1)); + Assert.That(searchResult, Contains.Item(record2)); + } + + [Test] + public void Search_WithRespectTokenOrderFalse_ShouldReturnRecordsRegardlessOfOrder() + { + var dataPath = "data/Search_WithRespectTokenOrderFalse_ShouldReturnRecordsRegardlessOfOrder"; + if (Directory.Exists(dataPath)) + Directory.Delete(dataPath, true); + using var searchEngine = new HashedSearchEngine(dataPath); + + // Arrange + int record1 = 1; + int record2 = 2; + searchEngine.AddRecord(record1, "quick brown fox"); + searchEngine.AddRecord(record2, "fox brown jumps"); + + // Act + var searchResult = searchEngine.SimpleSearch("brown fox", respectTokenOrder: false); + + // Assert + Assert.That(searchResult.Length, Is.EqualTo(2)); + Assert.That(searchResult, Contains.Item(record1)); + Assert.That(searchResult, Contains.Item(record2)); + } + + [Test] + public void Search_WithSkipAndLimit_ShouldReturnLimitedRecords() + { + var dataPath = "data/Search_WithSkipAndLimit_ShouldReturnLimitedRecords"; + if (Directory.Exists(dataPath)) + Directory.Delete(dataPath, true); + using var searchEngine = new HashedSearchEngine(dataPath); + + // Arrange + searchEngine.AddRecord(1, "record one"); + searchEngine.AddRecord(2, "record two"); + searchEngine.AddRecord(3, "record three"); + + // Act + var searchResult = searchEngine.SimpleSearch("record", skip: 1, limit: 1); + + // Assert + Assert.That(searchResult.Length, Is.EqualTo(1)); + 
Assert.That(searchResult[0], Is.EqualTo(2)); + } + + [Test] + public void Dispose_ShouldDisposeIndexProperly() + { + // Arrange + var dataPath = "data/Dispose_ShouldDisposeIndexProperly"; + if (Directory.Exists(dataPath)) + Directory.Delete(dataPath, true); + + using var searchEngine = new HashedSearchEngine(dataPath); + searchEngine.AddRecord(1, "sample"); + + // Act + searchEngine.Dispose(); + + // Assert + Assert.That(searchEngine.Index.IsReadOnly, Is.True); + Assert.Throws(() => searchEngine.AddRecord(2, "abc")); + } } diff --git a/src/ZoneTree.FullTextSearch.UnitTests/RecordTableTests.cs b/src/ZoneTree.FullTextSearch.UnitTests/RecordTableTests.cs index fc1c6e4..5c4a213 100644 --- a/src/ZoneTree.FullTextSearch.UnitTests/RecordTableTests.cs +++ b/src/ZoneTree.FullTextSearch.UnitTests/RecordTableTests.cs @@ -4,118 +4,118 @@ namespace ZoneTree.FullTextSearch.UnitTests; public sealed class RecordTableTests { - [Test] - public void UpsertRecord_ShouldInsertRecordAndValueIntoBothZoneTrees() - { - var dataPath = "data/UpsertRecord_ShouldInsertRecordAndValueIntoBothZoneTrees"; - if (Directory.Exists(dataPath)) - Directory.Delete(dataPath, true); - using var recordTable = new RecordTable(dataPath); - - // Arrange - int record = 1; - string value = "Value1"; - - // Act - recordTable.UpsertRecord(record, value); - - // Assert - Assert.That(recordTable.TryGetValue(record, out var retrievedValue), Is.True); - Assert.That(recordTable.TryGetRecord(value, out var retrievedRecord), Is.True); - Assert.That(retrievedValue, Is.EqualTo("Value1")); - Assert.That(retrievedRecord, Is.EqualTo(record)); - } - - [Test] - public void GetLastRecord_ShouldReturnLastInsertedRecord() - { - var dataPath = "data/GetLastRecord_ShouldReturnLastInsertedRecord"; - if (Directory.Exists(dataPath)) - Directory.Delete(dataPath, true); - using var recordTable = new RecordTable(dataPath); - - // Arrange - int record1 = 1; - int record2 = 2; - string value1 = "Value1"; - string value2 = "Value2"; - - // 
Act - recordTable.UpsertRecord(record1, value1); - recordTable.UpsertRecord(record2, value2); - var lastRecord = recordTable.GetLastRecord(); - - // Assert - Assert.That(lastRecord, Is.EqualTo(record2)); - } - - [Test] - public void GetValue_ShouldReturnAssociatedValueForRecord() - { - var dataPath = "data/GetValue_ShouldReturnAssociatedValueForRecord"; - if (Directory.Exists(dataPath)) - Directory.Delete(dataPath, true); - using var recordTable = new RecordTable(dataPath); - - // Arrange - int record = 1; - string value = "Value1"; - - // Act - recordTable.UpsertRecord(record, value); - recordTable.TryGetValue(record, out var retrievedValue); - - // Assert - Assert.That(retrievedValue, Is.EqualTo(value)); - } - - [Test] - public void TryGetRecord_ShouldReturnTrueAndRecordWhenValueExists() - { - var dataPath = "data/TryGetRecord_ShouldReturnTrueAndRecordWhenValueExists"; - if (Directory.Exists(dataPath)) - Directory.Delete(dataPath, true); - using var recordTable = new RecordTable(dataPath); - - // Arrange - int record = 1; - string value = "Value1"; - - // Act - recordTable.UpsertRecord(record, value); - bool found = recordTable.TryGetRecord(value, out int retrievedRecord); - - // Assert - Assert.That(found, Is.True); - Assert.That(retrievedRecord, Is.EqualTo(record)); - } - - [Test] - public void TryGetRecord_ShouldReturnFalseWhenValueDoesNotExist() - { - var dataPath = "data/TryGetRecord_ShouldReturnFalseWhenValueDoesNotExist"; - if (Directory.Exists(dataPath)) - Directory.Delete(dataPath, true); - using var recordTable = new RecordTable(dataPath); - - // Act - bool found = recordTable.TryGetRecord("NonExistentValue", out int _); - - // Assert - Assert.That(found, Is.False); - } - - [Test] - public void Dispose_ShouldDisposeBothZoneTreesAndMaintainers() - { - // Arrange - var recordTable = new RecordTable("data/Dispose_ShouldDisposeBothZoneTreesAndMaintainers"); - recordTable.UpsertRecord(1, "Value1"); - - // Act - recordTable.Dispose(); - - // Assert - 
Assert.Throws(() => recordTable.UpsertRecord(2, "Value2")); - } + [Test] + public void UpsertRecord_ShouldInsertRecordAndValueIntoBothZoneTrees() + { + var dataPath = "data/UpsertRecord_ShouldInsertRecordAndValueIntoBothZoneTrees"; + if (Directory.Exists(dataPath)) + Directory.Delete(dataPath, true); + using var recordTable = new RecordTable(dataPath); + + // Arrange + int record = 1; + string value = "Value1"; + + // Act + recordTable.UpsertRecord(record, value); + + // Assert + Assert.That(recordTable.TryGetValue(record, out var retrievedValue), Is.True); + Assert.That(recordTable.TryGetRecord(value, out var retrievedRecord), Is.True); + Assert.That(retrievedValue, Is.EqualTo("Value1")); + Assert.That(retrievedRecord, Is.EqualTo(record)); + } + + [Test] + public void GetLastRecord_ShouldReturnLastInsertedRecord() + { + var dataPath = "data/GetLastRecord_ShouldReturnLastInsertedRecord"; + if (Directory.Exists(dataPath)) + Directory.Delete(dataPath, true); + using var recordTable = new RecordTable(dataPath); + + // Arrange + int record1 = 1; + int record2 = 2; + string value1 = "Value1"; + string value2 = "Value2"; + + // Act + recordTable.UpsertRecord(record1, value1); + recordTable.UpsertRecord(record2, value2); + var lastRecord = recordTable.GetLastRecord(); + + // Assert + Assert.That(lastRecord, Is.EqualTo(record2)); + } + + [Test] + public void GetValue_ShouldReturnAssociatedValueForRecord() + { + var dataPath = "data/GetValue_ShouldReturnAssociatedValueForRecord"; + if (Directory.Exists(dataPath)) + Directory.Delete(dataPath, true); + using var recordTable = new RecordTable(dataPath); + + // Arrange + int record = 1; + string value = "Value1"; + + // Act + recordTable.UpsertRecord(record, value); + recordTable.TryGetValue(record, out var retrievedValue); + + // Assert + Assert.That(retrievedValue, Is.EqualTo(value)); + } + + [Test] + public void TryGetRecord_ShouldReturnTrueAndRecordWhenValueExists() + { + var dataPath = 
"data/TryGetRecord_ShouldReturnTrueAndRecordWhenValueExists"; + if (Directory.Exists(dataPath)) + Directory.Delete(dataPath, true); + using var recordTable = new RecordTable(dataPath); + + // Arrange + int record = 1; + string value = "Value1"; + + // Act + recordTable.UpsertRecord(record, value); + bool found = recordTable.TryGetRecord(value, out int retrievedRecord); + + // Assert + Assert.That(found, Is.True); + Assert.That(retrievedRecord, Is.EqualTo(record)); + } + + [Test] + public void TryGetRecord_ShouldReturnFalseWhenValueDoesNotExist() + { + var dataPath = "data/TryGetRecord_ShouldReturnFalseWhenValueDoesNotExist"; + if (Directory.Exists(dataPath)) + Directory.Delete(dataPath, true); + using var recordTable = new RecordTable(dataPath); + + // Act + bool found = recordTable.TryGetRecord("NonExistentValue", out int _); + + // Assert + Assert.That(found, Is.False); + } + + [Test] + public void Dispose_ShouldDisposeBothZoneTreesAndMaintainers() + { + // Arrange + var recordTable = new RecordTable("data/Dispose_ShouldDisposeBothZoneTreesAndMaintainers"); + recordTable.UpsertRecord(1, "Value1"); + + // Act + recordTable.Dispose(); + + // Assert + Assert.Throws(() => recordTable.UpsertRecord(2, "Value2")); + } } diff --git a/src/ZoneTree.FullTextSearch.UnitTests/SearchQueryTests.cs b/src/ZoneTree.FullTextSearch.UnitTests/SearchQueryTests.cs index 29b626c..a21ace2 100644 --- a/src/ZoneTree.FullTextSearch.UnitTests/SearchQueryTests.cs +++ b/src/ZoneTree.FullTextSearch.UnitTests/SearchQueryTests.cs @@ -1,4 +1,4 @@ -using ZoneTree.FullTextSearch.Search; +using ZoneTree.FullTextSearch.Search; using ZoneTree.FullTextSearch.Hashing; using ZoneTree.FullTextSearch.Tokenizer; @@ -6,190 +6,190 @@ namespace ZoneTree.FullTextSearch.UnitTests; public sealed class SearchQueryTests { - [Test] - public void SingleTokenAndQueryRespectTokenOrder() - { - var hashGenerator = new DefaultHashCodeGenerator(); - var tokenizer = new WordTokenizer(); - var node = new 
QueryNode(QueryNodeType.And); - var query = new SearchQuery(node); - node.Tokens = ["fox cat cow"]; - node.RespectTokenOrder = true; - var hashedQuery = HashedSearchQueryFactory.FromStringSearchQuery(query, hashGenerator, tokenizer); - var hashedNode = hashedQuery.QueryNode; - - Assert.That(hashedNode.NodeType, Is.EqualTo(QueryNodeType.And)); - Assert.That(hashedNode.HasChildren, Is.False); - Assert.That(hashedNode.HasTokens, Is.True); - var expectedHashedTokens = new[] { "fox", "cat", "cow" } - .Select(hashGenerator.GetHashCode) - .ToArray(); - Assert.That(hashedNode.Tokens, Is.EqualTo(expectedHashedTokens)); - } - - [Test] - public void SingleTokenOrQuery() - { - var hashGenerator = new DefaultHashCodeGenerator(); - var tokenizer = new WordTokenizer(); - var node = new QueryNode(QueryNodeType.Or); - var query = new SearchQuery(node); - node.Tokens = ["fox cat cow"]; - node.RespectTokenOrder = false; - var hashedQuery = HashedSearchQueryFactory.FromStringSearchQuery(query, hashGenerator, tokenizer); - var hashedNode = hashedQuery.QueryNode; - - // Edge case, single token converted into AND - Assert.That(hashedNode.NodeType, - Is.EqualTo(QueryNodeType.And)); - Assert.That(hashedNode.HasChildren, Is.False); - Assert.That(hashedNode.HasTokens, Is.True); - Assert.That(hashedNode.RespectTokenOrder, Is.True); - var expectedHashedTokens = new[] { "fox", "cat", "cow" } - .Select(hashGenerator.GetHashCode) - .ToArray(); - Assert.That(hashedNode.Tokens, Is.EqualTo(expectedHashedTokens)); - } - - [Test] - public void DoubleTokensOrQuery() - { - var hashGenerator = new DefaultHashCodeGenerator(); - var tokenizer = new WordTokenizer(); - var node = new QueryNode(QueryNodeType.Or); - var query = new SearchQuery(node); - node.Tokens = ["fox cat cow", "lion dog wolf"]; - node.RespectTokenOrder = true; - var hashedQuery = HashedSearchQueryFactory.FromStringSearchQuery(query, hashGenerator, tokenizer); - - var hashedNode = hashedQuery.QueryNode; - Assert.That(hashedNode.NodeType, 
Is.EqualTo(QueryNodeType.Or)); - Assert.That(hashedNode.HasChildren, Is.True); - Assert.That(hashedNode.HasTokens, Is.False); - Assert.That(hashedNode.RespectTokenOrder, Is.True); - - Assert.That(hashedNode.Children.Length, Is.EqualTo(2)); - - var child1 = hashedNode.Children[0]; - var child2 = hashedNode.Children[1]; - - Assert.That(child1.NodeType, Is.EqualTo(QueryNodeType.And)); - Assert.That(child2.NodeType, Is.EqualTo(QueryNodeType.And)); - Assert.That(child1.RespectTokenOrder, Is.True); - Assert.That(child2.RespectTokenOrder, Is.True); - - var expectedHashedTokens1 = new[] { "fox", "cat", "cow" } - .Select(hashGenerator.GetHashCode) - .ToArray(); - Assert.That(child1.Tokens, Is.EqualTo(expectedHashedTokens1)); - - var expectedHashedTokens2 = new[] { "lion", "dog", "wolf" } - .Select(hashGenerator.GetHashCode) - .ToArray(); - Assert.That(child2.Tokens, Is.EqualTo(expectedHashedTokens2)); - } - - [TestCase(QueryNodeType.And)] - [TestCase(QueryNodeType.Not)] - public void DoubleTokensQuery(QueryNodeType nodeType) - { - var hashGenerator = new DefaultHashCodeGenerator(); - var tokenizer = new WordTokenizer(); - var node = new QueryNode(nodeType); - var query = new SearchQuery(node); - node.Tokens = ["fox cat cow", "lion dog wolf"]; - node.RespectTokenOrder = false; - var hashedQuery = HashedSearchQueryFactory.FromStringSearchQuery(query, hashGenerator, tokenizer); - - var hashedNode = hashedQuery.QueryNode; - Assert.That(hashedNode.NodeType, Is.EqualTo(QueryNodeType.And)); - Assert.That(hashedNode.HasChildren, Is.True); - Assert.That(hashedNode.HasTokens, Is.False); - Assert.That(hashedNode.RespectTokenOrder, Is.False); - - Assert.That(hashedNode.Children.Length, Is.EqualTo(2)); - - var child1 = hashedNode.Children[0]; - var child2 = hashedNode.Children[1]; - - Assert.That(child1.NodeType, Is.EqualTo(nodeType)); - Assert.That(child2.NodeType, Is.EqualTo(nodeType)); - Assert.That(child1.RespectTokenOrder, Is.True); - Assert.That(child2.RespectTokenOrder, Is.True); 
- - var expectedHashedTokens1 = new[] { "fox", "cat", "cow" } - .Select(hashGenerator.GetHashCode) - .ToArray(); - Assert.That(child1.Tokens, Is.EqualTo(expectedHashedTokens1)); - - var expectedHashedTokens2 = new[] { "lion", "dog", "wolf" } - .Select(hashGenerator.GetHashCode) - .ToArray(); - Assert.That(child2.Tokens, Is.EqualTo(expectedHashedTokens2)); - } - - [TestCase(QueryNodeType.And)] - [TestCase(QueryNodeType.Not)] - public void DoubleTokensQueryRespectTokenOrder(QueryNodeType nodeType) - { - var hashGenerator = new DefaultHashCodeGenerator(); - var tokenizer = new WordTokenizer(); - var node = new QueryNode(nodeType); - var query = new SearchQuery(node); - node.Tokens = ["fox cat cow", "lion dog wolf"]; - node.RespectTokenOrder = true; - var hashedQuery = HashedSearchQueryFactory.FromStringSearchQuery(query, hashGenerator, tokenizer); - - var hashedNode = hashedQuery.QueryNode; - Assert.That(hashedNode.NodeType, Is.EqualTo(nodeType)); - Assert.That(hashedNode.HasChildren, Is.False); - Assert.That(hashedNode.HasTokens, Is.True); - Assert.That(hashedNode.RespectTokenOrder, Is.True); - - Assert.That(hashedNode.Tokens.Length, Is.EqualTo(6)); - - var expectedHashedTokens1 = new[] { "fox", "cat", "cow", "lion", "dog", "wolf" } - .Select(hashGenerator.GetHashCode) - .ToArray(); - Assert.That(hashedNode.Tokens, Is.EqualTo(expectedHashedTokens1)); - } - - [TestCase(QueryNodeType.Or)] - [TestCase(QueryNodeType.And)] - [TestCase(QueryNodeType.Not)] - public void TripleTokenQuery(QueryNodeType nodeType) - { - var hashGenerator = new DefaultHashCodeGenerator(); - var tokenizer = new WordTokenizer(); - var node = new QueryNode(nodeType); - var query = new SearchQuery(node); - node.Tokens = ["fox", "cat", "cow"]; - node.RespectTokenOrder = true; - var hashedQuery = HashedSearchQueryFactory.FromStringSearchQuery(query, hashGenerator, tokenizer); - var hashedNode = hashedQuery.QueryNode; - - Assert.That(hashedNode.NodeType, - Is.EqualTo(nodeType)); - 
Assert.That(hashedNode.HasChildren, Is.False); - Assert.That(hashedNode.HasTokens, Is.True); - Assert.That(hashedNode.RespectTokenOrder, Is.True); - var expectedHashedTokens = new[] { "fox", "cat", "cow" } - .Select(hashGenerator.GetHashCode) - .ToArray(); - Assert.That(hashedNode.Tokens, Is.EqualTo(expectedHashedTokens)); - } - - [TestCase(QueryNodeType.Or)] - [TestCase(QueryNodeType.And)] - [TestCase(QueryNodeType.Not)] - public void ComplexQuery(QueryNodeType rootType) - { - var hashGenerator = new DefaultHashCodeGenerator(); - var tokenizer = new WordTokenizer(); - var node = new QueryNode(rootType); - var query = new SearchQuery(node); - node.Children = [ - new QueryNode(QueryNodeType.And) { + [Test] + public void SingleTokenAndQueryRespectTokenOrder() + { + var hashGenerator = new DefaultHashCodeGenerator(); + var tokenizer = new WordTokenizer(); + var node = new QueryNode(QueryNodeType.And); + var query = new SearchQuery(node); + node.Tokens = ["fox cat cow"]; + node.RespectTokenOrder = true; + var hashedQuery = HashedSearchQueryFactory.FromStringSearchQuery(query, hashGenerator, tokenizer); + var hashedNode = hashedQuery.QueryNode; + + Assert.That(hashedNode.NodeType, Is.EqualTo(QueryNodeType.And)); + Assert.That(hashedNode.HasChildren, Is.False); + Assert.That(hashedNode.HasTokens, Is.True); + var expectedHashedTokens = new[] { "fox", "cat", "cow" } + .Select(hashGenerator.GetHashCode) + .ToArray(); + Assert.That(hashedNode.Tokens, Is.EqualTo(expectedHashedTokens)); + } + + [Test] + public void SingleTokenOrQuery() + { + var hashGenerator = new DefaultHashCodeGenerator(); + var tokenizer = new WordTokenizer(); + var node = new QueryNode(QueryNodeType.Or); + var query = new SearchQuery(node); + node.Tokens = ["fox cat cow"]; + node.RespectTokenOrder = false; + var hashedQuery = HashedSearchQueryFactory.FromStringSearchQuery(query, hashGenerator, tokenizer); + var hashedNode = hashedQuery.QueryNode; + + // Edge case, single token converted into AND + 
Assert.That(hashedNode.NodeType, + Is.EqualTo(QueryNodeType.And)); + Assert.That(hashedNode.HasChildren, Is.False); + Assert.That(hashedNode.HasTokens, Is.True); + Assert.That(hashedNode.RespectTokenOrder, Is.True); + var expectedHashedTokens = new[] { "fox", "cat", "cow" } + .Select(hashGenerator.GetHashCode) + .ToArray(); + Assert.That(hashedNode.Tokens, Is.EqualTo(expectedHashedTokens)); + } + + [Test] + public void DoubleTokensOrQuery() + { + var hashGenerator = new DefaultHashCodeGenerator(); + var tokenizer = new WordTokenizer(); + var node = new QueryNode(QueryNodeType.Or); + var query = new SearchQuery(node); + node.Tokens = ["fox cat cow", "lion dog wolf"]; + node.RespectTokenOrder = true; + var hashedQuery = HashedSearchQueryFactory.FromStringSearchQuery(query, hashGenerator, tokenizer); + + var hashedNode = hashedQuery.QueryNode; + Assert.That(hashedNode.NodeType, Is.EqualTo(QueryNodeType.Or)); + Assert.That(hashedNode.HasChildren, Is.True); + Assert.That(hashedNode.HasTokens, Is.False); + Assert.That(hashedNode.RespectTokenOrder, Is.True); + + Assert.That(hashedNode.Children.Length, Is.EqualTo(2)); + + var child1 = hashedNode.Children[0]; + var child2 = hashedNode.Children[1]; + + Assert.That(child1.NodeType, Is.EqualTo(QueryNodeType.And)); + Assert.That(child2.NodeType, Is.EqualTo(QueryNodeType.And)); + Assert.That(child1.RespectTokenOrder, Is.True); + Assert.That(child2.RespectTokenOrder, Is.True); + + var expectedHashedTokens1 = new[] { "fox", "cat", "cow" } + .Select(hashGenerator.GetHashCode) + .ToArray(); + Assert.That(child1.Tokens, Is.EqualTo(expectedHashedTokens1)); + + var expectedHashedTokens2 = new[] { "lion", "dog", "wolf" } + .Select(hashGenerator.GetHashCode) + .ToArray(); + Assert.That(child2.Tokens, Is.EqualTo(expectedHashedTokens2)); + } + + [TestCase(QueryNodeType.And)] + [TestCase(QueryNodeType.Not)] + public void DoubleTokensQuery(QueryNodeType nodeType) + { + var hashGenerator = new DefaultHashCodeGenerator(); + var tokenizer = new 
WordTokenizer(); + var node = new QueryNode(nodeType); + var query = new SearchQuery(node); + node.Tokens = ["fox cat cow", "lion dog wolf"]; + node.RespectTokenOrder = false; + var hashedQuery = HashedSearchQueryFactory.FromStringSearchQuery(query, hashGenerator, tokenizer); + + var hashedNode = hashedQuery.QueryNode; + Assert.That(hashedNode.NodeType, Is.EqualTo(QueryNodeType.And)); + Assert.That(hashedNode.HasChildren, Is.True); + Assert.That(hashedNode.HasTokens, Is.False); + Assert.That(hashedNode.RespectTokenOrder, Is.False); + + Assert.That(hashedNode.Children.Length, Is.EqualTo(2)); + + var child1 = hashedNode.Children[0]; + var child2 = hashedNode.Children[1]; + + Assert.That(child1.NodeType, Is.EqualTo(nodeType)); + Assert.That(child2.NodeType, Is.EqualTo(nodeType)); + Assert.That(child1.RespectTokenOrder, Is.True); + Assert.That(child2.RespectTokenOrder, Is.True); + + var expectedHashedTokens1 = new[] { "fox", "cat", "cow" } + .Select(hashGenerator.GetHashCode) + .ToArray(); + Assert.That(child1.Tokens, Is.EqualTo(expectedHashedTokens1)); + + var expectedHashedTokens2 = new[] { "lion", "dog", "wolf" } + .Select(hashGenerator.GetHashCode) + .ToArray(); + Assert.That(child2.Tokens, Is.EqualTo(expectedHashedTokens2)); + } + + [TestCase(QueryNodeType.And)] + [TestCase(QueryNodeType.Not)] + public void DoubleTokensQueryRespectTokenOrder(QueryNodeType nodeType) + { + var hashGenerator = new DefaultHashCodeGenerator(); + var tokenizer = new WordTokenizer(); + var node = new QueryNode(nodeType); + var query = new SearchQuery(node); + node.Tokens = ["fox cat cow", "lion dog wolf"]; + node.RespectTokenOrder = true; + var hashedQuery = HashedSearchQueryFactory.FromStringSearchQuery(query, hashGenerator, tokenizer); + + var hashedNode = hashedQuery.QueryNode; + Assert.That(hashedNode.NodeType, Is.EqualTo(nodeType)); + Assert.That(hashedNode.HasChildren, Is.False); + Assert.That(hashedNode.HasTokens, Is.True); + Assert.That(hashedNode.RespectTokenOrder, Is.True); + + 
Assert.That(hashedNode.Tokens.Length, Is.EqualTo(6)); + + var expectedHashedTokens1 = new[] { "fox", "cat", "cow", "lion", "dog", "wolf" } + .Select(hashGenerator.GetHashCode) + .ToArray(); + Assert.That(hashedNode.Tokens, Is.EqualTo(expectedHashedTokens1)); + } + + [TestCase(QueryNodeType.Or)] + [TestCase(QueryNodeType.And)] + [TestCase(QueryNodeType.Not)] + public void TripleTokenQuery(QueryNodeType nodeType) + { + var hashGenerator = new DefaultHashCodeGenerator(); + var tokenizer = new WordTokenizer(); + var node = new QueryNode(nodeType); + var query = new SearchQuery(node); + node.Tokens = ["fox", "cat", "cow"]; + node.RespectTokenOrder = true; + var hashedQuery = HashedSearchQueryFactory.FromStringSearchQuery(query, hashGenerator, tokenizer); + var hashedNode = hashedQuery.QueryNode; + + Assert.That(hashedNode.NodeType, + Is.EqualTo(nodeType)); + Assert.That(hashedNode.HasChildren, Is.False); + Assert.That(hashedNode.HasTokens, Is.True); + Assert.That(hashedNode.RespectTokenOrder, Is.True); + var expectedHashedTokens = new[] { "fox", "cat", "cow" } + .Select(hashGenerator.GetHashCode) + .ToArray(); + Assert.That(hashedNode.Tokens, Is.EqualTo(expectedHashedTokens)); + } + + [TestCase(QueryNodeType.Or)] + [TestCase(QueryNodeType.And)] + [TestCase(QueryNodeType.Not)] + public void ComplexQuery(QueryNodeType rootType) + { + var hashGenerator = new DefaultHashCodeGenerator(); + var tokenizer = new WordTokenizer(); + var node = new QueryNode(rootType); + var query = new SearchQuery(node); + node.Children = [ + new QueryNode(QueryNodeType.And) { Tokens = ["fox cat", "cow"], RespectTokenOrder = false, }, @@ -210,20 +210,20 @@ public void ComplexQuery(QueryNodeType rootType) RespectTokenOrder = false, }, ]; - node.RespectTokenOrder = true; + node.RespectTokenOrder = true; - var hashedQuery = HashedSearchQueryFactory.FromStringSearchQuery(query, hashGenerator, tokenizer); - var hashedNode = hashedQuery.QueryNode; + var hashedQuery = 
HashedSearchQueryFactory.FromStringSearchQuery(query, hashGenerator, tokenizer); + var hashedNode = hashedQuery.QueryNode; - Assert.That(hashedNode.NodeType, Is.EqualTo(rootType)); - Assert.That(hashedNode.HasChildren, Is.True); - Assert.That(hashedNode.HasTokens, Is.False); - Assert.That(hashedNode.RespectTokenOrder, Is.True); + Assert.That(hashedNode.NodeType, Is.EqualTo(rootType)); + Assert.That(hashedNode.HasChildren, Is.True); + Assert.That(hashedNode.HasTokens, Is.False); + Assert.That(hashedNode.RespectTokenOrder, Is.True); - ulong[] Hash(string[] words) => words.Select(x => hashGenerator.GetHashCode(x)).ToArray(); + ulong[] Hash(string[] words) => words.Select(x => hashGenerator.GetHashCode(x)).ToArray(); - QueryNode[] expectedChildren = [ - new QueryNode(QueryNodeType.And) { + QueryNode[] expectedChildren = [ + new QueryNode(QueryNodeType.And) { Children = [ new QueryNode(QueryNodeType.And) { Tokens = Hash(["fox", "cat"]), @@ -268,36 +268,36 @@ public void ComplexQuery(QueryNodeType rootType) RespectTokenOrder = false }, ]; - AssertChildrenAreEqual(hashedNode.Children, expectedChildren); + AssertChildrenAreEqual(hashedNode.Children, expectedChildren); + } + + void AssertNodesAreEqual(QueryNode given, QueryNode expected) + { + Assert.That(given.NodeType, Is.EqualTo(expected.NodeType)); + Assert.That(given.RespectTokenOrder, Is.EqualTo(expected.RespectTokenOrder)); + Assert.That(given.IsFacetNode, Is.EqualTo(expected.IsFacetNode)); + Assert.That(given.HasTokens, Is.EqualTo(expected.HasTokens)); + Assert.That(given.HasChildren, Is.EqualTo(expected.HasChildren)); + Assert.That(given.FirstLookAt, Is.EqualTo(expected.FirstLookAt)); + Assert.That(given.IsFacetNode, Is.EqualTo(expected.IsFacetNode)); + if (given.HasTokens) + { + Assert.That(given.Tokens, Is.EqualTo(expected.Tokens)); } - - void AssertNodesAreEqual(QueryNode given, QueryNode expected) + if (given.HasChildren) { - Assert.That(given.NodeType, Is.EqualTo(expected.NodeType)); - 
Assert.That(given.RespectTokenOrder, Is.EqualTo(expected.RespectTokenOrder)); - Assert.That(given.IsFacetNode, Is.EqualTo(expected.IsFacetNode)); - Assert.That(given.HasTokens, Is.EqualTo(expected.HasTokens)); - Assert.That(given.HasChildren, Is.EqualTo(expected.HasChildren)); - Assert.That(given.FirstLookAt, Is.EqualTo(expected.FirstLookAt)); - Assert.That(given.IsFacetNode, Is.EqualTo(expected.IsFacetNode)); - if (given.HasTokens) - { - Assert.That(given.Tokens, Is.EqualTo(expected.Tokens)); - } - if (given.HasChildren) - { - AssertChildrenAreEqual(given.Children, expected.Children); - } + AssertChildrenAreEqual(given.Children, expected.Children); } + } - void AssertChildrenAreEqual(QueryNode[] given, QueryNode[] expected) + void AssertChildrenAreEqual(QueryNode[] given, QueryNode[] expected) + { + Assert.That(given.Length, Is.EqualTo(expected.Length)); + for (var i = 0; i < given.Length; ++i) { - Assert.That(given.Length, Is.EqualTo(expected.Length)); - for (var i = 0; i < given.Length; ++i) - { - var givenNode = given[i]; - var expectedNode = expected[i]; - AssertNodesAreEqual(givenNode, expectedNode); - } + var givenNode = given[i]; + var expectedNode = expected[i]; + AssertNodesAreEqual(givenNode, expectedNode); } + } } diff --git a/src/ZoneTree.FullTextSearch.UnitTests/sampleData/ProductList.cs b/src/ZoneTree.FullTextSearch.UnitTests/sampleData/ProductList.cs index 75225cf..7b07d7b 100644 --- a/src/ZoneTree.FullTextSearch.UnitTests/sampleData/ProductList.cs +++ b/src/ZoneTree.FullTextSearch.UnitTests/sampleData/ProductList.cs @@ -1,46 +1,46 @@ -namespace ZoneTree.FullTextSearch.UnitTests.sampleData; +namespace ZoneTree.FullTextSearch.UnitTests.sampleData; public sealed class Product { - public long Id { get; set; } - public string Name { get; set; } - public string Category { get; set; } - public decimal Price { get; set; } - public string Brand { get; set; } - public double Rating { get; set; } - public Facets Facets { get; set; } - public string 
Description { get; set; } + public long Id { get; set; } + public string Name { get; set; } + public string Category { get; set; } + public decimal Price { get; set; } + public string Brand { get; set; } + public double Rating { get; set; } + public Facets Facets { get; set; } + public string Description { get; set; } - public override string ToString() - { - return $"Product: {Id} {Name} {Category} {Price} {Brand} {Rating} {Description}"; - } + public override string ToString() + { + return $"Product: {Id} {Name} {Category} {Price} {Brand} {Rating} {Description}"; + } } public sealed class Facets { - public string Color { get; set; } - public string Connectivity { get; set; } - public string BatteryLife { get; set; } - public string[] Features { get; set; } - public string ScreenSize { get; set; } - public string Resolution { get; set; } - public string[] SmartFeatures { get; set; } - public string[] Ports { get; set; } - public string Capacity { get; set; } - public string EnergyEfficiency { get; set; } - public string Processor { get; set; } - public string Ram { get; set; } - public string Storage { get; set; } - public string GraphicsCard { get; set; } - public string Pressure { get; set; } + public string Color { get; set; } + public string Connectivity { get; set; } + public string BatteryLife { get; set; } + public string[] Features { get; set; } + public string ScreenSize { get; set; } + public string Resolution { get; set; } + public string[] SmartFeatures { get; set; } + public string[] Ports { get; set; } + public string Capacity { get; set; } + public string EnergyEfficiency { get; set; } + public string Processor { get; set; } + public string Ram { get; set; } + public string Storage { get; set; } + public string GraphicsCard { get; set; } + public string Pressure { get; set; } } public static class ProductList { - public static Product[] Products = - [ - new Product + public static Product[] Products = + [ + new Product { Id = 1, Name = "Wireless 
Noise Cancelling Headphones", diff --git a/src/ZoneTree.FullTextSearch/Hashing/DefaultHashCodeGenerator.cs b/src/ZoneTree.FullTextSearch/Hashing/DefaultHashCodeGenerator.cs index db3724d..c3b2a6b 100644 --- a/src/ZoneTree.FullTextSearch/Hashing/DefaultHashCodeGenerator.cs +++ b/src/ZoneTree.FullTextSearch/Hashing/DefaultHashCodeGenerator.cs @@ -1,4 +1,4 @@ -using ZoneTree.FullTextSearch.Normalizers; +using ZoneTree.FullTextSearch.Normalizers; namespace ZoneTree.FullTextSearch.Hashing; @@ -9,44 +9,44 @@ namespace ZoneTree.FullTextSearch.Hashing; /// public sealed class DefaultHashCodeGenerator : IHashCodeGenerator { - /// - /// Generates a hash code for the specified read-only span of characters using a custom algorithm. - /// The hash code is case-insensitive and returns 0 for spans that contain only whitespace. - /// - /// The read-only span of characters to hash. - /// A 64-bit unsigned integer representing the hash code of the input span. - public ulong GetHashCode(ReadOnlySpan text) + /// + /// Generates a hash code for the specified read-only span of characters using a custom algorithm. + /// The hash code is case-insensitive and returns 0 for spans that contain only whitespace. + /// + /// The read-only span of characters to hash. + /// A 64-bit unsigned integer representing the hash code of the input span. + public ulong GetHashCode(ReadOnlySpan text) + { + if (text.IsWhiteSpace()) return 0; + var hashedValue = 3074457345618258791ul; + var len = text.Length; + for (var i = 0; i < len; i++) { - if (text.IsWhiteSpace()) return 0; - var hashedValue = 3074457345618258791ul; - var len = text.Length; - for (var i = 0; i < len; i++) - { - hashedValue += char.ToLowerInvariant(text[i]); - hashedValue *= 3074457345618258799ul; - } - return hashedValue; + hashedValue += char.ToLowerInvariant(text[i]); + hashedValue *= 3074457345618258799ul; } + return hashedValue; + } - /// - /// Generates a hash code for the specified string using a custom algorithm. 
- /// The hash code is case-insensitive and returns 0 for null or empty strings. - /// - /// The input string to hash. - /// A 64-bit unsigned integer representing the hash code of the input string. - public ulong GetHashCode(string text) - { - return GetHashCode(text.AsSpan()); - } + /// + /// Generates a hash code for the specified string using a custom algorithm. + /// The hash code is case-insensitive and returns 0 for null or empty strings. + /// + /// The input string to hash. + /// A 64-bit unsigned integer representing the hash code of the input string. + public ulong GetHashCode(string text) + { + return GetHashCode(text.AsSpan()); + } - /// - /// Generates a hash code for the specified read-only memory of characters using a custom algorithm. - /// The hash code is case-insensitive and returns 0 for memory that contains only whitespace. - /// - /// The read-only memory of characters to hash. - /// A 64-bit unsigned integer representing the hash code of the input memory. - public ulong GetHashCode(ReadOnlyMemory text) - { - return GetHashCode(text.Span); - } + /// + /// Generates a hash code for the specified read-only memory of characters using a custom algorithm. + /// The hash code is case-insensitive and returns 0 for memory that contains only whitespace. + /// + /// The read-only memory of characters to hash. + /// A 64-bit unsigned integer representing the hash code of the input memory. + public ulong GetHashCode(ReadOnlyMemory text) + { + return GetHashCode(text.Span); + } } diff --git a/src/ZoneTree.FullTextSearch/Hashing/IHashCodeGenerator.cs b/src/ZoneTree.FullTextSearch/Hashing/IHashCodeGenerator.cs index 0ec38de..5f5124e 100644 --- a/src/ZoneTree.FullTextSearch/Hashing/IHashCodeGenerator.cs +++ b/src/ZoneTree.FullTextSearch/Hashing/IHashCodeGenerator.cs @@ -1,28 +1,28 @@ -namespace ZoneTree.FullTextSearch.Hashing; +namespace ZoneTree.FullTextSearch.Hashing; /// /// Interface for generating hash codes from various text inputs. 
/// public interface IHashCodeGenerator { - /// - /// Generates a hash code for a read-only span of characters. - /// - /// The span of characters to hash. - /// A 64-bit unsigned integer representing the hash code. - ulong GetHashCode(ReadOnlySpan text); + /// + /// Generates a hash code for a read-only span of characters. + /// + /// The span of characters to hash. + /// A 64-bit unsigned integer representing the hash code. + ulong GetHashCode(ReadOnlySpan text); - /// - /// Generates a hash code for a string. - /// - /// The string to hash. - /// A 64-bit unsigned integer representing the hash code. - ulong GetHashCode(string text); + /// + /// Generates a hash code for a string. + /// + /// The string to hash. + /// A 64-bit unsigned integer representing the hash code. + ulong GetHashCode(string text); - /// - /// Generates a hash code for a read-only memory of characters. - /// - /// The memory of characters to hash. - /// A 64-bit unsigned integer representing the hash code. - ulong GetHashCode(ReadOnlyMemory text); -} \ No newline at end of file + /// + /// Generates a hash code for a read-only memory of characters. + /// + /// The memory of characters to hash. + /// A 64-bit unsigned integer representing the hash code. + ulong GetHashCode(ReadOnlyMemory text); +} diff --git a/src/ZoneTree.FullTextSearch/Hashing/NormalizableHashCodeGenerator.cs b/src/ZoneTree.FullTextSearch/Hashing/NormalizableHashCodeGenerator.cs index e094210..bc16ffa 100644 --- a/src/ZoneTree.FullTextSearch/Hashing/NormalizableHashCodeGenerator.cs +++ b/src/ZoneTree.FullTextSearch/Hashing/NormalizableHashCodeGenerator.cs @@ -1,4 +1,4 @@ -using ZoneTree.FullTextSearch.Normalizers; +using ZoneTree.FullTextSearch.Normalizers; namespace ZoneTree.FullTextSearch.Hashing; @@ -8,79 +8,79 @@ namespace ZoneTree.FullTextSearch.Hashing; /// public sealed class NormalizableHashCodeGenerator : IHashCodeGenerator { - /// - /// Indicates whether the hash generation is case-sensitive. 
- /// - readonly bool IsCaseSensitive; + /// + /// Indicates whether the hash generation is case-sensitive. + /// + readonly bool IsCaseSensitive; - /// - /// Optional character normalizer used to normalize characters before hashing. - /// - readonly ICharNormalizer CharNormalizer; + /// + /// Optional character normalizer used to normalize characters before hashing. + /// + readonly ICharNormalizer CharNormalizer; - /// - /// Initializes a new instance of the class. - /// - /// An optional character normalizer to apply before hashing. - /// Determines whether the hash generation is case-sensitive. Default is false. - public NormalizableHashCodeGenerator( - ICharNormalizer charNormalizer = null, - bool caseSensitive = false) - { - IsCaseSensitive = caseSensitive; - CharNormalizer = charNormalizer; - } + /// + /// Initializes a new instance of the class. + /// + /// An optional character normalizer to apply before hashing. + /// Determines whether the hash generation is case-sensitive. Default is false. + public NormalizableHashCodeGenerator( + ICharNormalizer charNormalizer = null, + bool caseSensitive = false) + { + IsCaseSensitive = caseSensitive; + CharNormalizer = charNormalizer; + } - /// - /// Generates a hash code for the specified read-only span of characters using a custom algorithm. - /// The hash code can be case-sensitive or case-insensitive based on initialization and can apply character normalization. - /// Returns 0 for spans that contain only whitespace. - /// - /// The read-only span of characters to hash. - /// A 64-bit unsigned integer representing the hash code of the input span. - public ulong GetHashCode(ReadOnlySpan text) - { - if (text.IsWhiteSpace()) return 0; + /// + /// Generates a hash code for the specified read-only span of characters using a custom algorithm. + /// The hash code can be case-sensitive or case-insensitive based on initialization and can apply character normalization. 
+ /// Returns 0 for spans that contain only whitespace. + /// + /// The read-only span of characters to hash. + /// A 64-bit unsigned integer representing the hash code of the input span. + public ulong GetHashCode(ReadOnlySpan text) + { + if (text.IsWhiteSpace()) return 0; - var hashedValue = 3074457345618258791ul; - var len = text.Length; - for (var i = 0; i < len; i++) - { - var character = text[i]; + var hashedValue = 3074457345618258791ul; + var len = text.Length; + for (var i = 0; i < len; i++) + { + var character = text[i]; - if (CharNormalizer != null) - character = CharNormalizer.Normalize(character); + if (CharNormalizer != null) + character = CharNormalizer.Normalize(character); - if (!IsCaseSensitive) - character = char.ToLowerInvariant(character); + if (!IsCaseSensitive) + character = char.ToLowerInvariant(character); - hashedValue += character; - hashedValue *= 3074457345618258799ul; - } - return hashedValue; + hashedValue += character; + hashedValue *= 3074457345618258799ul; } + return hashedValue; + } - /// - /// Generates a hash code for the specified string using a custom algorithm. - /// The hash code can be case-sensitive or case-insensitive based on initialization and can apply character normalization. - /// Returns 0 for null or empty strings. - /// - /// The input string to hash. - /// A 64-bit unsigned integer representing the hash code of the input string. - public ulong GetHashCode(string text) - { - return GetHashCode(text.AsSpan()); - } + /// + /// Generates a hash code for the specified string using a custom algorithm. + /// The hash code can be case-sensitive or case-insensitive based on initialization and can apply character normalization. + /// Returns 0 for null or empty strings. + /// + /// The input string to hash. + /// A 64-bit unsigned integer representing the hash code of the input string. 
+ public ulong GetHashCode(string text) + { + return GetHashCode(text.AsSpan()); + } - /// - /// Generates a hash code for the specified read-only memory of characters using a custom algorithm. - /// The hash code can be case-sensitive or case-insensitive based on initialization and can apply character normalization. - /// Returns 0 for memory that contains only whitespace. - /// - /// The read-only memory of characters to hash. - /// A 64-bit unsigned integer representing the hash code of the input memory. - public ulong GetHashCode(ReadOnlyMemory text) - { - return GetHashCode(text.Span); - } + /// + /// Generates a hash code for the specified read-only memory of characters using a custom algorithm. + /// The hash code can be case-sensitive or case-insensitive based on initialization and can apply character normalization. + /// Returns 0 for memory that contains only whitespace. + /// + /// The read-only memory of characters to hash. + /// A 64-bit unsigned integer representing the hash code of the input memory. 
+ public ulong GetHashCode(ReadOnlyMemory text) + { + return GetHashCode(text.Span); + } } diff --git a/src/ZoneTree.FullTextSearch/Index/IndexOfTokenRecordPreviousToken.cs b/src/ZoneTree.FullTextSearch/Index/IndexOfTokenRecordPreviousToken.cs index 0d11171..f858476 100644 --- a/src/ZoneTree.FullTextSearch/Index/IndexOfTokenRecordPreviousToken.cs +++ b/src/ZoneTree.FullTextSearch/Index/IndexOfTokenRecordPreviousToken.cs @@ -1,4 +1,4 @@ -using ZoneTree.Comparers; +using ZoneTree.Comparers; using ZoneTree.FullTextSearch.Model; using ZoneTree.FullTextSearch.Search; using ZoneTree.FullTextSearch.SearchEngines; diff --git a/src/ZoneTree.FullTextSearch/Misc/FolderIterator.cs b/src/ZoneTree.FullTextSearch/Misc/FolderIterator.cs index fd2dc54..884f6df 100644 --- a/src/ZoneTree.FullTextSearch/Misc/FolderIterator.cs +++ b/src/ZoneTree.FullTextSearch/Misc/FolderIterator.cs @@ -6,61 +6,67 @@ /// public sealed class FolderIterator { - /// - /// Initializes a new instance of the class with the specified path, search pattern, and recursion option. - /// - /// The path of the directory to iterate through. - /// The search pattern to match against the names of files in the directory. - /// Indicates whether the search should include subdirectories. - public FolderIterator(string path, string searchPattern, bool isRecursive) - { - Path = path; - SearchPattern = searchPattern; - IsRecursive = isRecursive; - } + /// + /// Initializes a new instance of the class with the specified path, search pattern, and recursion option. + /// + /// The path of the directory to iterate through. + /// The search pattern to match against the names of files in the directory. + /// Indicates whether the search should include subdirectories. + public FolderIterator(string path, string searchPattern, bool isRecursive) + { + Path = path; + SearchPattern = searchPattern; + IsRecursive = isRecursive; + } - /// - /// Gets the path of the directory to iterate through. 
- /// - public string Path { get; } + /// + /// Gets the path of the directory to iterate through. + /// + public string Path { get; } - /// - /// Gets the search pattern used to match against the names of files in the directory. - /// - public string SearchPattern { get; } + /// + /// Gets the search pattern used to match against the names of files in the directory. + /// + public string SearchPattern { get; } - /// - /// Gets a value indicating whether the iteration includes subdirectories. - /// - public bool IsRecursive { get; } + /// + /// Gets a value indicating whether the iteration includes subdirectories. + /// + public bool IsRecursive { get; } - /// - /// Asynchronously iterates through all files in the directory that match the search pattern, - /// and invokes a callback function on each file. - /// - /// A function to be invoked for each file found. The function receives the file path as an argument. - /// A task representing the asynchronous operation. - /// Thrown when is null. - public async Task IterateAll(Func callback, CancellationToken cancellationToken = default) + /// + /// Asynchronously iterates through all files in the directory that match the search pattern, + /// and invokes a callback function on each file. + /// + /// A function to be invoked for each file found. The function receives the file path as an argument. + /// A task representing the asynchronous operation. + /// Thrown when is null. 
+ public async Task IterateAll(Func callback, CancellationToken cancellationToken = default) + { + await Task.Run(async () => { - await Task.Run(async () => + var paths = Directory.EnumerateFiles(Path, SearchPattern, new EnumerationOptions() + { + RecurseSubdirectories = true, + IgnoreInaccessible = true + }); + var tasks = new List(); + foreach (var path in paths) + { + if (cancellationToken.IsCancellationRequested) { - var paths = Directory.EnumerateFiles(Path, SearchPattern, new EnumerationOptions() - { - RecurseSubdirectories = true, - IgnoreInaccessible = true - }); - var tasks = new List(); - foreach (var path in paths) - { - if (cancellationToken.IsCancellationRequested) - { - Console.WriteLine("Cancelled the folder iteration."); - break; - } - tasks.Add(callback(path)); - } + Console.WriteLine("Cancelled the folder iteration."); + break; + +<<<<<<< TODO: Unmerged change from project 'ZoneTree.FullTextSearch(net9.0)', Before: await Task.WhenAll(tasks.ToArray()); - }); - } +======= + await Task.WhenAll(tasks.ToArray()).ConfigureAwait(false); +>>>>>>> After +} + tasks.Add(callback(path)); + } + await Task.WhenAll(tasks.ToArray()).ConfigureAwait(false); + }).ConfigureAwait(false).ConfigureAwait(false); + } } diff --git a/src/ZoneTree.FullTextSearch/Model/CompositeKeyOfRecordToken.cs b/src/ZoneTree.FullTextSearch/Model/CompositeKeyOfRecordToken.cs index a038bfa..ba47ec4 100644 --- a/src/ZoneTree.FullTextSearch/Model/CompositeKeyOfRecordToken.cs +++ b/src/ZoneTree.FullTextSearch/Model/CompositeKeyOfRecordToken.cs @@ -1,4 +1,4 @@ -using System.Runtime.InteropServices; +using System.Runtime.InteropServices; namespace ZoneTree.FullTextSearch.Model; @@ -15,40 +15,40 @@ public struct CompositeKeyOfRecordToken where TRecord : unmanaged where TToken : unmanaged { - /// - /// The record component of the composite key. This part of the key identifies the specific record. - /// - public TRecord Record; - - /// - /// The token component of the composite key. 
This part of the key represents the token - /// associated with the record, used to differentiate records or to index them based on the token. - /// - public TToken Token; - - public override bool Equals(object obj) - { - return obj is CompositeKeyOfRecordToken token && Equals(token); - } - - public bool Equals(CompositeKeyOfRecordToken other) - { - return EqualityComparer.Default.Equals(Record, other.Record) && - EqualityComparer.Default.Equals(Token, other.Token); - } - - public override int GetHashCode() - { - return HashCode.Combine(Record, Token); - } - - public static bool operator ==(CompositeKeyOfRecordToken left, CompositeKeyOfRecordToken right) - { - return left.Equals(right); - } - - public static bool operator !=(CompositeKeyOfRecordToken left, CompositeKeyOfRecordToken right) - { - return !(left == right); - } + /// + /// The record component of the composite key. This part of the key identifies the specific record. + /// + public TRecord Record; + + /// + /// The token component of the composite key. This part of the key represents the token + /// associated with the record, used to differentiate records or to index them based on the token. 
+ /// + public TToken Token; + + public override bool Equals(object obj) + { + return obj is CompositeKeyOfRecordToken token && Equals(token); + } + + public bool Equals(CompositeKeyOfRecordToken other) + { + return EqualityComparer.Default.Equals(Record, other.Record) && + EqualityComparer.Default.Equals(Token, other.Token); + } + + public override int GetHashCode() + { + return HashCode.Combine(Record, Token); + } + + public static bool operator ==(CompositeKeyOfRecordToken left, CompositeKeyOfRecordToken right) + { + return left.Equals(right); + } + + public static bool operator !=(CompositeKeyOfRecordToken left, CompositeKeyOfRecordToken right) + { + return !(left == right); + } } diff --git a/src/ZoneTree.FullTextSearch/Model/CompositeKeyOfRecordTokenComparer.cs b/src/ZoneTree.FullTextSearch/Model/CompositeKeyOfRecordTokenComparer.cs index 7400ff4..914c21b 100644 --- a/src/ZoneTree.FullTextSearch/Model/CompositeKeyOfRecordTokenComparer.cs +++ b/src/ZoneTree.FullTextSearch/Model/CompositeKeyOfRecordTokenComparer.cs @@ -1,4 +1,4 @@ -using ZoneTree.Comparers; +using ZoneTree.Comparers; namespace ZoneTree.FullTextSearch.Model; @@ -13,52 +13,52 @@ public sealed class CompositeKeyOfRecordTokenComparer where TRecord : unmanaged where TToken : unmanaged { - /// - /// Gets the comparer used to compare the record components of the composite keys. - /// - public IRefComparer RecordComparer { get; } + /// + /// Gets the comparer used to compare the record components of the composite keys. + /// + public IRefComparer RecordComparer { get; } - /// - /// Gets the comparer used to compare the token components of the composite keys. - /// - public IRefComparer TokenComparer { get; } + /// + /// Gets the comparer used to compare the token components of the composite keys. + /// + public IRefComparer TokenComparer { get; } - /// - /// Initializes a new instance of the class. - /// - /// The comparer to use for comparing the record components. 
- /// The comparer to use for comparing the token components. - public CompositeKeyOfRecordTokenComparer( - IRefComparer recordComparer, - IRefComparer tokenComparer) - { - RecordComparer = recordComparer; - TokenComparer = tokenComparer; - } + /// + /// Initializes a new instance of the class. + /// + /// The comparer to use for comparing the record components. + /// The comparer to use for comparing the token components. + public CompositeKeyOfRecordTokenComparer( + IRefComparer recordComparer, + IRefComparer tokenComparer) + { + RecordComparer = recordComparer; + TokenComparer = tokenComparer; + } - /// - /// Compares two instances. - /// First, it compares the record components using . - /// If the records are equal, it then compares the token components using . - /// - /// The first composite key to compare. - /// The second composite key to compare. - /// - /// An integer indicating the relative order of the two composite keys: - /// - /// Less than zero if is less than . - /// Zero if is equal to . - /// Greater than zero if is greater than . - /// - /// - public int Compare( - in CompositeKeyOfRecordToken x, - in CompositeKeyOfRecordToken y) - { - var rc = RecordComparer.Compare(x.Record, y.Record); - if (rc != 0) return rc; - var hx = x.Token; - var hy = y.Token; - return TokenComparer.Compare(hx, hy); - } -} \ No newline at end of file + /// + /// Compares two instances. + /// First, it compares the record components using . + /// If the records are equal, it then compares the token components using . + /// + /// The first composite key to compare. + /// The second composite key to compare. + /// + /// An integer indicating the relative order of the two composite keys: + /// + /// Less than zero if is less than . + /// Zero if is equal to . + /// Greater than zero if is greater than . 
+ /// + /// + public int Compare( + in CompositeKeyOfRecordToken x, + in CompositeKeyOfRecordToken y) + { + var rc = RecordComparer.Compare(x.Record, y.Record); + if (rc != 0) return rc; + var hx = x.Token; + var hy = y.Token; + return TokenComparer.Compare(hx, hy); + } +} diff --git a/src/ZoneTree.FullTextSearch/Model/CompositeKeyOfTokenRecordPrevious.cs b/src/ZoneTree.FullTextSearch/Model/CompositeKeyOfTokenRecordPrevious.cs index 756d213..5f58c4c 100644 --- a/src/ZoneTree.FullTextSearch/Model/CompositeKeyOfTokenRecordPrevious.cs +++ b/src/ZoneTree.FullTextSearch/Model/CompositeKeyOfTokenRecordPrevious.cs @@ -1,4 +1,4 @@ -using System.Runtime.InteropServices; +using System.Runtime.InteropServices; namespace ZoneTree.FullTextSearch; @@ -15,48 +15,48 @@ public struct CompositeKeyOfTokenRecordPrevious where TRecord : unmanaged where TToken : unmanaged { - /// - /// The token component of the composite key. This part of the key represents the current token - /// associated with the record. - /// - public TToken Token; - - /// - /// The record component of the composite key. This part of the key identifies the specific record - /// associated with the token. - /// - public TRecord Record; - - /// - /// The previous token component of the composite key. This part of the key represents the token - /// that immediately precedes the current token in the sequence, providing context for token order. 
- /// - public TToken PreviousToken; - - public override bool Equals(object obj) - { - return obj is CompositeKeyOfTokenRecordPrevious previous && Equals(previous); - } - - public bool Equals(CompositeKeyOfTokenRecordPrevious other) - { - return EqualityComparer.Default.Equals(Token, other.Token) && - EqualityComparer.Default.Equals(Record, other.Record) && - EqualityComparer.Default.Equals(PreviousToken, other.PreviousToken); - } - - public override int GetHashCode() - { - return HashCode.Combine(Token, Record, PreviousToken); - } - - public static bool operator ==(CompositeKeyOfTokenRecordPrevious left, CompositeKeyOfTokenRecordPrevious right) - { - return left.Equals(right); - } - - public static bool operator !=(CompositeKeyOfTokenRecordPrevious left, CompositeKeyOfTokenRecordPrevious right) - { - return !(left == right); - } + /// + /// The token component of the composite key. This part of the key represents the current token + /// associated with the record. + /// + public TToken Token; + + /// + /// The record component of the composite key. This part of the key identifies the specific record + /// associated with the token. + /// + public TRecord Record; + + /// + /// The previous token component of the composite key. This part of the key represents the token + /// that immediately precedes the current token in the sequence, providing context for token order. 
+ /// + public TToken PreviousToken; + + public override bool Equals(object obj) + { + return obj is CompositeKeyOfTokenRecordPrevious previous && Equals(previous); + } + + public bool Equals(CompositeKeyOfTokenRecordPrevious other) + { + return EqualityComparer.Default.Equals(Token, other.Token) && + EqualityComparer.Default.Equals(Record, other.Record) && + EqualityComparer.Default.Equals(PreviousToken, other.PreviousToken); + } + + public override int GetHashCode() + { + return HashCode.Combine(Token, Record, PreviousToken); + } + + public static bool operator ==(CompositeKeyOfTokenRecordPrevious left, CompositeKeyOfTokenRecordPrevious right) + { + return left.Equals(right); + } + + public static bool operator !=(CompositeKeyOfTokenRecordPrevious left, CompositeKeyOfTokenRecordPrevious right) + { + return !(left == right); + } } diff --git a/src/ZoneTree.FullTextSearch/Model/CompositeKeyOfTokenRecordPreviousComparer.cs b/src/ZoneTree.FullTextSearch/Model/CompositeKeyOfTokenRecordPreviousComparer.cs index e3194ff..6445595 100644 --- a/src/ZoneTree.FullTextSearch/Model/CompositeKeyOfTokenRecordPreviousComparer.cs +++ b/src/ZoneTree.FullTextSearch/Model/CompositeKeyOfTokenRecordPreviousComparer.cs @@ -1,4 +1,4 @@ -using ZoneTree.Comparers; +using ZoneTree.Comparers; namespace ZoneTree.FullTextSearch.Model; @@ -13,58 +13,58 @@ public sealed class CompositeKeyOfTokenRecordPreviousComparer where TRecord : unmanaged where TToken : unmanaged { - /// - /// Gets the comparer used to compare the record components of the composite keys. - /// - public IRefComparer RecordComparer { get; } + /// + /// Gets the comparer used to compare the record components of the composite keys. + /// + public IRefComparer RecordComparer { get; } - /// - /// Gets the comparer used to compare the token components of the composite keys. - /// - public IRefComparer TokenComparer { get; } + /// + /// Gets the comparer used to compare the token components of the composite keys. 
+ /// + public IRefComparer TokenComparer { get; } - /// - /// Initializes a new instance of the class - /// with the specified comparers for the record and token components. - /// - /// The comparer to use for comparing the record components. - /// The comparer to use for comparing the token components. - public CompositeKeyOfTokenRecordPreviousComparer( - IRefComparer recordComparer, - IRefComparer tokenComparer) - { - RecordComparer = recordComparer; - TokenComparer = tokenComparer; - } + /// + /// Initializes a new instance of the class + /// with the specified comparers for the record and token components. + /// + /// The comparer to use for comparing the record components. + /// The comparer to use for comparing the token components. + public CompositeKeyOfTokenRecordPreviousComparer( + IRefComparer recordComparer, + IRefComparer tokenComparer) + { + RecordComparer = recordComparer; + TokenComparer = tokenComparer; + } - /// - /// Compares two instances. - /// The comparison is performed first on the token components, then on the record components if the tokens are equal, - /// and finally on the previous token components if both the tokens and records are equal. - /// - /// The first composite key to compare. - /// The second composite key to compare. - /// - /// An integer indicating the relative order of the two composite keys: - /// - /// Less than zero if is less than . - /// Zero if is equal to . - /// Greater than zero if is greater than . - /// - /// - public int Compare( - in CompositeKeyOfTokenRecordPrevious x, - in CompositeKeyOfTokenRecordPrevious y) - { - var tokenComparer = TokenComparer; - var hx = x.Token; - var hy = y.Token; - var hc = tokenComparer.Compare(hx, hy); - if (hc != 0) return hc; - var rc = RecordComparer.Compare(x.Record, y.Record); - if (rc != 0) return rc; - var px = x.PreviousToken; - var py = y.PreviousToken; - return tokenComparer.Compare(px, py); - } -} \ No newline at end of file + /// + /// Compares two instances. 
+ /// The comparison is performed first on the token components, then on the record components if the tokens are equal, + /// and finally on the previous token components if both the tokens and records are equal. + /// + /// The first composite key to compare. + /// The second composite key to compare. + /// + /// An integer indicating the relative order of the two composite keys: + /// + /// Less than zero if is less than . + /// Zero if is equal to . + /// Greater than zero if is greater than . + /// + /// + public int Compare( + in CompositeKeyOfTokenRecordPrevious x, + in CompositeKeyOfTokenRecordPrevious y) + { + var tokenComparer = TokenComparer; + var hx = x.Token; + var hy = y.Token; + var hc = tokenComparer.Compare(hx, hy); + if (hc != 0) return hc; + var rc = RecordComparer.Compare(x.Record, y.Record); + if (rc != 0) return rc; + var px = x.PreviousToken; + var py = y.PreviousToken; + return tokenComparer.Compare(px, py); + } +} diff --git a/src/ZoneTree.FullTextSearch/Model/NGramToken4.cs b/src/ZoneTree.FullTextSearch/Model/NGramToken4.cs index 32fa507..198aa85 100644 --- a/src/ZoneTree.FullTextSearch/Model/NGramToken4.cs +++ b/src/ZoneTree.FullTextSearch/Model/NGramToken4.cs @@ -10,34 +10,54 @@ namespace ZoneTree.FullTextSearch.Model; [StructLayout(LayoutKind.Explicit, CharSet = CharSet.Unicode, Pack = 1, Size = 8)] public struct NGramToken4 { - /// - /// The raw data representing the 4-character n-gram as a 64-bit unsigned integer. - /// This field overlaps with the individual character fields. - /// - [FieldOffset(0)] - public ulong data; - - /// - /// The first character of the 4-character n-gram. - /// - [FieldOffset(0)] - public char c0; - - /// - /// The second character of the 4-character n-gram. - /// - [FieldOffset(2)] - public char c1; - - /// - /// The third character of the 4-character n-gram. - /// - [FieldOffset(4)] - public char c3; - - /// - /// The fourth character of the 4-character n-gram. 
- /// - [FieldOffset(6)] - public char c4; + /// + /// The raw data representing the 4-character n-gram as a 64-bit unsigned integer. + /// This field overlaps with the individual character fields. + /// + [FieldOffset(0)] + public ulong data; + + /// + /// The first character of the 4-character n-gram. + /// + [FieldOffset(0)] + public char c0; + + /// + /// The second character of the 4-character n-gram. + /// + [FieldOffset(2)] + public char c1; + + /// + /// The third character of the 4-character n-gram. + /// + [FieldOffset(4)] + public char c3; + + /// + /// The fourth character of the 4-character n-gram. + /// + [FieldOffset(6)] + public char c4; + + public override bool Equals(object obj) + { + throw new NotImplementedException(); + } + + public override int GetHashCode() + { + throw new NotImplementedException(); + } + + public static bool operator ==(NGramToken4 left, NGramToken4 right) + { + return left.Equals(right); + } + + public static bool operator !=(NGramToken4 left, NGramToken4 right) + { + return !(left == right); + } } diff --git a/src/ZoneTree.FullTextSearch/Model/TokenPair.cs b/src/ZoneTree.FullTextSearch/Model/TokenPair.cs index 768f624..3f186fb 100644 --- a/src/ZoneTree.FullTextSearch/Model/TokenPair.cs +++ b/src/ZoneTree.FullTextSearch/Model/TokenPair.cs @@ -1,4 +1,4 @@ -using System.Runtime.InteropServices; +using System.Runtime.InteropServices; namespace ZoneTree.FullTextSearch; @@ -9,39 +9,39 @@ namespace ZoneTree.FullTextSearch; [StructLayout(LayoutKind.Sequential)] public struct TokenPair : IEquatable> where TToken : unmanaged { - /// - /// The current token in the pair. - /// - public TToken Token; - - /// - /// The token that precedes the current token in the sequence. 
- /// - public TToken PreviousToken; - - public override bool Equals(object obj) - { - return obj is TokenPair pair && Equals(pair); - } - - public bool Equals(TokenPair other) - { - return EqualityComparer.Default.Equals(Token, other.Token) && - EqualityComparer.Default.Equals(PreviousToken, other.PreviousToken); - } - - public override int GetHashCode() - { - return HashCode.Combine(Token, PreviousToken); - } - - public static bool operator ==(TokenPair left, TokenPair right) - { - return left.Equals(right); - } - - public static bool operator !=(TokenPair left, TokenPair right) - { - return !(left == right); - } + /// + /// The current token in the pair. + /// + public TToken Token; + + /// + /// The token that precedes the current token in the sequence. + /// + public TToken PreviousToken; + + public override bool Equals(object obj) + { + return obj is TokenPair pair && Equals(pair); + } + + public bool Equals(TokenPair other) + { + return EqualityComparer.Default.Equals(Token, other.Token) && + EqualityComparer.Default.Equals(PreviousToken, other.PreviousToken); + } + + public override int GetHashCode() + { + return HashCode.Combine(Token, PreviousToken); + } + + public static bool operator ==(TokenPair left, TokenPair right) + { + return left.Equals(right); + } + + public static bool operator !=(TokenPair left, TokenPair right) + { + return !(left == right); + } } diff --git a/src/ZoneTree.FullTextSearch/Normalizers/DiacriticNormalizer.cs b/src/ZoneTree.FullTextSearch/Normalizers/DiacriticNormalizer.cs index 3881166..a58b438 100644 --- a/src/ZoneTree.FullTextSearch/Normalizers/DiacriticNormalizer.cs +++ b/src/ZoneTree.FullTextSearch/Normalizers/DiacriticNormalizer.cs @@ -1,4 +1,4 @@ -using System; +using System; using System.Collections.Generic; using System.Text; using System.Globalization; @@ -11,13 +11,13 @@ namespace ZoneTree.FullTextSearch.Normalizers; /// public sealed class DiacriticNormalizer : ICharNormalizer, IStringNormalizer { - /// - /// The 
default mapping of characters with diacritics to their base characters. - /// This dictionary includes common accented characters and their corresponding base characters. - /// - public static readonly IReadOnlyDictionary DefaultCharMap = - new Dictionary - { + /// + /// The default mapping of characters with diacritics to their base characters. + /// This dictionary includes common accented characters and their corresponding base characters. + /// + public static readonly IReadOnlyDictionary DefaultCharMap = + new Dictionary + { // Uppercase mappings {'Â', 'A'}, {'À', 'A'}, {'Á', 'A'}, {'Ä', 'A'}, {'Ã', 'A'}, {'Å', 'A'}, {'Ç', 'C'}, @@ -40,84 +40,84 @@ public sealed class DiacriticNormalizer : ICharNormalizer, IStringNormalizer {'ú', 'u'}, {'ù', 'u'}, {'û', 'u'}, {'ü', 'u'}, {'ý', 'y'}, {'ÿ', 'y'}, {'ş', 's'} - }; + }; - /// - /// The character map used for normalization, mapping characters with diacritics to their base characters. - /// This can be customized through the constructor. - /// - readonly IReadOnlyDictionary CharMap; + /// + /// The character map used for normalization, mapping characters with diacritics to their base characters. + /// This can be customized through the constructor. + /// + readonly IReadOnlyDictionary CharMap; - /// - /// A set of characters that should be excluded from normalization. - /// If a character is in this set, it will be returned as-is, without normalization. - /// - readonly IReadOnlySet ExcludeSet; + /// + /// A set of characters that should be excluded from normalization. + /// If a character is in this set, it will be returned as-is, without normalization. + /// + readonly IReadOnlySet ExcludeSet; - /// - /// Initializes a new instance of the DiacriticNormalizer class. - /// Allows for customization of the character map and exclusion set. - /// - /// A custom character map for normalization. If null, the default map is used. - /// A set of characters to exclude from normalization. If null, an empty set is used. 
- public DiacriticNormalizer( - IReadOnlyDictionary charMap = null, - IReadOnlySet exclude = null) - { - CharMap = charMap ?? DefaultCharMap; - ExcludeSet = exclude ?? new HashSet(); - } + /// + /// Initializes a new instance of the DiacriticNormalizer class. + /// Allows for customization of the character map and exclusion set. + /// + /// A custom character map for normalization. If null, the default map is used. + /// A set of characters to exclude from normalization. If null, an empty set is used. + public DiacriticNormalizer( + IReadOnlyDictionary charMap = null, + IReadOnlySet exclude = null) + { + CharMap = charMap ?? DefaultCharMap; + ExcludeSet = exclude ?? new HashSet(); + } - /// - /// Normalizes a single character by removing diacritical marks and converting it to its base form. - /// If the character is in the exclusion set, it is returned as-is. - /// - /// The character to normalize. - /// The normalized character, or the original character if it is in the exclusion set or cannot be normalized. - public char Normalize(char input) - { - if (ExcludeSet.Contains(input)) - return input; + /// + /// Normalizes a single character by removing diacritical marks and converting it to its base form. + /// If the character is in the exclusion set, it is returned as-is. + /// + /// The character to normalize. + /// The normalized character, or the original character if it is in the exclusion set or cannot be normalized. 
+ public char Normalize(char input) + { + if (ExcludeSet.Contains(input)) + return input; - if (CharMap.TryGetValue(input, out char baseChar)) - return baseChar; + if (CharMap.TryGetValue(input, out char baseChar)) + return baseChar; - var normalized = input.ToString().Normalize(NormalizationForm.FormD); - var len = normalized.Length; - for (var i = 0; i < len; i++) - { - char c = normalized[i]; - var uc = CharUnicodeInfo.GetUnicodeCategory(c); - if (uc != UnicodeCategory.NonSpacingMark) - return c; - } - return input; + var normalized = input.ToString().Normalize(NormalizationForm.FormD); + var len = normalized.Length; + for (var i = 0; i < len; i++) + { + char c = normalized[i]; + var uc = CharUnicodeInfo.GetUnicodeCategory(c); + if (uc != UnicodeCategory.NonSpacingMark) + return c; } + return input; + } - /// - /// Normalizes a string by removing diacritical marks from each character and converting them to their base forms. - /// Characters in the exclusion set are returned as-is. - /// - /// The string to normalize, provided as a ReadOnlySpan of characters. - /// The normalized string. - public string Normalize(ReadOnlySpan input) + /// + /// Normalizes a string by removing diacritical marks from each character and converting them to their base forms. + /// Characters in the exclusion set are returned as-is. + /// + /// The string to normalize, provided as a ReadOnlySpan of characters. + /// The normalized string. 
+ public string Normalize(ReadOnlySpan input) + { + var len = input.Length; + for (int i = 0; i < len; i++) { - var len = input.Length; - for (int i = 0; i < len; i++) + char currentChar = input[i]; + if (!ExcludeSet.Contains(currentChar) && + CharMap.ContainsKey(currentChar)) + { + var sb = new StringBuilder(len); + sb.Append(input.Slice(0, i)); + for (int j = i; j < len; j++) { - char currentChar = input[i]; - if (!ExcludeSet.Contains(currentChar) && - CharMap.ContainsKey(currentChar)) - { - var sb = new StringBuilder(len); - sb.Append(input.Slice(0, i)); - for (int j = i; j < len; j++) - { - sb.Append(Normalize(input[j])); - } - return sb.ToString(); - } + sb.Append(Normalize(input[j])); } - return input.ToString(); + return sb.ToString(); + } } + return input.ToString(); + } } diff --git a/src/ZoneTree.FullTextSearch/Normalizers/ICharNormalizer.cs b/src/ZoneTree.FullTextSearch/Normalizers/ICharNormalizer.cs index 4e8ec77..1ec75ba 100644 --- a/src/ZoneTree.FullTextSearch/Normalizers/ICharNormalizer.cs +++ b/src/ZoneTree.FullTextSearch/Normalizers/ICharNormalizer.cs @@ -1,14 +1,14 @@ -namespace ZoneTree.FullTextSearch.Normalizers; +namespace ZoneTree.FullTextSearch.Normalizers; /// /// Defines a contract for normalizing individual characters. /// public interface ICharNormalizer { - /// - /// Normalizes a single character by removing or modifying diacritical marks. - /// - /// The character to normalize. - /// The normalized character. - char Normalize(char input); + /// + /// Normalizes a single character by removing or modifying diacritical marks. + /// + /// The character to normalize. + /// The normalized character. 
+ char Normalize(char input); } diff --git a/src/ZoneTree.FullTextSearch/Normalizers/IStringNormalizer.cs b/src/ZoneTree.FullTextSearch/Normalizers/IStringNormalizer.cs index 64c6748..80d19d7 100644 --- a/src/ZoneTree.FullTextSearch/Normalizers/IStringNormalizer.cs +++ b/src/ZoneTree.FullTextSearch/Normalizers/IStringNormalizer.cs @@ -1,14 +1,14 @@ -namespace ZoneTree.FullTextSearch.Normalizers; +namespace ZoneTree.FullTextSearch.Normalizers; /// /// Defines a contract for normalizing strings or spans of characters. /// public interface IStringNormalizer { - /// - /// Normalizes a span of characters, returning a string with normalized characters. - /// - /// The span of characters to normalize. - /// A normalized string. - string Normalize(ReadOnlySpan input); -} \ No newline at end of file + /// + /// Normalizes a span of characters, returning a string with normalized characters. + /// + /// The span of characters to normalize. + /// A normalized string. + string Normalize(ReadOnlySpan input); +} diff --git a/src/ZoneTree.FullTextSearch/QueryLanguage/Parser.cs b/src/ZoneTree.FullTextSearch/QueryLanguage/Parser.cs index c433df0..e824fff 100644 --- a/src/ZoneTree.FullTextSearch/QueryLanguage/Parser.cs +++ b/src/ZoneTree.FullTextSearch/QueryLanguage/Parser.cs @@ -1,4 +1,4 @@ -using System; +using System; using System.Collections.Generic; using System.Linq; using System.Text; @@ -12,425 +12,425 @@ namespace ZoneTree.FullTextSearch.QueryLanguage; /// public sealed class Parser { - /// - /// The tokens to parse. - /// - readonly Token[] Tokens; - - /// - /// The current position in the token list. - /// - int Position; - - /// - /// Initializes a new instance of the class with a sequence of tokens. - /// - /// The sequence of tokens to parse. - public Parser(IEnumerable tokens) + /// + /// The tokens to parse. + /// + readonly Token[] Tokens; + + /// + /// The current position in the token list. 
+ /// + int Position; + + /// + /// Initializes a new instance of the class with a sequence of tokens. + /// + /// The sequence of tokens to parse. + public Parser(IEnumerable tokens) + { + Tokens = tokens.ToArray(); + Position = 0; + } + + /// + /// Initializes a new instance of the class with a query string. + /// + /// The query string to tokenize and parse. + public Parser(string query) + { + var tokenizer = new Tokenizer(query); + Tokens = tokenizer.Tokenize().ToArray(); + } + + /// + /// Parses the tokens into a object. + /// + /// A representing the parsed query. + public SearchQuery Parse() + { + if (IsAtEnd()) + return new SearchQuery(new QueryNode(QueryNodeType.And)); + var rootNode = ParseExpression(); + return new SearchQuery(rootNode); + } + + /// + /// Parses an expression starting from the current token. + /// + /// The minimum precedence level for the expression. + /// A representing the parsed expression. + QueryNode ParseExpression(int precedence = 0) + { + var left = ParseTerm(); + + while (true) { - Tokens = tokens.ToArray(); - Position = 0; - } - - /// - /// Initializes a new instance of the class with a query string. - /// - /// The query string to tokenize and parse. - public Parser(string query) - { - var tokenizer = new Tokenizer(query); - Tokens = tokenizer.Tokenize().ToArray(); - } - - /// - /// Parses the tokens into a object. - /// - /// A representing the parsed query. - public SearchQuery Parse() - { - if (IsAtEnd()) - return new SearchQuery(new QueryNode(QueryNodeType.And)); - var rootNode = ParseExpression(); - return new SearchQuery(rootNode); - } - - /// - /// Parses an expression starting from the current token. - /// - /// The minimum precedence level for the expression. - /// A representing the parsed expression. 
- QueryNode ParseExpression(int precedence = 0) - { - var left = ParseTerm(); - - while (true) - { - if (IsAtEnd()) break; - - var operatorToken = Peek(); - var opType = operatorToken.Type; - - if (!operatorToken.IsOperator) - { - if (opType == TokenType.CloseParenthesis) - break; - - var right2 = ParseExpression(); - left = new QueryNode(QueryNodeType.And, children: new[] { left, right2 }); - break; - } - - int currentPrecedence = GetPrecedence(opType); - - if (currentPrecedence < precedence) break; + if (IsAtEnd()) break; - Advance(); + var operatorToken = Peek(); + var opType = operatorToken.Type; - if (IsAtEnd()) break; // tolerate operator in the end. + if (!operatorToken.IsOperator) + { + if (opType == TokenType.CloseParenthesis) + break; - var right = ParseExpression(currentPrecedence + 1); + var right2 = ParseExpression(); + left = new QueryNode(QueryNodeType.And, children: new[] { left, right2 }); + break; + } - if (opType == TokenType.Not) - { - right = new QueryNode(QueryNodeType.Not, children: new[] { right }); - left = new QueryNode(QueryNodeType.And, children: new[] { left, right }); - continue; - } - - var nodeType = opType == TokenType.And ? QueryNodeType.And : QueryNodeType.Or; - left = new QueryNode(nodeType, children: new[] { left, right }); - } - - return left; - } - - /// - /// Gets the precedence level for a given operator token type. - /// - /// The type of the operator token. - /// The precedence level, where a higher number indicates higher precedence. - int GetPrecedence(TokenType tokenType) - { - // Define precedence levels: higher number means higher precedence - return tokenType switch - { - TokenType.And => 2, - TokenType.Or => 1, - _ => 0, - }; - } - - /// - /// Parses a term, which may be an IN expression, a NOT expression, or a simple factor. - /// - /// A representing the parsed term. 
- QueryNode ParseTerm() - { - if (Check(TokenType.In)) - { - return ParseInExpression(); - } - if (Match(TokenType.Not)) - { - if (Check(TokenType.In)) - { - return ParseNotInExpression(); - } - var factor = ParseFactor(); - return new QueryNode(QueryNodeType.Not, children: [factor]); - } - return ParseFactor(); - } - - /// - /// Parses a factor, which may be a simple keyword, a phrase, a facet expression, or a nested expression. - /// - /// A representing the parsed factor. - QueryNode ParseFactor() - { - if (Match(TokenType.OpenParenthesis)) - { - if (IsAtEnd()) throw new UnexpectedTokenException("OpenParenthesis found at the end of the query."); - var expression = ParseExpression(); - if (IsAtEnd()) return expression; // tolerate not properly closed parenthesis. - Consume(TokenType.CloseParenthesis, "Expect ')' after expression."); - return expression; - } - else if (IsFacetExpression()) - { - return ParseFacetExpression(); - } - else if (IsFacetInExpression()) - { - return ParseFacetInExpression(); - } - else if (IsFacetNotInExpression()) - { - return ParseFacetNotInExpression(); - } - else if (Match(TokenType.Word, TokenType.Phrase)) - { - var list = new List(); - list.Add(Previous().Value); - while (!IsFacetExpression() && Match(TokenType.Word, TokenType.Phrase)) - { - list.Add(Previous().Value); - } - return new QueryNode(QueryNodeType.And, tokens: list.ToArray(), respectTokenOrder: false); - } - else if (Match(TokenType.KeywordListOpen)) - { - return ParseKeywordListExpression(); - } - else if (Match(TokenType.Comma)) - { - // skip comma. - return ParseFactor(); - } - throw new UnexpectedTokenException($"Unexpected token: {Peek().Type}"); - } - - /// - /// Checks if the current token is the start of a facet expression. - /// - /// true if the current token starts a facet expression; otherwise, false. 
- bool IsFacetExpression() - { - return (Check(TokenType.Phrase) || Check(TokenType.Word)) && LookAhead(TokenType.Colon); - } + int currentPrecedence = GetPrecedence(opType); - /// - /// Checks if the current token is the start of a facet IN expression. - /// - /// true if the current token starts a facet IN expression; otherwise, false. - bool IsFacetInExpression() - { - return (Check(TokenType.Phrase) || Check(TokenType.Word)) && LookAhead(TokenType.In); - } + if (currentPrecedence < precedence) break; - /// - /// Checks if the current token is the start of a facet NOT IN expression. - /// - /// true if the current token starts a facet NOT IN expression; otherwise, false. - bool IsFacetNotInExpression() - { - return (Check(TokenType.Phrase) || Check(TokenType.Word)) && - LookAhead(TokenType.Not) && - LookAhead2(TokenType.In); - } + Advance(); - /// - /// Determines whether the current token is a phrase or a word. - /// - /// The type of the token (Phrase or Word). - TokenType PhraseOrWord() - { - return Check(TokenType.Phrase) ? TokenType.Phrase : TokenType.Word; - } + if (IsAtEnd()) break; // tolerate operator in the end. - /// - /// Parses an IN expression. - /// - /// A representing the parsed IN expression. - QueryNode ParseInExpression() - { - Consume(TokenType.In, "Expect 'IN'."); - Consume(TokenType.KeywordListOpen, "Expect '[' after IN operator."); - return ParseKeywordListExpression(); - } + var right = ParseExpression(currentPrecedence + 1); - /// - /// Parses a NOT IN expression. - /// - /// A representing the parsed NOT IN expression. 
- QueryNode ParseNotInExpression() - { - Consume(TokenType.In, "Expect 'IN' after 'NOT'."); - Consume(TokenType.KeywordListOpen, "Expect '[' after IN operator."); - var result = ParseKeywordListExpression(); - result.NodeType = QueryNodeType.Not; - result.RespectTokenOrder = false; - return result; - } + if (opType == TokenType.Not) + { + right = new QueryNode(QueryNodeType.Not, children: new[] { right }); + left = new QueryNode(QueryNodeType.And, children: new[] { left, right }); + continue; + } - /// - /// Parses a facet expression. - /// - /// A representing the parsed facet expression. - QueryNode ParseFacetExpression() - { - var name = Consume(PhraseOrWord(), "Expect a facet name.").Value; - Consume(TokenType.Colon, "Expect ':' after facet name."); - var value = Consume(PhraseOrWord(), "Expect a facet value.").Value; - var facet = $"{name}:{value}"; - return new QueryNode(QueryNodeType.And, tokens: [facet], isFacetNode: true); + var nodeType = opType == TokenType.And ? QueryNodeType.And : QueryNodeType.Or; + left = new QueryNode(nodeType, children: new[] { left, right }); } - /// - /// Parses a facet IN expression. - /// - /// A representing the parsed facet IN expression. - QueryNode ParseFacetInExpression() + return left; + } + + /// + /// Gets the precedence level for a given operator token type. + /// + /// The type of the operator token. + /// The precedence level, where a higher number indicates higher precedence. + int GetPrecedence(TokenType tokenType) + { + // Define precedence levels: higher number means higher precedence + return tokenType switch { - var name = Consume(PhraseOrWord(), "Expect a facet name.").Value; - Consume(TokenType.In, "Expect 'IN' after facet name."); - Consume(TokenType.KeywordListOpen, "Expect '[' after IN operator."); - return ParseFacetValuesListExpresison(name); - } - - /// - /// Parses a facet NOT IN expression. - /// - /// A representing the parsed facet NOT IN expression. 
- QueryNode ParseFacetNotInExpression() + TokenType.And => 2, + TokenType.Or => 1, + _ => 0, + }; + } + + /// + /// Parses a term, which may be an IN expression, a NOT expression, or a simple factor. + /// + /// A representing the parsed term. + QueryNode ParseTerm() + { + if (Check(TokenType.In)) { - var name = Consume(PhraseOrWord(), "Expect a facet name.").Value; - Consume(TokenType.Not, "Expect 'NOT' after facet name."); - Consume(TokenType.In, "Expect 'IN' after facet name."); - Consume(TokenType.KeywordListOpen, "Expect '[' after IN operator."); - var result = ParseFacetValuesListExpresison(name); - result.NodeType = QueryNodeType.Not; - result.RespectTokenOrder = false; - return result; + return ParseInExpression(); } - - /// - /// Parses a list of facet values in an IN or NOT IN expression. - /// - /// The name of the facet. - /// A representing the parsed facet values list. - QueryNode ParseFacetValuesListExpresison(string facetName) + if (Match(TokenType.Not)) { - var facetValues = new List(); - do - { - var keyword = Consume(PhraseOrWord(), "Expect a word or phrase in the list.").Value; - facetValues.Add($"{facetName}:{keyword}"); - } while (Match(TokenType.Comma)); - - if (!IsAtEnd()) // Tolerate not properly closed keyword list. - Consume(TokenType.KeywordListClose, "Expect ']' after the keyword list."); - - return new QueryNode(QueryNodeType.Or, tokens: facetValues.ToArray(), isFacetNode: true); + if (Check(TokenType.In)) + { + return ParseNotInExpression(); + } + var factor = ParseFactor(); + return new QueryNode(QueryNodeType.Not, children: [factor]); } - - /// - /// Parses a list of keywords. - /// - /// A representing the parsed keyword list. - QueryNode ParseKeywordListExpression() + return ParseFactor(); + } + + /// + /// Parses a factor, which may be a simple keyword, a phrase, a facet expression, or a nested expression. + /// + /// A representing the parsed factor. 
+ QueryNode ParseFactor() + { + if (Match(TokenType.OpenParenthesis)) { - var keywords = new List(); - do - { - var keyword = Consume(PhraseOrWord(), "Expect a word or phrase in the list.").Value; - keywords.Add(keyword); - } while (Match(TokenType.Comma)); - - if (!IsAtEnd()) // Tolerate not properly closed keyword list. - Consume(TokenType.KeywordListClose, "Expect ']' after the keyword list."); - - return new QueryNode(QueryNodeType.Or, tokens: keywords.ToArray()); + if (IsAtEnd()) throw new UnexpectedTokenException("OpenParenthesis found at the end of the query."); + var expression = ParseExpression(); + if (IsAtEnd()) return expression; // tolerate not properly closed parenthesis. + Consume(TokenType.CloseParenthesis, "Expect ')' after expression."); + return expression; } - - /// - /// Consumes the current token if it matches the expected type, otherwise throws an exception. - /// - /// The expected token type. - /// The error message to include if the token does not match. - /// The consumed token. - /// Thrown if the current token does not match the expected type. - Token Consume(TokenType type, string errorMessage) + else if (IsFacetExpression()) { - if (Check(type)) return Advance(); - throw new UnexpectedTokenException(errorMessage); + return ParseFacetExpression(); } - - /// - /// Advances the parser if the current token matches any of the given types. - /// - /// The token types to match. - /// true if a matching token was found; otherwise, false. - bool Match(params TokenType[] types) + else if (IsFacetInExpression()) { - foreach (var type in types) - { - if (Check(type)) - { - Advance(); - return true; - } - } - return false; + return ParseFacetInExpression(); } - - /// - /// Checks if the current token matches the specified type. - /// - /// The type of token to check for. - /// true if the current token matches the specified type; otherwise, false. 
- bool Check(TokenType type) + else if (IsFacetNotInExpression()) { - if (IsAtEnd()) return false; - return Peek().Type == type; + return ParseFacetNotInExpression(); } - - /// - /// Looks ahead to check if the next token matches the specified type. - /// - /// The type of token to check for. - /// true if the next token matches the specified type; otherwise, false. - bool LookAhead(TokenType type) + else if (Match(TokenType.Word, TokenType.Phrase)) { - if (Position + 1 >= Tokens.Length) return false; - return Tokens[Position + 1].Type == type; + var list = new List(); + list.Add(Previous().Value); + while (!IsFacetExpression() && Match(TokenType.Word, TokenType.Phrase)) + { + list.Add(Previous().Value); + } + return new QueryNode(QueryNodeType.And, tokens: list.ToArray(), respectTokenOrder: false); } - - /// - /// Looks ahead two tokens to check if the second next token matches the specified type. - /// - /// The type of token to check for. - /// true if the second next token matches the specified type; otherwise, false. - bool LookAhead2(TokenType type) + else if (Match(TokenType.KeywordListOpen)) { - if (Position + 2 >= Tokens.Length) return false; - return Tokens[Position + 2].Type == type; + return ParseKeywordListExpression(); } - - /// - /// Advances to the next token. - /// - /// The token that was just consumed. - Token Advance() + else if (Match(TokenType.Comma)) { - if (!IsAtEnd()) Position++; - return Previous(); + // skip comma. + return ParseFactor(); } - - /// - /// Checks if the parser has reached the end of the token list. - /// - /// true if there are no more tokens to parse; otherwise, false. - bool IsAtEnd() + throw new UnexpectedTokenException($"Unexpected token: {Peek().Type}"); + } + + /// + /// Checks if the current token is the start of a facet expression. + /// + /// true if the current token starts a facet expression; otherwise, false. 
+ bool IsFacetExpression() + { + return (Check(TokenType.Phrase) || Check(TokenType.Word)) && LookAhead(TokenType.Colon); + } + + /// + /// Checks if the current token is the start of a facet IN expression. + /// + /// true if the current token starts a facet IN expression; otherwise, false. + bool IsFacetInExpression() + { + return (Check(TokenType.Phrase) || Check(TokenType.Word)) && LookAhead(TokenType.In); + } + + /// + /// Checks if the current token is the start of a facet NOT IN expression. + /// + /// true if the current token starts a facet NOT IN expression; otherwise, false. + bool IsFacetNotInExpression() + { + return (Check(TokenType.Phrase) || Check(TokenType.Word)) && + LookAhead(TokenType.Not) && + LookAhead2(TokenType.In); + } + + /// + /// Determines whether the current token is a phrase or a word. + /// + /// The type of the token (Phrase or Word). + TokenType PhraseOrWord() + { + return Check(TokenType.Phrase) ? TokenType.Phrase : TokenType.Word; + } + + /// + /// Parses an IN expression. + /// + /// A representing the parsed IN expression. + QueryNode ParseInExpression() + { + Consume(TokenType.In, "Expect 'IN'."); + Consume(TokenType.KeywordListOpen, "Expect '[' after IN operator."); + return ParseKeywordListExpression(); + } + + /// + /// Parses a NOT IN expression. + /// + /// A representing the parsed NOT IN expression. + QueryNode ParseNotInExpression() + { + Consume(TokenType.In, "Expect 'IN' after 'NOT'."); + Consume(TokenType.KeywordListOpen, "Expect '[' after IN operator."); + var result = ParseKeywordListExpression(); + result.NodeType = QueryNodeType.Not; + result.RespectTokenOrder = false; + return result; + } + + /// + /// Parses a facet expression. + /// + /// A representing the parsed facet expression. 
+ QueryNode ParseFacetExpression() + { + var name = Consume(PhraseOrWord(), "Expect a facet name.").Value; + Consume(TokenType.Colon, "Expect ':' after facet name."); + var value = Consume(PhraseOrWord(), "Expect a facet value.").Value; + var facet = $"{name}:{value}"; + return new QueryNode(QueryNodeType.And, tokens: [facet], isFacetNode: true); + } + + /// + /// Parses a facet IN expression. + /// + /// A representing the parsed facet IN expression. + QueryNode ParseFacetInExpression() + { + var name = Consume(PhraseOrWord(), "Expect a facet name.").Value; + Consume(TokenType.In, "Expect 'IN' after facet name."); + Consume(TokenType.KeywordListOpen, "Expect '[' after IN operator."); + return ParseFacetValuesListExpresison(name); + } + + /// + /// Parses a facet NOT IN expression. + /// + /// A representing the parsed facet NOT IN expression. + QueryNode ParseFacetNotInExpression() + { + var name = Consume(PhraseOrWord(), "Expect a facet name.").Value; + Consume(TokenType.Not, "Expect 'NOT' after facet name."); + Consume(TokenType.In, "Expect 'IN' after facet name."); + Consume(TokenType.KeywordListOpen, "Expect '[' after IN operator."); + var result = ParseFacetValuesListExpresison(name); + result.NodeType = QueryNodeType.Not; + result.RespectTokenOrder = false; + return result; + } + + /// + /// Parses a list of facet values in an IN or NOT IN expression. + /// + /// The name of the facet. + /// A representing the parsed facet values list. + QueryNode ParseFacetValuesListExpresison(string facetName) + { + var facetValues = new List(); + do { - return Position >= Tokens.Length; - } - - /// - /// Peeks at the current token without advancing the parser. - /// - /// The current token. - Token Peek() + var keyword = Consume(PhraseOrWord(), "Expect a word or phrase in the list.").Value; + facetValues.Add($"{facetName}:{keyword}"); + } while (Match(TokenType.Comma)); + + if (!IsAtEnd()) // Tolerate not properly closed keyword list. 
+ Consume(TokenType.KeywordListClose, "Expect ']' after the keyword list."); + + return new QueryNode(QueryNodeType.Or, tokens: facetValues.ToArray(), isFacetNode: true); + } + + /// + /// Parses a list of keywords. + /// + /// A representing the parsed keyword list. + QueryNode ParseKeywordListExpression() + { + var keywords = new List(); + do { - return Tokens[Position]; - } - - /// - /// Gets the previous token in the token list. - /// - /// The previous token. - Token Previous() + var keyword = Consume(PhraseOrWord(), "Expect a word or phrase in the list.").Value; + keywords.Add(keyword); + } while (Match(TokenType.Comma)); + + if (!IsAtEnd()) // Tolerate not properly closed keyword list. + Consume(TokenType.KeywordListClose, "Expect ']' after the keyword list."); + + return new QueryNode(QueryNodeType.Or, tokens: keywords.ToArray()); + } + + /// + /// Consumes the current token if it matches the expected type, otherwise throws an exception. + /// + /// The expected token type. + /// The error message to include if the token does not match. + /// The consumed token. + /// Thrown if the current token does not match the expected type. + Token Consume(TokenType type, string errorMessage) + { + if (Check(type)) return Advance(); + throw new UnexpectedTokenException(errorMessage); + } + + /// + /// Advances the parser if the current token matches any of the given types. + /// + /// The token types to match. + /// true if a matching token was found; otherwise, false. + bool Match(params TokenType[] types) + { + foreach (var type in types) { - return Tokens[Position - 1]; + if (Check(type)) + { + Advance(); + return true; + } } + return false; + } + + /// + /// Checks if the current token matches the specified type. + /// + /// The type of token to check for. + /// true if the current token matches the specified type; otherwise, false. 
+ bool Check(TokenType type) + { + if (IsAtEnd()) return false; + return Peek().Type == type; + } + + /// + /// Looks ahead to check if the next token matches the specified type. + /// + /// The type of token to check for. + /// true if the next token matches the specified type; otherwise, false. + bool LookAhead(TokenType type) + { + if (Position + 1 >= Tokens.Length) return false; + return Tokens[Position + 1].Type == type; + } + + /// + /// Looks ahead two tokens to check if the second next token matches the specified type. + /// + /// The type of token to check for. + /// true if the second next token matches the specified type; otherwise, false. + bool LookAhead2(TokenType type) + { + if (Position + 2 >= Tokens.Length) return false; + return Tokens[Position + 2].Type == type; + } + + /// + /// Advances to the next token. + /// + /// The token that was just consumed. + Token Advance() + { + if (!IsAtEnd()) Position++; + return Previous(); + } + + /// + /// Checks if the parser has reached the end of the token list. + /// + /// true if there are no more tokens to parse; otherwise, false. + bool IsAtEnd() + { + return Position >= Tokens.Length; + } + + /// + /// Peeks at the current token without advancing the parser. + /// + /// The current token. + Token Peek() + { + return Tokens[Position]; + } + + /// + /// Gets the previous token in the token list. + /// + /// The previous token. + Token Previous() + { + return Tokens[Position - 1]; + } } diff --git a/src/ZoneTree.FullTextSearch/QueryLanguage/Token.cs b/src/ZoneTree.FullTextSearch/QueryLanguage/Token.cs index 86329aa..0108c8f 100644 --- a/src/ZoneTree.FullTextSearch/QueryLanguage/Token.cs +++ b/src/ZoneTree.FullTextSearch/QueryLanguage/Token.cs @@ -1,45 +1,45 @@ -namespace ZoneTree.FullTextSearch.QueryLanguage; +namespace ZoneTree.FullTextSearch.QueryLanguage; /// /// Represents a token in the query language. /// public sealed class Token { - /// - /// Gets the type of the token. 
- /// - public TokenType Type { get; } + /// + /// Gets the type of the token. + /// + public TokenType Type { get; } - /// - /// Gets the value of the token. - /// + /// Gets the value of the token. + /// - /// Determines whether the token is an operator (AND, OR, NOT). - /// - public bool IsOperator => - Type == TokenType.And || - Type == TokenType.Or || - Type == TokenType.Not; + /// + /// Determines whether the token is an operator (AND, OR, NOT). + /// + public bool IsOperator => + Type == TokenType.And || + Type == TokenType.Or || + Type == TokenType.Not; - /// - /// Initializes a new instance of the class with the specified type and value. - /// - /// The type of the token. - /// The value of the token. - public Token(TokenType type, string value) - { - Type = type; - Value = value; - } + /// + /// Initializes a new instance of the class with the specified type and value. + /// + /// The type of the token. + /// The value of the token. + public Token(TokenType type, string value) + { + Type = type; + Value = value; + } - /// - /// Returns a string that represents the current token. - /// - /// A string in the format "Type: Value". - public override string ToString() - { - return $"{Type}: {Value}"; - } + /// + /// Returns a string that represents the current token. + /// + /// A string in the format "Type: Value". + public override string ToString() + { + return $"{Type}: {Value}"; + } } diff --git a/src/ZoneTree.FullTextSearch/QueryLanguage/TokenType.cs b/src/ZoneTree.FullTextSearch/QueryLanguage/TokenType.cs index d71e429..6bee555 100644 --- a/src/ZoneTree.FullTextSearch/QueryLanguage/TokenType.cs +++ b/src/ZoneTree.FullTextSearch/QueryLanguage/TokenType.cs @@ -1,20 +1,20 @@ -namespace ZoneTree.FullTextSearch.QueryLanguage; +namespace ZoneTree.FullTextSearch.QueryLanguage; /// /// Defines the ypes of tokens that can be recognized by the tokenizer. 
/// public enum TokenType { - Word, - Phrase, - And, - Or, - Not, - In, - OpenParenthesis, - CloseParenthesis, - Comma, - Colon, - KeywordListOpen, - KeywordListClose + Word, + Phrase, + And, + Or, + Not, + In, + OpenParenthesis, + CloseParenthesis, + Comma, + Colon, + KeywordListOpen, + KeywordListClose } diff --git a/src/ZoneTree.FullTextSearch/QueryLanguage/Tokenizer.cs b/src/ZoneTree.FullTextSearch/QueryLanguage/Tokenizer.cs index 1ed2de1..d9a30b6 100644 --- a/src/ZoneTree.FullTextSearch/QueryLanguage/Tokenizer.cs +++ b/src/ZoneTree.FullTextSearch/QueryLanguage/Tokenizer.cs @@ -1,4 +1,4 @@ -using System; +using System; using System.Collections.Generic; using System.Linq; using System.Text; @@ -11,22 +11,22 @@ namespace ZoneTree.FullTextSearch.QueryLanguage; /// public sealed class Tokenizer { - /// - /// The input string to tokenize. - /// - readonly string Input; - - /// - /// The current position in the input string. - /// - int Position; - - /// - /// A dictionary of recognized operators in the query language. - /// - static readonly Dictionary Operators = - new(StringComparer.OrdinalIgnoreCase) - { + /// + /// The input string to tokenize. + /// + readonly string Input; + + /// + /// The current position in the input string. + /// + int Position; + + /// + /// A dictionary of recognized operators in the query language. + /// + static readonly Dictionary Operators = + new(StringComparer.OrdinalIgnoreCase) + { { "AND", new Token(TokenType.And, "AND") }, { "OR", new Token(TokenType.Or, "OR") }, { "NOT", new Token(TokenType.Not, "NOT") }, @@ -35,174 +35,174 @@ public sealed class Tokenizer { "&", new Token(TokenType.And, "&") }, { "|", new Token(TokenType.Or, "|") }, { "-", new Token(TokenType.Not, "-") }, - }; - - /// - /// Initializes a new instance of the class with the specified input string. - /// - /// The input string to tokenize. 
- public Tokenizer(string input) - { - Input = input; - Position = 0; - } - - /// - /// Tokenizes the input string into a sequence of tokens. - /// - /// An enumerable sequence of tokens. - public IEnumerable Tokenize() + }; + + /// + /// Initializes a new instance of the class with the specified input string. + /// + /// The input string to tokenize. + public Tokenizer(string input) + { + Input = input; + Position = 0; + } + + /// + /// Tokenizes the input string into a sequence of tokens. + /// + /// An enumerable sequence of tokens. + public IEnumerable Tokenize() + { + while (Position < Input.Length) { - while (Position < Input.Length) - { - SkipWhitespace(); - - if (Position >= Input.Length) - yield break; - - var current = Input[Position]; - - if (current == '"' || current == '\'') - { - yield return TokenizePhrase(current); - } - else if (current == ':') - { - yield return new Token(TokenType.Colon, ":"); - Position++; - } - else if (current == ',') - { - yield return new Token(TokenType.Comma, ","); - Position++; - } - else if (current == '[') - { - yield return new Token(TokenType.KeywordListOpen, "["); - Position++; - } - else if (current == ']') - { - yield return new Token(TokenType.KeywordListClose, "]"); - Position++; - } - else if (current == '(') - { - yield return new Token(TokenType.OpenParenthesis, "("); - Position++; - } - else if (current == ')') - { - yield return new Token(TokenType.CloseParenthesis, ")"); - Position++; - } - else if (current == '&') - { - yield return Operators["&"]; - Position++; - } - else if (current == '|') - { - yield return Operators["|"]; - Position++; - } - else if (current == '-') - { - yield return Operators["-"]; - Position++; - } - else - { - yield return TokenizeWordOrOperator(); - } - } + SkipWhitespace(); + + if (Position >= Input.Length) + yield break; + + var current = Input[Position]; + + if (current == '"' || current == '\'') + { + yield return TokenizePhrase(current); + } + else if (current == ':') + { + 
yield return new Token(TokenType.Colon, ":"); + Position++; + } + else if (current == ',') + { + yield return new Token(TokenType.Comma, ","); + Position++; + } + else if (current == '[') + { + yield return new Token(TokenType.KeywordListOpen, "["); + Position++; + } + else if (current == ']') + { + yield return new Token(TokenType.KeywordListClose, "]"); + Position++; + } + else if (current == '(') + { + yield return new Token(TokenType.OpenParenthesis, "("); + Position++; + } + else if (current == ')') + { + yield return new Token(TokenType.CloseParenthesis, ")"); + Position++; + } + else if (current == '&') + { + yield return Operators["&"]; + Position++; + } + else if (current == '|') + { + yield return Operators["|"]; + Position++; + } + else if (current == '-') + { + yield return Operators["-"]; + Position++; + } + else + { + yield return TokenizeWordOrOperator(); + } } - - /// - /// Skips over any whitespace characters in the input string. - /// - void SkipWhitespace() + } + + /// + /// Skips over any whitespace characters in the input string. + /// + void SkipWhitespace() + { + while (Position < Input.Length && char.IsWhiteSpace(Input[Position])) { - while (Position < Input.Length && char.IsWhiteSpace(Input[Position])) - { - Position++; - } + Position++; } - - /// - /// Tokenizes a phrase enclosed in quotes. - /// - /// The character used to quote the phrase (' or "). - /// A representing the quoted phrase. - Token TokenizePhrase(char quoteType) + } + + /// + /// Tokenizes a phrase enclosed in quotes. + /// + /// The character used to quote the phrase (' or "). + /// A representing the quoted phrase. 
+ Token TokenizePhrase(char quoteType) + { + var sb = new StringBuilder(); + Position++; // Skip the opening quote + + while (Position < Input.Length) { - var sb = new StringBuilder(); - Position++; // Skip the opening quote - - while (Position < Input.Length) - { - var current = Input[Position]; - - if (current == '\\' && Position + 1 < Input.Length) - { - // Handle escaped character - Position++; - sb.Append(Input[Position]); - } - else if (current == quoteType) - { - // End of phrase - Position++; - return new Token(TokenType.Phrase, sb.ToString()); - } - else - { - sb.Append(current); - } - - Position++; - } - - // Tolerate unterminated phrase. + var current = Input[Position]; + + if (current == '\\' && Position + 1 < Input.Length) + { + // Handle escaped character + Position++; + sb.Append(Input[Position]); + } + else if (current == quoteType) + { + // End of phrase + Position++; return new Token(TokenType.Phrase, sb.ToString()); + } + else + { + sb.Append(current); + } + + Position++; } - /// - /// Tokenizes a word or operator from the input string. - /// - /// A representing the word or operator. - Token TokenizeWordOrOperator() + // Tolerate unterminated phrase. + return new Token(TokenType.Phrase, sb.ToString()); + } + + /// + /// Tokenizes a word or operator from the input string. + /// + /// A representing the word or operator. 
+ Token TokenizeWordOrOperator() + { + var sb = new StringBuilder(); + + while (Position < Input.Length) { - var sb = new StringBuilder(); - - while (Position < Input.Length) - { - var current = Input[Position]; - - if (current == '\\' && Position + 1 < Input.Length) - { - // Handle escaped character - Position++; - sb.Append(Input[Position]); - } - else if (char.IsWhiteSpace(current) || current == ':' || current == ',' || - current == '(' || current == ')' || current == '[' || current == ']' || - current == '&' || current == '|' || current == '-') - { - break; - } - else - { - sb.Append(current); - } - - Position++; - } - - var value = sb.ToString(); - if (Operators.TryGetValue(value, out var token)) - return token; - - return new Token(TokenType.Word, value); + var current = Input[Position]; + + if (current == '\\' && Position + 1 < Input.Length) + { + // Handle escaped character + Position++; + sb.Append(Input[Position]); + } + else if (char.IsWhiteSpace(current) || current == ':' || current == ',' || + current == '(' || current == ')' || current == '[' || current == ']' || + current == '&' || current == '|' || current == '-') + { + break; + } + else + { + sb.Append(current); + } + + Position++; } + + var value = sb.ToString(); + if (Operators.TryGetValue(value, out var token)) + return token; + + return new Token(TokenType.Word, value); + } } diff --git a/src/ZoneTree.FullTextSearch/QueryLanguage/UnexpectedTokenException.cs b/src/ZoneTree.FullTextSearch/QueryLanguage/UnexpectedTokenException.cs index 2a8e989..7155e6e 100644 --- a/src/ZoneTree.FullTextSearch/QueryLanguage/UnexpectedTokenException.cs +++ b/src/ZoneTree.FullTextSearch/QueryLanguage/UnexpectedTokenException.cs @@ -5,11 +5,19 @@ /// public sealed class UnexpectedTokenException : Exception { - /// - /// Initializes a new instance of the class with a specified error message. - /// - /// The message that describes the error. 
- public UnexpectedTokenException(string message) : base(message) - { - } + /// + /// Initializes a new instance of the class with a specified error message. + /// + /// The message that describes the error. + public UnexpectedTokenException(string message) : base(message) + { + } + + public UnexpectedTokenException() + { + } + + public UnexpectedTokenException() + { + } } diff --git a/src/ZoneTree.FullTextSearch/Search/AdvancedSearchOnIndexOfTokenRecordPreviousToken.cs b/src/ZoneTree.FullTextSearch/Search/AdvancedSearchOnIndexOfTokenRecordPreviousToken.cs index fa50b11..8108244 100644 --- a/src/ZoneTree.FullTextSearch/Search/AdvancedSearchOnIndexOfTokenRecordPreviousToken.cs +++ b/src/ZoneTree.FullTextSearch/Search/AdvancedSearchOnIndexOfTokenRecordPreviousToken.cs @@ -1,4 +1,4 @@ -using System.Threading; +using System.Threading; using ZoneTree; using ZoneTree.Comparers; using ZoneTree.FullTextSearch.Index; @@ -15,339 +15,339 @@ public sealed class AdvancedSearchOnIndexOfTokenRecordPreviousToken - /// Gets the index on which the search operations are performed. - /// - public IndexOfTokenRecordPreviousToken Index { get; } + /// + /// Gets the index on which the search operations are performed. + /// + public IndexOfTokenRecordPreviousToken Index { get; } - /// - /// Represents information about a token, including whether it is a facet. - /// - /// The token associated with this information. - /// Indicates whether the token is a facet. - readonly record struct TokenInfo(TToken Token, bool IsFacet); + /// + /// Represents information about a token, including whether it is a facet. + /// + /// The token associated with this information. + /// Indicates whether the token is a facet. + readonly record struct TokenInfo(TToken Token, bool IsFacet); - /// - /// Initializes a new instance of the class. - /// - /// The index on which the search operations are performed. 
- public AdvancedSearchOnIndexOfTokenRecordPreviousToken( - IndexOfTokenRecordPreviousToken index) + /// + /// Initializes a new instance of the class. + /// + /// The index on which the search operations are performed. + public AdvancedSearchOnIndexOfTokenRecordPreviousToken( + IndexOfTokenRecordPreviousToken index) + { + Index = index; + } + + /// + /// Finds and returns the list of tokens that need to be iterated over based on the query node's type and structure. + /// + /// The query node to analyze. + /// A read-only list of tokens that need iteration. + IReadOnlyList FindTokensNeedIteration( + QueryNode node) + { + var result = new List(); + if (node.NodeType == QueryNodeType.And) + { + if (node.HasTokens) + { + result.Add(new TokenInfo( + node.FirstLookAt, node.IsFacetNode)); + } + else if (node.HasChildren) + { + var tokens = node.Children + .Select(x => FindTokensNeedIteration(x)) + .Where(x => x.Count > 0) + .MinBy(x => x.Count); + if (tokens != null) + result.AddRange(tokens); + } + } + else if (node.NodeType == QueryNodeType.Or) + { + if (node.HasTokens) + { + result.AddRange(node.Tokens.Select(x => new TokenInfo( + x, node.IsFacetNode))); + } + else if (node.HasChildren) + { + // NOT queries in an OR node ends up with full index scan + if (node.Children.Any(x => x.NodeType == QueryNodeType.Not)) + return result; + var tokensArray = node.Children + .Select(x => FindTokensNeedIteration(x)).ToArray(); + foreach (var tokens in tokensArray) + result.AddRange(tokens); + } + } + return result; + } + + /// + /// Performs a search based on the specified query and returns the matching records. + /// + /// The search query to execute. + /// + /// A token to monitor for cancellation requests. This allows the search operation to be canceled if necessary. + /// + /// An array of records that match the search criteria. 
+ public TRecord[] Search(SearchQuery query, CancellationToken cancellationToken = default) + { + Index.ThrowIfIndexIsDropped(); + + if (query.IsEmpty) + return []; + + var recordComparer = Index.RecordComparer; + var tokenComparer = Index.TokenComparer; + using var iterator1 = Index.ZoneTree1.CreateIterator( + IteratorType.NoRefresh, + contributeToTheBlockCache: false); + using var iterator2 = Index.ZoneTree1.CreateIterator( + IteratorType.NoRefresh, + contributeToTheBlockCache: false); + + if (!query.HasAnyPositiveCriteria) { - Index = index; + var result = ProcessEntireIndex(query.Skip, query.Limit); + return result.Count == 0 ? [] : result.ToArray(); } - /// - /// Finds and returns the list of tokens that need to be iterated over based on the query node's type and structure. - /// - /// The query node to analyze. - /// A read-only list of tokens that need iteration. - IReadOnlyList FindTokensNeedIteration( - QueryNode node) + var tokens = FindTokensNeedIteration(query.QueryNode); + if (tokens.Count == 0) + return []; + + var records = ProcessAllTokens(query.Skip, query.Limit); + return records.Count == 0 ? 
[] : records.ToArray(); + + bool DoesRecordContainAllTokens( + ReadOnlySpan tokens, + TRecord record, + bool respectTokenOrder, + bool isFacetNode) { - var result = new List(); - if (node.NodeType == QueryNodeType.And) + var len = tokens.Length; + if (len == 0) return false; + var previousTokenDoesNotExist = !isFacetNode; + var previousToken = default(TToken); + if (isFacetNode) respectTokenOrder = false; + for (var i = 0; i < len; ++i) + { + var token = tokens[i]; + if (isFacetNode) + previousToken = token; + iterator2.Seek(new CompositeKeyOfTokenRecordPrevious() + { + Token = token, + Record = record, + PreviousToken = previousToken + }); + var hasRecordForCurrentToken = false; + if (iterator2.Next()) { - if (node.HasTokens) - { - result.Add(new TokenInfo( - node.FirstLookAt, node.IsFacetNode)); - } - else if (node.HasChildren) - { - var tokens = node.Children - .Select(x => FindTokensNeedIteration(x)) - .Where(x => x.Count > 0) - .MinBy(x => x.Count); - if (tokens != null) - result.AddRange(tokens); - } + var key = iterator2.CurrentKey; + hasRecordForCurrentToken = + tokenComparer.AreEqual(key.Token, token) && + recordComparer.AreEqual(key.Record, record) && + (previousTokenDoesNotExist || + tokenComparer.AreEqual(key.PreviousToken, previousToken)); } - else if (node.NodeType == QueryNodeType.Or) + if (!hasRecordForCurrentToken) + return false; + if (respectTokenOrder) { - if (node.HasTokens) - { - result.AddRange(node.Tokens.Select(x => new TokenInfo( - x, node.IsFacetNode))); - } - else if (node.HasChildren) - { - // NOT queries in an OR node ends up with full index scan - if (node.Children.Any(x => x.NodeType == QueryNodeType.Not)) - return result; - var tokensArray = node.Children - .Select(x => FindTokensNeedIteration(x)).ToArray(); - foreach (var tokens in tokensArray) - result.AddRange(tokens); - } + previousTokenDoesNotExist = false; + previousToken = token; } - return result; + } + return true; } - /// - /// Performs a search based on the specified 
query and returns the matching records. - /// - /// The search query to execute. - /// - /// A token to monitor for cancellation requests. This allows the search operation to be canceled if necessary. - /// - /// An array of records that match the search criteria. - public TRecord[] Search(SearchQuery query, CancellationToken cancellationToken = default) + bool DoesRecordContainAnyOfTheTokens( + ReadOnlySpan tokens, + TRecord record, + bool isFacet) { - Index.ThrowIfIndexIsDropped(); - - if (query.IsEmpty) - return []; - - var recordComparer = Index.RecordComparer; - var tokenComparer = Index.TokenComparer; - using var iterator1 = Index.ZoneTree1.CreateIterator( - IteratorType.NoRefresh, - contributeToTheBlockCache: false); - using var iterator2 = Index.ZoneTree1.CreateIterator( - IteratorType.NoRefresh, - contributeToTheBlockCache: false); - - if (!query.HasAnyPositiveCriteria) + var len = tokens.Length; + if (len == 0) return false; + for (var i = 0; i < len; ++i) + { + var token = tokens[i]; + iterator2.Seek(new CompositeKeyOfTokenRecordPrevious() { - var result = ProcessEntireIndex(query.Skip, query.Limit); - return result.Count == 0 ? [] : result.ToArray(); + Token = token, + Record = record, + PreviousToken = isFacet ? token : default + }); + var hasRecordForCurrentToken = false; + if (iterator2.Next()) + { + var key = iterator2.CurrentKey; + hasRecordForCurrentToken = + tokenComparer.AreEqual(key.Token, token) && + recordComparer.AreEqual(key.Record, record) && + (!isFacet || + tokenComparer.AreEqual(key.PreviousToken, token)); } + if (hasRecordForCurrentToken) + return true; + } + return false; + } - var tokens = FindTokensNeedIteration(query.QueryNode); - if (tokens.Count == 0) - return []; - - var records = ProcessAllTokens(query.Skip, query.Limit); - return records.Count == 0 ? 
[] : records.ToArray(); - - bool DoesRecordContainAllTokens( - ReadOnlySpan tokens, - TRecord record, - bool respectTokenOrder, - bool isFacetNode) + bool DoesRecordMatchesTheQuery( + QueryNode node, + TRecord record) + { + if (node.NodeType == QueryNodeType.And) + { + if (node.HasTokens) { - var len = tokens.Length; - if (len == 0) return false; - var previousTokenDoesNotExist = !isFacetNode; - var previousToken = default(TToken); - if (isFacetNode) respectTokenOrder = false; - for (var i = 0; i < len; ++i) - { - var token = tokens[i]; - if (isFacetNode) - previousToken = token; - iterator2.Seek(new CompositeKeyOfTokenRecordPrevious() - { - Token = token, - Record = record, - PreviousToken = previousToken - }); - var hasRecordForCurrentToken = false; - if (iterator2.Next()) - { - var key = iterator2.CurrentKey; - hasRecordForCurrentToken = - tokenComparer.AreEqual(key.Token, token) && - recordComparer.AreEqual(key.Record, record) && - (previousTokenDoesNotExist || - tokenComparer.AreEqual(key.PreviousToken, previousToken)); - } - if (!hasRecordForCurrentToken) - return false; - if (respectTokenOrder) - { - previousTokenDoesNotExist = false; - previousToken = token; - } - } - return true; + // if is facet node, process facets. + return DoesRecordContainAllTokens( + node.Tokens, record, node.RespectTokenOrder, node.IsFacetNode); } - - bool DoesRecordContainAnyOfTheTokens( - ReadOnlySpan tokens, - TRecord record, - bool isFacet) + else if (node.HasChildren) { - var len = tokens.Length; - if (len == 0) return false; - for (var i = 0; i < len; ++i) - { - var token = tokens[i]; - iterator2.Seek(new CompositeKeyOfTokenRecordPrevious() - { - Token = token, - Record = record, - PreviousToken = isFacet ? 
token : default - }); - var hasRecordForCurrentToken = false; - if (iterator2.Next()) - { - var key = iterator2.CurrentKey; - hasRecordForCurrentToken = - tokenComparer.AreEqual(key.Token, token) && - recordComparer.AreEqual(key.Record, record) && - (!isFacet || - tokenComparer.AreEqual(key.PreviousToken, token)); - } - if (hasRecordForCurrentToken) - return true; - } - return false; + return node.Children + .All(x => DoesRecordMatchesTheQuery(x, record)); } - - bool DoesRecordMatchesTheQuery( - QueryNode node, - TRecord record) + } + else if (node.NodeType == QueryNodeType.Or) + { + if (node.HasTokens) + { + return DoesRecordContainAnyOfTheTokens( + node.Tokens, record, node.IsFacetNode); + } + else if (node.HasChildren) + { + return node.Children + .Any(x => DoesRecordMatchesTheQuery(x, record)); + } + } + else if (node.NodeType == QueryNodeType.Not) + { + if (node.HasTokens) + { + if (node.IsFacetNode) + { + return !DoesRecordContainAnyOfTheTokens(node.Tokens, record, true); + } + else if (node.RespectTokenOrder) + { + return !DoesRecordContainAllTokens(node.Tokens, record, true, false); + } + else + { + return !DoesRecordContainAnyOfTheTokens(node.Tokens, record, false); + } + } + else if (node.HasChildren) { - if (node.NodeType == QueryNodeType.And) - { - if (node.HasTokens) - { - // if is facet node, process facets. 
- return DoesRecordContainAllTokens( - node.Tokens, record, node.RespectTokenOrder, node.IsFacetNode); - } - else if (node.HasChildren) - { - return node.Children - .All(x => DoesRecordMatchesTheQuery(x, record)); - } - } - else if (node.NodeType == QueryNodeType.Or) - { - if (node.HasTokens) - { - return DoesRecordContainAnyOfTheTokens( - node.Tokens, record, node.IsFacetNode); - } - else if (node.HasChildren) - { - return node.Children - .Any(x => DoesRecordMatchesTheQuery(x, record)); - } - } - else if (node.NodeType == QueryNodeType.Not) - { - if (node.HasTokens) - { - if (node.IsFacetNode) - { - return !DoesRecordContainAnyOfTheTokens(node.Tokens, record, true); - } - else if (node.RespectTokenOrder) - { - return !DoesRecordContainAllTokens(node.Tokens, record, true, false); - } - else - { - return !DoesRecordContainAnyOfTheTokens(node.Tokens, record, false); - } - } - else if (node.HasChildren) - { - return node.Children - .All(x => !DoesRecordMatchesTheQuery(x, record)); - } - } - return false; + return node.Children + .All(x => !DoesRecordMatchesTheQuery(x, record)); } + } + return false; + } - HashSet ProcessAllTokens( - int skip, - int limit) + HashSet ProcessAllTokens( + int skip, + int limit) + { + var skipRecords = new HashSet(); + var records = new HashSet(); + var len = tokens.Count; + for (var i = 0; i < len; ++i) + { + (var token, var isFacet) = tokens[i]; + iterator1.Seek(new CompositeKeyOfTokenRecordPrevious() { - var skipRecords = new HashSet(); - var records = new HashSet(); - var len = tokens.Count; - for (var i = 0; i < len; ++i) - { - (var token, var isFacet) = tokens[i]; - iterator1.Seek(new CompositeKeyOfTokenRecordPrevious() - { - Token = token, - }); + Token = token, + }); - var off = 0; - if (limit != 0) - limit += skip; - while (iterator1.Next()) - { - if (cancellationToken.IsCancellationRequested) return records; - var key = iterator1.CurrentKey; - var record = key.Record; - if (tokenComparer.AreNotEqual(key.Token, token)) break; - if 
(isFacet && tokenComparer.AreNotEqual(key.PreviousToken, token)) continue; - if (skipRecords.Contains(record)) continue; + var off = 0; + if (limit != 0) + limit += skip; + while (iterator1.Next()) + { + if (cancellationToken.IsCancellationRequested) return records; + var key = iterator1.CurrentKey; + var record = key.Record; + if (tokenComparer.AreNotEqual(key.Token, token)) break; + if (isFacet && tokenComparer.AreNotEqual(key.PreviousToken, token)) continue; + if (skipRecords.Contains(record)) continue; - // If the record is already processed, just skip it. - if (records.Contains(record)) continue; + // If the record is already processed, just skip it. + if (records.Contains(record)) continue; - if (!DoesRecordMatchesTheQuery(query.QueryNode, record)) - { - skipRecords.Add(record); - continue; - } + if (!DoesRecordMatchesTheQuery(query.QueryNode, record)) + { + skipRecords.Add(record); + continue; + } - if (off >= skip) - { - records.Add(record); - } - else - { - // if the current offset is skipped, we have to skip - // all records in the index to ensure - // the previously skipped records are excluded from the result. - skipRecords.Add(record); - } - ++off; - if (limit > 0 && off == limit) break; - } - if (limit > 0 && off == limit) break; - } - return records; + if (off >= skip) + { + records.Add(record); + } + else + { + // if the current offset is skipped, we have to skip + // all records in the index to ensure + // the previously skipped records are excluded from the result. 
+ skipRecords.Add(record); + } + ++off; + if (limit > 0 && off == limit) break; } + if (limit > 0 && off == limit) break; + } + return records; + } - HashSet ProcessEntireIndex( - int skip, - int limit) - { - var skipRecords = new HashSet(); - var records = new HashSet(); - var off = 0; - if (limit != 0) - limit += skip; - while (iterator1.Next()) - { - if (cancellationToken.IsCancellationRequested) return records; - var key = iterator1.CurrentKey; - var record = key.Record; - if (skipRecords.Contains(record)) continue; + HashSet ProcessEntireIndex( + int skip, + int limit) + { + var skipRecords = new HashSet(); + var records = new HashSet(); + var off = 0; + if (limit != 0) + limit += skip; + while (iterator1.Next()) + { + if (cancellationToken.IsCancellationRequested) return records; + var key = iterator1.CurrentKey; + var record = key.Record; + if (skipRecords.Contains(record)) continue; - // If the record is already processed, just skip it. - if (records.Contains(record)) continue; + // If the record is already processed, just skip it. + if (records.Contains(record)) continue; - if (!DoesRecordMatchesTheQuery(query.QueryNode, record)) - { - skipRecords.Add(record); - continue; - } + if (!DoesRecordMatchesTheQuery(query.QueryNode, record)) + { + skipRecords.Add(record); + continue; + } - if (off >= skip) - { - records.Add(record); - } - else - { - // if the current offset is skipped, we have to skip - // all records in the index to ensure - // the previously skipped records are excluded from the result. - skipRecords.Add(record); - } - ++off; - if (limit > 0 && off == limit) break; - } - return records; + if (off >= skip) + { + records.Add(record); + } + else + { + // if the current offset is skipped, we have to skip + // all records in the index to ensure + // the previously skipped records are excluded from the result. 
+ skipRecords.Add(record); } + ++off; + if (limit > 0 && off == limit) break; + } + return records; } -} \ No newline at end of file + } +} diff --git a/src/ZoneTree.FullTextSearch/Search/HashedSearchQueryFactory.cs b/src/ZoneTree.FullTextSearch/Search/HashedSearchQueryFactory.cs index 38a2537..15f02f0 100644 --- a/src/ZoneTree.FullTextSearch/Search/HashedSearchQueryFactory.cs +++ b/src/ZoneTree.FullTextSearch/Search/HashedSearchQueryFactory.cs @@ -1,4 +1,4 @@ -using System.Drawing; +using System.Drawing; using ZoneTree.FullTextSearch.Hashing; using ZoneTree.FullTextSearch.Tokenizer; @@ -11,207 +11,207 @@ namespace ZoneTree.FullTextSearch.Search; /// public static class HashedSearchQueryFactory { - /// - /// Converts a QueryNode into a QueryNode by hashing the tokens. - /// - /// The original query node with string tokens. - /// The hash code generator for converting strings to ulong hashes. - /// The word tokenizer for splitting strings into tokens. - /// A new QueryNode with hashed tokens. - public static QueryNode FromStringQueryNode( - QueryNode node, - IHashCodeGenerator hashCodeGenerator, - IWordTokenizer wordTokenizer) + /// + /// Converts a QueryNode into a QueryNode by hashing the tokens. + /// + /// The original query node with string tokens. + /// The hash code generator for converting strings to ulong hashes. + /// The word tokenizer for splitting strings into tokens. + /// A new QueryNode with hashed tokens. 
+ public static QueryNode FromStringQueryNode( + QueryNode node, + IHashCodeGenerator hashCodeGenerator, + IWordTokenizer wordTokenizer) + { + if (node.HasTokens) { - if (node.HasTokens) - { - var tokensOftokens = GetTokensOfTokens( - node, - hashCodeGenerator, - wordTokenizer); - - if (node.NodeType == QueryNodeType.Or) - return CreateOrNode(node, tokensOftokens); - else - return CreateAndOrNotNode(node, tokensOftokens); - } - else if (node.HasChildren) - { - return CreateNodeWithChildren(node, hashCodeGenerator, wordTokenizer); - } - - // Return an empty node if no tokens or children - return new QueryNode( - node.NodeType, - null, - null, - node.RespectTokenOrder, - node.IsFacetNode); + var tokensOftokens = GetTokensOfTokens( + node, + hashCodeGenerator, + wordTokenizer); + + if (node.NodeType == QueryNodeType.Or) + return CreateOrNode(node, tokensOftokens); + else + return CreateAndOrNotNode(node, tokensOftokens); } - - /// - /// Tokenizes the strings in the given QueryNode and returns a jagged array of hashed tokens. - /// - /// The original query node with string tokens. - /// The hash code generator for converting strings to ulong hashes. - /// The word tokenizer for splitting strings into tokens. - /// A jagged array where each element is an array of hashed tokens corresponding to a string in the original node's tokens. 
- static ulong[][] GetTokensOfTokens( - QueryNode node, - IHashCodeGenerator hashCodeGenerator, - IWordTokenizer wordTokenizer) + else if (node.HasChildren) { - var isFacetNode = node.IsFacetNode; - return node.Tokens.Select(x => - { - if (isFacetNode) - { - return [hashCodeGenerator.GetHashCode(x.AsSpan())]; - } - var tokens = wordTokenizer - .GetSlices(x) - .Select(slice => - hashCodeGenerator - .GetHashCode(x.AsSpan().Slice(slice))) - .ToArray(); - return tokens; - }).ToArray(); + return CreateNodeWithChildren(node, hashCodeGenerator, wordTokenizer); } - /// - /// Converts a QueryNode with child nodes into a QueryNode by recursively processing its children. - /// - /// The original query node with string tokens and child nodes. - /// The hash code generator for converting strings to ulong hashes. - /// The word tokenizer for splitting strings into tokens. - /// A new QueryNode with hashed tokens, including processed child nodes. - static QueryNode CreateNodeWithChildren(QueryNode node, IHashCodeGenerator hashCodeGenerator, IWordTokenizer wordTokenizer) + // Return an empty node if no tokens or children + return new QueryNode( + node.NodeType, + null, + null, + node.RespectTokenOrder, + node.IsFacetNode); + } + + /// + /// Tokenizes the strings in the given QueryNode and returns a jagged array of hashed tokens. + /// + /// The original query node with string tokens. + /// The hash code generator for converting strings to ulong hashes. + /// The word tokenizer for splitting strings into tokens. + /// A jagged array where each element is an array of hashed tokens corresponding to a string in the original node's tokens. 
+ static ulong[][] GetTokensOfTokens( + QueryNode node, + IHashCodeGenerator hashCodeGenerator, + IWordTokenizer wordTokenizer) + { + var isFacetNode = node.IsFacetNode; + return node.Tokens.Select(x => { - // Recursively process child nodes - return new QueryNode( - node.NodeType, - null, - node.Children - .Select(x => - FromStringQueryNode( - x, hashCodeGenerator, wordTokenizer)).ToArray(), - node.RespectTokenOrder, - node.IsFacetNode); - } - - /// - /// Converts an AND or NOT type QueryNode into a QueryNode by hashing its tokens and processing them according to the node type. - /// - /// The original query node with string tokens. - /// A jagged array of hashed tokens corresponding to the original node's tokens. - /// A new QueryNode representing the AND or NOT logic with hashed tokens. - static QueryNode CreateAndOrNotNode( - QueryNode node, ulong[][] tokensOftokens) + if (isFacetNode) + { + return [hashCodeGenerator.GetHashCode(x.AsSpan())]; + } + var tokens = wordTokenizer + .GetSlices(x) + .Select(slice => + hashCodeGenerator + .GetHashCode(x.AsSpan().Slice(slice))) + .ToArray(); + return tokens; + }).ToArray(); + } + + /// + /// Converts a QueryNode with child nodes into a QueryNode by recursively processing its children. + /// + /// The original query node with string tokens and child nodes. + /// The hash code generator for converting strings to ulong hashes. + /// The word tokenizer for splitting strings into tokens. + /// A new QueryNode with hashed tokens, including processed child nodes. 
+ static QueryNode CreateNodeWithChildren(QueryNode node, IHashCodeGenerator hashCodeGenerator, IWordTokenizer wordTokenizer) + { + // Recursively process child nodes + return new QueryNode( + node.NodeType, + null, + node.Children + .Select(x => + FromStringQueryNode( + x, hashCodeGenerator, wordTokenizer)).ToArray(), + node.RespectTokenOrder, + node.IsFacetNode); + } + + /// + /// Converts an AND or NOT type QueryNode into a QueryNode by hashing its tokens and processing them according to the node type. + /// + /// The original query node with string tokens. + /// A jagged array of hashed tokens corresponding to the original node's tokens. + /// A new QueryNode representing the AND or NOT logic with hashed tokens. + static QueryNode CreateAndOrNotNode( + QueryNode node, ulong[][] tokensOftokens) + { + var hasAnyTokenizedTokensLengthGreatherThanOne = + tokensOftokens.Any(x => x.Length > 1); + + if (!hasAnyTokenizedTokensLengthGreatherThanOne || + node.RespectTokenOrder || node.IsFacetNode) { - var hasAnyTokenizedTokensLengthGreatherThanOne = - tokensOftokens.Any(x => x.Length > 1); - - if (!hasAnyTokenizedTokensLengthGreatherThanOne || - node.RespectTokenOrder || node.IsFacetNode) - { - // NOT and AND nodes can be collected into a single node - return new QueryNode( - node.NodeType, - tokensOftokens.SelectMany(x => x).ToArray(), - null, - node.RespectTokenOrder, - node.IsFacetNode); - } - - // If we reach here: - // node.RespectTokenOrder always false - // node.IsFacetNode always false - var children = tokensOftokens - .Select(tokenizedTokens => - { - return new QueryNode( - node.NodeType, - tokenizedTokens, - null, - true, - false); - }).ToArray(); - - if (children.Length == 1) - { - // edge case, simplify tree. 
- children[0].NodeType = node.NodeType; - return children[0]; - } - - return new QueryNode( - QueryNodeType.And, - null, - children, - false, - false); + // NOT and AND nodes can be collected into a single node + return new QueryNode( + node.NodeType, + tokensOftokens.SelectMany(x => x).ToArray(), + null, + node.RespectTokenOrder, + node.IsFacetNode); } - /// - /// Converts an OR type QueryNode into a QueryNode by hashing its tokens and processing them according to the node type. - /// - /// The original query node with string tokens. - /// A jagged array of hashed tokens corresponding to the original node's tokens. - /// A new QueryNode representing the OR logic with hashed tokens. - static QueryNode CreateOrNode(QueryNode node, ulong[][] tokensOftokens) - { - var hasAnyTokenizedTokensLengthGreatherThanOne = - tokensOftokens.Any(x => x.Length > 1); - - if (hasAnyTokenizedTokensLengthGreatherThanOne) + // If we reach here: + // node.RespectTokenOrder always false + // node.IsFacetNode always false + var children = tokensOftokens + .Select(tokenizedTokens => { - // Group each set of tokenized tokens with AND - // and assign as children to the OR node - var children = tokensOftokens - .Select(tokenizedTokens => - { - return new QueryNode( - QueryNodeType.And, - tokenizedTokens, - null, - true, // This should be always true, as tokenized tokens are a combined group. - node.IsFacetNode); - }).ToArray(); - - // edge case, simplify tree. - if (children.Length == 1) - return children[0]; + return new QueryNode( + node.NodeType, + tokenizedTokens, + null, + true, + false); + }).ToArray(); - return new QueryNode( - QueryNodeType.Or, - null, - children, - node.RespectTokenOrder, - node.IsFacetNode); - } - return new QueryNode( - QueryNodeType.Or, - tokensOftokens.SelectMany(x => x).ToArray(), - null, - node.RespectTokenOrder, - node.IsFacetNode); + if (children.Length == 1) + { + // edge case, simplify tree. 
+ children[0].NodeType = node.NodeType; + return children[0]; } - /// - /// Converts a SearchQuery into a SearchQuery by tokenizing each word and hashing the tokens in the query nodes. - /// - /// The original search query with string tokens. - /// The hash code generator for converting strings to ulong hashes. - /// The word tokenizer for splitting strings into tokens. - /// A new SearchQuery with hashed tokens. - public static SearchQuery FromStringSearchQuery( - SearchQuery query, - IHashCodeGenerator hashCodeGenerator, - IWordTokenizer wordTokenizer) + return new QueryNode( + QueryNodeType.And, + null, + children, + false, + false); + } + + /// + /// Converts an OR type QueryNode into a QueryNode by hashing its tokens and processing them according to the node type. + /// + /// The original query node with string tokens. + /// A jagged array of hashed tokens corresponding to the original node's tokens. + /// A new QueryNode representing the OR logic with hashed tokens. + static QueryNode CreateOrNode(QueryNode node, ulong[][] tokensOftokens) + { + var hasAnyTokenizedTokensLengthGreatherThanOne = + tokensOftokens.Any(x => x.Length > 1); + + if (hasAnyTokenizedTokensLengthGreatherThanOne) { - var node = FromStringQueryNode( - query.QueryNode, hashCodeGenerator, wordTokenizer); - var result = new SearchQuery(node, query.Skip, query.Limit); - return result; + // Group each set of tokenized tokens with AND + // and assign as children to the OR node + var children = tokensOftokens + .Select(tokenizedTokens => + { + return new QueryNode( + QueryNodeType.And, + tokenizedTokens, + null, + true, // This should be always true, as tokenized tokens are a combined group. + node.IsFacetNode); + }).ToArray(); + + // edge case, simplify tree. 
+ if (children.Length == 1) + return children[0]; + + return new QueryNode( + QueryNodeType.Or, + null, + children, + node.RespectTokenOrder, + node.IsFacetNode); } + return new QueryNode( + QueryNodeType.Or, + tokensOftokens.SelectMany(x => x).ToArray(), + null, + node.RespectTokenOrder, + node.IsFacetNode); + } + + /// + /// Converts a SearchQuery into a SearchQuery by tokenizing each word and hashing the tokens in the query nodes. + /// + /// The original search query with string tokens. + /// The hash code generator for converting strings to ulong hashes. + /// The word tokenizer for splitting strings into tokens. + /// A new SearchQuery with hashed tokens. + public static SearchQuery FromStringSearchQuery( + SearchQuery query, + IHashCodeGenerator hashCodeGenerator, + IWordTokenizer wordTokenizer) + { + var node = FromStringQueryNode( + query.QueryNode, hashCodeGenerator, wordTokenizer); + var result = new SearchQuery(node, query.Skip, query.Limit); + return result; + } } diff --git a/src/ZoneTree.FullTextSearch/Search/QueryNode.cs b/src/ZoneTree.FullTextSearch/Search/QueryNode.cs index 04d3268..3602b2a 100644 --- a/src/ZoneTree.FullTextSearch/Search/QueryNode.cs +++ b/src/ZoneTree.FullTextSearch/Search/QueryNode.cs @@ -1,4 +1,4 @@ - + namespace ZoneTree.FullTextSearch.Search; /// @@ -8,159 +8,159 @@ namespace ZoneTree.FullTextSearch.Search; /// The type of tokens contained in the query node. public sealed class QueryNode : IEquatable> { - /// - /// Gets or sets the logical operation type of the query node (AND, OR, NOT). - /// - public QueryNodeType NodeType { get; set; } - - /// - /// Gets or sets the tokens associated with this query node. - /// - public TToken[] Tokens { get; set; } - - /// - /// Gets or sets the child nodes of this query node. - /// - public QueryNode[] Children { get; set; } - - /// - /// Gets or sets a value indicating whether the order of tokens should be respected - /// during the evaluation of this node. 
- /// - public bool RespectTokenOrder { get; set; } = true; - - /// - /// Gets or sets a value indicating whether this node is a facet node. - /// - public bool IsFacetNode { get; set; } - - bool _hasFirstLookAt; - - TToken _firstLookAt; - - /// - /// Gets or sets the first token to consider when evaluating this node. - /// Defaults to the first token in the array if not explicitly set. - /// - public TToken FirstLookAt - { - get => _hasFirstLookAt ? _firstLookAt : (HasTokens ? Tokens[0] : default); - set - { - _firstLookAt = value; - _hasFirstLookAt = true; - } - } - - /// - /// Gets a value indicating whether this node or any of its child nodes contain positive criteria for evaluation. - /// Empty nodes and NOT queries are not counted as a positive criteria. - /// - public bool HasAnyPositiveCriteria => - NodeType != QueryNodeType.Not && - (NodeType != QueryNodeType.Or || - (NodeType == QueryNodeType.Or && - (Children == null || - !Children.Any(c => c.NodeType == QueryNodeType.Not)))) && - (HasTokens || - (Children != null && Children.Any(c => c.HasAnyPositiveCriteria))); - - /// - /// Gets a value indicating whether this query node is empty. - /// - public bool IsEmpty => !HasTokens && !HasChildren; - - /// - /// Gets a value indicating whether this node contains tokens. - /// - public bool HasTokens => Tokens != null && Tokens.Length > 0; - - /// - /// Gets a value indicating whether this node has child nodes. - /// - public bool HasChildren => Children != null && Children.Length > 0; - - /// - /// Initializes a new instance of the class. - /// - /// The logical operation type of the node. - /// The tokens associated with this node. - /// The child nodes of this node. - /// Whether the order of tokens should be respected. - /// Whether this node is a facet node. 
- public QueryNode( - QueryNodeType nodeType, - TToken[] tokens = null, - QueryNode[] children = null, - bool respectTokenOrder = true, - bool isFacetNode = false) + /// + /// Gets or sets the logical operation type of the query node (AND, OR, NOT). + /// + public QueryNodeType NodeType { get; set; } + + /// + /// Gets or sets the tokens associated with this query node. + /// + public TToken[] Tokens { get; set; } + + /// + /// Gets or sets the child nodes of this query node. + /// + public QueryNode[] Children { get; set; } + + /// + /// Gets or sets a value indicating whether the order of tokens should be respected + /// during the evaluation of this node. + /// + public bool RespectTokenOrder { get; set; } = true; + + /// + /// Gets or sets a value indicating whether this node is a facet node. + /// + public bool IsFacetNode { get; set; } + + bool _hasFirstLookAt; + + TToken _firstLookAt; + + /// + /// Gets or sets the first token to consider when evaluating this node. + /// Defaults to the first token in the array if not explicitly set. + /// + public TToken FirstLookAt + { + get => _hasFirstLookAt ? _firstLookAt : (HasTokens ? Tokens[0] : default); + set { - NodeType = nodeType; - Tokens = tokens; - Children = children; - RespectTokenOrder = respectTokenOrder; - IsFacetNode = isFacetNode; + _firstLookAt = value; + _hasFirstLookAt = true; } - - public override bool Equals(object obj) - { - return Equals(obj as QueryNode); - } - - public bool Equals(QueryNode other) + } + + /// + /// Gets a value indicating whether this node or any of its child nodes contain positive criteria for evaluation. + /// Empty nodes and NOT queries are not counted as a positive criteria. 
+ /// + public bool HasAnyPositiveCriteria => + NodeType != QueryNodeType.Not && + (NodeType != QueryNodeType.Or || + (NodeType == QueryNodeType.Or && + (Children == null || + !Children.Any(c => c.NodeType == QueryNodeType.Not)))) && + (HasTokens || + (Children != null && Children.Any(c => c.HasAnyPositiveCriteria))); + + /// + /// Gets a value indicating whether this query node is empty. + /// + public bool IsEmpty => !HasTokens && !HasChildren; + + /// + /// Gets a value indicating whether this node contains tokens. + /// + public bool HasTokens => Tokens != null && Tokens.Length > 0; + + /// + /// Gets a value indicating whether this node has child nodes. + /// + public bool HasChildren => Children != null && Children.Length > 0; + + /// + /// Initializes a new instance of the class. + /// + /// The logical operation type of the node. + /// The tokens associated with this node. + /// The child nodes of this node. + /// Whether the order of tokens should be respected. + /// Whether this node is a facet node. 
+ public QueryNode( + QueryNodeType nodeType, + TToken[] tokens = null, + QueryNode[] children = null, + bool respectTokenOrder = true, + bool isFacetNode = false) + { + NodeType = nodeType; + Tokens = tokens; + Children = children; + RespectTokenOrder = respectTokenOrder; + IsFacetNode = isFacetNode; + } + + public override bool Equals(object obj) + { + return Equals(obj as QueryNode); + } + + public bool Equals(QueryNode other) + { + if (other is null) return false; + + if (NodeType != other.NodeType) return false; + if (HasTokens != other.HasTokens) return false; + if (HasChildren != other.HasChildren) return false; + + if (HasTokens && + !Enumerable.SequenceEqual(Tokens, other.Tokens)) + return false; + + if (HasChildren && + !Enumerable.SequenceEqual(Children, other.Children)) + return false; + + return RespectTokenOrder == other.RespectTokenOrder && + IsFacetNode == other.IsFacetNode && + _hasFirstLookAt == other._hasFirstLookAt && + EqualityComparer + .Default + .Equals(_firstLookAt, other._firstLookAt); + } + + public override int GetHashCode() + { + HashCode hash = new HashCode(); + hash.Add(NodeType); + if (HasTokens) { - if (other is null) return false; - - if (NodeType != other.NodeType) return false; - if (HasTokens != other.HasTokens) return false; - if (HasChildren != other.HasChildren) return false; - - if (HasTokens && - !Enumerable.SequenceEqual(Tokens, other.Tokens)) - return false; - - if (HasChildren && - !Enumerable.SequenceEqual(Children, other.Children)) - return false; - - return RespectTokenOrder == other.RespectTokenOrder && - IsFacetNode == other.IsFacetNode && - _hasFirstLookAt == other._hasFirstLookAt && - EqualityComparer - .Default - .Equals(_firstLookAt, other._firstLookAt); + foreach (var token in Tokens) + hash.Add(token); } - - public override int GetHashCode() - { - HashCode hash = new HashCode(); - hash.Add(NodeType); - if (HasTokens) - { - foreach (var token in Tokens) - hash.Add(token); - } - if (HasChildren) - { - foreach 
(var child in Children) - hash.Add(child); - } - hash.Add(RespectTokenOrder); - hash.Add(IsFacetNode); - hash.Add(_hasFirstLookAt); - hash.Add(FirstLookAt); - hash.Add(HasTokens); - hash.Add(HasChildren); - return hash.ToHashCode(); - } - - public static bool operator ==(QueryNode left, QueryNode right) - { - return EqualityComparer>.Default.Equals(left, right); - } - - public static bool operator !=(QueryNode left, QueryNode right) + if (HasChildren) { - return !(left == right); + foreach (var child in Children) + hash.Add(child); } + hash.Add(RespectTokenOrder); + hash.Add(IsFacetNode); + hash.Add(_hasFirstLookAt); + hash.Add(FirstLookAt); + hash.Add(HasTokens); + hash.Add(HasChildren); + return hash.ToHashCode(); + } + + public static bool operator ==(QueryNode left, QueryNode right) + { + return EqualityComparer>.Default.Equals(left, right); + } + + public static bool operator !=(QueryNode left, QueryNode right) + { + return !(left == right); + } } diff --git a/src/ZoneTree.FullTextSearch/Search/QueryNodeType.cs b/src/ZoneTree.FullTextSearch/Search/QueryNodeType.cs index 85d648b..8ece8ff 100644 --- a/src/ZoneTree.FullTextSearch/Search/QueryNodeType.cs +++ b/src/ZoneTree.FullTextSearch/Search/QueryNodeType.cs @@ -1,4 +1,4 @@ -namespace ZoneTree.FullTextSearch.Search; +namespace ZoneTree.FullTextSearch.Search; /// /// Represents the type of a query node, which defines the logical operation @@ -6,7 +6,7 @@ /// public enum QueryNodeType { - Not, - And, - Or, + Not, + And, + Or, } diff --git a/src/ZoneTree.FullTextSearch/Search/SearchOnIndexOfTokenRecordPreviousToken.cs b/src/ZoneTree.FullTextSearch/Search/SearchOnIndexOfTokenRecordPreviousToken.cs index fbece98..1cf0bad 100644 --- a/src/ZoneTree.FullTextSearch/Search/SearchOnIndexOfTokenRecordPreviousToken.cs +++ b/src/ZoneTree.FullTextSearch/Search/SearchOnIndexOfTokenRecordPreviousToken.cs @@ -1,4 +1,4 @@ -using ZoneTree; +using ZoneTree; using ZoneTree.Comparers; using ZoneTree.FullTextSearch.Index; @@ -14,259 
+14,259 @@ public sealed class SearchOnIndexOfTokenRecordPreviousToken where TRecord : unmanaged where TToken : unmanaged { - /// - /// Gets the index associated with this search algorithm, which is used to perform the search operations. - /// - public IndexOfTokenRecordPreviousToken Index { get; } + /// + /// Gets the index associated with this search algorithm, which is used to perform the search operations. + /// + public IndexOfTokenRecordPreviousToken Index { get; } - /// - /// Initializes a new instance of the class, - /// associating it with a specific . - /// - /// The index to use for searching records. - public SearchOnIndexOfTokenRecordPreviousToken( - IndexOfTokenRecordPreviousToken index) - { - Index = index; - } + /// + /// Initializes a new instance of the class, + /// associating it with a specific . + /// + /// The index to use for searching records. + public SearchOnIndexOfTokenRecordPreviousToken( + IndexOfTokenRecordPreviousToken index) + { + Index = index; + } - /// - /// Searches the index for records that match the specified tokens, with optional support for facets, token order respect, and pagination. - /// - /// - /// A read-only span of tokens that the records must contain. This parameter is mandatory unless facets are provided. - /// The tokens are logically grouped using "AND", meaning all tokens must be present in the matching records. - /// If both the tokens span and the facets span are empty, the result will be an empty array, as searching without tokens and facets is not supported. - /// Tokens can be empty if facets are provided; in this case, the search will be based solely on the facets. - /// To retrieve records without specific search tokens or facets, consider fetching them from the actual record source instead of using the search index. - /// - /// - /// An optional token that the search will prioritize when searching. - /// If not specified, the first token in the tokens span is used. 
- /// - /// - /// A boolean indicating whether the search should respect the order of tokens in the record. - /// If true, the records must contain the tokens in the specified order. - /// - /// - /// An optional read-only span of tokens that can be used to filter the search results. - /// If any facets are provided, records must contain at least one of these facet tokens to be included in the results. - /// If the span is empty or not provided, no facet filtering is applied, and all matching records are returned regardless of facet values. - /// - /// - /// The number of matching records to skip in the result set, useful for pagination. - /// Defaults to 0. - /// - /// - /// The maximum number of records to return, useful for limiting the result set size. - /// Defaults to 0, which indicates no limit. - /// - /// - /// A token to monitor for cancellation requests. This allows the search operation to be canceled if necessary. - /// - /// - /// An array of records that match the specified tokens and facets, respecting the token order if specified. - /// The array may be empty if no matching records are found. - /// - /// - /// The search process begins by identifying records that match the specified tokens. - /// If a `firstLookAt` token is provided, it prioritizes that token in the search. - /// It then filters these records based on whether they contain all the specified tokens and, if facets are provided, - /// whether they contain any of the facet tokens. - /// Pagination is supported through the `skip` and `limit` parameters. - /// - public TRecord[] Search( - ReadOnlySpan tokens, - TToken? 
firstLookAt = null, - bool respectTokenOrder = true, - ReadOnlySpan facets = default, - int skip = 0, - int limit = 0, - CancellationToken cancellationToken = default) - { - Index.ThrowIfIndexIsDropped(); - if (tokens.Length == 0 && facets.Length == 0) - return []; + /// + /// Searches the index for records that match the specified tokens, with optional support for facets, token order respect, and pagination. + /// + /// + /// A read-only span of tokens that the records must contain. This parameter is mandatory unless facets are provided. + /// The tokens are logically grouped using "AND", meaning all tokens must be present in the matching records. + /// If both the tokens span and the facets span are empty, the result will be an empty array, as searching without tokens and facets is not supported. + /// Tokens can be empty if facets are provided; in this case, the search will be based solely on the facets. + /// To retrieve records without specific search tokens or facets, consider fetching them from the actual record source instead of using the search index. + /// + /// + /// An optional token that the search will prioritize when searching. + /// If not specified, the first token in the tokens span is used. + /// + /// + /// A boolean indicating whether the search should respect the order of tokens in the record. + /// If true, the records must contain the tokens in the specified order. + /// + /// + /// An optional read-only span of tokens that can be used to filter the search results. + /// If any facets are provided, records must contain at least one of these facet tokens to be included in the results. + /// If the span is empty or not provided, no facet filtering is applied, and all matching records are returned regardless of facet values. + /// + /// + /// The number of matching records to skip in the result set, useful for pagination. + /// Defaults to 0. + /// + /// + /// The maximum number of records to return, useful for limiting the result set size. 
+ /// Defaults to 0, which indicates no limit. + /// + /// + /// A token to monitor for cancellation requests. This allows the search operation to be canceled if necessary. + /// + /// + /// An array of records that match the specified tokens and facets, respecting the token order if specified. + /// The array may be empty if no matching records are found. + /// + /// + /// The search process begins by identifying records that match the specified tokens. + /// If a `firstLookAt` token is provided, it prioritizes that token in the search. + /// It then filters these records based on whether they contain all the specified tokens and, if facets are provided, + /// whether they contain any of the facet tokens. + /// Pagination is supported through the `skip` and `limit` parameters. + /// + public TRecord[] Search( + ReadOnlySpan tokens, + TToken? firstLookAt = null, + bool respectTokenOrder = true, + ReadOnlySpan facets = default, + int skip = 0, + int limit = 0, + CancellationToken cancellationToken = default) + { + Index.ThrowIfIndexIsDropped(); + if (tokens.Length == 0 && facets.Length == 0) + return []; - var hasTokens = tokens.Length > 0; - var recordComparer = Index.RecordComparer; - var tokenComparer = Index.TokenComparer; - using var iterator1 = Index.ZoneTree1.CreateIterator( - IteratorType.NoRefresh, - contributeToTheBlockCache: false); - using var iterator2 = Index.ZoneTree1.CreateIterator( - IteratorType.NoRefresh, - contributeToTheBlockCache: false); - var facet = firstLookAt ?? (hasTokens ? tokens[0] : facets[0]); - var records = hasTokens ? - FindRecordsMatchingAllTokens(tokens, facets, skip, limit) : - FindRecordsMatchingAnyOfTheFacets(facets, skip, limit); - return records.Count == 0 ? 
[] : records.ToArray(); + var hasTokens = tokens.Length > 0; + var recordComparer = Index.RecordComparer; + var tokenComparer = Index.TokenComparer; + using var iterator1 = Index.ZoneTree1.CreateIterator( + IteratorType.NoRefresh, + contributeToTheBlockCache: false); + using var iterator2 = Index.ZoneTree1.CreateIterator( + IteratorType.NoRefresh, + contributeToTheBlockCache: false); + var facet = firstLookAt ?? (hasTokens ? tokens[0] : facets[0]); + var records = hasTokens ? + FindRecordsMatchingAllTokens(tokens, facets, skip, limit) : + FindRecordsMatchingAnyOfTheFacets(facets, skip, limit); + return records.Count == 0 ? [] : records.ToArray(); - bool DoesRecordContainAllTokens(ReadOnlySpan tokens, TRecord record) + bool DoesRecordContainAllTokens(ReadOnlySpan tokens, TRecord record) + { + var len = tokens.Length; + if (len == 0) return false; + var previousTokenDoesNotExist = true; + var previousToken = default(TToken); + for (var i = 0; i < len; ++i) + { + var token = tokens[i]; + iterator2.Seek(new CompositeKeyOfTokenRecordPrevious() + { + Token = token, + Record = record, + PreviousToken = previousToken + }); + var hasRecordForCurrentToken = false; + if (iterator2.Next()) { - var len = tokens.Length; - if (len == 0) return false; - var previousTokenDoesNotExist = true; - var previousToken = default(TToken); - for (var i = 0; i < len; ++i) - { - var token = tokens[i]; - iterator2.Seek(new CompositeKeyOfTokenRecordPrevious() - { - Token = token, - Record = record, - PreviousToken = previousToken - }); - var hasRecordForCurrentToken = false; - if (iterator2.Next()) - { - var key = iterator2.CurrentKey; - hasRecordForCurrentToken = - tokenComparer.AreEqual(key.Token, token) && - recordComparer.AreEqual(key.Record, record) && - (previousTokenDoesNotExist || - tokenComparer.AreEqual(key.PreviousToken, previousToken)); - } - if (!hasRecordForCurrentToken) - return false; - if (respectTokenOrder) - { - previousTokenDoesNotExist = false; - previousToken = token; - } - 
} - return true; + var key = iterator2.CurrentKey; + hasRecordForCurrentToken = + tokenComparer.AreEqual(key.Token, token) && + recordComparer.AreEqual(key.Record, record) && + (previousTokenDoesNotExist || + tokenComparer.AreEqual(key.PreviousToken, previousToken)); } - - bool DoesRecordContainAnyOfTheFacets(ReadOnlySpan facets, TRecord record) + if (!hasRecordForCurrentToken) + return false; + if (respectTokenOrder) { - var len = facets.Length; - if (len == 0) return true; // special case for facets. - var previousToken = default(TToken); - for (var i = 0; i < len; ++i) - { - var token = facets[i]; - iterator2.Seek(new CompositeKeyOfTokenRecordPrevious() - { - Token = token, - Record = record, - PreviousToken = previousToken - }); - var hasRecordForCurrentToken = false; - if (iterator2.Next()) - { - var key = iterator2.CurrentKey; - hasRecordForCurrentToken = - tokenComparer.AreEqual(key.Token, token) && - recordComparer.AreEqual(key.Record, record) && - tokenComparer.AreEqual(key.PreviousToken, token); - } - if (hasRecordForCurrentToken) - return true; - } - return false; + previousTokenDoesNotExist = false; + previousToken = token; } + } + return true; + } - HashSet FindRecordsMatchingAllTokens( - ReadOnlySpan tokens, - ReadOnlySpan facets, - int skip, - int limit) + bool DoesRecordContainAnyOfTheFacets(ReadOnlySpan facets, TRecord record) + { + var len = facets.Length; + if (len == 0) return true; // special case for facets. 
+ var previousToken = default(TToken); + for (var i = 0; i < len; ++i) + { + var token = facets[i]; + iterator2.Seek(new CompositeKeyOfTokenRecordPrevious() { - var records = new HashSet(); - iterator1.Seek(new CompositeKeyOfTokenRecordPrevious() - { - Token = facet, - }); + Token = token, + Record = record, + PreviousToken = previousToken + }); + var hasRecordForCurrentToken = false; + if (iterator2.Next()) + { + var key = iterator2.CurrentKey; + hasRecordForCurrentToken = + tokenComparer.AreEqual(key.Token, token) && + recordComparer.AreEqual(key.Record, record) && + tokenComparer.AreEqual(key.PreviousToken, token); + } + if (hasRecordForCurrentToken) + return true; + } + return false; + } - var off = 0; - if (limit != 0) - limit += skip; - TRecord skipRecord = default; - while (iterator1.Next()) - { - if (cancellationToken.IsCancellationRequested) return records; - var key = iterator1.CurrentKey; - var record = key.Record; - if (recordComparer.AreEqual(skipRecord, record)) continue; - if (tokenComparer.AreNotEqual(key.Token, facet)) break; + HashSet FindRecordsMatchingAllTokens( + ReadOnlySpan tokens, + ReadOnlySpan facets, + int skip, + int limit) + { + var records = new HashSet(); + iterator1.Seek(new CompositeKeyOfTokenRecordPrevious() + { + Token = facet, + }); - // If the record is already processed, just skip it. - // Multiple records are common - // since a token can appear in a document multiple times with - // different previous token. 
- if (records.Contains(record)) continue; + var off = 0; + if (limit != 0) + limit += skip; + TRecord skipRecord = default; + while (iterator1.Next()) + { + if (cancellationToken.IsCancellationRequested) return records; + var key = iterator1.CurrentKey; + var record = key.Record; + if (recordComparer.AreEqual(skipRecord, record)) continue; + if (tokenComparer.AreNotEqual(key.Token, facet)) break; - if (!DoesRecordContainAllTokens(tokens, record) || - !DoesRecordContainAnyOfTheFacets(facets, record)) - continue; + // If the record is already processed, just skip it. + // Multiple records are common + // since a token can appear in a document multiple times with + // different previous token. + if (records.Contains(record)) continue; - if (off >= skip) - { - records.Add(record); - } - else - { - // if the current offset is skipped, we have to skip - // all records in the index to ensure - // the previously skipped records are excluded from the result. - skipRecord = record; - } - ++off; - if (limit > 0 && off == limit) break; - } - return records; + if (!DoesRecordContainAllTokens(tokens, record) || + !DoesRecordContainAnyOfTheFacets(facets, record)) + continue; + + if (off >= skip) + { + records.Add(record); } + else + { + // if the current offset is skipped, we have to skip + // all records in the index to ensure + // the previously skipped records are excluded from the result. 
+ skipRecord = record; + } + ++off; + if (limit > 0 && off == limit) break; + } + return records; + } - HashSet FindRecordsMatchingAnyOfTheFacets( - ReadOnlySpan facets, - int skip, - int limit) + HashSet FindRecordsMatchingAnyOfTheFacets( + ReadOnlySpan facets, + int skip, + int limit) + { + var skipRecords = new HashSet(); + var records = new HashSet(); + var len = facets.Length; + for (var i = 0; i < len; ++i) + { + var facet = facets[i]; + iterator1.Seek(new CompositeKeyOfTokenRecordPrevious() { - var skipRecords = new HashSet(); - var records = new HashSet(); - var len = facets.Length; - for (var i = 0; i < len; ++i) - { - var facet = facets[i]; - iterator1.Seek(new CompositeKeyOfTokenRecordPrevious() - { - Token = facet, - }); + Token = facet, + }); - var off = 0; - if (limit != 0) - limit += skip; - while (iterator1.Next()) - { - if (cancellationToken.IsCancellationRequested) return records; - var key = iterator1.CurrentKey; - var record = key.Record; - if (tokenComparer.AreNotEqual(key.Token, facet)) break; - if (tokenComparer.AreNotEqual(key.PreviousToken, facet)) continue; - if (skipRecords.Contains(record)) continue; + var off = 0; + if (limit != 0) + limit += skip; + while (iterator1.Next()) + { + if (cancellationToken.IsCancellationRequested) return records; + var key = iterator1.CurrentKey; + var record = key.Record; + if (tokenComparer.AreNotEqual(key.Token, facet)) break; + if (tokenComparer.AreNotEqual(key.PreviousToken, facet)) continue; + if (skipRecords.Contains(record)) continue; - // If the record is already processed, just skip it. - if (records.Contains(record)) continue; + // If the record is already processed, just skip it. + if (records.Contains(record)) continue; - if (off >= skip) - { - records.Add(record); - } - else - { - // if the current offset is skipped, we have to skip - // all records in the index to ensure - // the previously skipped records are excluded from the result. 
- skipRecords.Add(record); - } - ++off; - if (limit > 0 && off == limit) break; - } - if (limit > 0 && off == limit) break; - } - return records; + if (off >= skip) + { + records.Add(record); + } + else + { + // if the current offset is skipped, we have to skip + // all records in the index to ensure + // the previously skipped records are excluded from the result. + skipRecords.Add(record); + } + ++off; + if (limit > 0 && off == limit) break; } + if (limit > 0 && off == limit) break; + } + return records; } + } } diff --git a/src/ZoneTree.FullTextSearch/Search/SearchQuery.cs b/src/ZoneTree.FullTextSearch/Search/SearchQuery.cs index 43a54ca..06d2384 100644 --- a/src/ZoneTree.FullTextSearch/Search/SearchQuery.cs +++ b/src/ZoneTree.FullTextSearch/Search/SearchQuery.cs @@ -1,4 +1,4 @@ - + namespace ZoneTree.FullTextSearch.Search; /// @@ -7,76 +7,76 @@ namespace ZoneTree.FullTextSearch.Search; /// The type of tokens in the query. public sealed class SearchQuery : IEquatable> { - /// - /// Gets or sets the root query node for this search query. - /// - public QueryNode QueryNode { get; set; } + /// + /// Gets or sets the root query node for this search query. + /// + public QueryNode QueryNode { get; set; } - /// - /// Gets or sets the number of records to skip in the search results. - /// - public int Skip { get; set; } + /// + /// Gets or sets the number of records to skip in the search results. + /// + public int Skip { get; set; } - /// - /// Gets or sets the maximum number of records to return in the search results. - /// - public int Limit { get; set; } + /// + /// Gets or sets the maximum number of records to return in the search results. + /// + public int Limit { get; set; } - /// - /// Gets a value indicating whether this search query contains any positive criteria for evaluation. - /// Empty nodes and NOT queries are not counted as a positive criteria. 
- /// - public bool HasAnyPositiveCriteria => QueryNode != null && QueryNode.HasAnyPositiveCriteria; + /// + /// Gets a value indicating whether this search query contains any positive criteria for evaluation. + /// Empty nodes and NOT queries are not counted as a positive criteria. + /// + public bool HasAnyPositiveCriteria => QueryNode != null && QueryNode.HasAnyPositiveCriteria; - /// - /// Gets a value indicating whether this search query is empty. - /// - public bool IsEmpty => QueryNode == null || QueryNode.IsEmpty; + /// + /// Gets a value indicating whether this search query is empty. + /// + public bool IsEmpty => QueryNode == null || QueryNode.IsEmpty; - /// - /// Initializes a new instance of the class. - /// - /// The root query node for this search query. - /// The number of records to skip in the search results. - /// The maximum number of records to return in the search results. - public SearchQuery( - QueryNode queryNode, - int skip = 0, - int limit = 0 - ) - { - QueryNode = queryNode; - Skip = skip; - Limit = limit; - } + /// + /// Initializes a new instance of the class. + /// + /// The root query node for this search query. + /// The number of records to skip in the search results. + /// The maximum number of records to return in the search results. 
+ public SearchQuery( + QueryNode queryNode, + int skip = 0, + int limit = 0 + ) + { + QueryNode = queryNode; + Skip = skip; + Limit = limit; + } - public override bool Equals(object obj) - { - return Equals(obj as SearchQuery); - } + public override bool Equals(object obj) + { + return Equals(obj as SearchQuery); + } - public bool Equals(SearchQuery other) - { - return other is not null && - EqualityComparer> - .Default - .Equals(QueryNode, other.QueryNode) && - Skip == other.Skip && - Limit == other.Limit; - } + public bool Equals(SearchQuery other) + { + return other is not null && + EqualityComparer> + .Default + .Equals(QueryNode, other.QueryNode) && + Skip == other.Skip && + Limit == other.Limit; + } - public override int GetHashCode() - { - return HashCode.Combine(QueryNode, Skip, Limit); - } + public override int GetHashCode() + { + return HashCode.Combine(QueryNode, Skip, Limit); + } - public static bool operator ==(SearchQuery left, SearchQuery right) - { - return EqualityComparer>.Default.Equals(left, right); - } + public static bool operator ==(SearchQuery left, SearchQuery right) + { + return EqualityComparer>.Default.Equals(left, right); + } - public static bool operator !=(SearchQuery left, SearchQuery right) - { - return !(left == right); - } + public static bool operator !=(SearchQuery left, SearchQuery right) + { + return !(left == right); + } } diff --git a/src/ZoneTree.FullTextSearch/SearchEngines/AdvancedZoneTreeOptions.cs b/src/ZoneTree.FullTextSearch/SearchEngines/AdvancedZoneTreeOptions.cs index 8ec221f..0cb869a 100644 --- a/src/ZoneTree.FullTextSearch/SearchEngines/AdvancedZoneTreeOptions.cs +++ b/src/ZoneTree.FullTextSearch/SearchEngines/AdvancedZoneTreeOptions.cs @@ -1,4 +1,4 @@ -using ZoneTree; +using ZoneTree; using ZoneTree.FullTextSearch.Model; using ZoneTree.AbstractFileStream; @@ -18,49 +18,49 @@ public sealed class AdvancedZoneTreeOptions where TRecord : unmanaged where TToken : unmanaged { - /// - /// Gets or sets the used to 
manage file streams - /// for storing ZoneTree data. If this is null, the default implementation - /// provided by ZoneTree will be used. - /// - public IFileStreamProvider FileStreamProvider { get; set; } + /// + /// Gets or sets the used to manage file streams + /// for storing ZoneTree data. If this is null, the default implementation + /// provided by ZoneTree will be used. + /// + public IFileStreamProvider FileStreamProvider { get; set; } - /// - /// Gets or sets an optional delegate that configures the - /// for the keys - /// and values. - /// - /// This is called before the factory builds its internal ZoneTree. You can use it - /// to configure advanced settings such as in-memory or on-disk data paths, caching, - /// block sizes, compression, or other low-level ZoneTree behaviors. - /// - /// - /// - /// This configurator applies specifically to data indexed by the hashed token - /// combined with the "previous token" to enforce token order. - /// - public Action< - ZoneTreeFactory< - CompositeKeyOfTokenRecordPrevious, - byte>> FactoryConfigurator1 - { get; set; } + /// + /// Gets or sets an optional delegate that configures the + /// for the keys + /// and values. + /// + /// This is called before the factory builds its internal ZoneTree. You can use it + /// to configure advanced settings such as in-memory or on-disk data paths, caching, + /// block sizes, compression, or other low-level ZoneTree behaviors. + /// + /// + /// + /// This configurator applies specifically to data indexed by the hashed token + /// combined with the "previous token" to enforce token order. + /// + public Action< + ZoneTreeFactory< + CompositeKeyOfTokenRecordPrevious, + byte>> FactoryConfigurator1 + { get; set; } - /// - /// Gets or sets an optional delegate that configures the - /// for the keys and values. 
- /// - /// Similar to , this is invoked before the factory - /// completes its setup, allowing custom adjustments for storage, caching, and other - /// advanced ZoneTree features. - /// - /// - /// - /// This configurator applies specifically to data indexed by the record combined - /// with the token, often used for efficient record deletion or secondary indexing. - /// - public Action< - ZoneTreeFactory< - CompositeKeyOfRecordToken, - byte>> FactoryConfigurator2 - { get; set; } + /// + /// Gets or sets an optional delegate that configures the + /// for the keys and values. + /// + /// Similar to , this is invoked before the factory + /// completes its setup, allowing custom adjustments for storage, caching, and other + /// advanced ZoneTree features. + /// + /// + /// + /// This configurator applies specifically to data indexed by the record combined + /// with the token, often used for efficient record deletion or secondary indexing. + /// + public Action< + ZoneTreeFactory< + CompositeKeyOfRecordToken, + byte>> FactoryConfigurator2 + { get; set; } } diff --git a/src/ZoneTree.FullTextSearch/SearchEngines/HashedSearchEngine.cs b/src/ZoneTree.FullTextSearch/SearchEngines/HashedSearchEngine.cs index fdb7a60..f7d86ef 100644 --- a/src/ZoneTree.FullTextSearch/SearchEngines/HashedSearchEngine.cs +++ b/src/ZoneTree.FullTextSearch/SearchEngines/HashedSearchEngine.cs @@ -1,4 +1,4 @@ -using System; +using System; using System.Drawing; using ZoneTree; using ZoneTree.Comparers; @@ -22,416 +22,416 @@ namespace ZoneTree.FullTextSearch.SearchEngines; public sealed class HashedSearchEngine : IDisposable where TRecord : unmanaged { - /// - /// Gets the index used by the search engine to store and retrieve records. - /// - public readonly IndexOfTokenRecordPreviousToken Index; + /// + /// Gets the index used by the search engine to store and retrieve records. 
+ /// + public readonly IndexOfTokenRecordPreviousToken Index; - /// - /// The tokenizer used to split text into word slices for hashing. - /// - readonly IWordTokenizer WordTokenizer; + /// + /// The tokenizer used to split text into word slices for hashing. + /// + readonly IWordTokenizer WordTokenizer; - /// - /// The hash code generator used to generate hash codes for tokens. - /// - readonly IHashCodeGenerator HashCodeGenerator; + /// + /// The hash code generator used to generate hash codes for tokens. + /// + readonly IHashCodeGenerator HashCodeGenerator; - /// - /// The flag that describes if the instance is disposed. - /// - bool isDisposed; + /// + /// The flag that describes if the instance is disposed. + /// + bool isDisposed; - /// - /// Initializes a new instance of the class. - /// - /// The path to the data storage, defaulting to "data". - /// Indicates whether a secondary index should be used to perform faster deletion. - /// The comparer used to manage references to records. - /// The tokenizer used to split words. If null, a default tokenizer is used. - /// The hash code generator used to generate hash codes for the tokens. If null, a default generator is used. - /// Defines the life time of cached blocks. Default is 1 minute. - /// Advanced ZoneTree Options enabling customization of underlying ZoneTree instances. - public HashedSearchEngine( - string dataPath = "data", - bool useSecondaryIndex = false, - IWordTokenizer wordTokenizer = null, - IRefComparer recordComparer = null, - IHashCodeGenerator hashCodeGenerator = null, - long blockCacheLifeTimeInMilliseconds = 60_000, - AdvancedZoneTreeOptions advancedOptions = null) - { - HashCodeGenerator = hashCodeGenerator ?? new DefaultHashCodeGenerator(); - Index = new( - dataPath, - recordComparer, - new UInt64ComparerAscending(), - useSecondaryIndex, - blockCacheLifeTimeInMilliseconds, - advancedOptions); - WordTokenizer = - wordTokenizer ?? 
- new WordTokenizer(hashCodeGenerator: HashCodeGenerator); - } + /// + /// Initializes a new instance of the class. + /// + /// The path to the data storage, defaulting to "data". + /// Indicates whether a secondary index should be used to perform faster deletion. + /// The comparer used to manage references to records. + /// The tokenizer used to split words. If null, a default tokenizer is used. + /// The hash code generator used to generate hash codes for the tokens. If null, a default generator is used. + /// Defines the life time of cached blocks. Default is 1 minute. + /// Advanced ZoneTree Options enabling customization of underlying ZoneTree instances. + public HashedSearchEngine( + string dataPath = "data", + bool useSecondaryIndex = false, + IWordTokenizer wordTokenizer = null, + IRefComparer recordComparer = null, + IHashCodeGenerator hashCodeGenerator = null, + long blockCacheLifeTimeInMilliseconds = 60_000, + AdvancedZoneTreeOptions advancedOptions = null) + { + HashCodeGenerator = hashCodeGenerator ?? new DefaultHashCodeGenerator(); + Index = new( + dataPath, + recordComparer, + new UInt64ComparerAscending(), + useSecondaryIndex, + blockCacheLifeTimeInMilliseconds, + advancedOptions); + WordTokenizer = + wordTokenizer ?? + new WordTokenizer(hashCodeGenerator: HashCodeGenerator); + } - /// - /// Initializes a new instance of the class using an existing index. - /// - /// The pre-existing index to use for this search engine. - /// The tokenizer used to split words. If null, a default tokenizer is used. - public HashedSearchEngine( - IndexOfTokenRecordPreviousToken index, - IWordTokenizer wordTokenizer = null) - { - Index = index; - WordTokenizer = wordTokenizer ?? new WordTokenizer(); - } + /// + /// Initializes a new instance of the class using an existing index. + /// + /// The pre-existing index to use for this search engine. + /// The tokenizer used to split words. If null, a default tokenizer is used. 
+ public HashedSearchEngine( + IndexOfTokenRecordPreviousToken index, + IWordTokenizer wordTokenizer = null) + { + Index = index; + WordTokenizer = wordTokenizer ?? new WordTokenizer(); + } - /// - /// Adds a new record to the index, associating it with the hashed tokens from the provided text. - /// - /// The record to add to the index. - /// The text to tokenize and hash for indexing. - public void AddRecord(TRecord record, string text) + /// + /// Adds a new record to the index, associating it with the hashed tokens from the provided text. + /// + /// The record to add to the index. + /// The text to tokenize and hash for indexing. + public void AddRecord(TRecord record, string text) + { + var memory = text.AsMemory(); + var previousToken = 0ul; + var slices = WordTokenizer.EnumerateSlices(memory).ToArray(); + var len = slices.Length; + for (int i = 0; i < len; i++) { - var memory = text.AsMemory(); - var previousToken = 0ul; - var slices = WordTokenizer.EnumerateSlices(memory).ToArray(); - var len = slices.Length; - for (int i = 0; i < len; i++) - { - var slice = slices[i]; - var token = HashCodeGenerator.GetHashCode(memory.Slice(slice)); - Index.UpsertRecord(token, record, previousToken); - previousToken = token; - } + var slice = slices[i]; + var token = HashCodeGenerator.GetHashCode(memory.Slice(slice)); + Index.UpsertRecord(token, record, previousToken); + previousToken = token; } + } - /// - /// Updates a record in the search engine by deleting tokens from the old text - /// and inserting tokens from the new text in a single operation. - /// This method ensures that only the tokens that have changed between the old and new text - /// are deleted and added to the index, optimizing the update process. - /// - /// The record to update. - /// The original text of the record that needs to be updated. - /// The new text that will replace the old text in the record. 
- public void UpdateRecord(TRecord record, string oldText, string newText) - { - // Tokenize the old text and store tokens and their previous tokens - var memory1 = (oldText ?? string.Empty).AsMemory(); - var previousToken1 = 0ul; - var slices1 = WordTokenizer.EnumerateSlices(memory1).ToArray(); - var len1 = slices1.Length; - - // Tokenize the new text and store tokens and their previous tokens - var memory2 = (newText ?? string.Empty).AsMemory(); - var previousToken2 = 0ul; - var slices2 = WordTokenizer.EnumerateSlices(memory2).ToArray(); - var len2 = slices2.Length; - - // Create sets to track tokens that need to be created and deleted - HashSet> itemsInTheNewText = new(); - HashSet> itemsInTheOldText = new(); - - for (var i = 0; i < len2; i++) - { - var slice = slices2[i]; - var token = HashCodeGenerator.GetHashCode(memory2.Slice(slice)); - itemsInTheNewText.Add(new() - { - Token = token, - PreviousToken = previousToken2 - }); - previousToken2 = token; - } + /// + /// Updates a record in the search engine by deleting tokens from the old text + /// and inserting tokens from the new text in a single operation. + /// This method ensures that only the tokens that have changed between the old and new text + /// are deleted and added to the index, optimizing the update process. + /// + /// The record to update. + /// The original text of the record that needs to be updated. + /// The new text that will replace the old text in the record. + public void UpdateRecord(TRecord record, string oldText, string newText) + { + // Tokenize the old text and store tokens and their previous tokens + var memory1 = (oldText ?? 
string.Empty).AsMemory(); + var previousToken1 = 0ul; + var slices1 = WordTokenizer.EnumerateSlices(memory1).ToArray(); + var len1 = slices1.Length; - for (var i = 0; i < len1; i++) - { - var slice = slices1[i]; - var token = HashCodeGenerator.GetHashCode(memory1.Slice(slice)); - var item = new TokenPair() - { - Token = token, - PreviousToken = previousToken1 - }; - itemsInTheOldText.Add(item); + // Tokenize the new text and store tokens and their previous tokens + var memory2 = (newText ?? string.Empty).AsMemory(); + var previousToken2 = 0ul; + var slices2 = WordTokenizer.EnumerateSlices(memory2).ToArray(); + var len2 = slices2.Length; - // Tokens in the old text that do not appear in the new text are deleted. - if (!itemsInTheNewText.Contains(item)) - { - Index.DeleteRecord(token, record, previousToken1); - } + // Create sets to track tokens that need to be created and deleted + HashSet> itemsInTheNewText = new(); + HashSet> itemsInTheOldText = new(); - previousToken1 = token; - } - - foreach (var item in itemsInTheNewText) - { - // Tokens in the new text that do not appear in the old text are inserted. - if (!itemsInTheOldText.Contains(item)) - { - Index.UpsertRecord(item.Token, record, item.PreviousToken); - } - } - } - - /// - /// Deletes a record from the index. - /// This method removes all entries of the record from the index. - /// If no secondary index is used, this method can be extremely slow - /// as it requires a full index scan to remove all associated tokens. - /// - /// The record identifier to delete. - /// The number of tokens deleted. - public long DeleteRecord(TRecord record) + for (var i = 0; i < len2; i++) { - return Index.DeleteRecord(record); + var slice = slices2[i]; + var token = HashCodeGenerator.GetHashCode(memory2.Slice(slice)); + itemsInTheNewText.Add(new() + { + Token = token, + PreviousToken = previousToken2 + }); + previousToken2 = token; } - /// - /// Deletes a record from the search engine by re-tokenizing its text. 
- /// This method is faster than in both scenarios (with or without a secondary index), - /// but it requires the original text of the record to re-generate and delete all associated tokens. - /// - /// The record identifier to delete. - /// The original text of the record, used for tokenization. - /// The number of tokens deleted. - public long DeleteTokens(TRecord record, string text) + for (var i = 0; i < len1; i++) { - var memory = text.AsMemory(); - var previousToken = 0ul; - var slices = WordTokenizer.EnumerateSlices(memory).ToArray(); - var len = slices.Length; - for (int i = 0; i < len; i++) - { - var slice = slices[i]; - var token = HashCodeGenerator.GetHashCode(memory.Slice(slice)); - Index.DeleteRecord(token, record, previousToken); - previousToken = token; - } + var slice = slices1[i]; + var token = HashCodeGenerator.GetHashCode(memory1.Slice(slice)); + var item = new TokenPair() + { + Token = token, + PreviousToken = previousToken1 + }; + itemsInTheOldText.Add(item); - return Index.DeleteRecord(record); - } + // Tokens in the old text that do not appear in the new text are deleted. + if (!itemsInTheNewText.Contains(item)) + { + Index.DeleteRecord(token, record, previousToken1); + } - ulong GetFacetToken(string name, string value) - { - var text = $"{name}:{value}"; - var memory = text.AsMemory(); - return HashCodeGenerator.GetHashCode(memory); + previousToken1 = token; } - ulong[] GetFacetTokens(IReadOnlyDictionary facets) + foreach (var item in itemsInTheNewText) { - if (facets == null) return []; - return facets.Select(x => GetFacetToken(x.Key, x.Value)).ToArray(); + // Tokens in the new text that do not appear in the old text are inserted. + if (!itemsInTheOldText.Contains(item)) + { + Index.UpsertRecord(item.Token, record, item.PreviousToken); + } } + } - /// - /// Adds or updates a single facet for the specified record. - /// - /// The record to which the facet will be added or updated. - /// The name of the facet (e.g., "category", "author"). 
- /// The value of the facet (e.g., "books", "John Doe"). - public void AddFacet(TRecord record, string name, string value) - { - var token = GetFacetToken(name, value); - Index.UpsertRecord(token, record, token); - } + /// + /// Deletes a record from the index. + /// This method removes all entries of the record from the index. + /// If no secondary index is used, this method can be extremely slow + /// as it requires a full index scan to remove all associated tokens. + /// + /// The record identifier to delete. + /// The number of tokens deleted. + public long DeleteRecord(TRecord record) + { + return Index.DeleteRecord(record); + } - /// - /// Deletes a specific facet associated with the specified record from the index. - /// - /// The record from which the facet will be deleted. - /// The name of the facet to delete (e.g., "category", "author"). - /// The value of the facet to delete (e.g., "books", "John Doe"). - public void DeleteFacet(TRecord record, string name, string value) + /// + /// Deletes a record from the search engine by re-tokenizing its text. + /// This method is faster than in both scenarios (with or without a secondary index), + /// but it requires the original text of the record to re-generate and delete all associated tokens. + /// + /// The record identifier to delete. + /// The original text of the record, used for tokenization. + /// The number of tokens deleted. 
+ public long DeleteTokens(TRecord record, string text) + { + var memory = text.AsMemory(); + var previousToken = 0ul; + var slices = WordTokenizer.EnumerateSlices(memory).ToArray(); + var len = slices.Length; + for (int i = 0; i < len; i++) { - var token = GetFacetToken(name, value); - Index.DeleteRecord(token, record, token); + var slice = slices[i]; + var token = HashCodeGenerator.GetHashCode(memory.Slice(slice)); + Index.DeleteRecord(token, record, previousToken); + previousToken = token; } - /// - /// Searches the index based on a search string, with optional token order respect and pagination. - /// - /// - /// The search string containing the terms to look for in the index. - /// This string is tokenized internally to identify individual search tokens. - /// The search terms are logically grouped using "AND", meaning all terms must be present in the matching records. - /// If the search string is empty or null, the result will be an empty array. - /// Retrieving all records via the full-text index is avoided for performance reasons. - /// In such cases, it is recommended to fetch records from the actual record source instead of using the search index. - /// - /// - /// A boolean indicating whether the search should respect the order of tokens in the search string. - /// If true, the records must contain the tokens in the same order as they appear in the search string. - /// - /// - /// The number of matching records to skip in the result set, useful for pagination. - /// Defaults to 0. - /// - /// - /// The maximum number of records to return, useful for limiting the result set size. - /// Defaults to 0, which indicates no limit. - /// - /// - /// An array of records that match the search string, respecting the token order if specified. - /// The array may be empty if no matching records are found. 
- /// - public TRecord[] SimpleSearch( - string search, - bool respectTokenOrder = true, - int skip = 0, - int limit = 0, - CancellationToken cancellationToken = default) - { - if (string.IsNullOrWhiteSpace(search)) return []; - var memory = search.AsMemory(); - var slices = WordTokenizer.GetSlices(search); - if (slices.Count == 0) return []; - var longestSlice = slices.MaxBy(x => x.Length); - var longestToken = HashCodeGenerator.GetHashCode(memory.Slice(longestSlice)); - var tokens = slices - .Select(slice => HashCodeGenerator.GetHashCode(memory.Slice(slice))) - .ToArray(); - return Index - .SimpleSearch( - tokens, longestToken, respectTokenOrder, - default, skip, limit, cancellationToken); - } + return Index.DeleteRecord(record); + } - /// - /// Searches the index based on a search string, with optional facet filters, token order respect, and pagination. - /// - /// - /// The search string containing the terms to look for in the index. - /// This string is tokenized internally to identify individual search tokens. - /// The search terms are logically grouped using "AND", meaning all terms must be present in the matching records. - /// If the search string is empty or null, the search will still be performed if facets are provided. - /// However, if both the search string and facets are empty or null, the result will be an empty array, as searching without any criteria is not supported. - /// Retrieving all records via the full-text index is avoided for performance reasons. - /// In such cases, it is recommended to fetch records from the actual record source instead of using the search index. - /// - /// - /// A dictionary of facet filters where the key represents the facet field and the value represents the required facet value. - /// The facets are logically grouped using "OR", meaning the records must match at least one of the specified facet values if any facets are provided. 
- /// If the dictionary is empty or null, no facet filtering is applied, and all matching records are returned regardless of facet values. - /// - /// - /// A boolean indicating whether the search should respect the order of tokens in the search string. - /// If true, the records must contain the tokens in the same order as they appear in the search string. - /// - /// - /// The number of matching records to skip in the result set, useful for pagination. - /// Defaults to 0. - /// - /// - /// The maximum number of records to return, useful for limiting the result set size. - /// Defaults to 0, which indicates no limit. - /// - /// - /// A token to monitor for cancellation requests. This allows the search operation to be canceled if necessary. - /// - /// - /// An array of records that match the search string and facet filters, respecting the token order if specified. - /// The array may be empty if no matching records are found. - /// - public TRecord[] SimpleSearch( - string search, - IReadOnlyDictionary facets, - bool respectTokenOrder = true, - int skip = 0, - int limit = 0, - CancellationToken cancellationToken = default) - { - if (string.IsNullOrWhiteSpace(search)) - { - var facetTokens = GetFacetTokens(facets); - if (facetTokens.Length == 0) return []; - return Index - .SimpleSearch( - [], - default, - respectTokenOrder, - facetTokens, - skip, - limit, - cancellationToken); - } - var memory = search.AsMemory(); - var slices = WordTokenizer.GetSlices(search); - if (slices.Count == 0) return []; - var longestSlice = slices.MaxBy(x => x.Length); - var longestToken = HashCodeGenerator.GetHashCode(memory.Slice(longestSlice)); - var tokens = slices - .Select(slice => HashCodeGenerator.GetHashCode(memory.Slice(slice))) - .ToArray(); - return Index - .SimpleSearch( - tokens, - longestToken, - respectTokenOrder, - GetFacetTokens(facets), - skip, - limit, - cancellationToken); - } + ulong GetFacetToken(string name, string value) + { + var text = $"{name}:{value}"; + 
var memory = text.AsMemory(); + return HashCodeGenerator.GetHashCode(memory); + } - /// - /// Performs a search based on the specified query and returns the matching records. - /// - /// The search query to execute. - /// - /// A token to monitor for cancellation requests. This allows the search operation to be canceled if necessary. - /// - /// An array of records that match the search criteria. - public TRecord[] Search(SearchQuery query, CancellationToken cancellationToken = default) - { - var hashedQuery = HashedSearchQueryFactory - .FromStringSearchQuery(query, HashCodeGenerator, WordTokenizer); - return Index.Search(hashedQuery, cancellationToken); - } + ulong[] GetFacetTokens(IReadOnlyDictionary facets) + { + if (facets == null) return []; + return facets.Select(x => GetFacetToken(x.Key, x.Value)).ToArray(); + } - /// - /// Performs a search based on the specified query and returns the matching records. - /// - /// The search query to execute. - /// - /// A token to monitor for cancellation requests. This allows the search operation to be canceled if necessary. - /// - /// An array of records that match the search criteria. - public TRecord[] Search( - string search, - int skip = 0, - int limit = 0, - CancellationToken cancellationToken = default) - { - var parser = new Parser(search); - var query = parser.Parse(); - query.Limit = limit; - query.Skip = skip; - var hashedQuery = HashedSearchQueryFactory - .FromStringSearchQuery(query, HashCodeGenerator, WordTokenizer); - return Index.Search(hashedQuery, cancellationToken); - } + /// + /// Adds or updates a single facet for the specified record. + /// + /// The record to which the facet will be added or updated. + /// The name of the facet (e.g., "category", "author"). + /// The value of the facet (e.g., "books", "John Doe"). 
+ public void AddFacet(TRecord record, string name, string value) + { + var token = GetFacetToken(name, value); + Index.UpsertRecord(token, record, token); + } - /// - /// Drops the search engine. - /// - public void Drop() - { - Index.Drop(); - } + /// + /// Deletes a specific facet associated with the specified record from the index. + /// + /// The record from which the facet will be deleted. + /// The name of the facet to delete (e.g., "category", "author"). + /// The value of the facet to delete (e.g., "books", "John Doe"). + public void DeleteFacet(TRecord record, string name, string value) + { + var token = GetFacetToken(name, value); + Index.DeleteRecord(token, record, token); + } - /// - /// Disposes the resources used by the search engine. - /// - public void Dispose() + /// + /// Searches the index based on a search string, with optional token order respect and pagination. + /// + /// + /// The search string containing the terms to look for in the index. + /// This string is tokenized internally to identify individual search tokens. + /// The search terms are logically grouped using "AND", meaning all terms must be present in the matching records. + /// If the search string is empty or null, the result will be an empty array. + /// Retrieving all records via the full-text index is avoided for performance reasons. + /// In such cases, it is recommended to fetch records from the actual record source instead of using the search index. + /// + /// + /// A boolean indicating whether the search should respect the order of tokens in the search string. + /// If true, the records must contain the tokens in the same order as they appear in the search string. + /// + /// + /// The number of matching records to skip in the result set, useful for pagination. + /// Defaults to 0. + /// + /// + /// The maximum number of records to return, useful for limiting the result set size. + /// Defaults to 0, which indicates no limit. 
+ /// + /// + /// An array of records that match the search string, respecting the token order if specified. + /// The array may be empty if no matching records are found. + /// + public TRecord[] SimpleSearch( + string search, + bool respectTokenOrder = true, + int skip = 0, + int limit = 0, + CancellationToken cancellationToken = default) + { + if (string.IsNullOrWhiteSpace(search)) return []; + var memory = search.AsMemory(); + var slices = WordTokenizer.GetSlices(search); + if (slices.Count == 0) return []; + var longestSlice = slices.MaxBy(x => x.Length); + var longestToken = HashCodeGenerator.GetHashCode(memory.Slice(longestSlice)); + var tokens = slices + .Select(slice => HashCodeGenerator.GetHashCode(memory.Slice(slice))) + .ToArray(); + return Index + .SimpleSearch( + tokens, longestToken, respectTokenOrder, + default, skip, limit, cancellationToken); + } + + /// + /// Searches the index based on a search string, with optional facet filters, token order respect, and pagination. + /// + /// + /// The search string containing the terms to look for in the index. + /// This string is tokenized internally to identify individual search tokens. + /// The search terms are logically grouped using "AND", meaning all terms must be present in the matching records. + /// If the search string is empty or null, the search will still be performed if facets are provided. + /// However, if both the search string and facets are empty or null, the result will be an empty array, as searching without any criteria is not supported. + /// Retrieving all records via the full-text index is avoided for performance reasons. + /// In such cases, it is recommended to fetch records from the actual record source instead of using the search index. + /// + /// + /// A dictionary of facet filters where the key represents the facet field and the value represents the required facet value. 
+ /// The facets are logically grouped using "OR", meaning the records must match at least one of the specified facet values if any facets are provided. + /// If the dictionary is empty or null, no facet filtering is applied, and all matching records are returned regardless of facet values. + /// + /// + /// A boolean indicating whether the search should respect the order of tokens in the search string. + /// If true, the records must contain the tokens in the same order as they appear in the search string. + /// + /// + /// The number of matching records to skip in the result set, useful for pagination. + /// Defaults to 0. + /// + /// + /// The maximum number of records to return, useful for limiting the result set size. + /// Defaults to 0, which indicates no limit. + /// + /// + /// A token to monitor for cancellation requests. This allows the search operation to be canceled if necessary. + /// + /// + /// An array of records that match the search string and facet filters, respecting the token order if specified. + /// The array may be empty if no matching records are found. 
+ /// + public TRecord[] SimpleSearch( + string search, + IReadOnlyDictionary facets, + bool respectTokenOrder = true, + int skip = 0, + int limit = 0, + CancellationToken cancellationToken = default) + { + if (string.IsNullOrWhiteSpace(search)) { - if (isDisposed) return; - isDisposed = true; - Index.IsReadOnly = true; - Index.WaitForBackgroundThreads(); - Index.Dispose(); + var facetTokens = GetFacetTokens(facets); + if (facetTokens.Length == 0) return []; + return Index + .SimpleSearch( + [], + default, + respectTokenOrder, + facetTokens, + skip, + limit, + cancellationToken); } + var memory = search.AsMemory(); + var slices = WordTokenizer.GetSlices(search); + if (slices.Count == 0) return []; + var longestSlice = slices.MaxBy(x => x.Length); + var longestToken = HashCodeGenerator.GetHashCode(memory.Slice(longestSlice)); + var tokens = slices + .Select(slice => HashCodeGenerator.GetHashCode(memory.Slice(slice))) + .ToArray(); + return Index + .SimpleSearch( + tokens, + longestToken, + respectTokenOrder, + GetFacetTokens(facets), + skip, + limit, + cancellationToken); + } + + /// + /// Performs a search based on the specified query and returns the matching records. + /// + /// The search query to execute. + /// + /// A token to monitor for cancellation requests. This allows the search operation to be canceled if necessary. + /// + /// An array of records that match the search criteria. + public TRecord[] Search(SearchQuery query, CancellationToken cancellationToken = default) + { + var hashedQuery = HashedSearchQueryFactory + .FromStringSearchQuery(query, HashCodeGenerator, WordTokenizer); + return Index.Search(hashedQuery, cancellationToken); + } + + /// + /// Performs a search based on the specified query and returns the matching records. + /// + /// The search query to execute. + /// + /// A token to monitor for cancellation requests. This allows the search operation to be canceled if necessary. + /// + /// An array of records that match the search criteria. 
+ public TRecord[] Search( + string search, + int skip = 0, + int limit = 0, + CancellationToken cancellationToken = default) + { + var parser = new Parser(search); + var query = parser.Parse(); + query.Limit = limit; + query.Skip = skip; + var hashedQuery = HashedSearchQueryFactory + .FromStringSearchQuery(query, HashCodeGenerator, WordTokenizer); + return Index.Search(hashedQuery, cancellationToken); + } + + /// + /// Drops the search engine. + /// + public void Drop() + { + Index.Drop(); + } + + /// + /// Disposes the resources used by the search engine. + /// + public void Dispose() + { + if (isDisposed) return; + isDisposed = true; + Index.IsReadOnly = true; + Index.WaitForBackgroundThreads(); + Index.Dispose(); + } } diff --git a/src/ZoneTree.FullTextSearch/Storage/RecordTable.cs b/src/ZoneTree.FullTextSearch/Storage/RecordTable.cs index 9f3942a..30c6649 100644 --- a/src/ZoneTree.FullTextSearch/Storage/RecordTable.cs +++ b/src/ZoneTree.FullTextSearch/Storage/RecordTable.cs @@ -1,4 +1,4 @@ -using ZoneTree; +using ZoneTree; using ZoneTree.AbstractFileStream; namespace ZoneTree.FullTextSearch; @@ -10,187 +10,187 @@ namespace ZoneTree.FullTextSearch; /// The type of the value associated with each record. public sealed class RecordTable : IDisposable where TRecord : unmanaged { - /// - /// The primary ZoneTree used for storing records with their associated values. - /// - public readonly IZoneTree ZoneTree1; - - /// - /// The secondary ZoneTree used for storing values with their associated records, facilitating reverse lookups. - /// - public readonly IZoneTree ZoneTree2; - - /// - /// Maintainer for the primary ZoneTree, handles background maintenance tasks. - /// - public readonly IMaintainer Maintainer1; - - /// - /// Maintainer for the secondary ZoneTree, also handles background maintenance tasks. - /// - public readonly IMaintainer Maintainer2; - - /// - /// Returns true if the record table is dropped, otherwise false. 
- /// - public bool IsDropped { get => isDropped; } - - /// - /// The flag that describes if the instance is dropped. - /// - bool isDropped; - - /// - /// Initializes a new instance of the RecordTable class, setting up the two ZoneTrees and their maintainers. - /// - /// The base directory path where the data of both ZoneTrees will be stored. - /// Optional configuration action for the first ZoneTree factory. - /// Optional configuration action for the second ZoneTree factory. - /// Defines the life time of cached blocks. Default is 1 minute. - /// Optional custom file stream provider. - public RecordTable( - string dataPath = "data", - Action> factory1 = null, - Action> factory2 = null, - long blockCacheLifeTimeInMilliseconds = 60_000, - IFileStreamProvider fileStreamProvider = null) - { - var f1 = new ZoneTreeFactory(fileStreamProvider) - .SetDataDirectory($"{dataPath}/rectable1"); - var f2 = new ZoneTreeFactory(fileStreamProvider) - .SetDataDirectory($"{dataPath}/rectable2"); - factory1?.Invoke(f1); - factory2?.Invoke(f2); - ZoneTree1 = f1.OpenOrCreate(); - ZoneTree2 = f2.OpenOrCreate(); - - Maintainer1 = ZoneTree1.CreateMaintainer(); - Maintainer2 = ZoneTree2.CreateMaintainer(); - - Maintainer1.InactiveBlockCacheCleanupInterval = TimeSpan.FromSeconds(30); - Maintainer2.InactiveBlockCacheCleanupInterval = TimeSpan.FromSeconds(30); - - Maintainer1.BlockCacheLifeTime = TimeSpan.FromMilliseconds(blockCacheLifeTimeInMilliseconds); - Maintainer2.BlockCacheLifeTime = TimeSpan.FromMilliseconds(blockCacheLifeTimeInMilliseconds); - - Maintainer1.EnableJobForCleaningInactiveCaches = true; - Maintainer2.EnableJobForCleaningInactiveCaches = true; - } - - /// - /// Upserts a record and its associated value into both ZoneTrees, ensuring synchronization between the two. - /// - /// The record to upsert. - /// The value associated with the record. 
- public void UpsertRecord(TRecord record, TValue value) - { - ZoneTree1.Upsert(record, value); - ZoneTree2.Upsert(value, record); - } - - /// - /// Retrieves the last record from the primary ZoneTree based on the insertion order. - /// - /// The last record if available, otherwise null. - public TRecord? GetLastRecord() - { - using var iterator = ZoneTree1.CreateReverseIterator(IteratorType.NoRefresh); - if (iterator.Next()) - return iterator.CurrentKey; - return null; - } - - /// - /// Tries to retrieve a value associated with a given record. - /// - /// The record to look up the associated value for. - /// When this method returns, contains the value associated with the specified record, if the value is found. - /// true if the record is found; otherwise, false. - public bool TryGetValue(TRecord record, out TValue value) - { - return ZoneTree1.TryGet(record, out value); - } - - /// - /// Throws an exception if the index has been dropped, preventing further operations on a dropped index. - /// - void ThrowIfIndexIsDropped() - { - if (isDropped) - throw new Exception($"{nameof(RecordTable)} is dropped."); - } - - /// - /// Tries to retrieve a record associated with a given value. - /// - /// The value to look up the associated record for. - /// When this method returns, contains the record associated with the specified value, if the record is found. - /// true if the record is found; otherwise, false. - public bool TryGetRecord(TValue value, out TRecord record) - { - return ZoneTree2.TryGet(value, out record); - } - - /// - /// Evicts data from memory to disk in both primary and secondary zone trees. - /// - public void EvictToDisk() - { - ThrowIfIndexIsDropped(); - Maintainer1.EvictToDisk(); - Maintainer2.EvictToDisk(); - } - - /// - /// Attempts to cancel any background threads associated with maintenance tasks for both zone trees. 
- /// - public void TryCancelBackgroundThreads() - { - ThrowIfIndexIsDropped(); - Maintainer1.TryCancelBackgroundThreads(); - Maintainer2.TryCancelBackgroundThreads(); - } - - /// - /// Waits for all background threads associated with maintenance tasks to complete for both zone trees. - /// - public void WaitForBackgroundThreads() - { - ThrowIfIndexIsDropped(); - Maintainer1.WaitForBackgroundThreads(); - Maintainer2.WaitForBackgroundThreads(); - } - - /// - /// Drops the record table. - /// - public void Drop() - { - Maintainer1.TryCancelBackgroundThreads(); - Maintainer2.TryCancelBackgroundThreads(); - Maintainer1.WaitForBackgroundThreads(); - Maintainer2.WaitForBackgroundThreads(); - ZoneTree1.IsReadOnly = true; - ZoneTree2.IsReadOnly = true; - isDropped = true; - ZoneTree1.Maintenance.Drop(); - ZoneTree2.Maintenance.Drop(); - ZoneTree1.Dispose(); - ZoneTree2.Dispose(); - } - - /// - /// Disposes resources used by the ZoneTrees and their maintainers, ensuring a clean shutdown. - /// - public void Dispose() - { - Maintainer1.WaitForBackgroundThreads(); - Maintainer1.Dispose(); - Maintainer2.WaitForBackgroundThreads(); - Maintainer2.Dispose(); - ZoneTree1.IsReadOnly = true; - ZoneTree2.IsReadOnly = true; - ZoneTree1.Dispose(); - ZoneTree2.Dispose(); - } + /// + /// The primary ZoneTree used for storing records with their associated values. + /// + public readonly IZoneTree ZoneTree1; + + /// + /// The secondary ZoneTree used for storing values with their associated records, facilitating reverse lookups. + /// + public readonly IZoneTree ZoneTree2; + + /// + /// Maintainer for the primary ZoneTree, handles background maintenance tasks. + /// + public readonly IMaintainer Maintainer1; + + /// + /// Maintainer for the secondary ZoneTree, also handles background maintenance tasks. + /// + public readonly IMaintainer Maintainer2; + + /// + /// Returns true if the record table is dropped, otherwise false. 
+ /// + public bool IsDropped { get => isDropped; } + + /// + /// The flag that describes if the instance is dropped. + /// + bool isDropped; + + /// + /// Initializes a new instance of the RecordTable class, setting up the two ZoneTrees and their maintainers. + /// + /// The base directory path where the data of both ZoneTrees will be stored. + /// Optional configuration action for the first ZoneTree factory. + /// Optional configuration action for the second ZoneTree factory. + /// Defines the life time of cached blocks. Default is 1 minute. + /// Optional custom file stream provider. + public RecordTable( + string dataPath = "data", + Action> factory1 = null, + Action> factory2 = null, + long blockCacheLifeTimeInMilliseconds = 60_000, + IFileStreamProvider fileStreamProvider = null) + { + var f1 = new ZoneTreeFactory(fileStreamProvider) + .SetDataDirectory($"{dataPath}/rectable1"); + var f2 = new ZoneTreeFactory(fileStreamProvider) + .SetDataDirectory($"{dataPath}/rectable2"); + factory1?.Invoke(f1); + factory2?.Invoke(f2); + ZoneTree1 = f1.OpenOrCreate(); + ZoneTree2 = f2.OpenOrCreate(); + + Maintainer1 = ZoneTree1.CreateMaintainer(); + Maintainer2 = ZoneTree2.CreateMaintainer(); + + Maintainer1.InactiveBlockCacheCleanupInterval = TimeSpan.FromSeconds(30); + Maintainer2.InactiveBlockCacheCleanupInterval = TimeSpan.FromSeconds(30); + + Maintainer1.BlockCacheLifeTime = TimeSpan.FromMilliseconds(blockCacheLifeTimeInMilliseconds); + Maintainer2.BlockCacheLifeTime = TimeSpan.FromMilliseconds(blockCacheLifeTimeInMilliseconds); + + Maintainer1.EnableJobForCleaningInactiveCaches = true; + Maintainer2.EnableJobForCleaningInactiveCaches = true; + } + + /// + /// Upserts a record and its associated value into both ZoneTrees, ensuring synchronization between the two. + /// + /// The record to upsert. + /// The value associated with the record. 
+ public void UpsertRecord(TRecord record, TValue value) + { + ZoneTree1.Upsert(record, value); + ZoneTree2.Upsert(value, record); + } + + /// + /// Retrieves the last record from the primary ZoneTree based on the insertion order. + /// + /// The last record if available, otherwise null. + public TRecord? GetLastRecord() + { + using var iterator = ZoneTree1.CreateReverseIterator(IteratorType.NoRefresh); + if (iterator.Next()) + return iterator.CurrentKey; + return null; + } + + /// + /// Tries to retrieve a value associated with a given record. + /// + /// The record to look up the associated value for. + /// When this method returns, contains the value associated with the specified record, if the value is found. + /// true if the record is found; otherwise, false. + public bool TryGetValue(TRecord record, out TValue value) + { + return ZoneTree1.TryGet(record, out value); + } + + /// + /// Throws an exception if the index has been dropped, preventing further operations on a dropped index. + /// + void ThrowIfIndexIsDropped() + { + if (isDropped) + throw new Exception($"{nameof(RecordTable)} is dropped."); + } + + /// + /// Tries to retrieve a record associated with a given value. + /// + /// The value to look up the associated record for. + /// When this method returns, contains the record associated with the specified value, if the record is found. + /// true if the record is found; otherwise, false. + public bool TryGetRecord(TValue value, out TRecord record) + { + return ZoneTree2.TryGet(value, out record); + } + + /// + /// Evicts data from memory to disk in both primary and secondary zone trees. + /// + public void EvictToDisk() + { + ThrowIfIndexIsDropped(); + Maintainer1.EvictToDisk(); + Maintainer2.EvictToDisk(); + } + + /// + /// Attempts to cancel any background threads associated with maintenance tasks for both zone trees. 
+ /// + public void TryCancelBackgroundThreads() + { + ThrowIfIndexIsDropped(); + Maintainer1.TryCancelBackgroundThreads(); + Maintainer2.TryCancelBackgroundThreads(); + } + + /// + /// Waits for all background threads associated with maintenance tasks to complete for both zone trees. + /// + public void WaitForBackgroundThreads() + { + ThrowIfIndexIsDropped(); + Maintainer1.WaitForBackgroundThreads(); + Maintainer2.WaitForBackgroundThreads(); + } + + /// + /// Drops the record table. + /// + public void Drop() + { + Maintainer1.TryCancelBackgroundThreads(); + Maintainer2.TryCancelBackgroundThreads(); + Maintainer1.WaitForBackgroundThreads(); + Maintainer2.WaitForBackgroundThreads(); + ZoneTree1.IsReadOnly = true; + ZoneTree2.IsReadOnly = true; + isDropped = true; + ZoneTree1.Maintenance.Drop(); + ZoneTree2.Maintenance.Drop(); + ZoneTree1.Dispose(); + ZoneTree2.Dispose(); + } + + /// + /// Disposes resources used by the ZoneTrees and their maintainers, ensuring a clean shutdown. + /// + public void Dispose() + { + Maintainer1.WaitForBackgroundThreads(); + Maintainer1.Dispose(); + Maintainer2.WaitForBackgroundThreads(); + Maintainer2.Dispose(); + ZoneTree1.IsReadOnly = true; + ZoneTree2.IsReadOnly = true; + ZoneTree1.Dispose(); + ZoneTree2.Dispose(); + } } diff --git a/src/ZoneTree.FullTextSearch/Tokenizer/IWordTokenizer.cs b/src/ZoneTree.FullTextSearch/Tokenizer/IWordTokenizer.cs index 7c17290..7dfad69 100644 --- a/src/ZoneTree.FullTextSearch/Tokenizer/IWordTokenizer.cs +++ b/src/ZoneTree.FullTextSearch/Tokenizer/IWordTokenizer.cs @@ -1,4 +1,4 @@ -namespace ZoneTree.FullTextSearch.Tokenizer; +namespace ZoneTree.FullTextSearch.Tokenizer; /// /// Defines an interface for tokenizing a text into slices. Implementations of this interface @@ -6,17 +6,17 @@ /// public interface IWordTokenizer { - /// - /// Splits the given text into a list of slices, where each slice represents a token. - /// - /// The text to tokenize. 
- /// A read-only list of objects, each representing a token within the text. - IReadOnlyList GetSlices(ReadOnlySpan text); + /// + /// Splits the given text into a list of slices, where each slice represents a token. + /// + /// The text to tokenize. + /// A read-only list of objects, each representing a token within the text. + IReadOnlyList GetSlices(ReadOnlySpan text); - /// - /// Enumerates the slices of the given text, where each slice represents a token. - /// - /// The text to tokenize. - /// An enumerable collection of objects, each representing a token within the text. - IEnumerable EnumerateSlices(ReadOnlyMemory text); + /// + /// Enumerates the slices of the given text, where each slice represents a token. + /// + /// The text to tokenize. + /// An enumerable collection of objects, each representing a token within the text. + IEnumerable EnumerateSlices(ReadOnlyMemory text); } diff --git a/src/ZoneTree.FullTextSearch/Tokenizer/Slice.cs b/src/ZoneTree.FullTextSearch/Tokenizer/Slice.cs index 5290b86..cec3472 100644 --- a/src/ZoneTree.FullTextSearch/Tokenizer/Slice.cs +++ b/src/ZoneTree.FullTextSearch/Tokenizer/Slice.cs @@ -1,4 +1,4 @@ -namespace ZoneTree.FullTextSearch.Tokenizer; +namespace ZoneTree.FullTextSearch.Tokenizer; /// /// Represents a slice of text with a specified offset and length. diff --git a/src/ZoneTree.FullTextSearch/Tokenizer/SliceExtension.cs b/src/ZoneTree.FullTextSearch/Tokenizer/SliceExtension.cs index 3db42e7..394f686 100644 --- a/src/ZoneTree.FullTextSearch/Tokenizer/SliceExtension.cs +++ b/src/ZoneTree.FullTextSearch/Tokenizer/SliceExtension.cs @@ -1,31 +1,31 @@ -namespace ZoneTree.FullTextSearch.Tokenizer; +namespace ZoneTree.FullTextSearch.Tokenizer; /// /// A static class that provides extension methods for slicing operations on ReadOnlyMemory and ReadOnlySpan. /// public static class SliceExtension { - /// - /// Slices the specified ReadOnlyMemory using the provided Slice object. 
- /// - /// The type of the elements in the ReadOnlyMemory. - /// The ReadOnlyMemory to be sliced. - /// An instance of the Slice class containing the offset and length for slicing. - /// A sliced ReadOnlyMemory segment according to the specified offset and length. - public static ReadOnlyMemory Slice(this ReadOnlyMemory memory, Slice slice) - { - return memory.Slice(slice.Offset, slice.Length); - } + /// + /// Slices the specified ReadOnlyMemory using the provided Slice object. + /// + /// The type of the elements in the ReadOnlyMemory. + /// The ReadOnlyMemory to be sliced. + /// An instance of the Slice class containing the offset and length for slicing. + /// A sliced ReadOnlyMemory segment according to the specified offset and length. + public static ReadOnlyMemory Slice(this ReadOnlyMemory memory, Slice slice) + { + return memory.Slice(slice.Offset, slice.Length); + } - /// - /// Slices the specified ReadOnlySpan using the provided Slice object. - /// - /// The type of the elements in the ReadOnlySpan. - /// The ReadOnlySpan to be sliced. - /// An instance of the Slice class containing the offset and length for slicing. - /// A sliced ReadOnlySpan segment according to the specified offset and length. - public static ReadOnlySpan Slice(this ReadOnlySpan readonlySpan, Slice slice) - { - return readonlySpan.Slice(slice.Offset, slice.Length); - } -} \ No newline at end of file + /// + /// Slices the specified ReadOnlySpan using the provided Slice object. + /// + /// The type of the elements in the ReadOnlySpan. + /// The ReadOnlySpan to be sliced. + /// An instance of the Slice class containing the offset and length for slicing. + /// A sliced ReadOnlySpan segment according to the specified offset and length. 
+ public static ReadOnlySpan Slice(this ReadOnlySpan readonlySpan, Slice slice) + { + return readonlySpan.Slice(slice.Offset, slice.Length); + } +} diff --git a/src/ZoneTree.FullTextSearch/Tokenizer/WordTokenizer.cs b/src/ZoneTree.FullTextSearch/Tokenizer/WordTokenizer.cs index 69de1b7..7802437 100644 --- a/src/ZoneTree.FullTextSearch/Tokenizer/WordTokenizer.cs +++ b/src/ZoneTree.FullTextSearch/Tokenizer/WordTokenizer.cs @@ -1,4 +1,4 @@ -using ZoneTree.FullTextSearch.Hashing; +using ZoneTree.FullTextSearch.Hashing; namespace ZoneTree.FullTextSearch.Tokenizer; @@ -8,183 +8,183 @@ namespace ZoneTree.FullTextSearch.Tokenizer; /// public sealed class WordTokenizer : IWordTokenizer { - /// - /// Gets the minimum length of a token to be included in the tokenization results. - /// Tokens shorter than this length are ignored. - /// - public int MimimumTokenLength { get; } + /// + /// Gets the minimum length of a token to be included in the tokenization results. + /// Tokens shorter than this length are ignored. + /// + public int MimimumTokenLength { get; } - /// - /// Gets a value indicating whether digits should be included in the tokens. - /// If false, only alphabetic characters are considered as part of tokens. - /// - public bool IncludeDigits { get; } + /// + /// Gets a value indicating whether digits should be included in the tokens. + /// If false, only alphabetic characters are considered as part of tokens. + /// + public bool IncludeDigits { get; } - /// - /// Gets a value indicating whether stop words should be used during tokenization. - /// If true, tokens matching stop words will be excluded from the results. - /// - public bool UseStopWords { get; } + /// + /// Gets a value indicating whether stop words should be used during tokenization. + /// If true, tokens matching stop words will be excluded from the results. 
+ /// + public bool UseStopWords { get; } - /// - /// A set of hash codes representing stop words to be excluded from tokenization results when is true. - /// - HashSet StopWords { get; } = new(); + /// + /// A set of hash codes representing stop words to be excluded from tokenization results when is true. + /// + HashSet StopWords { get; } = new(); - /// - /// The hash code generator used to generate hash codes for stop words. - /// - readonly IHashCodeGenerator HashCodeGenerator; + /// + /// The hash code generator used to generate hash codes for stop words. + /// + readonly IHashCodeGenerator HashCodeGenerator; - /// - /// Initializes a new instance of the class with the specified - /// minimum token length, an option to include digits, and an option to use stop words. - /// - /// The minimum length of tokens to include in the results. Must be non-negative. - /// Whether to include digits in the tokens. Defaults to false. - /// The hash code generator used to generate hash codes for the stop words. If null, a default generator is used. - /// Whether to filter out stop words from the tokens. Defaults to false. - /// The custom stop words list. If it is null, the default stop words list will be used. Defaults to null. - /// Thrown when is negative. - public WordTokenizer( - int mimimumTokenLength = 3, - bool includeDigits = false, - IHashCodeGenerator hashCodeGenerator = null, - bool useStopWords = false, - string[] customStopWords = null) - { - if (mimimumTokenLength < 0) - throw new ArgumentException($"{nameof(mimimumTokenLength)} can't be negative."); - HashCodeGenerator = hashCodeGenerator ?? new DefaultHashCodeGenerator(); - MimimumTokenLength = mimimumTokenLength; - IncludeDigits = includeDigits; - UseStopWords = useStopWords; - if (useStopWords) - AddStopWords(customStopWords ?? 
DefaultStopWords); - } + /// + /// Initializes a new instance of the class with the specified + /// minimum token length, an option to include digits, and an option to use stop words. + /// + /// The minimum length of tokens to include in the results. Must be non-negative. + /// Whether to include digits in the tokens. Defaults to false. + /// The hash code generator used to generate hash codes for the stop words. If null, a default generator is used. + /// Whether to filter out stop words from the tokens. Defaults to false. + /// The custom stop words list. If it is null, the default stop words list will be used. Defaults to null. + /// Thrown when is negative. + public WordTokenizer( + int mimimumTokenLength = 3, + bool includeDigits = false, + IHashCodeGenerator hashCodeGenerator = null, + bool useStopWords = false, + string[] customStopWords = null) + { + if (mimimumTokenLength < 0) + throw new ArgumentException($"{nameof(mimimumTokenLength)} can't be negative."); + HashCodeGenerator = hashCodeGenerator ?? new DefaultHashCodeGenerator(); + MimimumTokenLength = mimimumTokenLength; + IncludeDigits = includeDigits; + UseStopWords = useStopWords; + if (useStopWords) + AddStopWords(customStopWords ?? DefaultStopWords); + } - /// - /// The default list of stop words used when is enabled. - /// - static readonly string[] DefaultStopWords = new string[] { + /// + /// The default list of stop words used when is enabled. + /// + static readonly string[] DefaultStopWords = new string[] { "a", "an", "and", "are", "as", "at", "be", "but", "by", "for", "if", "in", "into", "is", "it", "no", "not", "of", "on", "or", "such", "that", "the", "their", "then", "there", "these", "they", "this", "to", "was", "will", "with"}; - /// - /// Adds an array of stop words to the internal stop words set. Each word is hashed - /// and stored in the set. - /// - /// The array of stop words to add. 
- public void AddStopWords(string[] stopWords) + /// + /// Adds an array of stop words to the internal stop words set. Each word is hashed + /// and stored in the set. + /// + /// The array of stop words to add. + public void AddStopWords(string[] stopWords) + { + var len = stopWords.Length; + for (var i = 0; i < len; i++) { - var len = stopWords.Length; - for (var i = 0; i < len; i++) - { - var stopWord = stopWords[i]; - StopWords.Add(HashCodeGenerator.GetHashCode(stopWord)); - } + var stopWord = stopWords[i]; + StopWords.Add(HashCodeGenerator.GetHashCode(stopWord)); } + } - /// - /// Splits the given text into a list of slices, where each slice represents a token. - /// Tokens are determined based on the settings for minimum token length and whether digits are included. - /// Optionally filters out tokens that match stop words. - /// - /// The text to tokenize. - /// A read-only list of objects, each representing a token within the text. - public IReadOnlyList GetSlices(ReadOnlySpan text) + /// + /// Splits the given text into a list of slices, where each slice represents a token. + /// Tokens are determined based on the settings for minimum token length and whether digits are included. + /// Optionally filters out tokens that match stop words. + /// + /// The text to tokenize. + /// A read-only list of objects, each representing a token within the text. 
+ public IReadOnlyList GetSlices(ReadOnlySpan text) + { + var digits = IncludeDigits; + var diff = MimimumTokenLength; + if (diff > 0) --diff; + var len = text.Length; + var tokens = new List(len / 15); + int tokenStart = 0; + int tokenEnd = 0; + var useStopWords = UseStopWords; + var stopWords = StopWords; + for (var i = 0; i < len; i++) + { + var currentChar = text[i]; + if (digits && char.IsLetterOrDigit(currentChar) || + char.IsLetter(currentChar)) + { + ++tokenEnd; + continue; + } + if (tokenStart < tokenEnd - diff) + { + var slice = new Slice(tokenStart, i - tokenStart); + if (!useStopWords || + !stopWords.Contains( + HashCodeGenerator.GetHashCode( + text.Slice(slice)))) + tokens.Add(slice); + } + tokenStart = i + 1; + tokenEnd = i + 1; + } + if (tokenStart < tokenEnd - diff) { - var digits = IncludeDigits; - var diff = MimimumTokenLength; - if (diff > 0) --diff; - var len = text.Length; - var tokens = new List(len / 15); - int tokenStart = 0; - int tokenEnd = 0; - var useStopWords = UseStopWords; - var stopWords = StopWords; - for (var i = 0; i < len; i++) - { - var currentChar = text[i]; - if (digits && char.IsLetterOrDigit(currentChar) || - char.IsLetter(currentChar)) - { - ++tokenEnd; - continue; - } - if (tokenStart < tokenEnd - diff) - { - var slice = new Slice(tokenStart, i - tokenStart); - if (!useStopWords || - !stopWords.Contains( - HashCodeGenerator.GetHashCode( - text.Slice(slice)))) - tokens.Add(slice); - } - tokenStart = i + 1; - tokenEnd = i + 1; - } - if (tokenStart < tokenEnd - diff) - { - var slice = new Slice(tokenStart, tokenEnd - tokenStart); - if (!useStopWords || - !stopWords.Contains( - HashCodeGenerator.GetHashCode( - text.Slice(slice)))) - tokens.Add(slice); - } - return tokens; + var slice = new Slice(tokenStart, tokenEnd - tokenStart); + if (!useStopWords || + !stopWords.Contains( + HashCodeGenerator.GetHashCode( + text.Slice(slice)))) + tokens.Add(slice); } + return tokens; + } - /// - /// Enumerates the slices of the given text, 
where each slice represents a token. - /// Tokens are determined based on the settings for minimum token length and whether digits are included. - /// Optionally filters out tokens that match stop words. - /// - /// The text to tokenize. - /// An enumerable collection of objects, each representing a token within the text. - public IEnumerable EnumerateSlices(ReadOnlyMemory text) + /// + /// Enumerates the slices of the given text, where each slice represents a token. + /// Tokens are determined based on the settings for minimum token length and whether digits are included. + /// Optionally filters out tokens that match stop words. + /// + /// The text to tokenize. + /// An enumerable collection of objects, each representing a token within the text. + public IEnumerable EnumerateSlices(ReadOnlyMemory text) + { + var digits = IncludeDigits; + var diff = MimimumTokenLength; + if (diff > 0) --diff; + var len = text.Length; + int tokenStart = 0; + int tokenEnd = 0; + var useStopWords = UseStopWords; + var stopWords = StopWords; + for (var i = 0; i < len; i++) + { + var currentChar = text.Span[i]; + if (digits && char.IsLetterOrDigit(currentChar) || + char.IsLetter(currentChar)) + { + ++tokenEnd; + continue; + } + if (tokenStart < tokenEnd - diff) + { + var slice = new Slice(tokenStart, i - tokenStart); + if (!useStopWords || + !stopWords.Contains( + HashCodeGenerator.GetHashCode( + text.Slice(slice)))) + yield return slice; + } + tokenStart = i + 1; + tokenEnd = i + 1; + } + if (tokenStart < tokenEnd - diff) { - var digits = IncludeDigits; - var diff = MimimumTokenLength; - if (diff > 0) --diff; - var len = text.Length; - int tokenStart = 0; - int tokenEnd = 0; - var useStopWords = UseStopWords; - var stopWords = StopWords; - for (var i = 0; i < len; i++) - { - var currentChar = text.Span[i]; - if (digits && char.IsLetterOrDigit(currentChar) || - char.IsLetter(currentChar)) - { - ++tokenEnd; - continue; - } - if (tokenStart < tokenEnd - diff) - { - var slice = new 
Slice(tokenStart, i - tokenStart); - if (!useStopWords || - !stopWords.Contains( - HashCodeGenerator.GetHashCode( - text.Slice(slice)))) - yield return slice; - } - tokenStart = i + 1; - tokenEnd = i + 1; - } - if (tokenStart < tokenEnd - diff) - { - var slice = new Slice(tokenStart, tokenEnd - tokenStart); - if (!useStopWords || - !stopWords.Contains( - HashCodeGenerator.GetHashCode( - text.Slice(slice)))) - yield return slice; - } + var slice = new Slice(tokenStart, tokenEnd - tokenStart); + if (!useStopWords || + !stopWords.Contains( + HashCodeGenerator.GetHashCode( + text.Slice(slice)))) + yield return slice; } + } } From becd87f0daa691d901ee1346c927ea2ad8c45c3f Mon Sep 17 00:00:00 2001 From: Ahmed Yasin Koculu Date: Mon, 8 Jun 2026 04:54:02 +0200 Subject: [PATCH 3/3] Drop support net 7 and lower. --- .../SearchEngineApp.cs | 44 +++++++++---------- .../Misc/FolderIterator.cs | 14 ++---- .../Model/NGramToken4.cs | 13 ++++-- .../QueryLanguage/UnexpectedTokenException.cs | 5 ++- .../ZoneTree.FullTextSearch.csproj | 2 +- 5 files changed, 39 insertions(+), 39 deletions(-) diff --git a/src/ZoneTree.FullTextSearch.Playground/SearchEngineApp.cs b/src/ZoneTree.FullTextSearch.Playground/SearchEngineApp.cs index 61a2e44..5156fe0 100644 --- a/src/ZoneTree.FullTextSearch.Playground/SearchEngineApp.cs +++ b/src/ZoneTree.FullTextSearch.Playground/SearchEngineApp.cs @@ -218,28 +218,28 @@ public void CreateIndex(string indexPath, string pattern, bool isInteractive) var iteratorTask = folderIterator.IterateAll( (path) => { - if (cancellationTokenSource.IsCancellationRequested) - return Task.CompletedTask; - return Task.Run(async () => - { - try - { - if (cancellationTokenSource.IsCancellationRequested) return; - if (!RecordTable.TryGetRecord(path, out var record)) - record = Interlocked.Increment(ref nextRecord); - - var text = await File.ReadAllTextAsync(path); - RecordTable.UpsertRecord(record, path); - SearchEngine.AddRecord(record, text); - Interlocked.Increment(ref 
totalRecordUpserted); - } - catch (Exception ex) - { - Console.WriteLine(ex.ToString()); - throw; - } - }); - }, + if (cancellationTokenSource.IsCancellationRequested) + return Task.CompletedTask; + return Task.Run(async () => + { + try + { + if (cancellationTokenSource.IsCancellationRequested) return; + if (!RecordTable.TryGetRecord(path, out var record)) + record = Interlocked.Increment(ref nextRecord); + + var text = await File.ReadAllTextAsync(path); + RecordTable.UpsertRecord(record, path); + SearchEngine.AddRecord(record, text); + Interlocked.Increment(ref totalRecordUpserted); + } + catch (Exception ex) + { + Console.WriteLine(ex.ToString()); + throw; + } + }); + }, cancellationTokenSource.Token); iteratorTask.Wait(); sw.Stop(); diff --git a/src/ZoneTree.FullTextSearch/Misc/FolderIterator.cs b/src/ZoneTree.FullTextSearch/Misc/FolderIterator.cs index 884f6df..9c066ef 100644 --- a/src/ZoneTree.FullTextSearch/Misc/FolderIterator.cs +++ b/src/ZoneTree.FullTextSearch/Misc/FolderIterator.cs @@ -1,4 +1,4 @@ -namespace ZoneTree.FullTextSearch.Misc; +namespace ZoneTree.FullTextSearch.Misc; /// /// Provides functionality to iterate through files in a specified directory based on a search pattern, @@ -57,16 +57,10 @@ await Task.Run(async () => { Console.WriteLine("Cancelled the folder iteration."); break; - -<<<<<<< TODO: Unmerged change from project 'ZoneTree.FullTextSearch(net9.0)', Before: - await Task.WhenAll(tasks.ToArray()); -======= - await Task.WhenAll(tasks.ToArray()).ConfigureAwait(false); ->>>>>>> After -} + } tasks.Add(callback(path)); } - await Task.WhenAll(tasks.ToArray()).ConfigureAwait(false); - }).ConfigureAwait(false).ConfigureAwait(false); + await Task.WhenAll([.. 
tasks]).ConfigureAwait(false); + }, cancellationToken).ConfigureAwait(false); } } diff --git a/src/ZoneTree.FullTextSearch/Model/NGramToken4.cs b/src/ZoneTree.FullTextSearch/Model/NGramToken4.cs index 198aa85..98f0059 100644 --- a/src/ZoneTree.FullTextSearch/Model/NGramToken4.cs +++ b/src/ZoneTree.FullTextSearch/Model/NGramToken4.cs @@ -1,4 +1,4 @@ -using System.Runtime.InteropServices; +using System.Runtime.InteropServices; namespace ZoneTree.FullTextSearch.Model; @@ -8,7 +8,7 @@ namespace ZoneTree.FullTextSearch.Model; /// 4-character sequences using a packed memory layout. /// [StructLayout(LayoutKind.Explicit, CharSet = CharSet.Unicode, Pack = 1, Size = 8)] -public struct NGramToken4 +public struct NGramToken4 : IEquatable { /// /// The raw data representing the 4-character n-gram as a 64-bit unsigned integer. @@ -43,12 +43,17 @@ public struct NGramToken4 public override bool Equals(object obj) { - throw new NotImplementedException(); + return obj is NGramToken4 token && Equals(token); + } + + public bool Equals(NGramToken4 other) + { + return data == other.data; } public override int GetHashCode() { - throw new NotImplementedException(); + return data.GetHashCode(); } public static bool operator ==(NGramToken4 left, NGramToken4 right) diff --git a/src/ZoneTree.FullTextSearch/QueryLanguage/UnexpectedTokenException.cs b/src/ZoneTree.FullTextSearch/QueryLanguage/UnexpectedTokenException.cs index 7155e6e..6208919 100644 --- a/src/ZoneTree.FullTextSearch/QueryLanguage/UnexpectedTokenException.cs +++ b/src/ZoneTree.FullTextSearch/QueryLanguage/UnexpectedTokenException.cs @@ -1,4 +1,4 @@ -namespace ZoneTree.FullTextSearch.QueryLanguage; +namespace ZoneTree.FullTextSearch.QueryLanguage; /// /// Exception thrown when an unexpected token is encountered during parsing. 
@@ -17,7 +17,8 @@ public UnexpectedTokenException() { } - public UnexpectedTokenException() + + public UnexpectedTokenException(string message, Exception innerException) : base(message, innerException) { } } diff --git a/src/ZoneTree.FullTextSearch/ZoneTree.FullTextSearch.csproj b/src/ZoneTree.FullTextSearch/ZoneTree.FullTextSearch.csproj index e04ddb4..3659296 100644 --- a/src/ZoneTree.FullTextSearch/ZoneTree.FullTextSearch.csproj +++ b/src/ZoneTree.FullTextSearch/ZoneTree.FullTextSearch.csproj @@ -1,7 +1,7 @@  - net10.0;net9.0;net8.0;net7.0;net6.0 + net10.0;net9.0;net8.0 true en-US https://github.com/koculu/ZoneTree.FullTextSearch