Skip to content

Commit 076d26f

Browse files
authored
KnowPro.NET: Answers Part 3, indexing improvements (#1769)
* Answer Generation * Select relevant messages * Include Messages in Context * Indexing * Auto Alias mapping for names * Auto load Synonym tables * Storage improvements * Bug fixes * Refactor
1 parent bb6c16d commit 076d26f

22 files changed

+503
-34
lines changed

dotnet/typeagent/examples/knowProConsole/PodcastCommands.cs

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
// Copyright (c) Microsoft Corporation.
22
// Licensed under the MIT License.
33

4+
using System.Threading.Tasks;
45
using TypeAgent.KnowPro.Storage;
56

67
namespace KnowProConsole;
@@ -48,11 +49,11 @@ private Command PodcastLoadDef()
4849
{
4950
Args.Arg<string>("name", "Name of existing podcast index"),
5051
};
51-
cmd.SetAction(this.PodcastLoad);
52+
cmd.SetAction(this.PodcastLoadAsync);
5253
return cmd;
5354
}
5455

55-
private void PodcastLoad(ParseResult args)
56+
private async Task PodcastLoadAsync(ParseResult args)
5657
{
5758
NamedArgs namedArgs = new(args);
5859
string name = namedArgs.GetRequired("name");
@@ -68,6 +69,8 @@ private void PodcastLoad(ParseResult args)
6869

6970
SetCurrent(podcast);
7071
KnowProWriter.WriteLine(ConsoleColor.Cyan, $"Loaded {name}");
72+
73+
//await podcast.BuildSecondaryIndexesAsync();
7174
}
7275

7376
private Command PodcastImportIndexDef()
@@ -121,7 +124,7 @@ await podcast.ImportTranscriptAsync(
121124

122125
if (namedArgs.Get<bool>("buildIndex"))
123126
{
124-
await podcast.UpdateIndexAsync(cancellationToken);
127+
await podcast.BuildIndexAsync(cancellationToken);
125128
}
126129
}
127130

dotnet/typeagent/src/common/ListExtensions.cs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,12 @@ public static int GetCount<T>(this IList<T>? list)
2121
return list is not null ? list.Count : 0;
2222
}
2323

24+
/// <summary>
/// Returns the item at <paramref name="index"/>, or null when the index is
/// outside the bounds of the list.
/// </summary>
/// <param name="list">The list to read from.</param>
/// <param name="index">Zero-based position of the wanted item.</param>
/// <returns>The item at the position, or null when the position is negative or past the end.</returns>
public static T? GetOrNull<T>(this IList<T> list, int index)
    where T : class
{
    // Guard both ends: a negative index would otherwise reach the indexer and throw
    // instead of yielding null.
    return (index >= 0 && index < list.Count) ? list[index] : null;
}
29+
2430
public static void AddRange<T>(this IList<T> list, IEnumerable<T> items)
2531
{
2632
ArgumentVerify.ThrowIfNull(items, nameof(items));

dotnet/typeagent/src/common/Multiset.cs renamed to dotnet/typeagent/src/common/MultiMap.cs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,11 @@ namespace TypeAgent.Common;
66
/// <summary>
77
/// A dictionary that permits DUPLICATES.
88
/// </summary>
9-
public class Multiset<TKey, TValue> : Dictionary<TKey, List<TValue>>, IEnumerable<TValue>
9+
public class MultiMap<TKey, TValue> : Dictionary<TKey, List<TValue>>, IEnumerable<TValue>
1010
{
1111
Func<List<TValue>> _allocator = NewList;
1212

13-
public Multiset(IEnumerable<KeyValuePair<TKey, TValue>>? values = null)
13+
public MultiMap(IEnumerable<KeyValuePair<TKey, TValue>>? values = null)
1414
: base()
1515
{
1616
if (values is not null)
@@ -19,12 +19,12 @@ public Multiset(IEnumerable<KeyValuePair<TKey, TValue>>? values = null)
1919
}
2020
}
2121

22-
public Multiset(IEqualityComparer<TKey> comparer)
22+
public MultiMap(IEqualityComparer<TKey> comparer)
2323
: base(comparer)
2424
{
2525
}
2626

27-
public Multiset(
27+
public MultiMap(
2828
int capacity,
2929
IEqualityComparer<TKey> comparer,
3030
Func<List<TValue>>? allocator

dotnet/typeagent/src/common/StringExtensions.cs

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,15 @@ int maxCharsPerChunk
5858
}
5959
}
6060

61+
/// <summary>
/// Lower-cases every string in the list, in place.
/// </summary>
/// <param name="list">The list whose entries are overwritten with their lower-case form.</param>
public static void ToLower(this IList<string> list)
{
    int total = list.Count;
    int position = 0;
    while (position < total)
    {
        string lowered = list[position].ToLower();
        list[position] = lowered;
        position++;
    }
}
69+
6170
public static List<string> LowerAndSort(this List<string> list)
6271
{
6372
int count = list.Count;
@@ -77,6 +86,15 @@ public static List<string> LowerAndSort(this List<string> list)
7786
/// <summary>
/// Splits the text using the shared line-splitter regex.
/// </summary>
/// <param name="text">The text to split.</param>
/// <param name="options">Split options forwarded to the regex-based Split helper.</param>
/// <returns>The pieces produced by the regex-based Split helper.</returns>
public static IList<string> SplitLines(this string text, StringSplitOptions options = default)
{
    return text.Split(s_lineSplitter, options);
}
7988

89+
// Pattern alternatives, in order: a double-quoted run, a single-quoted run,
// or a run of non-whitespace characters that starts and ends on a word boundary.
[GeneratedRegex(@"(""[^""]+""|'[^']+'|\b\S+\b)")]
private static partial Regex s_wordBreakRegEx();

// Cached instance of the generated word regex.
private static readonly Regex s_wordSplitter = s_wordBreakRegEx();

/// <summary>
/// Splits the text using the shared word regex.
/// </summary>
/// <param name="text">The text to split.</param>
/// <param name="options">Split options forwarded to the regex-based Split helper.</param>
/// <returns>The pieces produced by the regex-based Split helper.</returns>
public static IList<string> SplitWords(this string text, StringSplitOptions options = default)
{
    return text.Split(s_wordSplitter, options);
}
96+
97+
8098
// Split using a Regex
8199
public static IList<string> Split(this string text, Regex regex, StringSplitOptions options = default)
82100
{

dotnet/typeagent/src/common/TopNCollection.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -217,7 +217,7 @@ public void Add(IEnumerable<Scored<T>> items)
217217
public List<Scored<T>> ByRankAndClear()
218218
{
219219
var results = _items ?? [];
220-
results.Sort();
220+
results.Sort((x, y) => x.CompareTo(y));
221221
_items = null;
222222
return results;
223223
}

dotnet/typeagent/src/conversationMemory/Memory.cs

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,4 +81,13 @@ private LangSearchOptions AdjustLanguageSearchOptions(LangSearchOptions? options
8181

8282
return options;
8383
}
84+
85+
/// <summary>
/// Hook for the start of an indexing pass. It currently has no implementation.
/// </summary>
protected void BeginIndexing()
{
}
88+
89+
/// <summary>
/// Hook for the end of an indexing pass. It currently has no implementation.
/// </summary>
protected void EndIndexing()
{
}
8493
}
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
// Copyright (c) Microsoft Corporation.
2+
// Licensed under the MIT License.
3+
4+
namespace TypeAgent.ConversationMemory;
5+
6+
/// <summary>
/// A person's name, held as the list of words produced by splitting the full name.
/// </summary>
public class PersonName
{
    /// <summary>
    /// Splits <paramref name="fullName"/> into words, trimming each entry and dropping empty ones.
    /// </summary>
    /// <param name="fullName">The complete name; validated as non-null and non-empty.</param>
    public PersonName(string fullName)
    {
        ArgumentVerify.ThrowIfNullOrEmpty(fullName, nameof(fullName));

        const StringSplitOptions splitOptions =
            StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries;
        Names = fullName.SplitWords(splitOptions);
    }

    /// <summary>
    /// The individual words of the name, in their original order.
    /// </summary>
    public IList<string> Names { get; }

    /// <summary>
    /// True when at least one name word is present.
    /// </summary>
    public bool HasNames
    {
        get { return !Names.IsNullOrEmpty(); }
    }

    /// <summary>
    /// The first word of the name, or null when there are no words.
    /// </summary>
    public string? FirstName
    {
        get { return Names.GetOrNull(0); }
    }

    /// <summary>
    /// The final word of the name, or null when the name has fewer than two words.
    /// </summary>
    public string? LastName
    {
        get
        {
            if (Names.Count > 1)
            {
                return Names[^1];
            }
            return null;
        }
    }
}

dotnet/typeagent/src/conversationMemory/Podcast.cs

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,36 @@ public Podcast(MemorySettings settings, IStorageProvider<PodcastMessage> provide
1010
{
1111
}
1212

13+
/// <summary>
/// Runs UpdateIndexAsync followed by BuildSecondaryIndexesAsync, bracketed by
/// BeginIndexing and EndIndexing.
/// </summary>
/// <param name="cancellationToken">Token forwarded to both indexing steps.</param>
public async ValueTask BuildIndexAsync(CancellationToken cancellationToken)
{
    BeginIndexing();
    try
    {
        await this.UpdateIndexAsync(cancellationToken).ConfigureAwait(false);
        await BuildSecondaryIndexesAsync(cancellationToken).ConfigureAwait(false);
    }
    finally
    {
        // Always paired with BeginIndexing, even when a step throws.
        EndIndexing();
    }
}
31+
32+
/// <summary>
/// Builds the participant aliases first, then adds the synonyms.
/// </summary>
/// <param name="cancellationToken">Token forwarded to both steps.</param>
public async ValueTask BuildSecondaryIndexesAsync(CancellationToken cancellationToken = default)
{
    await BuildParticipantAliasesAsync(cancellationToken).ConfigureAwait(false);
    await AddSynonymsAsync(cancellationToken).ConfigureAwait(false);
}
42+
1343
public async ValueTask ImportTranscriptAsync(
1444
string filePath,
1545
string? name = null,
@@ -35,6 +65,42 @@ await Messages.AppendAsync(
3565
).ConfigureAwait(false);
3666
}
3767

68+
/// <summary>
/// Loads the alias map embedded in this assembly as podcastVerbs.json and adds it
/// to the aliases of the term-to-related-terms index.
/// </summary>
/// <param name="cancellationToken">Token forwarded to the add operation.</param>
private async ValueTask AddSynonymsAsync(CancellationToken cancellationToken)
{
    const string resourceName = "TypeAgent.ConversationMemory.podcastVerbs.json";

    AliasMap verbAliases = AliasMap.LoadResource(typeof(Podcast).Assembly, resourceName);

    var aliasIndex = SecondaryIndexes.TermToRelatedTermsIndex.Aliases;
    await aliasIndex.AddAsync(verbAliases, cancellationToken).ConfigureAwait(false);
}
80+
81+
/// <summary>
/// Collects the participant aliases from the messages and adds them to the aliases
/// of the term-to-related-terms index.
/// </summary>
/// <param name="cancellationToken">Token forwarded to the collect and add steps.</param>
private async ValueTask BuildParticipantAliasesAsync(CancellationToken cancellationToken = default)
{
    AliasMap participantAliases =
        await CollectParticipantAliasesAsync(cancellationToken).ConfigureAwait(false);

    var aliasIndex = SecondaryIndexes.TermToRelatedTermsIndex.Aliases;
    await aliasIndex.AddAsync(participantAliases, cancellationToken).ConfigureAwait(false);
}
92+
93+
/// <summary>
/// Enumerates every message and lets its metadata contribute aliases to a single map.
/// </summary>
/// <param name="cancellationToken">Checked before each message is processed.</param>
/// <returns>The alias map accumulated across all messages.</returns>
/// <exception cref="OperationCanceledException">Cancellation was requested during enumeration.</exception>
private async ValueTask<AliasMap> CollectParticipantAliasesAsync(CancellationToken cancellationToken = default)
{
    AliasMap aliases = [];
    await foreach (var message in Messages)
    {
        // The token was previously accepted but never observed; honor it per message
        // so a long enumeration can be abandoned.
        cancellationToken.ThrowIfCancellationRequested();

        PodcastMessageMeta metadata = message.Metadata;
        metadata.CollectAliases(aliases);
    }
    return aliases;
}
103+
38104
private void AssignMessageListeners(IList<PodcastMessage> messages, ISet<string> participants)
39105
{
40106
foreach (var message in messages)

dotnet/typeagent/src/conversationMemory/PodcastMessage.cs

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,34 @@ public PodcastMessageMeta(string? speaker)
2323

2424
public override string? Source => Speaker;
2525
public override IList<string>? Dest => Listeners;
26+
27+
/// <summary>
/// Adds aliases for the speaker and then for each listener to <paramref name="aliasMap"/>.
/// </summary>
/// <param name="aliasMap">The map that receives the aliases.</param>
internal void CollectAliases(AliasMap aliasMap)
{
    CollectAlias(Speaker, aliasMap);

    // Nothing more to do when there are no listeners.
    if (Listeners.IsNullOrEmpty())
    {
        return;
    }

    foreach (var listener in Listeners)
    {
        CollectAlias(listener, aliasMap);
    }
}
38+
39+
/// <summary>
/// When <paramref name="fullName"/> splits into exactly two names, links the first
/// name and the full name to each other in <paramref name="aliasMap"/>.
/// </summary>
/// <param name="fullName">A participant's name; null or empty values are ignored.</param>
/// <param name="aliasMap">The map that receives the aliases.</param>
private void CollectAlias(string? fullName, AliasMap aliasMap)
{
    if (!string.IsNullOrEmpty(fullName))
    {
        PersonName person = new(fullName);
        bool hasExactlyTwoNames = person.HasNames && person.Names.Count == 2;
        if (hasExactlyTwoNames)
        {
            // Register the mapping in both directions: first name -> full name and back.
            string? firstName = person.FirstName;
            aliasMap.AddUnique(firstName, fullName);
            aliasMap.AddUnique(fullName, firstName);
        }
    }
}
2654
}
2755

2856
public class PodcastMessage : Message<PodcastMessageMeta>, ITranscriptMessage

dotnet/typeagent/src/conversationMemory/conversationMemory.csproj

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,10 +10,12 @@
1010

1111
<ItemGroup>
1212
<None Remove="noiseTerms.txt" />
13+
<None Remove="podcastVerbs.json" />
1314
</ItemGroup>
1415

1516
<ItemGroup>
1617
<EmbeddedResource Include="noiseTerms.txt" />
18+
<EmbeddedResource Include="podcastVerbs.json" />
1719
</ItemGroup>
1820

1921
<ItemGroup>

0 commit comments

Comments
 (0)