Skip to content

Commit 669f864

Browse files
authored
Drivers 3.9: Segmentation and Collation Analyzers (TG-202) (#350)
1 parent 76d8b84 commit 669f864

File tree

2 files changed

+49
-1
lines changed

2 files changed

+49
-1
lines changed

test/arangosearch_analyzers_test.go

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -251,6 +251,36 @@ func TestArangoSearchAnalyzerEnsureAnalyzer(t *testing.T) {
251251
},
252252
},
253253
},
254+
{
255+
Name: "create-segmentation",
256+
MinVersion: newVersion("3.9"),
257+
Definition: driver.ArangoSearchAnalyzerDefinition{
258+
Name: "my-segmentation",
259+
Type: driver.ArangoSearchAnalyzerTypeSegmentation,
260+
Properties: driver.ArangoSearchAnalyzerProperties{
261+
Break: driver.ArangoSearchBreakTypeAll,
262+
Case: driver.ArangoSearchCaseUpper,
263+
},
264+
},
265+
},
266+
{
267+
Name: "create-collation",
268+
MinVersion: newVersion("3.9"),
269+
Definition: driver.ArangoSearchAnalyzerDefinition{
270+
Name: "my-collation",
271+
Type: driver.ArangoSearchAnalyzerTypeCollation,
272+
Properties: driver.ArangoSearchAnalyzerProperties{
273+
Locale: "en_US.utf-8",
274+
},
275+
},
276+
ExpectedDefinition: &driver.ArangoSearchAnalyzerDefinition{
277+
Name: "my-collation",
278+
Type: driver.ArangoSearchAnalyzerTypeCollation,
279+
Properties: driver.ArangoSearchAnalyzerProperties{
280+
Locale: "en_US",
281+
},
282+
},
283+
},
254284
}
255285

256286
for _, testCase := range testCases {

view_arangosearch.go

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,10 @@ const (
6565
ArangoSearchAnalyzerTypeGeoJSON ArangoSearchAnalyzerType = "geojson"
6666
// ArangoSearchAnalyzerTypeGeoPoint an Analyzer capable of breaking up a JSON object describing a coordinate into a set of indexable tokens for further usage with ArangoSearch Geo functions.
6767
ArangoSearchAnalyzerTypeGeoPoint ArangoSearchAnalyzerType = "geopoint"
68+
// ArangoSearchAnalyzerTypeSegmentation an Analyzer capable of breaking up the input text into tokens in a language-agnostic manner
69+
ArangoSearchAnalyzerTypeSegmentation ArangoSearchAnalyzerType = "segmentation"
70+
// ArangoSearchAnalyzerTypeCollation an Analyzer capable of converting the input into a set of language-specific tokens
71+
ArangoSearchAnalyzerTypeCollation ArangoSearchAnalyzerType = "collation"
6872
)
6973

7074
// ArangoSearchAnalyzerFeature specifies a feature to an analyzer
@@ -90,6 +94,17 @@ const (
9094
ArangoSearchCaseNone ArangoSearchCaseType = "none"
9195
)
9296

97+
// ArangoSearchBreakType specifies which tokens are returned by a Segmentation Analyzer (see the Break property).
type ArangoSearchBreakType string
98+
99+
const (
100+
// ArangoSearchBreakTypeAll returns all tokens.
101+
ArangoSearchBreakTypeAll ArangoSearchBreakType = "all"
102+
// ArangoSearchBreakTypeAlpha returns only tokens composed of alphanumeric characters (default).
103+
ArangoSearchBreakTypeAlpha ArangoSearchBreakType = "alpha"
104+
// ArangoSearchBreakTypeGraphic returns only tokens composed of non-whitespace characters.
105+
ArangoSearchBreakTypeGraphic ArangoSearchBreakType = "graphic"
106+
)
107+
93108
type ArangoSearchNGramStreamType string
94109

95110
const (
@@ -120,7 +135,7 @@ type ArangoSearchAnalyzerProperties struct {
120135
Delimiter string `json:"delimiter,omitempty"`
121136
// Accent used by Norm, Text
122137
Accent *bool `json:"accent,omitempty"`
123-
// Case used by Norm, Text
138+
// Case used by Norm, Text, Segmentation
124139
Case ArangoSearchCaseType `json:"case,omitempty"`
125140

126141
// EdgeNGram used by Text
@@ -173,6 +188,9 @@ type ArangoSearchAnalyzerProperties struct {
173188
Latitude []string `json:"latitude,omitempty"`
174189
// Longitude used by GeoPoint.
175190
Longitude []string `json:"longitude,omitempty"`
191+
192+
// Break used by Segmentation
193+
Break ArangoSearchBreakType `json:"break,omitempty"`
176194
}
177195

178196
// ArangoSearchAnalyzerGeoJSONType GeoJSON Type parameter.

0 commit comments

Comments
 (0)