Skip to content

Commit 429f1ac

Browse files
committed
Fixes and more tests for vector index DDL.
1 parent 94756fb commit 429f1ac

File tree

6 files changed

+268
-60
lines changed

6 files changed

+268
-60
lines changed

fdb-relational-core/src/main/antlr/RelationalLexer.g4

Lines changed: 22 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,7 @@ GET: 'GET';
112112
GRANT: 'GRANT';
113113
GROUP: 'GROUP';
114114
HAVING: 'HAVING';
115+
HNSW: 'HNSW';
115116
HIGH_PRIORITY: 'HIGH_PRIORITY';
116117
HISTOGRAM: 'HISTOGRAM';
117118
IF: 'IF';
@@ -919,8 +920,8 @@ ASYMMETRIC_DERIVE: 'ASYMMETRIC_DERIVE';
919920
ASYMMETRIC_ENCRYPT: 'ASYMMETRIC_ENCRYPT';
920921
ASYMMETRIC_SIGN: 'ASYMMETRIC_SIGN';
921922
ASYMMETRIC_VERIFY: 'ASYMMETRIC_VERIFY';
922-
ATAN: 'ATAN';
923923
ATAN2: 'ATAN2';
924+
ATAN: 'ATAN';
924925
BENCHMARK: 'BENCHMARK';
925926
BIN: 'BIN';
926927
BIT_COUNT: 'BIT_COUNT';
@@ -942,6 +943,7 @@ CONNECTION_ID: 'CONNECTION_ID';
942943
CONV: 'CONV';
943944
CONVERT_TZ: 'CONVERT_TZ';
944945
COS: 'COS';
946+
COSINE_METRIC: 'COSINE_METRIC';
945947
COT: 'COT';
946948
CRC32: 'CRC32';
947949
CREATE_ASYMMETRIC_PRIV_KEY: 'CREATE_ASYMMETRIC_PRIV_KEY';
@@ -961,7 +963,9 @@ DES_DECRYPT: 'DES_DECRYPT';
961963
DES_ENCRYPT: 'DES_ENCRYPT';
962964
DIMENSION: 'DIMENSION';
963965
DISJOINT: 'DISJOINT';
966+
DOT_PRODUCT_METRIC: 'DOT_PRODUCT_METRIC';
964967
DRY: 'DRY';
968+
EF_CONSTRUCTION: 'EF_CONSTRUCTION';
965969
ELT: 'ELT';
966970
ENABLE_LONG_ROWS: 'ENABLE_LONG_ROWS';
967971
ENCODE: 'ENCODE';
@@ -970,6 +974,8 @@ ENDPOINT: 'ENDPOINT';
970974
ENGINE_ATTRIBUTE: 'ENGINE_ATTRIBUTE';
971975
ENVELOPE: 'ENVELOPE';
972976
EQUALS: 'EQUALS';
977+
EUCLIDEAN_METRIC: 'EUCLIDEAN_METRIC';
978+
EUCLIDEAN_SQUARE_METRIC: 'EUCLIDEAN_SQUARE_METRIC';
973979
EXP: 'EXP';
974980
EXPORT_SET: 'EXPORT_SET';
975981
EXTERIORRING: 'EXTERIORRING';
@@ -999,15 +1005,6 @@ GREATEST: 'GREATEST';
9991005
GTID_SUBSET: 'GTID_SUBSET';
10001006
GTID_SUBTRACT: 'GTID_SUBTRACT';
10011007
HEX: 'HEX';
1002-
HNSW_EF_CONSTRUCTION: 'HNSW_EF_CONSTRUCTION';
1003-
HNSW_M_MAX: 'HNSW_M_MAX';
1004-
HNSW_M: 'HNSW_M';
1005-
HNSW_MAINTAIN_STATS_PROBABILITY: 'HNSW_MAINTAIN_STATS_PROBABILITY';
1006-
HNSW_METRIC: 'HNSW_METRIC';
1007-
HNSW_RABITQ_NUM_EX_BITS: 'HNSW_RABITQ_NUM_EX_BITS';
1008-
HNSW_SAMPLE_VECTOR_STATS_PROBABILITY:'HNSW_SAMPLE_VECTOR_STATS_PROBABILITY';
1009-
HNSW_STATS_THRESHOLD: 'HNSW_STATS_THRESHOLD';
1010-
HNSW_USE_RABITQ: 'HNSW_USE_RABITQ';
10111008
IFNULL: 'IFNULL';
10121009
INET6_ATON: 'INET6_ATON';
10131010
INET6_NTOA: 'INET6_NTOA';
@@ -1038,15 +1035,18 @@ LINESTRINGFROMWKB: 'LINESTRINGFROMWKB';
10381035
LN: 'LN';
10391036
LOAD_FILE: 'LOAD_FILE';
10401037
LOCATE: 'LOCATE';
1041-
LOG: 'LOG';
10421038
LOG10: 'LOG10';
10431039
LOG2: 'LOG2';
1040+
LOG: 'LOG';
10441041
LOWER: 'LOWER';
10451042
LPAD: 'LPAD';
10461043
LTRIM: 'LTRIM';
1044+
CONNECTIVITY: 'CONNECTIVITY';
1045+
MAINTAIN_STATS_PROBABILITY: 'MAINTAIN_STATS_PROBABILITY';
10471046
MAKEDATE: 'MAKEDATE';
10481047
MAKETIME: 'MAKETIME';
10491048
MAKE_SET: 'MAKE_SET';
1049+
MANHATTAN_METRIC: 'MANHATTAN_METRIC';
10501050
MASTER_POS_WAIT: 'MASTER_POS_WAIT';
10511051
MBRCONTAINS: 'MBRCONTAINS';
10521052
MBRDISJOINT: 'MBRDISJOINT';
@@ -1056,6 +1056,7 @@ MBROVERLAPS: 'MBROVERLAPS';
10561056
MBRTOUCHES: 'MBRTOUCHES';
10571057
MBRWITHIN: 'MBRWITHIN';
10581058
MD5: 'MD5';
1059+
METRIC: 'METRIC';
10591060
MLINEFROMTEXT: 'MLINEFROMTEXT';
10601061
MLINEFROMWKB: 'MLINEFROMWKB';
10611062
MONTHNAME: 'MONTHNAME';
@@ -1069,6 +1070,8 @@ MULTIPOINTFROMTEXT: 'MULTIPOINTFROMTEXT';
10691070
MULTIPOINTFROMWKB: 'MULTIPOINTFROMWKB';
10701071
MULTIPOLYGONFROMTEXT: 'MULTIPOLYGONFROMTEXT';
10711072
MULTIPOLYGONFROMWKB: 'MULTIPOLYGONFROMWKB';
1073+
M_MAX: 'M_MAX';
1074+
M_MAX_0: 'M_MAX_0';
10721075
NAME_CONST: 'NAME_CONST';
10731076
NULLIF: 'NULLIF';
10741077
NUMGEOMETRIES: 'NUMGEOMETRIES';
@@ -1091,6 +1094,7 @@ POLYGONFROMWKB: 'POLYGONFROMWKB';
10911094
POW: 'POW';
10921095
POWER: 'POWER';
10931096
QUOTE: 'QUOTE';
1097+
RABITQ_NUM_EX_BITS: 'RABITQ_NUM_EX_BITS';
10941098
RADIANS: 'RADIANS';
10951099
RAND: 'RAND';
10961100
RANDOM_BYTES: 'RANDOM_BYTES';
@@ -1101,13 +1105,14 @@ ROW_COUNT: 'ROW_COUNT';
11011105
RPAD: 'RPAD';
11021106
RTRIM: 'RTRIM';
11031107
RUN: 'RUN';
1104-
SEC_TO_TIME: 'SEC_TO_TIME';
1108+
SAMPLE_VECTOR_STATS_PROBABILITY: 'SAMPLE_VECTOR_STATS_PROBABILITY';
1109+
SCHEMA_NAME: 'SCHEMA_NAME';
11051110
SECONDARY_ENGINE_ATTRIBUTE: 'SECONDARY_ENGINE_ATTRIBUTE';
1111+
SEC_TO_TIME: 'SEC_TO_TIME';
11061112
SESSION_USER: 'SESSION_USER';
1107-
SHA: 'SHA';
11081113
SHA1: 'SHA1';
11091114
SHA2: 'SHA2';
1110-
SCHEMA_NAME: 'SCHEMA_NAME';
1115+
SHA: 'SHA';
11111116
SIGN: 'SIGN';
11121117
SIN: 'SIN';
11131118
SLEEP: 'SLEEP';
@@ -1116,6 +1121,7 @@ SQL_THREAD_WAIT_AFTER_GTIDS: 'SQL_THREAD_WAIT_AFTER_GTIDS';
11161121
SQRT: 'SQRT';
11171122
SRID: 'SRID';
11181123
STARTPOINT: 'STARTPOINT';
1124+
STATS_THRESHOLD: 'STATS_THRESHOLD';
11191125
STORE_ROW_VERSIONS: 'STORE_ROW_VERSIONS';
11201126
STRCMP: 'STRCMP';
11211127
STR_TO_DATE: 'STR_TO_DATE';
@@ -1198,6 +1204,7 @@ UNHEX: 'UNHEX';
11981204
UNIX_TIMESTAMP: 'UNIX_TIMESTAMP';
11991205
UPDATEXML: 'UPDATEXML';
12001206
UPPER: 'UPPER';
1207+
USE_RABITQ: 'USE_RABITQ';
12011208
UUID: 'UUID';
12021209
UUID_SHORT: 'UUID_SHORT';
12031210
VALIDATE_PASSWORD_STRENGTH: 'VALIDATE_PASSWORD_STRENGTH';
@@ -1207,9 +1214,9 @@ WEEKDAY: 'WEEKDAY';
12071214
WEEKOFYEAR: 'WEEKOFYEAR';
12081215
WEIGHT_STRING: 'WEIGHT_STRING';
12091216
WITHIN: 'WITHIN';
1217+
X_FUNCTION: 'X';
12101218
YEARWEEK: 'YEARWEEK';
12111219
Y_FUNCTION: 'Y';
1212-
X_FUNCTION: 'X';
12131220

12141221
// Calling conventions
12151222

fdb-relational-core/src/main/antlr/RelationalParser.g4

Lines changed: 22 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -168,9 +168,9 @@ enumDefinition
168168
;
169169

170170
// Index DDL forms: an index defined AS a query term, an index ON a source
// with an explicit column list, and a VECTOR index. In this diff the VECTOR
// alternative gains a mandatory `USING HNSW` and an optional includeClause.
// NOTE(review): the DdlVisitor hunk of this same commit asserts that
// includeClause is null for vector indexes ("INCLUDE clause is not supported
// for vector indexes"), so the clause is accepted here only to be rejected there.
indexDefinition
171-
: (UNIQUE)? INDEX indexName=uid AS queryTerm indexAttributes? #indexAsSelectDefinition
172-
| (UNIQUE)? INDEX indexName=uid ON source=fullId indexColumnList includeClause? indexOptions? #indexOnSourceDefinition
173-
| VECTOR INDEX indexName=uid ON source=fullId indexColumnList partitionClause? vectorIndexOptions? #vectorIndexDefinition
171+
: (UNIQUE)? INDEX indexName=uid AS queryTerm indexAttributes? #indexAsSelectDefinition
172+
| (UNIQUE)? INDEX indexName=uid ON source=fullId indexColumnList includeClause? indexOptions? #indexOnSourceDefinition
173+
| VECTOR INDEX indexName=uid USING HNSW ON source=fullId indexColumnList includeClause? partitionClause? vectorIndexOptions? #vectorIndexDefinition
174174
;
175175

176176
indexColumnList
@@ -198,19 +198,28 @@ indexOption
198198
;
199199

200200
// OPTIONS clause of a vector index: one or more vectorIndexOption entries,
// comma-separated, inside parentheses. The removed line recursed into
// vectorIndexOptions after each comma; the added line repeats vectorIndexOption.
vectorIndexOptions
201-
: OPTIONS '(' vectorIndexOption (COMMA vectorIndexOptions)* ')'
201+
: OPTIONS '(' vectorIndexOption (COMMA vectorIndexOption)* ')'
202202
;
203203

204204
// A single `name = value` setting inside the OPTIONS clause of a vector index.
// The added alternatives take lexer tokens directly (DECIMAL_LITERAL or
// REAL_LITERAL) for numeric values, the hnswMetric rule for METRIC, and
// booleanLiteral for USE_RABITQ; the removed alternatives used HNSW_-prefixed
// keywords with decimalLiteral / stringLiteral sub-rules.
vectorIndexOption
205-
: HNSW_EF_CONSTRUCTION '=' efConstruction=decimalLiteral
206-
| HNSW_M '=' m=decimalLiteral
207-
| HNSW_M_MAX '=' mMax=decimalLiteral
208-
| HNSW_MAINTAIN_STATS_PROBABILITY '=' maintainStatsProbability=decimalLiteral
209-
| HNSW_METRIC '=' metric=stringLiteral
210-
| HNSW_RABITQ_NUM_EX_BITS '=' rabitQNumExBits=decimalLiteral
211-
| HNSW_SAMPLE_VECTOR_STATS_PROBABILITY '=' statsProbability=decimalLiteral
212-
| HNSW_STATS_THRESHOLD '=' statsThreshold=decimalLiteral
213-
| HNSW_USE_RABITQ '=' useRabitQ=booleanLiteral
205+
: EF_CONSTRUCTION '=' efConstruction=DECIMAL_LITERAL
206+
| CONNECTIVITY '=' connectivity=DECIMAL_LITERAL
207+
| M_MAX '=' mMax=DECIMAL_LITERAL
208+
| M_MAX_0 '=' mMaxZero=DECIMAL_LITERAL
209+
| MAINTAIN_STATS_PROBABILITY '=' maintainStatsProbability=REAL_LITERAL
210+
| METRIC '=' metric=hnswMetric
211+
| RABITQ_NUM_EX_BITS '=' rabitQNumExBits=DECIMAL_LITERAL
212+
| SAMPLE_VECTOR_STATS_PROBABILITY '=' statsProbability=REAL_LITERAL
213+
| STATS_THRESHOLD '=' statsThreshold=DECIMAL_LITERAL
214+
| USE_RABITQ '=' useRabitQ=booleanLiteral
215+
;
216+
217+
// Keyword accepted as the value of the METRIC option of a vector index
// (referenced from vectorIndexOption as `METRIC '=' metric=hnswMetric`).
hnswMetric
218+
: MANHATTAN_METRIC
219+
| EUCLIDEAN_METRIC
220+
| EUCLIDEAN_SQUARE_METRIC
221+
| COSINE_METRIC
222+
| DOT_PRODUCT_METRIC
214223
;
215224

216225
indexAttributes

fdb-relational-core/src/main/java/com/apple/foundationdb/relational/recordlayer/query/ddl/OnSourceIndexGenerator.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -366,6 +366,11 @@ public Builder addValueColumn(@Nonnull final IndexedColumn keyColumn) {
366366
return this;
367367
}
368368

369+
/**
 * Returns the value columns held by this builder.
 *
 * NOTE(review): the {@code valueColumns} field is returned as-is, not copied or wrapped;
 * whether it is mutable is not visible here -- confirm callers treat it as read-only.
 *
 * @return the builder's {@code valueColumns} list
 */
@Nonnull
370+
public List<IndexedColumn> getValueColumns() {
371+
return valueColumns;
372+
}
373+
369374
@Nonnull
370375
public Builder addIndexOption(@Nonnull final String key, @Nonnull final String value) {
371376
indexOptions.put(key, value);

fdb-relational-core/src/main/java/com/apple/foundationdb/relational/recordlayer/query/visitors/DdlVisitor.java

Lines changed: 29 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,6 @@
4848
import com.apple.foundationdb.relational.recordlayer.query.Expressions;
4949
import com.apple.foundationdb.relational.recordlayer.query.Identifier;
5050
import com.apple.foundationdb.relational.recordlayer.query.LogicalOperators;
51-
import com.apple.foundationdb.relational.recordlayer.query.ParseHelpers;
5251
import com.apple.foundationdb.relational.recordlayer.query.ddl.OnSourceIndexGenerator;
5352
import com.apple.foundationdb.relational.recordlayer.query.ddl.MaterializedViewIndexGenerator;
5453
import com.apple.foundationdb.relational.recordlayer.query.LogicalOperator;
@@ -61,6 +60,7 @@
6160
import com.google.common.collect.ImmutableList;
6261
import com.google.common.collect.ImmutableMap;
6362
import com.google.common.collect.ImmutableSet;
63+
import com.google.common.collect.Iterables;
6464
import org.antlr.v4.runtime.ParserRuleContext;
6565

6666
import javax.annotation.Nonnull;
@@ -266,6 +266,8 @@ public RecordLayerIndex visitVectorIndexDefinition(final RelationalParser.Vector
266266
var logicalOperator = generateSourceAccessForIndex(sourceIdentifier);
267267
getDelegate().getCurrentPlanFragment().setOperator(logicalOperator);
268268

269+
270+
269271
final Identifier indexId = visitUid(indexDefinitionContext.indexName);
270272
final var indexOptions = parseVectorOptions(indexDefinitionContext.vectorIndexOptions());
271273
final var indexGeneratorBuilder = OnSourceIndexGenerator.newBuilder()
@@ -279,6 +281,21 @@ public RecordLayerIndex visitVectorIndexDefinition(final RelationalParser.Vector
279281
indexGeneratorBuilder.addValueColumn(OnSourceIndexGenerator.IndexedColumn
280282
.parseColSpec(colSpec, getDelegate().getIdentifierVisitor())));
281283

284+
// parse the number of dimensions.
285+
final var indexedColumns = indexGeneratorBuilder.getValueColumns();
286+
Assert.thatUnchecked(indexedColumns.size() == 1, ErrorCode.UNSUPPORTED_OPERATION,
287+
() -> "invalid number of indexed columns, only one column is supported, found " + indexedColumns.size() + " columns");
288+
final var indexedCol = Iterables.getOnlyElement(indexedColumns).getIdentifier();
289+
final var type = getDelegate().getSemanticAnalyzer().resolveIdentifier(indexedCol, getDelegate().getCurrentPlanFragment())
290+
.getDataType();
291+
Assert.thatUnchecked(type.getCode() == DataType.Code.VECTOR, ErrorCode.SYNTAX_ERROR,
292+
() -> "indexed column must be of vector type, found '" + type.getCode() + "' instead");
293+
final var numberOfDimensions = ((DataType.VectorType)type).getDimensions();
294+
indexGeneratorBuilder.addIndexOption(IndexOptions.HNSW_NUM_DIMENSIONS, String.valueOf(numberOfDimensions));
295+
296+
Assert.isNullUnchecked(indexDefinitionContext.includeClause(), ErrorCode.UNSUPPORTED_OPERATION,
297+
"INCLUDE clause is not supported for vector indexes");
298+
282299
if (indexDefinitionContext.partitionClause() != null) {
283300
indexDefinitionContext.partitionClause().indexColumnSpec().forEach(colSpec ->
284301
indexGeneratorBuilder.addKeyColumn(OnSourceIndexGenerator.IndexedColumn
@@ -313,25 +330,24 @@ private Map<String, String> parseVectorOptions(@Nullable final RelationalParser.
313330
return indexOptionsBuilder.build();
314331
}
315332

316-
for (final var vectorIndexOption : indexOptionsContext.vectorIndexOptions()) {
317-
final var option = vectorIndexOption.vectorIndexOption();
318-
if (option.HNSW_EF_CONSTRUCTION() != null) {
333+
// Copies each parsed vector index option into indexOptionsBuilder under the
// matching IndexOptions.HNSW_* key, using the option's source text as the value.
// NOTE(review): the grammar hunk of this commit adds an M_MAX_0 alternative
// (label mMaxZero), but this chain has no branch for it, so that option would be
// parsed and then silently dropped here -- confirm whether a branch is missing.
for (final var option : indexOptionsContext.vectorIndexOption()) {
334+
if (option.EF_CONSTRUCTION() != null) {
319335
indexOptionsBuilder.put(IndexOptions.HNSW_EF_CONSTRUCTION, option.efConstruction.getText());
320-
} else if (option.HNSW_M() != null) {
321-
indexOptionsBuilder.put(IndexOptions.HNSW_M, option.m.getText());
322-
} else if (option.HNSW_M_MAX() != null) {
336+
} else if (option.CONNECTIVITY() != null) {
337+
indexOptionsBuilder.put(IndexOptions.HNSW_M, option.connectivity.getText());
338+
} else if (option.M_MAX() != null) {
323339
indexOptionsBuilder.put(IndexOptions.HNSW_M_MAX, option.mMax.getText());
324-
} else if (option.HNSW_MAINTAIN_STATS_PROBABILITY() != null) {
340+
} else if (option.MAINTAIN_STATS_PROBABILITY() != null) {
325341
indexOptionsBuilder.put(IndexOptions.HNSW_MAINTAIN_STATS_PROBABILITY, option.maintainStatsProbability.getText());
326-
} else if (option.HNSW_METRIC() != null) {
342+
} else if (option.METRIC() != null) {
327343
indexOptionsBuilder.put(IndexOptions.HNSW_METRIC, option.metric.getText());
328-
} else if (option.HNSW_RABITQ_NUM_EX_BITS() != null) {
344+
} else if (option.RABITQ_NUM_EX_BITS() != null) {
329345
indexOptionsBuilder.put(IndexOptions.HNSW_RABITQ_NUM_EX_BITS, option.rabitQNumExBits.getText());
330-
} else if (option.HNSW_SAMPLE_VECTOR_STATS_PROBABILITY() != null) {
346+
} else if (option.SAMPLE_VECTOR_STATS_PROBABILITY() != null) {
331347
indexOptionsBuilder.put(IndexOptions.HNSW_SAMPLE_VECTOR_STATS_PROBABILITY, option.statsProbability.getText());
332-
} else if (option.HNSW_STATS_THRESHOLD() != null) {
348+
} else if (option.STATS_THRESHOLD() != null) {
333349
indexOptionsBuilder.put(IndexOptions.HNSW_STATS_THRESHOLD, option.statsThreshold.getText());
334-
} else if (option.HNSW_USE_RABITQ() != null) {
350+
} else if (option.USE_RABITQ() != null) {
335351
indexOptionsBuilder.put(IndexOptions.HNSW_USE_RABITQ, option.useRabitQ.getText());
336352
}
337353
}

fdb-relational-core/src/main/java/com/apple/foundationdb/relational/recordlayer/query/visitors/DelegatingVisitor.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -267,6 +267,11 @@ public Object visitVectorIndexOption(final RelationalParser.VectorIndexOptionCon
267267
return getDelegate().visitVectorIndexOption(ctx);
268268
}
269269

270+
/**
 * Forwards the visit of an {@code hnswMetric} parse-tree node to the delegate visitor.
 *
 * @param ctx the {@code hnswMetric} context to visit
 * @return whatever the delegate's {@code visitHnswMetric} returns
 */
@Override
271+
public Object visitHnswMetric(final RelationalParser.HnswMetricContext ctx) {
272+
return getDelegate().visitHnswMetric(ctx);
273+
}
274+
270275
@Nonnull
271276
@Override
272277
public Object visitIndexAttributes(@Nonnull RelationalParser.IndexAttributesContext ctx) {

0 commit comments

Comments
 (0)