From 8d826a3ac3227cd814135dc5308cafa44d905ee9 Mon Sep 17 00:00:00 2001 From: Olamide Kolawole Date: Mon, 29 Sep 2025 16:18:00 +0300 Subject: [PATCH] MODDICORE-457 Use index-friendly CQL for identifier matching The previous CQL query for matching instances by identifier was causing significant performance issues when there were many values to filter. The generated query (`identifiers="\"identifierTypeId\":\"...\""` AND ...) performed a string-based search on the entire JSONB object without using the index in some cases. --- .../loader/query/LoadQueryBuilder.java | 45 +++++++++-- .../matching/loader/LoadQueryBuilderTest.java | 78 ++++++++++++++----- 2 files changed, 97 insertions(+), 26 deletions(-) diff --git a/src/main/java/org/folio/processing/matching/loader/query/LoadQueryBuilder.java b/src/main/java/org/folio/processing/matching/loader/query/LoadQueryBuilder.java index 296dda0a..e347e601 100644 --- a/src/main/java/org/folio/processing/matching/loader/query/LoadQueryBuilder.java +++ b/src/main/java/org/folio/processing/matching/loader/query/LoadQueryBuilder.java @@ -1,6 +1,9 @@ package org.folio.processing.matching.loader.query; +import io.vertx.core.json.Json; import org.apache.commons.lang3.StringUtils; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; import org.folio.MatchDetail; import org.folio.processing.value.StringValue; import org.folio.processing.value.Value; @@ -24,10 +27,23 @@ public class LoadQueryBuilder { private LoadQueryBuilder() { } + private static final Logger LOGGER = LogManager.getLogger(LoadQueryBuilder.class); private static final String JSON_PATH_SEPARATOR = "."; private static final String IDENTIFIER_TYPE_ID = "identifierTypeId"; private static final String IDENTIFIER_TYPE_VALUE = "instance.identifiers[].value"; - private static final String IDENTIFIER_INDIVIDUAL_CQL_QUERY = "identifiers=\"\\\"identifierTypeId\\\":\\\"%s\\\"\" AND identifiers=\"\\\"value\\\":\\\"%s\\\"\""; + /** + * CQL query template to find an instance by a specific identifier. + *

+ * This query leverages a relation modifier ({@code @}) to efficiently search within the 'identifiers' JSON array. + *

+ * This syntax allows PostgreSQL to use the GIN index on the field consistently, improving query performance. + */ + private static final String IDENTIFIER_INDIVIDUAL_CQL_QUERY = "identifiers =/@identifierTypeId=%s \"%s\""; /** * Builds LoadQuery, @@ -61,11 +77,16 @@ public static LoadQuery build(Value value, MatchDetail matchDetail) { mainQuery.applyAdditionalCondition(additionalQuery); // TODO provide all the requirements for MODDATAIMP-592 and refactor code block below if(checkIfIdentifierTypeExists(matchDetail, fieldPath, additionalField.getLabel())) { - String cqlQuery = buildIdentifierCqlQuery(value, additionalField.getValue()); + String cqlQuery = buildIdentifierCqlQuery(value, additionalField.getValue(), matchDetail.getMatchCriterion()); mainQuery.setCqlQuery(cqlQuery); mainQuery.setSqlQuery(StringUtils.EMPTY); + } else { + LOGGER.debug("LoadQueryBuilder::build - Additional field does not match identifier type criteria: {} fieldPath: {}", + additionalField.getLabel(), fieldPath); } } + LOGGER.debug(() -> String.format("LoadQueryBuilder::build - Built LoadQuery for VALUE: ~| %s |~ MATCHDETAIL: ~| %s |~ CQL: ~| %s |~", + Json.encode(value), Json.encode(matchDetail), mainQuery.getCqlQuery())); return new DefaultJsonLoadQuery(tableName, mainQuery.getSqlQuery(), mainQuery.getCqlQuery()); } } @@ -75,8 +96,9 @@ public static LoadQuery build(Value value, MatchDetail matchDetail) { private static boolean checkIfIdentifierTypeExists(MatchDetail matchDetail, String fieldPath, String additionalFieldPath) { return matchDetail.getIncomingRecordType() == EntityType.MARC_BIBLIOGRAPHIC && matchDetail.getExistingRecordType() == EntityType.INSTANCE && - matchDetail.getMatchCriterion() == MatchDetail.MatchCriterion.EXACTLY_MATCHES && fieldPath.equals(IDENTIFIER_TYPE_VALUE) && - additionalFieldPath.equals(IDENTIFIER_TYPE_ID); + (matchDetail.getMatchCriterion() == MatchDetail.MatchCriterion.EXACTLY_MATCHES || + matchDetail.getMatchCriterion() == MatchDetail.MatchCriterion.EXISTING_VALUE_CONTAINS_INCOMING_VALUE) && + fieldPath.equals(IDENTIFIER_TYPE_VALUE) && additionalFieldPath.equals(IDENTIFIER_TYPE_ID); } /** @@ -84,15 +106,24 @@ private static boolean checkIfIdentifierTypeExists(MatchDetail matchDetail, Stri * * @param value the value to match against (can be STRING or LIST) * @param identifierTypeId the identifier type ID + * @param matchCriterion the match criterion to determine if wildcards should be applied * @return CQL query string with individual AND conditions */ - private static String buildIdentifierCqlQuery(Value value, String identifierTypeId) { + private static String buildIdentifierCqlQuery(Value value, String identifierTypeId, MatchDetail.MatchCriterion matchCriterion) { if (value.getType() == STRING) { - return String.format(IDENTIFIER_INDIVIDUAL_CQL_QUERY, identifierTypeId, escapeCqlValue(value.getValue().toString())); + String escapedValue = escapeCqlValue(value.getValue().toString()); + if (matchCriterion == MatchDetail.MatchCriterion.EXISTING_VALUE_CONTAINS_INCOMING_VALUE) { + escapedValue = "*" + escapedValue + "*"; + } + return String.format(IDENTIFIER_INDIVIDUAL_CQL_QUERY, identifierTypeId, escapedValue); } else if (value.getType() == LIST) { List conditions = new ArrayList<>(); for (Object val : ((org.folio.processing.value.ListValue) value).getValue()) { - conditions.add("(" + String.format(IDENTIFIER_INDIVIDUAL_CQL_QUERY, identifierTypeId, escapeCqlValue(val.toString())) + ")"); + String escapedValue = escapeCqlValue(val.toString()); + if (matchCriterion == MatchDetail.MatchCriterion.EXISTING_VALUE_CONTAINS_INCOMING_VALUE) { + escapedValue = "*" + escapedValue + "*"; + } + conditions.add(String.format(IDENTIFIER_INDIVIDUAL_CQL_QUERY, identifierTypeId, escapedValue)); } return String.join(" OR ", conditions); } diff --git a/src/test/java/org/folio/processing/matching/loader/LoadQueryBuilderTest.java b/src/test/java/org/folio/processing/matching/loader/LoadQueryBuilderTest.java index 1fe4ca7b..b01a2d99 100644 --- a/src/test/java/org/folio/processing/matching/loader/LoadQueryBuilderTest.java +++ b/src/test/java/org/folio/processing/matching/loader/LoadQueryBuilderTest.java @@ -70,6 +70,8 @@ public void shouldBuildQueryWhere_ExistingValueExactlyMatches_MultipleIncomingSt StringValue value = StringValue.of("ybp7406411"); MatchDetail matchDetail = new MatchDetail() .withMatchCriterion(EXACTLY_MATCHES) + .withIncomingRecordType(EntityType.MARC_BIBLIOGRAPHIC) + .withExistingRecordType(EntityType.INSTANCE) .withExistingMatchExpression(new MatchExpression() .withDataValueType(VALUE_FROM_RECORD) .withFields(Arrays.asList( @@ -80,11 +82,9 @@ public void shouldBuildQueryWhere_ExistingValueExactlyMatches_MultipleIncomingSt LoadQuery result = LoadQueryBuilder.build(value, matchDetail); //then assertNotNull(result); - assertNotNull(result.getSql()); - String expectedSQLQuery = "CROSS JOIN LATERAL jsonb_array_elements(instance.jsonb -> 'identifiers') fields(field) WHERE field ->> 'value' = 'ybp7406411' AND field ->> 'identifierTypeId' = '439bfbae-75bc-4f74-9fc7-b2a2d47ce3ef'"; - assertEquals(expectedSQLQuery, result.getSql()); + assertEquals(StringUtils.EMPTY, result.getSql()); assertNotNull(result.getCql()); - String expectedCQLQuery = "identifiers=\"\\\"identifierTypeId\\\":\\\"439bfbae-75bc-4f74-9fc7-b2a2d47ce3ef\\\"\" AND (identifiers=\"\\\"value\\\":\\\"ybp7406411\\\"\")"; + String expectedCQLQuery = "identifiers =/@identifierTypeId=439bfbae-75bc-4f74-9fc7-b2a2d47ce3ef \"ybp7406411\""; assertEquals(expectedCQLQuery, result.getCql()); } @@ -94,6 +94,8 @@ public void shouldBuildQueryWhere_ExistingValueExactlyMatches_MultipleIncomingLi ListValue value = ListValue.of(Arrays.asList("ybp7406411", "ybp74064123")); MatchDetail matchDetail = new MatchDetail() .withMatchCriterion(EXACTLY_MATCHES) + .withIncomingRecordType(EntityType.MARC_BIBLIOGRAPHIC) + .withExistingRecordType(EntityType.INSTANCE) .withExistingMatchExpression(new MatchExpression() .withDataValueType(VALUE_FROM_RECORD) .withFields(Arrays.asList( @@ -104,11 +106,9 @@ public void shouldBuildQueryWhere_ExistingValueExactlyMatches_MultipleIncomingLi LoadQuery result = LoadQueryBuilder.build(value, matchDetail); //then assertNotNull(result); - assertNotNull(result.getSql()); - String expectedSQLQuery = "CROSS JOIN LATERAL jsonb_array_elements(instance.jsonb -> 'identifiers') fields(field) WHERE (field ->> 'value' = 'ybp7406411' OR field ->> 'value' = 'ybp74064123') AND field ->> 'identifierTypeId' = '439bfbae-75bc-4f74-9fc7-b2a2d47ce3ef'"; - assertEquals(expectedSQLQuery, result.getSql()); + assertEquals(StringUtils.EMPTY, result.getSql()); assertNotNull(result.getCql()); - String expectedCQLQuery = "identifiers=\"\\\"identifierTypeId\\\":\\\"439bfbae-75bc-4f74-9fc7-b2a2d47ce3ef\\\"\" AND (identifiers=\"\\\"value\\\":\\\"ybp7406411\\\"\" OR identifiers=\"\\\"value\\\":\\\"ybp74064123\\\"\")"; + String expectedCQLQuery = "identifiers =/@identifierTypeId=439bfbae-75bc-4f74-9fc7-b2a2d47ce3ef \"ybp7406411\" OR identifiers =/@identifierTypeId=439bfbae-75bc-4f74-9fc7-b2a2d47ce3ef \"ybp74064123\""; assertEquals(expectedCQLQuery, result.getCql()); } @@ -691,7 +691,7 @@ public void shouldBuildQueryWhere_ExistingValueExactlyMatches_MultipleIncomingLi assertNotEquals(expectedSQLQuery, wrongResult.getSql()); assertNotNull(result.getCql()); assertNotNull(wrongResult.getCql()); - String expectedCQLQuery = format("identifiers=\"\\\"identifierTypeId\\\":\\\"%s\\\"\" AND identifiers=\"\\\"value\\\":\\\"%s\\\"\"",identifierTypeFieldValue, value.getValue()); + String expectedCQLQuery = format("identifiers =/@identifierTypeId=%s \"%s\"",identifierTypeFieldValue, value.getValue()); assertEquals(expectedCQLQuery, result.getCql()); assertNotEquals(expectedCQLQuery, wrongResult.getCql()); } @@ -716,7 +716,7 @@ public void shouldBuildQueryWhere_IdentifierMatching_WithParenthesesInValue() { //then assertNotNull(result); assertEquals(StringUtils.EMPTY, result.getSql()); - String expectedCQLQuery = format("identifiers=\"\\\"identifierTypeId\\\":\\\"%s\\\"\" AND identifiers=\"\\\"value\\\":\\\"\\(OCoLC\\)1024095011\\\"\"", identifierTypeFieldValue); + String expectedCQLQuery = format("identifiers =/@identifierTypeId=%s \"\\(OCoLC\\)1024095011\"", identifierTypeFieldValue); assertEquals(expectedCQLQuery, result.getCql()); } @@ -740,7 +740,7 @@ public void shouldBuildQueryWhere_IdentifierMatching_WithQuotesInValue() { //then assertNotNull(result); assertEquals(StringUtils.EMPTY, result.getSql()); - String expectedCQLQuery = format("identifiers=\"\\\"identifierTypeId\\\":\\\"%s\\\"\" AND identifiers=\"\\\"value\\\":\\\"test\\\"quote\\\"value\\\"\"", identifierTypeFieldValue); + String expectedCQLQuery = format("identifiers =/@identifierTypeId=%s \"test\\\"quote\\\"value\"", identifierTypeFieldValue); assertEquals(expectedCQLQuery, result.getCql()); } @@ -764,7 +764,7 @@ public void shouldBuildQueryWhere_IdentifierMatching_WithBackslashesInValue() { //then assertNotNull(result); assertEquals(StringUtils.EMPTY, result.getSql()); - String expectedCQLQuery = format("identifiers=\"\\\"identifierTypeId\\\":\\\"%s\\\"\" AND identifiers=\"\\\"value\\\":\\\"path\\\\to\\\\resource\\\"\"", identifierTypeFieldValue); + String expectedCQLQuery = format("identifiers =/@identifierTypeId=%s \"path\\\\to\\\\resource\"", identifierTypeFieldValue); assertEquals(expectedCQLQuery, result.getCql()); } @@ -788,7 +788,7 @@ public void shouldBuildQueryWhere_IdentifierMatching_WithWildcardsInValue() { //then assertNotNull(result); assertEquals(StringUtils.EMPTY, result.getSql()); - String expectedCQLQuery = format("identifiers=\"\\\"identifierTypeId\\\":\\\"%s\\\"\" AND identifiers=\"\\\"value\\\":\\\"test\\*value\\?\\\"\"", identifierTypeFieldValue); + String expectedCQLQuery = format("identifiers =/@identifierTypeId=%s \"test\\*value\\?\"", identifierTypeFieldValue); assertEquals(expectedCQLQuery, result.getCql()); } @@ -812,7 +812,7 @@ public void shouldBuildQueryWhere_IdentifierMatching_WithMultipleSpecialCharacte //then assertNotNull(result); assertEquals(StringUtils.EMPTY, result.getSql()); - String expectedCQLQuery = format("identifiers=\"\\\"identifierTypeId\\\":\\\"%s\\\"\" AND identifiers=\"\\\"value\\\":\\\"\\(test\\*\\)\\\\query\\?\\\"\"", identifierTypeFieldValue); + String expectedCQLQuery = format("identifiers =/@identifierTypeId=%s \"\\(test\\*\\)\\\\query\\?\"", identifierTypeFieldValue); assertEquals(expectedCQLQuery, result.getCql()); } @@ -836,7 +836,7 @@ public void shouldBuildQueryWhere_IdentifierMatching_ListWithSpecialCharacters() //then assertNotNull(result); assertEquals(StringUtils.EMPTY, result.getSql()); - String expectedCQLQuery = format("(identifiers=\"\\\"identifierTypeId\\\":\\\"%s\\\"\" AND identifiers=\"\\\"value\\\":\\\"\\(OCoLC\\)123\\\"\") OR (identifiers=\"\\\"identifierTypeId\\\":\\\"%s\\\"\" AND identifiers=\"\\\"value\\\":\\\"test\\*value\\\"\") OR (identifiers=\"\\\"identifierTypeId\\\":\\\"%s\\\"\" AND identifiers=\"\\\"value\\\":\\\"path\\\\file\\\"\")", identifierTypeFieldValue, identifierTypeFieldValue, identifierTypeFieldValue); + String expectedCQLQuery = format("identifiers =/@identifierTypeId=%s \"\\(OCoLC\\)123\" OR identifiers =/@identifierTypeId=%s \"test\\*value\" OR identifiers =/@identifierTypeId=%s \"path\\\\file\"", identifierTypeFieldValue, identifierTypeFieldValue, identifierTypeFieldValue); assertEquals(expectedCQLQuery, result.getCql()); } @@ -861,7 +861,7 @@ public void shouldBuildQueryWhere_IdentifierMatching_WithApostropheInValue() { assertNotNull(result); assertEquals(StringUtils.EMPTY, result.getSql()); // Apostrophes don't need escaping in CQL - String expectedCQLQuery = format("identifiers=\"\\\"identifierTypeId\\\":\\\"%s\\\"\" AND identifiers=\"\\\"value\\\":\\\"O'Reilly's Book\\\"\"", identifierTypeFieldValue); + String expectedCQLQuery = format("identifiers =/@identifierTypeId=%s \"O'Reilly's Book\"", identifierTypeFieldValue); assertEquals(expectedCQLQuery, result.getCql()); } @@ -885,7 +885,7 @@ public void shouldBuildQueryWhere_IdentifierMatching_EmptyValue() { //then assertNotNull(result); assertEquals(StringUtils.EMPTY, result.getSql()); - String expectedCQLQuery = format("identifiers=\"\\\"identifierTypeId\\\":\\\"%s\\\"\" AND identifiers=\"\\\"value\\\":\\\"\\\"\"", identifierTypeFieldValue); + String expectedCQLQuery = format("identifiers =/@identifierTypeId=%s \"\"", identifierTypeFieldValue); assertEquals(expectedCQLQuery, result.getCql()); } @@ -910,7 +910,7 @@ public void shouldBuildQueryWhere_IdentifierMatching_PreEscapedValue() { assertNotNull(result); assertEquals(StringUtils.EMPTY, result.getSql()); // Should double-escape the already escaped backslashes - String expectedCQLQuery = format("identifiers=\"\\\"identifierTypeId\\\":\\\"%s\\\"\" AND identifiers=\"\\\"value\\\":\\\"already\\\\\\\\escaped\\\"\"", identifierTypeFieldValue); + String expectedCQLQuery = format("identifiers =/@identifierTypeId=%s \"already\\\\\\\\escaped\"", identifierTypeFieldValue); assertEquals(expectedCQLQuery, result.getCql()); } @@ -939,7 +939,47 @@ public void shouldBuildQueryWhere_IdentifierMatching_RealWorldExampleFromProblem //then assertNotNull(result); assertEquals(StringUtils.EMPTY, result.getSql()); - String expectedCQLQuery = format("(identifiers=\"\\\"identifierTypeId\\\":\\\"%s\\\"\" AND identifiers=\"\\\"value\\\":\\\"\\(CStRLIN\\)NYCX1604275S\\\"\") OR (identifiers=\"\\\"identifierTypeId\\\":\\\"%s\\\"\" AND identifiers=\"\\\"value\\\":\\\"\\(NIC\\)notisABP6388\\\"\") OR (identifiers=\"\\\"identifierTypeId\\\":\\\"%s\\\"\" AND identifiers=\"\\\"value\\\":\\\"366832\\\"\") OR (identifiers=\"\\\"identifierTypeId\\\":\\\"%s\\\"\" AND identifiers=\"\\\"value\\\":\\\"\\(OCoLC\\)1604275\\\"\")", identifierTypeFieldValue, identifierTypeFieldValue, identifierTypeFieldValue, identifierTypeFieldValue); + String expectedCQLQuery = format("identifiers =/@identifierTypeId=%s \"\\(CStRLIN\\)NYCX1604275S\" OR identifiers =/@identifierTypeId=%s \"\\(NIC\\)notisABP6388\" OR identifiers =/@identifierTypeId=%s \"366832\" OR identifiers =/@identifierTypeId=%s \"\\(OCoLC\\)1604275\"", identifierTypeFieldValue, identifierTypeFieldValue, identifierTypeFieldValue, identifierTypeFieldValue); + assertEquals(expectedCQLQuery, result.getCql()); + } + + @Test + public void shouldBuildQueryWhere_IdentifierMatching_WithListValue_ContainsCriterion() { + // given + ListValue value = ListValue.of(Arrays.asList( + "(OCoLC)1349275037", + "9924655804502931", + "in00022912564" + )); + String identifierTypeFieldValue = "439bfbae-75bc-4f74-9fc7-b2a2d47ce3ef"; + MatchDetail matchDetail = new MatchDetail() + .withMatchCriterion(EXISTING_VALUE_CONTAINS_INCOMING_VALUE) + .withIncomingRecordType(EntityType.MARC_BIBLIOGRAPHIC) + .withExistingRecordType(EntityType.INSTANCE) + .withIncomingMatchExpression(new MatchExpression() + .withDataValueType(VALUE_FROM_RECORD) + .withFields(Arrays.asList( + new Field().withLabel("field").withValue("035"), + new Field().withLabel("indicator1").withValue(""), + new Field().withLabel("indicator2").withValue(""), + new Field().withLabel("recordSubfield").withValue("a")) + )) + .withExistingMatchExpression(new MatchExpression() + .withDataValueType(VALUE_FROM_RECORD) + .withFields(Arrays.asList( + new Field().withLabel("field").withValue("instance.identifiers[].value"), + new Field().withLabel("identifierTypeId").withValue(identifierTypeFieldValue)) + )); + + // when + LoadQuery result = LoadQueryBuilder.build(value, matchDetail); + + // then + assertNotNull(result); + assertEquals(StringUtils.EMPTY, result.getSql()); + // For EXISTING_VALUE_CONTAINS_INCOMING_VALUE with identifiers, the CQL should use wildcard matching + String expectedCQLQuery = format("identifiers =/@identifierTypeId=%s \"*\\(OCoLC\\)1349275037*\" OR identifiers =/@identifierTypeId=%s \"*9924655804502931*\" OR identifiers =/@identifierTypeId=%s \"*in00022912564*\"", + identifierTypeFieldValue, identifierTypeFieldValue, identifierTypeFieldValue); assertEquals(expectedCQLQuery, result.getCql()); }