Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
package org.folio.processing.matching.loader.query;

import io.vertx.core.json.Json;
import org.apache.commons.lang3.StringUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.folio.MatchDetail;
import org.folio.processing.value.StringValue;
import org.folio.processing.value.Value;
Expand All @@ -24,10 +27,23 @@ public class LoadQueryBuilder {
private LoadQueryBuilder() {
}

private static final Logger LOGGER = LogManager.getLogger(LoadQueryBuilder.class);
private static final String JSON_PATH_SEPARATOR = ".";
private static final String IDENTIFIER_TYPE_ID = "identifierTypeId";
private static final String IDENTIFIER_TYPE_VALUE = "instance.identifiers[].value";
private static final String IDENTIFIER_INDIVIDUAL_CQL_QUERY = "identifiers=\"\\\"identifierTypeId\\\":\\\"%s\\\"\" AND identifiers=\"\\\"value\\\":\\\"%s\\\"\"";
/**
* CQL query template to find an instance by a specific identifier.
* <p>
* This query leverages a relation modifier ({@code @}) to efficiently search within the 'identifiers' JSON array.
* <ul>
* <li>{@code @identifierTypeId=%s}: Filters array elements to only include those where the 'identifierTypeId'
* matches the first placeholder.</li>
* <li>{@code "%s"}: The search term (the identifier's value) is then matched against the 'value' subfield
* of the filtered elements.</li>
* </ul>
* This syntax allows PostgreSQL to use the GIN index on the field consistently, improving query performance.
*/
private static final String IDENTIFIER_INDIVIDUAL_CQL_QUERY = "identifiers =/@identifierTypeId=%s \"%s\"";

/**
* Builds LoadQuery,
Expand Down Expand Up @@ -61,11 +77,16 @@ public static LoadQuery build(Value<?> value, MatchDetail matchDetail) {
mainQuery.applyAdditionalCondition(additionalQuery);
// TODO provide all the requirements for MODDATAIMP-592 and refactor code block below
if(checkIfIdentifierTypeExists(matchDetail, fieldPath, additionalField.getLabel())) {
String cqlQuery = buildIdentifierCqlQuery(value, additionalField.getValue());
String cqlQuery = buildIdentifierCqlQuery(value, additionalField.getValue(), matchDetail.getMatchCriterion());
mainQuery.setCqlQuery(cqlQuery);
mainQuery.setSqlQuery(StringUtils.EMPTY);
} else {
LOGGER.debug("LoadQueryBuilder::build - Additional field does not match identifier type criteria: {} fieldPath: {}",
additionalField.getLabel(), fieldPath);
}
}
LOGGER.debug(() -> String.format("LoadQueryBuilder::build - Built LoadQuery for VALUE: ~| %s |~ MATCHDETAIL: ~| %s |~ CQL: ~| %s |~",
Json.encode(value), Json.encode(matchDetail), mainQuery.getCqlQuery()));
return new DefaultJsonLoadQuery(tableName, mainQuery.getSqlQuery(), mainQuery.getCqlQuery());
}
}
Expand All @@ -75,24 +96,34 @@ public static LoadQuery build(Value<?> value, MatchDetail matchDetail) {

private static boolean checkIfIdentifierTypeExists(MatchDetail matchDetail, String fieldPath, String additionalFieldPath) {
return matchDetail.getIncomingRecordType() == EntityType.MARC_BIBLIOGRAPHIC && matchDetail.getExistingRecordType() == EntityType.INSTANCE &&
matchDetail.getMatchCriterion() == MatchDetail.MatchCriterion.EXACTLY_MATCHES && fieldPath.equals(IDENTIFIER_TYPE_VALUE) &&
additionalFieldPath.equals(IDENTIFIER_TYPE_ID);
(matchDetail.getMatchCriterion() == MatchDetail.MatchCriterion.EXACTLY_MATCHES ||
matchDetail.getMatchCriterion() == MatchDetail.MatchCriterion.EXISTING_VALUE_CONTAINS_INCOMING_VALUE) &&
fieldPath.equals(IDENTIFIER_TYPE_VALUE) && additionalFieldPath.equals(IDENTIFIER_TYPE_ID);
}

/**
* Builds CQL query for identifier matching with individual AND conditions for each value
*
* @param value the value to match against (can be STRING or LIST)
* @param identifierTypeId the identifier type ID
* @param matchCriterion the match criterion to determine if wildcards should be applied
* @return CQL query string with individual AND conditions
*/
private static String buildIdentifierCqlQuery(Value<?> value, String identifierTypeId) {
private static String buildIdentifierCqlQuery(Value<?> value, String identifierTypeId, MatchDetail.MatchCriterion matchCriterion) {
if (value.getType() == STRING) {
return String.format(IDENTIFIER_INDIVIDUAL_CQL_QUERY, identifierTypeId, escapeCqlValue(value.getValue().toString()));
String escapedValue = escapeCqlValue(value.getValue().toString());
if (matchCriterion == MatchDetail.MatchCriterion.EXISTING_VALUE_CONTAINS_INCOMING_VALUE) {
escapedValue = "*" + escapedValue + "*";
}
return String.format(IDENTIFIER_INDIVIDUAL_CQL_QUERY, identifierTypeId, escapedValue);
} else if (value.getType() == LIST) {
List<String> conditions = new ArrayList<>();
for (Object val : ((org.folio.processing.value.ListValue) value).getValue()) {
conditions.add("(" + String.format(IDENTIFIER_INDIVIDUAL_CQL_QUERY, identifierTypeId, escapeCqlValue(val.toString())) + ")");
String escapedValue = escapeCqlValue(val.toString());
if (matchCriterion == MatchDetail.MatchCriterion.EXISTING_VALUE_CONTAINS_INCOMING_VALUE) {
escapedValue = "*" + escapedValue + "*";
}
conditions.add(String.format(IDENTIFIER_INDIVIDUAL_CQL_QUERY, identifierTypeId, escapedValue));
}
return String.join(" OR ", conditions);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,8 @@ public void shouldBuildQueryWhere_ExistingValueExactlyMatches_MultipleIncomingSt
StringValue value = StringValue.of("ybp7406411");
MatchDetail matchDetail = new MatchDetail()
.withMatchCriterion(EXACTLY_MATCHES)
.withIncomingRecordType(EntityType.MARC_BIBLIOGRAPHIC)
.withExistingRecordType(EntityType.INSTANCE)
.withExistingMatchExpression(new MatchExpression()
.withDataValueType(VALUE_FROM_RECORD)
.withFields(Arrays.asList(
Expand All @@ -80,11 +82,9 @@ public void shouldBuildQueryWhere_ExistingValueExactlyMatches_MultipleIncomingSt
LoadQuery result = LoadQueryBuilder.build(value, matchDetail);
//then
assertNotNull(result);
assertNotNull(result.getSql());
String expectedSQLQuery = "CROSS JOIN LATERAL jsonb_array_elements(instance.jsonb -> 'identifiers') fields(field) WHERE field ->> 'value' = 'ybp7406411' AND field ->> 'identifierTypeId' = '439bfbae-75bc-4f74-9fc7-b2a2d47ce3ef'";
assertEquals(expectedSQLQuery, result.getSql());
assertEquals(StringUtils.EMPTY, result.getSql());
assertNotNull(result.getCql());
String expectedCQLQuery = "identifiers=\"\\\"identifierTypeId\\\":\\\"439bfbae-75bc-4f74-9fc7-b2a2d47ce3ef\\\"\" AND (identifiers=\"\\\"value\\\":\\\"ybp7406411\\\"\")";
String expectedCQLQuery = "identifiers =/@identifierTypeId=439bfbae-75bc-4f74-9fc7-b2a2d47ce3ef \"ybp7406411\"";
assertEquals(expectedCQLQuery, result.getCql());
}

Expand All @@ -94,6 +94,8 @@ public void shouldBuildQueryWhere_ExistingValueExactlyMatches_MultipleIncomingLi
ListValue value = ListValue.of(Arrays.asList("ybp7406411", "ybp74064123"));
MatchDetail matchDetail = new MatchDetail()
.withMatchCriterion(EXACTLY_MATCHES)
.withIncomingRecordType(EntityType.MARC_BIBLIOGRAPHIC)
.withExistingRecordType(EntityType.INSTANCE)
.withExistingMatchExpression(new MatchExpression()
.withDataValueType(VALUE_FROM_RECORD)
.withFields(Arrays.asList(
Expand All @@ -104,11 +106,9 @@ public void shouldBuildQueryWhere_ExistingValueExactlyMatches_MultipleIncomingLi
LoadQuery result = LoadQueryBuilder.build(value, matchDetail);
//then
assertNotNull(result);
assertNotNull(result.getSql());
String expectedSQLQuery = "CROSS JOIN LATERAL jsonb_array_elements(instance.jsonb -> 'identifiers') fields(field) WHERE (field ->> 'value' = 'ybp7406411' OR field ->> 'value' = 'ybp74064123') AND field ->> 'identifierTypeId' = '439bfbae-75bc-4f74-9fc7-b2a2d47ce3ef'";
assertEquals(expectedSQLQuery, result.getSql());
assertEquals(StringUtils.EMPTY, result.getSql());
assertNotNull(result.getCql());
String expectedCQLQuery = "identifiers=\"\\\"identifierTypeId\\\":\\\"439bfbae-75bc-4f74-9fc7-b2a2d47ce3ef\\\"\" AND (identifiers=\"\\\"value\\\":\\\"ybp7406411\\\"\" OR identifiers=\"\\\"value\\\":\\\"ybp74064123\\\"\")";
String expectedCQLQuery = "identifiers =/@identifierTypeId=439bfbae-75bc-4f74-9fc7-b2a2d47ce3ef \"ybp7406411\" OR identifiers =/@identifierTypeId=439bfbae-75bc-4f74-9fc7-b2a2d47ce3ef \"ybp74064123\"";
assertEquals(expectedCQLQuery, result.getCql());
}

Expand Down Expand Up @@ -691,7 +691,7 @@ public void shouldBuildQueryWhere_ExistingValueExactlyMatches_MultipleIncomingLi
assertNotEquals(expectedSQLQuery, wrongResult.getSql());
assertNotNull(result.getCql());
assertNotNull(wrongResult.getCql());
String expectedCQLQuery = format("identifiers=\"\\\"identifierTypeId\\\":\\\"%s\\\"\" AND identifiers=\"\\\"value\\\":\\\"%s\\\"\"",identifierTypeFieldValue, value.getValue());
String expectedCQLQuery = format("identifiers =/@identifierTypeId=%s \"%s\"",identifierTypeFieldValue, value.getValue());
assertEquals(expectedCQLQuery, result.getCql());
assertNotEquals(expectedCQLQuery, wrongResult.getCql());
}
Expand All @@ -716,7 +716,7 @@ public void shouldBuildQueryWhere_IdentifierMatching_WithParenthesesInValue() {
//then
assertNotNull(result);
assertEquals(StringUtils.EMPTY, result.getSql());
String expectedCQLQuery = format("identifiers=\"\\\"identifierTypeId\\\":\\\"%s\\\"\" AND identifiers=\"\\\"value\\\":\\\"\\(OCoLC\\)1024095011\\\"\"", identifierTypeFieldValue);
String expectedCQLQuery = format("identifiers =/@identifierTypeId=%s \"\\(OCoLC\\)1024095011\"", identifierTypeFieldValue);
assertEquals(expectedCQLQuery, result.getCql());
}

Expand All @@ -740,7 +740,7 @@ public void shouldBuildQueryWhere_IdentifierMatching_WithQuotesInValue() {
//then
assertNotNull(result);
assertEquals(StringUtils.EMPTY, result.getSql());
String expectedCQLQuery = format("identifiers=\"\\\"identifierTypeId\\\":\\\"%s\\\"\" AND identifiers=\"\\\"value\\\":\\\"test\\\"quote\\\"value\\\"\"", identifierTypeFieldValue);
String expectedCQLQuery = format("identifiers =/@identifierTypeId=%s \"test\\\"quote\\\"value\"", identifierTypeFieldValue);
assertEquals(expectedCQLQuery, result.getCql());
}

Expand All @@ -764,7 +764,7 @@ public void shouldBuildQueryWhere_IdentifierMatching_WithBackslashesInValue() {
//then
assertNotNull(result);
assertEquals(StringUtils.EMPTY, result.getSql());
String expectedCQLQuery = format("identifiers=\"\\\"identifierTypeId\\\":\\\"%s\\\"\" AND identifiers=\"\\\"value\\\":\\\"path\\\\to\\\\resource\\\"\"", identifierTypeFieldValue);
String expectedCQLQuery = format("identifiers =/@identifierTypeId=%s \"path\\\\to\\\\resource\"", identifierTypeFieldValue);
assertEquals(expectedCQLQuery, result.getCql());
}

Expand All @@ -788,7 +788,7 @@ public void shouldBuildQueryWhere_IdentifierMatching_WithWildcardsInValue() {
//then
assertNotNull(result);
assertEquals(StringUtils.EMPTY, result.getSql());
String expectedCQLQuery = format("identifiers=\"\\\"identifierTypeId\\\":\\\"%s\\\"\" AND identifiers=\"\\\"value\\\":\\\"test\\*value\\?\\\"\"", identifierTypeFieldValue);
String expectedCQLQuery = format("identifiers =/@identifierTypeId=%s \"test\\*value\\?\"", identifierTypeFieldValue);
assertEquals(expectedCQLQuery, result.getCql());
}

Expand All @@ -812,7 +812,7 @@ public void shouldBuildQueryWhere_IdentifierMatching_WithMultipleSpecialCharacte
//then
assertNotNull(result);
assertEquals(StringUtils.EMPTY, result.getSql());
String expectedCQLQuery = format("identifiers=\"\\\"identifierTypeId\\\":\\\"%s\\\"\" AND identifiers=\"\\\"value\\\":\\\"\\(test\\*\\)\\\\query\\?\\\"\"", identifierTypeFieldValue);
String expectedCQLQuery = format("identifiers =/@identifierTypeId=%s \"\\(test\\*\\)\\\\query\\?\"", identifierTypeFieldValue);
assertEquals(expectedCQLQuery, result.getCql());
}

Expand All @@ -836,7 +836,7 @@ public void shouldBuildQueryWhere_IdentifierMatching_ListWithSpecialCharacters()
//then
assertNotNull(result);
assertEquals(StringUtils.EMPTY, result.getSql());
String expectedCQLQuery = format("(identifiers=\"\\\"identifierTypeId\\\":\\\"%s\\\"\" AND identifiers=\"\\\"value\\\":\\\"\\(OCoLC\\)123\\\"\") OR (identifiers=\"\\\"identifierTypeId\\\":\\\"%s\\\"\" AND identifiers=\"\\\"value\\\":\\\"test\\*value\\\"\") OR (identifiers=\"\\\"identifierTypeId\\\":\\\"%s\\\"\" AND identifiers=\"\\\"value\\\":\\\"path\\\\file\\\"\")", identifierTypeFieldValue, identifierTypeFieldValue, identifierTypeFieldValue);
String expectedCQLQuery = format("identifiers =/@identifierTypeId=%s \"\\(OCoLC\\)123\" OR identifiers =/@identifierTypeId=%s \"test\\*value\" OR identifiers =/@identifierTypeId=%s \"path\\\\file\"", identifierTypeFieldValue, identifierTypeFieldValue, identifierTypeFieldValue);
assertEquals(expectedCQLQuery, result.getCql());
}

Expand All @@ -861,7 +861,7 @@ public void shouldBuildQueryWhere_IdentifierMatching_WithApostropheInValue() {
assertNotNull(result);
assertEquals(StringUtils.EMPTY, result.getSql());
// Apostrophes don't need escaping in CQL
String expectedCQLQuery = format("identifiers=\"\\\"identifierTypeId\\\":\\\"%s\\\"\" AND identifiers=\"\\\"value\\\":\\\"O'Reilly's Book\\\"\"", identifierTypeFieldValue);
String expectedCQLQuery = format("identifiers =/@identifierTypeId=%s \"O'Reilly's Book\"", identifierTypeFieldValue);
assertEquals(expectedCQLQuery, result.getCql());
}

Expand All @@ -885,7 +885,7 @@ public void shouldBuildQueryWhere_IdentifierMatching_EmptyValue() {
//then
assertNotNull(result);
assertEquals(StringUtils.EMPTY, result.getSql());
String expectedCQLQuery = format("identifiers=\"\\\"identifierTypeId\\\":\\\"%s\\\"\" AND identifiers=\"\\\"value\\\":\\\"\\\"\"", identifierTypeFieldValue);
String expectedCQLQuery = format("identifiers =/@identifierTypeId=%s \"\"", identifierTypeFieldValue);
assertEquals(expectedCQLQuery, result.getCql());
}

Expand All @@ -910,7 +910,7 @@ public void shouldBuildQueryWhere_IdentifierMatching_PreEscapedValue() {
assertNotNull(result);
assertEquals(StringUtils.EMPTY, result.getSql());
// Should double-escape the already escaped backslashes
String expectedCQLQuery = format("identifiers=\"\\\"identifierTypeId\\\":\\\"%s\\\"\" AND identifiers=\"\\\"value\\\":\\\"already\\\\\\\\escaped\\\"\"", identifierTypeFieldValue);
String expectedCQLQuery = format("identifiers =/@identifierTypeId=%s \"already\\\\\\\\escaped\"", identifierTypeFieldValue);
assertEquals(expectedCQLQuery, result.getCql());
}

Expand Down Expand Up @@ -939,7 +939,47 @@ public void shouldBuildQueryWhere_IdentifierMatching_RealWorldExampleFromProblem
//then
assertNotNull(result);
assertEquals(StringUtils.EMPTY, result.getSql());
String expectedCQLQuery = format("(identifiers=\"\\\"identifierTypeId\\\":\\\"%s\\\"\" AND identifiers=\"\\\"value\\\":\\\"\\(CStRLIN\\)NYCX1604275S\\\"\") OR (identifiers=\"\\\"identifierTypeId\\\":\\\"%s\\\"\" AND identifiers=\"\\\"value\\\":\\\"\\(NIC\\)notisABP6388\\\"\") OR (identifiers=\"\\\"identifierTypeId\\\":\\\"%s\\\"\" AND identifiers=\"\\\"value\\\":\\\"366832\\\"\") OR (identifiers=\"\\\"identifierTypeId\\\":\\\"%s\\\"\" AND identifiers=\"\\\"value\\\":\\\"\\(OCoLC\\)1604275\\\"\")", identifierTypeFieldValue, identifierTypeFieldValue, identifierTypeFieldValue, identifierTypeFieldValue);
String expectedCQLQuery = format("identifiers =/@identifierTypeId=%s \"\\(CStRLIN\\)NYCX1604275S\" OR identifiers =/@identifierTypeId=%s \"\\(NIC\\)notisABP6388\" OR identifiers =/@identifierTypeId=%s \"366832\" OR identifiers =/@identifierTypeId=%s \"\\(OCoLC\\)1604275\"", identifierTypeFieldValue, identifierTypeFieldValue, identifierTypeFieldValue, identifierTypeFieldValue);
assertEquals(expectedCQLQuery, result.getCql());
}

@Test
public void shouldBuildQueryWhere_IdentifierMatching_WithListValue_ContainsCriterion() {
// given
ListValue value = ListValue.of(Arrays.asList(
"(OCoLC)1349275037",
"9924655804502931",
"in00022912564"
));
String identifierTypeFieldValue = "439bfbae-75bc-4f74-9fc7-b2a2d47ce3ef";
MatchDetail matchDetail = new MatchDetail()
.withMatchCriterion(EXISTING_VALUE_CONTAINS_INCOMING_VALUE)
.withIncomingRecordType(EntityType.MARC_BIBLIOGRAPHIC)
.withExistingRecordType(EntityType.INSTANCE)
.withIncomingMatchExpression(new MatchExpression()
.withDataValueType(VALUE_FROM_RECORD)
.withFields(Arrays.asList(
new Field().withLabel("field").withValue("035"),
new Field().withLabel("indicator1").withValue(""),
new Field().withLabel("indicator2").withValue(""),
new Field().withLabel("recordSubfield").withValue("a"))
))
.withExistingMatchExpression(new MatchExpression()
.withDataValueType(VALUE_FROM_RECORD)
.withFields(Arrays.asList(
new Field().withLabel("field").withValue("instance.identifiers[].value"),
new Field().withLabel("identifierTypeId").withValue(identifierTypeFieldValue))
));

// when
LoadQuery result = LoadQueryBuilder.build(value, matchDetail);

// then
assertNotNull(result);
assertEquals(StringUtils.EMPTY, result.getSql());
// For EXISTING_VALUE_CONTAINS_INCOMING_VALUE with identifiers, the CQL should use wildcard matching
String expectedCQLQuery = format("identifiers =/@identifierTypeId=%s \"*\\(OCoLC\\)1349275037*\" OR identifiers =/@identifierTypeId=%s \"*9924655804502931*\" OR identifiers =/@identifierTypeId=%s \"*in00022912564*\"",
identifierTypeFieldValue, identifierTypeFieldValue, identifierTypeFieldValue);
assertEquals(expectedCQLQuery, result.getCql());
}

Expand Down
Loading