From 6d4fde89562f1d94ab840a65d5fb30398c580f12 Mon Sep 17 00:00:00 2001 From: kaanaktas Date: Fri, 14 Mar 2025 15:45:00 +0000 Subject: [PATCH] improve _source field deduplication logic and enforce data_classification mapping to match data fields for consistency --- .../elasticsearch/ElasticsearchRequest.java | 32 +++- .../ElasticsearchRequestTest.java | 154 +++++++++++++++++- 2 files changed, 176 insertions(+), 10 deletions(-) diff --git a/src/main/java/uk/gov/hmcts/ccd/domain/model/search/elasticsearch/ElasticsearchRequest.java b/src/main/java/uk/gov/hmcts/ccd/domain/model/search/elasticsearch/ElasticsearchRequest.java index ec6204a994..29bd090348 100644 --- a/src/main/java/uk/gov/hmcts/ccd/domain/model/search/elasticsearch/ElasticsearchRequest.java +++ b/src/main/java/uk/gov/hmcts/ccd/domain/model/search/elasticsearch/ElasticsearchRequest.java @@ -3,6 +3,7 @@ import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.node.ArrayNode; import com.fasterxml.jackson.databind.node.BooleanNode; +import com.fasterxml.jackson.databind.node.JsonNodeFactory; import com.fasterxml.jackson.databind.node.ObjectNode; import com.fasterxml.jackson.databind.node.TextNode; import lombok.Data; @@ -10,8 +11,10 @@ import uk.gov.hmcts.ccd.data.casedetails.search.MetaData; import java.util.Collections; +import java.util.HashSet; import java.util.List; import java.util.Objects; +import java.util.Set; import java.util.stream.StreamSupport; import static java.util.stream.Collectors.toList; @@ -41,7 +44,6 @@ public class ElasticsearchRequest { for (String metadata : MetaData.CaseField.getColumnNames()) { METADATA_FIELDS.add(new TextNode(metadata)); } - METADATA_FIELDS.add(new TextNode(DATA_CLASSIFICATION_COL)); } public ElasticsearchRequest(@NonNull JsonNode searchRequest) { @@ -145,7 +147,33 @@ private ArrayNode mergedSourceFields() { .forEach(sd -> sourceFields.add(new TextNode(SUPPLEMENTARY_DATA_PREFIX + sd.asText()))); } - return sourceFields; + addDataClassificationFields(sourceFields); + + return removeDuplicateFields(sourceFields); + } + + private void addDataClassificationFields(ArrayNode sourceFields) { + Set classificationFields = new HashSet<>(); + + for (JsonNode field : sourceFields) { + String fieldText = field.asText(); + if (fieldText.equals(DATA_COL) || fieldText.startsWith(DATA_COL + ".")) { + String classificationField = fieldText.replaceFirst(DATA_COL, DATA_CLASSIFICATION_COL); + classificationFields.add(classificationField); + } + } + + classificationFields.forEach(field -> sourceFields.add(new TextNode(field))); + } + + private ArrayNode removeDuplicateFields(ArrayNode sourceFields) { + Set uniqueFields = new HashSet<>(); + sourceFields.forEach(field -> uniqueFields.add(field.asText())); + + ArrayNode cleanedSourceFields = JsonNodeFactory.instance.arrayNode(); + uniqueFields.forEach(field -> cleanedSourceFields.add(new TextNode(field))); + + return cleanedSourceFields; } private String getFieldId(String fieldSourceName) { diff --git a/src/test/java/uk/gov/hmcts/ccd/domain/model/search/elasticsearch/ElasticsearchRequestTest.java b/src/test/java/uk/gov/hmcts/ccd/domain/model/search/elasticsearch/ElasticsearchRequestTest.java index 16a7b29f5d..d31128ddac 100644 --- a/src/test/java/uk/gov/hmcts/ccd/domain/model/search/elasticsearch/ElasticsearchRequestTest.java +++ b/src/test/java/uk/gov/hmcts/ccd/domain/model/search/elasticsearch/ElasticsearchRequestTest.java @@ -18,6 +18,7 @@ import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertTrue; import static org.hamcrest.Matchers.hasItem; +import static org.hamcrest.Matchers.not; import static uk.gov.hmcts.ccd.data.casedetails.CaseDetailsEntity.DATA_CLASSIFICATION_COL; import static uk.gov.hmcts.ccd.data.casedetails.CaseDetailsEntity.DATA_COL; import static uk.gov.hmcts.ccd.data.casedetails.search.MetaData.CaseField.CASE_REFERENCE; @@ -31,7 +32,7 @@ class ElasticsearchRequestTest { - private ObjectMapper mapper = new ObjectMapper(); + private final ObjectMapper mapper = new ObjectMapper(); @Test void hasSourceFieldsShouldReturnFalseWhenSourceIsMissing() throws JsonProcessingException { @@ -98,7 +99,7 @@ void shouldReturnRequestedFieldsAsCaseFieldIds() throws JsonProcessingException assertAll( () -> assertThat(requestedFields.size(), is(4)), - () -> assertThat(requestedFields.get(0), is("CaseDataField")), + () -> assertThat(requestedFields.getFirst(), is("CaseDataField")), () -> assertThat(requestedFields.get(1), is("[CASE_REFERENCE]")), () -> assertThat(requestedFields.get(2), is("[STATE]")), () -> assertThat(requestedFields.get(3), is("OtherCaseDataField")) @@ -116,7 +117,7 @@ void shouldOnlyReturnRequestedMetadataFieldsThatExist() throws JsonProcessingExc assertAll( () -> assertThat(requestedFields.size(), is(1)), - () -> assertThat(requestedFields.get(0), is("[CASE_REFERENCE]")) + () -> assertThat(requestedFields.getFirst(), is("[CASE_REFERENCE]")) ); } @@ -190,7 +191,7 @@ void shouldSetMetadataFieldsArrayNode() throws Exception { List resultAsList = new ObjectMapper().readValue(result.traverse(), new TypeReference>(){}); assertAll( - () -> assertThat(result.size(), is(9)), + () -> assertThat(result.size(), is(8)), () -> assertThat(resultAsList, hasItem(CASE_REFERENCE.getDbColumnName())), () -> assertThat(resultAsList, hasItem(LAST_STATE_MODIFIED_DATE.getDbColumnName())), () -> assertThat(resultAsList, hasItem(CREATED_DATE.getDbColumnName())), @@ -198,8 +199,7 @@ void shouldSetMetadataFieldsArrayNode() throws Exception { () -> assertThat(resultAsList, hasItem(JURISDICTION.getDbColumnName())), () -> assertThat(resultAsList, hasItem(SECURITY_CLASSIFICATION.getDbColumnName())), () -> assertThat(resultAsList, hasItem(LAST_MODIFIED_DATE.getDbColumnName())), - () -> assertThat(resultAsList, hasItem(STATE.getDbColumnName())), - () -> assertThat(resultAsList, hasItem(DATA_CLASSIFICATION_COL)) + () -> assertThat(resultAsList, hasItem(STATE.getDbColumnName())) ); } @@ -221,6 +221,7 @@ void shouldSetSourceFieldsWhenSourceIsProvidedInRequest() throws Exception { assertAll( () -> assertThat(sourceFields.size(), is(10)), () -> assertThat(sourceFields, hasItem("data.name")), + () -> assertThat(sourceFields, hasItem("data_classification.name")), () -> assertThat(sourceFields, hasItem(CASE_REFERENCE.getDbColumnName())), () -> assertThat(sourceFields, hasItem(LAST_STATE_MODIFIED_DATE.getDbColumnName())), () -> assertThat(sourceFields, hasItem(CREATED_DATE.getDbColumnName())), @@ -228,11 +229,148 @@ void shouldSetSourceFieldsWhenSourceIsProvidedInRequest() throws Exception { () -> assertThat(sourceFields, hasItem(JURISDICTION.getDbColumnName())), () -> assertThat(sourceFields, hasItem(SECURITY_CLASSIFICATION.getDbColumnName())), () -> assertThat(sourceFields, hasItem(LAST_MODIFIED_DATE.getDbColumnName())), - () -> assertThat(sourceFields, hasItem(STATE.getDbColumnName())), - () -> assertThat(sourceFields, hasItem(DATA_CLASSIFICATION_COL)) + () -> assertThat(sourceFields, hasItem(STATE.getDbColumnName())) + ); + } + + @Test + void shouldSetSourceFieldsWhenSourceIsProvidedInRequestWithMultipleDataFields() throws Exception { + String queryNode = """ + { + "_source": [ + "data.name", + "data.surname", + "data.city", + "data.country" + ], + "query": {} + }"""; + ElasticsearchRequest elasticsearchRequest = new ElasticsearchRequest(queryAsJsonNode(queryNode)); + + String result = elasticsearchRequest.toFinalRequest(); + + JsonNode jsonResult = mapper.readTree(result); + JsonNode sourceNode = jsonResult.get("_source"); + List sourceFields = + new ObjectMapper().readValue(sourceNode.traverse(), new TypeReference>(){}); + + assertAll( + () -> assertThat(sourceFields.size(), is(16)), + () -> assertThat(sourceFields, hasItem("data.name")), + () -> assertThat(sourceFields, hasItem("data_classification.name")), + () -> assertThat(sourceFields, hasItem("data.surname")), + () -> assertThat(sourceFields, hasItem("data_classification.surname")), + () -> assertThat(sourceFields, hasItem("data.city")), + () -> assertThat(sourceFields, hasItem("data_classification.city")), + () -> assertThat(sourceFields, hasItem("data.country")), + () -> assertThat(sourceFields, hasItem("data_classification.country")), + () -> assertThat(sourceFields, hasItem(CASE_REFERENCE.getDbColumnName())), + () -> assertThat(sourceFields, hasItem(LAST_STATE_MODIFIED_DATE.getDbColumnName())), + () -> assertThat(sourceFields, hasItem(CREATED_DATE.getDbColumnName())), + () -> assertThat(sourceFields, hasItem(CASE_TYPE.getDbColumnName())), + () -> assertThat(sourceFields, hasItem(JURISDICTION.getDbColumnName())), + () -> assertThat(sourceFields, hasItem(SECURITY_CLASSIFICATION.getDbColumnName())), + () -> assertThat(sourceFields, hasItem(LAST_MODIFIED_DATE.getDbColumnName())), + () -> assertThat(sourceFields, hasItem(STATE.getDbColumnName())) + ); + } + + @Test + void shouldSetSourceFieldsWhenSourceIsProvidedInRequestWithDataAsterisk() throws Exception { + String queryNode = """ + { + "_source": [ + "data.*" + ], + "query": {} + }"""; + ElasticsearchRequest elasticsearchRequest = new ElasticsearchRequest(queryAsJsonNode(queryNode)); + + String result = elasticsearchRequest.toFinalRequest(); + + JsonNode jsonResult = mapper.readTree(result); + JsonNode sourceNode = jsonResult.get("_source"); + List sourceFields = + new ObjectMapper().readValue(sourceNode.traverse(), new TypeReference>(){}); + + assertAll( + () -> assertThat(sourceFields.size(), is(10)), + () -> assertThat(sourceFields, hasItem("data.*")), + () -> assertThat(sourceFields, hasItem("data_classification.*")), + () -> assertThat(sourceFields, hasItem(CASE_REFERENCE.getDbColumnName())), + () -> assertThat(sourceFields, hasItem(LAST_STATE_MODIFIED_DATE.getDbColumnName())), + () -> assertThat(sourceFields, hasItem(CREATED_DATE.getDbColumnName())), + () -> assertThat(sourceFields, hasItem(CASE_TYPE.getDbColumnName())), + () -> assertThat(sourceFields, hasItem(JURISDICTION.getDbColumnName())), + () -> assertThat(sourceFields, hasItem(SECURITY_CLASSIFICATION.getDbColumnName())), + () -> assertThat(sourceFields, hasItem(LAST_MODIFIED_DATE.getDbColumnName())), + () -> assertThat(sourceFields, hasItem(STATE.getDbColumnName())) ); } + @Test + void shouldSetSourceFieldsWhenSourceIsProvidedInRequestWithEmptySource() throws Exception { + String queryNode = """ + { + "_source": [], + "query": {} + }"""; + ElasticsearchRequest elasticsearchRequest = new ElasticsearchRequest(queryAsJsonNode(queryNode)); + + String result = elasticsearchRequest.toFinalRequest(); + + JsonNode jsonResult = mapper.readTree(result); + JsonNode sourceNode = jsonResult.get("_source"); + List sourceFields = + new ObjectMapper().readValue(sourceNode.traverse(), new TypeReference>(){}); + + assertAll( + () -> assertThat(sourceFields.size(), is(10)), + () -> assertThat(sourceFields, hasItem(DATA_COL)), + () -> assertThat(sourceFields, hasItem(DATA_CLASSIFICATION_COL)), + () -> assertThat(sourceFields, hasItem(CASE_REFERENCE.getDbColumnName())), + () -> assertThat(sourceFields, hasItem(LAST_STATE_MODIFIED_DATE.getDbColumnName())), + () -> assertThat(sourceFields, hasItem(CREATED_DATE.getDbColumnName())), + () -> assertThat(sourceFields, hasItem(CASE_TYPE.getDbColumnName())), + () -> assertThat(sourceFields, hasItem(JURISDICTION.getDbColumnName())), + () -> assertThat(sourceFields, hasItem(SECURITY_CLASSIFICATION.getDbColumnName())), + () -> assertThat(sourceFields, hasItem(LAST_MODIFIED_DATE.getDbColumnName())), + () -> assertThat(sourceFields, hasItem(STATE.getDbColumnName())) + ); + } + + @Test + void shouldSetSourceFieldsWhenSourceIsProvidedInRequestWithoutDataAndNonEmptySource() throws Exception { + String queryNode = """ + { + "_source": ["reference"], + "query": {} + }"""; + ElasticsearchRequest elasticsearchRequest = new ElasticsearchRequest(queryAsJsonNode(queryNode)); + + String result = elasticsearchRequest.toFinalRequest(); + + JsonNode jsonResult = mapper.readTree(result); + JsonNode sourceNode = jsonResult.get("_source"); + List sourceFields = + new ObjectMapper().readValue(sourceNode.traverse(), new TypeReference>(){}); + + assertAll( + () -> assertThat(sourceFields.size(), is(8)), + () -> assertThat(sourceFields, not(hasItem(DATA_COL))), + () -> assertThat(sourceFields, not(hasItem(DATA_CLASSIFICATION_COL))), + () -> assertThat(sourceFields, hasItem(CASE_REFERENCE.getDbColumnName())), + () -> assertThat(sourceFields, hasItem(LAST_STATE_MODIFIED_DATE.getDbColumnName())), + () -> assertThat(sourceFields, hasItem(CREATED_DATE.getDbColumnName())), + () -> assertThat(sourceFields, hasItem(CASE_TYPE.getDbColumnName())), + () -> assertThat(sourceFields, hasItem(JURISDICTION.getDbColumnName())), + () -> assertThat(sourceFields, hasItem(SECURITY_CLASSIFICATION.getDbColumnName())), + () -> assertThat(sourceFields, hasItem(LAST_MODIFIED_DATE.getDbColumnName())), + () -> assertThat(sourceFields, hasItem(STATE.getDbColumnName())) + ); + } + + @Test void shouldSetSourceFieldsWhenSourceIsNotProvidedInRequest() throws Exception { JsonNode queryNode = queryAsJsonNode("{\"query\":{}}");