Skip to content

improve _source field deduplication logic and enforce data_classifica… #2547

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,18 @@
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.node.ArrayNode;
import com.fasterxml.jackson.databind.node.BooleanNode;
import com.fasterxml.jackson.databind.node.JsonNodeFactory;
import com.fasterxml.jackson.databind.node.ObjectNode;
import com.fasterxml.jackson.databind.node.TextNode;
import lombok.Data;
import lombok.NonNull;
import uk.gov.hmcts.ccd.data.casedetails.search.MetaData;

import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Objects;
import java.util.Set;
import java.util.stream.StreamSupport;

import static java.util.stream.Collectors.toList;
Expand Down Expand Up @@ -41,7 +44,6 @@ public class ElasticsearchRequest {
for (String metadata : MetaData.CaseField.getColumnNames()) {
METADATA_FIELDS.add(new TextNode(metadata));
}
METADATA_FIELDS.add(new TextNode(DATA_CLASSIFICATION_COL));
}

public ElasticsearchRequest(@NonNull JsonNode searchRequest) {
Expand Down Expand Up @@ -145,7 +147,33 @@ private ArrayNode mergedSourceFields() {
.forEach(sd -> sourceFields.add(new TextNode(SUPPLEMENTARY_DATA_PREFIX + sd.asText())));
}

return sourceFields;
addDataClassificationFields(sourceFields);

return removeDuplicateFields(sourceFields);
}

private void addDataClassificationFields(ArrayNode sourceFields) {
Set<String> classificationFields = new HashSet<>();

for (JsonNode field : sourceFields) {
String fieldText = field.asText();
if (fieldText.equals(DATA_COL) || fieldText.startsWith(DATA_COL + ".")) {
String classificationField = fieldText.replaceFirst(DATA_COL, DATA_CLASSIFICATION_COL);
classificationFields.add(classificationField);
}
}

classificationFields.forEach(field -> sourceFields.add(new TextNode(field)));
}

private ArrayNode removeDuplicateFields(ArrayNode sourceFields) {
Set<String> uniqueFields = new HashSet<>();
sourceFields.forEach(field -> uniqueFields.add(field.asText()));

ArrayNode cleanedSourceFields = JsonNodeFactory.instance.arrayNode();
uniqueFields.forEach(field -> cleanedSourceFields.add(new TextNode(field)));

return cleanedSourceFields;
}

private String getFieldId(String fieldSourceName) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.hamcrest.Matchers.hasItem;
import static org.hamcrest.Matchers.not;
import static uk.gov.hmcts.ccd.data.casedetails.CaseDetailsEntity.DATA_CLASSIFICATION_COL;
import static uk.gov.hmcts.ccd.data.casedetails.CaseDetailsEntity.DATA_COL;
import static uk.gov.hmcts.ccd.data.casedetails.search.MetaData.CaseField.CASE_REFERENCE;
Expand All @@ -31,7 +32,7 @@

class ElasticsearchRequestTest {

private ObjectMapper mapper = new ObjectMapper();
private final ObjectMapper mapper = new ObjectMapper();

@Test
void hasSourceFieldsShouldReturnFalseWhenSourceIsMissing() throws JsonProcessingException {
Expand Down Expand Up @@ -98,7 +99,7 @@ void shouldReturnRequestedFieldsAsCaseFieldIds() throws JsonProcessingException

assertAll(
() -> assertThat(requestedFields.size(), is(4)),
() -> assertThat(requestedFields.get(0), is("CaseDataField")),
() -> assertThat(requestedFields.getFirst(), is("CaseDataField")),
() -> assertThat(requestedFields.get(1), is("[CASE_REFERENCE]")),
() -> assertThat(requestedFields.get(2), is("[STATE]")),
() -> assertThat(requestedFields.get(3), is("OtherCaseDataField"))
Expand All @@ -116,7 +117,7 @@ void shouldOnlyReturnRequestedMetadataFieldsThatExist() throws JsonProcessingExc

assertAll(
() -> assertThat(requestedFields.size(), is(1)),
() -> assertThat(requestedFields.get(0), is("[CASE_REFERENCE]"))
() -> assertThat(requestedFields.getFirst(), is("[CASE_REFERENCE]"))
);
}

Expand Down Expand Up @@ -190,16 +191,15 @@ void shouldSetMetadataFieldsArrayNode() throws Exception {
List<String> resultAsList =
new ObjectMapper().readValue(result.traverse(), new TypeReference<ArrayList<String>>(){});
assertAll(
() -> assertThat(result.size(), is(9)),
() -> assertThat(result.size(), is(8)),
() -> assertThat(resultAsList, hasItem(CASE_REFERENCE.getDbColumnName())),
() -> assertThat(resultAsList, hasItem(LAST_STATE_MODIFIED_DATE.getDbColumnName())),
() -> assertThat(resultAsList, hasItem(CREATED_DATE.getDbColumnName())),
() -> assertThat(resultAsList, hasItem(CASE_TYPE.getDbColumnName())),
() -> assertThat(resultAsList, hasItem(JURISDICTION.getDbColumnName())),
() -> assertThat(resultAsList, hasItem(SECURITY_CLASSIFICATION.getDbColumnName())),
() -> assertThat(resultAsList, hasItem(LAST_MODIFIED_DATE.getDbColumnName())),
() -> assertThat(resultAsList, hasItem(STATE.getDbColumnName())),
() -> assertThat(resultAsList, hasItem(DATA_CLASSIFICATION_COL))
() -> assertThat(resultAsList, hasItem(STATE.getDbColumnName()))
);
}

Expand All @@ -221,18 +221,156 @@ void shouldSetSourceFieldsWhenSourceIsProvidedInRequest() throws Exception {
assertAll(
() -> assertThat(sourceFields.size(), is(10)),
() -> assertThat(sourceFields, hasItem("data.name")),
() -> assertThat(sourceFields, hasItem("data_classification.name")),
() -> assertThat(sourceFields, hasItem(CASE_REFERENCE.getDbColumnName())),
() -> assertThat(sourceFields, hasItem(LAST_STATE_MODIFIED_DATE.getDbColumnName())),
() -> assertThat(sourceFields, hasItem(CREATED_DATE.getDbColumnName())),
() -> assertThat(sourceFields, hasItem(CASE_TYPE.getDbColumnName())),
() -> assertThat(sourceFields, hasItem(JURISDICTION.getDbColumnName())),
() -> assertThat(sourceFields, hasItem(SECURITY_CLASSIFICATION.getDbColumnName())),
() -> assertThat(sourceFields, hasItem(LAST_MODIFIED_DATE.getDbColumnName())),
() -> assertThat(sourceFields, hasItem(STATE.getDbColumnName())),
() -> assertThat(sourceFields, hasItem(DATA_CLASSIFICATION_COL))
() -> assertThat(sourceFields, hasItem(STATE.getDbColumnName()))
);
}

@Test
void shouldSetSourceFieldsWhenSourceIsProvidedInRequestWithMultipleDataFields() throws Exception {
String queryNode = """
{
"_source": [
"data.name",
"data.surname",
"data.city",
"data.country"
],
"query": {}
}""";
ElasticsearchRequest elasticsearchRequest = new ElasticsearchRequest(queryAsJsonNode(queryNode));

String result = elasticsearchRequest.toFinalRequest();

JsonNode jsonResult = mapper.readTree(result);
JsonNode sourceNode = jsonResult.get("_source");
List<String> sourceFields =
new ObjectMapper().readValue(sourceNode.traverse(), new TypeReference<ArrayList<String>>(){});

assertAll(
() -> assertThat(sourceFields.size(), is(16)),
() -> assertThat(sourceFields, hasItem("data.name")),
() -> assertThat(sourceFields, hasItem("data_classification.name")),
() -> assertThat(sourceFields, hasItem("data.surname")),
() -> assertThat(sourceFields, hasItem("data_classification.surname")),
() -> assertThat(sourceFields, hasItem("data.city")),
() -> assertThat(sourceFields, hasItem("data_classification.city")),
() -> assertThat(sourceFields, hasItem("data.country")),
() -> assertThat(sourceFields, hasItem("data_classification.country")),
() -> assertThat(sourceFields, hasItem(CASE_REFERENCE.getDbColumnName())),
() -> assertThat(sourceFields, hasItem(LAST_STATE_MODIFIED_DATE.getDbColumnName())),
() -> assertThat(sourceFields, hasItem(CREATED_DATE.getDbColumnName())),
() -> assertThat(sourceFields, hasItem(CASE_TYPE.getDbColumnName())),
() -> assertThat(sourceFields, hasItem(JURISDICTION.getDbColumnName())),
() -> assertThat(sourceFields, hasItem(SECURITY_CLASSIFICATION.getDbColumnName())),
() -> assertThat(sourceFields, hasItem(LAST_MODIFIED_DATE.getDbColumnName())),
() -> assertThat(sourceFields, hasItem(STATE.getDbColumnName()))
);
}

@Test
void shouldSetSourceFieldsWhenSourceIsProvidedInRequestWithDataAsterisk() throws Exception {
String queryNode = """
{
"_source": [
"data.*"
],
"query": {}
}""";
ElasticsearchRequest elasticsearchRequest = new ElasticsearchRequest(queryAsJsonNode(queryNode));

String result = elasticsearchRequest.toFinalRequest();

JsonNode jsonResult = mapper.readTree(result);
JsonNode sourceNode = jsonResult.get("_source");
List<String> sourceFields =
new ObjectMapper().readValue(sourceNode.traverse(), new TypeReference<ArrayList<String>>(){});

assertAll(
() -> assertThat(sourceFields.size(), is(10)),
() -> assertThat(sourceFields, hasItem("data.*")),
() -> assertThat(sourceFields, hasItem("data_classification.*")),
() -> assertThat(sourceFields, hasItem(CASE_REFERENCE.getDbColumnName())),
() -> assertThat(sourceFields, hasItem(LAST_STATE_MODIFIED_DATE.getDbColumnName())),
() -> assertThat(sourceFields, hasItem(CREATED_DATE.getDbColumnName())),
() -> assertThat(sourceFields, hasItem(CASE_TYPE.getDbColumnName())),
() -> assertThat(sourceFields, hasItem(JURISDICTION.getDbColumnName())),
() -> assertThat(sourceFields, hasItem(SECURITY_CLASSIFICATION.getDbColumnName())),
() -> assertThat(sourceFields, hasItem(LAST_MODIFIED_DATE.getDbColumnName())),
() -> assertThat(sourceFields, hasItem(STATE.getDbColumnName()))
);
}

@Test
void shouldSetSourceFieldsWhenSourceIsProvidedInRequestWithEmptySource() throws Exception {
String queryNode = """
{
"_source": [],
"query": {}
}""";
ElasticsearchRequest elasticsearchRequest = new ElasticsearchRequest(queryAsJsonNode(queryNode));

String result = elasticsearchRequest.toFinalRequest();

JsonNode jsonResult = mapper.readTree(result);
JsonNode sourceNode = jsonResult.get("_source");
List<String> sourceFields =
new ObjectMapper().readValue(sourceNode.traverse(), new TypeReference<ArrayList<String>>(){});

assertAll(
() -> assertThat(sourceFields.size(), is(10)),
() -> assertThat(sourceFields, hasItem(DATA_COL)),
() -> assertThat(sourceFields, hasItem(DATA_CLASSIFICATION_COL)),
() -> assertThat(sourceFields, hasItem(CASE_REFERENCE.getDbColumnName())),
() -> assertThat(sourceFields, hasItem(LAST_STATE_MODIFIED_DATE.getDbColumnName())),
() -> assertThat(sourceFields, hasItem(CREATED_DATE.getDbColumnName())),
() -> assertThat(sourceFields, hasItem(CASE_TYPE.getDbColumnName())),
() -> assertThat(sourceFields, hasItem(JURISDICTION.getDbColumnName())),
() -> assertThat(sourceFields, hasItem(SECURITY_CLASSIFICATION.getDbColumnName())),
() -> assertThat(sourceFields, hasItem(LAST_MODIFIED_DATE.getDbColumnName())),
() -> assertThat(sourceFields, hasItem(STATE.getDbColumnName()))
);
}

@Test
void shouldSetSourceFieldsWhenSourceIsProvidedInRequestWithoutDataAndNonEmptySource() throws Exception {
String queryNode = """
{
"_source": ["reference"],
"query": {}
}""";
ElasticsearchRequest elasticsearchRequest = new ElasticsearchRequest(queryAsJsonNode(queryNode));

String result = elasticsearchRequest.toFinalRequest();

JsonNode jsonResult = mapper.readTree(result);
JsonNode sourceNode = jsonResult.get("_source");
List<String> sourceFields =
new ObjectMapper().readValue(sourceNode.traverse(), new TypeReference<ArrayList<String>>(){});

assertAll(
() -> assertThat(sourceFields.size(), is(8)),
() -> assertThat(sourceFields, not(hasItem(DATA_COL))),
() -> assertThat(sourceFields, not(hasItem(DATA_CLASSIFICATION_COL))),
() -> assertThat(sourceFields, hasItem(CASE_REFERENCE.getDbColumnName())),
() -> assertThat(sourceFields, hasItem(LAST_STATE_MODIFIED_DATE.getDbColumnName())),
() -> assertThat(sourceFields, hasItem(CREATED_DATE.getDbColumnName())),
() -> assertThat(sourceFields, hasItem(CASE_TYPE.getDbColumnName())),
() -> assertThat(sourceFields, hasItem(JURISDICTION.getDbColumnName())),
() -> assertThat(sourceFields, hasItem(SECURITY_CLASSIFICATION.getDbColumnName())),
() -> assertThat(sourceFields, hasItem(LAST_MODIFIED_DATE.getDbColumnName())),
() -> assertThat(sourceFields, hasItem(STATE.getDbColumnName()))
);
}


@Test
void shouldSetSourceFieldsWhenSourceIsNotProvidedInRequest() throws Exception {
JsonNode queryNode = queryAsJsonNode("{\"query\":{}}");
Expand Down