diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/ingestionrecipes/DisableUrnLowercasingConfig.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/ingestionrecipes/DisableUrnLowercasingConfig.java
new file mode 100644
index 00000000000000..b4c35422d6e5b4
--- /dev/null
+++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/ingestionrecipes/DisableUrnLowercasingConfig.java
@@ -0,0 +1,39 @@
+package com.linkedin.datahub.upgrade.config.ingestionrecipes;
+
+import com.linkedin.datahub.upgrade.config.SystemUpdateCondition;
+import com.linkedin.datahub.upgrade.system.NonBlockingSystemUpgrade;
+import com.linkedin.datahub.upgrade.system.ingestionrecipes.DisableUrnLowercasing;
+import com.linkedin.metadata.entity.EntityService;
+import io.datahubproject.metadata.context.OperationContext;
+import org.springframework.beans.factory.annotation.Value;
+import org.springframework.context.annotation.Bean;
+import org.springframework.context.annotation.Conditional;
+import org.springframework.context.annotation.Configuration;
+
+/**
+ * Spring configuration that registers the {@link DisableUrnLowercasing} upgrade. The configuration
+ * is only active when {@link SystemUpdateCondition.NonBlockingSystemUpdateCondition} matches.
+ */
+@Configuration
+@Conditional(SystemUpdateCondition.NonBlockingSystemUpdateCondition.class)
+public class DisableUrnLowercasingConfig {
+
+  /**
+   * Creates the {@link DisableUrnLowercasing} upgrade bean.
+   *
+   * @param opContext operation context handed to the upgrade
+   * @param entityService entity service handed to the upgrade
+   * @param enabled value of {@code systemUpdate.disableUrnLowercasing.enabled}, forwarded to the
+   *     upgrade unchanged
+   * @return the upgrade, typed as {@link NonBlockingSystemUpgrade}
+   */
+  @Bean
+  public NonBlockingSystemUpgrade disableUrnLowercasing(
+      final OperationContext opContext,
+      final EntityService entityService,
+      @Value("${systemUpdate.disableUrnLowercasing.enabled}") final boolean enabled) {
+    final NonBlockingSystemUpgrade upgrade =
+        new DisableUrnLowercasing(opContext, entityService, enabled);
+    return upgrade;
+  }
+}
diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/ingestionrecipes/DisableUrnLowercasing.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/ingestionrecipes/DisableUrnLowercasing.java
new file mode 100644
index 00000000000000..a17a360eb1f0a5
--- /dev/null
+++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/ingestionrecipes/DisableUrnLowercasing.java
@@ -0,0 +1,39 @@
+package 
com.linkedin.datahub.upgrade.system.ingestionrecipes;
+
+import com.google.common.collect.ImmutableList;
+import com.linkedin.datahub.upgrade.UpgradeStep;
+import com.linkedin.datahub.upgrade.system.NonBlockingSystemUpgrade;
+import com.linkedin.metadata.entity.EntityService;
+import io.datahubproject.metadata.context.OperationContext;
+import java.util.List;
+import javax.annotation.Nonnull;
+import lombok.extern.slf4j.Slf4j;
+
+/**
+ * Non-blocking system upgrade that runs {@link DisableUrnLowercasingStep}, the step that adds
+ * {@code convert_urns_to_lowercase: false} to stored ingestion recipes that do not set the flag.
+ * When the upgrade is constructed with {@code enabled == false} it exposes no steps.
+ */
+@Slf4j
+public class DisableUrnLowercasing implements NonBlockingSystemUpgrade {
+
+  /** Steps to run; empty when the upgrade is disabled. Immutable. */
+  private final List<UpgradeStep> _steps;
+
+  /**
+   * @param opContext operation context passed to the step
+   * @param entityService entity service passed to the step
+   * @param enabled whether the {@link DisableUrnLowercasingStep} is registered
+   */
+  public DisableUrnLowercasing(
+      @Nonnull OperationContext opContext, EntityService entityService, boolean enabled) {
+    if (enabled) {
+      _steps = ImmutableList.of(new DisableUrnLowercasingStep(opContext, entityService));
+    } else {
+      _steps = ImmutableList.of();
+    }
+  }
+
+  /** Returns the fully qualified class name as the upgrade id. */
+  @Override
+  public String id() {
+    return this.getClass().getName();
+  }
+
+  /** Returns the steps chosen at construction time. */
+  @Override
+  public List<UpgradeStep> steps() {
+    return _steps;
+  }
+}
diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/ingestionrecipes/DisableUrnLowercasingStep.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/ingestionrecipes/DisableUrnLowercasingStep.java
new file mode 100644
index 00000000000000..34b6dedd5b8bc0
--- /dev/null
+++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/ingestionrecipes/DisableUrnLowercasingStep.java
@@ -0,0 +1,162 @@
+package com.linkedin.datahub.upgrade.system.ingestionrecipes;
+
+import static com.linkedin.metadata.Constants.*;
+
+import com.fasterxml.jackson.core.JsonProcessingException;
+import com.fasterxml.jackson.databind.JsonNode;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.fasterxml.jackson.databind.node.ObjectNode;
+import com.linkedin.common.AuditStamp;
+import com.linkedin.common.urn.Urn;
+import 
com.linkedin.data.template.RecordTemplate;
+import com.linkedin.datahub.upgrade.UpgradeContext;
+import com.linkedin.datahub.upgrade.UpgradeStep;
+import com.linkedin.datahub.upgrade.UpgradeStepResult;
+import com.linkedin.datahub.upgrade.impl.DefaultUpgradeStepResult;
+import com.linkedin.events.metadata.ChangeType;
+import com.linkedin.ingestion.DataHubIngestionSourceInfo;
+import com.linkedin.metadata.Constants;
+import com.linkedin.metadata.boot.BootstrapStep;
+import com.linkedin.metadata.entity.EntityService;
+import com.linkedin.metadata.entity.ListResult;
+import com.linkedin.metadata.utils.GenericRecordUtils;
+import com.linkedin.mxe.MetadataChangeProposal;
+import com.linkedin.upgrade.DataHubUpgradeState;
+import io.datahubproject.metadata.context.OperationContext;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+import java.util.function.Function;
+import lombok.extern.slf4j.Slf4j;
+
+/**
+ * Upgrade step that adds {@code convert_urns_to_lowercase: false} to stored ingestion recipes.
+ *
+ * <p>Every ingestion source info aspect is listed page by page. A source is rewritten only when
+ * its type is in {@link #SUPPORTED_TYPES}, its executor id is not {@link
+ * #DATAHUB_CLI_EXECUTOR_ID}, and its recipe has a {@code source.config} object that does not
+ * already contain the {@code convert_urns_to_lowercase} key.
+ */
+@Slf4j
+public class DisableUrnLowercasingStep implements UpgradeStep {
+
+  public static final String RECIPE_SOURCE_NODE_NAME = "source";
+  public static final String RECIPE_CONFIG_NODE_NAME = "config";
+  public static final String CONVERT_URNS_TO_LOWERCASE_CONFIG_NAME = "convert_urns_to_lowercase";
+  private static final String UPGRADE_ID = "DisableUrnLowercasingStep";
+  public static final String DATAHUB_CLI_EXECUTOR_ID = "__datahub_cli_";
+  private static final Urn UPGRADE_ID_URN = BootstrapStep.getUpgradeUrn(UPGRADE_ID);
+
+  /**
+   * Ingestion source types whose recipes are rewritten.
+   *
+   * <p>NOTE(review): the list previously named "presto" twice. The duplicate is dropped because it
+   * has no effect on {@code contains}; confirm whether a different source type was intended.
+   */
+  public static final List<String> SUPPORTED_TYPES =
+      Collections.unmodifiableList(
+          Arrays.asList(
+              "athena",
+              "bigquery",
+              "clickhouse",
+              "cockroachdb",
+              "druid",
+              "hana",
+              "hive",
+              "hive_metastore",
+              "mariadb",
+              "mysql",
+              "oracle",
+              "postgres",
+              "presto",
+              "redshift",
+              "teradata",
+              "trino",
+              "vertica"));
+
+  /** Number of ingestion sources requested per page. */
+  private static final int BATCH_SIZE = 100;
+
+  private final OperationContext opContext;
+  private final EntityService entityService;
+  private final ObjectMapper mapper = new ObjectMapper();
+
+  /**
+   * @param opContext operation context used for listing and ingesting aspects
+   * @param entityService entity service used for listing and ingesting aspects
+   */
+  public DisableUrnLowercasingStep(OperationContext opContext, EntityService entityService) {
+    this.opContext = opContext;
+    this.entityService = entityService;
+  }
+
+  @Override
+  public String id() {
+    return "ingestion-recipe-urn-lowercasing-v1";
+  }
+
+  /**
+   * Returns the function that performs the migration.
+   *
+   * <p>The function pages through the latest ingestion info aspects, rewrites eligible recipes,
+   * records the upgrade result and returns a {@link DataHubUpgradeState#SUCCEEDED} result. It
+   * throws a {@link RuntimeException} when a page's aspects and urns differ in length or when a
+   * recipe cannot be parsed as JSON.
+   */
+  @Override
+  public Function<UpgradeContext, UpgradeStepResult> executable() {
+    return (context) -> {
+      int start = 0;
+      boolean hasMore = true;
+
+      while (hasMore) {
+        final ListResult<RecordTemplate> entities =
+            this.entityService.listLatestAspects(
+                this.opContext,
+                INGESTION_SOURCE_ENTITY_NAME,
+                INGESTION_INFO_ASPECT_NAME,
+                start,
+                BATCH_SIZE);
+        final List<RecordTemplate> aspects = entities.getValues();
+        final int urnCount = entities.getMetadata().getExtraInfos().size();
+        if (aspects.size() != urnCount) {
+          // Aspects and urns are paired by position, so a length mismatch makes the page unusable.
+          log.warn(
+              "Failed to match ingestion source info aspects with corresponding urns. Found"
+                  + " mismatched length between aspects ({}) and metadata ({}) for metadata {}",
+              aspects.size(),
+              urnCount,
+              entities.getMetadata());
+          throw new RuntimeException(
+              "Failed to match ingestion source info aspects with corresponding urns");
+        }
+
+        // Iterate by position: the urn for aspect i is extra info i. A counter that only advances
+        // for processed sources would pair later sources with the wrong urn after a skip.
+        for (int i = 0; i < aspects.size(); i++) {
+          final DataHubIngestionSourceInfo sourceInfo = (DataHubIngestionSourceInfo) aspects.get(i);
+          if (!isEligible(sourceInfo)) {
+            continue;
+          }
+          final Urn urn = entities.getMetadata().getExtraInfos().get(i).getUrn();
+          try {
+            if (updateRecipe(sourceInfo)) {
+              updateSourceInfo(sourceInfo, urn);
+            }
+          } catch (JsonProcessingException e) {
+            throw new RuntimeException("Failed to process recipe of ingestion source " + urn, e);
+          }
+        }
+
+        // A short page means the listing is exhausted.
+        if (aspects.size() < BATCH_SIZE) {
+          hasMore = false;
+        } else {
+          start += BATCH_SIZE;
+        }
+      }
+      BootstrapStep.setUpgradeResult(context.opContext(), UPGRADE_ID_URN, entityService);
+      return new DefaultUpgradeStepResult(id(), DataHubUpgradeState.SUCCEEDED);
+    };
+  }
+
+  /** Returns true when the source type is supported and the source does not use the CLI executor. */
+  private static boolean isEligible(DataHubIngestionSourceInfo sourceInfo) {
+    return SUPPORTED_TYPES.contains(sourceInfo.getType())
+        && !DATAHUB_CLI_EXECUTOR_ID.equals(sourceInfo.getConfig().getExecutorId());
+  }
+
+  /**
+   * Adds {@code convert_urns_to_lowercase: false} to the recipe's {@code source.config} object when
+   * that object exists and does not already carry the key. The recipe string on {@code sourceInfo}
+   * is replaced in place.
+   *
+   * @param sourceInfo ingestion source whose recipe is inspected; {@code null} is a no-op
+   * @return {@code true} when the recipe was changed
+   * @throws JsonProcessingException when the recipe cannot be parsed or re-serialized
+   */
+  private boolean updateRecipe(DataHubIngestionSourceInfo sourceInfo)
+      throws JsonProcessingException {
+    if (sourceInfo == null) {
+      return false;
+    }
+    final JsonNode rootNode = mapper.readTree(sourceInfo.getConfig().getRecipe());
+    if (rootNode == null) {
+      return false;
+    }
+    // path() yields a missing node (never null) when a level is absent or not an object, so the
+    // single isObject() check below covers both "source" and "config".
+    final JsonNode configNode =
+        rootNode.path(RECIPE_SOURCE_NODE_NAME).path(RECIPE_CONFIG_NODE_NAME);
+    if (!configNode.isObject() || configNode.has(CONVERT_URNS_TO_LOWERCASE_CONFIG_NAME)) {
+      return false;
+    }
+    ((ObjectNode) configNode).put(CONVERT_URNS_TO_LOWERCASE_CONFIG_NAME, false);
+    sourceInfo.getConfig().setRecipe(mapper.writeValueAsString(rootNode));
+    return true;
+  }
+
+  /**
+   * Upserts the given ingestion source info aspect for {@code urn}.
+   *
+   * @param sourceInfo aspect value to write
+   * @param urn urn of the ingestion source the aspect belongs to
+   */
+  private void updateSourceInfo(DataHubIngestionSourceInfo sourceInfo, Urn urn) {
+    final MetadataChangeProposal proposal = new MetadataChangeProposal();
+    proposal.setEntityUrn(urn);
+    proposal.setEntityType(Constants.INGESTION_SOURCE_ENTITY_NAME);
+    proposal.setAspectName(Constants.INGESTION_INFO_ASPECT_NAME);
+    proposal.setAspect(GenericRecordUtils.serializeAspect(sourceInfo));
+    proposal.setChangeType(ChangeType.UPSERT);
+    log.info("About to ingest datahub ingestion source metadata {}", proposal);
+    final AuditStamp auditStamp = opContext.getAuditStamp();
+    this.entityService.ingestProposal(this.opContext, proposal, auditStamp, false);
+  }
+}
diff --git a/datahub-upgrade/src/test/java/com/linkedin/datahub/upgrade/system/ingestionrecipes/DisableUrnLowercasingStepTest.java b/datahub-upgrade/src/test/java/com/linkedin/datahub/upgrade/system/ingestionrecipes/DisableUrnLowercasingStepTest.java
new file mode 100644
index 00000000000000..76698e863633c4
--- /dev/null
+++ b/datahub-upgrade/src/test/java/com/linkedin/datahub/upgrade/system/ingestionrecipes/DisableUrnLowercasingStepTest.java
@@ -0,0 +1,243 @@
+package com.linkedin.datahub.upgrade.system.ingestionrecipes;
+
+import static com.linkedin.metadata.Constants.*;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.mockito.ArgumentMatchers.*;
+import static org.mockito.Mockito.*;
+
+import com.linkedin.common.urn.Urn;
+import com.linkedin.data.template.RecordTemplate;
+import com.linkedin.datahub.upgrade.UpgradeContext;
+import com.linkedin.ingestion.DataHubIngestionSourceConfig;
+import com.linkedin.ingestion.DataHubIngestionSourceInfo;
+import com.linkedin.metadata.entity.EntityService;
+import com.linkedin.metadata.entity.ListResult;
+import com.linkedin.metadata.query.ExtraInfo;
+import com.linkedin.metadata.query.ExtraInfoArray;
+import com.linkedin.metadata.query.ListResultMetadata;
+import com.linkedin.metadata.utils.GenericRecordUtils;
+import com.linkedin.mxe.MetadataChangeProposal;
+import io.datahubproject.metadata.context.OperationContext;
+import io.datahubproject.metadata.context.RetrieverContext;
+import java.util.Collections;
+import org.mockito.ArgumentCaptor;
+import org.mockito.Mock;
+import org.mockito.MockitoAnnotations;
+import org.testng.annotations.BeforeMethod;
+import org.testng.annotations.Test;
+
+/**
+ * Tests for {@link DisableUrnLowercasingStep}. Each test stubs a single ingestion source and checks
+ * how many proposals the step ingests.
+ */
+class DisableUrnLowercasingStepTest {
+
+  /** Recipe whose {@code source.config} does not set {@code convert_urns_to_lowercase}. */
+  private static final String RECIPE_WITHOUT_FLAG = "{\"source\":{\"config\":{\"job_id\":true}}}";
+
+  @Mock private OperationContext mockOpContext;
+
+  @Mock private EntityService mockEntityService;
+
+  @Mock private RetrieverContext mockRetrieverContext;
+
+  @Mock private UpgradeContext upgradeContext;
+
+  private DisableUrnLowercasingStep step;
+
+  @BeforeMethod
+  public void setup() {
+    MockitoAnnotations.openMocks(this);
+    step = new DisableUrnLowercasingStep(mockOpContext, mockEntityService);
+    when(mockOpContext.getRetrieverContext()).thenReturn(mockRetrieverContext);
+  }
+
+  @Test
+  public void testDisableUrnLowercasing() throws Exception {
+    String expectedJsonString =
+        "{\"source\":{\"config\":{\"job_id\":true,\"convert_urns_to_lowercase\":false}}}";
+    stubSingleIngestionSource("bigquery", "default", RECIPE_WITHOUT_FLAG);
+
+    step.executable().apply(upgradeContext);
+
+    ArgumentCaptor<MetadataChangeProposal> mcpArgumentCaptor =
+        ArgumentCaptor.forClass(MetadataChangeProposal.class);
+    // Two proposals: the rewritten source info first, then presumably the upgrade-result write
+    // made through BootstrapStep.setUpgradeResult at the end of the step (confirm).
+    verify(mockEntityService, times(2))
+        .ingestProposal(any(), mcpArgumentCaptor.capture(), any(), eq(false));
+    MetadataChangeProposal mcp = mcpArgumentCaptor.getAllValues().get(0);
+    DataHubIngestionSourceInfo aspect =
+        GenericRecordUtils.deserializeAspect(
+            mcp.getAspect().getValue(),
+            mcp.getAspect().getContentType(),
+            DataHubIngestionSourceInfo.class);
+    assertEquals(expectedJsonString, aspect.getConfig().getRecipe());
+  }
+
+  @Test
+  public void testDisableUrnLowercasingShouldNotTouchExistingValue() throws Exception {
+    stubSingleIngestionSource(
+        "bigquery",
+        "default",
+        "{\"source\":{\"config\":{\"job_id\":true,\"convert_urns_to_lowercase\":true}}}");
+
+    step.executable().apply(upgradeContext);
+
+    // Only one proposal is ingested, i.e. no recipe rewrite happened.
+    verify(mockEntityService, times(1)).ingestProposal(any(), any(), any(), eq(false));
+  }
+
+  @Test
+  public void testDisableUrnLowercasingShouldNotTouchExistingValueEvenIfItFalse() throws Exception {
+    stubSingleIngestionSource(
+        "bigquery",
+        "default",
+        "{\"source\":{\"config\":{\"job_id\":true,\"convert_urns_to_lowercase\":false}}}");
+
+    step.executable().apply(upgradeContext);
+
+    // Only one proposal is ingested, i.e. no recipe rewrite happened.
+    verify(mockEntityService, times(1)).ingestProposal(any(), any(), any(), anyBoolean());
+  }
+
+  @Test
+  public void testDisableUrnLowercasingShouldNotTouchUnsupportedRecipeTypes() throws Exception {
+    stubSingleIngestionSource("tableau", "default", RECIPE_WITHOUT_FLAG);
+
+    step.executable().apply(upgradeContext);
+
+    // Only one proposal is ingested, i.e. no recipe rewrite happened.
+    verify(mockEntityService, times(1)).ingestProposal(any(), any(), any(), anyBoolean());
+  }
+
+  @Test
+  public void testDisableUrnLowercasingShouldNotTouchCliExecutor() throws Exception {
+    stubSingleIngestionSource("bigquery", "__datahub_cli_", RECIPE_WITHOUT_FLAG);
+
+    step.executable().apply(upgradeContext);
+
+    // Only one proposal is ingested, i.e. no recipe rewrite happened.
+    verify(mockEntityService, times(1)).ingestProposal(any(), any(), any(), anyBoolean());
+  }
+
+  /**
+   * Stubs the first page (start 0, count 100) of ingestion sources with exactly one source.
+   *
+   * @param type ingestion source type, e.g. {@code "bigquery"}
+   * @param executorId executor id stored in the source config
+   * @param recipe JSON recipe stored in the source config
+   */
+  private void stubSingleIngestionSource(String type, String executorId, String recipe)
+      throws Exception {
+    DataHubIngestionSourceInfo sourceInfo = new DataHubIngestionSourceInfo();
+    sourceInfo.setType(type);
+    sourceInfo.setConfig(
+        new DataHubIngestionSourceConfig().setExecutorId(executorId).setRecipe(recipe));
+
+    ExtraInfo extraInfo = new ExtraInfo();
+    extraInfo.setUrn(Urn.createFromString("urn:li:dataset:1"));
+    ListResultMetadata listResultMetadata = new ListResultMetadata();
+    listResultMetadata.setExtraInfos(new ExtraInfoArray(Collections.singletonList(extraInfo)));
+
+    ListResult<RecordTemplate> listResult =
+        new ListResult<>(
+            Collections.singletonList(sourceInfo), listResultMetadata, 0, false, 1, 1, 100);
+
+    when(mockEntityService.listLatestAspects(
+            any(OperationContext.class),
+            eq(INGESTION_SOURCE_ENTITY_NAME),
+            eq(INGESTION_INFO_ASPECT_NAME),
+            eq(0),
+            eq(100)))
+        .thenReturn(listResult);
+  }
+}
diff --git a/datahub-web-react/src/app/ingest/source/builder/sources.json b/datahub-web-react/src/app/ingest/source/builder/sources.json index 52df7172e13471..5c4f029f3d31f5 100644 --- a/datahub-web-react/src/app/ingest/source/builder/sources.json +++ b/datahub-web-react/src/app/ingest/source/builder/sources.json @@ -5,7 +5,7 @@ "displayName": "BigQuery", "description": "Import Projects, Datasets, Tables, Views, lineage, queries, and statistics from BigQuery.", "docsUrl": "https://datahubproject.io/docs/quick-ingestion-guides/bigquery/overview", - "recipe": "source:\n type: bigquery\n config:\n include_table_lineage: true\n include_usage_statistics: true\n include_tables: true\n include_views: true\n profiling:\n enabled: true\n profile_table_level_only: true\n stateful_ingestion:\n enabled: true" + "recipe": "source:\n type: bigquery\n config:\n include_table_lineage: true\n include_usage_statistics: true\n include_tables: true\n include_views: true\n convert_urns_to_lowercase: true\n profiling:\n enabled: true\n profile_table_level_only: true\n stateful_ingestion:\n enabled: true" }, { "urn": "urn:li:dataPlatform:redshift", @@ -13,7 +13,7 @@ "displayName": "Redshift", "description": "Import Tables, Views, Databases, Schemas, lineage, queries, and statistics 
from Redshift.", "docsUrl": "https://datahubproject.io/docs/quick-ingestion-guides/redshift/overview", - "recipe": "source: \n type: redshift\n config:\n # Coordinates\n host_port: # Your Redshift host and post, e.g. example.something.us-west-2.redshift.amazonaws.com:5439\n database: # Your Redshift database, e.g. SampleDatabase\n\n # Credentials\n # Add secret in Secrets Tab with relevant names for each variable\n username: null # Your Redshift username, e.g. admin\n\n table_lineage_mode: stl_scan_based\n include_table_lineage: true\n include_tables: true\n include_views: true\n profiling:\n enabled: true\n profile_table_level_only: true\n stateful_ingestion:\n enabled: true" + "recipe": "source: \n type: redshift\n config:\n # Coordinates\n host_port: # Your Redshift host and port, e.g. example.something.us-west-2.redshift.amazonaws.com:5439\n database: # Your Redshift database, e.g. SampleDatabase\n\n # Credentials\n # Add secret in Secrets Tab with relevant names for each variable\n username: null # Your Redshift username, e.g. admin\n\n table_lineage_mode: stl_scan_based\n include_table_lineage: true\n include_tables: true\n include_views: true\n convert_urns_to_lowercase: true\n profiling:\n enabled: true\n profile_table_level_only: true\n stateful_ingestion:\n enabled: true" }, { "urn": "urn:li:dataPlatform:snowflake", @@ -77,7 +77,7 @@ "displayName": "MySQL", "description": "Import Tables, Views, Databases, Schemas, and statistics from MySQL.", "docsUrl": "https://datahubproject.io/docs/generated/ingestion/sources/mysql/", - "recipe": "source: \n type: mysql\n config: \n # Coordinates\n host_port: # Your MySQL host and post, e.g. mysql:3306\n database: # Your MySQL database name, e.g. datahub\n \n # Credentials\n # Add secret in Secrets Tab with relevant names for each variable\n username: null # Your MySQL username, e.g. 
admin\n\n # Options\n include_tables: true\n include_views: true\n\n # Profiling\n profiling:\n enabled: true\n profile_table_level_only: true\n stateful_ingestion:\n enabled: true" + "recipe": "source: \n type: mysql\n config: \n # Coordinates\n host_port: # Your MySQL host and port, e.g. mysql:3306\n database: # Your MySQL database name, e.g. datahub\n \n # Credentials\n # Add secret in Secrets Tab with relevant names for each variable\n username: null # Your MySQL username, e.g. admin\n\n # Options\n include_tables: true\n include_views: true\n convert_urns_to_lowercase: true\n\n # Profiling\n profiling:\n enabled: true\n profile_table_level_only: true\n stateful_ingestion:\n enabled: true" }, { "urn": "urn:li:dataPlatform:postgres", @@ -85,7 +85,7 @@ "displayName": "Postgres", "description": "Import Tables, Views, Databases, Schemas, and statistics from Postgres.", "docsUrl": "https://datahubproject.io/docs/generated/ingestion/sources/postgres/", - "recipe": "source: \n type: postgres\n config:\n # Coordinates\n host_port: # Your Postgres host and port, e.g. postgres:5432\n database: # Your Postgres Database, e.g. sample_db\n\n # Credentials\n # Add secret in Secrets Tab with relevant names for each variable\n username: null # Your Postgres username, e.g. admin\n\n # Options\n include_tables: true\n include_views: true\n\n # Profiling\n profiling:\n enabled: true\n profile_table_level_only: true\n stateful_ingestion:\n enabled: true" + "recipe": "source: \n type: postgres\n config:\n # Coordinates\n host_port: # Your Postgres host and port, e.g. postgres:5432\n database: # Your Postgres Database, e.g. sample_db\n\n # Credentials\n # Add secret in Secrets Tab with relevant names for each variable\n username: null # Your Postgres username, e.g. 
admin\n\n # Options\n include_tables: true\n include_views: true\n convert_urns_to_lowercase: true\n\n # Profiling\n profiling:\n enabled: true\n profile_table_level_only: true\n stateful_ingestion:\n enabled: true" }, { "urn": "urn:li:dataPlatform:kafka", @@ -93,7 +93,7 @@ "displayName": "Kafka", "description": "Import streaming topics from Kafka.", "docsUrl": "https://datahubproject.io/docs/generated/ingestion/sources/kafka/", - "recipe": "source:\n type: kafka\n config:\n connection:\n consumer_config:\n security.protocol: \"PLAINTEXT\"\n stateful_ingestion:\n enabled: false" + "recipe": "source:\n type: kafka\n config:\n connection:\n consumer_config:\n security.protocol: \"PLAINTEXT\"\n stateful_ingestion:\n enabled: true" }, { "urn": "urn:li:dataPlatform:hive", @@ -101,7 +101,7 @@ "displayName": "Hive", "description": "Import Tables, Views, Databases, Schemas, and statistics from Hive.", "docsUrl": "https://datahubproject.io/docs/generated/ingestion/sources/hive/", - "recipe": "source: \n type: hive\n config:\n # Coordinates\n host_port: # Your Hive host and port, e.g. hive:10000\n database: # Your Hive database name, e.g. SampleDatabase (Optional, if not specified, ingests from all databases)\n\n # Credentials\n # Add secret in Secrets Tab with relevant names for each variable\n username: null # Your Hive username, e.g. admin\n stateful_ingestion:\n enabled: true" + "recipe": "source: \n type: hive\n config:\n # Coordinates\n host_port: # Your Hive host and port, e.g. hive:10000\n database: # Your Hive database name, e.g. SampleDatabase (Optional, if not specified, ingests from all databases)\n\n # Credentials\n # Add secret in Secrets Tab with relevant names for each variable\n username: null # Your Hive username, e.g. 
admin\n convert_urns_to_lowercase: true\n stateful_ingestion:\n enabled: true" }, { "urn": "urn:li:dataPlatform:presto", @@ -109,7 +109,7 @@ "displayName": "Presto", "description": "Import Tables, Databases, Schemas, and statistics from Presto.", "docsUrl": "https://datahubproject.io/docs/generated/ingestion/sources/presto/", - "recipe": "source:\n type: presto\n config:\n # Coordinates\n host_port: null\n # The name of the catalog from getting the usage\n database: null\n # Credentials\n username: null\n include_views: true\n include_tables: true\n profiling:\n enabled: true\n profile_table_level_only: true\n stateful_ingestion:\n enabled: true" + "recipe": "source:\n type: presto\n config:\n # Coordinates\n host_port: null\n # The name of the catalog from getting the usage\n database: null\n # Credentials\n username: null\n include_views: true\n include_tables: true\n convert_urns_to_lowercase: true\n profiling:\n enabled: true\n profile_table_level_only: true\n stateful_ingestion:\n enabled: true" }, { "urn": "urn:li:dataPlatform:trino", @@ -117,7 +117,7 @@ "displayName": "Trino", "description": "Import Tables, Databases, Schemas, and statistics from Trino.", "docsUrl": "https://datahubproject.io/docs/generated/ingestion/sources/trino/", - "recipe": "source:\n type: trino\n config:\n # Coordinates\n host_port: null\n # The name of the catalog from getting the usage\n database: null\n # Credentials\n username: null\n include_views: true\n include_tables: true\n profiling:\n enabled: true\n profile_table_level_only: true\n stateful_ingestion:\n enabled: true" + "recipe": "source:\n type: trino\n config:\n # Coordinates\n host_port: null\n # The name of the catalog from getting the usage\n database: null\n # Credentials\n username: null\n include_views: true\n include_tables: true\n convert_urns_to_lowercase: true\n profiling:\n enabled: true\n profile_table_level_only: true\n stateful_ingestion:\n enabled: true" }, { "urn": "urn:li:dataPlatform:glue", @@ -125,7 
+125,7 @@ "displayName": "Glue", "description": "Import Tables, Databases, Jobs, statistics, and lineage to S3 from AWS Glue.", "docsUrl": "https://datahubproject.io/docs/generated/ingestion/sources/glue/", - "recipe": "source:\n type: glue\n config:\n # AWS credentials. \n aws_region: # The region for your AWS Glue instance. \n # Add secret in Secrets Tab with relevant names for each variable\n # The access key for your AWS account.\n aws_access_key_id: \"${AWS_ACCESS_KEY_ID}\"\n # The secret key for your AWS account.\n aws_secret_access_key: \"${AWS_SECRET_KEY}\"\n aws_session_token: # The session key for your AWS account. This is only needed when you are using temporary credentials.\n # aws_role: # (Optional) The role to assume (Role chaining supported by using a sorted list).\n\n # Allow / Deny specific databases & tables\n # database_pattern:\n # allow:\n # - \"flights-database\"\n # table_pattern:\n # allow:\n # - \"avro\"" + "recipe": "source:\n type: glue\n config:\n # AWS credentials. \n aws_region: # The region for your AWS Glue instance. \n # Add secret in Secrets Tab with relevant names for each variable\n # The access key for your AWS account.\n aws_access_key_id: \"${AWS_ACCESS_KEY_ID}\"\n # The secret key for your AWS account.\n aws_secret_access_key: \"${AWS_SECRET_KEY}\"\n aws_session_token: # The session key for your AWS account. 
This is only needed when you are using temporary credentials.\n # aws_role: # (Optional) The role to assume (Role chaining supported by using a sorted list).\n\n # Allow / Deny specific databases & tables\n # database_pattern:\n # allow:\n # - \"flights-database\"\n # table_pattern:\n # allow:\n # - \"avro\"\n stateful_ingestion:\n enabled: true" }, { "urn": "urn:li:dataPlatform:mssql", @@ -141,7 +141,7 @@ "displayName": "MariaDB", "description": "Import Tables, Views, Databases, Schemas, and statistics from MariaDB.", "docsUrl": "https://datahubproject.io/docs/generated/ingestion/sources/mariadb/", - "recipe": "source:\n type: mariadb\n config:\n # Coordinates\n host_port: null\n # The name\n database: null\n # Credentials\n username: null\n include_views: true\n include_tables: true\n profiling:\n enabled: true\n profile_table_level_only: true\n stateful_ingestion:\n enabled: true" + "recipe": "source:\n type: mariadb\n config:\n # Coordinates\n host_port: null\n # The name\n database: null\n # Credentials\n username: null\n include_views: true\n include_tables: true\n convert_urns_to_lowercase: true\n profiling:\n enabled: true\n profile_table_level_only: true\n stateful_ingestion:\n enabled: true" }, { "urn": "urn:li:dataPlatform:mongodb", @@ -149,7 +149,7 @@ "displayName": "MongoDB", "description": "Import Databases and Collections from MongoDB.", "docsUrl": "https://datahubproject.io/docs/generated/ingestion/sources/mongodb/", - "recipe": "source:\n type: mongodb\n config:\n # Coordinates\n connect_uri: # Your MongoDB connect URI, e.g. \"mongodb://localhost\"\n\n # Credentials\n # Add secret in Secrets Tab with relevant names for each variable\n username: \"${MONGO_USERNAME}\" # Your MongoDB username, e.g. admin\n password: \"${MONGO_PASSWORD}\" # Your MongoDB password, e.g. 
password_01\n\n # Options (recommended)\n enableSchemaInference: True\n useRandomSampling: True\n maxSchemaSize: 300" + "recipe": "source:\n type: mongodb\n config:\n # Coordinates\n connect_uri: # Your MongoDB connect URI, e.g. \"mongodb://localhost\"\n\n # Credentials\n # Add secret in Secrets Tab with relevant names for each variable\n username: \"${MONGO_USERNAME}\" # Your MongoDB username, e.g. admin\n password: \"${MONGO_PASSWORD}\" # Your MongoDB password, e.g. password_01\n\n # Options (recommended)\n enableSchemaInference: True\n useRandomSampling: True\n maxSchemaSize: 300\n stateful_ingestion:\n enabled: true" }, { "urn": "urn:li:dataPlatform:dynamodb", @@ -157,7 +157,7 @@ "displayName": "DynamoDB", "description": "Import Tables from DynamoDB.", "docsUrl": "https://datahubproject.io/docs/metadata-ingestion/", - "recipe": "source:\n type: dynamodb\n config:\n platform_instance: \"AWS_ACCOUNT_ID\"\n aws_access_key_id : '${AWS_ACCESS_KEY_ID}'\n aws_secret_access_key : '${AWS_SECRET_ACCESS_KEY}'\n # If there are items that have most representative fields of the table, users could use the\n # `include_table_item` option to provide a list of primary keys of the table in dynamodb format.\n # For each `region.table`, the list of primary keys can be at most 100.\n # We include these items in addition to the first 100 items in the table when we scan it.\n # include_table_item:\n # region.table_name:\n # [\n # {\n # 'partition_key_name': { 'attribute_type': 'attribute_value' },\n # 'sort_key_name': { 'attribute_type': 'attribute_value' },\n # },\n # ]" + "recipe": "source:\n type: dynamodb\n config:\n platform_instance: \"AWS_ACCOUNT_ID\"\n aws_access_key_id : '${AWS_ACCESS_KEY_ID}'\n aws_secret_access_key : '${AWS_SECRET_ACCESS_KEY}'\n # If there are items that have most representative fields of the table, users could use the\n # `include_table_item` option to provide a list of primary keys of the table in dynamodb format.\n # For each `region.table`, the list of 
primary keys can be at most 100.\n # We include these items in addition to the first 100 items in the table when we scan it.\n # include_table_item:\n # region.table_name:\n # [\n # {\n # 'partition_key_name': { 'attribute_type': 'attribute_value' },\n # 'sort_key_name': { 'attribute_type': 'attribute_value' },\n # },\n # ]\n stateful_ingestion:\n enabled: true" }, { "urn": "urn:li:dataPlatform:oracle", @@ -165,7 +165,7 @@ "displayName": "Oracle", "description": "Import Databases, Schemas, Tables, Views, statistics, and lineage from Oracle.", "docsUrl": "https://datahubproject.io/docs/generated/ingestion/sources/oracle/", - "recipe": "source: \n type: oracle\n config:\n # Coordinates\n host_port: # Your Oracle host and port, e.g. oracle:5432\n database: # Your Oracle database name, e.g. sample_db\n\n # Credentials\n # Add secret in Secrets Tab with relevant names for each variable\n username: \"${ORACLE_USERNAME}\" # Your Oracle username, e.g. admin\n password: \"${ORACLE_PASSWORD}\" # Your Oracle password, e.g. password_01\n\n # Optional service name\n # service_name: # Your service name, e.g. svc # omit database if using this option" + "recipe": "source: \n type: oracle\n config:\n # Coordinates\n host_port: # Your Oracle host and port, e.g. oracle:5432\n database: # Your Oracle database name, e.g. sample_db\n\n # Credentials\n # Add secret in Secrets Tab with relevant names for each variable\n username: \"${ORACLE_USERNAME}\" # Your Oracle username, e.g. admin\n password: \"${ORACLE_PASSWORD}\" # Your Oracle password, e.g. password_01\n\n # Optional service name\n # service_name: # Your service name, e.g. 
svc # omit database if using this option\n convert_urns_to_lowercase: true\n stateful_ingestion:\n enabled: true" }, { "urn": "urn:li:dataPlatform:superset", @@ -173,7 +173,7 @@ "displayName": "Superset", "description": "Import Charts and Dashboards from Superset", "docsUrl": "https://datahubproject.io/docs/generated/ingestion/sources/superset/", - "recipe": "source:\n type: superset\n config:\n # Coordinates\n connect_uri: http://localhost:8088\n\n # Credentials\n username: user\n password: pass\n provider: ldap" + "recipe": "source:\n type: superset\n config:\n # Coordinates\n connect_uri: http://localhost:8088\n\n # Credentials\n username: user\n password: pass\n provider: ldap\n stateful_ingestion:\n enabled: true" }, { "urn": "urn:li:dataPlatform:athena", @@ -181,7 +181,7 @@ "displayName": "Athena", "description": "Import Schemas, Tables, Views, and lineage to S3 from Athena.", "docsUrl": "https://datahubproject.io/docs/generated/ingestion/sources/athena/", - "recipe": "source:\n type: athena\n config:\n # AWS Keys (Optional - Required only if local aws credentials are not set)\n username: aws_access_key_id\n password: aws_secret_access_key\n # Coordinates\n aws_region: my_aws_region\n work_group: primary\n\n # Options\n s3_staging_dir: \"s3://my_staging_athena_results_bucket/results/\"" + "recipe": "source:\n type: athena\n config:\n # AWS Keys (Optional - Required only if local aws credentials are not set)\n username: aws_access_key_id\n password: aws_secret_access_key\n # Coordinates\n aws_region: my_aws_region\n work_group: primary\n convert_urns_to_lowercase: true\n\n # Options\n s3_staging_dir: \"s3://my_staging_athena_results_bucket/results/\"\n stateful_ingestion:\n enabled: true" }, { "urn": "urn:li:dataPlatform:clickhouse", @@ -189,7 +189,7 @@ "displayName": "ClickHouse", "description": "Import Tables, Views, Materialized Views, Dictionaries, statistics, queries, and lineage from ClickHouse.", "docsUrl": 
"https://datahubproject.io/docs/generated/ingestion/sources/clickhouse/", - "recipe": "source:\n type: clickhouse\n config:\n # Coordinates\n host_port: localhost:9000\n\n # Credentials\n username: user\n password: pass\n\n # Options\n platform_instance: DatabaseNameToBeIngested\n\n include_views: true # whether to include views, defaults to True\n include_tables: true # whether to include views, defaults to True\n\nsink:\n # sink configs\n\n#---------------------------------------------------------------------------\n# For the HTTP interface:\n#---------------------------------------------------------------------------\nsource:\n type: clickhouse\n config:\n host_port: localhost:8443\n protocol: https\n\n#---------------------------------------------------------------------------\n# For the Native interface:\n#---------------------------------------------------------------------------\n\nsource:\n type: clickhouse\n config:\n host_port: localhost:9440\n scheme: clickhouse+native\n secure: True" + "recipe": "source:\n type: clickhouse\n config:\n # Coordinates\n host_port: localhost:9000\n\n # Credentials\n username: user\n password: pass\n\n # Options\n platform_instance: DatabaseNameToBeIngested\n convert_urns_to_lowercase: true\n\n include_views: true # whether to include views, defaults to True\n include_tables: true # whether to include views, defaults to True\n\nsink:\n # sink configs\n\n#---------------------------------------------------------------------------\n# For the HTTP interface:\n#---------------------------------------------------------------------------\nsource:\n type: clickhouse\n config:\n host_port: localhost:8443\n protocol: https\n\n#---------------------------------------------------------------------------\n# For the Native interface:\n#---------------------------------------------------------------------------\n\nsource:\n type: clickhouse\n config:\n host_port: localhost:9440\n scheme: clickhouse+native\n secure: True\n 
stateful_ingestion:\n enabled: true" }, { "urn": "urn:li:dataPlatform:druid", @@ -197,7 +197,7 @@ "displayName": "Druid", "description": "Import Databases, Schemas, Tables, statistics, and lineage from Druid.", "docsUrl": "https://datahubproject.io/docs/generated/ingestion/sources/druid/", - "recipe": "source:\n type: druid\n config:\n # Coordinates\n host_port: \"localhost:8082\"\n\n # Credentials\n username: admin\n password: password" + "recipe": "source:\n type: druid\n config:\n # Coordinates\n host_port: \"localhost:8082\"\n convert_urns_to_lowercase: true\n\n # Credentials\n username: admin\n password: password\n stateful_ingestion:\n enabled: true" }, { "urn": "urn:li:dataPlatform:mode", @@ -205,7 +205,7 @@ "displayName": "Mode", "description": "Import Reports, Charts, and lineage from Mode.", "docsUrl": "https://datahubproject.io/docs/generated/ingestion/sources/mode/", - "recipe": "source:\n type: mode\n config:\n # Coordinates\n connect_uri: http://app.mode.com\n\n # Credentials\n token: token\n password: pass\n\n # Options\n workspace: \"datahub\"\n default_schema: \"public\"\n owner_username_instead_of_email: False\n api_options:\n retry_backoff_multiplier: 2\n max_retry_interval: 10\n max_attempts: 5" + "recipe": "source:\n type: mode\n config:\n # Coordinates\n connect_uri: http://app.mode.com\n\n # Credentials\n token: token\n password: pass\n\n # Options\n workspace: \"datahub\"\n default_schema: \"public\"\n owner_username_instead_of_email: False\n api_options:\n retry_backoff_multiplier: 2\n max_retry_interval: 10\n max_attempts: 5\n stateful_ingestion:\n enabled: true" }, { "urn": "urn:li:dataPlatform:metabase", @@ -213,7 +213,7 @@ "displayName": "Metabase", "description": "Import Collections, Dashboards, and Charts from Metabase.", "docsUrl": "https://datahubproject.io/docs/generated/ingestion/sources/metabase/", - "recipe": "source:\n type: metabase\n config:\n # Coordinates\n connect_uri:\n\n # Credentials\n username: root\n password: 
example" + "recipe": "source:\n type: metabase\n config:\n # Coordinates\n connect_uri:\n\n # Credentials\n username: root\n password: example\n stateful_ingestion:\n enabled: true" }, { "urn": "urn:li:dataPlatform:mlflow", @@ -221,7 +221,7 @@ "displayName": "MLflow", "description": "Import Registered Models, Model Versions, and Model Stages from MLflow.", "docsUrl": "https://datahubproject.io/docs/generated/ingestion/sources/mlflow/", - "recipe": "source:\n type: mlflow\n config:\n tracking_uri: tracking_uri" + "recipe": "source:\n type: mlflow\n config:\n tracking_uri: tracking_uri\n stateful_ingestion:\n enabled: true" }, { "urn": "urn:li:dataPlatform:azure-ad", @@ -245,7 +245,7 @@ "displayName": "Vertica", "description": "Import Databases, Schemas, Tables, Views, Projections, statistics, and lineage from Vertica.", "docsUrl": "https://datahubproject.io/docs/generated/ingestion/sources/vertica/", - "recipe": "source:\n type: vertica\n config:\n # Coordinates\n host_port: localhost:5433\n # The name of the vertica database\n database: Database_Name\n # Credentials\n username: Vertica_User\n password: Vertica_Password\n\n include_tables: true\n include_views: true\n include_projections: true\n include_models: true\n include_view_lineage: true\n include_projection_lineage: true\n profiling:\n enabled: false\n stateful_ingestion:\n enabled: true " + "recipe": "source:\n type: vertica\n config:\n # Coordinates\n host_port: localhost:5433\n # The name of the vertica database\n database: Database_Name\n # Credentials\n username: Vertica_User\n password: Vertica_Password\n\n include_tables: true\n include_views: true\n include_projections: true\n include_models: true\n include_view_lineage: true\n include_projection_lineage: true\n convert_urns_to_lowercase: true\n profiling:\n enabled: false\n stateful_ingestion:\n enabled: true " }, { "urn": "urn:li:dataPlatform:fivetran", @@ -253,7 +253,7 @@ "displayName": "Fivetran", "description": "Import Connectors, Destinations, 
Sync Histor, Users, and lineage from FiveTran.", "docsUrl": "https://datahubproject.io/docs/generated/ingestion/sources/fivetran/", - "recipe": "source:\n type: fivetran\n config:\n # Fivetran log connector destination server configurations\n fivetran_log_config:\n destination_platform: snowflake\n snowflake_destination_config:\n # Coordinates\n account_id: snowflake_account_id\n warehouse: warehouse_name\n database: snowflake_db\n log_schema: fivetran_log_schema\n\n # Credentials\n username: ${SNOWFLAKE_USER}\n password: ${SNOWFLAKE_PASS}\n role: snowflake_role\n\n # Optional - filter for certain connector names instead of ingesting everything.\n # connector_patterns:\n # allow:\n # - connector_name\n\n # Optional -- This mapping is optional and only required to configure platform-instance for source\n # A mapping of Fivetran connector id to data platform instance\n # sources_to_platform_instance:\n # calendar_elected:\n # platform_instance: cloud_postgres_instance\n # env: DEV\n\n # Optional -- This mapping is optional and only required to configure platform-instance for destination.\n # A mapping of Fivetran destination id to data platform instance\n # destination_to_platform_instance:\n # calendar_elected:\n # platform_instance: cloud_postgres_instance\n # env: DEV" + "recipe": "source:\n type: fivetran\n config:\n # Fivetran log connector destination server configurations\n fivetran_log_config:\n destination_platform: snowflake\n snowflake_destination_config:\n # Coordinates\n account_id: snowflake_account_id\n warehouse: warehouse_name\n database: snowflake_db\n log_schema: fivetran_log_schema\n\n # Credentials\n username: ${SNOWFLAKE_USER}\n password: ${SNOWFLAKE_PASS}\n role: snowflake_role\n\n # Optional - filter for certain connector names instead of ingesting everything.\n # connector_patterns:\n # allow:\n # - connector_name\n\n # Optional -- This mapping is optional and only required to configure platform-instance for source\n # A mapping of Fivetran 
connector id to data platform instance\n # sources_to_platform_instance:\n # calendar_elected:\n # platform_instance: cloud_postgres_instance\n # env: DEV\n\n # Optional -- This mapping is optional and only required to configure platform-instance for destination.\n # A mapping of Fivetran destination id to data platform instance\n # destination_to_platform_instance:\n # calendar_elected:\n # platform_instance: cloud_postgres_instance\n # env: DEV\n stateful_ingestion:\n enabled: true" }, { "urn": "urn:li:dataPlatform:sigma", @@ -261,7 +261,7 @@ "displayName": "Sigma", "description": "Import Workspaces, Workbooks, Pages, Elements, and lineage from Sigma Computing.", "docsUrl": "https://datahubproject.io/docs/generated/ingestion/sources/sigma/", - "recipe": "source:\n type: sigma\n config:\n # Coordinates\n api_url: https://aws-api.sigmacomputing.com/v2\n # Coordinates\n client_id: CLIENT_ID\n client_secret: CLIENT_SECRET\n\n # Optional - filter for certain workspace names instead of ingesting everything.\n # workspace_pattern:\n\n # allow:\n # - workspace_name\n ingest_owner: true" + "recipe": "source:\n type: sigma\n config:\n # Coordinates\n api_url: https://aws-api.sigmacomputing.com/v2\n # Coordinates\n client_id: CLIENT_ID\n client_secret: CLIENT_SECRET\n\n # Optional - filter for certain workspace names instead of ingesting everything.\n # workspace_pattern:\n\n # allow:\n # - workspace_name\n ingest_owner: true\n stateful_ingestion:\n enabled: true" }, { "urn": "urn:li:dataPlatform:qlik-sense", @@ -269,7 +269,7 @@ "displayName": "Qlik Sense", "description": "Import Spaces, Apps, Sheets, Charts, and Datasets from Qlik Sense.", "docsUrl": "https://datahubproject.io/docs/generated/ingestion/sources/qlik-sense/", - "recipe": "source:\n type: qlik-sense\n config:\n # Coordinates\n tenant_hostname: https://xyz12xz.us.qlikcloud.com\n # Coordinates\n api_key: QLIK_API_KEY\n\n # Optional - filter for certain space names instead of ingesting everything.\n # 
space_pattern:\n\n # allow:\n # - space_name\n ingest_owner: true" + "recipe": "source:\n type: qlik-sense\n config:\n # Coordinates\n tenant_hostname: https://xyz12xz.us.qlikcloud.com\n # Coordinates\n api_key: QLIK_API_KEY\n\n # Optional - filter for certain space names instead of ingesting everything.\n # space_pattern:\n\n # allow:\n # - space_name\n ingest_owner: true\n stateful_ingestion:\n enabled: true" }, { "urn": "urn:li:dataPlatform:cockroachdb", @@ -277,7 +277,7 @@ "displayName": "CockroachDb", "description": "Import Databases, Schemas, Tables, Views, statistics and lineage from CockroachDB.", "docsUrl": "https://datahubproject.io/docs/generated/ingestion/sources/cockroachdb/", - "recipe": "source: \n type: cockroachdb\n config:\n # Coordinates\n host_port: # Your CockroachDb host and port, e.g. cockroachdb:5432\n database: # Your CockroachDb Database, e.g. sample_db\n\n # Credentials\n # Add secret in Secrets Tab with relevant names for each variable\n username: null # Your CockroachDb username, e.g. admin\n\n # Options\n include_tables: true\n include_views: true\n\n # Profiling\n profiling:\n enabled: true\n profile_table_level_only: true\n stateful_ingestion:\n enabled: true" + "recipe": "source: \n type: cockroachdb\n config:\n # Coordinates\n host_port: # Your CockroachDb host and port, e.g. cockroachdb:5432\n database: # Your CockroachDb Database, e.g. sample_db\n\n # Credentials\n # Add secret in Secrets Tab with relevant names for each variable\n username: null # Your CockroachDb username, e.g. 
admin\n\n # Options\n include_tables: true\n include_views: true\n convert_urns_to_lowercase: true\n\n # Profiling\n profiling:\n enabled: true\n profile_table_level_only: true\n stateful_ingestion:\n enabled: true" }, { "urn": "urn:li:dataPlatform:csv-enricher", @@ -293,7 +293,7 @@ "displayName": "SAP Analytics Cloud", "description": "Import Stories, Applications and Models from SAP Analytics Cloud.", "docsUrl": "https://datahubproject.io/docs/generated/ingestion/sources/sac/", - "recipe": "source:\n type: sac\n config:\n tenant_url: # Your SAP Analytics Cloud tenant URL, e.g. https://company.eu10.sapanalytics.cloud or https://company.eu10.hcs.cloud.sap\n token_url: # The Token URL of your SAP Analytics Cloud tenant, e.g. https://company.eu10.hana.ondemand.com/oauth/token.\n\n # Add secret in Secrets Tab with relevant names for each variable\n client_id: \"${SAC_CLIENT_ID}\" # Your SAP Analytics Cloud client id\n client_secret: \"${SAC_CLIENT_SECRET}\" # Your SAP Analytics Cloud client secret" + "recipe": "source:\n type: sac\n config:\n tenant_url: # Your SAP Analytics Cloud tenant URL, e.g. https://company.eu10.sapanalytics.cloud or https://company.eu10.hcs.cloud.sap\n token_url: # The Token URL of your SAP Analytics Cloud tenant, e.g. 
https://company.eu10.hana.ondemand.com/oauth/token.\n\n # Add secret in Secrets Tab with relevant names for each variable\n client_id: \"${SAC_CLIENT_ID}\" # Your SAP Analytics Cloud client id\n client_secret: \"${SAC_CLIENT_SECRET}\" # Your SAP Analytics Cloud client secret\n stateful_ingestion:\n enabled: true" }, { "urn": "urn:li:dataPlatform:custom", @@ -316,7 +316,7 @@ "name": "cassandra", "displayName": "CassandraDB", "docsUrl": "https://datahubproject.io/docs/generated/ingestion/sources/cassandra", - "recipe": "source:\n type: cassandra\n config:\n # Credentials for on prem cassandra\n contact_point: localhost\n port: 9042\n username: admin\n password: password\n\n # Or\n # Credentials Astra Cloud\n #cloud_config:\n # secure_connect_bundle: Path to Secure Connect Bundle (.zip)\n # token: Application Token\n\n # Optional Allow / Deny extraction of particular keyspaces.\n keyspace_pattern:\n allow: [.*]\n\n # Optional Allow / Deny extraction of particular tables.\n table_pattern:\n allow: [.*]" + "recipe": "source:\n type: cassandra\n config:\n # Credentials for on prem cassandra\n contact_point: localhost\n port: 9042\n username: admin\n password: password\n\n # Or\n # Credentials Astra Cloud\n #cloud_config:\n # secure_connect_bundle: Path to Secure Connect Bundle (.zip)\n # token: Application Token\n\n # Optional Allow / Deny extraction of particular keyspaces.\n keyspace_pattern:\n allow: [.*]\n\n # Optional Allow / Deny extraction of particular tables.\n table_pattern:\n allow: [.*]\n stateful_ingestion:\n enabled: true" }, { "urn": "urn:li:dataPlatform:iceberg", @@ -324,7 +324,7 @@ "displayName": "Iceberg", "description": "Ingest databases and tables from any Iceberg catalog implementation", "docsUrl": "https://datahubproject.io/docs/generated/ingestion/sources/iceberg", - "recipe": "source:\n type: \"iceberg\"\n config:\n env: dev\n # each thread will open internet connections to fetch manifest files independently, \n # this value needs to be adjusted 
with ulimit\n processing_threads: 1 \n # a single catalog definition with a form of a dictionary\n catalog: \n demo: # name of the catalog\n type: \"rest\" # other types are available\n uri: \"uri\"\n s3.access-key-id: \"access-key\"\n s3.secret-access-key: \"secret-access-key\"\n s3.region: \"aws-region\"\n profiling:\n enabled: false\n" + "recipe": "source:\n type: \"iceberg\"\n config:\n env: dev\n # each thread will open internet connections to fetch manifest files independently, \n # this value needs to be adjusted with ulimit\n processing_threads: 1 \n # a single catalog definition with a form of a dictionary\n catalog: \n demo: # name of the catalog\n type: \"rest\" # other types are available\n uri: \"uri\"\n s3.access-key-id: \"access-key\"\n s3.secret-access-key: \"secret-access-key\"\n s3.region: \"aws-region\"\n profiling:\n enabled: false\n stateful_ingestion:\n enabled: true" }, { "urn": "urn:li:dataPlatform:neo4j", @@ -332,7 +332,7 @@ "displayName": "Neo4j", "description": "Import Nodes and Relationships from Neo4j.", "docsUrl": "https://datahubproject.io/docs/generated/ingestion/sources/neo4j/", - "recipe": "source:\n type: 'neo4j'\n config:\n uri: 'neo4j+ssc://host:7687'\n username: 'neo4j'\n password: 'password'\n env: 'PROD'\n\nsink:\n type: \"datahub-rest\"\n config:\n server: 'http://localhost:8080'" + "recipe": "source:\n type: 'neo4j'\n config:\n uri: 'neo4j+ssc://host:7687'\n username: 'neo4j'\n password: 'password'\n env: 'PROD'\n\nsink:\n type: \"datahub-rest\"\n config:\n server: 'http://localhost:8080'\n stateful_ingestion:\n enabled: true" }, { "urn": "urn:li:dataPlatform:vertexai", diff --git a/datahub-web-react/src/app/ingest/source/conf/bigquery/bigquery.ts b/datahub-web-react/src/app/ingest/source/conf/bigquery/bigquery.ts index 491c7d65793348..df08ef7a395ca8 100644 --- a/datahub-web-react/src/app/ingest/source/conf/bigquery/bigquery.ts +++ b/datahub-web-react/src/app/ingest/source/conf/bigquery/bigquery.ts @@ -17,6 +17,7 @@ source: 
include_table_lineage: true include_view_lineage: true + convert_urns_to_lowercase: true profiling: enabled: true stateful_ingestion: diff --git a/datahub-web-react/src/app/ingest/source/conf/glue/glue.ts b/datahub-web-react/src/app/ingest/source/conf/glue/glue.ts index 449f76eea8afb8..6004c33a6ab4a0 100644 --- a/datahub-web-react/src/app/ingest/source/conf/glue/glue.ts +++ b/datahub-web-react/src/app/ingest/source/conf/glue/glue.ts @@ -22,6 +22,8 @@ source: # table_pattern: # allow: # - "avro" + stateful_ingestion: + enabled: true `; const glueConfig: SourceConfig = { diff --git a/datahub-web-react/src/app/ingest/source/conf/hive/hive.ts b/datahub-web-react/src/app/ingest/source/conf/hive/hive.ts index 75bf78e40d8375..b811c196379425 100644 --- a/datahub-web-react/src/app/ingest/source/conf/hive/hive.ts +++ b/datahub-web-react/src/app/ingest/source/conf/hive/hive.ts @@ -13,6 +13,9 @@ source: # Add secret in Secrets Tab with relevant names for each variable username: "\${HIVE_USERNAME}" # Your Hive username, e.g. admin password: "\${HIVE_PASSWORD}"# Your Hive password, e.g. password_01 + + convert_urns_to_lowercase: true + stateful_ingestion: enabled: true `; diff --git a/datahub-web-react/src/app/ingest/source/conf/looker/looker.ts b/datahub-web-react/src/app/ingest/source/conf/looker/looker.ts index 228afc26c7dc2c..2715c9214905f2 100644 --- a/datahub-web-react/src/app/ingest/source/conf/looker/looker.ts +++ b/datahub-web-react/src/app/ingest/source/conf/looker/looker.ts @@ -12,6 +12,8 @@ source: # Add secret in Secrets Tab with relevant names for each variable client_id: "\${LOOKER_CLIENT_ID}" # Your Looker client id, e.g. admin client_secret: "\${LOOKER_CLIENT_SECRET}" # Your Looker password, e.g. 
password_01 + stateful_ingestion: + enabled: true `; export const LOOKER = 'looker'; diff --git a/datahub-web-react/src/app/ingest/source/conf/mongodb/mongodb.ts b/datahub-web-react/src/app/ingest/source/conf/mongodb/mongodb.ts index f4856adbede8e9..47fbc9cbd10ee2 100644 --- a/datahub-web-react/src/app/ingest/source/conf/mongodb/mongodb.ts +++ b/datahub-web-react/src/app/ingest/source/conf/mongodb/mongodb.ts @@ -17,6 +17,8 @@ source: enableSchemaInference: True useRandomSampling: True maxSchemaSize: 300 + stateful_ingestion: + enabled: true `; const mongoConfig: SourceConfig = { diff --git a/datahub-web-react/src/app/ingest/source/conf/mysql/mysql.ts b/datahub-web-react/src/app/ingest/source/conf/mysql/mysql.ts index 9f90677be6c19d..3ea01cf3809053 100644 --- a/datahub-web-react/src/app/ingest/source/conf/mysql/mysql.ts +++ b/datahub-web-react/src/app/ingest/source/conf/mysql/mysql.ts @@ -17,7 +17,9 @@ source: # Options include_tables: True include_views: True - + + convert_urns_to_lowercase: true + # Profiling profiling: enabled: false diff --git a/datahub-web-react/src/app/ingest/source/conf/postgres/postgres.ts b/datahub-web-react/src/app/ingest/source/conf/postgres/postgres.ts index 301afe817862e1..3c8193e81521d2 100644 --- a/datahub-web-react/src/app/ingest/source/conf/postgres/postgres.ts +++ b/datahub-web-react/src/app/ingest/source/conf/postgres/postgres.ts @@ -17,6 +17,8 @@ source: # Options include_tables: True include_views: True + + convert_urns_to_lowercase: true # Profiling profiling: diff --git a/datahub-web-react/src/app/ingest/source/conf/redshift/redshift.ts b/datahub-web-react/src/app/ingest/source/conf/redshift/redshift.ts index 4a6858a3a1d48b..5140f52173dd32 100644 --- a/datahub-web-react/src/app/ingest/source/conf/redshift/redshift.ts +++ b/datahub-web-react/src/app/ingest/source/conf/redshift/redshift.ts @@ -17,6 +17,9 @@ source: table_lineage_mode: stl_scan_based include_table_lineage: true include_view_lineage: true + + 
convert_urns_to_lowercase: true + profiling: enabled: true stateful_ingestion: diff --git a/docs/how/updating-datahub.md b/docs/how/updating-datahub.md index c89f45d9069208..dfeaef42da9acf 100644 --- a/docs/how/updating-datahub.md +++ b/docs/how/updating-datahub.md @@ -19,6 +19,7 @@ This file documents any backwards-incompatible changes in DataHub and assists pe ## Next ### Breaking Changes +- #13038: URN lowercasing (`convert_urns_to_lowercase`) is now enabled by default for SQL-based connectors (Athena, BigQuery, ClickHouse, CockroachDB, Druid, HANA, Hive, Hive Metastore, MariaDB, MySQL, Oracle, PostgreSQL, Presto, Redshift, Teradata, Trino, Vertica) to improve lineage detection and reference integrity. UI ingestions are upgraded automatically through a bootstrap script, while other ingestions may need manual configuration changes, e.g. setting `convert_urns_to_lowercase: false` to keep the previous behavior. - #13004: The `acryl-datahub-airflow-plugin` dropped support for Airflow 2.3 and 2.4. diff --git a/metadata-ingestion/src/datahub/configuration/source_common.py b/metadata-ingestion/src/datahub/configuration/source_common.py index 8e41e9fb917878..6bb424c73d5113 100644 --- a/metadata-ingestion/src/datahub/configuration/source_common.py +++ b/metadata-ingestion/src/datahub/configuration/source_common.py @@ -48,7 +48,7 @@ class DatasetSourceConfigMixin(PlatformInstanceConfigMixin, EnvConfigMixin): class LowerCaseDatasetUrnConfigMixin(ConfigModel): convert_urns_to_lowercase: bool = Field( - default=False, + default=True, description="Whether to convert dataset urns to lowercase.", ) diff --git a/metadata-ingestion/src/datahub/ingestion/source/kafka/kafka.py b/metadata-ingestion/src/datahub/ingestion/source/kafka/kafka.py index 7749f655070b13..a899814b2724d1 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/kafka/kafka.py +++ b/metadata-ingestion/src/datahub/ingestion/source/kafka/kafka.py @@ -147,6 +147,11 @@ class KafkaSourceConfig( description="Enables ingesting schemas from schema registry as separate entities, in addition to the topics", ) +
convert_urns_to_lowercase: bool = pydantic.Field( + default=False, + description="Whether to convert dataset urns to lowercase.", + ) + def get_kafka_consumer( connection: KafkaConsumerConnectionConfig, diff --git a/metadata-ingestion/src/datahub/ingestion/source/unity/config.py b/metadata-ingestion/src/datahub/ingestion/source/unity/config.py index 6c3f7a51294797..974fa26215c16d 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/unity/config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/unity/config.py @@ -272,6 +272,11 @@ class UnityCatalogSourceConfig( description="Details about the delta lake, incase to emit siblings", ) + convert_urns_to_lowercase: bool = Field( + default=False, + description="Whether to convert dataset urns to lowercase.", + ) + scheme: str = DATABRICKS def get_sql_alchemy_url(self, database: Optional[str] = None) -> str: diff --git a/metadata-ingestion/tests/integration/oracle/golden_test_ingest_with_database.json b/metadata-ingestion/tests/integration/oracle/golden_test_ingest_with_database.json index ed9f73b76f78fd..dd095024019cf7 100644 --- a/metadata-ingestion/tests/integration/oracle/golden_test_ingest_with_database.json +++ b/metadata-ingestion/tests/integration/oracle/golden_test_ingest_with_database.json @@ -17,7 +17,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00-77emm3", + "runId": "oracle-2022_02_03-07_00_00-7ay0kw", "lastRunId": "no-run-id-provided" } }, @@ -33,7 +33,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00-77emm3", + "runId": "oracle-2022_02_03-07_00_00-7ay0kw", "lastRunId": "no-run-id-provided" } }, @@ -49,7 +49,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00-77emm3", + "runId": "oracle-2022_02_03-07_00_00-7ay0kw", "lastRunId": "no-run-id-provided" } }, @@ -67,7 +67,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": 
"oracle-2022_02_03-07_00_00-77emm3", + "runId": "oracle-2022_02_03-07_00_00-7ay0kw", "lastRunId": "no-run-id-provided" } }, @@ -83,7 +83,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00-77emm3", + "runId": "oracle-2022_02_03-07_00_00-7ay0kw", "lastRunId": "no-run-id-provided" } }, @@ -99,7 +99,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00-77emm3", + "runId": "oracle-2022_02_03-07_00_00-7ay0kw", "lastRunId": "no-run-id-provided" } }, @@ -122,7 +122,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00-77emm3", + "runId": "oracle-2022_02_03-07_00_00-7ay0kw", "lastRunId": "no-run-id-provided" } }, @@ -138,7 +138,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00-77emm3", + "runId": "oracle-2022_02_03-07_00_00-7ay0kw", "lastRunId": "no-run-id-provided" } }, @@ -154,7 +154,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00-77emm3", + "runId": "oracle-2022_02_03-07_00_00-7ay0kw", "lastRunId": "no-run-id-provided" } }, @@ -172,7 +172,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00-77emm3", + "runId": "oracle-2022_02_03-07_00_00-7ay0kw", "lastRunId": "no-run-id-provided" } }, @@ -193,13 +193,13 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00-77emm3", + "runId": "oracle-2022_02_03-07_00_00-7ay0kw", "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,OraDoc.schema1.test1,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,oradoc.schema1.test1,PROD)", "changeType": "UPSERT", "aspectName": "container", "aspect": { @@ -209,14 +209,14 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00-77emm3", + "runId": 
"oracle-2022_02_03-07_00_00-7ay0kw", "lastRunId": "no-run-id-provided" } }, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:oracle,OraDoc.schema1.test1,PROD)", + "urn": "urn:li:dataset:(urn:li:dataPlatform:oracle,oradoc.schema1.test1,PROD)", "aspects": [ { "com.linkedin.pegasus2avro.common.Status": { @@ -272,13 +272,13 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00-77emm3", + "runId": "oracle-2022_02_03-07_00_00-7ay0kw", "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,OraDoc.schema1.test1,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,oradoc.schema1.test1,PROD)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -290,13 +290,13 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00-77emm3", + "runId": "oracle-2022_02_03-07_00_00-7ay0kw", "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,OraDoc.schema1.test1,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,oradoc.schema1.test1,PROD)", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { @@ -315,13 +315,13 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00-77emm3", + "runId": "oracle-2022_02_03-07_00_00-7ay0kw", "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,OraDoc.schema1.test2,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,oradoc.schema1.test2,PROD)", "changeType": "UPSERT", "aspectName": "container", "aspect": { @@ -331,14 +331,14 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00-77emm3", + "runId": "oracle-2022_02_03-07_00_00-7ay0kw", 
"lastRunId": "no-run-id-provided" } }, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:oracle,OraDoc.schema1.test2,PROD)", + "urn": "urn:li:dataset:(urn:li:dataPlatform:oracle,oradoc.schema1.test2,PROD)", "aspects": [ { "com.linkedin.pegasus2avro.common.Status": { @@ -394,13 +394,13 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00-77emm3", + "runId": "oracle-2022_02_03-07_00_00-7ay0kw", "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,OraDoc.schema1.test2,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,oradoc.schema1.test2,PROD)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -412,13 +412,13 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00-77emm3", + "runId": "oracle-2022_02_03-07_00_00-7ay0kw", "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,OraDoc.schema1.test2,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,oradoc.schema1.test2,PROD)", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { @@ -437,13 +437,13 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00-77emm3", + "runId": "oracle-2022_02_03-07_00_00-7ay0kw", "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,OraDoc.schema1.view1,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,oradoc.schema1.view1,PROD)", "changeType": "UPSERT", "aspectName": "container", "aspect": { @@ -453,14 +453,14 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00-77emm3", + "runId": "oracle-2022_02_03-07_00_00-7ay0kw", "lastRunId": "no-run-id-provided" } }, { 
"proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:oracle,OraDoc.schema1.view1,PROD)", + "urn": "urn:li:dataset:(urn:li:dataPlatform:oracle,oradoc.schema1.view1,PROD)", "aspects": [ { "com.linkedin.pegasus2avro.common.Status": { @@ -519,13 +519,13 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00-77emm3", + "runId": "oracle-2022_02_03-07_00_00-7ay0kw", "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,OraDoc.schema1.view1,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,oradoc.schema1.view1,PROD)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -537,13 +537,13 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00-77emm3", + "runId": "oracle-2022_02_03-07_00_00-7ay0kw", "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,OraDoc.schema1.view1,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,oradoc.schema1.view1,PROD)", "changeType": "UPSERT", "aspectName": "viewProperties", "aspect": { @@ -555,13 +555,13 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00-77emm3", + "runId": "oracle-2022_02_03-07_00_00-7ay0kw", "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,OraDoc.schema1.view1,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,oradoc.schema1.view1,PROD)", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { @@ -580,7 +580,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00-77emm3", + "runId": "oracle-2022_02_03-07_00_00-7ay0kw", "lastRunId": "no-run-id-provided" } }, @@ -596,7 +596,7 @@ }, "systemMetadata": 
{ "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00-77emm3", + "runId": "oracle-2022_02_03-07_00_00-7ay0kw", "lastRunId": "no-run-id-provided" } }, @@ -619,7 +619,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00-77emm3", + "runId": "oracle-2022_02_03-07_00_00-7ay0kw", "lastRunId": "no-run-id-provided" } }, @@ -635,7 +635,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00-77emm3", + "runId": "oracle-2022_02_03-07_00_00-7ay0kw", "lastRunId": "no-run-id-provided" } }, @@ -651,7 +651,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00-77emm3", + "runId": "oracle-2022_02_03-07_00_00-7ay0kw", "lastRunId": "no-run-id-provided" } }, @@ -669,7 +669,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00-77emm3", + "runId": "oracle-2022_02_03-07_00_00-7ay0kw", "lastRunId": "no-run-id-provided" } }, @@ -690,13 +690,13 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00-77emm3", + "runId": "oracle-2022_02_03-07_00_00-7ay0kw", "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,OraDoc.schema2.test3,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,oradoc.schema2.test3,PROD)", "changeType": "UPSERT", "aspectName": "container", "aspect": { @@ -706,14 +706,14 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00-77emm3", + "runId": "oracle-2022_02_03-07_00_00-7ay0kw", "lastRunId": "no-run-id-provided" } }, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:oracle,OraDoc.schema2.test3,PROD)", + "urn": "urn:li:dataset:(urn:li:dataPlatform:oracle,oradoc.schema2.test3,PROD)", "aspects": [ { 
"com.linkedin.pegasus2avro.common.Status": { @@ -769,13 +769,13 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00-77emm3", + "runId": "oracle-2022_02_03-07_00_00-7ay0kw", "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,OraDoc.schema2.test3,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,oradoc.schema2.test3,PROD)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -787,13 +787,13 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00-77emm3", + "runId": "oracle-2022_02_03-07_00_00-7ay0kw", "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,OraDoc.schema2.test3,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,oradoc.schema2.test3,PROD)", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { @@ -812,13 +812,13 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00-77emm3", + "runId": "oracle-2022_02_03-07_00_00-7ay0kw", "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,OraDoc.schema2.test4,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,oradoc.schema2.test4,PROD)", "changeType": "UPSERT", "aspectName": "container", "aspect": { @@ -828,14 +828,14 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00-77emm3", + "runId": "oracle-2022_02_03-07_00_00-7ay0kw", "lastRunId": "no-run-id-provided" } }, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:oracle,OraDoc.schema2.test4,PROD)", + "urn": "urn:li:dataset:(urn:li:dataPlatform:oracle,oradoc.schema2.test4,PROD)", "aspects": [ { 
"com.linkedin.pegasus2avro.common.Status": { @@ -891,13 +891,13 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00-77emm3", + "runId": "oracle-2022_02_03-07_00_00-7ay0kw", "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,OraDoc.schema2.test4,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,oradoc.schema2.test4,PROD)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -909,13 +909,13 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00-77emm3", + "runId": "oracle-2022_02_03-07_00_00-7ay0kw", "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,OraDoc.schema2.test4,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,oradoc.schema2.test4,PROD)", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { @@ -934,13 +934,13 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00-77emm3", + "runId": "oracle-2022_02_03-07_00_00-7ay0kw", "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,OraDoc.schema2.view1,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,oradoc.schema2.view1,PROD)", "changeType": "UPSERT", "aspectName": "container", "aspect": { @@ -950,14 +950,14 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00-77emm3", + "runId": "oracle-2022_02_03-07_00_00-7ay0kw", "lastRunId": "no-run-id-provided" } }, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:oracle,OraDoc.schema2.view1,PROD)", + "urn": "urn:li:dataset:(urn:li:dataPlatform:oracle,oradoc.schema2.view1,PROD)", "aspects": [ { 
"com.linkedin.pegasus2avro.common.Status": { @@ -1016,13 +1016,13 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00-77emm3", + "runId": "oracle-2022_02_03-07_00_00-7ay0kw", "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,OraDoc.schema2.view1,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,oradoc.schema2.view1,PROD)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -1034,13 +1034,13 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00-77emm3", + "runId": "oracle-2022_02_03-07_00_00-7ay0kw", "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,OraDoc.schema2.view1,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,oradoc.schema2.view1,PROD)", "changeType": "UPSERT", "aspectName": "viewProperties", "aspect": { @@ -1052,13 +1052,13 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00-77emm3", + "runId": "oracle-2022_02_03-07_00_00-7ay0kw", "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,OraDoc.schema2.view1,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,oradoc.schema2.view1,PROD)", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { @@ -1077,13 +1077,13 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00-77emm3", + "runId": "oracle-2022_02_03-07_00_00-7ay0kw", "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,OraDoc.schema1.view1,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,oradoc.schema1.view1,PROD)", "changeType": "UPSERT", "aspectName": "upstreamLineage", "aspect": { @@ -1111,7 +1111,7 @@ ], 
"downstreamType": "FIELD", "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:oracle,OraDoc.schema1.view1,PROD),mock_column1)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:oracle,oradoc.schema1.view1,PROD),mock_column1)" ], "confidenceScore": 0.2, "query": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Aoracle%2COraDoc.schema1.view1%2CPROD%29" @@ -1123,7 +1123,7 @@ ], "downstreamType": "FIELD", "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:oracle,OraDoc.schema1.view1,PROD),mock_column2)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:oracle,oradoc.schema1.view1,PROD),mock_column2)" ], "confidenceScore": 0.2, "query": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Aoracle%2COraDoc.schema1.view1%2CPROD%29" @@ -1133,7 +1133,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00-77emm3", + "runId": "oracle-2022_02_03-07_00_00-7ay0kw", "lastRunId": "no-run-id-provided" } }, @@ -1144,6 +1144,7 @@ "aspectName": "queryProperties", "aspect": { "json": { + "customProperties": {}, "statement": { "value": "CREATE VIEW mock_view AS\nSELECT\n mock_column1,\n mock_column2\nFROM mock_table", "language": "SQL" @@ -1161,7 +1162,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00-77emm3", + "runId": "oracle-2022_02_03-07_00_00-7ay0kw", "lastRunId": "no-run-id-provided" } }, @@ -1183,20 +1184,20 @@ "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:oracle,oradoc.schema1.mock_table,PROD),mock_column2)" }, { - "entity": "urn:li:dataset:(urn:li:dataPlatform:oracle,OraDoc.schema1.view1,PROD)" + "entity": "urn:li:dataset:(urn:li:dataPlatform:oracle,oradoc.schema1.view1,PROD)" }, { - "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:oracle,OraDoc.schema1.view1,PROD),mock_column1)" + "entity": 
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:oracle,oradoc.schema1.view1,PROD),mock_column1)" }, { - "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:oracle,OraDoc.schema1.view1,PROD),mock_column2)" + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:oracle,oradoc.schema1.view1,PROD),mock_column2)" } ] } }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00-77emm3", + "runId": "oracle-2022_02_03-07_00_00-7ay0kw", "lastRunId": "no-run-id-provided" } }, @@ -1212,13 +1213,13 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00-77emm3", + "runId": "oracle-2022_02_03-07_00_00-7ay0kw", "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,OraDoc.schema2.view1,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,oradoc.schema2.view1,PROD)", "changeType": "UPSERT", "aspectName": "upstreamLineage", "aspect": { @@ -1246,7 +1247,7 @@ ], "downstreamType": "FIELD", "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:oracle,OraDoc.schema2.view1,PROD),mock_column1)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:oracle,oradoc.schema2.view1,PROD),mock_column1)" ], "confidenceScore": 0.2, "query": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Aoracle%2COraDoc.schema2.view1%2CPROD%29" @@ -1258,7 +1259,7 @@ ], "downstreamType": "FIELD", "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:oracle,OraDoc.schema2.view1,PROD),mock_column2)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:oracle,oradoc.schema2.view1,PROD),mock_column2)" ], "confidenceScore": 0.2, "query": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Aoracle%2COraDoc.schema2.view1%2CPROD%29" @@ -1268,7 +1269,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": 
"oracle-2022_02_03-07_00_00-77emm3", + "runId": "oracle-2022_02_03-07_00_00-7ay0kw", "lastRunId": "no-run-id-provided" } }, @@ -1279,6 +1280,7 @@ "aspectName": "queryProperties", "aspect": { "json": { + "customProperties": {}, "statement": { "value": "CREATE VIEW mock_view AS\nSELECT\n mock_column1,\n mock_column2\nFROM mock_table", "language": "SQL" @@ -1296,7 +1298,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00-77emm3", + "runId": "oracle-2022_02_03-07_00_00-7ay0kw", "lastRunId": "no-run-id-provided" } }, @@ -1318,20 +1320,20 @@ "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:oracle,oradoc.schema2.mock_table,PROD),mock_column2)" }, { - "entity": "urn:li:dataset:(urn:li:dataPlatform:oracle,OraDoc.schema2.view1,PROD)" + "entity": "urn:li:dataset:(urn:li:dataPlatform:oracle,oradoc.schema2.view1,PROD)" }, { - "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:oracle,OraDoc.schema2.view1,PROD),mock_column1)" + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:oracle,oradoc.schema2.view1,PROD),mock_column1)" }, { - "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:oracle,OraDoc.schema2.view1,PROD),mock_column2)" + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:oracle,oradoc.schema2.view1,PROD),mock_column2)" } ] } }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00-77emm3", + "runId": "oracle-2022_02_03-07_00_00-7ay0kw", "lastRunId": "no-run-id-provided" } }, @@ -1347,7 +1349,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00-77emm3", + "runId": "oracle-2022_02_03-07_00_00-7ay0kw", "lastRunId": "no-run-id-provided" } }, @@ -1363,7 +1365,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00-77emm3", + "runId": "oracle-2022_02_03-07_00_00-7ay0kw", "lastRunId": "no-run-id-provided" } }, @@ -1379,7 +1381,7 
@@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00-77emm3", + "runId": "oracle-2022_02_03-07_00_00-7ay0kw", "lastRunId": "no-run-id-provided" } } diff --git a/metadata-service/configuration/src/main/resources/application.yaml b/metadata-service/configuration/src/main/resources/application.yaml index 388275309ae18b..ed148e2b48f9f3 100644 --- a/metadata-service/configuration/src/main/resources/application.yaml +++ b/metadata-service/configuration/src/main/resources/application.yaml @@ -462,7 +462,9 @@ systemUpdate: batchSize: ${BOOTSTRAP_SYSTEM_UPDATE_PROPERTY_DEFINITIONS_BATCH_SIZE:500} delayMs: ${BOOTSTRAP_SYSTEM_UPDATE_PROPERTY_DEFINITIONS_DELAY_MS:1000} limit: ${BOOTSTRAP_SYSTEM_UPDATE_PROPERTY_DEFINITIONS_CLL_LIMIT:0} - + disableUrnLowercasing: + enabled: ${BOOTSTRAP_SYSTEM_UPDATE_DISABLE_URN_LOWERCASING_ENABLED:true} + structuredProperties: enabled: ${ENABLE_STRUCTURED_PROPERTIES_HOOK:true} # applies structured properties mappings writeEnabled: ${ENABLE_STRUCTURED_PROPERTIES_WRITE:true} # write structured property values