Skip to content

Commit f0bdc24

Browse files
authored
feat(ingest/dbt): dbt model performance (#9992)
1 parent ef0048e commit f0bdc24

35 files changed

+19911
-236
lines changed

datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataprocessinst/mappers/DataProcessInstanceRunEventMapper.java

+3
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,9 @@ public com.linkedin.datahub.graphql.generated.DataProcessRunEvent apply(
4242
if (runEvent.hasResult()) {
4343
result.setResult(DataProcessInstanceRunResultMapper.map(context, runEvent.getResult()));
4444
}
45+
if (runEvent.hasDurationMillis()) {
46+
result.setDurationMillis(runEvent.getDurationMillis());
47+
}
4548

4649
return result;
4750
}

datahub-graphql-core/src/main/resources/entity.graphql

+5
Original file line numberDiff line numberDiff line change
@@ -6471,6 +6471,11 @@ type DataProcessRunEvent implements TimeSeriesAspect {
64716471
The timestamp associated with the run event in milliseconds
64726472
"""
64736473
timestampMillis: Long!
6474+
6475+
"""
6476+
The duration of the run in milliseconds
6477+
"""
6478+
durationMillis: Long
64746479
}
64756480

64766481
"""

datahub-web-react/.eslintrc.js

+2-1
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ module.exports = {
88
'plugin:vitest/recommended',
99
'prettier',
1010
],
11-
plugins: ['@typescript-eslint'],
11+
plugins: ['@typescript-eslint', 'react-refresh'],
1212
parserOptions: {
1313
ecmaVersion: 2020, // Allows for the parsing of modern ECMAScript features
1414
sourceType: 'module', // Allows for the use of imports
@@ -48,6 +48,7 @@ module.exports = {
4848
],
4949
'vitest/prefer-to-be': 'off',
5050
'@typescript-eslint/no-use-before-define': ['error', { functions: false, classes: false }],
51+
'react-refresh/only-export-components': ['warn', { 'allowConstantExport': true }],
5152
},
5253
settings: {
5354
react: {

datahub-web-react/package.json

+1
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,7 @@
124124
"eslint-plugin-jsx-a11y": "^6.5.1",
125125
"eslint-plugin-react": "^7.28.0",
126126
"eslint-plugin-react-hooks": "^4.3.0",
127+
"eslint-plugin-react-refresh": "^0.4.6",
127128
"eslint-plugin-vitest": "^0.3.17",
128129
"jsdom": "^22.1.0",
129130
"less": "^4.2.0",

datahub-web-react/src/Mocks.tsx

+1-2
Original file line numberDiff line numberDiff line change
@@ -625,8 +625,7 @@ export const dataset3 = {
625625
health: [],
626626
assertions: null,
627627
status: null,
628-
readRuns: null,
629-
writeRuns: null,
628+
runs: null,
630629
testResults: null,
631630
siblings: null,
632631
statsSummary: null,

datahub-web-react/src/app/entity/dataset/DatasetEntity.tsx

+4-7
Original file line numberDiff line numberDiff line change
@@ -178,18 +178,15 @@ export class DatasetEntity implements Entity<Dataset> {
178178
},
179179
},
180180
{
181-
name: 'Operations',
181+
name: 'Runs',
182+
// TODO: Rename this to DatasetRunsTab.
182183
component: OperationsTab,
183184
display: {
184185
visible: (_, dataset: GetDatasetQuery) => {
185-
return (
186-
(dataset?.dataset?.readRuns?.total || 0) + (dataset?.dataset?.writeRuns?.total || 0) > 0
187-
);
186+
return (dataset?.dataset?.runs?.total || 0) > 0;
188187
},
189188
enabled: (_, dataset: GetDatasetQuery) => {
190-
return (
191-
(dataset?.dataset?.readRuns?.total || 0) + (dataset?.dataset?.writeRuns?.total || 0) > 0
192-
);
189+
return (dataset?.dataset?.runs?.total || 0) > 0;
193190
},
194191
},
195192
},

datahub-web-react/src/app/entity/dataset/profile/OperationsTab.tsx

+82-37
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,13 @@
11
import { DeliveredProcedureOutlined } from '@ant-design/icons';
2-
import { Button, Pagination, Table, Tooltip, Typography } from 'antd';
3-
import ButtonGroup from 'antd/lib/button/button-group';
2+
import { Pagination, Table, Tooltip, Typography } from 'antd';
43
import React, { useState } from 'react';
54
import styled from 'styled-components';
65

7-
import { useGetDatasetRunsQuery } from '../../../../graphql/dataset.generated';
6+
import { GetDatasetRunsQuery, useGetDatasetRunsQuery } from '../../../../graphql/dataset.generated';
87
import {
98
DataProcessInstanceRunResultType,
109
DataProcessRunStatus,
10+
EntityType,
1111
RelationshipDirection,
1212
} from '../../../../types.generated';
1313
import {
@@ -20,6 +20,8 @@ import { ANTD_GRAY } from '../../shared/constants';
2020
import { useEntityData } from '../../shared/EntityContext';
2121
import LoadingSvg from '../../../../images/datahub-logo-color-loading_pendulum.svg?react';
2222
import { scrollToTop } from '../../../shared/searchUtils';
23+
import { formatDuration } from '../../../shared/formatDuration';
24+
import { notEmpty } from '../../shared/utils';
2325

2426
const ExternalUrlLink = styled.a`
2527
font-size: 16px;
@@ -32,10 +34,6 @@ const PaginationControlContainer = styled.div`
3234
text-align: center;
3335
`;
3436

35-
const ReadWriteButtonGroup = styled(ButtonGroup)`
36-
padding: 12px;
37-
`;
38-
3937
const LoadingText = styled.div`
4038
margin-top: 18px;
4139
font-size: 12px;
@@ -67,6 +65,12 @@ const columns = [
6765
<Tooltip title={new Date(Number(value)).toUTCString()}>{new Date(Number(value)).toLocaleString()}</Tooltip>
6866
),
6967
},
68+
{
69+
title: 'Duration',
70+
dataIndex: 'duration',
71+
key: 'duration',
72+
render: (durationMs: number) => formatDuration(durationMs),
73+
},
7074
{
7175
title: 'Run ID',
7276
dataIndex: 'name',
@@ -129,14 +133,59 @@ const columns = [
129133
const PAGE_SIZE = 20;
130134

131135
export const OperationsTab = () => {
132-
const { urn } = useEntityData();
136+
const { urn, entityData } = useEntityData();
133137
const [page, setPage] = useState(1);
134-
const [direction, setDirection] = useState(RelationshipDirection.Incoming);
135138

136-
const { loading, data } = useGetDatasetRunsQuery({
137-
variables: { urn, start: (page - 1) * PAGE_SIZE, count: PAGE_SIZE, direction },
139+
// Fetch data across all siblings.
140+
const allUrns = [urn, ...(entityData?.siblings?.siblings || []).map((sibling) => sibling?.urn).filter(notEmpty)];
141+
const loadings: boolean[] = [];
142+
const datas: GetDatasetRunsQuery[] = [];
143+
allUrns.forEach((entityUrn) => {
144+
// Because there's a consistent number and order of the urns,
145+
// this usage of a hook within a loop should be safe.
146+
// eslint-disable-next-line react-hooks/rules-of-hooks
147+
const { loading, data } = useGetDatasetRunsQuery({
148+
variables: {
149+
urn: entityUrn,
150+
start: (page - 1) * PAGE_SIZE,
151+
count: PAGE_SIZE,
152+
direction: RelationshipDirection.Outgoing,
153+
},
154+
});
155+
loadings.push(loading);
156+
if (data) {
157+
datas.push(data);
158+
}
138159
});
139-
const runs = data && data?.dataset?.runs?.runs;
160+
161+
const loading = loadings.some((loadingEntry) => loadingEntry);
162+
163+
// Merge the runs data from all entities.
164+
// If there's more than one entity contributing to the data, then we can't do pagination.
165+
let canPaginate = true;
166+
let dataRuns: NonNullable<GetDatasetRunsQuery['dataset']>['runs'] | undefined;
167+
if (datas.length > 0) {
168+
let numWithRuns = 0;
169+
for (let i = 0; i < datas.length; i++) {
170+
if (datas[i]?.dataset?.runs?.total) {
171+
numWithRuns++;
172+
}
173+
174+
if (dataRuns && dataRuns.runs) {
175+
dataRuns.runs.push(...(datas[i]?.dataset?.runs?.runs || []));
176+
dataRuns.total = (dataRuns.total ?? 0) + (datas[i]?.dataset?.runs?.total ?? 0);
177+
} else {
178+
dataRuns = JSON.parse(JSON.stringify(datas[i]?.dataset?.runs));
179+
}
180+
}
181+
182+
if (numWithRuns > 1) {
183+
canPaginate = false;
184+
}
185+
}
186+
187+
// This also sorts the runs data across all entities.
188+
const runs = dataRuns?.runs?.sort((a, b) => (b?.created?.time ?? 0) - (a?.created?.time ?? 0));
140189

141190
const tableData = runs
142191
?.filter((run) => run)
@@ -145,33 +194,27 @@ export const OperationsTab = () => {
145194
name: run?.name,
146195
status: run?.state?.[0]?.status,
147196
resultType: run?.state?.[0]?.result?.resultType,
197+
duration: run?.state?.[0]?.durationMillis,
148198
inputs: run?.inputs?.relationships.map((relationship) => relationship.entity),
149199
outputs: run?.outputs?.relationships.map((relationship) => relationship.entity),
150200
externalUrl: run?.externalUrl,
151201
parentTemplate: run?.parentTemplate?.relationships?.[0]?.entity,
152202
}));
153203

204+
// If the table contains jobs, we need to show the job-related columns. Otherwise we can simplify the table.
205+
const containsJobs = tableData?.some((run) => run.parentTemplate?.type !== EntityType.Dataset);
206+
const simplifiedColumns = containsJobs
207+
? columns
208+
: columns.filter((column) => !['name', 'inputs', 'outputs'].includes(column.key));
209+
154210
const onChangePage = (newPage: number) => {
155211
scrollToTop();
156212
setPage(newPage);
157213
};
158214

215+
// TODO: Much of this file is duplicated from RunsTab.tsx. We should refactor this to share code.
159216
return (
160217
<>
161-
<ReadWriteButtonGroup>
162-
<Button
163-
type={direction === RelationshipDirection.Incoming ? 'primary' : 'default'}
164-
onClick={() => setDirection(RelationshipDirection.Incoming)}
165-
>
166-
Reads
167-
</Button>
168-
<Button
169-
type={direction === RelationshipDirection.Outgoing ? 'primary' : 'default'}
170-
onClick={() => setDirection(RelationshipDirection.Outgoing)}
171-
>
172-
Writes
173-
</Button>
174-
</ReadWriteButtonGroup>
175218
{loading && (
176219
<LoadingContainer>
177220
<LoadingSvg height={80} width={80} />
@@ -180,17 +223,19 @@ export const OperationsTab = () => {
180223
)}
181224
{!loading && (
182225
<>
183-
<Table dataSource={tableData} columns={columns} pagination={false} />
184-
<PaginationControlContainer>
185-
<Pagination
186-
current={page}
187-
pageSize={PAGE_SIZE}
188-
total={data?.dataset?.runs?.total || 0}
189-
showLessItems
190-
onChange={onChangePage}
191-
showSizeChanger={false}
192-
/>
193-
</PaginationControlContainer>
226+
<Table dataSource={tableData} columns={simplifiedColumns} pagination={false} />
227+
{canPaginate && (
228+
<PaginationControlContainer>
229+
<Pagination
230+
current={page}
231+
pageSize={PAGE_SIZE}
232+
total={dataRuns?.total || 0}
233+
showLessItems
234+
onChange={onChangePage}
235+
showSizeChanger={false}
236+
/>
237+
</PaginationControlContainer>
238+
)}
194239
</>
195240
)}
196241
</>

datahub-web-react/src/app/entity/shared/siblingUtils.ts

+3-1
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,9 @@ function cleanHelper(obj, visited) {
2424
if ((v && typeof v === 'object' && !Object.keys(v).length) || v === null || v === undefined || v === '') {
2525
if (Array.isArray(object)) {
2626
object.splice(Number(k), 1);
27-
} else {
27+
} else if (Object.getOwnPropertyDescriptor(object, k)?.configurable) {
28+
// TODO(hsheth2): Not sure why we needed to add the above "configurable" check.
29+
// However, I was getting errors when it was not present in dev mode (but not in prod mode).
2830
delete object[k];
2931
}
3032
}

datahub-web-react/src/graphql/dataProcess.graphql

+1
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ fragment runResults on DataProcessInstanceResult {
1818
nativeResultType
1919
}
2020
timestampMillis
21+
durationMillis
2122
}
2223
inputs: relationships(input: { types: ["Consumes"], direction: OUTGOING, start: 0, count: 20 }) {
2324
...runRelationshipResults

datahub-web-react/src/graphql/dataset.graphql

+1-6
Original file line numberDiff line numberDiff line change
@@ -121,12 +121,7 @@ fragment nonSiblingDatasetFields on Dataset {
121121
status {
122122
removed
123123
}
124-
readRuns: runs(start: 0, count: 20, direction: INCOMING) {
125-
count
126-
start
127-
total
128-
}
129-
writeRuns: runs(start: 0, count: 20, direction: OUTGOING) {
124+
runs: runs(start: 0, count: 20, direction: OUTGOING) {
130125
count
131126
start
132127
total

datahub-web-react/src/setupTests.ts

+6
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,12 @@ global.matchMedia =
1919

2020
window.location = { ...window.location, replace: () => {} };
2121

22+
// Suppress `Error: Not implemented: window.computedStyle(elt, pseudoElt)`.
23+
// From https://github.com/vitest-dev/vitest/issues/2061
24+
// and https://github.com/NickColley/jest-axe/issues/147#issuecomment-758804533
25+
const { getComputedStyle } = window;
26+
window.getComputedStyle = (elt) => getComputedStyle(elt);
27+
2228
vi.mock('js-cookie', () => ({
2329
default: {
2430
get: () => 'urn:li:corpuser:2',

datahub-web-react/vite.config.ts

+1
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ export default defineConfig(({ mode }) => {
2727
};
2828

2929
return {
30+
appType: 'spa',
3031
plugins: [
3132
react(),
3233
svgr(),

datahub-web-react/yarn.lock

+5
Original file line numberDiff line numberDiff line change
@@ -5799,6 +5799,11 @@ eslint-plugin-react-hooks@^4.3.0:
57995799
resolved "https://registry.yarnpkg.com/eslint-plugin-react-hooks/-/eslint-plugin-react-hooks-4.6.0.tgz#4c3e697ad95b77e93f8646aaa1630c1ba607edd3"
58005800
integrity sha512-oFc7Itz9Qxh2x4gNHStv3BqJq54ExXmfC+a1NjAta66IAN87Wu0R/QArgIS9qKzX3dXKPI9H5crl9QchNMY9+g==
58015801

5802+
eslint-plugin-react-refresh@^0.4.6:
5803+
version "0.4.6"
5804+
resolved "https://registry.yarnpkg.com/eslint-plugin-react-refresh/-/eslint-plugin-react-refresh-0.4.6.tgz#e8e8accab681861baed00c5c12da70267db0936f"
5805+
integrity sha512-NjGXdm7zgcKRkKMua34qVO9doI7VOxZ6ancSvBELJSSoX97jyndXcSoa8XBh69JoB31dNz3EEzlMcizZl7LaMA==
5806+
58025807
eslint-plugin-react@^7.28.0:
58035808
version "7.32.2"
58045809
resolved "https://registry.yarnpkg.com/eslint-plugin-react/-/eslint-plugin-react-7.32.2.tgz#e71f21c7c265ebce01bcbc9d0955170c55571f10"

0 commit comments

Comments
 (0)