Skip to content

Commit 503ee05

Browse files
authored
Add tesseract and foreign object rendering (#86)
Signed-off-by: Joshua Li <[email protected]>
1 parent 7b51bd9 commit 503ee05

File tree

11 files changed

+543
-70
lines changed

11 files changed

+543
-70
lines changed

NOTICE.txt

+3
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,5 @@
11
OpenSearch (https://opensearch.org/)
22
Copyright OpenSearch Contributors
3+
4+
This product includes software developed by
5+
naptha (https://github.com/naptha/tesseract.js/)

common/tesseract/.gitignore

+2
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
*
2+
!.gitignore

package.json

+3-2
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
"cypress:run": "cypress run",
1515
"cypress:open": "cypress open",
1616
"plugin-helpers": "node ../../scripts/plugin_helpers",
17-
"postinstall": "node ./scripts/patch-html2canvas.js"
17+
"postinstall": "node ./scripts/postinstall.js"
1818
},
1919
"dependencies": {
2020
"babel-polyfill": "^6.26.0",
@@ -38,7 +38,8 @@
3838
"react-router-dom": "^5.3.0",
3939
"react-toast-notifications": "^2.4.0",
4040
"set-interval-async": "1.0.33",
41-
"showdown": "^1.9.1"
41+
"showdown": "^1.9.1",
42+
"tesseract.js": "^4.0.2"
4243
},
4344
"devDependencies": {
4445
"@elastic/eslint-import-resolver-kibana": "link:../../packages/osd-eslint-import-resolver-opensearch-dashboards",

public/components/context_menu/context_menu_ui.js

+27-27
Large diffs are not rendered by default.

public/components/visual_report/assets/report_styles.ts

+1
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ html,
88
body {
99
margin: 0;
1010
padding: 0;
11+
padding-top: 0px;
1112
}
1213
1314
iframe, embed, object {

public/components/visual_report/generate_report.ts

+105-33
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
import createDOMPurify from 'dompurify';
77
import html2canvas from 'html2canvas';
88
import jsPDF from 'jspdf';
9+
import { createWorker } from 'tesseract.js';
910
import { v1 as uuidv1 } from 'uuid';
1011
import { ReportSchemaType } from '../../../server/model';
1112
import { uiSettingsService } from '../utils/settings_service';
@@ -57,15 +58,16 @@ const removeNonReportElements = (
5758
reportSource: VISUAL_REPORT_TYPE
5859
) => {
5960
// remove buttons
60-
doc.querySelectorAll("button[class^='euiButton']:not(.visLegend__button)").forEach((e) => e.remove());
61+
doc
62+
.querySelectorAll("button[class^='euiButton']:not(.visLegend__button)")
63+
.forEach((e) => e.remove());
6164
// remove top navBar
6265
doc.querySelectorAll("[class^='euiHeader']").forEach((e) => e.remove());
6366
// remove visualization editor
6467
if (reportSource === VISUAL_REPORT_TYPE.visualization) {
6568
doc.querySelector('[data-test-subj="splitPanelResizer"]')?.remove();
6669
doc.querySelector('.visEditor__collapsibleSidebar')?.remove();
6770
}
68-
doc.body.style.paddingTop = '0px';
6971
};
7072

7173
const addReportHeader = (doc: Document, header: string) => {
@@ -96,8 +98,10 @@ const addReportFooter = (doc: Document, footer: string) => {
9698

9799
const addReportStyle = (doc: Document, style: string) => {
98100
const styleElement = document.createElement('style');
101+
styleElement.className = 'reportInjectedStyles';
99102
styleElement.innerHTML = style;
100103
doc.getElementsByTagName('head')[0].appendChild(styleElement);
104+
doc.body.style.paddingTop = '0px';
101105
};
102106

103107
const computeHeight = (height: number, header: string, footer: string) => {
@@ -115,6 +119,7 @@ const computeHeight = (height: number, header: string, footer: string) => {
115119

116120
export const generateReport = async (id: string, forceDelay = 15000) => {
117121
const http = uiSettingsService.getHttpClient();
122+
const useForeignObjectRendering = uiSettingsService.get('reporting:useFOR');
118123
const DOMPurify = createDOMPurify(window);
119124

120125
const report = await http.get<ReportSchemaType>(
@@ -154,6 +159,26 @@ export const generateReport = async (id: string, forceDelay = 15000) => {
154159
}
155160
await timeout(forceDelay);
156161

162+
// Style changes onclone does not work with foreign object rendering enabled.
163+
// Additionally increase span width to prevent text being truncated
164+
if (useForeignObjectRendering) {
165+
document
166+
.querySelectorAll<HTMLSpanElement>('span:not([data-html2canvas-ignore])')
167+
.forEach((el) => {
168+
if (!el.closest('.globalFilterItem'))
169+
el.style.width = el.offsetWidth + 30 + 'px';
170+
});
171+
document
172+
.querySelectorAll<HTMLSpanElement>(
173+
'span.globalFilterItem:not([data-html2canvas-ignore])'
174+
)
175+
.forEach((el) => (el.style.width = el.offsetWidth + 5 + 'px'));
176+
addReportHeader(document, header);
177+
addReportFooter(document, footer);
178+
addReportStyle(document, reportingStyle);
179+
await timeout(1000);
180+
}
181+
157182
const width = document.documentElement.scrollWidth;
158183
const height = computeHeight(
159184
document.documentElement.scrollHeight,
@@ -170,40 +195,87 @@ export const generateReport = async (id: string, forceDelay = 15000) => {
170195
imageTimeout: 30000,
171196
useCORS: true,
172197
removeContainer: false,
198+
allowTaint: true,
199+
foreignObjectRendering: useForeignObjectRendering,
173200
onclone: function (documentClone) {
174201
removeNonReportElements(documentClone, reportSource);
175-
addReportHeader(documentClone, header);
176-
addReportFooter(documentClone, footer);
177-
addReportStyle(documentClone, reportingStyle);
202+
if (!useForeignObjectRendering) {
203+
addReportHeader(documentClone, header);
204+
addReportFooter(documentClone, footer);
205+
addReportStyle(documentClone, reportingStyle);
206+
}
178207
},
179-
}).then(function (canvas) {
180-
// TODO remove this and 'removeContainer: false' when https://github.com/niklasvh/html2canvas/pull/2949 is merged
181-
document
182-
.querySelectorAll<HTMLIFrameElement>('.html2canvas-container')
183-
.forEach((e) => {
184-
const iframe = e.contentWindow;
185-
if (e) {
186-
e.src = 'about:blank';
187-
if (iframe) {
188-
iframe.document.write('');
189-
iframe.document.clear();
190-
iframe.close();
208+
})
209+
.then(async function (canvas) {
210+
// TODO remove this and 'removeContainer: false' when https://github.com/niklasvh/html2canvas/pull/2949 is merged
211+
document
212+
.querySelectorAll<HTMLIFrameElement>('.html2canvas-container')
213+
.forEach((e) => {
214+
const iframe = e.contentWindow;
215+
if (e) {
216+
e.src = 'about:blank';
217+
if (iframe) {
218+
iframe.document.write('');
219+
iframe.document.clear();
220+
iframe.close();
221+
}
222+
e.remove();
191223
}
192-
e.remove();
193-
}
194-
});
224+
});
195225

196-
if (format === 'png') {
197-
const link = document.createElement('a');
198-
link.download = fileName;
199-
link.href = canvas.toDataURL();
200-
link.click();
201-
} else {
202-
const orient = canvas.width > canvas.height ? 'landscape' : 'portrait';
203-
const pdf = new jsPDF(orient, 'px', [canvas.width, canvas.height]);
204-
pdf.addImage(canvas, 'JPEG', 0, 0, canvas.width, canvas.height);
205-
pdf.save(fileName);
206-
}
207-
return true;
208-
});
226+
if (format === 'png') {
227+
const link = document.createElement('a');
228+
link.download = fileName;
229+
link.href = canvas.toDataURL();
230+
link.click();
231+
} else if (uiSettingsService.get('reporting:useOcr')) {
232+
const worker = await createWorker({
233+
workerPath: '../api/reporting/tesseract.js/dist/worker.min.js',
234+
langPath: '../api/reporting/tesseract-lang-data',
235+
corePath: '../api/reporting/tesseract.js-core/tesseract-core.wasm.js',
236+
});
237+
await worker.loadLanguage('eng');
238+
await worker.initialize('eng');
239+
const {
240+
data: { text, pdf },
241+
} = await worker
242+
.recognize(canvas.toDataURL(), { pdfTitle: fileName }, { pdf: true })
243+
.catch((e) => console.error('recognize', e));
244+
await worker.terminate();
245+
246+
const blob = new Blob([new Uint8Array(pdf)], {
247+
type: 'application/pdf',
248+
});
249+
const link = document.createElement('a');
250+
if (link.download !== undefined) {
251+
const url = URL.createObjectURL(blob);
252+
link.setAttribute('href', url);
253+
link.setAttribute('download', fileName);
254+
link.style.visibility = 'hidden';
255+
document.body.appendChild(link);
256+
link.click();
257+
document.body.removeChild(link);
258+
}
259+
} else {
260+
const orient = canvas.width > canvas.height ? 'landscape' : 'portrait';
261+
const pdf = new jsPDF(orient, 'px', [canvas.width, canvas.height]);
262+
pdf.addImage(canvas, 'JPEG', 0, 0, canvas.width, canvas.height);
263+
pdf.save(fileName);
264+
}
265+
return true;
266+
})
267+
.finally(() => {
268+
if (useForeignObjectRendering) {
269+
document
270+
.querySelectorAll<HTMLSpanElement>(
271+
'span:not(.data-html2canvas-ignore)'
272+
)
273+
.forEach((el) => (el.style.width = ''));
274+
document.querySelectorAll('.reportWrapper').forEach((e) => e.remove());
275+
document
276+
.querySelectorAll('.reportInjectedStyles')
277+
.forEach((e) => e.remove());
278+
document.body.style.paddingTop = '';
279+
}
280+
});
209281
};

scripts/patch-html2canvas.js scripts/postinstall.js

+15
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55

66
// @ts-check
77
// workaround for Safari support before https://github.com/niklasvh/html2canvas/pull/2911 is merged
8+
const https = require('https');
9+
const fs = require('fs');
810
const replace = require('replace-in-file');
911

1012
const options = {
@@ -31,3 +33,16 @@ try {
3133
error
3234
);
3335
}
36+
37+
// download tesseract model
38+
const modelFile = fs.createWriteStream(__dirname + '/../common/tesseract/eng.traineddata.gz');
39+
https.get(
40+
'https://raw.githubusercontent.com/naptha/tessdata/gh-pages/4.0.0_best/eng.traineddata.gz',
41+
function (response) {
42+
response.pipe(modelFile);
43+
modelFile.on('finish', () => {
44+
modelFile.close();
45+
console.log('Downloaded eng.traineddata.gz for tesseract.js');
46+
});
47+
}
48+
);

server/plugin.ts

+25-7
Original file line numberDiff line numberDiff line change
@@ -3,23 +3,24 @@
33
* SPDX-License-Identifier: Apache-2.0
44
*/
55

6+
import { schema } from '@osd/config-schema';
67
import {
7-
PluginInitializerContext,
88
CoreSetup,
99
CoreStart,
10-
Plugin,
11-
Logger,
1210
ILegacyClusterClient,
11+
Logger,
12+
Plugin,
13+
PluginInitializerContext,
1314
} from '../../../src/core/server';
1415
import opensearchReportsPlugin from './backend/opensearch-reports-plugin';
16+
import { NotificationsPlugin } from './clusters/notificationsPlugin';
17+
import { buildConfig, ReportingConfigType } from './config';
18+
import { ReportingConfig } from './config/config';
19+
import registerRoutes from './routes';
1520
import {
1621
ReportsDashboardsPluginSetup,
1722
ReportsDashboardsPluginStart,
1823
} from './types';
19-
import registerRoutes from './routes';
20-
import { NotificationsPlugin } from './clusters/notificationsPlugin';
21-
import { buildConfig, ReportingConfigType } from './config';
22-
import { ReportingConfig } from './config/config';
2324

2425
export interface ReportsPluginRequestContext {
2526
logger: Logger;
@@ -49,6 +50,23 @@ export class ReportsDashboardsPlugin
4950
public async setup(core: CoreSetup) {
5051
this.logger.debug('reports-dashboards: Setup');
5152

53+
core.uiSettings.register({
54+
'reporting:useOcr': {
55+
name: 'Reporting use OCR on PDF',
56+
value: false,
57+
description:
58+
'Whether to run optical character recognition on PDF reports to make text selectable',
59+
schema: schema.boolean(),
60+
},
61+
'reporting:useFOR': {
62+
name: 'Reporting use ForeignObject rendering',
63+
value: true,
64+
description:
65+
'Whether to use ForeignObject rendering when generating reports. If it causes issues, try disabling this option.',
66+
schema: schema.boolean(),
67+
},
68+
});
69+
5270
try {
5371
const config = await buildConfig(
5472
this.initializerContext,

server/routes/index.ts

+2
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ import registerReportDefinitionRoute from './reportDefinition';
88
import registerReportSourceRoute from './reportSource';
99
import registerMetricRoute from './metric';
1010
import registerNotificationRoute from './notifications';
11+
import registerTesseractRoute from './tesseract';
1112
import { IRouter } from '../../../../src/core/server';
1213
import { ReportingConfig } from 'server/config/config';
1314

@@ -17,4 +18,5 @@ export default function (router: IRouter, config: ReportingConfig) {
1718
registerReportSourceRoute(router);
1819
registerMetricRoute(router);
1920
registerNotificationRoute(router);
21+
registerTesseractRoute(router);
2022
}

0 commit comments

Comments
 (0)