Skip to content

Commit 0afa4ea

Browse files
authored Jul 13, 2022
Move away from Node.js built-ins
This makes it possible to build this project easily with bundlers such as Webpack 5. Because saxes does not support a lenient parsing mode, the `strict` option has been removed in this change and XML is now always parsed strictly.
1 parent a0cba55 commit 0afa4ea

File tree

7 files changed

+638
-85
lines changed

7 files changed

+638
-85
lines changed
 

‎.github/workflows/ci.yml

+15-1
Original file line number | Diff line number | Diff line change
@@ -89,4 +89,18 @@ jobs:
8989
run: yarn install
9090
- name: Build project
9191
run: yarn run build
92-
- run: yarn run spec
92+
- run: yarn run spec
93+
94+
webpack:
95+
runs-on: ubuntu-latest
96+
steps:
97+
- uses: actions/checkout@v3
98+
- uses: actions/cache@v3
99+
with:
100+
path: '**/node_modules'
101+
key: ${{ runner.os }}-webpack-modules-${{ hashFiles('**/yarn.lock') }}
102+
- uses: actions/setup-node@v3
103+
with:
104+
node-version: 18.x
105+
- run: yarn install
106+
- run: npx webpack

‎lib/ParseError.ts

+3-3
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
1-
import {SAXParser} from "sax";
1+
import {SaxesParser} from "saxes";
22
import {RdfXmlParser} from "./RdfXmlParser";
33

44
/**
@@ -7,8 +7,8 @@ import {RdfXmlParser} from "./RdfXmlParser";
77
export class ParseError extends Error {
88

99
constructor(parser: RdfXmlParser, message: string) {
10-
const saxParser: SAXParser = (<any> (<any> parser).saxStream)._parser;
11-
super(parser.trackPosition ? `Line ${saxParser.line + 1} column ${saxParser.column + 1}: ${message}` : message);
10+
const saxParser: SaxesParser = (<any>parser).saxParser;
11+
super(parser.trackPosition ? `Line ${saxParser.line} column ${saxParser.column + 1}: ${message}` : message);
1212
}
1313

1414
}

‎lib/RdfXmlParser.ts

+23-33
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,7 @@
11
import * as RDF from "@rdfjs/types";
22
import {resolve} from "relative-to-absolute-iri";
3-
import {createStream, SAXStream, Tag} from "sax";
4-
import {PassThrough, Transform, TransformCallback} from "stream";
3+
import {SaxesParser, SaxesTagPlain} from "saxes";
4+
import {PassThrough, Transform} from "readable-stream";
55
import EventEmitter = NodeJS.EventEmitter;
66
import {ParseError} from "./ParseError";
77
import {DataFactory} from "rdf-data-factory";
@@ -52,9 +52,8 @@ export class RdfXmlParser extends Transform implements RDF.Sink<EventEmitter, RD
5252
private readonly dataFactory: RDF.DataFactory;
5353
private readonly baseIRI: string;
5454
private readonly defaultGraph?: RDF.Quad_Graph;
55-
private readonly strict?: boolean;
5655
private readonly allowDuplicateRdfIds?: boolean;
57-
private readonly saxStream: SAXStream;
56+
private readonly saxParser: SaxesParser;
5857

5958
private readonly activeTagStack: IActiveTag[] = [];
6059
private readonly nodeIds: {[id: string]: boolean} = {};
@@ -76,25 +75,20 @@ export class RdfXmlParser extends Transform implements RDF.Sink<EventEmitter, RD
7675
this.defaultGraph = this.dataFactory.defaultGraph();
7776
}
7877

79-
this.saxStream = createStream(this.strict, { xmlns: false, position: this.trackPosition });
80-
81-
// Workaround for an issue in SAX where non-strict mode either lower- or upper-cases all tags.
82-
if (!this.strict) {
83-
(<any> this.saxStream)._parser.looseCase = 'toString';
84-
}
78+
this.saxParser = new SaxesParser({ xmlns: false, position: this.trackPosition });
8579

8680
this.attachSaxListeners();
8781
}
8882

8983
/**
9084
* Parse the namespace of the given tag,
9185
* and take into account the namespace of the parent tag that was already parsed.
92-
* @param {Tag} tag A tag to parse the namespace from.
86+
* @param {SaxesTagPlain} tag A tag to parse the namespace from.
9387
* @param {IActiveTag} parentTag The parent tag, or null if this tag is the root.
9488
* @return {{[p: string]: string}[]} An array of namespaces,
9589
* where the last ones have a priority over the first ones.
9690
*/
97-
public static parseNamespace(tag: Tag, parentTag?: IActiveTag): {[prefix: string]: string}[] {
91+
public static parseNamespace(tag: SaxesTagPlain, parentTag?: IActiveTag): {[prefix: string]: string}[] {
9892
const thisNs: {[prefix: string]: string} = {};
9993
let hasNs: boolean = false;
10094
for (const attributeKey in tag.attributes) {
@@ -184,9 +178,9 @@ export class RdfXmlParser extends Transform implements RDF.Sink<EventEmitter, RD
184178
return parsed;
185179
}
186180

187-
public _transform(chunk: any, encoding: BufferEncoding, callback: TransformCallback) {
181+
public _transform(chunk: any, encoding: BufferEncoding, callback: (error?: Error | null, data?: any) => void) {
188182
try {
189-
this.saxStream.write(chunk, encoding);
183+
this.saxParser.write(chunk);
190184
} catch (e) {
191185
return callback(e);
192186
}
@@ -244,18 +238,18 @@ export class RdfXmlParser extends Transform implements RDF.Sink<EventEmitter, RD
244238
}
245239

246240
protected attachSaxListeners() {
247-
this.saxStream.on('error', (error) => this.emit('error', error));
248-
this.saxStream.on('opentag', this.onTag.bind(this));
249-
this.saxStream.on('text', this.onText.bind(this));
250-
this.saxStream.on('closetag', this.onCloseTag.bind(this));
251-
this.saxStream.on('doctype', this.onDoctype.bind(this));
241+
this.saxParser.on('error', (error) => this.emit('error', error));
242+
this.saxParser.on('opentag', this.onTag.bind(this));
243+
this.saxParser.on('text', this.onText.bind(this));
244+
this.saxParser.on('closetag', this.onCloseTag.bind(this));
245+
this.saxParser.on('doctype', this.onDoctype.bind(this));
252246
}
253247

254248
/**
255249
* Handle the given tag.
256-
* @param {QualifiedTag} tag A SAX tag.
250+
* @param {SaxesTagPlain} tag A SAX tag.
257251
*/
258-
protected onTag(tag: Tag) {
252+
protected onTag(tag: SaxesTagPlain) {
259253
// Get parent tag
260254
const parentTag: IActiveTag = this.activeTagStack.length
261255
? this.activeTagStack[this.activeTagStack.length - 1] : null;
@@ -306,12 +300,12 @@ export class RdfXmlParser extends Transform implements RDF.Sink<EventEmitter, RD
306300

307301
/**
308302
* Handle the given node element in resource-mode.
309-
* @param {QualifiedTag} tag A SAX tag.
303+
* @param {SaxesTagPlain} tag A SAX tag.
310304
* @param {IActiveTag} activeTag The currently active tag.
311305
* @param {IActiveTag} parentTag The parent tag or null.
312306
* @param {boolean} rootTag If we are currently processing the root tag.
313307
*/
314-
protected onTagResource(tag: Tag, activeTag: IActiveTag, parentTag: IActiveTag, rootTag: boolean) {
308+
protected onTagResource(tag: SaxesTagPlain, activeTag: IActiveTag, parentTag: IActiveTag, rootTag: boolean) {
315309
const tagExpanded: IExpandedPrefix = RdfXmlParser.expandPrefixedTerm(tag.name, activeTag.ns, this);
316310

317311
activeTag.childrenParseType = ParseType.PROPERTY;
@@ -342,7 +336,7 @@ export class RdfXmlParser extends Transform implements RDF.Sink<EventEmitter, RD
342336
let subjectValueBlank: boolean = false;
343337
let explicitType: string = null;
344338
for (const attributeKey in tag.attributes) {
345-
const attributeValue: string = tag.attributes[attributeKey];
339+
const attributeValue = tag.attributes[attributeKey];
346340
const attributeKeyExpanded: IExpandedPrefix = RdfXmlParser.expandPrefixedTerm(attributeKey, activeTag.ns, this);
347341
if (parentTag && attributeKeyExpanded.uri === RdfXmlParser.RDF) {
348342
switch (attributeKeyExpanded.local) {
@@ -475,11 +469,11 @@ while ${attributeValue} and ${activeSubjectValue} where found.`);
475469

476470
/**
477471
* Handle the given property element in property-mode.
478-
* @param {QualifiedTag} tag A SAX tag.
472+
* @param {SaxesTagPlain} tag A SAX tag.
479473
* @param {IActiveTag} activeTag The currently active tag.
480474
* @param {IActiveTag} parentTag The parent tag or null.
481475
*/
482-
protected onTagProperty(tag: Tag, activeTag: IActiveTag, parentTag: IActiveTag) {
476+
protected onTagProperty(tag: SaxesTagPlain, activeTag: IActiveTag, parentTag: IActiveTag) {
483477
const tagExpanded: IExpandedPrefix = RdfXmlParser.expandPrefixedTerm(tag.name, activeTag.ns, this);
484478

485479
activeTag.childrenParseType = ParseType.RESOURCE;
@@ -508,11 +502,11 @@ while ${attributeValue} and ${activeSubjectValue} where found.`);
508502
// Collect all attributes as triples
509503
// Assign subject value only after all attributes have been processed, because baseIRI may change the final val
510504
let activeSubSubjectValue: string = null;
511-
let subSubjectValueBlank: boolean = true;
505+
let subSubjectValueBlank = true;
512506
const predicates: RDF.NamedNode[] = [];
513507
const objects: (RDF.NamedNode | RDF.BlankNode | RDF.Literal)[] = [];
514508
for (const propertyAttributeKey in tag.attributes) {
515-
const propertyAttributeValue: string = tag.attributes[propertyAttributeKey];
509+
const propertyAttributeValue = tag.attributes[propertyAttributeKey];
516510
const propertyAttributeKeyExpanded: IExpandedPrefix = RdfXmlParser
517511
.expandPrefixedTerm(propertyAttributeKey, activeTag.ns, this);
518512
if (propertyAttributeKeyExpanded.uri === RdfXmlParser.RDF) {
@@ -755,7 +749,7 @@ while ${attributeValue} and ${activeSubjectValue} where found.`);
755749
*/
756750
protected onDoctype(doctype: string) {
757751
doctype.replace(/<!ENTITY\s+([^\s]+)\s+["']([^"']+)["']\s*>/g, (match, prefix, uri) => {
758-
(<any> this.saxStream)._parser.ENTITIES[prefix] = uri;
752+
this.saxParser.ENTITIES[prefix] = uri;
759753
return '';
760754
});
761755
}
@@ -780,10 +774,6 @@ export interface IRdfXmlParserArgs {
780774
* The default graph for constructing quads.
781775
*/
782776
defaultGraph?: RDF.Term;
783-
/**
784-
* If the internal SAX parser should parse XML in strict mode, and error if it is invalid.
785-
*/
786-
strict?: boolean;
787777
/**
788778
* If the internal position (line, column) should be tracked and emitted in error messages.
789779
*/

‎package.json

+8-2
Original file line number | Diff line number | Diff line change
@@ -46,9 +46,12 @@
4646
"streamify-array": "^1.0.0",
4747
"streamify-string": "^1.0.1",
4848
"ts-jest": "^28.0.5",
49+
"ts-loader": "^9.3.1",
4950
"tslint": "^6.0.0",
5051
"tslint-eslint-rules": "^5.3.1",
51-
"typescript": "^4.3.5"
52+
"typescript": "^4.3.5",
53+
"webpack": "^5.73.0",
54+
"webpack-cli": "^4.10.0"
5255
},
5356
"jest": {
5457
"globals": {
@@ -87,9 +90,12 @@
8790
},
8891
"dependencies": {
8992
"@rdfjs/types": "*",
93+
"@types/readable-stream": "^2.3.13",
94+
"buffer": "^6.0.3",
9095
"rdf-data-factory": "^1.1.0",
9196
"relative-to-absolute-iri": "^1.0.0",
92-
"sax": "^1.2.4"
97+
"readable-stream": "^4.0.0",
98+
"saxes": "^6.0.0"
9399
},
94100
"sideEffects": false
95101
}

‎test/RdfXmlParser-test.ts

+18-34
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
import "jest-rdf";
22
import * as RDF from "@rdfjs/types";
3-
import {SAXStream, Tag} from "sax";
3+
import {SaxesParser, SaxesTagPlain} from "saxes";
44
import {PassThrough} from "stream";
55
import {RdfXmlParser} from "../lib/RdfXmlParser";
66
import {DataFactory} from "rdf-data-factory";
@@ -24,8 +24,7 @@ describe('RdfXmlParser', () => {
2424
expect((<any> instance).dataFactory).toBeInstanceOf(DataFactory);
2525
expect((<any> instance).baseIRI).toBe('');
2626
expect((<any> instance).defaultGraph).toBe(DF.defaultGraph());
27-
expect((<any> instance).strict).toBeFalsy();
28-
expect((<any> instance).saxStream).toBeInstanceOf(SAXStream);
27+
expect((<any> instance).saxParser).toBeInstanceOf(SaxesParser);
2928
});
3029

3130
it('should be constructable with empty args', () => {
@@ -34,8 +33,7 @@ describe('RdfXmlParser', () => {
3433
expect((<any> instance).dataFactory).toBeInstanceOf(DataFactory);
3534
expect((<any> instance).baseIRI).toBe('');
3635
expect((<any> instance).defaultGraph).toBe(DF.defaultGraph());
37-
expect((<any> instance).strict).toBeFalsy();
38-
expect((<any> instance).saxStream).toBeInstanceOf(SAXStream);
36+
expect((<any> instance).saxParser).toBeInstanceOf(SaxesParser);
3937
});
4038

4139
it('should be constructable with args with a custom data factory', () => {
@@ -45,8 +43,7 @@ describe('RdfXmlParser', () => {
4543
expect((<any> instance).dataFactory).toBe(dataFactory);
4644
expect((<any> instance).baseIRI).toBe('');
4745
expect((<any> instance).defaultGraph).toBe('abc');
48-
expect((<any> instance).strict).toBeFalsy();
49-
expect((<any> instance).saxStream).toBeInstanceOf(SAXStream);
46+
expect((<any> instance).saxParser).toBeInstanceOf(SaxesParser);
5047
});
5148

5249
it('should be constructable with args with a custom base IRI', () => {
@@ -55,8 +52,7 @@ describe('RdfXmlParser', () => {
5552
expect((<any> instance).dataFactory).toBeInstanceOf(DataFactory);
5653
expect((<any> instance).baseIRI).toEqual('myBaseIRI');
5754
expect((<any> instance).defaultGraph).toBe(DF.defaultGraph());
58-
expect((<any> instance).strict).toBeFalsy();
59-
expect((<any> instance).saxStream).toBeInstanceOf(SAXStream);
55+
expect((<any> instance).saxParser).toBeInstanceOf(SaxesParser);
6056
});
6157

6258
it('should be constructable with args with a custom default graph', () => {
@@ -66,35 +62,23 @@ describe('RdfXmlParser', () => {
6662
expect((<any> instance).dataFactory).toBeInstanceOf(DataFactory);
6763
expect((<any> instance).baseIRI).toEqual('');
6864
expect((<any> instance).defaultGraph).toBe(defaultGraph);
69-
expect((<any> instance).strict).toBeFalsy();
70-
expect((<any> instance).saxStream).toBeInstanceOf(SAXStream);
65+
expect((<any> instance).saxParser).toBeInstanceOf(SaxesParser);
7166
});
7267

73-
it('should be constructable with args with strict mode', () => {
74-
const instance = new RdfXmlParser({ strict: true });
75-
expect(instance).toBeInstanceOf(RdfXmlParser);
76-
expect((<any> instance).dataFactory).toBeInstanceOf(DataFactory);
77-
expect((<any> instance).baseIRI).toEqual('');
78-
expect((<any> instance).defaultGraph).toBe(DF.defaultGraph());
79-
expect((<any> instance).strict).toBe(true);
80-
expect((<any> instance).saxStream).toBeInstanceOf(SAXStream);
81-
});
82-
83-
it('should be constructable with args with a custom data factory, base IRI, strict and default graph', () => {
68+
it('should be constructable with args with a custom data factory and default graph', () => {
8469
const dataFactory: any = { defaultGraph: () => 'abc' };
8570
const defaultGraph = DF.namedNode('abc');
86-
const instance = new RdfXmlParser({ dataFactory, baseIRI: 'myBaseIRI', defaultGraph, strict: true });
71+
const instance = new RdfXmlParser({ dataFactory, baseIRI: 'myBaseIRI', defaultGraph });
8772
expect(instance).toBeInstanceOf(RdfXmlParser);
8873
expect((<any> instance).dataFactory).toBe(dataFactory);
8974
expect((<any> instance).baseIRI).toEqual('myBaseIRI');
9075
expect((<any> instance).defaultGraph).toBe(defaultGraph);
91-
expect((<any> instance).strict).toBe(true);
92-
expect((<any> instance).saxStream).toBeInstanceOf(SAXStream);
76+
expect((<any> instance).saxParser).toBeInstanceOf(SaxesParser);
9377
});
9478

9579
describe('#parseNamespace', () => {
9680
it('should parse a tag without attributes', () => {
97-
const tag: Tag = { name: 'a', isSelfClosing: false, attributes: {} };
81+
const tag: SaxesTagPlain = { name: 'a', isSelfClosing: false, attributes: {} };
9882
return expect(RdfXmlParser.parseNamespace(tag, null)).toEqual([
9983
{
10084
xml: 'http://www.w3.org/XML/1998/namespace',
@@ -103,7 +87,7 @@ describe('RdfXmlParser', () => {
10387
});
10488

10589
it('should parse a tag with non-xmlns attributes', () => {
106-
const tag: Tag = {
90+
const tag: SaxesTagPlain = {
10791
attributes: {
10892
a: 'b',
10993
c: 'd',
@@ -121,7 +105,7 @@ describe('RdfXmlParser', () => {
121105
});
122106

123107
it('should parse a tag with a default xmlns attribute', () => {
124-
const tag: Tag = {
108+
const tag: SaxesTagPlain = {
125109
attributes: {
126110
xmlns: 'a',
127111
},
@@ -139,7 +123,7 @@ describe('RdfXmlParser', () => {
139123
});
140124

141125
it('should parse a tag with a xmlns attributes', () => {
142-
const tag: Tag = {
126+
const tag: SaxesTagPlain = {
143127
attributes: {
144128
'xmlns:a': '1',
145129
'xmlns:b': '2',
@@ -161,7 +145,7 @@ describe('RdfXmlParser', () => {
161145
});
162146

163147
it('should parse a tag with a xmlns attributes and a parent tag without ns', () => {
164-
const tag: Tag = {
148+
const tag: SaxesTagPlain = {
165149
attributes: {
166150
'xmlns:a': '1',
167151
'xmlns:b': '2',
@@ -183,7 +167,7 @@ describe('RdfXmlParser', () => {
183167
});
184168

185169
it('should parse a tag with a xmlns attributes and a parent tag with ns', () => {
186-
const tag: Tag = {
170+
const tag: SaxesTagPlain = {
187171
attributes: {
188172
'xmlns:a': '1',
189173
'xmlns:b': '2',
@@ -218,7 +202,7 @@ describe('RdfXmlParser', () => {
218202
{ a: 'b' },
219203
];
220204
const parser: any = {
221-
saxStream: {
205+
saxParser: {
222206
_parser: {
223207
column: 2,
224208
line: 1,
@@ -306,7 +290,7 @@ describe('RdfXmlParser', () => {
306290
});
307291

308292
it('should delegate xml errors', () => {
309-
return expect(parse(new RdfXmlParser({ strict: true }), `
293+
return expect(parse(new RdfXmlParser(), `
310294
abc`)).rejects.toBeTruthy();
311295
});
312296

@@ -2157,7 +2141,7 @@ abc`)).rejects.toBeTruthy();
21572141
<rdf:Description rdf:about="http://example.org/">
21582142
<ex:prop1 ex:prop2="abc">
21592143
<rdf:Description rdf:about="http://example.org/2" />
2160-
</ex:prop>
2144+
</ex:prop1>
21612145
</rdf:Description>
21622146
</rdf:RDF>`);
21632147
return expect(array)

0 commit comments

Comments
 (0)
Please sign in to comment.