1
1
import * as RDF from "@rdfjs/types" ;
2
2
import { resolve } from "relative-to-absolute-iri" ;
3
- import { createStream , SAXStream , Tag } from "sax" ;
4
- import { PassThrough , Transform , TransformCallback } from "stream" ;
3
+ import { SaxesParser , SaxesTagPlain } from "saxes" ;
4
+ import { PassThrough , Transform } from "readable-stream" ;
5
5
import EventEmitter = NodeJS . EventEmitter ;
6
6
import { ParseError } from "./ParseError" ;
7
7
import { DataFactory } from "rdf-data-factory" ;
@@ -52,9 +52,8 @@ export class RdfXmlParser extends Transform implements RDF.Sink<EventEmitter, RD
52
52
private readonly dataFactory : RDF . DataFactory ;
53
53
private readonly baseIRI : string ;
54
54
private readonly defaultGraph ?: RDF . Quad_Graph ;
55
- private readonly strict ?: boolean ;
56
55
private readonly allowDuplicateRdfIds ?: boolean ;
57
- private readonly saxStream : SAXStream ;
56
+ private readonly saxParser : SaxesParser ;
58
57
59
58
private readonly activeTagStack : IActiveTag [ ] = [ ] ;
60
59
private readonly nodeIds : { [ id : string ] : boolean } = { } ;
@@ -76,25 +75,20 @@ export class RdfXmlParser extends Transform implements RDF.Sink<EventEmitter, RD
76
75
this . defaultGraph = this . dataFactory . defaultGraph ( ) ;
77
76
}
78
77
79
- this . saxStream = createStream ( this . strict , { xmlns : false , position : this . trackPosition } ) ;
80
-
81
- // Workaround for an issue in SAX where non-strict mode either lower- or upper-cases all tags.
82
- if ( ! this . strict ) {
83
- ( < any > this . saxStream ) . _parser . looseCase = 'toString' ;
84
- }
78
+ this . saxParser = new SaxesParser ( { xmlns : false , position : this . trackPosition } ) ;
85
79
86
80
this . attachSaxListeners ( ) ;
87
81
}
88
82
89
83
/**
90
84
* Parse the namespace of the given tag,
91
85
* and take into account the namespace of the parent tag that was already parsed.
92
- * @param {Tag } tag A tag to parse the namespace from.
86
+ * @param {SaxesTagPlain } tag A tag to parse the namespace from.
93
87
* @param {IActiveTag } parentTag The parent tag, or null if this tag is the root.
94
88
* @return {{[p: string]: string}[] } An array of namespaces,
95
89
* where the last ones have a priority over the first ones.
96
90
*/
97
- public static parseNamespace ( tag : Tag , parentTag ?: IActiveTag ) : { [ prefix : string ] : string } [ ] {
91
+ public static parseNamespace ( tag : SaxesTagPlain , parentTag ?: IActiveTag ) : { [ prefix : string ] : string } [ ] {
98
92
const thisNs : { [ prefix : string ] : string } = { } ;
99
93
let hasNs : boolean = false ;
100
94
for ( const attributeKey in tag . attributes ) {
@@ -184,9 +178,9 @@ export class RdfXmlParser extends Transform implements RDF.Sink<EventEmitter, RD
184
178
return parsed ;
185
179
}
186
180
187
- public _transform ( chunk : any , encoding : BufferEncoding , callback : TransformCallback ) {
181
+ public _transform ( chunk : any , encoding : BufferEncoding , callback : ( error ?: Error | null , data ?: any ) => void ) {
188
182
try {
189
- this . saxStream . write ( chunk , encoding ) ;
183
+ this . saxParser . write ( chunk ) ;
190
184
} catch ( e ) {
191
185
return callback ( e ) ;
192
186
}
@@ -244,18 +238,18 @@ export class RdfXmlParser extends Transform implements RDF.Sink<EventEmitter, RD
244
238
}
245
239
246
240
protected attachSaxListeners ( ) {
247
- this . saxStream . on ( 'error' , ( error ) => this . emit ( 'error' , error ) ) ;
248
- this . saxStream . on ( 'opentag' , this . onTag . bind ( this ) ) ;
249
- this . saxStream . on ( 'text' , this . onText . bind ( this ) ) ;
250
- this . saxStream . on ( 'closetag' , this . onCloseTag . bind ( this ) ) ;
251
- this . saxStream . on ( 'doctype' , this . onDoctype . bind ( this ) ) ;
241
+ this . saxParser . on ( 'error' , ( error ) => this . emit ( 'error' , error ) ) ;
242
+ this . saxParser . on ( 'opentag' , this . onTag . bind ( this ) ) ;
243
+ this . saxParser . on ( 'text' , this . onText . bind ( this ) ) ;
244
+ this . saxParser . on ( 'closetag' , this . onCloseTag . bind ( this ) ) ;
245
+ this . saxParser . on ( 'doctype' , this . onDoctype . bind ( this ) ) ;
252
246
}
253
247
254
248
/**
255
249
* Handle the given tag.
256
- * @param {QualifiedTag } tag A SAX tag.
250
+ * @param {SaxesTagPlain } tag A SAX tag.
257
251
*/
258
- protected onTag ( tag : Tag ) {
252
+ protected onTag ( tag : SaxesTagPlain ) {
259
253
// Get parent tag
260
254
const parentTag : IActiveTag = this . activeTagStack . length
261
255
? this . activeTagStack [ this . activeTagStack . length - 1 ] : null ;
@@ -306,12 +300,12 @@ export class RdfXmlParser extends Transform implements RDF.Sink<EventEmitter, RD
306
300
307
301
/**
308
302
* Handle the given node element in resource-mode.
309
- * @param {QualifiedTag } tag A SAX tag.
303
+ * @param {SaxesTagPlain } tag A SAX tag.
310
304
* @param {IActiveTag } activeTag The currently active tag.
311
305
* @param {IActiveTag } parentTag The parent tag or null.
312
306
* @param {boolean } rootTag If we are currently processing the root tag.
313
307
*/
314
- protected onTagResource ( tag : Tag , activeTag : IActiveTag , parentTag : IActiveTag , rootTag : boolean ) {
308
+ protected onTagResource ( tag : SaxesTagPlain , activeTag : IActiveTag , parentTag : IActiveTag , rootTag : boolean ) {
315
309
const tagExpanded : IExpandedPrefix = RdfXmlParser . expandPrefixedTerm ( tag . name , activeTag . ns , this ) ;
316
310
317
311
activeTag . childrenParseType = ParseType . PROPERTY ;
@@ -342,7 +336,7 @@ export class RdfXmlParser extends Transform implements RDF.Sink<EventEmitter, RD
342
336
let subjectValueBlank : boolean = false ;
343
337
let explicitType : string = null ;
344
338
for ( const attributeKey in tag . attributes ) {
345
- const attributeValue : string = tag . attributes [ attributeKey ] ;
339
+ const attributeValue = tag . attributes [ attributeKey ] ;
346
340
const attributeKeyExpanded : IExpandedPrefix = RdfXmlParser . expandPrefixedTerm ( attributeKey , activeTag . ns , this ) ;
347
341
if ( parentTag && attributeKeyExpanded . uri === RdfXmlParser . RDF ) {
348
342
switch ( attributeKeyExpanded . local ) {
@@ -475,11 +469,11 @@ while ${attributeValue} and ${activeSubjectValue} where found.`);
475
469
476
470
/**
477
471
* Handle the given property element in property-mode.
478
- * @param {QualifiedTag } tag A SAX tag.
472
+ * @param {SaxesTagPlain } tag A SAX tag.
479
473
* @param {IActiveTag } activeTag The currently active tag.
480
474
* @param {IActiveTag } parentTag The parent tag or null.
481
475
*/
482
- protected onTagProperty ( tag : Tag , activeTag : IActiveTag , parentTag : IActiveTag ) {
476
+ protected onTagProperty ( tag : SaxesTagPlain , activeTag : IActiveTag , parentTag : IActiveTag ) {
483
477
const tagExpanded : IExpandedPrefix = RdfXmlParser . expandPrefixedTerm ( tag . name , activeTag . ns , this ) ;
484
478
485
479
activeTag . childrenParseType = ParseType . RESOURCE ;
@@ -508,11 +502,11 @@ while ${attributeValue} and ${activeSubjectValue} where found.`);
508
502
// Collect all attributes as triples
509
503
// Assign subject value only after all attributes have been processed, because baseIRI may change the final value.
510
504
let activeSubSubjectValue : string = null ;
511
- let subSubjectValueBlank : boolean = true ;
505
+ let subSubjectValueBlank = true ;
512
506
const predicates : RDF . NamedNode [ ] = [ ] ;
513
507
const objects : ( RDF . NamedNode | RDF . BlankNode | RDF . Literal ) [ ] = [ ] ;
514
508
for ( const propertyAttributeKey in tag . attributes ) {
515
- const propertyAttributeValue : string = tag . attributes [ propertyAttributeKey ] ;
509
+ const propertyAttributeValue = tag . attributes [ propertyAttributeKey ] ;
516
510
const propertyAttributeKeyExpanded : IExpandedPrefix = RdfXmlParser
517
511
. expandPrefixedTerm ( propertyAttributeKey , activeTag . ns , this ) ;
518
512
if ( propertyAttributeKeyExpanded . uri === RdfXmlParser . RDF ) {
@@ -755,7 +749,7 @@ while ${attributeValue} and ${activeSubjectValue} where found.`);
755
749
*/
756
750
protected onDoctype ( doctype : string ) {
757
751
doctype . replace ( /<!ENTITY\s+([^\s]+)\s+["']([^"']+)["']\s*>/g, ( match , prefix , uri ) => {
758
- ( < any > this . saxStream ) . _parser . ENTITIES [ prefix ] = uri ;
752
+ this . saxParser . ENTITIES [ prefix ] = uri ;
759
753
return '' ;
760
754
} ) ;
761
755
}
@@ -780,10 +774,6 @@ export interface IRdfXmlParserArgs {
780
774
* The default graph for constructing quads.
781
775
*/
782
776
defaultGraph ?: RDF . Term ;
783
- /**
784
- * If the internal SAX parser should parse XML in strict mode, and error if it is invalid.
785
- */
786
- strict ?: boolean ;
787
777
/**
788
778
* If the internal position (line, column) should be tracked and emitted in error messages.
789
779
*/
0 commit comments