Skip to content

Commit da522d6

Browse files
authored
Merge pull request #9 from sweetrdf/issue7
Issue7
2 parents 82a1efa + 27be9e7 commit da522d6

File tree

4 files changed

+41
-25
lines changed

4 files changed

+41
-25
lines changed

src/quickRdfIo/NQuadsParser.php

+20-21
Original file line number | Diff line number | Diff line change
@@ -62,8 +62,7 @@ class NQuadsParser implements iParser, iQuadIterator {
6262
const BLANKNODE3_STRICT = '[-0-9_:A-Za-z\x{00B7}\x{00C0}-\x{00D6}\x{00D8}-\x{00F6}\x{00F8}-\x{02FF}\x{0300}-\x{037D}\x{037F}-\x{1FFF}\x{200C}-\x{200D}\x{203F}-\x{2040}\x{2070}-\x{218F}\x{2C00}-\x{2FEF}\x{3001}-\x{D7FF}\x{F900}-\x{FDCF}\x{FDF0}-\x{FFFD}\x{10000}-\x{EFFFF}.]';
6363
const BLANKNODE4_STRICT = '[-0-9_:A-Za-z\x{00B7}\x{00C0}-\x{00D6}\x{00D8}-\x{00F6}\x{00F8}-\x{02FF}\x{0300}-\x{037D}\x{037F}-\x{1FFF}\x{200C}-\x{200D}\x{203F}-\x{2040}\x{2070}-\x{218F}\x{2C00}-\x{2FEF}\x{3001}-\x{D7FF}\x{F900}-\x{FDCF}\x{FDF0}-\x{FFFD}\x{10000}-\x{EFFFF}]';
6464
const BLANKNODE = '(_:[^\s<.]+)';
65-
const LITERAL_STRICT = '"((?>[^\x{22}\x{5C}\x{0A}\x{0D}]|\\\\[tbnrf"\'\\\\]|\\\\u[0-9A-Fa-f]{4}|\\\\U[0-9A-Fa-f]{8})*)"';
66-
const LITERAL = '"((?>[^"]|\\")*)"';
65+
const LITERAL = '"((?>[^\x{22}\x{5C}\x{0A}\x{0D}]|\\\\[tbnrf"\'\\\\]|\\\\u[0-9A-Fa-f]{4}|\\\\U[0-9A-Fa-f]{8})*)"';
6766
const STAR_START = '%\\G\s*<<%';
6867
const STAR_END = '%\\G\s*>>%';
6968
const READ_BUF_SIZE = 8096;
@@ -155,7 +154,7 @@ public function __construct(iDataFactory $dataFactory, bool $strict = false,
155154
$iri = self::IRIREF_STRICT;
156155
$blank = '(' . self::BLANKNODE1_STRICT . self::BLANKNODE2_STRICT . '(?:' . self::BLANKNODE3_STRICT . '*' . self::BLANKNODE4_STRICT . ')?)';
157156
$lang = self::LANGTAG_STRICT;
158-
$literal = self::LITERAL_STRICT;
157+
$literal = self::LITERAL;
159158
$lineEnd = "\\s*\\.$comment$eol";
160159
$flags = 'u';
161160
} else {
@@ -166,7 +165,7 @@ public function __construct(iDataFactory $dataFactory, bool $strict = false,
166165
$lang = self::LANGTAG;
167166
$literal = self::LITERAL;
168167
$lineEnd = "\\s*\\.";
169-
$flags = '';
168+
$flags = 'u';
170169
}
171170
$graph = '';
172171
if ($mode === self::MODE_QUADS || $mode === self::MODE_QUADS_STAR) {
@@ -259,9 +258,9 @@ private function quadGenerator(): Generator {
259258
while (true) {
260259
$n++;
261260
$this->line = $this->readLine();
262-
$ret = preg_match($this->regexp, $this->line, $matches, PREG_UNMATCHED_AS_NULL);
263-
if ($ret === 0 && !empty(trim($this->line))) {
264-
throw new RdfIoException("Can't parse line $n: " . $this->line);
261+
$ret = (int) preg_match($this->regexp, $this->line, $matches, PREG_UNMATCHED_AS_NULL);
262+
if (0 === $ret && !empty(trim($this->line))) {
263+
throw new RdfIoException("Can't parse line $n with error '" . preg_last_error_msg() . "': " . $this->line);
265264
}
266265
if (($matches[3] ?? null) !== null) {
267266
yield $this->makeQuad($matches);
@@ -320,10 +319,10 @@ private function starQuadGenerator(): Generator {
320319
$this->level = 0;
321320
$this->line = $this->readLine();
322321
try {
323-
yield $this->parseStar();
322+
yield $this->parseStar($n);
324323
} catch (RdfIoException $e) {
325324
$ret = preg_match($this->regexpCommentLine, $this->line);
326-
if ($ret === 0) {
325+
if (0 === (int) $ret) {
327326
throw $e;
328327
}
329328
}
@@ -333,23 +332,23 @@ private function starQuadGenerator(): Generator {
333332
}
334333
}
335334

336-
private function parseStar(): iQuad {
335+
private function parseStar(int $line): iQuad {
337336
//echo str_repeat("\t", $this->level) . "parsing " . substr($this->line, $this->offset);
338337
$matches = null;
339338
if (preg_match(self::STAR_START, $this->line, $matches, 0, $this->offset)) {
340339
$this->offset += strlen($matches[0]);
341340
$this->level++;
342-
$sbj = $this->parseStar();
341+
$sbj = $this->parseStar($line);
343342
$ret = preg_match($this->regexpPred, $this->line, $matches, PREG_UNMATCHED_AS_NULL, $this->offset);
344-
if ($ret === 0) {
345-
throw new RdfIoException("Failed parsing predicate " . substr($this->line, $this->offset));
343+
if (0 === (int) $ret) {
344+
throw new RdfIoException("Failed parsing predicate on line $line with error '" . preg_last_error_msg() . "': " . substr($this->line, $this->offset));
346345
}
347346
$this->offset += strlen($matches[0]);
348347
$pred = $this->dataFactory::namedNode($matches[1]);
349348
} else {
350349
$ret = preg_match($this->regexpSbjPred, $this->line, $matches, PREG_UNMATCHED_AS_NULL, $this->offset);
351-
if ($ret === 0) {
352-
throw new RdfIoException("Failed parsing subject and predicate " . substr($this->line, $this->offset));
350+
if (0 === (int) $ret) {
351+
throw new RdfIoException("Failed parsing subject and predicate on line $line with error '" . preg_last_error_msg() . "': " . substr($this->line, $this->offset));
353352
}
354353
$this->offset += strlen($matches[0]);
355354
if ($matches[1] !== null) {
@@ -362,7 +361,7 @@ private function parseStar(): iQuad {
362361
if (preg_match(self::STAR_START, $this->line, $matches, 0, $this->offset)) {
363362
$this->offset += strlen($matches[0]);
364363
$this->level++;
365-
$obj = $this->parseStar();
364+
$obj = $this->parseStar($line);
366365
$ret = preg_match($this->regexpGraph, $this->line, $matches, PREG_UNMATCHED_AS_NULL, $this->offset);
367366
$this->offset += strlen($matches[0]);
368367
if (($matches[1] ?? null) !== null) {
@@ -371,9 +370,9 @@ private function parseStar(): iQuad {
371370
$graph = $this->dataFactory::blankNode($matches[2]);
372371
}
373372
} else {
374-
$ret = preg_match($this->regexpObjGraph, $this->line, $matches, PREG_UNMATCHED_AS_NULL, $this->offset);
375-
if ($ret === 0) {
376-
throw new RdfIoException("Can't parse object " . substr($this->line, $this->offset));
373+
$ret = (int) preg_match($this->regexpObjGraph, $this->line, $matches, PREG_UNMATCHED_AS_NULL, $this->offset);
374+
if (0 === $ret) {
375+
throw new RdfIoException("Can't parse object on line $line with error '" . preg_last_error_msg() . "': " . substr($this->line, $this->offset));
377376
}
378377
$this->offset += strlen($matches[0]);
379378
if ($matches[1] !== null) {
@@ -393,8 +392,8 @@ private function parseStar(): iQuad {
393392
}
394393
$regexpEnd = $this->level > 0 ? self::STAR_END : $this->regexpLineEnd;
395394
$ret = preg_match($regexpEnd, $this->line, $matches, 0, $this->offset);
396-
if ($ret === 0) {
397-
throw new RdfIoException("Can't parse end " . substr($this->line, $this->offset));
395+
if (0 === (int) $ret) {
396+
throw new RdfIoException("Can't parse end on line $line with error '" . preg_last_error_msg() . "': " . substr($this->line, $this->offset));
398397
}
399398
$this->offset += strlen($matches[0]);
400399
$quad = $this->dataFactory::quad($sbj, $pred, $obj, $graph ?? null);

src/quickRdfIo/Util.php

+3-3
Original file line number | Diff line number | Diff line change
@@ -123,15 +123,15 @@ static public function getParser(string $formatOrFilename,
123123
'application/trig' => new TriGParser($dataFactory, ['documentIRI' => $baseUri]),
124124
'nt',
125125
'ntriples',
126+
'n-triples' => new NQuadsParser($dataFactory, false, NQuadsParser::MODE_TRIPLES),
126127
'ntriplesstar',
127-
'n-triples',
128128
'n-triples-star',
129129
'application/n-triples',
130130
'text/plain' => new NQuadsParser($dataFactory, false, NQuadsParser::MODE_TRIPLES_STAR),
131131
'nq',
132132
'nquads',
133-
'nquadstar',
134-
'n-quads',
133+
'n-quads' => new NQuadsParser($dataFactory, false, NQuadsParser::MODE_QUADS),
134+
'nquadsstar',
135135
'n-quads-star',
136136
'application/n-quads' => new NQuadsParser($dataFactory, false, NQuadsParser::MODE_QUADS_STAR),
137137
'xml',

tests/NQuadsParserTest.php

+16-1
Original file line number | Diff line number | Diff line change
@@ -78,7 +78,7 @@ private function getModes(?bool $strict, ?bool $quads, ?bool $star): array {
7878
*/
7979
private function readTestLines(string $filename): array {
8080
$tests = [];
81-
$data = file($filename) ?: throw new \RuntimeException("Failed to open $filename");
81+
$data = file($filename) ?: throw new \RuntimeException("Failed to open $filename");
8282
foreach ($data as $n => $l) {
8383
if (substr($l, 0, 1) !== '#') {
8484
$tests[$n + 1] = $l;
@@ -249,4 +249,19 @@ public function testInputExceptions(): void {
249249
$this->assertEquals('Input has to be a resource or Psr\Http\Message\StreamInterface object', $ex->getMessage());
250250
}
251251
}
252+
253+
/**
254+
* https://github.com/sweetrdf/quickRdfIo/issues/7
255+
*/
256+
public function testIssue7(): void {
257+
$input = __DIR__ . '/files/issue7.nt';
258+
$df = new DF();
259+
$dataset = new \quickRdf\Dataset();
260+
261+
foreach ($this->getModes(null, null, null)as $i) {
262+
$parser = new NQuadsParser($df, $i->strict, $i->mode);
263+
$dataset->add($parser->parseStream(fopen($input, 'r')));
264+
$this->assertCount(2, $dataset);
265+
}
266+
}
252267
}

0 commit comments

Comments
 (0)