Skip to content

Commit 70fb4ec

Browse files
committed
Correct lookahead within RDF* terms.
Fixes #218
1 parent: aafca70 · commit: 70fb4ec

File tree

2 files changed

+45
-5
lines changed

2 files changed

+45
-5
lines changed

src/N3Lexer.js

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -40,13 +40,13 @@ export default class N3Lexer {
4040
this._langcode = /^@([a-z]+(?:-[a-z0-9]+)*)(?=[^a-z0-9\-])/i;
4141
this._prefix = /^((?:[A-Za-z\xc0-\xd6\xd8-\xf6\xf8-\u02ff\u0370-\u037d\u037f-\u1fff\u200c\u200d\u2070-\u218f\u2c00-\u2fef\u3001-\ud7ff\uf900-\ufdcf\ufdf0-\ufffd]|[\ud800-\udb7f][\udc00-\udfff])(?:\.?[\-0-9A-Z_a-z\xb7\xc0-\xd6\xd8-\xf6\xf8-\u037d\u037f-\u1fff\u200c\u200d\u203f\u2040\u2070-\u218f\u2c00-\u2fef\u3001-\ud7ff\uf900-\ufdcf\ufdf0-\ufffd]|[\ud800-\udb7f][\udc00-\udfff])*)?:(?=[#\s<])/;
4242
this._prefixed = /^((?:[A-Za-z\xc0-\xd6\xd8-\xf6\xf8-\u02ff\u0370-\u037d\u037f-\u1fff\u200c\u200d\u2070-\u218f\u2c00-\u2fef\u3001-\ud7ff\uf900-\ufdcf\ufdf0-\ufffd]|[\ud800-\udb7f][\udc00-\udfff])(?:\.?[\-0-9A-Z_a-z\xb7\xc0-\xd6\xd8-\xf6\xf8-\u037d\u037f-\u1fff\u200c\u200d\u203f\u2040\u2070-\u218f\u2c00-\u2fef\u3001-\ud7ff\uf900-\ufdcf\ufdf0-\ufffd]|[\ud800-\udb7f][\udc00-\udfff])*)?:((?:(?:[0-:A-Z_a-z\xc0-\xd6\xd8-\xf6\xf8-\u02ff\u0370-\u037d\u037f-\u1fff\u200c\u200d\u2070-\u218f\u2c00-\u2fef\u3001-\ud7ff\uf900-\ufdcf\ufdf0-\ufffd]|[\ud800-\udb7f][\udc00-\udfff]|%[0-9a-fA-F]{2}|\\[!#-\/;=?\-@_~])(?:(?:[\.\-0-:A-Z_a-z\xb7\xc0-\xd6\xd8-\xf6\xf8-\u037d\u037f-\u1fff\u200c\u200d\u203f\u2040\u2070-\u218f\u2c00-\u2fef\u3001-\ud7ff\uf900-\ufdcf\ufdf0-\ufffd]|[\ud800-\udb7f][\udc00-\udfff]|%[0-9a-fA-F]{2}|\\[!#-\/;=?\-@_~])*(?:[\-0-:A-Z_a-z\xb7\xc0-\xd6\xd8-\xf6\xf8-\u037d\u037f-\u1fff\u200c\u200d\u203f\u2040\u2070-\u218f\u2c00-\u2fef\u3001-\ud7ff\uf900-\ufdcf\ufdf0-\ufffd]|[\ud800-\udb7f][\udc00-\udfff]|%[0-9a-fA-F]{2}|\\[!#-\/;=?\-@_~]))?)?)(?:[ \t]+|(?=\.?[,;!\^\s#()\[\]\{\}"'<>]))/;
43-
this._variable = /^\?(?:(?:[A-Z_a-z\xc0-\xd6\xd8-\xf6\xf8-\u02ff\u0370-\u037d\u037f-\u1fff\u200c\u200d\u2070-\u218f\u2c00-\u2fef\u3001-\ud7ff\uf900-\ufdcf\ufdf0-\ufffd]|[\ud800-\udb7f][\udc00-\udfff])(?:[\-0-:A-Z_a-z\xb7\xc0-\xd6\xd8-\xf6\xf8-\u037d\u037f-\u1fff\u200c\u200d\u203f\u2040\u2070-\u218f\u2c00-\u2fef\u3001-\ud7ff\uf900-\ufdcf\ufdf0-\ufffd]|[\ud800-\udb7f][\udc00-\udfff])*)(?=[.,;!\^\s#()\[\]\{\}"'<])/;
43+
this._variable = /^\?(?:(?:[A-Z_a-z\xc0-\xd6\xd8-\xf6\xf8-\u02ff\u0370-\u037d\u037f-\u1fff\u200c\u200d\u2070-\u218f\u2c00-\u2fef\u3001-\ud7ff\uf900-\ufdcf\ufdf0-\ufffd]|[\ud800-\udb7f][\udc00-\udfff])(?:[\-0-:A-Z_a-z\xb7\xc0-\xd6\xd8-\xf6\xf8-\u037d\u037f-\u1fff\u200c\u200d\u203f\u2040\u2070-\u218f\u2c00-\u2fef\u3001-\ud7ff\uf900-\ufdcf\ufdf0-\ufffd]|[\ud800-\udb7f][\udc00-\udfff])*)(?=[.,;!\^\s#()\[\]\{\}"'<>])/;
4444
this._blank = /^_:((?:[0-9A-Z_a-z\xc0-\xd6\xd8-\xf6\xf8-\u02ff\u0370-\u037d\u037f-\u1fff\u200c\u200d\u2070-\u218f\u2c00-\u2fef\u3001-\ud7ff\uf900-\ufdcf\ufdf0-\ufffd]|[\ud800-\udb7f][\udc00-\udfff])(?:\.?[\-0-9A-Z_a-z\xb7\xc0-\xd6\xd8-\xf6\xf8-\u037d\u037f-\u1fff\u200c\u200d\u203f\u2040\u2070-\u218f\u2c00-\u2fef\u3001-\ud7ff\uf900-\ufdcf\ufdf0-\ufffd]|[\ud800-\udb7f][\udc00-\udfff])*)(?:[ \t]+|(?=\.?[,;:\s#()\[\]\{\}"'<>]))/;
45-
this._number = /^[\-+]?(?:(\d+\.\d*|\.?\d+)[eE][\-+]?|\d*(\.)?)\d+(?=\.?[,;:\s#()\[\]\{\}"'<])/;
46-
this._boolean = /^(?:true|false)(?=[.,;\s#()\[\]\{\}"'<])/;
45+
this._number = /^[\-+]?(?:(\d+\.\d*|\.?\d+)[eE][\-+]?|\d*(\.)?)\d+(?=\.?[,;:\s#()\[\]\{\}"'<>])/;
46+
this._boolean = /^(?:true|false)(?=[.,;\s#()\[\]\{\}"'<>])/;
4747
this._keyword = /^@[a-z]+(?=[\s#<:])/i;
4848
this._sparqlKeyword = /^(?:PREFIX|BASE|GRAPH)(?=[\s#<])/i;
49-
this._shortPredicates = /^a(?=[\s()\[\]\{\}"'<])/;
49+
this._shortPredicates = /^a(?=[\s()\[\]\{\}"'<>])/;
5050
this._newline = /^[ \t]*(?:#[^\n\r]*)?(?:\r\n|\n|\r)[ \t]*/;
5151
this._comment = /#([^\n\r]*)/;
5252
this._whitespace = /^[ \t]+/;

test/N3Lexer-test.js

Lines changed: 41 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -866,7 +866,7 @@ describe('Lexer', function () {
866866
{ type: '.', line: 3 },
867867
{ type: 'eof', line: 3 }));
868868

869-
it('should tokenize an RDF* statement with literals',
869+
it('should tokenize an RDF* statement with string literals',
870870
shouldTokenize('<<"string"@en "string"@nl-be "string"@EN>> .',
871871
{ type: '<<', line: 1 },
872872
{ type: 'literal', value: 'string', line: 1 },
@@ -879,6 +879,36 @@ describe('Lexer', function () {
879879
{ type: '.', line: 1 },
880880
{ type: 'eof', line: 1 }));
881881

882+
it('should tokenize an RDF* statement with integers',
883+
shouldTokenize('<<1 2 3>>.',
884+
{ type: '<<', line: 1 },
885+
{ type: 'literal', value: '1', prefix: 'http://www.w3.org/2001/XMLSchema#integer', line: 1 },
886+
{ type: 'literal', value: '2', prefix: 'http://www.w3.org/2001/XMLSchema#integer', line: 1 },
887+
{ type: 'literal', value: '3', prefix: 'http://www.w3.org/2001/XMLSchema#integer', line: 1 },
888+
{ type: '>>', line: 1 },
889+
{ type: '.', line: 1 },
890+
{ type: 'eof', line: 1 }));
891+
892+
it('should tokenize an RDF* statement with decimals',
893+
shouldTokenize('<<1.2 3.4 5.6>>.',
894+
{ type: '<<', line: 1 },
895+
{ type: 'literal', value: '1.2', prefix: 'http://www.w3.org/2001/XMLSchema#decimal', line: 1 },
896+
{ type: 'literal', value: '3.4', prefix: 'http://www.w3.org/2001/XMLSchema#decimal', line: 1 },
897+
{ type: 'literal', value: '5.6', prefix: 'http://www.w3.org/2001/XMLSchema#decimal', line: 1 },
898+
{ type: '>>', line: 1 },
899+
{ type: '.', line: 1 },
900+
{ type: 'eof', line: 1 }));
901+
902+
it('should tokenize an RDF* statement with booleans',
903+
shouldTokenize('<<true false true>>.',
904+
{ type: '<<', line: 1 },
905+
{ type: 'literal', value: 'true', prefix: 'http://www.w3.org/2001/XMLSchema#boolean', line: 1 },
906+
{ type: 'literal', value: 'false', prefix: 'http://www.w3.org/2001/XMLSchema#boolean', line: 1 },
907+
{ type: 'literal', value: 'true', prefix: 'http://www.w3.org/2001/XMLSchema#boolean', line: 1 },
908+
{ type: '>>', line: 1 },
909+
{ type: '.', line: 1 },
910+
{ type: 'eof', line: 1 }));
911+
882912
it('should tokenize a prefixed iri followed by the end of a QuadTerm',
883913
shouldTokenize('c:c>> .',
884914
{ type: 'prefixed', prefix: 'c', value: 'c', line: 1 },
@@ -906,6 +936,16 @@ describe('Lexer', function () {
906936
{ type: '.', line: 1 },
907937
{ type: 'eof', line: 1 }));
908938

939+
it('should tokenize an RDF* statement with variables',
940+
shouldTokenize('<<?a ?b ?c>> .',
941+
{ type: '<<', line: 1 },
942+
{ type: 'var', value: '?a', line: 1 },
943+
{ type: 'var', value: '?b', line: 1 },
944+
{ type: 'var', value: '?c', line: 1 },
945+
{ type: '>>', line: 1 },
946+
{ type: '.', line: 1 },
947+
{ type: 'eof', line: 1 }));
948+
909949
it('should tokenize an RDF* statement with mixed types',
910950
shouldTokenize('<<<http://ex.org/?bla#foo> "string"@nl-be c:c>> .',
911951
{ type: '<<', line: 1 },

0 commit comments

Comments
 (0)