Skip to content

Commit bb6f1cc

Browse files
committed
Fix streams with split unicode sequences.
1 parent 458ac6a commit bb6f1cc

File tree

2 files changed

+17
-4
lines changed

2 files changed

+17
-4
lines changed

lib/N3Lexer.js

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -423,13 +423,26 @@ N3Lexer.prototype = {
423423
// Otherwise, the input must be a stream
424424
else {
425425
this._input = '';
426+
this._pendingBuffer = null;
426427
if (typeof input.setEncoding === 'function')
427428
input.setEncoding('utf8');
428429
// Adds the data chunk to the buffer and parses as far as possible
429430
input.on('data', function (data) {
430-
if (self._input !== null) {
431-
self._input += data;
432-
self._tokenizeToEnd(callback, false);
431+
if (self._input !== null && data.length !== 0) {
432+
// Prepend any bytes held back from a previous chunk
433+
if (self._pendingBuffer) {
434+
data = Buffer.concat([self._pendingBuffer, data]);
435+
self._pendingBuffer = null;
436+
}
437+
// Hold if the buffer ends in a non-ASCII byte (high bit set), since it may be part of an incomplete UTF-8 sequence
438+
if (data[data.length - 1] & 0x80) {
439+
self._pendingBuffer = data;
440+
}
441+
// Otherwise, tokenize as far as possible
442+
else {
443+
self._input += data;
444+
self._tokenizeToEnd(callback, false);
445+
}
433446
}
434447
});
435448
// Parses until the end

test/N3StreamParser-test.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ describe('N3StreamParser', function () {
3131
it('should parse decimals that are split across chunks in the stream',
3232
shouldParse('<sub> <pred> 11.2 .'.match(/.{1,2}/g), 1));
3333

34-
it.skip('should parse non-breaking spaces that are split across chunks in the stream correctly', function (done) {
34+
it('should parse non-breaking spaces that are split across chunks in the stream correctly', function (done) {
3535
var buffer = Buffer.from('<sub> <pred> " " .'),
3636
chunks = [buffer, buffer.slice(0, 15), buffer.slice(15, buffer.length)];
3737
shouldParse(chunks, 2, function (triples) {

0 commit comments

Comments
 (0)