File tree Expand file tree Collapse file tree 10 files changed +413
-10
lines changed
Expand file tree Collapse file tree 10 files changed +413
-10
lines changed Original file line number Diff line number Diff line change 1010 nodejs :
1111 uses : zakodium/workflows/.github/workflows/nodejs.yml@nodejs-v1
1212 with :
13- node-version-matrix : ' [14, 16]'
13+ node-version-matrix : ' [14, 16, 18 ]'
1414 lint-check-types : true
Original file line number Diff line number Diff line change 119119
120120lib
121121lib-esm
122- big.xml
122+ big.xml
123+
124+ script /medline.xml
Original file line number Diff line number Diff line change 3939 "homepage" : " https://github.com/cheminfo/arraybuffer-xml-parser#readme" ,
4040 "devDependencies" : {
4141 "@types/he" : " ^1.1.2" ,
42- "@types/jest" : " ^27.5.0 " ,
42+ "@types/jest" : " ^27.5.1 " ,
4343 "cheminfo-build" : " ^1.1.11" ,
44- "eslint" : " ^8.15 .0" ,
44+ "eslint" : " ^8.16 .0" ,
4545 "eslint-config-cheminfo-typescript" : " ^10.4.0" ,
4646 "he" : " ^1.2.0" ,
4747 "iobuffer" : " ^5.1.0" ,
4848 "jest" : " ^28.1.0" ,
4949 "pako" : " ^2.0.4" ,
5050 "prettier" : " ^2.6.2" ,
5151 "rimraf" : " ^3.0.2" ,
52- "ts-jest" : " ^28.0.2 " ,
53- "typescript" : " ^4.6.4 " ,
52+ "ts-jest" : " ^28.0.3 " ,
53+ "typescript" : " ^4.7.2 " ,
5454 "uint8-base64" : " ^0.1.1"
5555 },
5656 "dependencies" : {
Original file line number Diff line number Diff line change 1+ import { parseStream } from '../lib/index.js' ;
2+ import { open } from 'fs/promises' ;
3+
4+ /*
5+ To run this script, first build the package with `npm run prepack`.
6+ You also need a (large) MEDLINE XML file named 'medline.xml' located next to this script.
7+ */
8+
9+ async function doAll ( ) {
10+ const file = await open ( new URL ( 'medline.xml' , import . meta. url ) , 'r' ) ;
11+ const stream = file . readableWebStream ( ) ;
12+ let i = 0 ;
13+ for await ( const entry of parseStream ( stream , 'PubmedArticle' ) ) {
14+ console . log ( entry ) ;
15+ console . log ( i ++ ) ;
16+ }
17+ }
18+
19+ doAll ( ) ;
Original file line number Diff line number Diff line change 1+ import { open } from 'fs/promises' ;
2+ import { join } from 'path' ;
3+
4+ import { parseStream } from '../parseStream' ;
5+
6+ describe ( 'parseStream' , ( ) => {
7+ it ( 'simple case' , async ( ) => {
8+ // eslint-disable-next-line jest/no-if
9+ if ( Number ( process . versions . node . split ( '.' ) [ 0 ] ) >= 18 ) {
10+ const file = await open ( join ( __dirname , 'assets/sample.xml' ) , 'r' ) ;
11+ const CHUNK_SIZE = 10 ;
12+ const transformStream = new TransformStream ( {
13+ start : function start ( ) { } , // required.
14+ transform : async function transform ( chunk , controller ) {
15+ if ( chunk === null ) controller . terminate ( ) ;
16+ chunk = new Uint8Array ( await chunk ) ;
17+ for ( let i = 0 ; i < chunk . length ; i += CHUNK_SIZE ) {
18+ controller . enqueue ( chunk . slice ( i , i + CHUNK_SIZE ) ) ;
19+ }
20+ } ,
21+ } ) ;
22+
23+ const results = [ ] ;
24+ //@ts-expect-error feature is too new
25+ const readableStream = file . readableWebStream ( ) ;
26+ for await ( let entry of parseStream (
27+ readableStream . pipeThrough ( transformStream ) ,
28+ 'address' ,
29+ ) ) {
30+ results . push ( entry ) ;
31+ //console.log(entry);
32+ }
33+ expect ( results ) . toMatchInlineSnapshot ( `
34+ Array [
35+ Object {
36+ "buildingNo": 1,
37+ "city": "New York",
38+ "flatNo": 1,
39+ "street": "Park Ave",
40+ },
41+ Object {
42+ "buildingNo": 33,
43+ "city": "Boston",
44+ "flatNo": 24,
45+ "street": "Centre St",
46+ },
47+ Object {
48+ "buildingNo": 1,
49+ "city": "Moscow",
50+ "flatNo": 2,
51+ "street": "Kahovka",
52+ },
53+ Object {
54+ "buildingNo": 3,
55+ "city": "Tula",
56+ "flatNo": 78,
57+ "street": "Lenina",
58+ },
59+ ]
60+ ` ) ;
61+ }
62+ } ) ;
63+ } ) ;
Original file line number Diff line number Diff line change 11export * from './parse' ;
2+ export * from './parseStream' ;
Original file line number Diff line number Diff line change 1+ import {
2+ defaultOptions ,
3+ StreamParseOptions ,
4+ } from './traversable/defaultOptions' ;
5+ import { getTraversableGenerator } from './traversable/getTraversableGenerator' ;
6+ import { traversableToJSON } from './traversableToJSON' ;
7+
8+ /**
9+ * Parse a web stream containing an XML document and yield one object per `lookupTagName` entry found
10+ */
11+ export async function * parseStream (
12+ readableStream : ReadableStream ,
13+ lookupTagName : string ,
14+ options : StreamParseOptions = { } ,
15+ ) {
16+ options = { ...defaultOptions , ...options } ;
17+
18+ for await ( const traversableEntry of getTraversableGenerator (
19+ readableStream ,
20+ lookupTagName ,
21+ options ,
22+ ) ) {
23+ yield traversableToJSON ( traversableEntry , options ) ;
24+ }
25+ }
Original file line number Diff line number Diff line change 11import { decoder } from './utils/utf8Decoder' ;
22
3+ /**
4+ * Search for the '>' character that closes the opening tag
5+ * @param data
6+ * @param i
7+ * @returns
8+ */
39export function closingIndexForOpeningTag (
410 data : Uint8Array ,
511 i : number ,
@@ -25,8 +31,5 @@ export function closingIndexForOpeningTag(
2531 }
2632 endIndex ++ ;
2733 }
28- return {
29- data : decoder . decode ( data . subarray ( i , i + endIndex ) ) ,
30- index : 0 ,
31- } ;
34+ throw new Error ( 'Could not find closing tag' ) ;
3235}
Original file line number Diff line number Diff line change @@ -7,6 +7,20 @@ export const decoder = {
77 return utf8Decoder . decode ( array ) ;
88 } ,
99} ;
10+
11+ export interface StreamParseOptions extends ParseOptions {
12+ /**
13+ * Maximum size (in bytes) of a single entry
14+ * @default 1e7
15+ */
16+ maxEntrySize ?: number ;
17+ /**
18+ * Maximum size of the buffer
19+ * @default 2e8
20+ */
21+ maxBufferSize ?: number ;
22+ }
23+
1024export interface ParseOptions {
1125 /**
1226 * should we remove ascii < 32
@@ -92,6 +106,7 @@ export interface ParseOptions {
92106 */
93107 stopNodes ?: string [ ] ;
94108}
109+
95110export const defaultOptions : ParseOptions = {
96111 trimValues : true ,
97112 attributeNamePrefix : '$' ,
You can’t perform that action at this time.
0 commit comments