const { readFileSync } = require('fs');
const { join } = require('path');
const zlib = require('zlib');

const pako = require('pako');
const { decode } = require('uint8-base64');

// Load the XML fixture once up front so the parse timing below measures
// parsing only, not disk I/O.
const bigXmlPath = join(__dirname, 'big.xml');
const data = readFileSync(bigXmlPath);
const { parse } = require('../lib/index.js');
@@ -11,11 +14,119 @@ const result = parse(data, {
1114 dynamicTypingAttributeValue : false ,
1215 ignoreAttributes : false ,
1316 dynamicTypingNodeValue : false ,
14- tagValueProcessor : ( value , tagName ) => {
15- // return decoder.decode(value);
17+ tagValueProcessor : ( value , node ) => {
18+ if ( node . tagName !== 'binary' ) return decoder . decode ( value ) ;
19+ if ( ! node . parent . children ) {
20+ console . log ( node ) ;
21+ }
22+ const ontologies = node . parent . children . cvParam . map (
23+ ( entry ) => entry . attributes . accession ,
24+ ) ;
25+ try {
26+ return decodeBase64 ( node . value , { ontologies } ) ;
27+ } catch ( e ) {
28+ console . log ( node ) ;
29+ }
1630 } ,
1731} ) ;
1832console . timeEnd ( 'start' ) ;
1933//console.log(
2034// result.indexedmzML.mzML.run.spectrumList.spectrum[1].binaryDataArrayList,
2135//);
36+
/**
 * Decode a base64-encoded mzML binary data array into a typed array.
 *
 * @param {string|Uint8Array} base64 - Base64 text, either as a string or as a
 *   Uint8Array of base64 characters (the XML parser hands tag values over as
 *   raw bytes — presumably ASCII base64; TODO confirm against the parser).
 * @param {object} [options={}]
 * @param {string} [options.endian='little'] - 'little', 'big' or 'network'.
 * @param {number} [options.precision] - Bits per element: 32 or 64.
 * @param {boolean} [options.float=true] - Interpret elements as floats.
 * @param {string} [options.compression=''] - '', 'none' or 'zlib'.
 * @param {string[]} [options.ontologies] - PSI-MS accession numbers; when
 *   present they override precision/float/compression.
 * @returns {Float32Array|Float64Array|Int32Array|BigInt64Array}
 * @throws {Error} on an unknown compression algorithm or big-endian data with
 *   an unsupported precision.
 * @throws {TypeError} on an unknown endian value or precision.
 */
function decodeBase64(base64, options = {}) {
  let {
    endian = 'little',
    precision,
    float = true,
    compression = '',
    ontologies,
  } = options;

  if (ontologies) {
    // PSI-MS controlled-vocabulary terms take precedence over the options.
    if (ontologies.includes('MS:1000519')) {
      // 32-bit integer array.
      precision = 32;
      float = false;
    }
    if (ontologies.includes('MS:1000520')) precision = 16;
    if (ontologies.includes('MS:1000521')) precision = 32;
    if (ontologies.includes('MS:1000522')) {
      // 64-bit integer array.
      float = false;
      precision = 64;
    }
    if (ontologies.includes('MS:1000523')) precision = 64;
    if (ontologies.includes('MS:1000574')) compression = 'zlib';
  }

  // Accept both a base64 string and a Uint8Array of base64 characters.
  // base64 characters are ASCII, so a latin1 round-trip is byte-exact.
  const base64String =
    typeof base64 === 'string'
      ? base64
      : Buffer.from(base64.buffer, base64.byteOffset, base64.length).toString(
          'latin1',
        );

  let bytes = Buffer.from(base64String, 'base64');
  switch (compression.toLowerCase()) {
    case 'zlib':
      bytes = zlib.inflateSync(bytes);
      break;
    case '':
    case 'none':
      break;
    default:
      throw new Error(`Unknown compression algorithm: ${compression}`);
  }

  switch (endian.toLowerCase()) {
    case 'little':
      break;
    case 'network':
    case 'big': {
      // Convert to little endian by reversing each `step`-byte word in place.
      let step;
      switch (precision) {
        case 32:
          step = 4;
          break;
        case 64:
          step = 8;
          break;
        default:
          throw new Error('Can not process bigendian file');
      }
      // Ignore any trailing bytes that do not form a complete word.
      const end = bytes.length - (bytes.length % step);
      for (let i = 0; i < end; i += step) {
        for (let j = 0; j < step / 2; j++) {
          const temp = bytes[i + j];
          bytes[i + j] = bytes[i + step - 1 - j];
          bytes[i + step - 1 - j] = temp;
        }
      }
      break;
    }
    default:
      throw new TypeError(`Attributes endian not correct: ${endian}`);
  }

  // Typed-array views need a byte offset that is a multiple of the element
  // size. Node's pooled buffers are 8-byte aligned, but copy defensively if
  // that ever does not hold.
  if (bytes.byteOffset % 8 !== 0) {
    bytes = Uint8Array.prototype.slice.call(bytes);
  }

  // View the decoded bytes at their real offset (not a hard-coded 0) and
  // floor the element count: the payload may carry a few trailing bytes that
  // do not form a complete element.
  const { buffer, byteOffset } = bytes;
  const count = (size) => Math.floor(bytes.length / size);

  if (float) {
    switch (precision) {
      case 32:
        return new Float32Array(buffer, byteOffset, count(4));
      case 64:
        return new Float64Array(buffer, byteOffset, count(8));
      default:
        throw new TypeError(`Incorrect precision: ${precision}`);
    }
  } else {
    switch (precision) {
      case 32:
        return new Int32Array(buffer, byteOffset, count(4));
      case 64:
        return new BigInt64Array(buffer, byteOffset, count(8));
      default:
        throw new TypeError(`Incorrect precision: ${precision}`);
    }
  }
}
0 commit comments