@@ -2,7 +2,7 @@ import fs from 'node:fs'
22import { writeFile } from 'node:fs/promises'
33import path from 'node:path'
44import { JSDOM } from 'jsdom'
5- import puppeteer , { Browser , Page } from 'puppeteer'
5+ import puppeteer , { Browser } from 'puppeteer'
66
77import { batchRequest , syncBatchRequest } from './request'
88import { quickSort } from './sort'
@@ -18,23 +18,22 @@ import {
1818} from './utils'
1919
2020import {
21- IXCrawlBaseConifg ,
22- IFetchHTMLConfig ,
23- IFetchDataConfig ,
24- IFetchFileConfig ,
25- IStartPollingConfig ,
26- IFetchBaseConifg ,
27- IFileInfo ,
28- IFetchHTML ,
29- IRequestResItem ,
30- IRequestConfig ,
31- IIntervalTime ,
32- IFetchCommon ,
33- IFetchCommonArr
34- } from './types'
35-
36- function mergeConfig < T extends IFetchBaseConifg > (
37- baseConfig : IXCrawlBaseConifg ,
21+ FetchBaseConifgV1 ,
22+ FetchDataConfig ,
23+ FetchFileConfig ,
24+ FetchHTML ,
25+ FetchHTMLConfig ,
26+ FetchResCommonArrV1 ,
27+ FetchResCommonV1 ,
28+ FileInfo ,
29+ IntervalTime ,
30+ StartPollingConfig
31+ } from './types/api'
32+ import { XCrawlBaseConifg } from './types'
33+ import { RequestConfig , RequestResItem } from './types/request'
34+
35+ function mergeConfig < T extends FetchBaseConifgV1 > (
36+ baseConfig : XCrawlBaseConifg ,
3837 rawConfig : T
3938) : T {
4039 const newConfig = structuredClone ( rawConfig )
@@ -43,22 +42,22 @@ function mergeConfig<T extends IFetchBaseConifg>(
4342 const requestConifgArr = isArray ( newConfig . requestConifg )
4443 ? newConfig . requestConifg
4544 : [ newConfig . requestConifg ]
46- for ( const requestItem of requestConifgArr ) {
47- const { url, timeout, proxy } = requestItem
45+ for ( const requesttem of requestConifgArr ) {
46+ const { url, timeout, proxy } = requesttem
4847
4948 // 1.1.baseUrl
5049 if ( ! isUndefined ( baseConfig . baseUrl ) ) {
51- requestItem . url = baseConfig . baseUrl + url
50+ requesttem . url = baseConfig . baseUrl + url
5251 }
5352
5453 // 1.2.timeout
5554 if ( isUndefined ( timeout ) ) {
56- requestItem . timeout = baseConfig . timeout
55+ requesttem . timeout = baseConfig . timeout
5756 }
5857
5958 // 1.3.porxy
6059 if ( isUndefined ( proxy ) ) {
61- requestItem . proxy = baseConfig . proxy
60+ requesttem . proxy = baseConfig . proxy
6261 }
6362 }
6463
@@ -72,9 +71,9 @@ function mergeConfig<T extends IFetchBaseConifg>(
7271
7372async function useBatchRequestByMode (
7473 mode : 'async' | 'sync' | undefined ,
75- requestConifg : IRequestConfig | IRequestConfig [ ] ,
76- intervalTime : IIntervalTime | undefined ,
77- callback : ( requestResItem : IRequestResItem ) => void
74+ requestConifg : RequestConfig | RequestConfig [ ] ,
75+ intervalTime : IntervalTime | undefined ,
76+ callback : ( requestRestem : RequestResItem ) => void
7877) {
7978 const requestConfigQueue = isArray ( requestConifg )
8079 ? requestConifg
@@ -87,25 +86,33 @@ async function useBatchRequestByMode(
8786 }
8887}
8988
90- export function createFetchHTML ( baseConfig : IXCrawlBaseConifg ) {
91- // 初始值
89+ export function createFetchHTML ( baseConfig : XCrawlBaseConifg ) {
9290 let browser : Browser | null = null
93- let page : Page | null = null
94- let useTotal = 0
91+ let createBrowserState : Promise < void > | null = null
92+ let callTotal = 0
9593
9694 async function fetchHTML (
97- config : IFetchHTMLConfig ,
98- callback ?: ( res : IFetchHTML ) => void
99- ) : Promise < IFetchHTML > {
100- // 完成初始化
101- if ( useTotal === 0 ) {
102- browser = await puppeteer . launch ( )
103- page = await browser . newPage ( )
104- await page . setViewport ( { width : 1280 , height : 1024 } )
95+ config : FetchHTMLConfig ,
96+ callback ?: ( res : FetchHTML ) => void
97+ ) : Promise < FetchHTML > {
98+ // 记录调用次数, 为关闭浏览器
99+ callTotal ++
100+
101+ // 只创建一次浏览器
102+ if ( callTotal === 1 ) {
103+ createBrowserState = puppeteer . launch ( ) . then ( ( res ) => {
104+ browser = res
105+ } )
106+ }
107+
108+ // 等待浏览器创建完毕
109+ if ( createBrowserState ) {
110+ await Promise . all ( [ createBrowserState ] )
111+ createBrowserState = null
105112 }
106113
107- // 记录调用次数
108- useTotal ++
114+ const page = await browser ! . newPage ( )
115+ await page . setViewport ( { width : 1280 , height : 1024 } )
109116
110117 const { requestConifg } = mergeConfig ( baseConfig , {
111118 requestConifg : isString ( config ) ? { url : config } : config
@@ -127,13 +134,14 @@ export function createFetchHTML(baseConfig: IXCrawlBaseConifg) {
127134 const content = await page ! . content ( )
128135
129136 // 关闭浏览器
130- if ( -- useTotal === 0 ) {
137+ if ( -- callTotal === 0 ) {
131138 await browser ! . close ( )
132139 }
133140
134- const res : IFetchHTML = {
141+ const res : FetchHTML = {
135142 httpResponse,
136143 data : {
144+ page,
137145 content,
138146 jsdom : new JSDOM ( content )
139147 }
@@ -149,23 +157,23 @@ export function createFetchHTML(baseConfig: IXCrawlBaseConifg) {
149157 return fetchHTML
150158}
151159
152- export function createFetchData ( baseConfig : IXCrawlBaseConifg ) {
160+ export function createFetchData ( baseConfig : XCrawlBaseConifg ) {
153161 async function fetchData < T = any > (
154- config : IFetchDataConfig ,
155- callback ?: ( res : IFetchCommon < T > ) => void
156- ) : Promise < IFetchCommonArr < T > > {
162+ config : FetchDataConfig ,
163+ callback ?: ( res : FetchResCommonV1 < T > ) => void
164+ ) : Promise < FetchResCommonArrV1 < T > > {
157165 const { requestConifg, intervalTime } = mergeConfig ( baseConfig , config )
158166
159- const container : IFetchCommonArr < T > = [ ]
160- function handleResItem ( requestResItem : IRequestResItem ) {
161- const contentType = requestResItem . headers [ 'content-type' ] ?? ''
162- const rawData = requestResItem . data
167+ const container : FetchResCommonArrV1 < T > = [ ]
168+ function handleRestem ( requestRestem : RequestResItem ) {
169+ const contentType = requestRestem . headers [ 'content-type' ] ?? ''
170+ const rawData = requestRestem . data
163171
164172 const data = contentType . includes ( 'text' )
165173 ? rawData . toString ( )
166174 : JSON . parse ( rawData . toString ( ) )
167175
168- const itemRes = { ...requestResItem , data }
176+ const itemRes = { ...requestRestem , data }
169177
170178 if ( callback ) {
171179 callback ( itemRes )
@@ -178,7 +186,7 @@ export function createFetchData(baseConfig: IXCrawlBaseConifg) {
178186 baseConfig . mode ,
179187 requestConifg ,
180188 intervalTime ,
181- handleResItem
189+ handleRestem
182190 )
183191
184192 const res = quickSort (
@@ -190,26 +198,26 @@ export function createFetchData(baseConfig: IXCrawlBaseConifg) {
190198 return fetchData
191199}
192200
193- export function createFetchFile ( baseConfig : IXCrawlBaseConifg ) {
201+ export function createFetchFile ( baseConfig : XCrawlBaseConifg ) {
194202 async function fetchFile (
195- config : IFetchFileConfig ,
196- callback ?: ( res : IFetchCommon < IFileInfo > ) => void
197- ) : Promise < IFetchCommonArr < IFileInfo > > {
203+ config : FetchFileConfig ,
204+ callback ?: ( res : FetchResCommonV1 < FileInfo > ) => void
205+ ) : Promise < FetchResCommonArrV1 < FileInfo > > {
198206 const { requestConifg, intervalTime, fileConfig } = mergeConfig (
199207 baseConfig ,
200208 config
201209 )
202210
203- const container : IFetchCommonArr < IFileInfo > = [ ]
211+ const container : FetchResCommonArrV1 < FileInfo > = [ ]
204212 const saveFileArr : Promise < void > [ ] = [ ]
205213 const saveFileErrorArr : { message : string ; valueOf : ( ) => number } [ ] = [ ]
206214
207215 if ( ! fs . existsSync ( fileConfig . storeDir ) ) {
208216 fs . mkdirSync ( fileConfig . storeDir )
209217 }
210218
211- function handleResItem ( requestResItem : IRequestResItem ) {
212- const { id, headers, data } = requestResItem
219+ function handleRestem ( requestRestem : RequestResItem ) {
220+ const { id, headers, data } = requestRestem
213221
214222 const mimeType = headers [ 'content-type' ] ?? ''
215223 const fileExtension = fileConfig . extension ?? mimeType . split ( '/' ) . pop ( )
@@ -219,7 +227,7 @@ export function createFetchFile(baseConfig: IXCrawlBaseConifg) {
219227 `${ fileName } .${ fileExtension } `
220228 )
221229
222- const saveFileItem = writeFile ( filePath , data )
230+ const saveFiletem = writeFile ( filePath , data )
223231 . catch ( ( err ) => {
224232 const message = `File save error at id ${ id } : ${ err . message } `
225233 const valueOf = ( ) => id
@@ -232,7 +240,7 @@ export function createFetchFile(baseConfig: IXCrawlBaseConifg) {
232240 if ( isError ) return
233241
234242 const res = {
235- ...requestResItem ,
243+ ...requestRestem ,
236244 data : { fileName, mimeType, size : data . length , filePath }
237245 }
238246
@@ -243,14 +251,14 @@ export function createFetchFile(baseConfig: IXCrawlBaseConifg) {
243251 container . push ( res )
244252 } )
245253
246- saveFileArr . push ( saveFileItem )
254+ saveFileArr . push ( saveFiletem )
247255 }
248256
249257 await useBatchRequestByMode (
250258 baseConfig . mode ,
251259 requestConifg ,
252260 intervalTime ,
253- handleResItem
261+ handleRestem
254262 )
255263
256264 // 等待保存文件任务完成
@@ -280,17 +288,15 @@ export function createFetchFile(baseConfig: IXCrawlBaseConifg) {
280288}
281289
282290export function startPolling (
283- config : IStartPollingConfig ,
291+ config : StartPollingConfig ,
284292 callback : ( count : number ) => void
285293) {
286- const { Y , M , d, h, m } = config
294+ const { d, h, m } = config
287295
288- const year = ! isUndefined ( Y ) ? Y * 1000 * 60 * 60 * 24 * 365 : 0
289- const month = ! isUndefined ( M ) ? M * 1000 * 60 * 60 * 24 * 30 : 0
290296 const day = ! isUndefined ( d ) ? d * 1000 * 60 * 60 * 24 : 0
291297 const hour = ! isUndefined ( h ) ? h * 1000 * 60 * 60 : 0
292298 const minute = ! isUndefined ( m ) ? m * 1000 * 60 : 0
293- const total = year + month + day + hour + minute
299+ const total = day + hour + minute
294300
295301 let count = 0
296302 function startCallback ( ) {
0 commit comments