File tree Expand file tree Collapse file tree 4 files changed +51
-24
lines changed
Expand file tree Collapse file tree 4 files changed +51
-24
lines changed Original file line number Diff line number Diff line change @@ -11,8 +11,9 @@ import {
1111 IFetchDataConfig ,
1212 IFetchFileConfig ,
1313 IFetchBaseConifg ,
14- IFileInfo ,
1514 IFetchCommon ,
15+ IFileInfo ,
16+ IFetchHTML ,
1617 IRequestResItem ,
1718 IRequestConfig ,
1819 IIntervalTime
@@ -77,7 +78,7 @@ export default class XCrawl {
7778 return requestRes
7879 }
7980
80- async fetchHTML ( config : string | IFetchHTMLConfig ) : Promise < JSDOM > {
81+ async fetchHTML ( config : IFetchHTMLConfig ) : Promise < IFetchHTML > {
8182 const rawRequestConifg : IFetchHTMLConfig = isString ( config )
8283 ? { url : config }
8384 : config
@@ -86,11 +87,18 @@ export default class XCrawl {
8687 requestConifg : rawRequestConifg
8788 } )
8889
89- const requestResItem = await request ( requestConifg )
90+ const requestRes = await request ( requestConifg )
91+ const rawData = requestRes . data . toString ( )
9092
91- const dom = new JSDOM ( requestResItem . data )
93+ const res : IFetchHTML = {
94+ ...requestRes ,
95+ data : {
96+ raw : rawData ,
97+ jsdom : new JSDOM ( rawData )
98+ }
99+ }
92100
93- return dom
101+ return res
94102 }
95103
96104 async fetchData < T = any > ( config : IFetchDataConfig ) : Promise < IFetchCommon < T > > {
Original file line number Diff line number Diff line change 11import { IncomingHttpHeaders } from 'node:http'
2+ import { JSDOM } from 'jsdom'
23
34export interface IAnyObject extends Object {
45 [ key : string | number | symbol ] : any
@@ -67,7 +68,7 @@ export interface IFetchBaseConifg {
6768 intervalTime ?: IIntervalTime
6869}
6970
70- export interface IFetchHTMLConfig extends IRequestConfig { }
71+ export type IFetchHTMLConfig = string | IRequestConfig
7172
7273export interface IFetchDataConfig extends IFetchBaseConifg { }
7374
@@ -77,16 +78,25 @@ export interface IFetchFileConfig extends IFetchBaseConifg {
7778 }
7879}
7980
81+ export type IFetchCommon < T > = {
82+ id : number
83+ statusCode : number | undefined
84+ headers : IncomingHttpHeaders
85+ data : T
86+ } [ ]
87+
8088export interface IFileInfo {
8189 fileName : string
8290 mimeType : string
8391 size : number
8492 filePath : string
8593}
8694
87- export type IFetchCommon < T > = {
88- id : number
95+ export interface IFetchHTML {
8996 statusCode : number | undefined
9097 headers : IncomingHttpHeaders
91- data : T
92- } [ ]
98+ data : {
99+ raw : string
100+ jsdom : JSDOM
101+ }
102+ }
Original file line number Diff line number Diff line change @@ -10,19 +10,19 @@ const testXCrawl = new XCrawl({
1010 mode : 'sync'
1111} )
1212
13- testXCrawl
14- . fetchData ( {
15- requestConifg : [
16- { url : 'http://localhost:3001/home' } ,
17- { url : 'http://localhost:9001/api/home/wonderfulplace' } ,
18- { url : 'http://localhost:9001/api/home/goodprice' } ,
19- { url : 'http://localhost:3001/home' } ,
20- { url : 'http://localhost:9001/ai/home/goodprice' }
21- ]
22- } )
23- . then ( ( res ) => {
24- console . log ( res )
25- } )
13+ // testXCrawl
14+ // .fetchData({
15+ // requestConifg: [
16+ // { url: 'http://localhost:3001/home' },
17+ // { url: 'http://localhost:9001/api/home/wonderfulplace' },
18+ // { url: 'http://localhost:9001/api/home/goodprice' },
19+ // { url: 'http://localhost:3001/home' },
20+ // { url: 'http://localhost:9001/ai/home/goodprice' }
21+ // ]
22+ // })
23+ // .then((res) => {
24+ // console.log(res)
25+ // })
2626
2727// testXCrawl.fetchHTML({ url: 'https://www.bilibili.com/' }).then((jsdom) => {
2828// const document = jsdom.window.document
@@ -52,3 +52,12 @@ testXCrawl
5252// console.log(res)
5353// })
5454// })
55+
56+ testXCrawl . fetchHTML ( 'https://cn.bing.com' ) . then ( ( res ) => {
57+ const { jsdom } = res . data
58+ } )
59+
60+ testXCrawl . fetchHTML ( 'https://docs.github.com/zh/get-started' ) . then ( ( res ) => {
61+ const { jsdom } = res . data
62+ console . log ( jsdom . window . document . querySelector ( 'title' ) ?. textContent )
63+ } )
You can’t perform that action at this time.
0 commit comments