Add polling function

coder-hxl · coder-hxl · commit d46b0c01e209 · 2023-02-09T13:08:54.000+08:00
diff --git a/.eslintrc.js b/.eslintrc.js
@@ -14,6 +14,7 @@ module.exports = {
   rules: {
     '@typescript-eslint/no-explicit-any': 'off',
     '@typescript-eslint/no-empty-interface': 'off',
-    '@typescript-eslint/no-var-requires': 'off'
+    '@typescript-eslint/no-var-requires': 'off',
+    '@typescript-eslint/no-non-null-assertion': 'off'
   }
 }
diff --git a/README.md b/README.md
@@ -6,8 +6,9 @@ XCrawl is a Nodejs multifunctional crawler library. Crawl HTML, JSON, file resou
 
 ## highlights
 
-- Simple configuration to grab HTML, JSON, file resources, etc.
+- Simple configuration to grab HTML, JSON, file resources, etc
 - Batch requests can choose mode asynchronous or synchronous
+- Polling function
 - Anthropomorphic request interval
 
 ## Install
@@ -54,6 +55,7 @@ class XCrawl {
   fetchHTML(config: IFetchHTMLConfig): Promise<IFetchHTML>
   fetchData<T = any>(config: IFetchDataConfig): Promise<IFetchCommon<T>>
   fetchFile(config: IFetchFileConfig): Promise<IFetchCommon<IFileInfo>>
+  fetchPolling(config: IFetchPollingConfig, callback: (count: number) => void): void
 }
 ```
 
@@ -168,6 +170,28 @@ myXCrawl.fetchFile({
 })
 ```
 
+### fetchPolling
+
+fetchPolling is a method of the [myXCrawl](https://github.com/coder-hxl/x-crawl#Example-1) instance, typically used to perform polling operations, such as fetching news at regular intervals.
+
+#### Type
+
+```ts
+function fetchPolling(
+  config: IFetchPollingConfig,
+  callback: (count: number) => void
+): void
+```
+
+#### Example
+
+```js
+myXCrawl.fetchPolling({ h: 1, m: 30 }, () => {
+  // will be executed every one and a half hours
+  // fetchHTML/fetchData/fetchFile
+})
+```
+
 ## Types
 
 #### IAnyObject
@@ -249,6 +273,18 @@ interface IFetchFileConfig extends IFetchBaseConifg {
 }
 ```
 
+#### IFetchPollingConfig
+
+```ts
+interface IFetchPollingConfig {
+  Y?: number // Year (365 days per year)
+  M?: number // Month (30 days per month)
+  d?: number // Day
+  h?: number // Hour
+  m?: number // Minute
+}
+```
+
 #### IFetchCommon
 
 ```ts
diff --git a/document/cn.md b/document/cn.md
@@ -8,6 +8,7 @@ XCrawl 是 Nodejs 多功能爬虫库。只需简单的配置即可抓取 HTML 
 
 - 简单的配置即可抓取 HTML 、JSON 、文件资源等等
 - 批量请求可选择模式 异步 或 同步
+- 轮询功能
 - 拟人化的请求间隔时间
 
 ## 安装
@@ -20,7 +21,7 @@ npm install x-crawl
 
 ## 示例
 
-获取 bilibili 国漫主页的推荐轮播图片为例: 
+每隔一天就获取 bilibili 国漫主页的推荐轮播图片为例: 
 
 ```js
 // 1.导入模块 ES/CJS
@@ -32,18 +33,21 @@ const myXCrawl = new XCrawl({
   intervalTime: { max: 6000, min: 2000 } // 控制请求频率
 })
 
-// 3.调用 fetchHTML API 爬取 HTML
-myXCrawl.fetchHTML('https://www.bilibili.com/guochuang/').then((res) => {
-  const { jsdom } = res.data  // 默认使用了 JSDOM 库解析 HTML
+// 3.调用 fetchPolling API 开始轮询功能，每隔一天会调用回调函数
+myXCrawl.fetchPolling({ d: 1 }, () => {
+  // 3.1.调用 fetchHTML API 爬取 HTML
+  myXCrawl.fetchHTML('https://www.bilibili.com/guochuang/').then((res) => {
+    const { jsdom } = res.data  // 默认使用了 JSDOM 库解析 HTML
   
-   // 3.1.获取轮播图片的 src
-  const imgSrc = []
-  const recomEls = jsdom.window.document.querySelectorAll('.chief-recom-item')
-  recomEls.forEach((item) => imgSrc.push(item.querySelector('img').src))
+     // 3.2.获取轮播图片的 src
+    const imgSrc = []
+    const recomEls = jsdom.window.document.querySelectorAll('.chief-recom-item')
+    recomEls.forEach((item) => imgSrc.push(item.querySelector('img').src))
  
-  // 3.2.调用 fetchFile API 爬取图片
-  const requestConifg = imgSrc.map((src) => ({ url: `https:${src}` }))
-  myXCrawl.fetchFile({ requestConifg, fileConfig: { storeDir: './upload' } })
+    // 3.3.调用 fetchFile API 爬取图片
+    const requestConifg = imgSrc.map((src) => ({ url: `https:${src}` }))
+    myXCrawl.fetchFile({ requestConifg, fileConfig: { storeDir: './upload' } })
+  })
 })
 ```
 
@@ -63,6 +67,7 @@ class XCrawl {
   fetchHTML(config: IFetchHTMLConfig): Promise<IFetchHTML>
   fetchData<T = any>(config: IFetchDataConfig): Promise<IFetchCommon<T>>
   fetchFile(config: IFetchFileConfig): Promise<IFetchCommon<IFileInfo>>
+  fetchPolling(config: IFetchPollingConfig, callback: (count: number) => void): void
 }
 ```
 
@@ -177,6 +182,28 @@ myXCrawl.fetchFile({
 })
 ```
 
+### fetchPolling
+
+fetchPolling 是 [myXCrawl](https://github.com/coder-hxl/x-crawl/blob/main/document/cn.md#%E7%A4%BA%E4%BE%8B-1) 实例的方法，通常用于进行轮询操作，比如每隔一段时间获取新闻之类的。
+
+#### 类型
+
+```ts
+function fetchPolling(
+  config: IFetchPollingConfig,
+  callback: (count: number) => void
+): void
+```
+
+#### 示例
+
+```js
+myXCrawl.fetchPolling({ h: 1, m: 30 }, () => {
+  // 每隔一个半小时会执行一次
+  // fetchHTML/fetchData/fetchFile
+})
+```
+
 ## 类型
 
 #### IAnyObject
@@ -258,13 +285,25 @@ interface IFetchFileConfig extends IFetchBaseConifg {
 }
 ```
 
+#### IFetchPollingConfig
+
+```ts
+interface IFetchPollingConfig {
+  Y?: number // 年 (按每年365天)
+  M?: number // 月 (按每月30天)
+  d?: number // 日
+  h?: number // 小时
+  m?: number // 分钟
+}
+```
+
 #### IFetchCommon
 
 ```ts
 type IFetchCommon<T> = {
   id: number
   statusCode: number | undefined
-  headers: IncomingHttpHeaders // node:http type
+  headers: IncomingHttpHeaders // node:http 类型
   data: T
 }[]
 ```
diff --git a/package.json b/package.json
@@ -1,7 +1,7 @@
 {
   "private": true,
   "name": "x-crawl",
-  "version": "0.1.6",
+  "version": "0.2.0",
   "author": "CoderHxl",
   "description": "XCrawl is a Nodejs multifunctional crawler library. Crawl HTML, JSON, file resources, etc. through simple configuration.",
   "license": "MIT",
diff --git a/publish/README.md b/publish/README.md
@@ -6,8 +6,9 @@ XCrawl is a Nodejs multifunctional crawler library. Crawl HTML, JSON, file resou
 
 ## highlights
 
-- Simple configuration to grab HTML, JSON, file resources, etc.
+- Simple configuration to grab HTML, JSON, file resources, etc
 - Batch requests can choose mode asynchronous or synchronous
+- Polling function
 - Anthropomorphic request interval
 
 ## Install
@@ -50,11 +51,11 @@ Create a crawler instance via new XCrawl. The request queue is maintained by the
 
 ```ts
 class XCrawl {
-  private readonly baseConfig
   constructor(baseConfig?: IXCrawlBaseConifg)
   fetchHTML(config: IFetchHTMLConfig): Promise<IFetchHTML>
   fetchData<T = any>(config: IFetchDataConfig): Promise<IFetchCommon<T>>
   fetchFile(config: IFetchFileConfig): Promise<IFetchCommon<IFileInfo>>
+  fetchPolling(config: IFetchPollingConfig, callback: (count: number) => void): void
 }
 ```
 
@@ -169,6 +170,28 @@ myXCrawl.fetchFile({
 })
 ```
 
+### fetchPolling
+
+fetchPolling is a method of the [myXCrawl](https://github.com/coder-hxl/x-crawl#Example-1) instance, typically used to perform polling operations, such as fetching news at regular intervals.
+
+#### Type
+
+```ts
+function fetchPolling(
+  config: IFetchPollingConfig,
+  callback: (count: number) => void
+): void
+```
+
+#### Example
+
+```js
+myXCrawl.fetchPolling({ h: 1, m: 30 }, () => {
+  // will be executed every one and a half hours
+  // fetchHTML/fetchData/fetchFile
+})
+```
+
 ## Types
 
 #### IAnyObject
@@ -250,6 +273,18 @@ interface IFetchFileConfig extends IFetchBaseConifg {
 }
 ```
 
+#### IFetchPollingConfig
+
+```ts
+interface IFetchPollingConfig {
+  Y?: number // Year (365 days per year)
+  M?: number // Month (30 days per month)
+  d?: number // Day
+  h?: number // Hour
+  m?: number // Minute
+}
+```
+
 #### IFetchCommon
 
 ```ts
diff --git a/publish/package.json b/publish/package.json
@@ -1,6 +1,6 @@
 {
   "name": "x-crawl",
-  "version": "0.1.6",
+  "version": "0.2.0",
   "author": "CoderHxl",
   "description": "XCrawl is a Nodejs multifunctional crawler library. Crawl HTML, JSON, file resources, etc. through simple configuration.",
   "license": "MIT",
diff --git a/src/index.ts b/src/index.ts
@@ -10,14 +10,16 @@ import {
   log,
   logError,
   logNumber,
-  logSuccess
+  logSuccess,
+  logWarn
 } from './utils'
 
 import {
   IXCrawlBaseConifg,
   IFetchHTMLConfig,
   IFetchDataConfig,
   IFetchFileConfig,
+  IFetchPollingConfig,
   IFetchBaseConifg,
   IFetchCommon,
   IFileInfo,
@@ -167,4 +169,24 @@ export default class XCrawl {
 
     return container
   }
+
+  fetchPolling(config: IFetchPollingConfig, callback: (count: number) => void) {
+    const { Y, M, d, h, m } = config
+
+    const year = !isUndefined(Y) ? Y * 1000 * 60 * 60 * 24 * 365 : 0
+    const month = !isUndefined(M) ? M * 1000 * 60 * 60 * 24 * 30 : 0
+    const day = !isUndefined(d) ? d * 1000 * 60 * 60 * 24 : 0
+    const hour = !isUndefined(h) ? h * 1000 * 60 * 60 : 0
+    const minute = !isUndefined(m) ? m * 1000 * 60 : 0
+    const total = year + month + day + hour + minute
+
+    let count = 0
+    function cb() {
+      console.log(logWarn(`Start the ${logWarn.bold(++count)} polling`))
+      callback(count)
+    }
+
+    cb()
+    setInterval(cb, total)
+  }
 }
diff --git a/src/types.ts b/src/types.ts
@@ -78,6 +78,14 @@ export interface IFetchFileConfig extends IFetchBaseConifg {
   }
 }
 
+export interface IFetchPollingConfig {
+  Y?: number
+  M?: number
+  d?: number
+  h?: number
+  m?: number
+}
+
 export type IFetchCommon<T> = {
   id: number
   statusCode: number | undefined
diff --git a/src/utils.ts b/src/utils.ts
@@ -18,6 +18,7 @@ export const log = console.log
 export const logNumber = chalk.hex('#a57fff')
 export const logSuccess = chalk.green
 export const logError = chalk.red
+export const logWarn = chalk.yellow
 
 export function isUndefined(value: any): value is undefined {
   return typeof value === 'undefined'
diff --git a/test/start/index.js b/test/start/index.js
diff --git a/test/start/index.ts b/test/start/index.ts

Original file line number	Diff line number	Diff line change
`@@ -14,6 +14,7 @@ module.exports = {`
`14`	`14`	`rules: {`
`15`	`15`	`'@typescript-eslint/no-explicit-any': 'off',`
`16`	`16`	`'@typescript-eslint/no-empty-interface': 'off',`
`17`		`- '@typescript-eslint/no-var-requires': 'off'`
	`17`	`+ '@typescript-eslint/no-var-requires': 'off',`
	`18`	`+ '@typescript-eslint/no-non-null-assertion': 'off'`
`18`	`19`	`}`
`19`	`20`	`}`
Original file line number	Diff line number	Diff line change
`@@ -1,7 +1,7 @@`
`1`	`1`	`{`
`2`	`2`	`"private": true,`
`3`	`3`	`"name": "x-crawl",`
`4`		`- "version": "0.1.6",`
	`4`	`+ "version": "0.2.0",`
`5`	`5`	`"author": "CoderHxl",`
`6`	`6`	`"description": "XCrawl is a Nodejs multifunctional crawler library. Crawl HTML, JSON, file resources, etc. through simple configuration.",`
`7`	`7`	`"license": "MIT",`
Original file line number	Diff line number	Diff line change
`@@ -1,6 +1,6 @@`
`1`	`1`	`{`
`2`	`2`	`"name": "x-crawl",`
`3`		`- "version": "0.1.6",`
	`3`	`+ "version": "0.2.0",`
`4`	`4`	`"author": "CoderHxl",`
`5`	`5`	`"description": "XCrawl is a Nodejs multifunctional crawler library. Crawl HTML, JSON, file resources, etc. through simple configuration.",`
`6`	`6`	`"license": "MIT",`
Original file line number	Diff line number	Diff line change
`@@ -78,6 +78,14 @@ export interface IFetchFileConfig extends IFetchBaseConifg {`
`78`	`78`	`}`
`79`	`79`	`}`
`80`	`80`
	`81`	`+export interface IFetchPollingConfig {`
	`82`	`+ Y?: number`
	`83`	`+ M?: number`
	`84`	`+ d?: number`
	`85`	`+ h?: number`
	`86`	`+ m?: number`
	`87`	`+}`
	`88`	`+`
`81`	`89`	`export type IFetchCommon<T> = {`
`82`	`90`	`id: number`
`83`	`91`	`statusCode: number \| undefined`