|
1 | 1 | import fs from 'node:fs' |
2 | 2 | import { writeFile } from 'node:fs/promises' |
3 | 3 | import path from 'node:path' |
4 | | -import puppeteer, { Browser, Protocol } from 'puppeteer' |
| 4 | +import puppeteer, { Browser, HTTPResponse, Page, Protocol } from 'puppeteer' |
5 | 5 |
|
6 | 6 | import { useBatchCrawlHandleByMode } from './batchCrawlHandle' |
7 | 7 | import { request } from './request' |
@@ -166,35 +166,43 @@ export function createCrawlPage(baseConfig: LoaderXCrawlBaseConfig) { |
166 | 166 | } |
167 | 167 |
|
168 | 168 | async function crawlPageHandle(handleConfig: CrawlBaseConfigV1) { |
169 | | - const page = await browser!.newPage() |
170 | | - await page.setViewport({ width: 1280, height: 1024 }) |
| 169 | + let page: Page | null = null |
| 170 | + let httpResponse: HTTPResponse | null = null |
| 171 | + |
| 172 | + try { |
| 173 | + page = await browser!.newPage() |
| 174 | + await page.setViewport({ width: 1280, height: 1024 }) |
| 175 | + |
| 176 | + if (handleConfig.proxy) { |
| 177 | + await browser!.createIncognitoBrowserContext({ |
| 178 | + proxyServer: handleConfig.proxy |
| 179 | + }) |
| 180 | + } else { |
| 181 | + await browser!.createIncognitoBrowserContext({ |
| 182 | + proxyServer: undefined |
| 183 | + }) |
| 184 | + } |
171 | 185 |
|
172 | | - if (handleConfig.proxy) { |
173 | | - await browser!.createIncognitoBrowserContext({ |
174 | | - proxyServer: handleConfig.proxy |
175 | | - }) |
176 | | - } else { |
177 | | - await browser!.createIncognitoBrowserContext({ |
178 | | - proxyServer: undefined |
179 | | - }) |
180 | | - } |
| 186 | + if (handleConfig.headers) { |
| 187 | + await page.setExtraHTTPHeaders( |
| 188 | + handleConfig.headers as any as Record<string, string> |
| 189 | + ) |
| 190 | + } |
181 | 191 |
|
182 | | - if (handleConfig.headers) { |
183 | | - await page.setExtraHTTPHeaders( |
184 | | - handleConfig.headers as any as Record<string, string> |
185 | | - ) |
186 | | - } |
| 192 | + if (handleConfig.cookies) { |
| 193 | + await page.setCookie( |
| 194 | + ...parseCrawlPageCookies(handleConfig.url, handleConfig.cookies) |
| 195 | + ) |
| 196 | + } |
187 | 197 |
|
188 | | - if (handleConfig.cookies) { |
189 | | - await page.setCookie( |
190 | | - ...parseCrawlPageCookies(handleConfig.url, handleConfig.cookies) |
191 | | - ) |
| 198 | + httpResponse = await page.goto(handleConfig.url, { |
| 199 | + timeout: handleConfig.timeout |
| 200 | + }) |
| 201 | + } catch (error) { |
| 202 | + await page?.close() |
| 203 | + throw error |
192 | 204 | } |
193 | 205 |
|
194 | | - const httpResponse = await page.goto(handleConfig.url, { |
195 | | - timeout: handleConfig.timeout |
196 | | - }) |
197 | | - |
198 | 206 | return { httpResponse, browser: browser!, page } |
199 | 207 | } |
200 | 208 |
|
|
0 commit comments