Skip to content
This repository was archived by the owner on Mar 6, 2024. It is now read-only.

Commit eace839

Browse files
committed
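fix bug: quote and escape XPath-extracted values in the generated INSERT statement, terminate saved SQL statements with ";", and clear stale field mappings before custom crawling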
1 parent fb1d6d0 commit eace839

File tree

7 files changed

+107
-83
lines changed

7 files changed

+107
-83
lines changed

README.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,14 @@ QQ交流群(573484012)
2121

2222
目前支持的文件格式有 zip,exe,dmg,iso,jar,msi,rar,tmp,xlsx,mdf,com,casm,for,lib,lst,msg,obj,pas,wki,bas,map,bak,dot,bat,sh,rpm
2323

24+
#### 自定义爬取
25+
26+
自定义XPath表达式,将匹配的网页内容存储至MySQL数据库
27+
28+
![xpath](xpath.png)
29+
30+
> [了解XPath语法](http://www.w3school.com.cn/xpath/xpath_syntax.asp)
31+
2432
#### 爬虫工作流程
2533

2634
![工作流程](workflow.png)

pom.xml

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,20 @@
4646
<groupId>com.zhazhapan</groupId>
4747
<artifactId>util</artifactId>
4848
<version>1.0.7</version>
49+
<exclusions>
50+
<exclusion>
51+
<artifactId>commons-codec</artifactId>
52+
<groupId>commons-codec</groupId>
53+
</exclusion>
54+
<exclusion>
55+
<artifactId>commons-io</artifactId>
56+
<groupId>commons-io</groupId>
57+
</exclusion>
58+
<exclusion>
59+
<artifactId>log4j</artifactId>
60+
<groupId>log4j</groupId>
61+
</exclusion>
62+
</exclusions>
4963
</dependency>
5064
<dependency>
5165
<groupId>mysql</groupId>

src/main/java/com/zhazhapan/vspider/Crawler.java

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -122,13 +122,14 @@ public void downloadURL(String url, String html) {
122122
if (Checker.isNotEmpty(MysqlConfig.getFields())) {
123123
for (Pair<String, String> pair : MysqlConfig.getFields()) {
124124
preSlice.append(pair.getValue()).append(",");
125-
postSlice.append(SpiderUtils.evaluate(pair.getKey(), html)).append(",");
125+
postSlice.append("'").append(SpiderUtils.evaluate(pair.getKey(), html).replaceAll("'", "\\\\'"))
126+
.append("',");
126127
}
127128
String pre = preSlice.toString();
128129
String post = postSlice.toString();
129130
String sql = pre.substring(0, pre.length() - 1) + ")" + post.substring(0, post.length() - 1) + ")";
130131
if (MysqlConfig.isEnableSql()) {
131-
SpiderUtils.saveFile(DefaultConfigValues.SQL_PATH, sql + "\r\n", true);
132+
SpiderUtils.saveFile(DefaultConfigValues.SQL_PATH, sql + ";\r\n", true);
132133
}
133134
try {
134135
SpiderApplication.statement.executeUpdate(sql);

src/main/java/com/zhazhapan/vspider/controller/MainController.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -407,6 +407,7 @@ public void customCrawling() {
407407
MysqlConfig.setEnableCustom(true);
408408
MysqlConfig.setEnableSql(controller.enableSql.isSelected());
409409
String[] mapping = mappings.split(ValueConsts.COMMA_SIGN);
410+
MysqlConfig.getFields().clear();
410411
for (String s : mapping) {
411412
String[] keyValue = s.split("->");
412413
Pair<String, String> map = new Pair<>(keyValue[0].trim(), keyValue[1].trim());

src/main/resources/view/CustomCrawling.fxml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@
6060
</HBox.margin>
6161
</TextField>
6262
</HBox>
63-
<Label text="XPath与字段名映射关系, 用英文逗号分隔多个映射(忽略多余的空格)" VBox.vgrow="NEVER">
63+
<Label text="XPath与字段名映射关系, 用英文逗号分隔多个映射(忽略多余的空格):xpath表达式 -> 字段名" VBox.vgrow="NEVER">
6464
<VBox.margin>
6565
<Insets top="10.0" />
6666
</VBox.margin>

0 commit comments

Comments
 (0)