Skip to content

Commit 4526124

Browse files
GooDRomka and Yatskov authored
SelectorPageURL (#61)
* Added SelectorPageURL, which can extract the current page URL
* Fixed some mistakes
* Updated version
* Fixed code style

Co-authored-by: Alexander <[email protected]>
1 parent 8b4322b commit 4526124

File tree

6 files changed

+62
-14
lines changed

6 files changed

+62
-14
lines changed

package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "web-scraper-chrome-extension",
3-
"version": "0.3.614",
3+
"version": "0.3.615",
44
"description": "Web data extraction tool implemented as chrome extension",
55
"scripts": {
66
"lint": "eslint --ext .js src",

src/_locales/en/messages.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,7 @@
136136
"SelectorElementScroll": { "message": "Element scroll down" },
137137
"SelectorElementClick": { "message": "Element click" },
138138
"SelectorGroup": { "message": "Grouped" },
139+
"SelectorPageURL": { "message": "Page URL" },
139140

140141
"sitemap_scrape_config_requestInterval": { "message": "Request interval" },
141142
"sitemap_scrape_config_requestIntervalRandomness": { "message": "Request interval randomness" },

src/_locales/ru/messages.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,7 @@
142142
"SelectorGroup": { "message": "Группа" },
143143
"ConstantValue": { "message": "Константа" },
144144
"SelectorInputValue": { "message": "Вставка значения" },
145+
"SelectorPageURL": { "message": "URL страницы" },
145146

146147
"sitemap_scrape_requestInterval": { "message": "Интервал между запросами" },
147148
"sitemap_scrape_requestIntervalRandomness": { "message": "Случайность между запросами" },

src/scripts/Controller.js

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,9 @@ export default class SitemapController {
5454
{
5555
type: 'SelectorElementStyle',
5656
},
57+
{
58+
type: 'SelectorPageURL',
59+
},
5760
{
5861
type: 'SelectorHTML',
5962
},
src/scripts/Selector/SelectorPageURL.js

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
import Selector from '../Selector';
2+
3+
/**
 * Selector whose single record is the address of the current document.
 *
 * It does not match DOM elements: every capability query below answers
 * `false`, and the data it produces is one object keyed by the selector id.
 */
export default class SelectorPageURL extends Selector {
    /**
     * @param {Object} options - selector settings; forwarded to the base
     *     constructor and then to `updateData` together with the feature list.
     */
    constructor(options) {
        super(options);
        const supportedFeatures = this.getFeatures();
        this.updateData(options, supportedFeatures);
    }

    /**
     * Names of the features handed to `updateData` in the constructor.
     *
     * @returns {string[]}
     */
    getFeatures() {
        return ['delay', 'textmanipulation'];
    }

    /**
     * Column names produced by this selector: only its own id.
     *
     * @returns {Array}
     */
    getDataColumns() {
        const { id } = this;
        return [id];
    }

    /**
     * Builds the result: a one-element array holding `{ [id]: <page href> }`.
     *
     * @returns {Promise<Object[]>}
     */
    async _getData() {
        const record = {};
        record[this.id] = document.location.href;
        return [record];
    }

    /** @returns {boolean} always `false` */
    canReturnMultipleRecords() {
        return false;
    }

    /** @returns {boolean} always `false` */
    canHaveChildSelectors() {
        return false;
    }

    /** @returns {boolean} always `false` */
    canHaveLocalChildSelectors() {
        return false;
    }

    /** @returns {boolean} always `false` */
    canCreateNewJobs() {
        return false;
    }

    /** @returns {boolean} always `false` */
    willReturnElements() {
        return false;
    }
}

src/scripts/SelectorList.js

Lines changed: 16 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -14,12 +14,15 @@ import SelectorElementClick from './Selector/SelectorElementClick';
1414
import SelectorElementScroll from './Selector/SelectorElementScroll';
1515
import SelectorElementAttribute from './Selector/SelectorElementAttribute';
1616
import SelectorTable from './Selector/SelectorTable';
17+
import SelectorPageURL from './Selector/SelectorPageURL';
1718

1819
export default class SelectorList extends Array {
1920
static createSelector(options) {
2021
switch (options.type) {
2122
case 'ConstantValue':
2223
return new ConstantValue(options);
24+
case 'SelectorPageURL':
25+
return new SelectorPageURL(options);
2326
case 'SelectorDocument':
2427
return new SelectorDocument(options);
2528
case 'SelectorElement':
@@ -96,9 +99,9 @@ export default class SelectorList extends Array {
9699
return this;
97100
}
98101

99-
let getAllChildSelectors = function(parentSelectorId, resultSelectors) {
102+
let getAllChildSelectors = function (parentSelectorId, resultSelectors) {
100103
this.forEach(
101-
function(selector) {
104+
function (selector) {
102105
if (selector.hasParentSelector(parentSelectorId)) {
103106
if (resultSelectors.indexOf(selector) === -1) {
104107
resultSelectors.push(selector);
@@ -121,7 +124,7 @@ export default class SelectorList extends Array {
121124
*/
122125
getDirectChildSelectors(parentSelectorId) {
123126
let resultSelectors = new SelectorList();
124-
this.forEach(function(selector) {
127+
this.forEach(function (selector) {
125128
if (selector.hasParentSelector(parentSelectorId)) {
126129
resultSelectors.push(selector);
127130
}
@@ -131,15 +134,15 @@ export default class SelectorList extends Array {
131134

132135
clone() {
133136
let resultList = new SelectorList();
134-
this.forEach(function(selector) {
137+
this.forEach(function (selector) {
135138
resultList.push(selector);
136139
});
137140
return resultList;
138141
}
139142

140143
fullClone() {
141144
let resultList = new SelectorList();
142-
this.forEach(function(selector) {
145+
this.forEach(function (selector) {
143146
resultList.push(JSON.parse(JSON.stringify(selector)));
144147
});
145148
return resultList;
@@ -149,7 +152,7 @@ export default class SelectorList extends Array {
149152
let resultList = this.clone();
150153
for (let i in arguments) {
151154
arguments[i].forEach(
152-
function(selector) {
155+
function (selector) {
153156
resultList.push(selector);
154157
}.bind(this)
155158
);
@@ -178,9 +181,9 @@ export default class SelectorList extends Array {
178181
resultList.push(this.getSelector(selectorId));
179182

180183
// recursively find all parent selectors that could lead to the page where selectorId is used.
181-
let findParentSelectors = function(selector) {
184+
let findParentSelectors = function (selector) {
182185
selector.parentSelectors.forEach(
183-
function(parentSelectorId) {
186+
function (parentSelectorId) {
184187
if (parentSelectorId === '_root') return;
185188
let parentSelector = this.getSelector(parentSelectorId);
186189
if (resultList.indexOf(parentSelector) !== -1) return;
@@ -205,11 +208,11 @@ export default class SelectorList extends Array {
205208
*/
206209
getSinglePageAllChildSelectors(parentSelectorId) {
207210
let resultList = new SelectorList();
208-
let addChildSelectors = function(parentSelector) {
211+
let addChildSelectors = function (parentSelector) {
209212
if (parentSelector.willReturnElements()) {
210213
let childSelectors = this.getDirectChildSelectors(parentSelector.id);
211214
childSelectors.forEach(
212-
function(childSelector) {
215+
function (childSelector) {
213216
if (resultList.indexOf(childSelector) === -1) {
214217
resultList.push(childSelector);
215218
addChildSelectors(childSelector);
@@ -249,7 +252,7 @@ export default class SelectorList extends Array {
249252
*/
250253
toJSON() {
251254
let result = [];
252-
this.forEach(function(selector) {
255+
this.forEach(function (selector) {
253256
result.push(selector);
254257
});
255258
return result;
@@ -303,10 +306,10 @@ export default class SelectorList extends Array {
303306
let RecursionFound = false;
304307

305308
this.forEach(
306-
function(topSelector) {
309+
function (topSelector) {
307310
let visitedSelectors = [];
308311

309-
let checkRecursion = function(parentSelector) {
312+
let checkRecursion = function (parentSelector) {
310313
// already visited
311314
if (visitedSelectors.indexOf(parentSelector) !== -1) {
312315
RecursionFound = true;

0 commit comments

Comments
 (0)