Skip to content

Commit 61f73b4

Browse files
Merge pull request #381 from PyAr/transmit-redirection-score
Transmit redirection scores to destination articles
2 parents 85767e1 + c61ca44 commit 61f73b4

File tree

2 files changed

+28
-5
lines changed

2 files changed

+28
-5
lines changed

src/preprocessing/preprocess.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
#!/usr/bin/python
22
# -*- coding: utf-8 -*-
33

4-
# Copyright 2020 CDPedistas (see AUTHORS.txt)
4+
# Copyright 2020-2021 CDPedistas (see AUTHORS.txt)
55
#
66
# This program is free software: you can redistribute it and/or modify it
77
# under the terms of the GNU General Public License version 3, as published
@@ -177,9 +177,10 @@ def process(self):
177177
scores_log.write("{}|R|{:d}\n".format(
178178
to3dirs.to_pagina(page_path), this_total_score))
179179

180-
# save the extra pages score (that may exist or not in the dump)
181-
for extra_page, extra_score in other_pages_scores:
182-
scores_log.write("{}|E|{:d}\n".format(extra_page, extra_score))
180+
# save the extra pages' scores (which may or may not exist in the dump) even if the page
181+
# is discarded (e.g. for transferring score from redirect pages to their targets)
182+
for extra_page, extra_score in other_pages_scores:
183+
scores_log.write("{}|E|{:d}\n".format(extra_page, extra_score))
183184

184185
# with score or discarded, log it as processed
185186
processed_before_log.write(page_path + "\n")

tests/test_preprocess.py

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# Copyright 2013-2020 CDPedistas (see AUTHORS.txt)
1+
# Copyright 2013-2021 CDPedistas (see AUTHORS.txt)
22
#
33
# This program is free software: you can redistribute it and/or modify it
44
# under the terms of the GNU General Public License version 3, as published
@@ -199,6 +199,28 @@ def test_empty_dir(self, tmp_path, wikisite):
199199
ws.commit()
200200
assert os.path.getsize(config.LOG_PREPROCESADO) == 0
201201

202+
def test_transmit_redirection_score_to_destination(self, mocker, tmp_path, wikisite):
203+
"""Test that extra scores produced while processing a redirection are not discarded."""
204+
ws = wikisite(str(tmp_path))
205+
206+
# mock preprocessor that discards the redirection and transmits the score to the destination
207+
omit_redirects = mocker.Mock(return_value=(None, [('destination', 1234)]))
208+
mocker.patch.object(ws, 'preprocessors', [omit_redirects])
209+
210+
# dummy redirection article that will be discarded
211+
article = tmp_path.joinpath('r', 'e', 'd', 'redirection')
212+
article.parent.mkdir(parents=True)
213+
article.touch()
214+
215+
ws.process()
216+
ws.commit()
217+
218+
with open(preprocess.LOG_SCORES_ACCUM, 'rt', encoding='utf-8') as fh:
219+
scores = fh.read()
220+
221+
# real score of redirection is discarded, extra score of destination is saved
222+
assert scores == 'destination|E|1234\n'
223+
202224

203225
class TestPagesSelector(object):
204226
"""Tests for the PagesSelector"""

0 commit comments

Comments
 (0)