Skip to content

Commit 388da6f

Browse files
Add a tool to automate imports (#119)
* Add a tool to automate imports * Fix lint errors * Fix issues reported by CodeRabbitAI * Fix more lint issues * Provide further safeguards against paths starting with - * Update test cases against -- injection prevention * Ease type hint checkers * Use single quotes instead of double quotes * Fix syntax error * Mark the start of operations in logs * Use single quotes, again * Fix runtime error * Handle KeyboardInterrupt when running scripts * Add suggestions from RhinosF1 * Mark run_scripts() with pragma: no cover * Improve coverage * Use context manager to create the file * pass * Update version.py --------- Co-authored-by: RhinosF1 <[email protected]> Co-authored-by: RhinosF1 <[email protected]>
1 parent 7032397 commit 388da6f

File tree

3 files changed

+321
-1
lines changed

3 files changed

+321
-1
lines changed

miraheze/mediawiki/mwimport.py

Lines changed: 154 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,154 @@
1+
#!/usr/bin/env python3
2+
import argparse
3+
import os
4+
import shlex
5+
import subprocess
6+
import sys
7+
8+
9+
def parse_args(input_args: list | None = None, check_paths: bool = True) -> argparse.Namespace:
10+
parser = argparse.ArgumentParser(description='A script to automate manual wiki imports')
11+
parser.add_argument(
12+
'--no-log', dest='nolog', action='store_true',
13+
help='Whether or not to disable logging to the server admin log',
14+
)
15+
parser.add_argument(
16+
'--confirm', '--yes', '-y', dest='confirm', action='store_true',
17+
help='Whether or not to skip the initial confirmation prompt',
18+
)
19+
parser.add_argument('--version', help='MediaWiki version to use (automatically detected if not passed)')
20+
parser.add_argument('--xml', help='XML file to import')
21+
parser.add_argument('--username-prefix', help='Interwiki prefix for importing XML')
22+
parser.add_argument('--images', help='Directory of images to import')
23+
parser.add_argument(
24+
'--images-comment', help='The comment passed to importImages.php'
25+
' (example: "Importing images from https://example.com ([[phorge:T1234|T1234]])")',
26+
)
27+
parser.add_argument(
28+
'--search-recursively', action='store_true',
29+
help='Whether or not to pass --search-recursively (check files in subdirectories) to importImages.php',
30+
)
31+
parser.add_argument('wiki', help='Database name of the wiki to import to')
32+
33+
args = parser.parse_args(input_args)
34+
if not args.xml and not args.images:
35+
raise ValueError('--xml and/or --images must be passed')
36+
if args.images and not args.images_comment:
37+
raise ValueError('--images-comment must be passed when importing images')
38+
39+
# This is honestly only for unit testing as I can't really think of a reason
40+
# to disable this check in production
41+
if check_paths:
42+
# This is not meant to be comprehensive, but just to make sure that someone
43+
# doesn't typo a path or so
44+
if args.xml and not os.path.exists(args.xml):
45+
raise ValueError(f'Cannot find XML to import: {repr(args.xml)}')
46+
if args.images and not os.path.exists(args.images):
47+
raise ValueError(f'Cannot find images to import: {repr(args.images)}')
48+
49+
return args
50+
51+
52+
def log(message: str):  # pragma: no cover
    """Pass ``message`` to the ``logsalmsg`` helper.

    ``check=True`` means a non-zero exit of the helper raises
    ``subprocess.CalledProcessError``.
    """
    command = ['/usr/local/bin/logsalmsg', message]
    subprocess.run(command, check=True)
57+
58+
59+
def get_version(wiki: str) -> str:  # pragma: no cover
    """Return what ``getMWVersion`` prints for ``wiki``, stripped of surrounding whitespace.

    Raises ``subprocess.CalledProcessError`` if the helper exits non-zero.
    """
    completed = subprocess.run(
        ['/usr/local/bin/getMWVersion', wiki],
        stdout=subprocess.PIPE,
        check=True,
        text=True,
    )
    output = completed.stdout
    return output.strip()
66+
67+
68+
def get_scripts(args: argparse.Namespace) -> list[list[str]]:
    """Build the full command lines for the import described by ``args``.

    Order: importDump (if --xml), importImages (if --images), rebuildall and
    initEditCount (if --xml), then always initSiteStats --update.

    Returns:
        One argv list per maintenance script, each run through ``sudo -u
        www-data php .../maintenance/run.php`` with ``--wiki`` placed directly
        after the script name.
    """
    pending = []

    if args.xml:
        import_dump = ['importDump', '--no-updates']
        if args.username_prefix:
            import_dump += [f'--username-prefix={args.username_prefix}']
        # '--' ends option parsing so a path starting with '-' is not taken as a flag.
        import_dump += ['--', args.xml]
        pending.append(import_dump)

    if args.images:
        import_images = ['importImages', f'--comment={args.images_comment}']
        if args.search_recursively:
            import_images += ['--search-recursively']
        import_images += ['--', args.images]
        pending.append(import_images)

    if args.xml:
        pending += [['rebuildall'], ['initEditCount']]

    pending.append(['initSiteStats', '--update'])

    version = args.version or get_version(args.wiki)
    launcher = ['sudo', '-u', 'www-data', 'php', f'/srv/mediawiki/{version}/maintenance/run.php']
    wiki_flag = f'--wiki={args.wiki}'
    # --wiki is squeezed in after the script name but before its other
    # arguments here, once, because adding it to every entry above by hand
    # would clutter the list: all of these scripts run on a single wiki.
    return [[*launcher, name, wiki_flag, *options] for name, *options in pending]
99+
100+
101+
def run_scripts(args: argparse.Namespace, scripts: list[list[str]]) -> int:  # pragma: no cover
    """Run each command in order, stopping at the first failure or interrupt.

    Args:
        args: Parsed arguments; only ``args.nolog`` is read here, to decide
            whether each command's start and end are passed to ``log``.
        scripts: The argv lists to execute, in order.

    Returns:
        0 if every command exited with 0; otherwise the return code of the
        first command that failed. If a command was interrupted with Ctrl-C but
        still exited with 0, 130 is returned so the remaining commands are not
        run against the operator's intent.
    """
    for script in scripts:
        command = shlex.join(script)
        print(f'Running {command}')
        if not args.nolog:
            print('Logging execution...')
            log(f'{command} (START)')

        interrupted = False
        proc = subprocess.Popen(script)
        try:
            proc.wait()
        except KeyboardInterrupt:
            # Ask the child to stop, then reap it so returncode is populated.
            interrupted = True
            proc.terminate()
            proc.wait()

        if not args.nolog:
            print('Logging execution end...')
            log(f'{command} (END - exit={proc.returncode})')

        if proc.returncode:
            return proc.returncode
        if interrupted:
            # The child may have finished cleanly just as the interrupt arrived;
            # an interrupt must still abort the rest of the queue.
            # 130 follows the shell convention of 128 + SIGINT.
            return 130

    return 0
123+
124+
125+
def run() -> int:  # pragma: no cover
    """Entry point: parse arguments, confirm with the operator, then run the import.

    Returns:
        1 if argument validation fails or the operator does not confirm;
        otherwise the result of ``run_scripts``.
    """
    try:
        args = parse_args()
    except ValueError as e:
        print(f'{type(e).__name__}: {e}', file=sys.stderr)
        return 1

    scripts = get_scripts(args)

    print('Will run:')
    for script in scripts:
        print(f'* {shlex.join(script)}')
    if not args.confirm:
        try:
            answer = input("Type 'Y' to confirm: ")
        except EOFError:
            # stdin is closed or not interactive: treat it as "not confirmed"
            # instead of crashing with a traceback.
            print('No confirmation received, aborting', file=sys.stderr)
            return 1
        if answer.strip().upper() != 'Y':
            return 1

    if not args.nolog:
        print('Logging start...')
        log(f'Starting import for {args.wiki} (XML: {args.xml}; Images: {args.images}) (START)')

    return_code = run_scripts(args, scripts)

    if not args.nolog:
        print('Logging end...')
        log(f'Finished import for {args.wiki} (XML: {args.xml}; Images: {args.images}) (END - exit={return_code})')

    return return_code
151+
152+
153+
if __name__ == '__main__':  # pragma: no cover
    # Propagate the import's result as the process exit status.
    exit_code = run()
    sys.exit(exit_code)

miraheze/version.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
VERSION = '0.0.7'
1+
VERSION = '0.0.8'

tests/test_mwimport.py

Lines changed: 166 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,166 @@
1+
import os
2+
import tempfile
3+
import pytest
4+
from miraheze.mediawiki import mwimport
5+
6+
7+
def test_parse_args_xml_images():
    """--xml, --images, --images-comment and the wiki name are all stored as given."""
    argv = [
        '--xml=dump.xml',
        '--images=images',
        '--images-comment=Importing from https://example.com',
        'examplewiki',
    ]
    parsed = mwimport.parse_args(argv, check_paths=False)
    assert parsed.xml == 'dump.xml'
    assert parsed.images == 'images'
    assert parsed.images_comment == 'Importing from https://example.com'
    assert parsed.wiki == 'examplewiki'
18+
19+
20+
def test_parse_args_images_recursively():
    """--search-recursively is recorded alongside the image options."""
    argv = [
        '--images=images',
        '--search-recursively',
        '--images-comment=Importing from https://example.com',
        'examplewiki',
    ]
    parsed = mwimport.parse_args(argv, check_paths=False)
    assert parsed.images == 'images'
    assert parsed.search_recursively
    assert parsed.images_comment == 'Importing from https://example.com'
    assert parsed.wiki == 'examplewiki'
31+
32+
33+
def test_parse_args_no_log_and_confirm_and_version():
    """--no-log, --confirm and --version end up in nolog, confirm and version."""
    argv = [
        '--no-log',
        '--confirm',
        '--version=0.42',
        '--xml=dump.xml',
        'examplewiki',
    ]
    parsed = mwimport.parse_args(argv, check_paths=False)
    assert parsed.nolog
    assert parsed.confirm
    assert parsed.version == '0.42'
44+
45+
46+
def test_parse_args_username_prefix():
    """--username-prefix is stored as username_prefix."""
    argv = [
        '--xml=dump.xml',
        '--username-prefix=w',
        'examplewiki',
    ]
    parsed = mwimport.parse_args(argv, check_paths=False)
    assert parsed.username_prefix == 'w'
53+
54+
55+
def test_parse_args_need_xml_or_images():
    """Passing only the wiki name is rejected."""
    argv = ['examplewiki']
    with pytest.raises(ValueError, match='--xml and/or --images must be passed'):
        mwimport.parse_args(argv)
60+
61+
62+
def test_parse_args_images_need_comment():
    """--images without --images-comment is rejected."""
    argv = [
        '--images=images',
        'examplewiki',
    ]
    with pytest.raises(ValueError, match='--images-comment must be passed when importing images'):
        mwimport.parse_args(argv)
68+
69+
70+
def test_parse_args_missing_xml():
    """A non-existent --xml path is rejected when path checking is on."""
    argv = [
        '--xml=/dev/no xml',
        'examplewiki',
    ]
    with pytest.raises(ValueError, match="Cannot find XML to import: '/dev/no xml'"):
        mwimport.parse_args(argv)
76+
77+
78+
def test_parse_args_missing_images():
    """A non-existent --images path is rejected when path checking is on."""
    argv = [
        '--images=/dev/no images',
        '--images-comment=Importing from https://example.com',
        'examplewiki',
    ]
    with pytest.raises(ValueError, match="Cannot find images to import: '/dev/no images'"):
        mwimport.parse_args(argv)
85+
86+
87+
def test_parse_args_both_xml_images_exists():
    """Existing XML and images paths pass the path check and are returned unchanged."""
    with tempfile.TemporaryDirectory() as workdir:
        xml_path = os.path.join(workdir, 'dump.xml')
        images_path = os.path.join(workdir, 'images')

        # An empty file is enough: only the existence of the path is checked.
        with open(xml_path, 'w'):
            pass
        os.mkdir(images_path)

        argv = [
            f'--xml={xml_path}',
            f'--images={images_path}',
            '--images-comment=Importing from https://example.com',
            'examplewiki',
        ]
        parsed = mwimport.parse_args(argv)

        assert parsed.xml == xml_path
        assert parsed.images == images_path
106+
107+
108+
def test_get_scripts_xml_images():
    """XML plus images yields all five maintenance scripts in order."""
    argv = [
        '--version=0.42',
        '--xml=dump.xml',
        '--images=images',
        '--images-comment=Importing from https://example.com',
        'examplewiki',
    ]
    parsed = mwimport.parse_args(argv, check_paths=False)

    launcher = ['sudo', '-u', 'www-data', 'php', '/srv/mediawiki/0.42/maintenance/run.php']
    bare = [
        ['importDump', '--no-updates', '--', 'dump.xml'],
        ['importImages', '--comment=Importing from https://example.com', '--', 'images'],
        ['rebuildall'],
        ['initEditCount'],
        ['initSiteStats', '--update'],
    ]
    # --wiki goes right after the script name, ahead of its other arguments.
    expected = [[*launcher, name, '--wiki=examplewiki', *rest] for name, *rest in bare]

    assert mwimport.get_scripts(parsed) == expected
128+
129+
130+
def test_get_scripts_username_prefix():
    """--username-prefix is forwarded to importDump ahead of the '--' separator."""
    argv = [
        '--version=0.42',
        '--xml=dump.xml',
        '--username-prefix=w',
        'examplewiki',
    ]
    parsed = mwimport.parse_args(argv, check_paths=False)

    launcher = ['sudo', '-u', 'www-data', 'php', '/srv/mediawiki/0.42/maintenance/run.php']
    bare = [
        ['importDump', '--no-updates', '--username-prefix=w', '--', 'dump.xml'],
        ['rebuildall'],
        ['initEditCount'],
        ['initSiteStats', '--update'],
    ]
    # --wiki goes right after the script name, ahead of its other arguments.
    expected = [[*launcher, name, '--wiki=examplewiki', *rest] for name, *rest in bare]

    assert mwimport.get_scripts(parsed) == expected
148+
149+
150+
def test_get_scripts_search_recursively():
    """An images-only import passes --search-recursively and skips the XML-only scripts."""
    argv = [
        '--version=0.42',
        '--images=images',
        '--images-comment=Importing from https://example.com',
        '--search-recursively',
        'examplewiki',
    ]
    parsed = mwimport.parse_args(argv, check_paths=False)

    launcher = ['sudo', '-u', 'www-data', 'php', '/srv/mediawiki/0.42/maintenance/run.php']
    bare = [
        ['importImages', '--comment=Importing from https://example.com', '--search-recursively', '--', 'images'],
        ['initSiteStats', '--update'],
    ]
    # --wiki goes right after the script name, ahead of its other arguments.
    expected = [[*launcher, name, '--wiki=examplewiki', *rest] for name, *rest in bare]

    assert mwimport.get_scripts(parsed) == expected

0 commit comments

Comments
 (0)