Skip to content

Commit 1805b3b

Browse files
authored
Merge pull request #12 from PLPeeters/bugfix/row-regex-freeze
Fix freeze when reading corrupt XLSX files. Thanks again!
2 parents 5f8a5ae + 8fe1f42 commit 1805b3b

File tree

5 files changed

+19
-17
lines changed

5 files changed

+19
-17
lines changed

CONTRIBUTORS.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,3 +6,4 @@
66
In alphabetical order:
77

88
* `Mark Skelton <https://github.com/mtskelton>`_
9+
* `Pierre-Louis Peeters <https://github.com/PLPeeters>`_

changelog.yml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,11 @@ name: pyexcel-xlsxr
22
organisation: pyexcel
33
releases:
44
- changes:
5+
- action: Fixed
6+
details:
7+
- 'Fix freeze when parsing certain corrupt XLSX files'
8+
date: 31.10.2025
9+
version: 0.6.3
510
- action: Fixed
611
details:
712
- 'Fix reading of files with more than 26 columns'

pyexcel_xlsxr/messy_xlsx.py

Lines changed: 11 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -10,17 +10,12 @@
1010
STYLE_FILENAME = "xl/styles.xml"
1111
SHARED_STRING = "xl/sharedStrings.xml"
1212
WORK_BOOK = "xl/workbook.xml"
13-
SHEET_MATCHER = "xl/worksheets/(work)?sheet([0-9]+)?.xml"
14-
SHEET_INDEX_MATCHER = "xl/worksheets/(work)?sheet(([0-9]+)?).xml"
15-
XLSX_ROW_MATCH = re.compile(rb".*?(<row.*?<\/.*?row>).*?", re.MULTILINE)
16-
NUMBER_FMT_MATCHER = re.compile(
17-
rb".*?(<numFmts.*?<\/.*?numFmts>).*?", re.MULTILINE
18-
)
19-
XFS_FMT_MATCHER = re.compile(
20-
rb".*?(<cellXfs.*?<\/.*?cellXfs>).*?", re.MULTILINE
21-
)
22-
SHEET_FMT_MATCHER = re.compile(rb".*?(<sheet .*?\/>).*?", re.MULTILINE)
23-
DATE_1904_MATCHER = re.compile(rb".*?(<workbookPr.*?\/>).*?", re.MULTILINE)
13+
SHEET_MATCHER = re.compile(r"xl/worksheets/(?:work)?sheet([0-9]+)?.xml")
14+
XLSX_ROW_MATCH = re.compile(rb"<row\b[^>]*>.*?</row>", re.DOTALL)
15+
NUMBER_FMT_MATCHER = re.compile(rb"<numFmts\b[^>]*>.*?</numFmts>", re.DOTALL)
16+
XFS_FMT_MATCHER = re.compile(rb"<cellXfs\b[^>]*>.*?</cellXfs>", re.DOTALL)
17+
SHEET_FMT_MATCHER = re.compile(rb"<sheet\b.*?/>", re.DOTALL)
18+
DATE_1904_MATCHER = re.compile(rb"<workbookPr\b.*?/>", re.DOTALL)
2419
# "xmlns:x14ac="http://schemas.microsoft.com/office/spreadsheetml/2009/9/ac"
2520
# But it is not used for now
2621
X14AC_NAMESPACE = b'xmlns:x14ac="http://not.used.com/"'
@@ -159,14 +154,15 @@ def find_sheets(file_list):
159154
return [
160155
sheet_file
161156
for sheet_file in file_list
162-
if re.match(SHEET_MATCHER, sheet_file)
157+
if SHEET_MATCHER.match(sheet_file)
163158
]
164159

165160

166161
def get_sheet_index(file_name):
167-
if re.match(SHEET_MATCHER, file_name):
168-
result = re.search(SHEET_INDEX_MATCHER, file_name)
169-
index = int(result.group(3)) if result.group(3) else 1
162+
sheet_match = SHEET_MATCHER.match(file_name)
163+
164+
if sheet_match:
165+
index = int(sheet_match.group(1)) if sheet_match.group(1) else 1
170166
return index - 1
171167
else:
172168
raise Exception("Invalid sheet file name")

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -196,7 +196,7 @@ def filter_out_test_code(file_handle):
196196
keywords=KEYWORDS,
197197
python_requires=PYTHON_REQUIRES,
198198
extras_require=EXTRAS_REQUIRE,
199-
tests_require=["nose"],
199+
tests_require=["pytest~=8.4"],
200200
install_requires=INSTALL_REQUIRES,
201201
packages=PACKAGES,
202202
include_package_data=True,

tests/requirements.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
nose
1+
pytest~=8.4
22
mock;python_version<"3"
33
codecov
44
coverage

0 commit comments

Comments
 (0)