Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions oletools/ftguess.py
Original file line number Diff line number Diff line change
Expand Up @@ -866,8 +866,6 @@ def main():
python_version = '%d.%d.%d' % sys.version_info[0:3]
print ('ftguess %s on Python %s - http://decalage.info/python/oletools' %
(__version__, python_version))
print ('THIS IS WORK IN PROGRESS - Check updates regularly!')
print ('Please report any issue at https://github.com/decalage2/oletools/issues')
print ('')

DEFAULT_LOG_LEVEL = "warning" # Default log level
Expand Down
3 changes: 0 additions & 3 deletions oletools/mraptor.py
Original file line number Diff line number Diff line change
Expand Up @@ -253,8 +253,6 @@ def main():
# Print help if no arguments are passed
if len(args) == 0:
print('MacroRaptor %s - http://decalage.info/python/oletools' % __version__)
print('This is work in progress, please report issues at %s' % URL_ISSUES)
print(__doc__)
parser.print_help()
print('\nAn exit code is returned based on the analysis result:')
for result in (Result_NoMacro, Result_NotMSOffice, Result_MacroOK, Result_Error, Result_Suspicious):
Expand All @@ -263,7 +261,6 @@ def main():

# print banner with version
print('MacroRaptor %s - http://decalage.info/python/oletools' % __version__)
print('This is work in progress, please report issues at %s' % URL_ISSUES)

log_helper.enable_logging(level=options.loglevel)
# enable logging in the modules:
Expand Down
2 changes: 0 additions & 2 deletions oletools/msodde.py
Original file line number Diff line number Diff line change
Expand Up @@ -225,8 +225,6 @@

# banner to be printed at program start
BANNER = """msodde %s - http://decalage.info/python/oletools
THIS IS WORK IN PROGRESS - Check updates regularly!
Please report any issue at https://github.com/decalage2/oletools/issues
""" % __version__

# === LOGGING =================================================================
Expand Down
3 changes: 0 additions & 3 deletions oletools/oleid.py
Original file line number Diff line number Diff line change
Expand Up @@ -513,9 +513,6 @@ def main():
"""Called when running this file as script. Shows all info on input file."""
# print banner with version
print('oleid %s - http://decalage.info/oletools' % __version__)
print('THIS IS WORK IN PROGRESS - Check updates regularly!')
print('Please report any issue at '
'https://github.com/decalage2/oletools/issues')
print('')

parser = argparse.ArgumentParser(description=__doc__)
Expand Down
2 changes: 0 additions & 2 deletions oletools/olemeta.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,8 +132,6 @@ def process_ole(ole):
def main():
# print banner with version
print('olemeta %s - http://decalage.info/python/oletools' % __version__)
print ('THIS IS WORK IN PROGRESS - Check updates regularly!')
print ('Please report any issue at https://github.com/decalage2/oletools/issues')

usage = 'usage: olemeta [options] <filename> [filename2 ...]'
parser = optparse.OptionParser(usage=usage)
Expand Down
3 changes: 0 additions & 3 deletions oletools/oleobj.py
Original file line number Diff line number Diff line change
Expand Up @@ -967,9 +967,6 @@ def main(cmd_line_args=None):
# print banner with version
ensure_stdout_handles_unicode()
print('oleobj %s - http://decalage.info/oletools' % __version__)
print('THIS IS WORK IN PROGRESS - Check updates regularly!')
print('Please report any issue at '
'https://github.com/decalage2/oletools/issues')
print('')

usage = 'usage: %(prog)s [options] <filename> [filename2 ...]'
Expand Down
2 changes: 0 additions & 2 deletions oletools/oletimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,8 +111,6 @@ def process_ole(ole):
def main():
# print banner with version
print('oletimes %s - http://decalage.info/python/oletools' % __version__)
print ('THIS IS WORK IN PROGRESS - Check updates regularly!')
print ('Please report any issue at https://github.com/decalage2/oletools/issues')

usage = 'usage: oletimes [options] <filename> [filename2 ...]'
parser = optparse.OptionParser(usage=usage)
Expand Down
55 changes: 54 additions & 1 deletion oletools/olevba.py
Original file line number Diff line number Diff line change
Expand Up @@ -284,6 +284,7 @@
import email.feedparser
import string # for printable
import json # for json output mode (argument --json)
from random import random

# import lxml or ElementTree for XML parsing:
try:
Expand Down Expand Up @@ -2372,7 +2373,7 @@ def detect_vba_strings(vba_code):
# Otherwise, start and end offsets are incorrect.
vba_code = vba_code.expandtabs()
# Split the VBA code line by line to avoid MemoryError on large scripts:
for vba_line in vba_code.splitlines():
for vba_line in split_vba_code(vba_code):
for tokens, start, end in vba_expr_str.scanString(vba_line):
encoded = vba_line[start:end]
decoded = tokens[0]
Expand All @@ -2393,6 +2394,50 @@ def detect_vba_strings(vba_code):
return results


#: Maximum length of a vba code line that is analyzed in one go. Longer lines
#: are split into chunks. Reduce this if you run into memory trouble.
MAX_CODE_LINE_LEN = 32000
#: Upper bound for the number of characters that two consecutive chunks of a
#: split line have in common. Must be smaller than MAX_CODE_LINE_LEN.
MAX_CODE_LINE_OVERLAP = 500


def split_vba_code(vba_code):
    """ Split vba code (or what is suspected to be one) into manageable parts

    The text is first split with :py:meth:`str.splitlines`. Every resulting
    line that is still longer than `MAX_CODE_LINE_LEN` (e.g. in case of
    non-vba-code in text files or mis-interpreted rtf) is split further at
    random positions into large overlapping chunks.

    This prevents MemoryErrors in the following parsing of that line, most of
    all if deobfuscating.

    :param str vba_code: text to split
    :returns: generator yielding lines or chunks of lines, none of them longer
              than `MAX_CODE_LINE_LEN`
    :raises ValueError: if `MAX_CODE_LINE_LEN` / `MAX_CODE_LINE_OVERLAP` are
                        inconsistent. Since this is a generator, the error
                        surfaces when iteration starts, not at call time.
    """
    if MAX_CODE_LINE_LEN < 10:
        raise ValueError('unreasonably small value for max code line length')
    if MAX_CODE_LINE_OVERLAP < 0:
        raise ValueError('unreasonably small value for max code line overlap')
    # ">=" so that the check really enforces what the message promises
    if MAX_CODE_LINE_OVERLAP >= MAX_CODE_LINE_LEN:
        raise ValueError('overlap must be smaller than chunks')
    half_len = MAX_CODE_LINE_LEN // 2
    half_overlap = MAX_CODE_LINE_OVERLAP // 2

    # Average advance of the start index per chunk. It is only used for the
    # chunk-count estimate in the debug message. Both halves can be equal
    # (e.g. len=11, overlap=10), so clamp to avoid a division by zero; the
    # loop below always advances by at least one character anyway.
    mean_idx_add = max(1.0, 1.5 * (half_len - half_overlap))

    for line in vba_code.splitlines():
        line_len = len(line)
        if line_len <= MAX_CODE_LINE_LEN:
            # common case: line is short enough, pass it on unchanged
            yield line
            continue

        n_chunks = int(line_len / mean_idx_add)   # only an approximation
        start_idx = 0
        chunk_idx = 0
        while (line_len - start_idx) > MAX_CODE_LINE_LEN:
            chunk_idx += 1
            # chunk size is in [half_len, 2*half_len), so always below the max
            chunk_size = half_len + int(random() * half_len)
            log.debug('splitting line of size {0}, yielding chunk of size {1},'
                      ' starting at {2} (number {3} of approx. {4})'
                      .format(line_len, chunk_size, start_idx, chunk_idx,
                              n_chunks))
            yield line[start_idx:start_idx+chunk_size]
            # step back by a random overlap, but guarantee forward progress
            overlap = half_overlap + int(random() * half_overlap)
            start_idx += max(1, chunk_size - overlap)
        yield line[start_idx:]     # yield the rest


def json2ascii(json_obj, encoding='utf8', errors='replace'):
"""
ensure there is no unicode in json and all strings are safe to decode
Expand Down Expand Up @@ -2792,6 +2837,11 @@ def __init__(self, filename, data=None, container=None, relaxed=True, encoding=D
# It must start with "ID" in uppercase, no whitespace or newline allowed before by Excel:
if data.startswith(b'ID'):
self.open_slk(data)
# check whether this is mso data
if is_mso_file(data):
log.debug('Found ActiveMime header, decompressing MSO container')
ole_data = mso_file_extract(data)
self.open_ole(ole_data)
# Check if this is a plain text VBA or VBScript file:
# To avoid scanning binary files, we simply check for some control chars:
if self.type is None and b'\x00' not in data:
Expand Down Expand Up @@ -3569,6 +3619,9 @@ def extract_macros(self):
log.debug('Error processing stream %r in file %r (%s)' % (d.name, self.filename, exc))
log.debug('Traceback:', exc_info=True)
# do not raise the error, as it is unlikely to be a compressed macro stream
# instead, yield the code as-is, maybe it just was not compressed
log.debug('Try analyzing uncompressed code')
yield (self.filename, d.name, d.name, compressed_code)
if self.xlm_macros:
vba_code = ''
for line in self.xlm_macros:
Expand Down
2 changes: 0 additions & 2 deletions oletools/rtfobj.py
Original file line number Diff line number Diff line change
Expand Up @@ -1011,8 +1011,6 @@ def main():
python_version = '%d.%d.%d' % sys.version_info[0:3]
print ('rtfobj %s on Python %s - http://decalage.info/python/oletools' %
(__version__, python_version))
print ('THIS IS WORK IN PROGRESS - Check updates regularly!')
print ('Please report any issue at https://github.com/decalage2/oletools/issues')
print ('')

DEFAULT_LOG_LEVEL = "warning" # Default log level
Expand Down
4 changes: 4 additions & 0 deletions tests/msodde/test_basic.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,8 @@ def test_invalid_text(self):
""" check that text file argument leads to non-zero exit status """
self.do_test_validity(join(BASE_DIR, 'basic/text'), Exception)

@unittest.skipIf('OLETOOLS_TEST_SKIP_SLOW' in os.environ and os.environ['OLETOOLS_TEST_SKIP_SLOW'] == '1',
"Skip slower tests")
def test_encrypted(self):
"""
check that encrypted files lead to non-zero exit status
Expand Down Expand Up @@ -119,6 +121,8 @@ def do_test_validity(self, filename, expect_error=None):
class TestErrorOutput(unittest.TestCase):
"""msodde does not specify error by return code but text output."""

@unittest.skipIf('OLETOOLS_TEST_SKIP_SLOW' in os.environ and os.environ['OLETOOLS_TEST_SKIP_SLOW'] == '1',
"Skip slower tests")
def test_crypt_output(self):
"""Check for helpful error message when failing to decrypt."""
for suffix in 'doc', 'docm', 'docx', 'ppt', 'pptm', 'pptx', 'xls', \
Expand Down
18 changes: 18 additions & 0 deletions tests/oleobj/test_basic.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import unittest
from tempfile import mkdtemp
from shutil import rmtree
from os import listdir, environ
from os.path import join, isfile
from hashlib import md5
from glob import glob
Expand Down Expand Up @@ -91,10 +92,14 @@ def tearDown(self):
elif self.temp_dir:
rmtree(self.temp_dir)

@unittest.skipIf('OLETOOLS_TEST_SKIP_SLOW' in environ and environ['OLETOOLS_TEST_SKIP_SLOW'] == '1',
"Skip slower tests")
def test_md5(self):
""" test all files in oleobj test dir """
self.do_test_md5(['-d', self.temp_dir])

@unittest.skipIf('OLETOOLS_TEST_SKIP_SLOW' in environ and environ['OLETOOLS_TEST_SKIP_SLOW'] == '1',
"Skip slower tests")
def test_md5_args(self):
"""
test that oleobj can be called with -i and -v
Expand Down Expand Up @@ -158,6 +163,19 @@ def test_non_streamed(self):
return self.do_test_md5(['-d', self.temp_dir], test_fun=preread_file,
only_run_every=4)

@unittest.skipIf('OLETOOLS_TEST_SKIP_SLOW' in environ and environ['OLETOOLS_TEST_SKIP_SLOW'] == '1',
"Skip slower tests")
def test_nodump(self):
"""Ensure that with --nodump nothing is ever written to disc."""
data_dir = join(DATA_BASE_DIR, 'oleobj')
for sample_name, _, _ in SAMPLES:
args = ['-d', self.temp_dir, '--nodump', join(data_dir, sample_name)]
call_and_capture('oleobj', args,
accept_nonzero_exit=True)
temp_dir_contents = listdir(self.temp_dir)
if temp_dir_contents:
self.fail('Found file in temp dir despite "--nodump": {}'.format(temp_dir_contents))


class TestSaneFilenameCreation(unittest.TestCase):
""" Test sanitization / creation of sane filenames """
Expand Down
4 changes: 3 additions & 1 deletion tests/olevba/test_basic.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,8 @@ def test_rtf_behaviour(self):
raise self.fail('Found "warn" in output line: "{}"'
.format(line.rstrip()))

@unittest.skipIf('OLETOOLS_TEST_SKIP_SLOW' in os.environ and os.environ['OLETOOLS_TEST_SKIP_SLOW'] == '1',
"Skip slower tests")
def test_crypt_return(self):
"""
Test that encrypted files give a certain return code.
Expand Down Expand Up @@ -105,7 +107,7 @@ def test_crypt_return(self):
.format(ret_code, args + [filename, ]))

# test only first file with all arg combinations, others just
# without arg (test takes too long otherwise
# without arg (test takes too long otherwise)
ADD_ARGS = ([], )

def test_xlm(self):
Expand Down