-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdriver_load_manifest.py
More file actions
123 lines (105 loc) · 5.72 KB
/
driver_load_manifest.py
File metadata and controls
123 lines (105 loc) · 5.72 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
import argparse
import jsonpickle
import os
import angr
from collections import defaultdict
from dataflow_dataset_generation.manifest import Manifest
def _percentage(part, whole):
    """Return ``part`` as a percentage of ``whole``, or 0.0 when ``whole`` is 0.

    Guarding the zero case keeps the summary printable for an empty manifest
    (e.g. an empty entries file or ``--break-limit 0``).
    """
    if not whole:
        return 0.0
    return (part / whole) * 100.0


def _load_manifest(manifest_file, manifest_entries_file, break_limit):
    """Build the manifest from exactly one of the two input files.

    Args:
        manifest_file: Path to a jsonpickle-encoded manifest, or None.
        manifest_entries_file: Path to a file holding one jsonpickle-encoded
            manifest entry per line, or None.
        break_limit: Maximum number of entries to add when reading the
            entries file; None means no limit.

    Returns:
        A ``(manifest, dataset_path)`` tuple, where ``dataset_path`` is the
        directory containing the input file.
    """
    # SECURITY NOTE: jsonpickle.decode can instantiate arbitrary Python
    # objects; only load manifest files that come from a trusted source.
    if manifest_file is not None:
        if break_limit is not None:
            print("WARN: You specified a break limit, but this can only be used when loading from a manifest-entries file")
        print("Loading manifest: {}".format(manifest_file))
        with open(manifest_file, 'r') as fd:
            manifest = jsonpickle.decode(fd.read())
        return manifest, os.path.dirname(manifest_file)

    print("Creating manifest...")
    manifest = Manifest()
    num_added = 0
    with open(manifest_entries_file, 'r') as fd:
        for line in fd:
            if break_limit is not None and num_added >= break_limit:
                break
            # Blank lines (e.g. a trailing newline) carry no entry; skip them
            # instead of letting the decoder fail on empty input.
            if not line.strip():
                continue
            manifest.add_entry(jsonpickle.decode(line))
            num_added += 1
    return manifest, os.path.dirname(manifest_entries_file)


def _filter_unique(manifest, uniq_srcs, uniq_bins, uniq_funcs):
    """Return a new manifest keeping only the first entry per requested hash.

    An entry is dropped when any enabled uniqueness check finds its hash
    already present in the manifest built so far. A single dropped entry may
    be counted under several categories, so the per-category counts are not
    mutually exclusive.
    """
    print("Filtering manifest entries on: unique source: {} unique binaries: {} unique funcs: {}".format(uniq_srcs, uniq_bins, uniq_funcs))
    new_manifest = Manifest()
    num_filtered = 0
    srcs_excluded = 0
    bins_excluded = 0
    funcs_excluded = 0
    for entry in manifest.entries:
        dup_src = uniq_srcs and entry.src_md5 in new_manifest.src_md5s
        dup_bin = uniq_bins and entry.bin_md5 in new_manifest.bin_md5s
        dup_func = uniq_funcs and entry.tgt_func_md5 in new_manifest.func_md5s
        srcs_excluded += bool(dup_src)
        bins_excluded += bool(dup_bin)
        funcs_excluded += bool(dup_func)
        if dup_src or dup_bin or dup_func:
            num_filtered += 1
        else:
            new_manifest.add_entry(entry)
    print("Filtered {} entries ({} src, {} bin, {} func (not mutually-exclusive))".format(num_filtered, srcs_excluded, bins_excluded, funcs_excluded))
    return new_manifest


def _print_summary(manifest):
    """Print entry, source, binary, function and compiler statistics."""
    num_entries = len(manifest.entries)
    num_srcs = len(manifest.src_names)
    num_uniq_srcs = len(manifest.src_md5s)
    num_bins = len(manifest.bin_names)
    num_uniq_bins = len(manifest.bin_md5s)
    num_uniq_funcs = len(manifest.func_md5s)
    perc_uniq_srcs = _percentage(num_uniq_srcs, num_srcs)
    perc_uniq_bins = _percentage(num_uniq_bins, num_bins)
    # Since every binary has one function, we can calculate percentage unique functions over binaries
    perc_uniq_funcs = _percentage(num_uniq_funcs, num_bins)

    print("Num entries: {}".format(num_entries))
    print("Num source files: {} ({} unique, {:.2f}% unique)".format(num_srcs, num_uniq_srcs, perc_uniq_srcs))
    print("Num binary files: {} ({} unique, {:.2f}% unique)".format(num_bins, num_uniq_bins, perc_uniq_bins))
    print("Uniq funcs: {} ({:.2f}%)".format(num_uniq_funcs, perc_uniq_funcs))
    print("Compiler versions:")
    # NOTE(review): assumes compiler_versions iterates as (name, version)
    # pairs - confirm against the Manifest class.
    for compiler_name, compiler_version in manifest.compiler_versions:
        print(compiler_name)
        print(compiler_version)


def main():
    """Load a dataset manifest, optionally filter it, and print statistics.

    Command-line options:
        --manifest / --manifest-entries: exactly one input file is required.
        --uniq-srcs / --uniq-bins / --uniq-funcs: keep only the first entry
            for each distinct source, binary or target-function hash.
        --break-limit: cap on entries read from a manifest-entries file.
        --write-bin-paths: output file receiving one binary path per entry.

    Returns:
        None. On invalid argument combinations a message is printed and the
        function returns early without raising.
    """
    parser = argparse.ArgumentParser(description='Load and inspect a manifest file created for a dataset.')
    parser.add_argument('--manifest', required=False, help='The file containing the manifest. (This file can be created from driver_creat_manifest.py)')
    parser.add_argument('--manifest-entries', required=False, help='The file containing the manifest entries')
    parser.add_argument('--uniq-srcs', action='store_true', required=False, default=False, help='Only use unique sources (NOTE: this will disable variation gained via compilation options)')
    parser.add_argument('--uniq-bins', action='store_true', required=False, default=False, help='Only use unique binaries')
    parser.add_argument('--uniq-funcs', action='store_true', required=False, default=False, help='Only use unique functions')
    parser.add_argument('--break-limit', type=int, default=None, required=False, help='Limit the number of manifest entries to add (only applicable when using manifest-entries)')
    parser.add_argument('--write-bin-paths', required=False, help='Create a file with the specified name, containing the paths to the binary files of the (possibly filtered) manifest entries.')
    args = parser.parse_args()

    manifest_file = args.manifest
    manifest_entries_file = args.manifest_entries
    write_bin_path_file = args.write_bin_paths

    if manifest_file is None and manifest_entries_file is None:
        print("You must specify either manifest file or manifest entries file.")
        return
    if manifest_file is not None and manifest_entries_file is not None:
        print("You cannot both specify manifest file and manifest entries file")
        return

    manifest, dataset_path = _load_manifest(manifest_file, manifest_entries_file, args.break_limit)

    if args.uniq_srcs or args.uniq_bins or args.uniq_funcs:
        manifest = _filter_unique(manifest, args.uniq_srcs, args.uniq_bins, args.uniq_funcs)

    _print_summary(manifest)

    if write_bin_path_file is not None:
        print("Writing binary paths to file: {}".format(write_bin_path_file))
        with open(write_bin_path_file, 'w') as fd:
            # Binaries are addressed under the 'bin' directory that sits
            # next to the manifest file.
            fd.writelines(
                os.path.join(dataset_path, 'bin', entry.bin_file_name) + '\n'
                for entry in manifest.entries
            )

    num_missing_tgt_write_insns = sum(1 for entry in manifest.entries if entry.tgt_write_addr is None)
    num_missing_tgt_read_insns = sum(1 for entry in manifest.entries if entry.tgt_read_addr is None)
    print("Number of entries with no target write address: {}".format(num_missing_tgt_write_insns))
    print("Number of entries with no target read address: {}".format(num_missing_tgt_read_insns))


# Run the CLI only when executed as a script, not when imported.
if __name__ == "__main__":
    main()