Skip to content

Commit af37b4a

Browse files
committed
change ordering of requests to progress by bin resolution level instead of term presentation
1 parent 78673b6 commit af37b4a

1 file changed

Lines changed: 37 additions & 12 deletions

File tree

  • python/interpret-core/interpret/glassbox/_ebm

python/interpret-core/interpret/glassbox/_ebm/_bin.py

Lines changed: 37 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -35,14 +35,19 @@ def eval_terms(X, n_samples, feature_names_in, feature_types_in, bins, term_feat
3535

3636
# prior to calling this function, call remove_extra_bins which will eliminate extra work in this function
3737

38-
# this generator function returns data in whatever order it thinks is most efficient. Normally for
38+
# This generator function returns data as the feature data within terms gets read. Normally for
3939
# mains it returns them in order, but pairs will be returned as their data completes and they can
4040
# be mixed in with mains. So, if we request data for [(0), (1), (2), (3), (4), (1, 3)] the return sequence
41-
# could be [(0), (1), (2), (3), (1, 3), (4)]. More complicated pair/triples return even more randomized ordering.
41+
# would be [(0), (1), (2), (3), (1, 3), (4)]. More complicated pair/triples return even more randomized ordering.
4242
# For additive models the results can be processed in any order, so this imposes no penalties on us.
4343

4444
_log.info("eval_terms")
4545

46+
# Flatten the term_features array to make one entry per feature within each term
47+
# each item in the list contains placeholders for the binned array that we need
48+
# to complete the term. We fill these with None initially. At the end of the array
49+
# is the term_idx. So it looks approximately like this:
50+
# eg: [[None, 0], [None, 1], [None, 2], [None, None, 3], [None, None, None, 4]]
4651
all_requirements = list(
4752
chain.from_iterable(
4853
map(
@@ -59,25 +64,37 @@ def eval_terms(X, n_samples, feature_names_in, feature_types_in, bins, term_feat
5964
),
6065
)
6166

67+
# get the per-feature (per-term) binning for all levels
68+
# eg: [[{'a': 0, 'b': 1}], [[1, 2, 3, 4, 5], [1, 3, 5]]]
6269
all_bin_levels1, all_bin_levels2 = tee(
6370
map(bins.__getitem__, chain.from_iterable(term_features)), 2
6471
)
6572

73+
# get a per-feature per-term list of indexes that will be used into the bins list
74+
# eg: [0, 1, 0, 0, 1, 2]
75+
levels = list(
76+
map(
77+
min,
78+
zip(
79+
map((-1).__add__, map(len, all_bin_levels2)),
80+
map((-2).__add__, map(len, all_requirements)),
81+
),
82+
)
83+
)
84+
85+
# index into the bins list per-feature per-term to get the actual binning
86+
# eg: [{'a': 0, 'b': 1}, [3.5, 6.5, 8.5], {'x': 0, 'y': 1}]
6687
feature_bins1, feature_bins2 = tee(
6788
map(
6889
getitem,
6990
all_bin_levels1,
70-
map(
71-
min,
72-
zip(
73-
map((-1).__add__, map(len, all_bin_levels2)),
74-
map((-2).__add__, map(len, all_requirements)),
75-
),
76-
),
91+
levels,
7792
),
7893
2,
7994
)
8095

96+
# replace the continuous bins with None values
97+
# eg: [{'a': 0, 'b': 1}, None, {'x': 0, 'y': 1}]
8198
all_feature_bins = list(
8299
map(
83100
getitem,
@@ -86,15 +103,18 @@ def eval_terms(X, n_samples, feature_names_in, feature_types_in, bins, term_feat
86103
)
87104
)
88105

106+
# generate the list of requests which consist of keys (feature_idx, id(categories))
107+
# and values (bin_level_idx, categories)
108+
# where categories is None for continuous features
109+
# eg: {(0, id({'a': 0, 'b': 1})): (1, {'a': 0, 'b': 1}), ...}
89110
requests = dict(
90111
zip(
91112
zip(chain.from_iterable(term_features), map(id, all_feature_bins)),
92-
zip(count(), all_feature_bins),
113+
zip(levels, all_feature_bins),
93114
)
94115
)
95116

96-
# Order requests by (feature_idx, term order) for implementation independence.
97-
# Since term_features is sorted by # dimensions, this also orders by # dimensions.
117+
# Order requests by (feature_idx, category_level) for implementation independence.
98118
requests = sorted(
99119
zip(
100120
map(_itemgetter0, requests.keys()),
@@ -105,12 +125,17 @@ def eval_terms(X, n_samples, feature_names_in, feature_types_in, bins, term_feat
105125

106126
request_feature_idxs = list(map(_itemgetter0, requests))
107127

128+
# the request keys that we expect back
108129
keys1, keys2 = tee(
109130
zip(chain.from_iterable(term_features), map(id, all_feature_bins)), 2
110131
)
111132

112133
waiting = {}
113134
# sum is used to iterate outside the interpreter. The result is not used.
135+
# make a fast lookup so that we can determine which terms are affected
136+
# by the completion of a feature. The value returned from the key
137+
# has space to store the result to accumulate binned features for higher
138+
# order interactions that require more than one feature.
114139
sum(
115140
map(
116141
truth,

0 commit comments

Comments
 (0)