The canonical_target_name() function in utils.py accepts 4 parameters, but only 1 parameter is passed to the call to canonical_target_name() in parse_all.py.
|
cont = { |
|
'label': 'contains', # also stored as 'type' |
|
# target_names (list), cont_names (list) |
|
'target_names': [canonical_target_name(ex[0]['word'])], |
|
'cont_names': [canonical_name(ex[1]['word'])], |
|
# target_ids (list), cont_ids (list) |
|
# - p_id prepended in indexer.py |
|
'target_ids': ['%s_%d_%d' % (ex[0]['ner'].lower(), |
|
ex[0]['characterOffsetBegin'], |
|
ex[0]['characterOffsetEnd'])], |
|
'cont_ids': ['%s_%d_%d' % (ex[1]['ner'].lower(), |
|
ex[1]['characterOffsetBegin'], |
|
ex[1]['characterOffsetEnd'])], |
|
# excerpt_t (sentence) |
|
'sentence': ' '.join([t['originalText'] for \ |
|
t in ex[2]['tokens']]), |
|
# source: 'corenlp' (later, change to 'jsre') |
|
'source': 'corenlp', |
|
} |
|
def canonical_target_name(name, id, targets, aliases): |
|
""" |
|
Gets canonical target name |
|
:param name - name whose canonical name is to be looked up |
|
:return canonical name |
|
""" |
|
name = name.strip() |
|
# Look up 'name' in the aliases; if found, replace with its antecedent |
|
# Note: this is super permissive. Exact match on id is safe, |
|
# but we're also allowing any exact-text match with any other |
|
# known target name. |
|
all_targets = [t['annotation_id_s'] for t in targets |
|
if t['name'] == name] |
|
name_aliases = [a['arg2_s'] for a in aliases |
|
if ((a['arg1_s'] == id) or |
|
(a['arg1_s'] in all_targets))] |
|
if len(name_aliases) > 0: |
|
# Ideally there is only one; let's use the first one |
|
can_name = [t['name'] for t in targets \ |
|
if t['annotation_id_s'] == name_aliases[0]] |
|
print('Mapping <%s> to <%s>' % (name, can_name[0])) |
|
name = can_name[0] |
|
|
|
return re.sub(r"[\s_-]+", " ", name).title().replace(' ', '_') |
The
canonical_target_name()function inutils.pyaccepts 4 parameters, but only 1 parameter is passed to the call tocanonical_target_name()inparse_all.py.parser-indexer-py/src/parserindexer/parse_all.py
Lines 140 to 158 in 3f4a084
parser-indexer-py/src/parserindexer/utils.py
Lines 138 to 161 in 3f4a084