Skip to content

Commit 135ce24

Browse files
committed
Extract authors/contributors as objects with name and orcid.
1 parent bf9ac43 commit 135ce24

File tree

5 files changed

+116
-9
lines changed

5 files changed

+116
-9
lines changed

lib/tess/rdf/event_extractor.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ def extract_params
2222

2323
params[:capacity] = extract_value(RDF::Vocab::SCHEMA.maximumAttendeeCapacity)
2424

25-
contact = extract_person(RDF::Vocabulary::Term.new('http://schema.org/contact', attributes: {}))
25+
contact = extract_contact(RDF::Vocabulary::Term.new('http://schema.org/contact', attributes: {}))
2626
params[:contact] = [contact[:name], contact[:email]].compact.join(' - ') if contact
2727

2828
legacy_topics = extract_values(RDF::Vocabulary::Term.new('http://schema.org/topic', attributes: {}))

lib/tess/rdf/extraction.rb

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,23 @@ def extract_names(predicate, subject: resource)
105105
[:thing, RDF::Vocab::SCHEMA.name, :name]).map { |r| r[:name] }.compact.uniq.sort
106106
end
107107

108-
def extract_person(predicate, subject: resource)
108+
# Extracts the people linked to +subject+ via +predicate+ as hashes holding a
# name and, when one can be recognised, an ORCID iD.
#
# The ORCID is searched for in the person's node (its `@id`) and then in its
# schema.org `identifier`. Both are checked in that order, so when both
# contain an ORCID the one from `identifier` is kept.
#
# @param predicate the RDF predicate linking the subject to people
# @param subject the node whose people are extracted; defaults to `resource`
# @return [Array<Hash>] one `{ name: ... }` hash per query solution, with an
#   extra `:orcid` key when an ORCID iD was found
def extract_people(predicate, subject: resource)
  # Four hyphen-separated groups of four characters; the final character is a
  # digit or the check character X. The lookarounds stop the pattern from
  # matching in the middle of a longer run of digits.
  orcid_pattern = /(?<!\d)(\d{4}-\d{4}-\d{4}-\d{3}[\dX])(?![\dX])/i

  query([subject, predicate, :person],
        [:person, RDF::Vocab::SCHEMA.name, :name],
        [:person, RDF::Vocab::SCHEMA.identifier, :identifier, { optional: true }]).map do |solution|
    person = { name: solution[:name] }
    # Check `@id` and `identifier` for ORCID
    [:person, :identifier].each do |attr|
      value = solution[attr]
      next unless value

      # `to_s` keeps this working whether the query yields plain strings or
      # term objects that do not implement `match` themselves.
      match = value.to_s.match(orcid_pattern)
      # Normalise a lower-case check character so equal iDs compare equal.
      person[:orcid] = match[1].upcase if match
    end
    person
  end
end
123+
124+
def extract_contact(predicate, subject: resource)
109125
query([subject, predicate, :person],
110126
[:person, RDF::Vocab::SCHEMA.name, :name, { optional: true }],
111127
[:person, RDF::Vocab::SCHEMA.email, :email, { optional: true }]).first

lib/tess/rdf/material_extractor.rb

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,8 @@ def extract_params
2020
params[:date_published] = extract_value(RDF::Vocab::SCHEMA.datePublished)
2121
params[:status] = extract_value(RDF::Vocab::SCHEMA.creativeWorkStatus)
2222

23-
params[:authors] = (extract_names(RDF::Vocab::SCHEMA.author) | extract_names(RDF::Vocab::SIOC.has_creator)).sort
24-
params[:contributors] = extract_names(RDF::Vocab::SCHEMA.contributor)
23+
params[:authors] = (extract_people(RDF::Vocab::SCHEMA.author) | extract_names(RDF::Vocab::SIOC.has_creator).sort)
24+
params[:contributors] = extract_people(RDF::Vocab::SCHEMA.contributor)
2525
params[:target_audience] = extract_audience
2626
params[:resource_type] = extract_values(RDF::Vocab::SCHEMA.learningResourceType)
2727
params[:external_resources] = extract_mentions

test/extraction_test.rb

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -84,10 +84,13 @@ class ExtractionTest < Test::Unit::TestCase
8484
refute params.key?(:scientific_topic_names)
8585
refute params.key?(:scientific_topic_uris)
8686
assert_equal ["assembly"], params[:keywords]
87-
assert_equal ["Helena Rasche", "Saskia Hiltemann", "Simon Gladman"], params[:authors]
87+
people = [{ name: 'Simon Gladman'},
88+
{ name: 'Helena Rasche', orcid: '0000-0001-9760-8992'},
89+
{ name: 'Saskia Hiltemann', orcid: '0000-0003-3803-468X'}].sort_by { |p| p[:name] }
90+
assert_equal people, params[:authors].sort_by { |p| p[:name] }
8891
assert_equal ["Students"], params[:target_audience]
8992
assert_equal ["hands-on tutorial"], params[:resource_type]
90-
assert_equal ["Helena Rasche", "Saskia Hiltemann", "Simon Gladman"], params[:contributors]
93+
assert_equal people, params[:contributors].sort_by { |p| p[:name] }
9194
assert_equal "Beginner", params[:difficulty_level]
9295
end
9396

@@ -107,10 +110,12 @@ class ExtractionTest < Test::Unit::TestCase
107110
refute params.key?(:scientific_topic_names)
108111
assert_equal ["http://edamontology.org/topic_3174"], params[:scientific_topic_uris]
109112
refute params.key?(:keywords)
110-
assert_equal ["Bérénice Batut", "Saskia Hiltemann"], params[:authors]
113+
people = [{ name: 'Bérénice Batut', orcid: '0000-0001-9852-1987'},
114+
{ name: 'Saskia Hiltemann', orcid: '0000-0003-3803-468X'}].sort_by { |p| p[:name] }
115+
assert_equal people, params[:authors].sort_by { |p| p[:name] }
111116
assert_equal ["Students"], params[:target_audience]
112117
assert_equal ["slides"], params[:resource_type]
113-
assert_equal ["Bérénice Batut", "Saskia Hiltemann"], params[:contributors]
118+
assert_equal people, params[:contributors].sort_by { |p| p[:name] }
114119
assert params[:node_names].include?('Belgium')
115120
end
116121

@@ -285,7 +290,7 @@ class ExtractionTest < Test::Unit::TestCase
285290
assert_equal ['ELIXIR RIR', 'BridgeDb'], params[:keywords]
286291
assert_equal 'https://bioconductor.org/packages/release/bioc/vignettes/BridgeDbR/inst/doc/AGPL-3', params[:licence]
287292
assert_equal '1.17.5', params[:version]
288-
assert_equal ['Egon Willighagen'], params[:authors]
293+
assert_equal [{ name: 'Egon Willighagen', orcid: '0000-0001-7542-0286' }], params[:authors]
289294
end
290295

291296
test 'extract event from legacy Edinburgh Genomics Event markup' do

test/field_test.rb

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -280,6 +280,92 @@ class FieldTest < Test::Unit::TestCase
280280
assert_equal 'de', course_instance_extractor(json).send(:extract_language)
281281
end
282282

283+
test 'extract authors' do
# Exercises `extract_people` for the schema.org `author` predicate against four
# LearningResource fixtures, each describing a single Person. Every case
# expects exactly one author with the right name; they differ in where (or
# whether) an ORCID is present.
284+
# Case 1: the ORCID is carried in the person's `@id` as an orcid.org URL.
json = %(
285+
[{
286+
"@context": "https://schema.org/",
287+
"@type": "LearningResource",
288+
"http://purl.org/dc/terms/conformsTo": {
289+
"@id": "https://bioschemas.org/profiles/TrainingMaterial/1.0-RELEASE",
290+
"@type": "CreativeWork"
291+
},
292+
"name": "Dummy Material",
293+
"author": {
294+
"@type": "Person",
295+
"@id": "https://orcid.org/0000-0001-9062-6303",
296+
"name": "Patricia Palagi"
297+
}
298+
}])
299+
authors = learning_resource_extractor(json).send(:extract_people, RDF::Vocab::SCHEMA.author)
300+
assert_equal 1, authors.length
301+
assert_equal '0000-0001-9062-6303', authors.first[:orcid]
302+
assert_equal 'Patricia Palagi', authors.first[:name]
303+
304+
# Case 2: `@id` is a local fragment; the ORCID is an orcid.org URL in `identifier`.
json = %(
305+
[{
306+
"@context": "https://schema.org/",
307+
"@type": "LearningResource",
308+
"http://purl.org/dc/terms/conformsTo": {
309+
"@id": "https://bioschemas.org/profiles/TrainingMaterial/1.0-RELEASE",
310+
"@type": "CreativeWork"
311+
},
312+
"name": "Dummy Material",
313+
"author": {
314+
"@type": "Person",
315+
"@id": "#aperson",
316+
"name": "Patricia Palagi",
317+
"identifier": "https://orcid.org/0000-0001-9062-6303"
318+
}
319+
}])
320+
authors = learning_resource_extractor(json).send(:extract_people, RDF::Vocab::SCHEMA.author)
321+
assert_equal 1, authors.length
322+
assert_equal '0000-0001-9062-6303', authors.first[:orcid]
323+
assert_equal 'Patricia Palagi', authors.first[:name]
324+
325+
# Case 3: `identifier` is a bare ORCID (no URL) padded with whitespace and
# ending in the `X` check character; the padding must not appear in the result.
json = %(
326+
[{
327+
"@context": "https://schema.org/",
328+
"@type": "LearningResource",
329+
"http://purl.org/dc/terms/conformsTo": {
330+
"@id": "https://bioschemas.org/profiles/TrainingMaterial/1.0-RELEASE",
331+
"@type": "CreativeWork"
332+
},
333+
"name": "Dummy Material",
334+
"author": {
335+
"@type": "Person",
336+
"@id": "#aperson",
337+
"name": "Thomas B. Hickey",
338+
"identifier": " 0000-0002-1694-233X "
339+
}
340+
}])
341+
authors = learning_resource_extractor(json).send(:extract_people, RDF::Vocab::SCHEMA.author)
342+
assert_equal 1, authors.length
343+
assert_equal '0000-0002-1694-233X', authors.first[:orcid]
344+
assert_equal 'Thomas B. Hickey', authors.first[:name]
345+
346+
347+
# Case 4: `identifier` is a URL that is not an ORCID; the author is still
# returned with a name, but without an `:orcid`.
json = %(
348+
[{
349+
"@context": "https://schema.org/",
350+
"@type": "LearningResource",
351+
"http://purl.org/dc/terms/conformsTo": {
352+
"@id": "https://bioschemas.org/profiles/TrainingMaterial/1.0-RELEASE",
353+
"@type": "CreativeWork"
354+
},
355+
"name": "Dummy Material",
356+
"author": {
357+
"@type": "Person",
358+
"@id": "#aperson",
359+
"name": "Patricia Palagi",
360+
"identifier": "https://something.that.isnt.an.orcid"
361+
}
362+
}])
363+
authors = learning_resource_extractor(json).send(:extract_people, RDF::Vocab::SCHEMA.author)
364+
assert_equal 1, authors.length
365+
assert_nil authors.first[:orcid]
366+
assert_equal 'Patricia Palagi', authors.first[:name]
367+
end
368+
283369
private
284370

285371
def course_extractor(fixture, format: :jsonld, base_uri: 'https://example.com/my.json')

0 commit comments

Comments
 (0)