Redesign the JSON-LD output for occurrence records
Closed this issue · 7 comments
The occurrence-level JSON-LD output is not semantically correct. It would be better to present it as an annotation, so that we can attach metadata to the link between subject and object as desired.
@rdmpage I am toying with a more semantically appropriate way to represent JSON-LD from Bionomia without breaking downstream re-use. I saw your README at https://github.com/rdmpage/wild-json-ld.
Setting aside the nasty context for now, does this seem better? The doubly nested `@reverse` is... ugh, but I have no idea how to represent this better.
{
"@context":{
"@vocab":"http://schema.org/",
"identified":"http://rs.tdwg.org/dwc/iri/identifiedBy",
"recorded":"http://rs.tdwg.org/dwc/iri/recordedBy",
"PreservedSpecimen":"http://rs.tdwg.org/dwc/terms/PreservedSpecimen",
"as":"https://www.w3.org/ns/activitystreams#",
"oa":"http://www.w3.org/ns/oa#",
"annotation":"http://www.w3.org/ns/oa#Annotation",
"datasetKey":"http://rs.gbif.org/terms/1.0/datasetKey",
"occurrenceID":"http://rs.tdwg.org/dwc/terms/occurrenceID",
"basisOfRecord":"http://rs.tdwg.org/dwc/terms/basisOfRecord",
"dateIdentified":"http://rs.tdwg.org/dwc/terms/dateIdentified",
"decimalLatitude":"http://rs.tdwg.org/dwc/terms/decimalLatitude",
"decimalLongitude":"http://rs.tdwg.org/dwc/terms/decimalLongitude",
"country":"http://rs.tdwg.org/dwc/terms/country",
"countryCode":"http://rs.tdwg.org/dwc/terms/countryCode",
"eventDate":"http://rs.tdwg.org/dwc/terms/eventDate",
"year":"http://rs.tdwg.org/dwc/terms/year",
"kingdom":"http://rs.tdwg.org/dwc/terms/kingdom",
"family":"http://rs.tdwg.org/dwc/terms/family",
"identifiedBy":"http://rs.tdwg.org/dwc/terms/identifiedBy",
"institutionCode":"http://rs.tdwg.org/dwc/terms/institutionCode",
"collectionCode":"http://rs.tdwg.org/dwc/terms/collectionCode",
"catalogNumber":"http://rs.tdwg.org/dwc/terms/catalogNumber",
"recordedBy":"http://rs.tdwg.org/dwc/terms/recordedBy",
"scientificName":"http://rs.tdwg.org/dwc/terms/scientificName",
"typeStatus":"http://rs.tdwg.org/dwc/terms/typeStatus",
"recordedByID":"http://rs.tdwg.org/dwc/terms/recordedByID",
"identifiedByID":"http://rs.tdwg.org/dwc/terms/identifiedByID"
},
"@type":"Person",
"@id":"http://localhost:4567/0000-0001-7618-5230",
"givenName":"David Peter",
"familyName":"Shorthouse",
"name":"David Peter Shorthouse",
"alternateName":[
"David P. Shorthouse",
"David Shorthouse",
"David Peter Shorthouse"
],
"sameAs":"https://orcid.org/0000-0001-7618-5230",
"as:prev":null,
"as:current":"http://localhost:4567/0000-0001-7618-5230/specimens.jsonld?page=1",
"as:next":null,
"@reverse":{
"identified":[
{
"@type":"PreservedSpecimen",
"@id":"http://localhost:4567/occurrence/769279710",
"sameAs":"https://gbif.org/occurrence/769279710",
"datasetKey":"8971dfba-f762-11e1-a439-00145eb45e9a",
"occurrenceID":"urn:catalog:UASM:UASM329573",
"basisOfRecord":"PRESERVED_SPECIMEN",
"dateIdentified":"2010",
"decimalLatitude":"56.839",
"decimalLongitude":"-118.340",
"country":"Canada",
"countryCode":"CA",
"eventDate":"2004-07",
"year":null,
"kingdom":"Animalia",
"family":"Linyphiidae",
"identifiedBy":"Shorthouse, D.",
"institutionCode":"University of Alberta Museums (UAM)",
"collectionCode":"UASM",
"catalogNumber":"UASM329573",
"recordedBy":"Pinzon, J.",
"scientificName":"Oreonetides vaginatus",
"typeStatus":null,
"recordedByID":null,
"identifiedByID":null,
"@reverse":{
"annotation":[
{
"@type":"oa:Annotation",
"@id":"BionomiaLink26380456",
"oa:motivation":"identifying",
"oa:target":{
"oa:source":"https://gbif.org/occurrence/769279710",
"oa:selector":{
"oa:type":"TextQuoteSelector",
"oa:exact":"Identified by"
}
},
"oa:creator":{
"@type":"Person",
"@id":"http://localhost:4567/0000-0001-7618-5230",
"sameAs":"https://orcid.org/0000-0001-7618-5230",
"name":"David Peter Shorthouse"
},
"oa:created":"2021-01-18T12:37:30-05:00",
"oa:modified":null
}
]
}
},
{
"@type":"PreservedSpecimen",
"@id":"http://localhost:4567/occurrence/769281222",
"sameAs":"https://gbif.org/occurrence/769281222",
"datasetKey":"8971dfba-f762-11e1-a439-00145eb45e9a",
"occurrenceID":"urn:catalog:UASM:UASM329574",
"basisOfRecord":"PRESERVED_SPECIMEN",
"dateIdentified":"2010",
"decimalLatitude":"56.786",
"decimalLongitude":"-118.349",
"country":"Canada",
"countryCode":"CA",
"eventDate":"2004-07",
"year":null,
"kingdom":"Animalia",
"family":"Linyphiidae",
"identifiedBy":"Shorthouse, D.",
"institutionCode":"University of Alberta Museums (UAM)",
"collectionCode":"UASM",
"catalogNumber":"UASM329574",
"recordedBy":"Pinzon, J.",
"scientificName":"Oreonetides vaginatus",
"typeStatus":null,
"recordedByID":null,
"identifiedByID":null,
"@reverse":{
"annotation":[
{
"@type":"oa:Annotation",
"@id":"BionomiaLink42153317",
"oa:motivation":"identifying",
"oa:target":{
"oa:source":"https://gbif.org/occurrence/769281222",
"oa:selector":{
"oa:type":"TextQuoteSelector",
"oa:exact":"Identified by"
}
},
"oa:creator":{
"@type":"Person",
"@id":"http://localhost:4567/0000-0001-7618-5230",
"sameAs":"https://orcid.org/0000-0001-7618-5230",
"name":"David Peter Shorthouse"
},
"oa:created":"2021-03-12T10:11:13-05:00",
"oa:modified":null
}
]
}
},
{
"@type":"PreservedSpecimen",
"@id":"http://localhost:4567/occurrence/769279986",
"sameAs":"https://gbif.org/occurrence/769279986",
"datasetKey":"8971dfba-f762-11e1-a439-00145eb45e9a",
"occurrenceID":"urn:catalog:UASM:UASM329612",
"basisOfRecord":"PRESERVED_SPECIMEN",
"dateIdentified":"2010",
"decimalLatitude":"56.785",
"decimalLongitude":"-118.355",
"country":"Canada",
"countryCode":"CA",
"eventDate":"2004-07",
"year":null,
"kingdom":"Animalia",
"family":"Linyphiidae",
"identifiedBy":"Shorthouse, D.",
"institutionCode":"University of Alberta Museums (UAM)",
"collectionCode":"UASM",
"catalogNumber":"UASM329612",
"recordedBy":"Pinzon, J.",
"scientificName":"Tunagyna debilis",
"typeStatus":null,
"recordedByID":null,
"identifiedByID":null,
"@reverse":{
"annotation":[
{
"@type":"oa:Annotation",
"@id":"BionomiaLink112282193",
"oa:motivation":"identifying",
"oa:target":{
"oa:source":"https://gbif.org/occurrence/769279986",
"oa:selector":{
"oa:type":"TextQuoteSelector",
"oa:exact":"Identified by"
}
},
"oa:creator":{
"@type":"Person",
"@id":"http://localhost:4567/0000-0001-7618-5230",
"sameAs":"https://orcid.org/0000-0001-7618-5230",
"name":"David Peter Shorthouse"
},
"oa:created":"2021-09-22T08:15:12-04:00",
"oa:modified":null
}
]
}
}
],
"recorded":[
]
}
}
@dshorthouse This sort of thing does my head in :( I think the first `@reverse` works fine; it generates the statements you want (e.g., `some occurrence` `dwciri:identifiedBy` `some person`).
I'm not sure I buy the annotation, either the way it's shown here, or whether annotation is actually what you want. When I translate this into triples I get things like:
<https://json-ld.org/playground/BionomiaLink42153317> <http://www.w3.org/ns/oa#Annotation> <http://localhost:4567/occurrence/769281222> .
which misuses `<http://www.w3.org/ns/oa#Annotation>`: it's not a predicate, it's the class of annotations. The connection between an occurrence and an annotation as you've modelled it is `oa:source`, which could be used instead of `annotation` in the `@reverse` statement, but then you're in trouble because the tree structure would require yet a further `@reverse` for `oa:target` before you get to the annotation! So I think this way lies madness, and the tree structure starts to get unmanageable.
I'm wondering why you would use annotations to model this. I don't think these are annotations; what you're really trying to do is give the provenance for the assertion `some occurrence` `dwciri:identifiedBy` `some person`. Annotations don't really express that. It feels like a bit of a hack to assert `dwciri:identifiedBy` by annotating some text label on the GBIF page.
I think you might want to think about either (a) using nanopubs to express the provenance of the statement that someone identified something, or (b) using something like `schema:Role` to handle provenance. Nanopubs are cool but come with some overhead, though they do capture what you want. I'm not sure it would be useful to try to embed them into the JSON-LD tree above, though.
The other approach is to embed provenance directly in the graph using `schema:Role`; see "A Scalable Approach to Incrementally Building Knowledge Graphs" (doi:10.1007/978-3-319-43997-6_15; PDF here). It's a cool idea, but it breaks everyone's expectation of just getting simple triples. I think it's probably a reasonable model for when we aggregate data across multiple sources and want to track potential conflicts.
Hope some of this makes sense.
Thanks for having a look. Indeed, it's a spectacular mess. What I'm really trying to do is say:
some person
stated at timestamp
that some occurrence
dwciri:identifiedBy
some person
That is, give the naked assertion some qualifier, as it were. So yes, it is provenance, but I do not see how or where I can express the first bit as a qualifier unless I consider this some sort of annotation. This would work just fine if I stripped out the `some person` `stated at timestamp` qualifier entirely (poorly modelled as an annotation above), but do I then sacrifice trust?
I suppose another way of looking at this is:
What is the source of the assertion that `some occurrence` `dwciri:identifiedBy` `some person`?
OK this is how I think we'd represent this:
{
"@context": {
"@vocab": "http://schema.org/",
"prov": "http://www.w3.org/TR/prov-o/"
},
"@id": "http://thing.org/royLichtenstein",
"@type": "Person",
"name": [{
"@type": "Role",
"name": "Roy L.",
"prov:wasGeneratedBy": [
"http://a.source.org/",
"http://another.source.org/"
]
},
{
"@type": "Role",
"name": "Lichtenstein",
"prov:wasGeneratedBy": "http://someother.source.org/"
}
],
"birthDate": {
"@type": "Role",
"birthDate": "1923-10-27",
"prov:wasGeneratedBy": "http://a.source.org/"
}
}
And here it is in triples:
<http://thing.org/royLichtenstein> <http://schema.org/birthDate> _:b0 .
<http://thing.org/royLichtenstein> <http://schema.org/name> _:b1 .
<http://thing.org/royLichtenstein> <http://schema.org/name> _:b2 .
<http://thing.org/royLichtenstein> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .
_:b0 <http://schema.org/birthDate> "1923-10-27" .
_:b0 <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Role> .
_:b0 <http://www.w3.org/TR/prov-o/wasGeneratedBy> "http://a.source.org/" .
_:b1 <http://schema.org/name> "Roy L." .
_:b1 <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Role> .
_:b1 <http://www.w3.org/TR/prov-o/wasGeneratedBy> "http://a.source.org/" .
_:b1 <http://www.w3.org/TR/prov-o/wasGeneratedBy> "http://another.source.org/" .
_:b2 <http://schema.org/name> "Lichtenstein" .
_:b2 <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Role> .
_:b2 <http://www.w3.org/TR/prov-o/wasGeneratedBy> "http://someother.source.org/" .
And as a graph:
In a sense this is a classic case of indirection: `schema:name` doesn't point to the value for the name, it points to a pointer to the value for the name. AFAIK the choice of `schema:Role` as the type of the intermediate node is arbitrary, but it has been used in various ways to do this sort of thing, e.g. http://blog.schema.org/2014/06/introducing-role.html. I also used this in Ozymandias to model order of authorship, but now I've switched to using RDF lists instead.
OK, I think I am getting closer. A big part of the messiness here is that the root entity is of type `PreservedSpecimen`, which has a DwC-based property `recordedBy` that may be interpreted as a list of other entities of type `Person`, whose association with that property `recordedBy` was created by yet another `Person` via `schema:Role`.
Reverted to the original JSON-LD output, without the arguably messy annotation component(s).