From b5e38b960c380f0f7868d8fc4038ea3c3a0c52ee Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Mon, 13 Jul 2020 08:50:25 +0100 Subject: Add ontology and schema for license information. For review. --- bh20sequploader/bh20seq-options.yml | 4 ++ bh20sequploader/bh20seq-schema.yml | 25 ++++++++++++ doc/blog/using-covid-19-pubseq-part5.org | 68 +++++++++++++++++++++++++++++++- example/maximum_metadata_example.yaml | 10 ++++- 4 files changed, 104 insertions(+), 3 deletions(-) diff --git a/bh20sequploader/bh20seq-options.yml b/bh20sequploader/bh20seq-options.yml index c553f41..d1ea398 100644 --- a/bh20sequploader/bh20seq-options.yml +++ b/bh20sequploader/bh20seq-options.yml @@ -3,6 +3,10 @@ # being unique or at least using the same options in different containing # types. +license_type: + CC0 Public Domain Dedication: http://creativecommons.org/publicdomain/zero/1.0/ + CC-BY-4.0 Attribution 4.0 International: http://creativecommons.org/licenses/by/4.0/ + host_age_unit: Years: http://purl.obolibrary.org/obo/UO_0000036 Months: http://purl.obolibrary.org/obo/UO_0000035 diff --git a/bh20sequploader/bh20seq-schema.yml b/bh20sequploader/bh20seq-schema.yml index a8ab920..b3d4d12 100644 --- a/bh20sequploader/bh20seq-schema.yml +++ b/bh20sequploader/bh20seq-schema.yml @@ -1,5 +1,6 @@ $base: http://biohackathon.org/bh20-seq-schema $namespaces: + cc: http://creativecommons.org/ns# sch: https://schema.org/ efo: http://www.ebi.ac.uk/efo/ obo: http://purl.obolibrary.org/obo/ @@ -9,6 +10,30 @@ $namespaces: $graph: +- name: licenseSchema + type: record + fields: + license_type: + doc: License types as defined in https://wiki.creativecommons.org/images/d/d6/Ccrel-1.0.pdf + type: string? + jsonldPredicate: + _id: https://creativecommons.org/ns#License + title: + doc: Attribution title related to data license + type: string? + jsonldPredicate: + _id: http://semanticscience.org/resource/SIO_001167 + attribution_url: + doc: Attribution URL related to data license + type: string? 
+ jsonldPredicate: + _id: https://creativecommons.org/ns#Work + attribution_source: + doc: Attribution source URL related to data license + type: string? + jsonldPredicate: + _id: https://creativecommons.org/ns#Work + - name: hostSchema type: record fields: diff --git a/doc/blog/using-covid-19-pubseq-part5.org b/doc/blog/using-covid-19-pubseq-part5.org index fe1908a..4b0ea64 100644 --- a/doc/blog/using-covid-19-pubseq-part5.org +++ b/doc/blog/using-covid-19-pubseq-part5.org @@ -40,7 +40,7 @@ All from that one metadata schema. * Modifying the schema -One of the first things we wanted to do is to add a field for the data +One of the first things we want to do is to add a field for the data license. Initially we only support CC-4.0 as a license by default, but now we want to give uploaders the option to make it an even more liberal CC0 license. The first step is to find a good ontology term @@ -51,4 +51,70 @@ attribution license https://creativecommons.org/licenses/by/4.0/. According to this [[https://wiki.creativecommons.org/images/d/d6/Ccrel-1.0.pdf][document]] we should really also add fields for attributionName and attributionURL. +A minimal triple should be + +: id xhtml:license <https://creativecommons.org/licenses/by/4.0/> . + +Other suggestions are + +: id dc:title "Description" . +: id cc:attributionName "Your Name" . +: id cc:attributionURL <http://covid19.genenetwork.org/id> . + +and 'dc:source' which indicates the original source of any modified +work, specified as a URI. +The prefix 'cc:' is an abbreviation for http://creativecommons.org/ns#. + +Going back to the schema, where does it fit? Under host, sample, +virus, technology or submitter block? It could fit under sample, but +actually the license concerns the whole metadata block and sequence, +so I think it fits best under its own license tag.
For example + +id: placeholder + +: license: +: license_type: http://creativecommons.org/licenses/by/4.0/ +: attribution_title: "Sample ID" +: attribution_name: "John doe, Joe Boe, Jonny Oe" +: attribution_url: http://covid19.genenetwork.org/id +: attribution_source: https://www.ncbi.nlm.nih.gov/pubmed/323088888 + +So, let's update the example. Notice the license info is optional - if it is missing +we just assume the default CC-BY-4.0. + +One thing that is interesting is that in the namespace https://creativecommons.org/ns there +is no mention of a title. I think it is useful, however, because we have no such field. +So, we'll add it simply as a title field. Now the draft schema is + +#+BEGIN_SRC js +- name: licenseSchema + type: record + fields: + license_type: + doc: License types as defined in https://wiki.creativecommons.org/images/d/d6/Ccrel-1.0.pdf + type: string? + jsonldPredicate: + _id: https://creativecommons.org/ns#License + title: + doc: Attribution title related to license + type: string? + jsonldPredicate: + _id: http://semanticscience.org/resource/SIO_001167 + attribution_url: + doc: Attribution URL related to license + type: string? + jsonldPredicate: + _id: https://creativecommons.org/ns#Work + attribution_source: + doc: Attribution source URL + type: string? + jsonldPredicate: + _id: https://creativecommons.org/ns#Work +#+END_SRC + +Now, we are no ontology experts, right? So, next we submit a patch to our source tree and +ask for feedback before wiring it up in the data entry form. The pull request was +submitted here FIXME.
+ /Note: work in progress/ diff --git a/example/maximum_metadata_example.yaml b/example/maximum_metadata_example.yaml index 432877c..54736f8 100644 --- a/example/maximum_metadata_example.yaml +++ b/example/maximum_metadata_example.yaml @@ -1,5 +1,11 @@ id: placeholder +license: + license_type: http://creativecommons.org/licenses/by/4.0/ + title: "Sample" + attribution_name: "John doe, Joe Boe, Jonny Oe" + attribution_url: http://covid19.genenetwork.org/id + host: host_id: XX1 host_species: http://purl.obolibrary.org/obo/NCBITaxon_9606 @@ -13,9 +19,9 @@ host: additional_host_information: Optional free text field for additional information sample: - sample_id: Id of the sample as defined by the submitter + sample_id: Id of the sample as defined by the submitter collector_name: Name of the person that took the sample - collecting_institution: Institute that was responsible of sampling + collecting_institution: Institute that was responsible of sampling specimen_source: [http://purl.obolibrary.org/obo/NCIT_C155831,http://purl.obolibrary.org/obo/NCIT_C155835] collection_date: "2020-01-01" collection_location: http://www.wikidata.org/entity/Q148 -- cgit v1.2.3