From b106dc843cdc60902b0c6d423291584086fd2868 Mon Sep 17 00:00:00 2001 From: lltommy Date: Tue, 14 Apr 2020 22:56:33 +0200 Subject: Updated yaml schema and examples - plus ShEX shape to go along with it. Still work in progress but getting better and better I guess --- bh20sequploader/bh20seq-schema.yml | 15 ++++---- bh20sequploader/supporting_webuploader.yml | 20 ++++++++++ bh20sequploader/validation_shape.rdf | 59 ++++++++++++++++++++++++++++++ example/metadata.yaml | 9 ++--- example/minimal_example.yaml | 3 ++ 5 files changed, 94 insertions(+), 12 deletions(-) create mode 100644 bh20sequploader/supporting_webuploader.yml create mode 100644 bh20sequploader/validation_shape.rdf diff --git a/bh20sequploader/bh20seq-schema.yml b/bh20sequploader/bh20seq-schema.yml index 2d2e4c9..a901be0 100644 --- a/bh20sequploader/bh20seq-schema.yml +++ b/bh20sequploader/bh20seq-schema.yml @@ -23,11 +23,12 @@ $graph: type: string jsonldPredicate: _id: http://semanticscience.org/resource/SIO_000115 - host_common_name: - doc: Text label for the host species (e.g. homo sapiens) - type: string? - jsonldPredicate: - _id: http://purl.obolibrary.org/obo/NOMEN_0000037 +# Removed host_common_name. Not necessary since we turned species into an IRI +# host_common_name: +# doc: Text label for the host species (e.g. homo sapiens) +# type: string? +# jsonldPredicate: +# _id: http://purl.obolibrary.org/obo/NOMEN_0000037 host_sex: doc: Sex of the host as define in NCIT, IRI expected (http://purl.obolibrary.org/obo/C20197 (Male), http://purl.obolibrary.org/obo/NCIT_C27993 (Female) or unkown (http://purl.obolibrary.org/obo/NCIT_C17998)) type: string @@ -120,7 +121,7 @@ $graph: fields: virus_species: doc: The name of a taxon from the NCBI taxonomy database - type: string?
+ type: string jsonldPredicate: _id: http://edamontology.org/data_1875 _type: "@id" @@ -145,7 +146,7 @@ $graph: _id: http://www.ebi.ac.uk/efo/EFO_0002699 sequencing_coverage: doc: Sequence coverage defined as the average number of reads representing a given nucleotide (e.g. 100x) - type: string? + type: int? jsonldPredicate: _id: http://purl.obolibrary.org/obo/FLU_0000848 diff --git a/bh20sequploader/supporting_webuploader.yml b/bh20sequploader/supporting_webuploader.yml new file mode 100644 index 0000000..5ad8f6c --- /dev/null +++ b/bh20sequploader/supporting_webuploader.yml @@ -0,0 +1,20 @@ +host_age_unit: + year: http://purl.obolibrary.org/obo/UO_0000036 + month: http://purl.obolibrary.org/obo/UO_0000035 + week: http://purl.obolibrary.org/obo/UO_0000034 + day: http://purl.obolibrary.org/obo/UO_0000033 + hour: http://purl.obolibrary.org/obo/UO_0000032 + +host_sex: + Male: http://purl.obolibrary.org/obo/NCIT_C20197 + Female: http://purl.obolibrary.org/obo/NCIT_C27993 + unknown: http://purl.obolibrary.org/obo/NCIT_C17998 + +host_species: + OLS-ontology: ncbitaxon + +virus_species: + OLS-ontology: ncbitaxon + +collection_location: + OLS-ontology: gaz \ No newline at end of file diff --git a/bh20sequploader/validation_shape.rdf b/bh20sequploader/validation_shape.rdf new file mode 100644 index 0000000..1d1e7d2 --- /dev/null +++ b/bh20sequploader/validation_shape.rdf @@ -0,0 +1,59 @@ +PREFIX : +PREFIX MainSchema: +PREFIX hostSchema: +PREFIX xsd: +PREFIX obo: +PREFIX sio: +PREFIX efo: +PREFIX evs: +PREFIX edam: + +:submissionShape { + MainSchema:host @:hostShape ; + MainSchema:sample @:sampleShape ; + MainSchema:submitter @:submitterShape ; + MainSchema:technology @:technologyShape ; + MainSchema:virus @:virusShape; +} + +:hostShape { + efo:EFO_0000532 [ obo:NCBITaxon_~ ] ; + obo:PATO_0000047 [ obo:NCIT_C20197 obo:NCIT_C27993 obo:NCIT_C17998 ] ; + sio:SIO_000115 xsd:string ; + obo:PATO_0000011 xsd:integer ?; + obo:NCIT_C42574 [ obo:UO_~ ] ?; + sio:SIO_001167 xsd:string ?; +
efo:EFO_0000727 xsd:string ?; +} + +:sampleShape { + obo:OBI_0001895 xsd:string ; + sio:SIO_000115 xsd:string ; + sio:SIO_001167 xsd:string ; + evs:C25164 xsd:string ?; + obo:GAZ_00000448 [obo:GAZ_~] ?; + obo:OBI_0001472 xsd:string ?; + obo:OBI_0001479 xsd:string ?; +} + +:submitterShape { + sio:SIO_000116 xsd:string ; + obo:NCIT_C37984 xsd:string ; + obo:NCIT_C37900 xsd:string ?; + obo:NCIT_C42781 xsd:string ?; + obo:OBI_0600047 xsd:string ?; + sio:SIO_000115 /https:\u002F\u002Forcid.org\u002F[0-9]{4}-[0-9]{4}-[0-9]{4}-[0-9]{4}/?; + sio:SIO_000172 xsd:string ?; + efo:EFO_0001741 xsd:string ?; +} + +:technologyShape { + obo:OBI_0600047 xsd:string ; + obo:FLU_0000848 xsd:integer ?; + efo:EFO_0002699 xsd:string ?; +} + +:virusShape{ + edam:data_1875 [ obo:NCBITaxon_~ ] ; + sio:SIO_010055 xsd:string ?; +} \ No newline at end of file diff --git a/example/metadata.yaml b/example/metadata.yaml index 15e4e44..5d9cffc 100644 --- a/example/metadata.yaml +++ b/example/metadata.yaml @@ -3,7 +3,6 @@ id: placeholder host: host_id: XX1 host_species: http://purl.obolibrary.org/obo/NCBITaxon_9606 - host_common_name: string host_sex: http://purl.obolibrary.org/obo/NCIT_C27993 host_age: 20 host_age_unit: http://purl.obolibrary.org/obo/UO_0000036 @@ -16,8 +15,8 @@ sample: collector_name: XXX collecting_institution: XXX specimen_source: XXX - collection_date: XXX - collection_location: http://purl.obolibrary.org/obo/NCIT_C16428 + collection_date: 2020-01 + collection_location: http://purl.obolibrary.org/obo/GAZ_00002459 sample_storage_conditions: XXX additional_collection_information: XXX @@ -28,7 +27,7 @@ virus: technology: sample_sequencing_technology: XX sequence_assembly_method: XX - sequencing_coverage: 70x + sequencing_coverage: 70 submitter: submitter_name: tester @@ -38,4 +37,4 @@ submitter: provider_sample_id: string submitter_sample_id: string authors: testAuthor - submitter_orchid: X12 + submitter_orchid: https://orcid.org/0000-0002-1825-0097 diff --git 
a/example/minimal_example.yaml b/example/minimal_example.yaml index 43adfa8..0080c6d 100644 --- a/example/minimal_example.yaml +++ b/example/minimal_example.yaml @@ -10,6 +10,9 @@ sample: collector_name: XXX collecting_institution: XXX +virus: + virus_species: http://purl.obolibrary.org/obo/NCBITaxon_2697049 + technology: sample_sequencing_technology: XX -- cgit v1.2.3