From b106dc843cdc60902b0c6d423291584086fd2868 Mon Sep 17 00:00:00 2001
From: lltommy
Date: Tue, 14 Apr 2020 22:56:33 +0200
Subject: Updated yaml schema and examples - plus ShEX shape to go along with
it. Still work in progress but getting better and better I guess
---
bh20sequploader/bh20seq-schema.yml | 15 ++++----
bh20sequploader/supporting_webuploader.yml | 20 ++++++++++
bh20sequploader/validation_shape.rdf | 59 ++++++++++++++++++++++++++++++
example/metadata.yaml | 9 ++---
example/minimal_example.yaml | 3 ++
5 files changed, 94 insertions(+), 12 deletions(-)
create mode 100644 bh20sequploader/supporting_webuploader.yml
create mode 100644 bh20sequploader/validation_shape.rdf
diff --git a/bh20sequploader/bh20seq-schema.yml b/bh20sequploader/bh20seq-schema.yml
index 2d2e4c9..a901be0 100644
--- a/bh20sequploader/bh20seq-schema.yml
+++ b/bh20sequploader/bh20seq-schema.yml
@@ -23,11 +23,12 @@ $graph:
type: string
jsonldPredicate:
_id: http://semanticscience.org/resource/SIO_000115
- host_common_name:
- doc: Text label for the host species (e.g. homo sapiens)
- type: string?
- jsonldPredicate:
- _id: http://purl.obolibrary.org/obo/NOMEN_0000037
+# Removed host_common_name. Not necessary since we turned species into an IRI
+# host_common_name:
+# doc: Text label for the host species (e.g. homo sapiens)
+# type: string?
+# jsonldPredicate:
+# _id: http://purl.obolibrary.org/obo/NOMEN_0000037
host_sex:
doc: Sex of the host as define in NCIT, IRI expected (http://purl.obolibrary.org/obo/C20197 (Male), http://purl.obolibrary.org/obo/NCIT_C27993 (Female) or unkown (http://purl.obolibrary.org/obo/NCIT_C17998))
type: string
@@ -120,7 +121,7 @@ $graph:
fields:
virus_species:
doc: The name of a taxon from the NCBI taxonomy database
- type: string?
+ type: string
jsonldPredicate:
_id: http://edamontology.org/data_1875
_type: "@id"
@@ -145,7 +146,7 @@ $graph:
_id: http://www.ebi.ac.uk/efo/EFO_0002699
sequencing_coverage:
doc: Sequence coverage defined as the average number of reads representing a given nucleotide (e.g. 100x)
- type: string?
+ type: int?
jsonldPredicate:
_id: http://purl.obolibrary.org/obo/FLU_0000848
diff --git a/bh20sequploader/supporting_webuploader.yml b/bh20sequploader/supporting_webuploader.yml
new file mode 100644
index 0000000..5ad8f6c
--- /dev/null
+++ b/bh20sequploader/supporting_webuploader.yml
@@ -0,0 +1,20 @@
+host_age_unit:  # maps each age unit to its UO (Units of Measurement Ontology) term IRI
+ year: http://purl.obolibrary.org/obo/UO_0000036
+ month: http://purl.obolibrary.org/obo/UO_0000035
+ week: http://purl.obolibrary.org/obo/UO_0000034
+ day: http://purl.obolibrary.org/obo/UO_0000033
+ hour: http://purl.obolibrary.org/obo/UO_0000032
+
+host_sex:  # maps each option to its NCIT term IRI; must agree with the value set in :hostShape
+ Male: http://purl.obolibrary.org/obo/NCIT_C20197
+ Female: http://purl.obolibrary.org/obo/NCIT_C27993
+ unknown: http://purl.obolibrary.org/obo/NCIT_C17998
+
+host_species:  # NOTE(review): no fixed option list here; presumably looked up in the named ontology - confirm
+ OLS-ontology: ncbitaxon  # the shape file expects obo:NCBITaxon_ IRIs for species values
+
+virus_species:
+ OLS-ontology: ncbitaxon  # the shape file expects obo:NCBITaxon_ IRIs for species values
+
+collection_location:
+ OLS-ontology: gaz  # the shape file expects obo:GAZ_ IRIs for the location value
\ No newline at end of file
diff --git a/bh20sequploader/validation_shape.rdf b/bh20sequploader/validation_shape.rdf
new file mode 100644
index 0000000..1d1e7d2
--- /dev/null
+++ b/bh20sequploader/validation_shape.rdf
@@ -0,0 +1,59 @@
+PREFIX :
+PREFIX MainSchema:
+PREFIX hostSchema:
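+PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
+PREFIX obo: <http://purl.obolibrary.org/obo/>
+PREFIX sio: <http://semanticscience.org/resource/>
+PREFIX efo: <http://www.ebi.ac.uk/efo/>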
+PREFIX evs:
+PREFIX edam: <http://edamontology.org/>
+
+:submissionShape {  # ties the five section shapes together
+ MainSchema:host @:hostShape ;  # "@:name" means the value must conform to that shape
+ MainSchema:sample @:sampleShape ;
+ MainSchema:submitter @:submitterShape ;
+ MainSchema:technology @:technologyShape ;
+ MainSchema:virus @:virusShape;  # no cardinality marker on any line, so each section must occur exactly once
+}
+
+:hostShape {  # constraints on the host section; a trailing "?" marks a property as optional
+ efo:EFO_0000532 [ obo:NCBITaxon_~ ] ;  # IRI stem: value must be an IRI starting with obo:NCBITaxon_
+ obo:PATO_0000047 [ obo:NCIT_C20197 obo:NCIT_C27993 obo:NCIT_C17998 ] ;  # exactly one of these three NCIT IRIs (the host_sex terms)
+ sio:SIO_000115 xsd:string ;
+ obo:PATO_0000011 xsd:integer ?;
+ obo:NCIT_C42574 [ obo:UO_~ ] ?;  # if present, an IRI starting with obo:UO_
+ sio:SIO_001167 xsd:string ?;
+ efo:EFO_0000727 xsd:string ?;
+}
+
+:sampleShape {  # the first three properties are required; those marked "?" may be omitted
+ obo:OBI_0001895 xsd:string ;
+ sio:SIO_000115 xsd:string ;
+ sio:SIO_001167 xsd:string ;
+ evs:C25164 xsd:string ?;
+ obo:GAZ_00000448 [obo:GAZ_~] ?;  # if present, an IRI starting with obo:GAZ_
+ obo:OBI_0001472 xsd:string ?;
+ obo:OBI_0001479 xsd:string ?;
+}
+
+:submitterShape {  # two required string properties, then optional ones (marked "?")
+ sio:SIO_000116 xsd:string ;
+ obo:NCIT_C37984 xsd:string ;
+ obo:NCIT_C37900 xsd:string ?;
+ obo:NCIT_C42781 xsd:string ?;
+ obo:OBI_0600047 xsd:string ?;
+ sio:SIO_000115 /https:\u002F\u002Forcid\.org\u002F[0-9]{4}-[0-9]{4}-[0-9]{4}-[0-9]{3}[0-9X]/?;  # ORCID iD URI; the final character is a checksum that may be "X"
+ sio:SIO_000172 xsd:string ?;
+ efo:EFO_0001741 xsd:string ?;
+}
+
+:technologyShape {  # one required string property, two optional ones
+ obo:OBI_0600047 xsd:string ;
+ obo:FLU_0000848 xsd:integer ?;  # sequencing_coverage: optional integer, matching "type: int?" in bh20seq-schema.yml
+ efo:EFO_0002699 xsd:string ?;
+}
+
+:virusShape{
+ edam:data_1875 [ obo:NCBITaxon_~ ] ;  # virus_species: required; value must be an IRI starting with obo:NCBITaxon_
+ sio:SIO_010055 xsd:string ?;  # optional string
+}
\ No newline at end of file
diff --git a/example/metadata.yaml b/example/metadata.yaml
index 15e4e44..5d9cffc 100644
--- a/example/metadata.yaml
+++ b/example/metadata.yaml
@@ -3,7 +3,6 @@ id: placeholder
host:
host_id: XX1
host_species: http://purl.obolibrary.org/obo/NCBITaxon_9606
- host_common_name: string
host_sex: http://purl.obolibrary.org/obo/NCIT_C27993
host_age: 20
host_age_unit: http://purl.obolibrary.org/obo/UO_0000036
@@ -16,8 +15,8 @@ sample:
collector_name: XXX
collecting_institution: XXX
specimen_source: XXX
- collection_date: XXX
- collection_location: http://purl.obolibrary.org/obo/NCIT_C16428
+ collection_date: 2020-01
+ collection_location: http://purl.obolibrary.org/obo/GAZ_00002459
sample_storage_conditions: XXX
additional_collection_information: XXX
@@ -28,7 +27,7 @@ virus:
technology:
sample_sequencing_technology: XX
sequence_assembly_method: XX
- sequencing_coverage: 70x
+ sequencing_coverage: 70
submitter:
submitter_name: tester
@@ -38,4 +37,4 @@ submitter:
provider_sample_id: string
submitter_sample_id: string
authors: testAuthor
- submitter_orchid: X12
+ submitter_orchid: https://orcid.org/0000-0002-1825-0097
diff --git a/example/minimal_example.yaml b/example/minimal_example.yaml
index 43adfa8..0080c6d 100644
--- a/example/minimal_example.yaml
+++ b/example/minimal_example.yaml
@@ -10,6 +10,9 @@ sample:
collector_name: XXX
collecting_institution: XXX
+virus:
+ virus_species: http://purl.obolibrary.org/obo/NCBITaxon_2697049
+
technology:
sample_sequencing_technology: XX
--
cgit v1.2.3