From 6032a373003affa641ca1e70a44c29a232b5b3ed Mon Sep 17 00:00:00 2001
From: lltommy
Date: Tue, 28 Apr 2020 20:31:42 +0200
Subject: Changes to the structure - we use lists now instead of strings where
it makes sense. This allows us to have multiple values where it makes sense
---
example/maximum_metadata_example.yaml | 44 +++++++++++++++++++++++++++++++++++
example/metadata.yaml | 43 ----------------------------------
example/minimal_example.yaml | 18 --------------
example/minimal_metadata_example.yaml | 0
4 files changed, 44 insertions(+), 61 deletions(-)
create mode 100644 example/maximum_metadata_example.yaml
delete mode 100644 example/metadata.yaml
delete mode 100644 example/minimal_example.yaml
create mode 100644 example/minimal_metadata_example.yaml
(limited to 'example')
diff --git a/example/maximum_metadata_example.yaml b/example/maximum_metadata_example.yaml
new file mode 100644
index 0000000..0a6d910
--- /dev/null
+++ b/example/maximum_metadata_example.yaml
@@ -0,0 +1,44 @@
+id: placeholder
+
+host:
+ host_id: XX1
+ host_species: http://purl.obolibrary.org/obo/NCBITaxon_9606
+ host_sex: http://purl.obolibrary.org/obo/PATO_0000384
+ host_age: 20
+ host_age_unit: http://purl.obolibrary.org/obo/UO_0000036
+ host_health_status: http://purl.obolibrary.org/obo/NCIT_C25269
+ host_treatment: Process in which the act is intended to modify or alter host status (Compounds)
+ host_vaccination: [vaccines1,vaccine2]
+ additional_host_information: Optional free text field for addtional information
+
+sample:
+ sample_id: Id of the sample as defined by the submitter
+ collector_name: Name of the person that took the sample
+ collecting_institution: Institute that was responsible of sampling
+ specimen_source: [http://purl.obolibrary.org/obo/NCIT_C155831,http://purl.obolibrary.org/obo/NCIT_C155835]
+ collection_date: "2020-01-01"
+ collection_location: http://www.wikidata.org/entity/Q148
+ sample_storage_conditions: frozen specimen
+ source_database_accession: [http://identifiers.org/insdc/LC522350.1#sequence]
+ additional_collection_information: Optional free text field for addtional information
+
+virus:
+ virus_species: http://purl.obolibrary.org/obo/NCBITaxon_2697049
+ virus_strain: SARS-CoV-2/human/CHN/HS_8/2020
+
+technology:
+ sample_sequencing_technology: [http://www.ebi.ac.uk/efo/EFO_0009173,http://www.ebi.ac.uk/efo/EFO_0009173]
+ sequence_assembly_method: Protocol used for assembly
+ sequencing_coverage: [70, 100]
+ additional_technology_information: Optional free text field for addtional information
+
+submitter:
+ submitter_name: [John Doe]
+ submitter_address: John Doe's adress
+ originating_lab: John Doe kitchen
+ lab_address: John Doe's address
+ provider_sample_id: XXX1
+ submitter_sample_id: XXX2
+ authors: [John Doe, Joe Boe, Jonny Oe]
+ submitter_orcid: [https://orcid.org/0000-0000-0000-0000,https://orcid.org/0000-0000-0000-0001]
+ additional_submitter_information: Optional free text field for addtional information
\ No newline at end of file
diff --git a/example/metadata.yaml b/example/metadata.yaml
deleted file mode 100644
index a76616c..0000000
--- a/example/metadata.yaml
+++ /dev/null
@@ -1,43 +0,0 @@
-id: placeholder
-
-host:
- host_id: XX1
- host_species: http://purl.obolibrary.org/obo/NCBITaxon_9606
- host_sex: http://purl.obolibrary.org/obo/NCIT_C27993
- host_age: 20
- host_age_unit: http://purl.obolibrary.org/obo/UO_0000036
- host_health_status: http://purl.obolibrary.org/obo/NCIT_C25269
- host_treatment: Process in which the act is intended to modify or alter host status (Compounds)
- host_vaccination: List of vaccines given to the host (RRIDs?)
- additional_host_information: Field for additional host information
-
-sample:
- sample_id: Id of the sample as defined by the submitter
- collector_name: Name of the person that took the sample
- collecting_institution: Institute that was responsible of sampling
- specimen_source: http://purl.obolibrary.org/obo/NCIT_C155831
- specimen_source2: http://purl.obolibrary.org/obo/NCIT_C155835
- collection_date: "2020-01-01"
- collection_location: http://www.wikidata.org/entity/Q148
- sample_storage_conditions: XXX
- additional_collection_information: XXX
-
-virus:
- virus_species: http://purl.obolibrary.org/obo/NCBITaxon_2697049
- virus_strain: SARS-CoV-2/human/CHN/HS_8/2020
-
-technology:
- sample_sequencing_technology: http://www.ebi.ac.uk/efo/EFO_0009173
- sample_sequencing_technology2: http://www.ebi.ac.uk/efo/EFO_0009173
- sequence_assembly_method: Protocol used for assembly
- sequencing_coverage: 70
-
-submitter:
- submitter_name: John Doe
- submitter_address: John Doe's adress
- originating_lab: John Doe kitchen
- lab_address: John Doe's address
- provider_sample_id: HmX
- submitter_sample_id: xXx
- authors: John Doe et all
- submitter_orcid: https://orcid.org/0000-0000-0000-0000
\ No newline at end of file
diff --git a/example/minimal_example.yaml b/example/minimal_example.yaml
deleted file mode 100644
index 0e36a25..0000000
--- a/example/minimal_example.yaml
+++ /dev/null
@@ -1,18 +0,0 @@
-id: placeholder
-
-host:
- host_species: http://purl.obolibrary.org/obo/NCBITaxon_9606
-
-sample:
- sample_id: XX
- collection_date: 2020-01
- collection_location: http://www.wikidata.org/entity/Q148
-
-virus:
- virus_species: http://purl.obolibrary.org/obo/NCBITaxon_2697049
-
-technology:
- sample_sequencing_technology: http://www.ebi.ac.uk/efo/EFO_0008632
-
-submitter:
- authors: John Doe
\ No newline at end of file
diff --git a/example/minimal_metadata_example.yaml b/example/minimal_metadata_example.yaml
new file mode 100644
index 0000000..e69de29
--
cgit v1.2.3
From f795744116a9e86ab2a5c5e78761162e9d4ff7e0 Mon Sep 17 00:00:00 2001
From: lltommy
Date: Tue, 28 Apr 2020 20:43:03 +0200
Subject: Adding field 'publication' to the maximum example
---
example/maximum_metadata_example.yaml | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
(limited to 'example')
diff --git a/example/maximum_metadata_example.yaml b/example/maximum_metadata_example.yaml
index 0a6d910..260e4e4 100644
--- a/example/maximum_metadata_example.yaml
+++ b/example/maximum_metadata_example.yaml
@@ -33,12 +33,13 @@ technology:
additional_technology_information: Optional free text field for addtional information
submitter:
+ authors: [John Doe, Joe Boe, Jonny Oe]
submitter_name: [John Doe]
submitter_address: John Doe's adress
originating_lab: John Doe kitchen
lab_address: John Doe's address
provider_sample_id: XXX1
submitter_sample_id: XXX2
- authors: [John Doe, Joe Boe, Jonny Oe]
+ publication: PMID00001113
submitter_orcid: [https://orcid.org/0000-0000-0000-0000,https://orcid.org/0000-0000-0000-0001]
additional_submitter_information: Optional free text field for addtional information
\ No newline at end of file
--
cgit v1.2.3
From b4ec03398d5e74eeb33a4a2a396fe4518c0c5465 Mon Sep 17 00:00:00 2001
From: lltommy
Date: Tue, 28 Apr 2020 20:48:42 +0200
Subject: Whatever happened to the minimal metadata, I restored it
---
example/minimal_metadata_example.yaml | 18 ++++++++++++++++++
1 file changed, 18 insertions(+)
(limited to 'example')
diff --git a/example/minimal_metadata_example.yaml b/example/minimal_metadata_example.yaml
index e69de29..1e8d3f5 100644
--- a/example/minimal_metadata_example.yaml
+++ b/example/minimal_metadata_example.yaml
@@ -0,0 +1,18 @@
+id: placeholder
+
+host:
+ host_species: http://purl.obolibrary.org/obo/NCBITaxon_9606
+
+sample:
+ sample_id: XX
+ collection_date: 2020-01
+ collection_location: http://www.wikidata.org/entity/Q148
+
+virus:
+ virus_species: http://purl.obolibrary.org/obo/NCBITaxon_2697049
+
+technology:
+ sample_sequencing_technology: [http://www.ebi.ac.uk/efo/EFO_0008632]
+
+submitter:
+ authors: [John Doe]
\ No newline at end of file
--
cgit v1.2.3
From ceb34edf0449cca328dfe8cf61277d7f05ea7cf9 Mon Sep 17 00:00:00 2001
From: lltommy
Date: Tue, 28 Apr 2020 22:09:37 +0200
Subject: Decision to settle for float instead of integers for the coverage
---
bh20sequploader/bh20seq-schema.yml | 2 +-
bh20sequploader/bh20seq-shex.rdf | 2 +-
example/maximum_metadata_example.yaml | 2 +-
3 files changed, 3 insertions(+), 3 deletions(-)
(limited to 'example')
diff --git a/bh20sequploader/bh20seq-schema.yml b/bh20sequploader/bh20seq-schema.yml
index f36a6e6..d3e992a 100644
--- a/bh20sequploader/bh20seq-schema.yml
+++ b/bh20sequploader/bh20seq-schema.yml
@@ -152,7 +152,7 @@ $graph:
_id: http://www.ebi.ac.uk/efo/EFO_0002699
sequencing_coverage:
doc: Sequence coverage defined as the average number of reads representing a given nucleotide (e.g. [100]) - if multiple technologies were used multiple float values can be submitted e.g. [100, 20]
- type: int[]?
+ type: float[]?
jsonldPredicate:
_id: http://purl.obolibrary.org/obo/FLU_0000848
additional_technology_information:
diff --git a/bh20sequploader/bh20seq-shex.rdf b/bh20sequploader/bh20seq-shex.rdf
index 4ec957d..fe6deee 100644
--- a/bh20sequploader/bh20seq-shex.rdf
+++ b/bh20sequploader/bh20seq-shex.rdf
@@ -57,7 +57,7 @@ PREFIX wikidata:
:technologyShape {
obo:OBI_0600047 IRI {0,3} ;
efo:EFO_0002699 xsd:string ?;
- obo:FLU_0000848 xsd:integer {0,2};
+ obo:FLU_0000848 xsd:float {0,3};
sio:SIO_001167 xsd:string ?;
}
diff --git a/example/maximum_metadata_example.yaml b/example/maximum_metadata_example.yaml
index 260e4e4..0ba9ada 100644
--- a/example/maximum_metadata_example.yaml
+++ b/example/maximum_metadata_example.yaml
@@ -29,7 +29,7 @@ virus:
technology:
sample_sequencing_technology: [http://www.ebi.ac.uk/efo/EFO_0009173,http://www.ebi.ac.uk/efo/EFO_0009173]
sequence_assembly_method: Protocol used for assembly
- sequencing_coverage: [70, 100]
+ sequencing_coverage: [70.0, 100.0]
additional_technology_information: Optional free text field for addtional information
submitter:
--
cgit v1.2.3
From 5c05facbdb1e35556256cd11d6a814822ebb6159 Mon Sep 17 00:00:00 2001
From: lltommy
Date: Thu, 30 Apr 2020 18:00:58 +0200
Subject: Last addition/edits to the meta data schema
---
bh20sequploader/bh20seq-schema.yml | 15 ++++++++++++---
bh20sequploader/bh20seq-shex.rdf | 5 +++--
example/maximum_metadata_example.yaml | 1 +
example/minimal_metadata_example.yaml | 2 +-
4 files changed, 17 insertions(+), 6 deletions(-)
(limited to 'example')
diff --git a/bh20sequploader/bh20seq-schema.yml b/bh20sequploader/bh20seq-schema.yml
index d3e992a..99e1a11 100644
--- a/bh20sequploader/bh20seq-schema.yml
+++ b/bh20sequploader/bh20seq-schema.yml
@@ -49,6 +49,7 @@ $graph:
jsonldPredicate:
_id: http://purl.obolibrary.org/obo/NCIT_C25688
_type: "@id"
+ noLinkCheck: true
host_treatment:
doc: Process in which the act is intended to modify or alter host status
type: string?
@@ -59,6 +60,13 @@ $graph:
type: string[]?
jsonldPredicate:
_id: http://purl.obolibrary.org/obo/VO_0000002
+ ethnicity:
+ doc: Ethnicity of the host e.g. http://purl.obolibrary.org/obo/HANCESTRO_0010
+ type: string?
+ jsonldPredicate:
+ _id: http://semanticscience.org/resource/SIO_001014
+ _type: "@id"
+ noLinkCheck: true
additional_host_information:
doc: Field for additional host information
type: string?
@@ -91,7 +99,7 @@ $graph:
jsonldPredicate:
_id: http://purl.obolibrary.org/obo/OBI_0001895
collecting_institution:
- doc: Institute that was responsible of sampeling
+ doc: Institute that was responsible for sampling
type: string?
jsonldPredicate:
_id: http://purl.obolibrary.org/obo/NCIT_C41206
@@ -118,12 +126,13 @@ $graph:
jsonldPredicate:
_id: http://edamontology.org/data_2091
_type: "@id"
+ noLinkCheck: true
- name: virusSchema
type: record
fields:
virus_species:
- doc: The name of a taxon from the NCBI taxonomy database
+ doc: The name of virus species from the NCBI taxonomy database, e.g. http://purl.obolibrary.org/obo/NCBITaxon_2697049 for Severe acute respiratory syndrome coronavirus 2
type: string
jsonldPredicate:
_id: http://edamontology.org/data_1875
@@ -152,7 +161,7 @@ $graph:
_id: http://www.ebi.ac.uk/efo/EFO_0002699
sequencing_coverage:
doc: Sequence coverage defined as the average number of reads representing a given nucleotide (e.g. [100]) - if multiple technologies were used multiple float values can be submitted e.g. [100, 20]
- type: float[]?
+ type: double[]?
jsonldPredicate:
_id: http://purl.obolibrary.org/obo/FLU_0000848
additional_technology_information:
diff --git a/bh20sequploader/bh20seq-shex.rdf b/bh20sequploader/bh20seq-shex.rdf
index fe6deee..cdf2296 100644
--- a/bh20sequploader/bh20seq-shex.rdf
+++ b/bh20sequploader/bh20seq-shex.rdf
@@ -27,6 +27,7 @@ PREFIX wikidata:
efo:EFO_0000727 xsd:string ?;
obo:VO_0000002 xsd:string {0,10};
sio:SIO_001167 xsd:string ?;
+ sio:SIO_001014 [ obo:HANCESTRO_~ ] ? ; #ethnicity
}
:sampleShape {
@@ -42,7 +43,7 @@ PREFIX wikidata:
}
:submitterShape {
- obo:NCIT_C42781 xsd:string * ;
+ obo:NCIT_C42781 xsd:string + ;
sio:SIO_000116 xsd:string *;
sio:SIO_000172 xsd:string ?;
obo:NCIT_C37984 xsd:string ?;
@@ -57,7 +58,7 @@ PREFIX wikidata:
:technologyShape {
obo:OBI_0600047 IRI {0,3} ;
efo:EFO_0002699 xsd:string ?;
- obo:FLU_0000848 xsd:float {0,3};
+ obo:FLU_0000848 xsd:double OR xsd:integer {0,3};
sio:SIO_001167 xsd:string ?;
}
diff --git a/example/maximum_metadata_example.yaml b/example/maximum_metadata_example.yaml
index 0ba9ada..1bc70d7 100644
--- a/example/maximum_metadata_example.yaml
+++ b/example/maximum_metadata_example.yaml
@@ -9,6 +9,7 @@ host:
host_health_status: http://purl.obolibrary.org/obo/NCIT_C25269
host_treatment: Process in which the act is intended to modify or alter host status (Compounds)
host_vaccination: [vaccines1,vaccine2]
+ ethnicity: http://purl.obolibrary.org/obo/HANCESTRO_0010
additional_host_information: Optional free text field for addtional information
sample:
diff --git a/example/minimal_metadata_example.yaml b/example/minimal_metadata_example.yaml
index 1e8d3f5..51f8a87 100644
--- a/example/minimal_metadata_example.yaml
+++ b/example/minimal_metadata_example.yaml
@@ -5,7 +5,7 @@ host:
sample:
sample_id: XX
- collection_date: 2020-01
+ collection_date: "2020-01-01"
collection_location: http://www.wikidata.org/entity/Q148
virus:
--
cgit v1.2.3