about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r--bh20sequploader/bh20seq-options.yml30
-rw-r--r--bh20sequploader/bh20seq-schema.yml52
-rw-r--r--bh20sequploader/bh20seq-shex.rdf11
-rw-r--r--example/maximum_metadata_example.yaml44
-rw-r--r--example/metadata.yaml43
-rw-r--r--example/minimal_example.yaml18
-rw-r--r--example/minimal_metadata_example.yaml0
-rwxr-xr-xscripts/from_genbank_to_fasta_and_yaml.py2
8 files changed, 65 insertions, 135 deletions
diff --git a/bh20sequploader/bh20seq-options.yml b/bh20sequploader/bh20seq-options.yml
index 104ed6c..c553f41 100644
--- a/bh20sequploader/bh20seq-options.yml
+++ b/bh20sequploader/bh20seq-options.yml
@@ -35,38 +35,8 @@ sample_sequencing_technology:
   Oxford Nanopore Sequencing: http://purl.obolibrary.org/obo/NCIT_C146818
   Sanger dideoxy sequencing: http://purl.obolibrary.org/obo/NCIT_C19641
 
-sample_sequencing_technology2:
-  Illumina NextSeq 500: http://www.ebi.ac.uk/efo/EFO_0009173
-  Illumina NextSeq 550: http://www.ebi.ac.uk/efo/EFO_0008566
-  Illumina HiSeq X: http://www.ebi.ac.uk/efo/EFO_0008567
-  Illumina MiSeq: http://www.ebi.ac.uk/efo/EFO_0004205
-  Illumina: http://purl.obolibrary.org/obo/OBI_0000759
-  IonTorrent: http://purl.obolibrary.org/obo/NCIT_C125894
-  Ion Semiconductor Sequencing: http://purl.obolibraryorg/obo/NCIT_C125894
-  Oxford Nanopore MinION: http://www.ebi.ac.uk/efo/EFO_0008632
-  Oxford Nanopore Sequencing: http://purl.obolibrary.org/obo/NCIT_C146818
-  Sanger dideoxy sequencing: http://purl.obolibrary.org/obo/NCIT_C19641
-
-sample_sequencing_technology3:
-  Illumina NextSeq 500: http://www.ebi.ac.uk/efo/EFO_0009173
-  Illumina NextSeq 550: http://www.ebi.ac.uk/efo/EFO_0008566
-  Illumina HiSeq X: http://www.ebi.ac.uk/efo/EFO_0008567
-  Illumina MiSeq: http://www.ebi.ac.uk/efo/EFO_0004205
-  Illumina: http://purl.obolibrary.org/obo/OBI_0000759
-  IonTorrent: http://purl.obolibrary.org/obo/NCIT_C125894
-  Ion Semiconductor Sequencing: http://purl.obolibraryorg/obo/NCIT_C125894
-  Oxford Nanopore MinION: http://www.ebi.ac.uk/efo/EFO_0008632
-  Oxford Nanopore Sequencing: http://purl.obolibrary.org/obo/NCIT_C146818
-  Sanger dideoxy sequencing: http://purl.obolibrary.org/obo/NCIT_C19641
-
 specimen_source:
   nasopharyngeal swab: http://purl.obolibrary.org/obo/NCIT_C155831
   oropharyngeal swab: http://purl.obolibrary.org/obo/NCIT_C155835
   sputum: http://purl.obolibrary.org/obo/NCIT_C13278
   bronchoalveolar lavage fluid: http://purl.obolibrary.org/obo/NCIT_C13195
-
-specimen_source2:
-  nasopharyngeal swab: http://purl.obolibrary.org/obo/NCIT_C155831
-  oropharyngeal swab: http://purl.obolibrary.org/obo/NCIT_C155835
-  sputum: http://purl.obolibrary.org/obo/NCIT_C13278
-  bronchoalveolar lavage fluid: http://purl.obolibrary.org/obo/NCIT_C13195
diff --git a/bh20sequploader/bh20seq-schema.yml b/bh20sequploader/bh20seq-schema.yml
index ea813a0..f36a6e6 100644
--- a/bh20sequploader/bh20seq-schema.yml
+++ b/bh20sequploader/bh20seq-schema.yml
@@ -48,6 +48,7 @@ $graph:
         type: string?
         jsonldPredicate:
           _id: http://purl.obolibrary.org/obo/NCIT_C25688
+          _type: "@id"
     host_treatment:
       doc: Process in which the act is intended to modify or alter host status
       type: string?
@@ -55,7 +56,7 @@ $graph:
           _id: http://www.ebi.ac.uk/efo/EFO_0000727
     host_vaccination:
       doc: List of vaccines given to the host
-      type: string?
+      type: string[]?
       jsonldPredicate:
           _id: http://purl.obolibrary.org/obo/VO_0000002
     additional_host_information:
@@ -96,14 +97,7 @@ $graph:
           _id: http://purl.obolibrary.org/obo/NCIT_C41206
     specimen_source:
       doc: Method how the specimen was derived as NCIT IRI, e.g. http://purl.obolibrary.org/obo/NCIT_C155831 (=nasopharyngeal swab)
-      type: string?
-      jsonldPredicate:
-          _id: http://purl.obolibrary.org/obo/OBI_0001479
-          _type: "@id"
-          noLinkCheck: true
-    specimen_source2:
-      doc: Method how the specimen was derived as NCIT IRI, e.g. http://purl.obolibrary.org/obo/NCIT_C155835 (=throat swabb)
-      type: string?
+      type: string[]?
       jsonldPredicate:
           _id: http://purl.obolibrary.org/obo/OBI_0001479
           _type: "@id"
@@ -119,10 +113,11 @@ $graph:
       jsonldPredicate:
           _id: http://semanticscience.org/resource/SIO_001167
     source_database_accession:
-      doc: If data is deposit at a public resource (e.g. Genbank, ENA) enter the Accession Id here
-      type: string?
+      doc: If data is deposited at a public resource (e.g. Genbank, ENA) enter the Accession Id here. Please use a resolvable URL (e.g. http://identifiers.org/insdc/LC522350.1#sequence)
+      type: string[]?
       jsonldPredicate:
           _id: http://edamontology.org/data_2091
+          _type: "@id"
 
 - name: virusSchema
   type: record
@@ -145,21 +140,7 @@ $graph:
   fields:
     sample_sequencing_technology:
       doc: Technology that was used to sequence this sample (e.g Sanger, Nanopor MiniION)
-      type: string?
-      jsonldPredicate:
-        _id: http://purl.obolibrary.org/obo/OBI_0600047
-        _type: "@id"
-        noLinkCheck: true
-    sample_sequencing_technology2:
-      doc: Technology that was used to sequence this sample (e.g Sanger, Nanopor MiniION)
-      type: string?
-      jsonldPredicate:
-        _id: http://purl.obolibrary.org/obo/OBI_0600047
-        _type: "@id"
-        noLinkCheck: true
-    sample_sequencing_technology3:
-      doc: Technology that was used to sequence this sample (e.g Sanger, Nanopor MiniION)
-      type: string?
+      type: string[]?
       jsonldPredicate:
         _id: http://purl.obolibrary.org/obo/OBI_0600047
         _type: "@id"
@@ -170,13 +151,8 @@ $graph:
       jsonldPredicate:
         _id: http://www.ebi.ac.uk/efo/EFO_0002699
     sequencing_coverage:
-      doc: Sequence coverage defined as the average number of reads representing a given nucleotide (e.g. 100x)
-      type: float?
-      jsonldPredicate:
-        _id: http://purl.obolibrary.org/obo/FLU_0000848
-    sequencing_coverage2:
-      doc: If a second sequence technology was used you can submit its coverage here
-      type: float?
+      doc: Sequence coverage defined as the average number of reads representing a given nucleotide (e.g. [100]) - if multiple technologies were used multiple integer values can be submitted e.g. [100, 20]
+      type: int[]?
       jsonldPredicate:
         _id: http://purl.obolibrary.org/obo/FLU_0000848
     additional_technology_information:
@@ -189,13 +165,13 @@ $graph:
   type: record
   fields:
     authors:
-      doc: Name of the author(s)
-      type: string
+      doc: Name(s) of the author(s)
+      type: string[]
       jsonldPredicate:
           _id: http://purl.obolibrary.org/obo/NCIT_C42781
     submitter_name:
-      doc: Name of the submitter
-      type: string?
+      doc: Name of the submitter(s)
+      type: string[]?
       jsonldPredicate:
           _id: http://semanticscience.org/resource/SIO_000116
     submitter_address:
@@ -228,7 +204,7 @@ $graph:
         _id: http://purl.obolibrary.org/obo/NCIT_C19026
     submitter_orcid:
       doc: ORCID of the submitter as a full URI, e.g. https://orcid.org/0000-0002-1825-0097
-      type: string?
+      type: string[]?
       jsonldPredicate:
           _id: http://semanticscience.org/resource/SIO_000115
           _type: "@id"
diff --git a/bh20sequploader/bh20seq-shex.rdf b/bh20sequploader/bh20seq-shex.rdf
index c3b0ae1..4ec957d 100644
--- a/bh20sequploader/bh20seq-shex.rdf
+++ b/bh20sequploader/bh20seq-shex.rdf
@@ -25,7 +25,7 @@ PREFIX wikidata: <http://www.wikidata.org/entity/>
     obo:NCIT_C42574 [ obo:UO_~ ] ?;
 	obo:NCIT_C25688 [obo:NCIT_C115935 obo:NCIT_C3833 obo:NCIT_C25269 obo:GENEPIO_0002020 obo:GENEPIO_0001849 obo:NCIT_C28554 obo:NCIT_C37987 ] ? ;
     efo:EFO_0000727 xsd:string ?;
-    obo:VO_0000002 xsd:string ?;
+    obo:VO_0000002 xsd:string {0,10};
     sio:SIO_001167 xsd:string ?;
 }
 
@@ -38,25 +38,26 @@ PREFIX wikidata: <http://www.wikidata.org/entity/>
     obo:OBI_0001479 IRI {0,2};
     obo:OBI_0001472 xsd:string ?;
     sio:SIO_001167 xsd:string ?;
+	edam:data_2091 IRI {0,3};
 }
 
 :submitterShape {
-    obo:NCIT_C42781 xsd:string ;
-    sio:SIO_000116 xsd:string ?;
+    obo:NCIT_C42781 xsd:string * ;
+    sio:SIO_000116 xsd:string *;
     sio:SIO_000172 xsd:string ?;
     obo:NCIT_C37984 xsd:string ?;
     obo:OBI_0600047 xsd:string ?;
   	obo:NCIT_C37900 xsd:string ?;
     efo:EFO_0001741 xsd:string ?;
     obo:NCIT_C19026 xsd:string ?;
-    sio:SIO_000115 /https:\u002F\u002Forcid.org\u002F.{4}-.{4}-.{4}-.{4}/?;
+    sio:SIO_000115 /https:\u002F\u002Forcid.org\u002F.{4}-.{4}-.{4}-.{4}/ {0,10};
     sio:SIO_001167 xsd:string ?;
 }
 
 :technologyShape {
     obo:OBI_0600047 IRI {0,3} ;
     efo:EFO_0002699 xsd:string ?;
-    obo:FLU_0000848 xsd:double {0,2};
+    obo:FLU_0000848 xsd:integer {0,2};
     sio:SIO_001167 xsd:string ?;
 }
 
diff --git a/example/maximum_metadata_example.yaml b/example/maximum_metadata_example.yaml
new file mode 100644
index 0000000..0a6d910
--- /dev/null
+++ b/example/maximum_metadata_example.yaml
@@ -0,0 +1,44 @@
+id: placeholder
+
+host:
+    host_id: XX1
+    host_species: http://purl.obolibrary.org/obo/NCBITaxon_9606
+    host_sex: http://purl.obolibrary.org/obo/PATO_0000384
+    host_age: 20
+    host_age_unit: http://purl.obolibrary.org/obo/UO_0000036
+    host_health_status: http://purl.obolibrary.org/obo/NCIT_C25269
+    host_treatment: Process in which the act is intended to modify or alter host status (Compounds)
+    host_vaccination: [vaccines1,vaccine2]
+    additional_host_information: Optional free text field for additional information
+
+sample:
+    sample_id: Id of the sample as defined by the submitter 
+    collector_name: Name of the person that took the sample
+    collecting_institution: Institute that was responsible of sampling  
+    specimen_source: [http://purl.obolibrary.org/obo/NCIT_C155831,http://purl.obolibrary.org/obo/NCIT_C155835]
+    collection_date: "2020-01-01"
+    collection_location: http://www.wikidata.org/entity/Q148
+    sample_storage_conditions: frozen specimen
+    source_database_accession: [http://identifiers.org/insdc/LC522350.1#sequence]
+    additional_collection_information: Optional free text field for additional information
+
+virus:
+    virus_species: http://purl.obolibrary.org/obo/NCBITaxon_2697049
+    virus_strain: SARS-CoV-2/human/CHN/HS_8/2020
+
+technology:
+    sample_sequencing_technology: [http://www.ebi.ac.uk/efo/EFO_0009173,http://www.ebi.ac.uk/efo/EFO_0009173]
+    sequence_assembly_method: Protocol used for assembly
+    sequencing_coverage: [70, 100]
+    additional_technology_information: Optional free text field for additional information
+
+submitter:
+    submitter_name: [John Doe]
+    submitter_address: John Doe's address
+    originating_lab: John Doe kitchen
+    lab_address: John Doe's address
+    provider_sample_id: XXX1
+    submitter_sample_id: XXX2
+    authors: [John Doe, Joe Boe, Jonny Oe]
+    submitter_orcid: [https://orcid.org/0000-0000-0000-0000,https://orcid.org/0000-0000-0000-0001]
+    additional_submitter_information: Optional free text field for additional information
\ No newline at end of file
diff --git a/example/metadata.yaml b/example/metadata.yaml
deleted file mode 100644
index a76616c..0000000
--- a/example/metadata.yaml
+++ /dev/null
@@ -1,43 +0,0 @@
-id: placeholder
-
-host:
-    host_id: XX1
-    host_species: http://purl.obolibrary.org/obo/NCBITaxon_9606
-    host_sex: http://purl.obolibrary.org/obo/NCIT_C27993
-    host_age: 20
-    host_age_unit: http://purl.obolibrary.org/obo/UO_0000036
-    host_health_status: http://purl.obolibrary.org/obo/NCIT_C25269
-    host_treatment: Process in which the act is intended to modify or alter host status (Compounds)
-    host_vaccination: List of vaccines given to the host (RRIDs?)
-    additional_host_information: Field for additional host information
-
-sample:
-    sample_id: Id of the sample as defined by the submitter 
-    collector_name: Name of the person that took the sample
-    collecting_institution: Institute that was responsible of sampling  
-    specimen_source: http://purl.obolibrary.org/obo/NCIT_C155831
-    specimen_source2: http://purl.obolibrary.org/obo/NCIT_C155835
-    collection_date: "2020-01-01"
-    collection_location: http://www.wikidata.org/entity/Q148
-    sample_storage_conditions: XXX
-    additional_collection_information: XXX
-
-virus:
-    virus_species: http://purl.obolibrary.org/obo/NCBITaxon_2697049
-    virus_strain: SARS-CoV-2/human/CHN/HS_8/2020
-
-technology:
-    sample_sequencing_technology: http://www.ebi.ac.uk/efo/EFO_0009173
-    sample_sequencing_technology2: http://www.ebi.ac.uk/efo/EFO_0009173
-    sequence_assembly_method: Protocol used for assembly
-    sequencing_coverage: 70
-
-submitter:
-    submitter_name: John Doe
-    submitter_address: John Doe's adress
-    originating_lab: John Doe kitchen
-    lab_address: John Doe's address
-    provider_sample_id: HmX
-    submitter_sample_id: xXx
-    authors: John Doe et all
-    submitter_orcid: https://orcid.org/0000-0000-0000-0000
\ No newline at end of file
diff --git a/example/minimal_example.yaml b/example/minimal_example.yaml
deleted file mode 100644
index 0e36a25..0000000
--- a/example/minimal_example.yaml
+++ /dev/null
@@ -1,18 +0,0 @@
-id: placeholder
-
-host:
-    host_species: http://purl.obolibrary.org/obo/NCBITaxon_9606
-
-sample:
-    sample_id: XX
-    collection_date: 2020-01
-    collection_location: http://www.wikidata.org/entity/Q148
-
-virus:
-    virus_species: http://purl.obolibrary.org/obo/NCBITaxon_2697049
-
-technology:
-    sample_sequencing_technology: http://www.ebi.ac.uk/efo/EFO_0008632
-
-submitter:
-    authors: John Doe
\ No newline at end of file
diff --git a/example/minimal_metadata_example.yaml b/example/minimal_metadata_example.yaml
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/example/minimal_metadata_example.yaml
diff --git a/scripts/from_genbank_to_fasta_and_yaml.py b/scripts/from_genbank_to_fasta_and_yaml.py
index 5257bd1..148a7e1 100755
--- a/scripts/from_genbank_to_fasta_and_yaml.py
+++ b/scripts/from_genbank_to_fasta_and_yaml.py
@@ -112,7 +112,7 @@ for path_metadata_xxx_xml in [os.path.join(dir_metadata, name_metadata_xxx_xml)
 
         
         info_for_yaml_dict['sample']['sample_id'] = accession_version
-        info_for_yaml_dict['sample']['source_database_accession'] = accession_version
+        info_for_yaml_dict['sample']['source_database_accession'] = "http://identifiers.org/insdc/"+accession_version+"#sequence" #accession is turned into resolvable URL/URI now
         
         
         # submitter info