about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- bh20sequploader/bh20seq-schema.yml 4
-rw-r--r-- bh20sequploader/bh20seq-shex.rdf 2
-rw-r--r-- scripts/dict_ontology_standardization/ncbi_speciesman_source.csv 2
-rwxr-xr-x scripts/from_genbank_to_fasta_and_yaml.py 9
4 files changed, 10 insertions, 7 deletions
diff --git a/bh20sequploader/bh20seq-schema.yml b/bh20sequploader/bh20seq-schema.yml
index ebca35b..75308ab 100644
--- a/bh20sequploader/bh20seq-schema.yml
+++ b/bh20sequploader/bh20seq-schema.yml
@@ -162,12 +162,12 @@ $graph:
         _id: http://www.ebi.ac.uk/efo/EFO_0002699
     sequencing_coverage:
       doc: Sequence coverage defined as the average number of reads representing a given nucleotide (e.g. 100x)
-      type: ["null", float, int]
+      type: float?
       jsonldPredicate:
         _id: http://purl.obolibrary.org/obo/FLU_0000848
     sequencing_coverage2:
       doc: If a second sequence technology was used you can submit its coverage here
-      type: ["null", float, int]
+      type: float?
       jsonldPredicate:
         _id: http://purl.obolibrary.org/obo/FLU_0000848
     additional_technology_information:
diff --git a/bh20sequploader/bh20seq-shex.rdf b/bh20sequploader/bh20seq-shex.rdf
index 59ee71b..31e714f 100644
--- a/bh20sequploader/bh20seq-shex.rdf
+++ b/bh20sequploader/bh20seq-shex.rdf
@@ -50,7 +50,7 @@ PREFIX wikidata: <http://www.wikidata.org/entity/>
 
 :technologyShape {
     obo:OBI_0600047 IRI {0,2} ;
-    obo:FLU_0000848 xsd:integer ?;
+    obo:FLU_0000848 xsd:double ?;
     efo:EFO_0002699 xsd:string ?;
 }
 
diff --git a/scripts/dict_ontology_standardization/ncbi_speciesman_source.csv b/scripts/dict_ontology_standardization/ncbi_speciesman_source.csv
index 2905588..909cf37 100644
--- a/scripts/dict_ontology_standardization/ncbi_speciesman_source.csv
+++ b/scripts/dict_ontology_standardization/ncbi_speciesman_source.csv
@@ -1,4 +1,4 @@
-nasopharyngeal swab, http://purl.obolibrary.org/obo/NCIT_C155831
+nasopharyngeal swab,http://purl.obolibrary.org/obo/NCIT_C155831
 nasopharyngeal exudate,http://purl.obolibrary.org/obo/NCIT_C155831
 respiratory swab,http://purl.obolibrary.org/obo/NCIT_C155831
 naso-pharyngeal exudate,http://purl.obolibrary.org/obo/NCIT_C155831
diff --git a/scripts/from_genbank_to_fasta_and_yaml.py b/scripts/from_genbank_to_fasta_and_yaml.py
index 1a12513..00c0012 100755
--- a/scripts/from_genbank_to_fasta_and_yaml.py
+++ b/scripts/from_genbank_to_fasta_and_yaml.py
@@ -130,9 +130,12 @@ if not os.path.exists(dir_fasta_and_yaml_today):
 
                         if field_in_yaml == 'sequencing_coverage':
                             # A regular expression would be better!
-                            info_for_yaml_dict['technology'][field_in_yaml] = ';'.join(
-                                [x.strip('(average)').strip("reads/nt").replace(',', '.').strip(' xX>') for x in tech_info_to_parse.split(';')]
-                            )
+                            try:
+                                info_for_yaml_dict['technology'][field_in_yaml] = float(
+                                    tech_info_to_parse.strip('(average)').strip("reads/nt").replace(',', '.').strip(' xX>'))
+                            except ValueError:
+                                print(accession_version, "Couldn't make sense of Coverage '%s'" % tech_info_to_parse)
+                                pass
                         elif field_in_yaml == 'sample_sequencing_technology':
                             new_seq_tec_list = []
                             for seq_tec in tech_info_to_parse.split(';'):