diff options
-rw-r--r-- | bh20sequploader/bh20seq-schema.yml | 4 | ++--
-rw-r--r-- | bh20sequploader/bh20seq-shex.rdf | 2 | +-
-rw-r--r-- | scripts/dict_ontology_standardization/ncbi_speciesman_source.csv | 2 | +-
-rwxr-xr-x | scripts/from_genbank_to_fasta_and_yaml.py | 9 | ++++++---
4 files changed, 10 insertions, 7 deletions
diff --git a/bh20sequploader/bh20seq-schema.yml b/bh20sequploader/bh20seq-schema.yml
index ebca35b..75308ab 100644
--- a/bh20sequploader/bh20seq-schema.yml
+++ b/bh20sequploader/bh20seq-schema.yml
@@ -162,12 +162,12 @@ $graph:
         _id: http://www.ebi.ac.uk/efo/EFO_0002699
     sequencing_coverage:
       doc: Sequence coverage defined as the average number of reads representing a given nucleotide (e.g. 100x)
-      type: ["null", float, int]
+      type: float?
       jsonldPredicate:
         _id: http://purl.obolibrary.org/obo/FLU_0000848
     sequencing_coverage2:
       doc: If a second sequence technology was used you can submit its coverage here
-      type: ["null", float, int]
+      type: float?
       jsonldPredicate:
         _id: http://purl.obolibrary.org/obo/FLU_0000848
     additional_technology_information:
diff --git a/bh20sequploader/bh20seq-shex.rdf b/bh20sequploader/bh20seq-shex.rdf
index 59ee71b..31e714f 100644
--- a/bh20sequploader/bh20seq-shex.rdf
+++ b/bh20sequploader/bh20seq-shex.rdf
@@ -50,7 +50,7 @@ PREFIX wikidata: <http://www.wikidata.org/entity/>
 
 :technologyShape {
     obo:OBI_0600047 IRI {0,2} ;
-    obo:FLU_0000848 xsd:integer ?;
+    obo:FLU_0000848 xsd:double ?;
     efo:EFO_0002699 xsd:string ?;
 }
 
diff --git a/scripts/dict_ontology_standardization/ncbi_speciesman_source.csv b/scripts/dict_ontology_standardization/ncbi_speciesman_source.csv
index 2905588..909cf37 100644
--- a/scripts/dict_ontology_standardization/ncbi_speciesman_source.csv
+++ b/scripts/dict_ontology_standardization/ncbi_speciesman_source.csv
@@ -1,4 +1,4 @@
-nasopharyngeal swab, http://purl.obolibrary.org/obo/NCIT_C155831
+nasopharyngeal swab,http://purl.obolibrary.org/obo/NCIT_C155831
 nasopharyngeal exudate,http://purl.obolibrary.org/obo/NCIT_C155831
 respiratory swab,http://purl.obolibrary.org/obo/NCIT_C155831
 naso-pharyngeal exudate,http://purl.obolibrary.org/obo/NCIT_C155831
diff --git a/scripts/from_genbank_to_fasta_and_yaml.py b/scripts/from_genbank_to_fasta_and_yaml.py
index 1a12513..00c0012 100755
--- a/scripts/from_genbank_to_fasta_and_yaml.py
+++ b/scripts/from_genbank_to_fasta_and_yaml.py
@@ -130,9 +130,12 @@ if not os.path.exists(dir_fasta_and_yaml_today):
 
                             if field_in_yaml == 'sequencing_coverage':
                                 # A regular expression would be better!
-                                info_for_yaml_dict['technology'][field_in_yaml] = ';'.join(
-                                    [x.strip('(average)').strip("reads/nt").replace(',', '.').strip(' xX>') for x in tech_info_to_parse.split(';')]
-                                )
+                                try:
+                                    info_for_yaml_dict['technology'][field_in_yaml] = float(
+                                        tech_info_to_parse.strip('(average)').strip("reads/nt").replace(',', '.').strip(' xX>'))
+                                except ValueError:
+                                    print(accession_version, "Couldn't make sense of Coverage '%s'" % tech_info_to_parse)
+                                    pass
                             elif field_in_yaml == 'sample_sequencing_technology':
                                 new_seq_tec_list = []
                                 for seq_tec in tech_info_to_parse.split(';'):