aboutsummaryrefslogtreecommitdiff
path: root/scripts
diff options
context:
space:
mode:
author Peter Amstutz 2020-04-21 13:22:53 -0400
committer Peter Amstutz 2020-04-21 13:22:53 -0400
commit 7e085b2958d9bd4f0a2b1912cf259a05b56366bc (patch)
tree 29227cc5e55264945521ed27172db70c8de46d73 /scripts
parent 88d81f853cf04b7f28681dd9cdee775b0422f252 (diff)
download bh20-seq-resource-7e085b2958d9bd4f0a2b1912cf259a05b56366bc.tar.gz
bh20-seq-resource-7e085b2958d9bd4f0a2b1912cf259a05b56366bc.tar.lz
bh20-seq-resource-7e085b2958d9bd4f0a2b1912cf259a05b56366bc.zip
Tweak handling of "coverage" also fix typo
Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz@curii.com>
Diffstat (limited to 'scripts')
-rw-r--r-- scripts/dict_ontology_standardization/ncbi_speciesman_source.csv 2
-rwxr-xr-x scripts/from_genbank_to_fasta_and_yaml.py 9
2 files changed, 7 insertions, 4 deletions
diff --git a/scripts/dict_ontology_standardization/ncbi_speciesman_source.csv b/scripts/dict_ontology_standardization/ncbi_speciesman_source.csv
index 2905588..909cf37 100644
--- a/scripts/dict_ontology_standardization/ncbi_speciesman_source.csv
+++ b/scripts/dict_ontology_standardization/ncbi_speciesman_source.csv
@@ -1,4 +1,4 @@
-nasopharyngeal swab, http://purl.obolibrary.org/obo/NCIT_C155831
+nasopharyngeal swab,http://purl.obolibrary.org/obo/NCIT_C155831
nasopharyngeal exudate,http://purl.obolibrary.org/obo/NCIT_C155831
respiratory swab,http://purl.obolibrary.org/obo/NCIT_C155831
naso-pharyngeal exudate,http://purl.obolibrary.org/obo/NCIT_C155831
diff --git a/scripts/from_genbank_to_fasta_and_yaml.py b/scripts/from_genbank_to_fasta_and_yaml.py
index 1a12513..00c0012 100755
--- a/scripts/from_genbank_to_fasta_and_yaml.py
+++ b/scripts/from_genbank_to_fasta_and_yaml.py
@@ -130,9 +130,12 @@ if not os.path.exists(dir_fasta_and_yaml_today):
if field_in_yaml == 'sequencing_coverage':
# A regular expression would be better!
- info_for_yaml_dict['technology'][field_in_yaml] = ';'.join(
- [x.strip('(average)').strip("reads/nt").replace(',', '.').strip(' xX>') for x in tech_info_to_parse.split(';')]
- )
+ try:
+ info_for_yaml_dict['technology'][field_in_yaml] = float(
+ tech_info_to_parse.strip('(average)').strip("reads/nt").replace(',', '.').strip(' xX>'))
+ except ValueError:
+ print(accession_version, "Couldn't make sense of Coverage '%s'" % tech_info_to_parse)
+ pass
elif field_in_yaml == 'sample_sequencing_technology':
new_seq_tec_list = []
for seq_tec in tech_info_to_parse.split(';'):