diff options
author | Andrea Guarracino | 2020-04-19 21:22:15 +0200 |
---|---|---|
committer | GitHub | 2020-04-19 21:22:15 +0200 |
commit | cfb0c23c5206ea2d81794a4212d824a32a0a406b (patch) | |
tree | bb6bbaf7ccd11a36e68eb5f2be854c44ea9a62ea /scripts | |
parent | 8f97a0e0d8d2d7e32c396d3b9881d770e58ab832 (diff) | |
download | bh20-seq-resource-cfb0c23c5206ea2d81794a4212d824a32a0a406b.tar.gz bh20-seq-resource-cfb0c23c5206ea2d81794a4212d824a32a0a406b.tar.lz bh20-seq-resource-cfb0c23c5206ea2d81794a4212d824a32a0a406b.zip |
Fixed a reference to a missing variable and handled commas inside quoted terms when parsing the dictionary CSV files
Diffstat (limited to 'scripts')
-rw-r--r-- | scripts/from_genbank_to_fasta_and_yaml.py | 14 |
1 files changed, 9 insertions, 5 deletions
```diff
diff --git a/scripts/from_genbank_to_fasta_and_yaml.py b/scripts/from_genbank_to_fasta_and_yaml.py
index a7c9dc2..33615fa 100644
--- a/scripts/from_genbank_to_fasta_and_yaml.py
+++ b/scripts/from_genbank_to_fasta_and_yaml.py
@@ -1,5 +1,5 @@
 from Bio import Entrez
-Entrez.email = 'andresguarahino@gmail.com'
+Entrez.email = 'insert_your_email@gmail.com'
 
 import xml.etree.ElementTree as ET
 import yaml
@@ -54,7 +54,7 @@ if not os.path.exists(dir_metadata_today):
 
     for i, id_x_list in enumerate(chunks(list(id_set), num_ids_for_request)):
         path_metadata_xxx_xml = os.path.join(dir_metadata_today, 'metadata_{}.xml'.format(i))
-        print('Requesting {} ids --> {}'.format(len(id_x_list), path_metadata_xml))
+        print('Requesting {} ids --> {}'.format(len(id_x_list), path_metadata_xxx_xml))
 
         with open(path_metadata_xxx_xml, 'w') as fw:
             fw.write(
@@ -69,7 +69,11 @@ for path_dict_xxx_csv in [os.path.join(dir_dict_ontology_standardization, name_x
 
     with open(path_dict_xxx_csv) as f:
         for line in f:
-            term, uri = line.strip('\n').split(',')
+            if len(line.split(',')) > 2:
+                term, uri = line.strip('\n').split('",')
+                term = term.strip('"')
+            else:
+                term, uri = line.strip('\n').split(',')
 
             term_to_uri_dict[term] = uri
 
@@ -78,7 +82,7 @@ species_to_taxid_dict = {
 }
 
 
-if os.path.exists(dir_fasta_and_yaml_today):
+if not os.path.exists(dir_fasta_and_yaml_today):
     os.makedirs(dir_fasta_and_yaml_today)
 
 for path_metadata_xxx_xml in [os.path.join(dir_metadata_today, name_metadata_xxx_xml) for name_metadata_xxx_xml in os.listdir(dir_metadata_today) if name_metadata_xxx_xml.endswith('.xml')]:
@@ -203,7 +207,7 @@ if os.path.exists(dir_fasta_and_yaml_today):
                     info_for_yaml_dict['virus']['virus_strain'] = GBQualifier_value_text
                 elif GBQualifier_name_text == 'db_xref':
                     info_for_yaml_dict['virus']['virus_species'] = int(GBQualifier_value_text.split('taxon:')[1])
-    
+
         with open(os.path.join(dir_fasta_and_yaml_today, '{}.fasta'.format(accession_version)), 'w') as fw:
             fw.write('>{}\n{}'.format(accession_version,
                                       GBSeq_sequence.text.upper()))
```