From 764bd98fcfdde9657c520fb9bfcfb775ea9f05c9 Mon Sep 17 00:00:00 2001
From: AndreaGuarracino
Date: Fri, 13 Nov 2020 13:39:23 +0100
Subject: fix in the ids to consider

---
 scripts/download_genbank_data/from_genbank_to_fasta_and_yaml.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

(limited to 'scripts')

diff --git a/scripts/download_genbank_data/from_genbank_to_fasta_and_yaml.py b/scripts/download_genbank_data/from_genbank_to_fasta_and_yaml.py
index 083122f..5a8a336 100755
--- a/scripts/download_genbank_data/from_genbank_to_fasta_and_yaml.py
+++ b/scripts/download_genbank_data/from_genbank_to_fasta_and_yaml.py
@@ -120,8 +120,12 @@ if not os.path.exists(dir_metadata):
         with open(path_ncbi_virus_accession) as f:
             tmp_list = [line.strip('\n') for line in f]
 
-        new_ids = len(set(tmp_list).difference(id_set))
-        id_set.update(tmp_list)
+        new_ids_set = set(tmp_list)
+        if len(accession_to_consider_set) > 0:
+            new_ids_set = new_ids_set.intersection(accession_to_consider_set)
+
+        new_ids = len(new_ids_set.difference(id_set))
+        id_set.update(new_ids_set)
 
         print('DB: NCBI Virus', today_date, '-->', new_ids, 'new IDs from', len(tmp_list), '---> Total unique IDs:', len(id_set))
 
-- 
cgit v1.2.3