aboutsummaryrefslogtreecommitdiff
path: root/scripts/download_genbank_data
diff options
context:
space:
mode:
author AndreaGuarracino 2020-11-13 13:39:23 +0100
committer AndreaGuarracino 2020-11-13 13:39:23 +0100
commit 764bd98fcfdde9657c520fb9bfcfb775ea9f05c9 (patch)
tree 23d56ddd87687242b6ca5cb8897ab9deacd6de04 /scripts/download_genbank_data
parent 9674e9582536d52fef8f6dcde6dade07d0c580e7 (diff)
download bh20-seq-resource-764bd98fcfdde9657c520fb9bfcfb775ea9f05c9.tar.gz
bh20-seq-resource-764bd98fcfdde9657c520fb9bfcfb775ea9f05c9.tar.lz
bh20-seq-resource-764bd98fcfdde9657c520fb9bfcfb775ea9f05c9.zip
fix in the ids to consider
Diffstat (limited to 'scripts/download_genbank_data')
-rwxr-xr-x scripts/download_genbank_data/from_genbank_to_fasta_and_yaml.py 8
1 files changed, 6 insertions, 2 deletions
diff --git a/scripts/download_genbank_data/from_genbank_to_fasta_and_yaml.py b/scripts/download_genbank_data/from_genbank_to_fasta_and_yaml.py
index 083122f..5a8a336 100755
--- a/scripts/download_genbank_data/from_genbank_to_fasta_and_yaml.py
+++ b/scripts/download_genbank_data/from_genbank_to_fasta_and_yaml.py
@@ -120,8 +120,12 @@ if not os.path.exists(dir_metadata):
with open(path_ncbi_virus_accession) as f:
tmp_list = [line.strip('\n') for line in f]
- new_ids = len(set(tmp_list).difference(id_set))
- id_set.update(tmp_list)
+ new_ids_set = set(tmp_list)
+ if len(accession_to_consider_set) > 0:
+ new_ids_set = new_ids_set.intersection(accession_to_consider_set)
+
+ new_ids = len(new_ids_set.difference(id_set))
+ id_set.update(new_ids_set)
print('DB: NCBI Virus', today_date, '-->', new_ids, 'new IDs from', len(tmp_list), '---> Total unique IDs:', len(id_set))