diff options
author | AndreaGuarracino | 2020-11-13 13:39:23 +0100 |
---|---|---|
committer | AndreaGuarracino | 2020-11-13 13:39:23 +0100 |
commit | 764bd98fcfdde9657c520fb9bfcfb775ea9f05c9 (patch) | |
tree | 23d56ddd87687242b6ca5cb8897ab9deacd6de04 | |
parent | 9674e9582536d52fef8f6dcde6dade07d0c580e7 (diff) | |
download | bh20-seq-resource-764bd98fcfdde9657c520fb9bfcfb775ea9f05c9.tar.gz bh20-seq-resource-764bd98fcfdde9657c520fb9bfcfb775ea9f05c9.tar.lz bh20-seq-resource-764bd98fcfdde9657c520fb9bfcfb775ea9f05c9.zip |
fix in the ids to consider
-rwxr-xr-x | scripts/download_genbank_data/from_genbank_to_fasta_and_yaml.py | 8 |
1 file changed, 6 insertions, 2 deletions
```diff
diff --git a/scripts/download_genbank_data/from_genbank_to_fasta_and_yaml.py b/scripts/download_genbank_data/from_genbank_to_fasta_and_yaml.py
index 083122f..5a8a336 100755
--- a/scripts/download_genbank_data/from_genbank_to_fasta_and_yaml.py
+++ b/scripts/download_genbank_data/from_genbank_to_fasta_and_yaml.py
@@ -120,8 +120,12 @@ if not os.path.exists(dir_metadata):
     with open(path_ncbi_virus_accession) as f:
         tmp_list = [line.strip('\n') for line in f]
 
-    new_ids = len(set(tmp_list).difference(id_set))
-    id_set.update(tmp_list)
+    new_ids_set = set(tmp_list)
+    if len(accession_to_consider_set) > 0:
+        new_ids_set = new_ids_set.intersection(accession_to_consider_set)
+
+    new_ids = len(new_ids_set.difference(id_set))
+    id_set.update(new_ids_set)
 
     print('DB: NCBI Virus', today_date, '-->', new_ids, 'new IDs from', len(tmp_list), '---> Total unique IDs:', len(id_set))
 
```