aboutsummaryrefslogtreecommitdiff
path: root/scripts
diff options
context:
space:
mode:
authorAndreaGuarracino2020-08-28 11:16:24 +0200
committerAndreaGuarracino2020-08-28 11:16:24 +0200
commit3165a31e321cbf4641f9afdcbea511ee66f673bb (patch)
tree7140a760be5903c6f5e63a38da7e0f76493530a8 /scripts
parentcc8f99d50236b7d0c365990398785ecc319323ea (diff)
downloadbh20-seq-resource-3165a31e321cbf4641f9afdcbea511ee66f673bb.tar.gz
bh20-seq-resource-3165a31e321cbf4641f9afdcbea511ee66f673bb.tar.lz
bh20-seq-resource-3165a31e321cbf4641f9afdcbea511ee66f673bb.zip
Added a check (both locally and in the validation) that sample_id is the same in the metadata and in the FASTA header (#103)
Diffstat (limited to 'scripts')
-rwxr-xr-xscripts/download_genbank_data/from_genbank_to_fasta_and_yaml.py2
1 files changed, 2 insertions, 0 deletions
diff --git a/scripts/download_genbank_data/from_genbank_to_fasta_and_yaml.py b/scripts/download_genbank_data/from_genbank_to_fasta_and_yaml.py
index 8ef76e1..8f765d7 100755
--- a/scripts/download_genbank_data/from_genbank_to_fasta_and_yaml.py
+++ b/scripts/download_genbank_data/from_genbank_to_fasta_and_yaml.py
@@ -412,6 +412,8 @@ for path_metadata_xxx_xml in [os.path.join(dir_metadata, name_metadata_xxx_xml)
not_created_accession_dict[accession_version].append('host_species not found')
if len(GBSeq_sequence.text) < min_len_to_count:
+ if accession_version not in not_created_accession_dict:
+ not_created_accession_dict[accession_version] = []
not_created_accession_dict[accession_version].append('sequence shorter than {} bp'.format(min_len_to_count))
if accession_version not in not_created_accession_dict: