From ba8c23625dea3d869eab821007e8f5db2eaf9dfe Mon Sep 17 00:00:00 2001 From: AndreaGuarracino Date: Sat, 14 Nov 2020 23:30:53 +0100 Subject: added a check on host_age --- scripts/create_sra_metadata/create_sra_metadata.py | 6 ++++-- scripts/download_genbank_data/from_genbank_to_fasta_and_yaml.py | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/scripts/create_sra_metadata/create_sra_metadata.py b/scripts/create_sra_metadata/create_sra_metadata.py index 89624fd..df77daf 100644 --- a/scripts/create_sra_metadata/create_sra_metadata.py +++ b/scripts/create_sra_metadata/create_sra_metadata.py @@ -178,8 +178,10 @@ for i, EXPERIMENT_PACKAGE in enumerate(EXPERIMENT_PACKAGE_SET): missing_value_list.append('\t'.join([accession, 'host_sex', VALUE_text])) elif TAG_text in ['host_age', 'host age']: if is_integer(VALUE_text): - info_for_yaml_dict['host']['host_age'] = int(VALUE_text) - info_for_yaml_dict['host']['host_age_unit'] = 'http://purl.obolibrary.org/obo/UO_0000036' + host_age = int(VALUE_text) + if host_age > 0 and host_age < 110: + info_for_yaml_dict['host']['host_age'] = host_age + info_for_yaml_dict['host']['host_age_unit'] = 'http://purl.obolibrary.org/obo/UO_0000036' elif TAG_text == 'collected_by': if VALUE_text.lower() not in ['not available', 'missing']: name = VALUE_text in ['Dr. 
Susie Bartlett', 'Ahmed Babiker', 'Aisi Fu', 'Brandi Williamson', 'George Taiaroa', 'Natacha Ogando', 'Tim Dalebout', 'ykut Ozdarendeli'] diff --git a/scripts/download_genbank_data/from_genbank_to_fasta_and_yaml.py b/scripts/download_genbank_data/from_genbank_to_fasta_and_yaml.py index 442a9a5..f9cc3fa 100755 --- a/scripts/download_genbank_data/from_genbank_to_fasta_and_yaml.py +++ b/scripts/download_genbank_data/from_genbank_to_fasta_and_yaml.py @@ -311,7 +311,7 @@ for path_metadata_xxx_xml in [os.path.join(dir_metadata, name_metadata_xxx_xml) elif len(GBQualifier_value_text_list) > 2 and is_integer(GBQualifier_value_text_list[2].split(' ')[-1]): host_age = int(GBQualifier_value_text_list[2].split(' ')[-1]) - if host_age > -1: + if host_age > 0 and host_age < 110: info_for_yaml_dict['host']['host_age'] = host_age info_for_yaml_dict['host']['host_age_unit'] = 'http://purl.obolibrary.org/obo/UO_0000036' elif len(GBQualifier_value_text_list) > 2: -- cgit v1.2.3