about summary refs log tree commit diff
path: root/workflows/pull-data/genbank/ref.py
diff options
context:
space:
mode:
author Pjotr Prins 2021-01-03 10:36:55 +0000
committer Pjotr Prins 2021-01-03 10:36:55 +0000
commit 0de0ae0ac62e85f1ba6587a252b5a1164cbd5210 (patch)
tree 79f9b1065ec8eb640eb50db9929a88aee2025daa /workflows/pull-data/genbank/ref.py
parent a10c61d1f5afca70f54b762beefb402372d7fd31 (diff)
download bh20-seq-resource-0de0ae0ac62e85f1ba6587a252b5a1164cbd5210.tar.gz
bh20-seq-resource-0de0ae0ac62e85f1ba6587a252b5a1164cbd5210.tar.lz
bh20-seq-resource-0de0ae0ac62e85f1ba6587a252b5a1164cbd5210.zip
genbank: deal with host, sex and age
Diffstat (limited to 'workflows/pull-data/genbank/ref.py')
-rw-r--r-- workflows/pull-data/genbank/ref.py 48
1 files changed, 2 insertions, 46 deletions
diff --git a/workflows/pull-data/genbank/ref.py b/workflows/pull-data/genbank/ref.py
index d809d7f..a9b4f06 100644
--- a/workflows/pull-data/genbank/ref.py
+++ b/workflows/pull-data/genbank/ref.py
@@ -26,53 +26,9 @@
                             technology[field_in_yaml] = tech_info_to_parse
 
 
-            for GBFeature in GBSeq.iter('GBFeature'):
-                if GBFeature.find('GBFeature_key').text != 'source':
-                    continue
-
-                for GBQualifier in GBFeature.iter('GBQualifier'):
-                    GBQualifier_value = GBQualifier.find('GBQualifier_value')
-                    if GBQualifier_value is None:
-                        continue
-                    GBQualifier_value_text = GBQualifier_value.text
-
-                    GBQualifier_name_text = GBQualifier.find('GBQualifier_name').text
-
-                    if GBQualifier_name_text == 'host':
-                        GBQualifier_value_text = GBQualifier_value_text.split(';')[0] # For case like Homo sapiens;sex:female
-                        if GBQualifier_value_text in field_to_term_to_uri_dict['ncbi_host_species']:
-                            # Cases like 'Felis catus; Domestic Shorthair'
-                            host['host_species'] = field_to_term_to_uri_dict['ncbi_host_species'][GBQualifier_value_text]
-                        else:
-                            GBQualifier_value_text_list = GBQualifier_value_text.split('; ')
 
-                            if GBQualifier_value_text_list[0] in field_to_term_to_uri_dict['ncbi_host_species']:
-                                host['host_species'] = field_to_term_to_uri_dict['ncbi_host_species'][GBQualifier_value_text_list[0]]
-                            elif GBQualifier_value_text_list[0] and ('MT215193' in accession_version or 'MT270814' in accession_version):
-                                # Information checked manually from NCBI Virus
-                                host['host_species'] = field_to_term_to_uri_dict['ncbi_host_species']['Canis lupus familiaris']
-                            else:
-                                missing_value_list.append('\t'.join([accession_version, 'host_species', GBQualifier_value_text_list[0]]))
-
-                            # Possible cases:
-                            # - Homo sapiens                        --> ['Homo sapiens']
-                            # - Homo sapiens; female                --> ['Homo sapiens', 'female']
-                            # - Homo sapiens; female 63             --> ['Homo sapiens', 'female 63']
-                            # - Homo sapiens; female; age 40        --> ['Homo sapiens', 'female', 'age 40']
-                            # - Homo sapiens; gender: F; age: 61    --> ['Homo sapiens', 'gender: F', 'age: 61']
-                            # - Homo sapiens; gender: M; age: 68    --> ['Homo sapiens', 'gender: M', 'age: 68']
-                            # - Homo sapiens; hospitalized patient  --> ['Homo sapiens', 'hospitalized patient']
-                            # - Homo sapiens; male                  --> ['Homo sapiens', 'male']
-                            # - Homo sapiens; male; 63              --> ['Homo sapiens', 'male', '63']
-                            # - Homo sapiens; male; age 29          --> ['Homo sapiens', 'male', 'age 29']
-                            # - Homo sapiens; symptomatic           --> ['Homo sapiens', 'symptomatic']
-                            if len(GBQualifier_value_text_list) > 1:
-                                host_sex = ''
-                                if 'female' in GBQualifier_value_text_list[1]:
-                                    host_sex = 'female'
-                                elif 'male' in GBQualifier_value_text_list[1]:
-                                    host_sex = 'male'
-                                elif 'gender' in GBQualifier_value_text_list[1]:
+
+       elif 'gender' in GBQualifier_value_text_list[1]:
                                     host_sex_one_lecter = GBQualifier_value_text_list[1].split(':')[-1].strip()
                                     if host_sex_one_lecter in ['F', 'M']:
                                         host_sex = 'female' if host_sex_one_lecter == 'F' else 'male'