about | summary | refs | log | tree | commit | diff
path: root/workflows
diff options
context:
space:
mode:
Diffstat (limited to 'workflows')
-rw-r--r-- workflows/pull-data/genbank/utils.py | 22
1 files changed, 15 insertions, 7 deletions
diff --git a/workflows/pull-data/genbank/utils.py b/workflows/pull-data/genbank/utils.py
index 3efc67a..96920a5 100644
--- a/workflows/pull-data/genbank/utils.py
+++ b/workflows/pull-data/genbank/utils.py
@@ -1,5 +1,6 @@
import os
+
def is_integer(string_to_check):
try:
int(string_to_check)
@@ -7,19 +8,26 @@ def is_integer(string_to_check):
except ValueError:
return False
+
def chunks(lst, n):
for i in range(0, len(lst), n):
yield lst[i:i + n]
+
def check_and_get_ontology_dictionaries(dir_ontology_dictionaries):
- # Check duplicated entry looking at all dictionaries
+ """
+ Check duplicated entry by looking in all dictionaries
+ """
+
field_to_term_to_uri_dict = {}
- path_dict_xxx_csv_list = [os.path.join(dir_ontology_dictionaries, name_xxx_csv) for name_xxx_csv in
- os.listdir(dir_ontology_dictionaries) if name_xxx_csv.endswith('.csv')]
+ path_dict_xxx_csv_list = [
+ os.path.join(dir_ontology_dictionaries, name_xxx_csv) for name_xxx_csv in
+ os.listdir(dir_ontology_dictionaries) if name_xxx_csv.endswith('.csv')
+ ]
for path_dict_xxx_csv in path_dict_xxx_csv_list:
- print('Read {}'.format(path_dict_xxx_csv))
+ print(f'Read {path_dict_xxx_csv}')
with open(path_dict_xxx_csv) as f:
for line in f:
@@ -31,7 +39,7 @@ def check_and_get_ontology_dictionaries(dir_ontology_dictionaries):
term = term.strip('"')
if term in field_to_term_to_uri_dict:
- print('Warning: in the dictionaries there are more entries for the same term ({}).'.format(term))
+ print(f'Warning: in the dictionaries there are more entries for the same term ({term}).')
continue
field_to_term_to_uri_dict[term] = uri
@@ -54,9 +62,9 @@ def check_and_get_ontology_dictionaries(dir_ontology_dictionaries):
term = term.strip('"')
if term in field_to_term_to_uri_dict[field]:
- print('Warning: in the {} dictionary there are more entries for the same term ({}).'.format(field, term))
+ print(f'Warning: in the {field} dictionary there are more entries for the same term ({term}).')
continue
field_to_term_to_uri_dict[field][term] = uri
- return field_to_term_to_uri_dict
\ No newline at end of file
+ return field_to_term_to_uri_dict