diff options
| -rw-r--r-- | workflows/pull-data/genbank/utils.py | 22 | 
1 files changed, 15 insertions, 7 deletions
| diff --git a/workflows/pull-data/genbank/utils.py b/workflows/pull-data/genbank/utils.py index 3efc67a..96920a5 100644 --- a/workflows/pull-data/genbank/utils.py +++ b/workflows/pull-data/genbank/utils.py @@ -1,5 +1,6 @@ import os + def is_integer(string_to_check): try: int(string_to_check) @@ -7,19 +8,26 @@ def is_integer(string_to_check): except ValueError: return False + def chunks(lst, n): for i in range(0, len(lst), n): yield lst[i:i + n] + def check_and_get_ontology_dictionaries(dir_ontology_dictionaries): - # Check duplicated entry looking at all dictionaries + """ + Check duplicated entry by looking in all dictionaries + """ + field_to_term_to_uri_dict = {} - path_dict_xxx_csv_list = [os.path.join(dir_ontology_dictionaries, name_xxx_csv) for name_xxx_csv in - os.listdir(dir_ontology_dictionaries) if name_xxx_csv.endswith('.csv')] + path_dict_xxx_csv_list = [ + os.path.join(dir_ontology_dictionaries, name_xxx_csv) for name_xxx_csv in + os.listdir(dir_ontology_dictionaries) if name_xxx_csv.endswith('.csv') + ] for path_dict_xxx_csv in path_dict_xxx_csv_list: - print('Read {}'.format(path_dict_xxx_csv)) + print(f'Read {path_dict_xxx_csv}') with open(path_dict_xxx_csv) as f: for line in f: @@ -31,7 +39,7 @@ def check_and_get_ontology_dictionaries(dir_ontology_dictionaries): term = term.strip('"') if term in field_to_term_to_uri_dict: - print('Warning: in the dictionaries there are more entries for the same term ({}).'.format(term)) + print(f'Warning: in the dictionaries there are more entries for the same term ({term}).') continue field_to_term_to_uri_dict[term] = uri @@ -54,9 +62,9 @@ def check_and_get_ontology_dictionaries(dir_ontology_dictionaries): term = term.strip('"') if term in field_to_term_to_uri_dict[field]: - print('Warning: in the {} dictionary there are more entries for the same term ({}).'.format(field, term)) + print(f'Warning: in the {field} dictionary there are more entries for the same term ({term}).') continue field_to_term_to_uri_dict[field][term] = uri - return field_to_term_to_uri_dict \ No newline at end of file + return field_to_term_to_uri_dict | 
