import os
def is_integer(string_to_check):
    """Return whether ``int()`` can convert ``string_to_check``.

    Args:
        string_to_check: The value to test, normally a string.

    Returns:
        True if ``int(string_to_check)`` succeeds, False otherwise.
    """
    try:
        int(string_to_check)
    except (ValueError, TypeError):
        # ValueError: text that is not an integer literal (e.g. 'abc', '1.5').
        # TypeError: a value int() cannot convert at all (e.g. None, a list);
        # such a value is not an integer either, so report False instead of
        # letting the exception escape from a predicate.
        return False
    return True
def chunks(lst, n):
    """Yield consecutive slices of ``lst``, each holding at most ``n`` items.

    The last slice is shorter when ``len(lst)`` is not a multiple of ``n``.
    """
    total = len(lst)
    for start in range(0, total, n):
        stop = start + n
        yield lst[start:stop]
def _split_term_and_uri(line):
    """Split one dictionary line into a ``(term, uri)`` pair.

    Raises:
        ValueError: If the line does not split into exactly two fields.
    """
    record = line.strip('\n')
    # More than one comma means the term itself contains commas; such terms
    # are expected to be double-quoted, so split on the closing quote + comma.
    separator = '",' if len(record.split(',')) > 2 else ','
    term, uri = record.split(separator)
    return term.strip('"'), uri


def _read_term_uri_pairs(path_dict_xxx_csv):
    """Read all ``(term, uri)`` pairs of one dictionary file, in file order.

    Raises:
        ValueError: If a non-blank line cannot be parsed; the message names
            the file and the line number.
    """
    term_uri_pairs = []
    with open(path_dict_xxx_csv) as f:
        for line_number, line in enumerate(f, start=1):
            # Blank lines (e.g. a trailing empty line) carry no entry.
            if not line.strip():
                continue
            try:
                term_uri_pairs.append(_split_term_and_uri(line))
            except ValueError as err:
                raise ValueError(
                    f'{path_dict_xxx_csv}:{line_number}: '
                    f'cannot parse dictionary line {line!r}'
                ) from err
    return term_uri_pairs


def check_and_get_ontology_dictionaries(dir_ontology_dictionaries):
    """
    Load the ontology dictionaries and warn about duplicated terms.

    Every ``*.csv`` file in ``dir_ontology_dictionaries`` is read as a
    dictionary whose lines have the form ``term,uri`` (or ``"term",uri`` when
    the term contains commas). Warnings are printed for terms that occur more
    than once across all dictionaries, and for terms that occur more than once
    inside a single dictionary; in the latter case the first entry is kept.

    Args:
        dir_ontology_dictionaries: Directory containing the ``.csv`` files.

    Returns:
        A dict mapping each field (the file name up to its first ``.``) to a
        dict mapping term to URI. If two files yield the same field name, the
        one processed later replaces the earlier one.

    Raises:
        ValueError: If a non-blank line cannot be split into term and URI.
    """
    # Sorted so that the order of messages does not depend on the arbitrary
    # order in which os.listdir returns the entries.
    path_dict_xxx_csv_list = sorted(
        os.path.join(dir_ontology_dictionaries, name_xxx_csv)
        for name_xxx_csv in os.listdir(dir_ontology_dictionaries)
        if name_xxx_csv.endswith('.csv')
    )

    # Parse each file once; check for terms repeated anywhere in the
    # dictionaries while doing so.
    path_to_term_uri_pairs = {}
    seen_terms = set()
    for path_dict_xxx_csv in path_dict_xxx_csv_list:
        print(f'Read {path_dict_xxx_csv}')
        term_uri_pairs = _read_term_uri_pairs(path_dict_xxx_csv)
        path_to_term_uri_pairs[path_dict_xxx_csv] = term_uri_pairs

        for term, _ in term_uri_pairs:
            if term in seen_terms:
                print(f'Warning: in the dictionaries there are more entries for the same term ({term}).')
                continue
            seen_terms.add(term)

    # Prepare separated dictionaries (to avoid, for example, that a valid IRI
    # for species is accepted as specimen)
    field_to_term_to_uri_dict = {}
    for path_dict_xxx_csv, term_uri_pairs in path_to_term_uri_pairs.items():
        field = os.path.basename(path_dict_xxx_csv).split('.')[0]
        term_to_uri_dict = {}
        field_to_term_to_uri_dict[field] = term_to_uri_dict

        for term, uri in term_uri_pairs:
            if term in term_to_uri_dict:
                print(f'Warning: in the {field} dictionary there are more entries for the same term ({term}).')
                continue
            term_to_uri_dict[term] = uri

    return field_to_term_to_uri_dict