about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author AndreaGuarracino 2020-09-04 10:37:35 +0200
committer AndreaGuarracino 2020-09-04 10:37:35 +0200
commit 1430c62ff9245bfecb1d41cc87bbafafcfc81ca3 (patch)
tree acb706ad5809093ba1686a5ff3d30bd4c87e17b8
parent 842f1a25cc2632771e8a816dbb4898db81d7a6c2 (diff)
download bh20-seq-resource-1430c62ff9245bfecb1d41cc87bbafafcfc81ca3.tar.gz
bh20-seq-resource-1430c62ff9245bfecb1d41cc87bbafafcfc81ca3.tar.lz
bh20-seq-resource-1430c62ff9245bfecb1d41cc87bbafafcfc81ca3.zip
sra script updated for managing more locations
-rw-r--r-- scripts/create_sra_metadata/create_sra_metadata.py 8
-rw-r--r-- scripts/dict_ontology_standardization/ncbi_countries.csv 1
2 files changed, 7 insertions, 2 deletions
diff --git a/scripts/create_sra_metadata/create_sra_metadata.py b/scripts/create_sra_metadata/create_sra_metadata.py
index 2a05d26..09cc51b 100644
--- a/scripts/create_sra_metadata/create_sra_metadata.py
+++ b/scripts/create_sra_metadata/create_sra_metadata.py
@@ -85,6 +85,8 @@ not_created_accession_dict = {}
run_accession_set = set()
run_accession_to_downloadble_file_url_dict = {}
+num_yaml_created = 0
+
for i, EXPERIMENT_PACKAGE in enumerate(EXPERIMENT_PACKAGE_SET):
#print(i, EXPERIMENT_PACKAGE)
@@ -209,7 +211,7 @@ for i, EXPERIMENT_PACKAGE in enumerate(EXPERIMENT_PACKAGE_SET):
info_for_yaml_dict['sample']['additional_collection_information'] += "; The 'collection_date' is estimated (the original date was: {})".format(VALUE_text)
else:
info_for_yaml_dict['sample']['additional_collection_information'] = "The 'collection_date' is estimated (the original date was: {})".format(VALUE_text)
- elif TAG_text == 'geo_loc_name':
+ elif TAG_text in ['geo_loc_name', 'geographic location (country and/or sea)', 'geographic location (region and locality)']:
if ': ' in VALUE_text:
VALUE_text = VALUE_text.replace(': ', ':')
@@ -301,6 +303,8 @@ for i, EXPERIMENT_PACKAGE in enumerate(EXPERIMENT_PACKAGE_SET):
not_created_accession_dict[accession].append('host_species not found')
if accession not in not_created_accession_dict:
+ num_yaml_created += 1
+
with open(os.path.join(dir_yaml, '{}.yaml'.format(accession)), 'w') as fw:
json.dump(info_for_yaml_dict, fw, indent=2)
@@ -316,4 +320,4 @@ if len(not_created_accession_dict) > 0:
with open(path_not_created_accession_tsv, 'w') as fw:
fw.write('\n'.join(['\t'.join([accession_version, ','.join(missing_info_list)]) for accession_version, missing_info_list in not_created_accession_dict.items()]))
-
+print('Num. YAML files created: {}'.format(num_yaml_created))
diff --git a/scripts/dict_ontology_standardization/ncbi_countries.csv b/scripts/dict_ontology_standardization/ncbi_countries.csv
index 400d732..58a64e3 100644
--- a/scripts/dict_ontology_standardization/ncbi_countries.csv
+++ b/scripts/dict_ontology_standardization/ncbi_countries.csv
@@ -708,6 +708,7 @@ USA:WY,http://www.wikidata.org/entity/Q1214
Uzbekistan,http://www.wikidata.org/entity/Q265
Vanuatu,http://www.wikidata.org/entity/Q686
Vatican City,http://www.wikidata.org/entity/Q237
+Wales,http://www.wikidata.org/entity/Q25
Venezuela,http://www.wikidata.org/entity/Q717
Viet nam,http://www.wikidata.org/entity/Q881
Viet Nam,http://www.wikidata.org/entity/Q881