diff options
author | AndreaGuarracino | 2020-09-04 10:37:35 +0200 |
---|---|---|
committer | AndreaGuarracino | 2020-09-04 10:37:35 +0200 |
commit | 1430c62ff9245bfecb1d41cc87bbafafcfc81ca3 (patch) | |
tree | acb706ad5809093ba1686a5ff3d30bd4c87e17b8 | |
parent | 842f1a25cc2632771e8a816dbb4898db81d7a6c2 (diff) | |
download | bh20-seq-resource-1430c62ff9245bfecb1d41cc87bbafafcfc81ca3.tar.gz bh20-seq-resource-1430c62ff9245bfecb1d41cc87bbafafcfc81ca3.tar.lz bh20-seq-resource-1430c62ff9245bfecb1d41cc87bbafafcfc81ca3.zip |
sra script updated for managing more locations
-rw-r--r-- | scripts/create_sra_metadata/create_sra_metadata.py | 8 | ||||
-rw-r--r-- | scripts/dict_ontology_standardization/ncbi_countries.csv | 1 |
2 files changed, 7 insertions, 2 deletions
diff --git a/scripts/create_sra_metadata/create_sra_metadata.py b/scripts/create_sra_metadata/create_sra_metadata.py
index 2a05d26..09cc51b 100644
--- a/scripts/create_sra_metadata/create_sra_metadata.py
+++ b/scripts/create_sra_metadata/create_sra_metadata.py
@@ -85,6 +85,8 @@ not_created_accession_dict = {} run_accession_set = set() run_accession_to_downloadble_file_url_dict = {} +num_yaml_created = 0 + for i, EXPERIMENT_PACKAGE in enumerate(EXPERIMENT_PACKAGE_SET): #print(i, EXPERIMENT_PACKAGE)
@@ -209,7 +211,7 @@ for i, EXPERIMENT_PACKAGE in enumerate(EXPERIMENT_PACKAGE_SET): info_for_yaml_dict['sample']['additional_collection_information'] += "; The 'collection_date' is estimated (the original date was: {})".format(VALUE_text) else: info_for_yaml_dict['sample']['additional_collection_information'] = "The 'collection_date' is estimated (the original date was: {})".format(VALUE_text) - elif TAG_text == 'geo_loc_name': + elif TAG_text in ['geo_loc_name', 'geographic location (country and/or sea)', 'geographic location (region and locality)']: if ': ' in VALUE_text: VALUE_text = VALUE_text.replace(': ', ':')
@@ -301,6 +303,8 @@ for i, EXPERIMENT_PACKAGE in enumerate(EXPERIMENT_PACKAGE_SET): not_created_accession_dict[accession].append('host_species not found') if accession not in not_created_accession_dict: + num_yaml_created += 1 + with open(os.path.join(dir_yaml, '{}.yaml'.format(accession)), 'w') as fw: json.dump(info_for_yaml_dict, fw, indent=2)
@@ -316,4 +320,4 @@ if len(not_created_accession_dict) > 0: with open(path_not_created_accession_tsv, 'w') as fw: fw.write('\n'.join(['\t'.join([accession_version, ','.join(missing_info_list)]) for accession_version, missing_info_list in not_created_accession_dict.items()])) - +print('Num. YAML files created: {}'.format(num_yaml_created))
diff --git a/scripts/dict_ontology_standardization/ncbi_countries.csv b/scripts/dict_ontology_standardization/ncbi_countries.csv
index 400d732..58a64e3 100644
--- a/scripts/dict_ontology_standardization/ncbi_countries.csv
+++ b/scripts/dict_ontology_standardization/ncbi_countries.csv
@@ -708,6 +708,7 @@ USA:WY,http://www.wikidata.org/entity/Q1214 Uzbekistan,http://www.wikidata.org/entity/Q265 Vanuatu,http://www.wikidata.org/entity/Q686 Vatican City,http://www.wikidata.org/entity/Q237 +Wales,http://www.wikidata.org/entity/Q25 Venezuela,http://www.wikidata.org/entity/Q717 Viet nam,http://www.wikidata.org/entity/Q881 Viet Nam,http://www.wikidata.org/entity/Q881 |