about summary refs log tree commit diff
path: root/scripts
diff options
context:
space:
mode:
author AndreaGuarracino 2020-07-06 15:24:06 +0200
committer AndreaGuarracino 2020-07-06 15:24:06 +0200
commit 4e2ac9bdef97175f5d762c6e7f065a83512a3c40 (patch)
tree e8b609c0e089bc9d871a5d78211bccb539a21e91 /scripts
parent c8b12b4e2b7e2e51315fe738f8bb4e37cb78ae55 (diff)
download bh20-seq-resource-4e2ac9bdef97175f5d762c6e7f065a83512a3c40.tar.gz
bh20-seq-resource-4e2ac9bdef97175f5d762c6e7f065a83512a3c40.tar.lz
bh20-seq-resource-4e2ac9bdef97175f5d762c6e7f065a83512a3c40.zip
renamed sra script; added seq technology in its additional information field if the term …
Diffstat (limited to 'scripts')
-rw-r--r-- scripts/create_sra_metadata/SraExperimentPackage.2020.07.05.xml.gz (renamed from scripts/download_sra_data/SraExperimentPackage.2020.07.05.xml.gz) bin 6502056 -> 6502056 bytes
-rw-r--r-- scripts/create_sra_metadata/create_sra_metadata.py (renamed from scripts/download_sra_data/download_sra_data.py) 6
2 files changed, 5 insertions, 1 deletions
diff --git a/scripts/download_sra_data/SraExperimentPackage.2020.07.05.xml.gz b/scripts/create_sra_metadata/SraExperimentPackage.2020.07.05.xml.gz
index 88acb18..88acb18 100644
--- a/scripts/download_sra_data/SraExperimentPackage.2020.07.05.xml.gz
+++ b/scripts/create_sra_metadata/SraExperimentPackage.2020.07.05.xml.gz
Binary files differ
diff --git a/scripts/download_sra_data/download_sra_data.py b/scripts/create_sra_metadata/create_sra_metadata.py
index e36afb6..470980e 100644
--- a/scripts/download_sra_data/download_sra_data.py
+++ b/scripts/create_sra_metadata/create_sra_metadata.py
@@ -92,6 +92,9 @@ for i, EXPERIMENT_PACKAGE in enumerate(EXPERIMENT_PACKAGE_SET):
VALUE_text = VALUE.text
if TAG_text in ['host', 'host scientific name']:
+ if VALUE_text.lower() in ['homo sapien', 'homosapiens']:
+ VALUE_text = 'Homo sapiens'
+
if VALUE_text in term_to_uri_dict:
info_for_yaml_dict['host']['host_species'] = term_to_uri_dict[VALUE_text]
else:
@@ -105,7 +108,7 @@ for i, EXPERIMENT_PACKAGE in enumerate(EXPERIMENT_PACKAGE_SET):
if VALUE_text.lower() not in ['not applicable', 'missing', 'na', 'unknown', 'not provided']:
value_to_insert = VALUE_text
- if value_to_insert.lower() in ['homo sapien']:
+ if value_to_insert.lower() in ['homo sapien', 'homosapiens']:
value_to_insert = 'Homo sapiens'
if value_to_insert in term_to_uri_dict:
@@ -194,6 +197,7 @@ for i, EXPERIMENT_PACKAGE in enumerate(EXPERIMENT_PACKAGE_SET):
if INSTRUMENT_MODEL in term_to_uri_dict:
info_for_yaml_dict['technology']['sample_sequencing_technology'] = [term_to_uri_dict[INSTRUMENT_MODEL]]
else:
+ info_for_yaml_dict['technology']['additional_technology_information'] = INSTRUMENT_MODEL
missing_value_list.append('\t'.join([accession, 'sample_sequencing_technology', INSTRUMENT_MODEL]))
#else:
# print(accession, 'Missing INSTRUMENT_MODEL', info_for_yaml_dict)