about summary refs log tree commit diff
path: root/scripts/create_sra_metadata
diff options
context:
space:
mode:
author AndreaGuarracino 2020-11-13 22:04:36 +0100
committer AndreaGuarracino 2020-11-13 22:04:36 +0100
commit a5f5a85b9db5e84f3680e85b7116f324fd6d2e2e (patch)
tree e2544a90619a2001a5a3cb2332c681d0128ff1a5 /scripts/create_sra_metadata
parent 852fbbd14a174addcfe729ab2b54e556228984a4 (diff)
download bh20-seq-resource-a5f5a85b9db5e84f3680e85b7116f324fd6d2e2e.tar.gz
bh20-seq-resource-a5f5a85b9db5e84f3680e85b7116f324fd6d2e2e.tar.lz
bh20-seq-resource-a5f5a85b9db5e84f3680e85b7116f324fd6d2e2e.zip
Do not create YAML files for samples with a collection date before December 2019
Diffstat (limited to 'scripts/create_sra_metadata')
-rw-r--r-- scripts/create_sra_metadata/create_sra_metadata.py 11
1 files changed, 11 insertions, 0 deletions
diff --git a/scripts/create_sra_metadata/create_sra_metadata.py b/scripts/create_sra_metadata/create_sra_metadata.py
index 554aea3..77cdf0d 100644
--- a/scripts/create_sra_metadata/create_sra_metadata.py
+++ b/scripts/create_sra_metadata/create_sra_metadata.py
@@ -14,6 +14,7 @@ from dateutil.parser import parse
import xml.etree.ElementTree as ET
import json
import gzip
+from datetime import datetime
import sys
sys.path.append('../')
@@ -23,6 +24,8 @@ dir_yaml = 'yaml'
date = '2020.07.09'
+min_acceptable_collection_date = datetime(2019, 12, 1)
+
# Query on SRA: 'txid2697049[Organism]' (https://www.ncbi.nlm.nih.gov/sra/?term=txid2697049%5BOrganism%5D)
# Query on SRA: 'txid2697049[Organism:noexp] NOT 0[Mbases ' (https://www.ncbi.nlm.nih.gov/sra/?term=txid2697049%5BOrganism:noexp%5D%20NOT%200[Mbases)
# -> Send to -> File -> Full XML -> Create File
@@ -283,6 +286,14 @@ for i, EXPERIMENT_PACKAGE in enumerate(EXPERIMENT_PACKAGE_SET):
if accession not in not_created_accession_dict:
not_created_accession_dict[accession] = []
not_created_accession_dict[accession].append('collection_date not found')
+ else:
+     year, month, day = [int(x) for x in info_for_yaml_dict['sample']['collection_date'].split('-')]
+     collection_date_in_yaml = datetime(year, month, day)
+     # Flag the accession only when its sample predates the cutoff; without this test every dated sample is rejected.
+     if collection_date_in_yaml < min_acceptable_collection_date:
+         if accession not in not_created_accession_dict:
+             not_created_accession_dict[accession] = []
+         not_created_accession_dict[accession].append('collection_date too early')
if 'sample_sequencing_technology' not in info_for_yaml_dict['technology']:
# print(accession_version, ' - technology not found')