diff options
author | Pjotr Prins | 2021-01-03 07:40:34 +0000 |
---|---|---|
committer | Pjotr Prins | 2021-01-03 07:40:34 +0000 |
commit | d55a1b6556e6cd6e09405cb1f4bcf58d52892331 (patch) | |
tree | d426282fa16e5ab8756297f051c72c0d9f161a7c /workflows/pull-data/genbank/genbank.py | |
parent | 6ecc2338b78f3fadbfb411853c4391672ad71cea (diff) | |
download | bh20-seq-resource-d55a1b6556e6cd6e09405cb1f4bcf58d52892331.tar.gz bh20-seq-resource-d55a1b6556e6cd6e09405cb1f4bcf58d52892331.tar.lz bh20-seq-resource-d55a1b6556e6cd6e09405cb1f4bcf58d52892331.zip |
genbank: get authors
Diffstat (limited to 'workflows/pull-data/genbank/genbank.py')
-rw-r--r-- | workflows/pull-data/genbank/genbank.py | 24 |
1 files changed, 11 insertions, 13 deletions
```diff
diff --git a/workflows/pull-data/genbank/genbank.py b/workflows/pull-data/genbank/genbank.py
index 5ba1ef6..2d46f3d 100644
--- a/workflows/pull-data/genbank/genbank.py
+++ b/workflows/pull-data/genbank/genbank.py
@@ -51,6 +51,10 @@ Example of an output JSON:
         "submitter_address": "Pirogov Russian National Research Medical University, Ostrovityanova 1, Moscow 117997, Russia"
     }
 }
+
+Note: missing data should be None! Do not fill in other data by
+'guessing'.
+
 """
 
 def get_metadata(id, gbseq):
@@ -73,29 +77,23 @@ def get_metadata(id, gbseq):
     # </GBQualifier>
     sample.collection_location = "FIXME"
 
-    # --- Handling dates ---
-    # <GBSeq_create-date>29-JUL-2020</GBSeq_create-date>
+    submitter.authors = [n.text for n in gbseq.findall(".//GBAuthor")]
+
+    # --- Dates
     n = gbseq.find("./GBSeq_create-date")
     creation_date = dateparse(n.text).date()
-
-    # <GBSeq_update-date>30-JUL-2020</GBSeq_update-date>
     n = gbseq.find("./GBSeq_update-date")
     update_date = dateparse(n.text).date()
-
-    # <GBQualifier>
-    # <GBQualifier_name>collection_date</GBQualifier_name>
-    # <GBQualifier_value>2020-04-01</GBQualifier_value>
-    # </GBQualifier>
     n = gbseq.find(".//GBQualifier/GBQualifier_name/[.='collection_date']/../GBQualifier_value")
     try:
         date = dateparse(n.text).date()
         sample.collection_date = str(date)
     except dateutil.parser._parser.ParserError as e:
-        warn(str(e))
-        sample.collection_date = str(creation_date)
+        warn("No collection_date: ",str(e))
+        sample.collection_date = None
     except AttributeError:
-        warn("Missing collection_date - used creation_date instead")
-        sample.collection_date = str(creation_date)
+        warn("Missing collection_date")
+        sample.collection_date = None
 
     info = {
         'id': 'placeholder',
```