about | summary | refs | log | tree | commit | diff
path: root/workflows/pull-data/genbank
diff options
context:
space:
mode:
author Pjotr Prins 2021-01-03 08:05:51 +0000
committer Pjotr Prins 2021-01-03 08:05:51 +0000
commit a5ba1a8062e7116c2951762f86a6ae6d1638261d (patch)
tree 6315a062ff00e72e791e68d1fb7fd75ea47a9560 /workflows/pull-data/genbank
parent d55a1b6556e6cd6e09405cb1f4bcf58d52892331 (diff)
download bh20-seq-resource-a5ba1a8062e7116c2951762f86a6ae6d1638261d.tar.gz
bh20-seq-resource-a5ba1a8062e7116c2951762f86a6ae6d1638261d.tar.lz
bh20-seq-resource-a5ba1a8062e7116c2951762f86a6ae6d1638261d.zip
genbank: submitter info
Diffstat (limited to 'workflows/pull-data/genbank')
-rw-r--r-- workflows/pull-data/genbank/genbank.py 16
-rw-r--r-- workflows/pull-data/genbank/ref.py 11
2 files changed, 16 insertions, 11 deletions
diff --git a/workflows/pull-data/genbank/genbank.py b/workflows/pull-data/genbank/genbank.py
index 2d46f3d..8f6ba06 100644
--- a/workflows/pull-data/genbank/genbank.py
+++ b/workflows/pull-data/genbank/genbank.py
@@ -78,6 +78,22 @@ def get_metadata(id, gbseq):
sample.collection_location = "FIXME"
submitter.authors = [n.text for n in gbseq.findall(".//GBAuthor")]
+ # <GBReference_journal>Submitted (28-OCT-2020) MDU-PHL, The Peter
+ # Doherty Institute for Infection and Immunity, 792 Elizabeth
+ # Street, Melbourne, Vic 3000, Australia
+ # </GBReference_journal>
+ try:
+ n = gbseq.find(".//GBReference_journal").text
+ # print(n,file=sys.stderr)
+ if n != 'Unpublished':
+ institute,address = n.split(',',1)
+ submitter.submitter_name = institute.split(') ')[1]
+ submitter.submitter_address = address.strip()
+ except AttributeError:
+ pass
+ except ValueError:
+ submitter.additional_submitter_information = n
+ pass
# --- Dates
n = gbseq.find("./GBSeq_create-date")
diff --git a/workflows/pull-data/genbank/ref.py b/workflows/pull-data/genbank/ref.py
index e998d37..66c9fb0 100644
--- a/workflows/pull-data/genbank/ref.py
+++ b/workflows/pull-data/genbank/ref.py
@@ -1,16 +1,5 @@
# ---- BELOW IS JUST FOR REFERENCE ----
- GBReference = GBSeq_references.find('GBReference')
- if GBReference is not None:
- GBReference_journal = GBReference.find('GBReference_journal')
-
- if GBReference_journal is not None and GBReference_journal.text != 'Unpublished':
- if 'Submitted' in GBReference_journal.text:
- submitter['submitter_name'] = ["{}".format(GBReference_journal.text.split(') ')[1].split(',')[0].strip())]
- submitter['submitter_address'] = ','.join(GBReference_journal.text.split(') ')[1].split(',')[1:]).strip()
- else:
- submitter['additional_submitter_information'] = GBReference_journal.text
-
# This script download and prepare data and metadata for assemblies samples
technology['assembly_method'] = 'http://purl.obolibrary.org/obo/GENEPIO_0001628'