about summary refs log tree commit diff
path: root/workflows/pull-data/genbank/genbank.py
diff options
context:
space:
mode:
author Pjotr Prins 2021-01-03 08:05:51 +0000
committer Pjotr Prins 2021-01-03 08:05:51 +0000
commit a5ba1a8062e7116c2951762f86a6ae6d1638261d (patch)
tree 6315a062ff00e72e791e68d1fb7fd75ea47a9560 /workflows/pull-data/genbank/genbank.py
parent d55a1b6556e6cd6e09405cb1f4bcf58d52892331 (diff)
download bh20-seq-resource-a5ba1a8062e7116c2951762f86a6ae6d1638261d.tar.gz
bh20-seq-resource-a5ba1a8062e7116c2951762f86a6ae6d1638261d.tar.lz
bh20-seq-resource-a5ba1a8062e7116c2951762f86a6ae6d1638261d.zip
genbank: submitter info
Diffstat (limited to 'workflows/pull-data/genbank/genbank.py')
-rw-r--r-- workflows/pull-data/genbank/genbank.py 16
1 files changed, 16 insertions, 0 deletions
diff --git a/workflows/pull-data/genbank/genbank.py b/workflows/pull-data/genbank/genbank.py
index 2d46f3d..8f6ba06 100644
--- a/workflows/pull-data/genbank/genbank.py
+++ b/workflows/pull-data/genbank/genbank.py
@@ -78,6 +78,22 @@ def get_metadata(id, gbseq):
     sample.collection_location = "FIXME"
 
     submitter.authors = [n.text for n in gbseq.findall(".//GBAuthor")]
+    # <GBReference_journal>Submitted (28-OCT-2020) MDU-PHL, The Peter
+    #   Doherty Institute for Infection and Immunity, 792 Elizabeth
+    #   Street, Melbourne, Vic 3000, Australia
+    # </GBReference_journal>
+    try:
+        n = gbseq.find(".//GBReference_journal").text
+        # print(n,file=sys.stderr)
+        if n != 'Unpublished':
+            institute,address = n.split(',',1)
+            submitter.submitter_name = institute.split(') ')[1]
+            submitter.submitter_address = address.strip()
+    except AttributeError:
+        pass
+    except ValueError:
+        submitter.additional_submitter_information = n
+        pass
 
     # --- Dates
     n = gbseq.find("./GBSeq_create-date")