about summary refs log tree commit diff
path: root/workflows
diff options
context:
space:
mode:
author: Pjotr Prins, 2021-01-03 08:05:51 +0000
committer: Pjotr Prins, 2021-01-03 08:05:51 +0000
commit: a5ba1a8062e7116c2951762f86a6ae6d1638261d (patch)
tree: 6315a062ff00e72e791e68d1fb7fd75ea47a9560 /workflows
parent: d55a1b6556e6cd6e09405cb1f4bcf58d52892331 (diff)
download: bh20-seq-resource-a5ba1a8062e7116c2951762f86a6ae6d1638261d.tar.gz
bh20-seq-resource-a5ba1a8062e7116c2951762f86a6ae6d1638261d.tar.lz
bh20-seq-resource-a5ba1a8062e7116c2951762f86a6ae6d1638261d.zip
genbank: submitter info
Diffstat (limited to 'workflows')
-rw-r--r-- workflows/pull-data/genbank/genbank.py | 16
-rw-r--r-- workflows/pull-data/genbank/ref.py | 11
2 files changed, 16 insertions, 11 deletions
diff --git a/workflows/pull-data/genbank/genbank.py b/workflows/pull-data/genbank/genbank.py
index 2d46f3d..8f6ba06 100644
--- a/workflows/pull-data/genbank/genbank.py
+++ b/workflows/pull-data/genbank/genbank.py
@@ -78,6 +78,22 @@ def get_metadata(id, gbseq):
     sample.collection_location = "FIXME"
 
     submitter.authors = [n.text for n in gbseq.findall(".//GBAuthor")]
+    # <GBReference_journal>Submitted (28-OCT-2020) MDU-PHL, The Peter
+    #   Doherty Institute for Infection and Immunity, 792 Elizabeth
+    #   Street, Melbourne, Vic 3000, Australia
+    # </GBReference_journal>
+    try:
+        n = gbseq.find(".//GBReference_journal").text
+        # print(n,file=sys.stderr)
+        if n != 'Unpublished':
+            institute,address = n.split(',',1)
+            submitter.submitter_name = institute.split(') ')[1]
+            submitter.submitter_address = address.strip()
+    except AttributeError:
+        pass
+    except ValueError:
+        submitter.additional_submitter_information = n
+        pass
 
     # --- Dates
     n = gbseq.find("./GBSeq_create-date")
diff --git a/workflows/pull-data/genbank/ref.py b/workflows/pull-data/genbank/ref.py
index e998d37..66c9fb0 100644
--- a/workflows/pull-data/genbank/ref.py
+++ b/workflows/pull-data/genbank/ref.py
@@ -1,16 +1,5 @@
 # ---- BELOW IS JUST FOR REFERENCE ----
 
-                GBReference = GBSeq_references.find('GBReference')
-                if GBReference is not None:
-                    GBReference_journal = GBReference.find('GBReference_journal')
-
-                    if GBReference_journal is not None and GBReference_journal.text != 'Unpublished':
-                        if 'Submitted' in GBReference_journal.text:
-                            submitter['submitter_name'] = ["{}".format(GBReference_journal.text.split(') ')[1].split(',')[0].strip())]
-                            submitter['submitter_address'] = ','.join(GBReference_journal.text.split(') ')[1].split(',')[1:]).strip()
-                        else:
-                            submitter['additional_submitter_information'] = GBReference_journal.text
-
             # This script download and prepare data and metadata for assemblies samples
             technology['assembly_method'] = 'http://purl.obolibrary.org/obo/GENEPIO_0001628'