From a5ba1a8062e7116c2951762f86a6ae6d1638261d Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Sun, 3 Jan 2021 08:05:51 +0000 Subject: genbank: submitter info --- workflows/pull-data/genbank/genbank.py | 16 ++++++++++++++++ workflows/pull-data/genbank/ref.py | 11 ----------- 2 files changed, 16 insertions(+), 11 deletions(-) diff --git a/workflows/pull-data/genbank/genbank.py b/workflows/pull-data/genbank/genbank.py index 2d46f3d..8f6ba06 100644 --- a/workflows/pull-data/genbank/genbank.py +++ b/workflows/pull-data/genbank/genbank.py @@ -78,6 +78,22 @@ def get_metadata(id, gbseq): sample.collection_location = "FIXME" submitter.authors = [n.text for n in gbseq.findall(".//GBAuthor")] + # Submitted (28-OCT-2020) MDU-PHL, The Peter + # Doherty Institute for Infection and Immunity, 792 Elizabeth + # Street, Melbourne, Vic 3000, Australia + # + try: + n = gbseq.find(".//GBReference_journal").text + # print(n,file=sys.stderr) + if n != 'Unpublished': + institute,address = n.split(',',1) + submitter.submitter_name = institute.split(') ')[1] + submitter.submitter_address = address.strip() + except AttributeError: + pass + except ValueError: + submitter.additional_submitter_information = n + pass # --- Dates n = gbseq.find("./GBSeq_create-date") diff --git a/workflows/pull-data/genbank/ref.py b/workflows/pull-data/genbank/ref.py index e998d37..66c9fb0 100644 --- a/workflows/pull-data/genbank/ref.py +++ b/workflows/pull-data/genbank/ref.py @@ -1,16 +1,5 @@ # ---- BELOW IS JUST FOR REFERENCE ---- - GBReference = GBSeq_references.find('GBReference') - if GBReference is not None: - GBReference_journal = GBReference.find('GBReference_journal') - - if GBReference_journal is not None and GBReference_journal.text != 'Unpublished': - if 'Submitted' in GBReference_journal.text: - submitter['submitter_name'] = ["{}".format(GBReference_journal.text.split(') ')[1].split(',')[0].strip())] - submitter['submitter_address'] = ','.join(GBReference_journal.text.split(') ')[1].split(',')[1:]).strip() - else: - submitter['additional_submitter_information'] = GBReference_journal.text - # This script download and prepare data and metadata for assemblies samples technology['assembly_method'] = 'http://purl.obolibrary.org/obo/GENEPIO_0001628' -- cgit v1.2.3