diff options
author | Pjotr Prins | 2021-01-03 08:05:51 +0000 |
---|---|---|
committer | Pjotr Prins | 2021-01-03 08:05:51 +0000 |
commit | a5ba1a8062e7116c2951762f86a6ae6d1638261d (patch) | |
tree | 6315a062ff00e72e791e68d1fb7fd75ea47a9560 | |
parent | d55a1b6556e6cd6e09405cb1f4bcf58d52892331 (diff) | |
download | bh20-seq-resource-a5ba1a8062e7116c2951762f86a6ae6d1638261d.tar.gz bh20-seq-resource-a5ba1a8062e7116c2951762f86a6ae6d1638261d.tar.lz bh20-seq-resource-a5ba1a8062e7116c2951762f86a6ae6d1638261d.zip |
genbank: submitter info
-rw-r--r-- | workflows/pull-data/genbank/genbank.py | 16 | ||||
-rw-r--r-- | workflows/pull-data/genbank/ref.py | 11 |
2 files changed, 16 insertions, 11 deletions
diff --git a/workflows/pull-data/genbank/genbank.py b/workflows/pull-data/genbank/genbank.py
index 2d46f3d..8f6ba06 100644
--- a/workflows/pull-data/genbank/genbank.py
+++ b/workflows/pull-data/genbank/genbank.py
@@ -78,6 +78,22 @@ def get_metadata(id, gbseq):
     sample.collection_location = "FIXME"
     submitter.authors = [n.text for n in gbseq.findall(".//GBAuthor")]
+    # <GBReference_journal>Submitted (28-OCT-2020) MDU-PHL, The Peter
+    # Doherty Institute for Infection and Immunity, 792 Elizabeth
+    # Street, Melbourne, Vic 3000, Australia
+    # </GBReference_journal>
+    try:
+        n = gbseq.find(".//GBReference_journal").text
+        # print(n,file=sys.stderr)
+        if n != 'Unpublished':
+            institute,address = n.split(',',1)
+            submitter.submitter_name = institute.split(') ')[1]
+            submitter.submitter_address = address.strip()
+    except AttributeError:
+        pass
+    except ValueError:
+        submitter.additional_submitter_information = n
+        pass
     # --- Dates
     n = gbseq.find("./GBSeq_create-date")
diff --git a/workflows/pull-data/genbank/ref.py b/workflows/pull-data/genbank/ref.py
index e998d37..66c9fb0 100644
--- a/workflows/pull-data/genbank/ref.py
+++ b/workflows/pull-data/genbank/ref.py
@@ -1,16 +1,5 @@
 # ---- BELOW IS JUST FOR REFERENCE ----
-GBReference = GBSeq_references.find('GBReference')
-if GBReference is not None:
-    GBReference_journal = GBReference.find('GBReference_journal')
-
-    if GBReference_journal is not None and GBReference_journal.text != 'Unpublished':
-        if 'Submitted' in GBReference_journal.text:
-            submitter['submitter_name'] = ["{}".format(GBReference_journal.text.split(') ')[1].split(',')[0].strip())]
-            submitter['submitter_address'] = ','.join(GBReference_journal.text.split(') ')[1].split(',')[1:]).strip()
-        else:
-            submitter['additional_submitter_information'] = GBReference_journal.text
-
 # This script download and prepare data and metadata for assemblies samples
 technology['assembly_method'] = 'http://purl.obolibrary.org/obo/GENEPIO_0001628'