From ba4161b1660c3a67090dd3715e9862906fb1cc5f Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Sun, 3 Jan 2021 11:17:29 +0000
Subject: genbank: add specimen_source (from isolation_source qualifier); drop collection_date = None fallbacks

---
 workflows/pull-data/genbank/genbank.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'workflows/pull-data/genbank')

diff --git a/workflows/pull-data/genbank/genbank.py b/workflows/pull-data/genbank/genbank.py
index 80432de..26cb5e7 100644
--- a/workflows/pull-data/genbank/genbank.py
+++ b/workflows/pull-data/genbank/genbank.py
@@ -28,7 +28,8 @@ Example of an output JSON:
     ],
     "collection_location": "http://www.wikidata.org/entity/Q649",
     "collection_date": "2020-04-17",
-    "collecting_institution": "N.A.Kovtun Clinical Hospital 1 of Departament of President Affairs"
+    "collecting_institution": "N.A.Kovtun Clinical Hospital 1 of Departament of President Affairs",
+    "specimen_source": ["http://purl.obolibrary.org/obo/NCIT_C155831"]
   },
   "virus": {
     "virus_strain": "SARS-CoV-2/human/RUS/20200417_10/2020",
@@ -145,10 +146,8 @@ def get_metadata(id, gbseq):
         sample.collection_date = str(date)
     except dateutil.parser._parser.ParserError as e:
         warn("No collection_date: ",str(e))
-        sample.collection_date = None
     except AttributeError:
         warn("Missing collection_date")
-        sample.collection_date = None
 
     # --- Host info
     # - Homo sapiens
@@ -186,7 +185,8 @@ def get_metadata(id, gbseq):
     if n: virus.virus_strain = n
     n = fetch("virus_species", ".//GBQualifier/GBQualifier_name/[.='db_xref']/../GBQualifier_value")
     if n: virus.virus_species = "http://purl.obolibrary.org/obo/NCBITaxon_"+n.split('taxon:')[1]
-
+    n = fetch("specimen_source", ".//GBQualifier/GBQualifier_name/[.='isolation_source']/../GBQualifier_value")
+    if n: sample.specimen_source = n
 
     info = {
         'id': 'placeholder',
-- 
cgit v1.2.3