about summary refs log tree commit diff
path: root/workflows/tools/normalize
diff options
context:
space:
mode:
Diffstat (limited to 'workflows/tools/normalize')
-rw-r--r-- workflows/tools/normalize/mapping.py 59
1 files changed, 44 insertions, 15 deletions
diff --git a/workflows/tools/normalize/mapping.py b/workflows/tools/normalize/mapping.py
index bc82fea..3ed09c2 100644
--- a/workflows/tools/normalize/mapping.py
+++ b/workflows/tools/normalize/mapping.py
@@ -17,14 +17,36 @@ import re
 import types
 
 def host_species(host,mapping):
+    Homo_sapiens = "http://purl.obolibrary.org/obo/NCBITaxon_9606"
+
+    SPECIES_TERMS = { # since Python 3.7 dict is ordered! Note that re is allowed
+        "human": Homo_sapiens,
+        "sapiens": Homo_sapiens,
+        "Mustela lutreola": "http://purl.obolibrary.org/obo/NCBITaxon_9666",
+        "Manis javanica": "http://purl.obolibrary.org/obo/NCBITaxon_9974",
+        "Felis catus": "http://purl.obolibrary.org/obo/NCBITaxon_9685",
+        "Panthera tigris": "http://purl.obolibrary.org/obo/NCBITaxon_419130",
+        "Canis lupus": "http://purl.obolibrary.org/obo/NCBITaxon_9615",
+        # Mink:
+        "vison": "http://purl.obolibrary.org/obo/NCBITaxon_452646"
+        }
+
     warning = None
     host = types.SimpleNamespace(**host)
     if not 'obolibrary' in host.host_species:
         key = host.host_species
+        host.host_species = None
         if key in mapping:
             host.host_species = mapping[key]
         else:
+            for term in SPECIES_TERMS:
+                p = re.compile(".*?"+term,re.IGNORECASE)
+                m = p.match(key)
+                if m: host.host_species = SPECIES_TERMS[term]
+        if not host.host_species:
             warning = f"No URI mapping for host_species <{key}>"
+        if host.host_species == Unknown or host.host_species == None:
+            del(host.host_species)
     return host.__dict__,warning
 
 Unknown = "Not found" # So as not to create a warning
@@ -35,8 +57,10 @@ def specimen_source(sample,mapping):
     Nasopharyngeal = "http://purl.obolibrary.org/obo/NCIT_C155831"
     Bronchoalveolar_Lavage_Fluid = "http://purl.obolibrary.org/obo/NCIT_C13195"
     Saliva = "http://purl.obolibrary.org/obo/NCIT_C13275"
-    Nasal_Swab = "http://purl.obolibrary.org/obo/NCIT_C132119"
+    Nasal_Swab = Nasopharyngeal # "http://purl.obolibrary.org/obo/NCIT_C132119"
     Frozen_Food = "https://www.wikidata.org/wiki/Q751728"
+    Bronchoalveolar_Lavage = "http://purl.obolibrary.org/obo/NCIT_C13195",
+    Biospecimen = "http://purl.obolibrary.org/obo/NCIT_C70699"
     SPECIMEN_TERMS = { # since Python 3.7 dict is ordered! Note that re is allowed
         "Oronasopharynx": Oronasopharynx,
         "orophar": Oropharyngeal,
@@ -44,28 +68,33 @@ def specimen_source(sample,mapping):
         "\snares": Nasal_Swab,
         "saliva": Saliva,
         "swab": Nasal_Swab,
+        "broncho": Bronchoalveolar_Lavage,
         "seafood": Frozen_Food,
         "packaging": Frozen_Food,
+        "specimen": Biospecimen,
+        "patient": Biospecimen,
         "uknown": Unknown,
         "unknown": Unknown
         }
     warning = None
     sample = types.SimpleNamespace(**sample)
     try:
-        if sample.specimen_source and \
-           not 'obolibrary' in sample.specimen_source and \
-           not 'wikidata' in sample.specimen_source:
-            key = sample.specimen_source
-            sample.specimen_source = None
-            if key in mapping:
-                sample.specimen_source = mapping[key]
-            else:
-                for term in SPECIMEN_TERMS:
-                    p = re.compile(".*?"+term,re.IGNORECASE)
-                    m = p.match(key)
-                    if m: sample.specimen_source = SPECIMEN_TERMS[term]
-        if not sample.specimen_source:
-            warning = f"No URI mapping for specimen_source <{key}>"
+        if sample.specimen_source:
+            keys = sample.specimen_source
+            sample.specimen_source = []
+            for key in keys:
+                if 'obolibrary' in key:
+                    sample.specimen_source.append(key)
+                    continue
+                if key in mapping:
+                    sample.specimen_source.append(mapping[key])
+                else:
+                    for term in SPECIMEN_TERMS:
+                        p = re.compile(".*?"+term,re.IGNORECASE)
+                        m = p.match(key)
+                        if m: sample.specimen_source = [SPECIMEN_TERMS[term]]
+                if len(sample.specimen_source)==0:
+                    warning = f"No URI mapping for specimen_source <{key}>"
         if sample.specimen_source == Unknown or sample.specimen_source == None:
             del(sample.specimen_source)
     except AttributeError: