about summary refs log tree commit diff
path: root/workflows/tools/normalize
diff options
context:
space:
mode:
author Pjotr Prins 2021-01-04 10:25:36 +0000
committer Pjotr Prins 2021-01-04 10:25:36 +0000
commit f9f27a787fef0ad58c1ae465d8ba1ee4634083ae (patch)
tree 64bba4b6c496c9aa63aeb651bcc8252256a905bf /workflows/tools/normalize
parent bf8f13af6f083d382b4a3900566ef5e329084cbf (diff)
download bh20-seq-resource-f9f27a787fef0ad58c1ae465d8ba1ee4634083ae.tar.gz
bh20-seq-resource-f9f27a787fef0ad58c1ae465d8ba1ee4634083ae.tar.lz
bh20-seq-resource-f9f27a787fef0ad58c1ae465d8ba1ee4634083ae.zip
mapping: no longer requires specimen file for genbank output
Diffstat (limited to 'workflows/tools/normalize')
-rw-r--r-- workflows/tools/normalize/mapping.py 30
1 files changed, 20 insertions, 10 deletions
diff --git a/workflows/tools/normalize/mapping.py b/workflows/tools/normalize/mapping.py
index d2af3b5..bc82fea 100644
--- a/workflows/tools/normalize/mapping.py
+++ b/workflows/tools/normalize/mapping.py
@@ -27,21 +27,30 @@ def host_species(host,mapping):
             warning = f"No URI mapping for host_species <{key}>"
     return host.__dict__,warning
 
-Bronchoalveolar_Lavage_Fluid = "http://purl.obolibrary.org/obo/NCIT_C13195"
-Saliva = "http://purl.obolibrary.org/obo/NCIT_C13275"
-Nasal_Swab = "http://purl.obolibrary.org/obo/NCIT_C132119"
-Frozen_Food = "https://www.wikidata.org/wiki/Q751728"
+Unknown = "Not found" # So as not to create a warning
 
 def specimen_source(sample,mapping):
-    SPECIMEN_TERMS = {
-        r".*swab": Nasal_Swab,
+    Oronasopharynx = "http://purl.obolibrary.org/obo/NCIT_C155835"
+    Oropharyngeal = "http://purl.obolibrary.org/obo/NCIT_C155835"
+    Nasopharyngeal = "http://purl.obolibrary.org/obo/NCIT_C155831"
+    Bronchoalveolar_Lavage_Fluid = "http://purl.obolibrary.org/obo/NCIT_C13195"
+    Saliva = "http://purl.obolibrary.org/obo/NCIT_C13275"
+    Nasal_Swab = "http://purl.obolibrary.org/obo/NCIT_C132119"
+    Frozen_Food = "https://www.wikidata.org/wiki/Q751728"
+    SPECIMEN_TERMS = { # since Python 3.7 dict is ordered! Note that re is allowed
+        "Oronasopharynx": Oronasopharynx,
+        "orophar": Oropharyngeal,
+        "pharyngeal": Nasopharyngeal,
+        "\snares": Nasal_Swab,
         "saliva": Saliva,
+        "swab": Nasal_Swab,
         "seafood": Frozen_Food,
-        "packaging": Frozen_Food
+        "packaging": Frozen_Food,
+        "uknown": Unknown,
+        "unknown": Unknown
         }
     warning = None
     sample = types.SimpleNamespace(**sample)
-
     try:
         if sample.specimen_source and \
            not 'obolibrary' in sample.specimen_source and \
@@ -52,12 +61,13 @@ def specimen_source(sample,mapping):
                 sample.specimen_source = mapping[key]
             else:
                 for term in SPECIMEN_TERMS:
-                    p = re.compile(term,re.IGNORECASE)
+                    p = re.compile(".*?"+term,re.IGNORECASE)
                     m = p.match(key)
                     if m: sample.specimen_source = SPECIMEN_TERMS[term]
         if not sample.specimen_source:
             warning = f"No URI mapping for specimen_source <{key}>"
-        if sample.specimen_source == None: del(sample.specimen_source)
+        if sample.specimen_source == Unknown or sample.specimen_source == None:
+            del(sample.specimen_source)
     except AttributeError:
         pass
     return sample.__dict__,warning