about summary refs log tree commit diff
path: root/workflows/tools
diff options
context:
space:
mode:
Diffstat (limited to 'workflows/tools')
-rwxr-xr-x workflows/tools/normalize-yamlfa.py 4
-rw-r--r-- workflows/tools/normalize/mapping.py 30
2 files changed, 22 insertions, 12 deletions
diff --git a/workflows/tools/normalize-yamlfa.py b/workflows/tools/normalize-yamlfa.py
index e3f92c0..20c2feb 100755
--- a/workflows/tools/normalize-yamlfa.py
+++ b/workflows/tools/normalize-yamlfa.py
@@ -3,7 +3,7 @@
 #
 # Example:
 #
-#    python3 ./workflows/tools/normalize-yamlfa.py -s ~/tmp/yamlfa/state.json MW241349 --species ./scripts/dict_ontology_standardization/ncbi_host_species.csv
+#    python3 ./workflows/tools/normalize-yamlfa.py -s ~/tmp/yamlfa/state.json --species ncbi_host_species.csv --specimen specimen.csv --validate
 
 import argparse
 import json
@@ -30,7 +30,7 @@ won't stop - it is used for (automated) uploads.
 
 parser.add_argument('-s','--state', type=str, help='State file (JSON) as produced by transform2yamlfa', required=True)
 parser.add_argument('--species', type=str, help='Species mapping file')
-parser.add_argument('--specimen', type=str, help='Specimen mapping file')
+parser.add_argument('--specimen', type=str, help='Optional specimen mapping file')
 parser.add_argument('--validate', action='store_true', help='Validation mode - stops on warning')
 parser.add_argument('--rewrite', action='store_true', help='Rewrite mode - updates files')
 parser.add_argument('--yaml', action='store_true', help='Input YAML instead of JSON')
diff --git a/workflows/tools/normalize/mapping.py b/workflows/tools/normalize/mapping.py
index d2af3b5..bc82fea 100644
--- a/workflows/tools/normalize/mapping.py
+++ b/workflows/tools/normalize/mapping.py
@@ -27,21 +27,30 @@ def host_species(host,mapping):
             warning = f"No URI mapping for host_species <{key}>"
     return host.__dict__,warning
 
-Bronchoalveolar_Lavage_Fluid = "http://purl.obolibrary.org/obo/NCIT_C13195"
-Saliva = "http://purl.obolibrary.org/obo/NCIT_C13275"
-Nasal_Swab = "http://purl.obolibrary.org/obo/NCIT_C132119"
-Frozen_Food = "https://www.wikidata.org/wiki/Q751728"
+Unknown = "Not found" # So as not to create a warning
 
 def specimen_source(sample,mapping):
-    SPECIMEN_TERMS = {
-        r".*swab": Nasal_Swab,
+    Oronasopharynx = "http://purl.obolibrary.org/obo/NCIT_C155835"
+    Oropharyngeal = "http://purl.obolibrary.org/obo/NCIT_C155835"
+    Nasopharyngeal = "http://purl.obolibrary.org/obo/NCIT_C155831"
+    Bronchoalveolar_Lavage_Fluid = "http://purl.obolibrary.org/obo/NCIT_C13195"
+    Saliva = "http://purl.obolibrary.org/obo/NCIT_C13275"
+    Nasal_Swab = "http://purl.obolibrary.org/obo/NCIT_C132119"
+    Frozen_Food = "https://www.wikidata.org/wiki/Q751728"
+    SPECIMEN_TERMS = { # Keys are regexes tried in insertion order (dicts are ordered since Python 3.7)
+        "Oronasopharynx": Oronasopharynx,
+        "orophar": Oropharyngeal,
+        "pharyngeal": Nasopharyngeal,
+        "\snares": Nasal_Swab,
         "saliva": Saliva,
+        "swab": Nasal_Swab,
         "seafood": Frozen_Food,
-        "packaging": Frozen_Food
+        "packaging": Frozen_Food,
+        "uknown": Unknown,
+        "unknown": Unknown
         }
     warning = None
     sample = types.SimpleNamespace(**sample)
-
     try:
         if sample.specimen_source and \
            not 'obolibrary' in sample.specimen_source and \
@@ -52,12 +61,13 @@ def specimen_source(sample,mapping):
                 sample.specimen_source = mapping[key]
             else:
                 for term in SPECIMEN_TERMS:
-                    p = re.compile(term,re.IGNORECASE)
+                    p = re.compile(".*?"+term,re.IGNORECASE)
                     m = p.match(key)
                     if m: sample.specimen_source = SPECIMEN_TERMS[term]
         if not sample.specimen_source:
             warning = f"No URI mapping for specimen_source <{key}>"
-        if sample.specimen_source == None: del(sample.specimen_source)
+        if sample.specimen_source == Unknown or sample.specimen_source == None:
+            del(sample.specimen_source)
     except AttributeError:
         pass
     return sample.__dict__,warning