about summary refs log tree commit diff
path: root/workflows/tools/normalize
diff options
context:
space:
mode:
author Pjotr Prins 2021-01-04 09:40:54 +0000
committer Pjotr Prins 2021-01-04 09:40:54 +0000
commit 3593b3f6a835f6c5927cbb1cc79e3db3c5d0053a (patch)
tree 6151f218cef5d4412118844ebce65d732f4b17b7 /workflows/tools/normalize
parent 1c4e055b8a9dc53b7fdbdf12d4b0a7e877fbc2ef (diff)
download bh20-seq-resource-3593b3f6a835f6c5927cbb1cc79e3db3c5d0053a.tar.gz
bh20-seq-resource-3593b3f6a835f6c5927cbb1cc79e3db3c5d0053a.tar.lz
bh20-seq-resource-3593b3f6a835f6c5927cbb1cc79e3db3c5d0053a.zip
mapping sample_species using regex
Diffstat (limited to 'workflows/tools/normalize')
-rw-r--r-- workflows/tools/normalize/mapping.py 28
1 files changed, 24 insertions, 4 deletions
diff --git a/workflows/tools/normalize/mapping.py b/workflows/tools/normalize/mapping.py
index 1d52b03..d2af3b5 100644
--- a/workflows/tools/normalize/mapping.py
+++ b/workflows/tools/normalize/mapping.py
@@ -13,6 +13,7 @@
#
# Pjotr Prins (c) 2021
+import re
import types
def host_species(host,mapping):
@@ -26,18 +27,37 @@ def host_species(host,mapping):
warning = f"No URI mapping for host_species <{key}>"
return host.__dict__,warning
+Bronchoalveolar_Lavage_Fluid = "http://purl.obolibrary.org/obo/NCIT_C13195"
+Saliva = "http://purl.obolibrary.org/obo/NCIT_C13275"
+Nasal_Swab = "http://purl.obolibrary.org/obo/NCIT_C132119"
+Frozen_Food = "https://www.wikidata.org/wiki/Q751728"
+
def specimen_source(sample,mapping):
+ SPECIMEN_TERMS = {
+ r".*swab": Nasal_Swab,
+ "saliva": Saliva,
+ "seafood": Frozen_Food,
+ "packaging": Frozen_Food
+ }
warning = None
sample = types.SimpleNamespace(**sample)
+
try:
- if sample.specimen_source and not 'obolibrary' in sample.specimen_source:
+ if sample.specimen_source and \
+ not 'obolibrary' in sample.specimen_source and \
+ not 'wikidata' in sample.specimen_source:
key = sample.specimen_source
+ sample.specimen_source = None
if key in mapping:
sample.specimen_source = mapping[key]
else:
- sample.specimen_source = None
- warning = f"No URI mapping for specimen_source <{key}>"
+ for term in SPECIMEN_TERMS:
+ p = re.compile(term,re.IGNORECASE)
+ m = p.match(key)
+ if m: sample.specimen_source = SPECIMEN_TERMS[term]
+ if not sample.specimen_source:
+ warning = f"No URI mapping for specimen_source <{key}>"
+ if sample.specimen_source == None: del(sample.specimen_source)
except AttributeError:
pass
- if not sample.specimen_source: del(sample.specimen_source)
return sample.__dict__,warning