diff options
author | Pjotr Prins | 2021-01-04 09:40:54 +0000 |
---|---|---|
committer | Pjotr Prins | 2021-01-04 09:40:54 +0000 |
commit | 3593b3f6a835f6c5927cbb1cc79e3db3c5d0053a (patch) | |
tree | 6151f218cef5d4412118844ebce65d732f4b17b7 /workflows/tools/normalize/mapping.py | |
parent | 1c4e055b8a9dc53b7fdbdf12d4b0a7e877fbc2ef (diff) | |
download | bh20-seq-resource-3593b3f6a835f6c5927cbb1cc79e3db3c5d0053a.tar.gz bh20-seq-resource-3593b3f6a835f6c5927cbb1cc79e3db3c5d0053a.tar.lz bh20-seq-resource-3593b3f6a835f6c5927cbb1cc79e3db3c5d0053a.zip |
mapping sample_species using regex
Diffstat (limited to 'workflows/tools/normalize/mapping.py')
-rw-r--r-- | workflows/tools/normalize/mapping.py | 28 |
1 files changed, 24 insertions, 4 deletions
```diff
diff --git a/workflows/tools/normalize/mapping.py b/workflows/tools/normalize/mapping.py
index 1d52b03..d2af3b5 100644
--- a/workflows/tools/normalize/mapping.py
+++ b/workflows/tools/normalize/mapping.py
@@ -13,6 +13,7 @@
 #
 # Pjotr Prins (c) 2021
 
+import re
 import types
 
 def host_species(host,mapping):
@@ -26,18 +27,37 @@ def host_species(host,mapping):
             warning = f"No URI mapping for host_species <{key}>"
     return host.__dict__,warning
 
+Bronchoalveolar_Lavage_Fluid = "http://purl.obolibrary.org/obo/NCIT_C13195"
+Saliva = "http://purl.obolibrary.org/obo/NCIT_C13275"
+Nasal_Swab = "http://purl.obolibrary.org/obo/NCIT_C132119"
+Frozen_Food = "https://www.wikidata.org/wiki/Q751728"
+
 def specimen_source(sample,mapping):
+    SPECIMEN_TERMS = {
+        r".*swab": Nasal_Swab,
+        "saliva": Saliva,
+        "seafood": Frozen_Food,
+        "packaging": Frozen_Food
+    }
     warning = None
     sample = types.SimpleNamespace(**sample)
+
     try:
-        if sample.specimen_source and not 'obolibrary' in sample.specimen_source:
+        if sample.specimen_source and \
+           not 'obolibrary' in sample.specimen_source and \
+           not 'wikidata' in sample.specimen_source:
             key = sample.specimen_source
+            sample.specimen_source = None
             if key in mapping:
                 sample.specimen_source = mapping[key]
             else:
-                sample.specimen_source = None
-                warning = f"No URI mapping for specimen_source <{key}>"
+                for term in SPECIMEN_TERMS:
+                    p = re.compile(term,re.IGNORECASE)
+                    m = p.match(key)
+                    if m: sample.specimen_source = SPECIMEN_TERMS[term]
+                if not sample.specimen_source:
+                    warning = f"No URI mapping for specimen_source <{key}>"
+            if sample.specimen_source == None: del(sample.specimen_source)
     except AttributeError:
         pass
-    if not sample.specimen_source: del(sample.specimen_source)
     return sample.__dict__,warning
```