diff options
author | Pjotr Prins | 2021-01-04 10:25:36 +0000 |
---|---|---|
committer | Pjotr Prins | 2021-01-04 10:25:36 +0000 |
commit | f9f27a787fef0ad58c1ae465d8ba1ee4634083ae (patch) | |
tree | 64bba4b6c496c9aa63aeb651bcc8252256a905bf /workflows/tools/normalize | |
parent | bf8f13af6f083d382b4a3900566ef5e329084cbf (diff) | |
download | bh20-seq-resource-f9f27a787fef0ad58c1ae465d8ba1ee4634083ae.tar.gz bh20-seq-resource-f9f27a787fef0ad58c1ae465d8ba1ee4634083ae.tar.lz bh20-seq-resource-f9f27a787fef0ad58c1ae465d8ba1ee4634083ae.zip |
mapping: no longer requires specimen file for genbank output
Diffstat (limited to 'workflows/tools/normalize')
-rw-r--r-- | workflows/tools/normalize/mapping.py | 30 |
1 file changed, 20 insertions, 10 deletions
```diff
diff --git a/workflows/tools/normalize/mapping.py b/workflows/tools/normalize/mapping.py
index d2af3b5..bc82fea 100644
--- a/workflows/tools/normalize/mapping.py
+++ b/workflows/tools/normalize/mapping.py
@@ -27,21 +27,30 @@ def host_species(host,mapping):
         warning = f"No URI mapping for host_species <{key}>"
     return host.__dict__,warning
 
-Bronchoalveolar_Lavage_Fluid = "http://purl.obolibrary.org/obo/NCIT_C13195"
-Saliva = "http://purl.obolibrary.org/obo/NCIT_C13275"
-Nasal_Swab = "http://purl.obolibrary.org/obo/NCIT_C132119"
-Frozen_Food = "https://www.wikidata.org/wiki/Q751728"
+Unknown = "Not found" # So as not to create a warning
 
 def specimen_source(sample,mapping):
-    SPECIMEN_TERMS = {
-        r".*swab": Nasal_Swab,
+    Oronasopharynx = "http://purl.obolibrary.org/obo/NCIT_C155835"
+    Oropharyngeal = "http://purl.obolibrary.org/obo/NCIT_C155835"
+    Nasopharyngeal = "http://purl.obolibrary.org/obo/NCIT_C155831"
+    Bronchoalveolar_Lavage_Fluid = "http://purl.obolibrary.org/obo/NCIT_C13195"
+    Saliva = "http://purl.obolibrary.org/obo/NCIT_C13275"
+    Nasal_Swab = "http://purl.obolibrary.org/obo/NCIT_C132119"
+    Frozen_Food = "https://www.wikidata.org/wiki/Q751728"
+    SPECIMEN_TERMS = { # since Python 3.7 dict is ordered! Note that re is allowed
+        "Oronasopharynx": Oronasopharynx,
+        "orophar": Oropharyngeal,
+        "pharyngeal": Nasopharyngeal,
+        "\snares": Nasal_Swab,
         "saliva": Saliva,
+        "swab": Nasal_Swab,
         "seafood": Frozen_Food,
-        "packaging": Frozen_Food
+        "packaging": Frozen_Food,
+        "uknown": Unknown,
+        "unknown": Unknown
     }
     warning = None
     sample = types.SimpleNamespace(**sample)
-
     try:
         if sample.specimen_source and \
            not 'obolibrary' in sample.specimen_source and \
@@ -52,12 +61,13 @@ def specimen_source(sample,mapping):
                 sample.specimen_source = mapping[key]
             else:
                 for term in SPECIMEN_TERMS:
-                    p = re.compile(term,re.IGNORECASE)
+                    p = re.compile(".*?"+term,re.IGNORECASE)
                     m = p.match(key)
                     if m: sample.specimen_source = SPECIMEN_TERMS[term]
                 if not sample.specimen_source:
                     warning = f"No URI mapping for specimen_source <{key}>"
-        if sample.specimen_source == None: del(sample.specimen_source)
+        if sample.specimen_source == Unknown or sample.specimen_source == None:
+            del(sample.specimen_source)
     except AttributeError:
         pass
     return sample.__dict__,warning
```