diff options
author | Pjotr Prins | 2021-01-04 10:25:36 +0000 |
---|---|---|
committer | Pjotr Prins | 2021-01-04 10:25:36 +0000 |
commit | f9f27a787fef0ad58c1ae465d8ba1ee4634083ae (patch) | |
tree | 64bba4b6c496c9aa63aeb651bcc8252256a905bf /workflows/tools | |
parent | bf8f13af6f083d382b4a3900566ef5e329084cbf (diff) | |
download | bh20-seq-resource-f9f27a787fef0ad58c1ae465d8ba1ee4634083ae.tar.gz bh20-seq-resource-f9f27a787fef0ad58c1ae465d8ba1ee4634083ae.tar.lz bh20-seq-resource-f9f27a787fef0ad58c1ae465d8ba1ee4634083ae.zip |
mapping: no longer requires specimen file for genbank output
Diffstat (limited to 'workflows/tools')
-rwxr-xr-x | workflows/tools/normalize-yamlfa.py | 4 | ||||
-rw-r--r-- | workflows/tools/normalize/mapping.py | 30 |
2 files changed, 22 insertions, 12 deletions
```diff
diff --git a/workflows/tools/normalize-yamlfa.py b/workflows/tools/normalize-yamlfa.py
index e3f92c0..20c2feb 100755
--- a/workflows/tools/normalize-yamlfa.py
+++ b/workflows/tools/normalize-yamlfa.py
@@ -3,7 +3,7 @@
 #
 # Example:
 #
-# python3 ./workflows/tools/normalize-yamlfa.py -s ~/tmp/yamlfa/state.json MW241349 --species ./scripts/dict_ontology_standardization/ncbi_host_species.csv
+# python3 ./workflows/tools/normalize-yamlfa.py -s ~/tmp/yamlfa/state.json --species ncbi_host_species.csv --specimen specimen.csv --validate
 
 import argparse
 import json
@@ -30,7 +30,7 @@ won't stop - it is used for (automated) uploads.
 
 parser.add_argument('-s','--state', type=str, help='State file (JSON) as produced by transform2yamlfa', required=True)
 parser.add_argument('--species', type=str, help='Species mapping file')
-parser.add_argument('--specimen', type=str, help='Specimen mapping file')
+parser.add_argument('--specimen', type=str, help='Optional specimen mapping file')
 parser.add_argument('--validate', action='store_true', help='Validation mode - stops on warning')
 parser.add_argument('--rewrite', action='store_true', help='Rewrite mode - updates files')
 parser.add_argument('--yaml', action='store_true', help='Input YAML instead of JSON')
diff --git a/workflows/tools/normalize/mapping.py b/workflows/tools/normalize/mapping.py
index d2af3b5..bc82fea 100644
--- a/workflows/tools/normalize/mapping.py
+++ b/workflows/tools/normalize/mapping.py
@@ -27,21 +27,30 @@ def host_species(host,mapping):
         warning = f"No URI mapping for host_species <{key}>"
     return host.__dict__,warning
 
-Bronchoalveolar_Lavage_Fluid = "http://purl.obolibrary.org/obo/NCIT_C13195"
-Saliva = "http://purl.obolibrary.org/obo/NCIT_C13275"
-Nasal_Swab = "http://purl.obolibrary.org/obo/NCIT_C132119"
-Frozen_Food = "https://www.wikidata.org/wiki/Q751728"
+Unknown = "Not found" # So as not to create a warning
 
 def specimen_source(sample,mapping):
-    SPECIMEN_TERMS = {
-        r".*swab": Nasal_Swab,
+    Oronasopharynx = "http://purl.obolibrary.org/obo/NCIT_C155835"
+    Oropharyngeal = "http://purl.obolibrary.org/obo/NCIT_C155835"
+    Nasopharyngeal = "http://purl.obolibrary.org/obo/NCIT_C155831"
+    Bronchoalveolar_Lavage_Fluid = "http://purl.obolibrary.org/obo/NCIT_C13195"
+    Saliva = "http://purl.obolibrary.org/obo/NCIT_C13275"
+    Nasal_Swab = "http://purl.obolibrary.org/obo/NCIT_C132119"
+    Frozen_Food = "https://www.wikidata.org/wiki/Q751728"
+    SPECIMEN_TERMS = { # since Python 3.7 dict is ordered! Note that re is allowed
+        "Oronasopharynx": Oronasopharynx,
+        "orophar": Oropharyngeal,
+        "pharyngeal": Nasopharyngeal,
+        "\snares": Nasal_Swab,
         "saliva": Saliva,
+        "swab": Nasal_Swab,
         "seafood": Frozen_Food,
-        "packaging": Frozen_Food
+        "packaging": Frozen_Food,
+        "uknown": Unknown,
+        "unknown": Unknown
     }
     warning = None
     sample = types.SimpleNamespace(**sample)
-
     try:
         if sample.specimen_source and \
            not 'obolibrary' in sample.specimen_source and \
@@ -52,12 +61,13 @@ def specimen_source(sample,mapping):
                 sample.specimen_source = mapping[key]
             else:
                 for term in SPECIMEN_TERMS:
-                    p = re.compile(term,re.IGNORECASE)
+                    p = re.compile(".*?"+term,re.IGNORECASE)
                     m = p.match(key)
                     if m: sample.specimen_source = SPECIMEN_TERMS[term]
             if not sample.specimen_source:
                 warning = f"No URI mapping for specimen_source <{key}>"
-        if sample.specimen_source == None: del(sample.specimen_source)
+        if sample.specimen_source == Unknown or sample.specimen_source == None:
+            del(sample.specimen_source)
     except AttributeError:
         pass
     return sample.__dict__,warning
```