From 40df65dec296b81650987c8ee4f832b703ab8f74 Mon Sep 17 00:00:00 2001 From: lltommy Date: Tue, 7 Apr 2020 19:51:49 +0200 Subject: adding dummy metadata qc to the project --- bh20sequploader/qc_metadata.py | 13 +++++++++++++ example/dummyschema.yaml | 16 ++++++++++++++++ example/metadata.json | 0 example/metadata.yaml | 17 +++++++++++++++++ 4 files changed, 46 insertions(+) create mode 100644 bh20sequploader/qc_metadata.py create mode 100644 example/dummyschema.yaml delete mode 100644 example/metadata.json create mode 100644 example/metadata.yaml diff --git a/bh20sequploader/qc_metadata.py b/bh20sequploader/qc_metadata.py new file mode 100644 index 0000000..0632777 --- /dev/null +++ b/bh20sequploader/qc_metadata.py @@ -0,0 +1,13 @@ +import yamale + +## NOTE: this is just a DUMMY. Everything about this can and will change +def qc_metadata(metadatafile): + print("Start metadata validation...") + schema = yamale.make_schema('../example/dummyschema.yaml') + data = yamale.make_data(metadatafile) + # Validate data against the schema. Throws a ValueError if data is invalid. + yamale.validate(schema, data) + print("...complete!") + +#qc_metadata("../example/metadata.yaml") + diff --git a/example/dummyschema.yaml b/example/dummyschema.yaml new file mode 100644 index 0000000..e428324 --- /dev/null +++ b/example/dummyschema.yaml @@ -0,0 +1,16 @@ +#sampleInformation: include('sampleInformation') +#InstituteInformation: include('InstituteInformation') +--- +sampleInformation: + location : str() + host : str() + sequenceTechnology: str() + assemblyMethod: str() + +InstituteInformation: + OriginatingLab: str() + SubmittingLab: str() + +VirusDetail: + VirusName: str() + AccessionId: str() diff --git a/example/metadata.json b/example/metadata.json deleted file mode 100644 index e69de29..0000000 diff --git a/example/metadata.yaml b/example/metadata.yaml new file mode 100644 index 0000000..587d0be --- /dev/null +++ b/example/metadata.yaml @@ -0,0 +1,17 @@ +sampleInformation: + location: "USA" + host : "Homo Sapiens" + sequenceTechnology: "Sanger" + assemblyMethod: "CLC Genomics" + +InstituteInformation: + OriginatingLab: "Erik's kitchen" + SubmittingLab: "National Institute for Viral Disease Control and Prevention, China CDC" + +SubmitterInformation: + Submitter: "National Institute for Viral Disease Control and Prevention, China CDC" + submissionDate: "04-04-2020" + +VirusDetail: + VirusName: "hCoV-19/USA/identifer/2020" + AccessionId: "EPI_ISL_Random" -- cgit v1.2.3