# bh20sequploader/qc_metadata.py
# Post-patch contents of commit 07bc4c65535437b8e9e0744f08da8cea541d0116
# ("Add metadata validation with schema-salad", Peter Amstutz, 2020-04-07),
# reformatted and with review fixes applied.
import schema_salad.schema
import logging
import pkg_resources

# Canonical identifier of the schema. It is used both as the key under which
# the bundled copy is placed in the loader cache and as the URL handed to
# load_schema, so the two must be the exact same string.
SCHEMA_URL = "https://raw.githubusercontent.com/arvados/bh20-seq-resource/master/bh20sequploader/bh20seq-schema.yml"


def qc_metadata(metadatafile):
    """Validate a metadata document against the bundled bh20seq schema.

    The schema text is read from the ``bh20seq-schema.yml`` resource shipped
    alongside this module and supplied to schema-salad through its ``cache``
    argument, keyed by ``SCHEMA_URL``.

    Args:
        metadatafile: The metadata document to check; passed unchanged to
            ``schema_salad.schema.load_and_validate``.

    Returns:
        True if the document loaded and validated, False if the schema could
        not be turned into Avro names or if loading/validation raised.
    """
    # resource_string returns the bytes directly, so no stream is left open.
    schema_text = pkg_resources.resource_string(
        __name__, "bh20seq-schema.yml"
    ).decode("utf-8")
    cache = {SCHEMA_URL: schema_text}

    (document_loader,
     avsc_names,
     schema_metadata,
     metaschema_loader) = schema_salad.schema.load_schema(SCHEMA_URL, cache=cache)

    # load_schema hands back something other than a Names instance when the
    # schema itself is unusable; report it and give up.
    if not isinstance(avsc_names, schema_salad.avro.schema.Names):
        print(avsc_names)
        return False

    try:
        # Final positional argument True requests strict validation.
        schema_salad.schema.load_and_validate(
            document_loader, avsc_names, metadatafile, True
        )
    except Exception as e:
        # Keep the boolean contract for callers, but say why it failed and
        # let KeyboardInterrupt / SystemExit propagate.
        logging.warning("Metadata validation failed: %s", e)
        return False
    return True