diff options
author | AndreaGuarracino | 2021-01-08 00:25:55 +0100 |
---|---|---|
committer | AndreaGuarracino | 2021-01-08 00:25:55 +0100 |
commit | 2ddf72a4028475eb65dfeab153f3565e698d5338 (patch) | |
tree | 9526d7271ec0b58d1e88b24dc1ed7043d91c5017 /workflows/yamlfa2ttl/check_metadata.py | |
parent | 8f385cdd408f8e72e13ec7f87b5413b11d3c2d53 (diff) | |
download | bh20-seq-resource-2ddf72a4028475eb65dfeab153f3565e698d5338.tar.gz bh20-seq-resource-2ddf72a4028475eb65dfeab153f3565e698d5338.tar.lz bh20-seq-resource-2ddf72a4028475eb65dfeab153f3565e698d5338.zip |
added check_metadata workflow and script
Diffstat (limited to 'workflows/yamlfa2ttl/check_metadata.py')
-rw-r--r-- | workflows/yamlfa2ttl/check_metadata.py | 39 |
1 files changed, 39 insertions, 0 deletions
diff --git a/workflows/yamlfa2ttl/check_metadata.py b/workflows/yamlfa2ttl/check_metadata.py
new file mode 100644
index 0000000..05494ca
--- /dev/null
+++ b/workflows/yamlfa2ttl/check_metadata.py
@@ -0,0 +1,39 @@
+import sys
+
+import schema_salad.schema
+import schema_salad.jsonld_context
+
+from pyshex.evaluate import evaluate
+
+path_yaml = sys.argv[1]
+path_schema_yaml = sys.argv[2]
+path_shex_rdf = sys.argv[3]
+
+with open(path_schema_yaml, "rb") as f:
+    cache = {
+        "https://raw.githubusercontent.com/arvados/bh20-seq-resource/master/bh20sequploader/bh20seq-schema.yml": f.read().decode("utf-8")
+    }
+
+metadata_schema = schema_salad.schema.load_schema(
+    "https://raw.githubusercontent.com/arvados/bh20-seq-resource/master/bh20sequploader/bh20seq-schema.yml",
+    cache=cache
+)
+
+(document_loader, avsc_names, schema_metadata, metaschema_loader) = metadata_schema
+
+if not isinstance(avsc_names, schema_salad.avro.schema.Names):
+    raise Exception(avsc_names)
+
+with open(path_shex_rdf, "rb") as f:
+    shex = f.read().decode("utf-8")
+
+doc, metadata = schema_salad.schema.load_and_validate(document_loader, avsc_names, path_yaml, True)
+g = schema_salad.jsonld_context.makerdf("workflow", doc, document_loader.ctx)
+rslt, reason = evaluate(g, shex, doc["id"], "https://raw.githubusercontent.com/arvados/bh20-seq-resource/master/bh20sequploader/bh20seq-shex.rdf#submissionShape")
+
+# As part of QC make sure serialization works too, this will raise
+# an exception if there are invalid URIs.
+g.serialize(format="ntriples")
+
+if not rslt:
+    raise Exception(reason)