aboutsummaryrefslogtreecommitdiff
path: root/workflows/yamlfa2ttl/check_metadata.py
diff options
context:
space:
mode:
author AndreaGuarracino 2021-01-08 00:25:55 +0100
committer AndreaGuarracino 2021-01-08 00:25:55 +0100
commit 2ddf72a4028475eb65dfeab153f3565e698d5338 (patch)
tree 9526d7271ec0b58d1e88b24dc1ed7043d91c5017 /workflows/yamlfa2ttl/check_metadata.py
parent 8f385cdd408f8e72e13ec7f87b5413b11d3c2d53 (diff)
download bh20-seq-resource-2ddf72a4028475eb65dfeab153f3565e698d5338.tar.gz
bh20-seq-resource-2ddf72a4028475eb65dfeab153f3565e698d5338.tar.lz
bh20-seq-resource-2ddf72a4028475eb65dfeab153f3565e698d5338.zip
added check_metadata workflow and script
Diffstat (limited to 'workflows/yamlfa2ttl/check_metadata.py')
-rw-r--r-- workflows/yamlfa2ttl/check_metadata.py 39
1 files changed, 39 insertions, 0 deletions
diff --git a/workflows/yamlfa2ttl/check_metadata.py b/workflows/yamlfa2ttl/check_metadata.py
new file mode 100644
index 0000000..05494ca
--- /dev/null
+++ b/workflows/yamlfa2ttl/check_metadata.py
@@ -0,0 +1,39 @@
+import sys
+
+import schema_salad.schema
+import schema_salad.jsonld_context
+
+from pyshex.evaluate import evaluate
+
+path_yaml = sys.argv[1]  # argv[1]: document that is loaded and validated against the schema below
+path_schema_yaml = sys.argv[2]  # argv[2]: local file whose text is placed in the schema cache
+path_shex_rdf = sys.argv[3]  # argv[3]: file whose text is passed to pyshex evaluate()
+
+with open(path_schema_yaml, "rb") as f:  # read as bytes; decoded explicitly as UTF-8 below
+ cache = {  # maps the schema URL to the local file's text; presumably lets load_schema skip the fetch - confirm
+ "https://raw.githubusercontent.com/arvados/bh20-seq-resource/master/bh20sequploader/bh20seq-schema.yml": f.read().decode("utf-8")
+ }
+
+metadata_schema = schema_salad.schema.load_schema(  # same URL as the cache key above
+ "https://raw.githubusercontent.com/arvados/bh20-seq-resource/master/bh20sequploader/bh20seq-schema.yml",
+ cache=cache
+)
+
+(document_loader, avsc_names, schema_metadata, metaschema_loader) = metadata_schema  # result is unpacked as a 4-tuple
+
+if not isinstance(avsc_names, schema_salad.avro.schema.Names):  # any other value is raised as the error payload
+ raise Exception(avsc_names)
+
+with open(path_shex_rdf, "rb") as f:  # read as bytes; decoded explicitly as UTF-8 below
+ shex = f.read().decode("utf-8")  # handed to evaluate() as its second argument
+
+doc, metadata = schema_salad.schema.load_and_validate(document_loader, avsc_names, path_yaml, True)  # presumably True = strict validation - confirm
+g = schema_salad.jsonld_context.makerdf("workflow", doc, document_loader.ctx)  # g is serialized as N-Triples below; presumably an rdflib Graph - confirm
+rslt, reason = evaluate(g, shex, doc["id"], "https://raw.githubusercontent.com/arvados/bh20-seq-resource/master/bh20sequploader/bh20seq-shex.rdf#submissionShape")  # presumably doc["id"] = focus node, URL = start shape - confirm
+
+# As part of QC, make sure serialization works too; this will raise
+# an exception if there are invalid URIs.
+g.serialize(format="ntriples")  # return value discarded; called only so that a failure raises
+
+if not rslt:  # falsy result from evaluate(): fail, with the accompanying reason as the exception argument
+ raise Exception(reason)