aboutsummaryrefslogtreecommitdiff
path: root/workflows/yamlfa2ttl/check_metadata.py
diff options
context:
space:
mode:
Diffstat (limited to 'workflows/yamlfa2ttl/check_metadata.py')
-rw-r--r--workflows/yamlfa2ttl/check_metadata.py39
1 files changed, 39 insertions, 0 deletions
diff --git a/workflows/yamlfa2ttl/check_metadata.py b/workflows/yamlfa2ttl/check_metadata.py
new file mode 100644
index 0000000..05494ca
--- /dev/null
+++ b/workflows/yamlfa2ttl/check_metadata.py
@@ -0,0 +1,39 @@
+import sys
+
+import schema_salad.schema
+import schema_salad.jsonld_context
+
+from pyshex.evaluate import evaluate
+
+# Usage: check_metadata.py METADATA_YAML SCHEMA_YAML SHEX_RDF
+path_yaml, path_schema_yaml, path_shex_rdf = sys.argv[1], sys.argv[2], sys.argv[3]
+
+# URL the schema is registered under, and the shape URL passed to evaluate().
+SCHEMA_URL = "https://raw.githubusercontent.com/arvados/bh20-seq-resource/master/bh20sequploader/bh20seq-schema.yml"
+SHAPE_URL = "https://raw.githubusercontent.com/arvados/bh20-seq-resource/master/bh20sequploader/bh20seq-shex.rdf#submissionShape"
+
+# The local schema file is handed to load_schema as the cache entry for SCHEMA_URL.
+with open(path_schema_yaml, "rb") as f:
+    cache = {SCHEMA_URL: f.read().decode("utf-8")}
+
+metadata_schema = schema_salad.schema.load_schema(SCHEMA_URL, cache=cache)
+document_loader, avsc_names, schema_metadata, metaschema_loader = metadata_schema
+
+# Abort unless avsc_names is an avro Names instance; the value itself is the error.
+if not isinstance(avsc_names, schema_salad.avro.schema.Names):
+    raise Exception(avsc_names)
+
+with open(path_shex_rdf, "rb") as f:
+    shex = f.read().decode("utf-8")
+
+# Load and validate path_yaml, convert it to RDF, and check it against the ShEx shape.
+doc, metadata = schema_salad.schema.load_and_validate(document_loader, avsc_names, path_yaml, True)
+g = schema_salad.jsonld_context.makerdf("workflow", doc, document_loader.ctx)
+rslt, reason = evaluate(g, shex, doc["id"], SHAPE_URL)
+
+# As part of QC make sure serialization works too, this will raise
+# an exception if there are invalid URIs.
+g.serialize(format="ntriples")
+
+if not rslt:
+    raise Exception(reason)