From 2ddf72a4028475eb65dfeab153f3565e698d5338 Mon Sep 17 00:00:00 2001
From: AndreaGuarracino
Date: Fri, 8 Jan 2021 00:25:55 +0100
Subject: added check_metadata workflow and script

---
 workflows/yamlfa2ttl/check_metadata.cwl | 24 ++++++++++++++++
 workflows/yamlfa2ttl/check_metadata.py  | 50 +++++++++++++++++++++++++++++++++
 workflows/yamlfa2ttl/yamlfa2ttl.cwl     | 20 ++++++++++++-
 3 files changed, 93 insertions(+), 1 deletion(-)
 create mode 100644 workflows/yamlfa2ttl/check_metadata.cwl
 create mode 100644 workflows/yamlfa2ttl/check_metadata.py

diff --git a/workflows/yamlfa2ttl/check_metadata.cwl b/workflows/yamlfa2ttl/check_metadata.cwl
new file mode 100644
index 0000000..72c4d36
--- /dev/null
+++ b/workflows/yamlfa2ttl/check_metadata.cwl
@@ -0,0 +1,24 @@
+#!/usr/bin/env cwl-runner
+
+cwlVersion: v1.1
+class: CommandLineTool
+baseCommand: python3
+
+inputs:
+  script:
+    type: File
+    inputBinding: {position: 1}
+    default: {class: File, location: check_metadata.py}
+  path_yaml:
+    type: string
+    inputBinding: {position: 2}
+  path_schema_yaml:
+    type: File
+    inputBinding: {position: 3}
+    default: {class: File, location: ../../bh20sequploader/bh20seq-schema.yml}
+  path_shex_rdf:
+    type: File
+    inputBinding: {position: 4}
+    default: {class: File, location: ../../bh20sequploader/bh20seq-shex.rdf}
+
+outputs: []
diff --git a/workflows/yamlfa2ttl/check_metadata.py b/workflows/yamlfa2ttl/check_metadata.py
new file mode 100644
index 0000000..05494ca
--- /dev/null
+++ b/workflows/yamlfa2ttl/check_metadata.py
@@ -0,0 +1,50 @@
+"""Validate a metadata YAML file against the bh20seq schema and ShEx shape.
+
+Usage: check_metadata.py PATH_YAML PATH_SCHEMA_YAML PATH_SHEX_RDF
+
+Any failure is reported by raising an exception.
+"""
+
+import sys
+
+import schema_salad.avro.schema
+import schema_salad.schema
+import schema_salad.jsonld_context
+
+from pyshex.evaluate import evaluate
+
+# Canonical URL of the metadata schema and of the ShEx shape to check against.
+SCHEMA_URL = "https://raw.githubusercontent.com/arvados/bh20-seq-resource/master/bh20sequploader/bh20seq-schema.yml"
+SHAPE_URL = "https://raw.githubusercontent.com/arvados/bh20-seq-resource/master/bh20sequploader/bh20seq-shex.rdf#submissionShape"
+
+path_yaml = sys.argv[1]
+path_schema_yaml = sys.argv[2]
+path_shex_rdf = sys.argv[3]
+
+# Serve the local schema copy from the cache under its canonical URL.
+with open(path_schema_yaml, "rb") as f:
+    cache = {SCHEMA_URL: f.read().decode("utf-8")}
+
+metadata_schema = schema_salad.schema.load_schema(SCHEMA_URL, cache=cache)
+
+(document_loader, avsc_names, schema_metadata, metaschema_loader) = metadata_schema
+
+# Anything other than a Names instance is treated as a schema-loading error.
+if not isinstance(avsc_names, schema_salad.avro.schema.Names):
+    raise Exception(avsc_names)
+
+with open(path_shex_rdf, "rb") as f:
+    shex = f.read().decode("utf-8")
+
+# Validate the document against the schema, build an RDF graph from it and
+# evaluate that graph against the submission shape.
+doc, metadata = schema_salad.schema.load_and_validate(document_loader, avsc_names, path_yaml, True)
+g = schema_salad.jsonld_context.makerdf("workflow", doc, document_loader.ctx)
+rslt, reason = evaluate(g, shex, doc["id"], SHAPE_URL)
+
+# As part of QC make sure serialization works too, this will raise
+# an exception if there are invalid URIs.
+g.serialize(format="ntriples")
+
+if not rslt:
+    raise Exception(reason)
diff --git a/workflows/yamlfa2ttl/yamlfa2ttl.cwl b/workflows/yamlfa2ttl/yamlfa2ttl.cwl
index 143fc9d..2913e99 100644
--- a/workflows/yamlfa2ttl/yamlfa2ttl.cwl
+++ b/workflows/yamlfa2ttl/yamlfa2ttl.cwl
@@ -1,3 +1,10 @@
 #!/usr/bin/env cwl-runner
 
+# Example invocations (kept as comments so the file stays valid CWL):
+#
+#   ~/.config/guix/current/bin/guix environment -C guix --ad-hoc cwltool python python-biopython python-requests python-dateutil python-magic ruby
+#   cwltool --preserve-environment PYTHONPATH yamlfa2ttl.cwl --path_fasta ~/bh20-seq-resource/example/sequence.fasta
+#
+#   cwltool --no-container --preserve-environment GUIX_ENVIRONMENT --preserve-environment PYTHONPATH yamlfa2ttl.cwl --path_fasta ~/bh20-seq-resource/example/sequence.fasta
+
 cwlVersion: v1.1
@@ -7,12 +14,16 @@ doc: "Workflow to go from YAML (metadata) + FASTA (sequence) to TTL (metadata)"
 inputs:
   path_fasta:
     type: string
-    doc: input fasta to validate
+    doc: input FASTA to validate
 
   format_to_check:
     type: string
     default: text/fasta
 
+  path_yaml:
+    type: string
+    doc: input YAML to validate and convert in TTL
+
 steps:
   check_format:
     in:
@@ -29,4 +40,11 @@ steps:
     out: []
     run: check_sequence.cwl
 
+  check_metadata:
+    in:
+      path_yaml: path_yaml
+    doc: the input metadata information to put in the knowledge graph
+    out: []
+    run: check_metadata.cwl
+
 outputs: []
-- 
cgit v1.2.3