aboutsummaryrefslogtreecommitdiff
path: root/workflows/yamlfa2ttl
diff options
context:
space:
mode:
Diffstat (limited to 'workflows/yamlfa2ttl')
-rw-r--r--workflows/yamlfa2ttl/check_metadata.cwl24
-rw-r--r--workflows/yamlfa2ttl/check_metadata.py39
-rw-r--r--workflows/yamlfa2ttl/yamlfa2ttl.cwl20
3 files changed, 82 insertions, 1 deletions
diff --git a/workflows/yamlfa2ttl/check_metadata.cwl b/workflows/yamlfa2ttl/check_metadata.cwl
new file mode 100644
index 0000000..72c4d36
--- /dev/null
+++ b/workflows/yamlfa2ttl/check_metadata.cwl
@@ -0,0 +1,24 @@
+#!/usr/bin/env cwl-runner
+
+cwlVersion: v1.1
+class: CommandLineTool
+baseCommand: python3  # command line built below: python3 <script> <path_yaml> <path_schema_yaml> <path_shex_rdf>
+
+inputs:
+ script:
+ type: File
+ inputBinding: {position: 1}  # first argument after python3
+ default: {class: File, location: check_metadata.py}  # defaults to check_metadata.py (relative location)
+ path_yaml:
+ type: string  # passed as a plain string, not a File; NOTE(review): confirm the path is reachable where the tool runs
+ inputBinding: {position: 2}  # second argument: the metadata YAML to check
+ path_schema_yaml:
+ type: File
+ inputBinding: {position: 3}  # third argument: the schema file
+ default: {class: File, location: ../../bh20sequploader/bh20seq-schema.yml}  # defaults to the schema under bh20sequploader/
+ path_shex_rdf:
+ type: File
+ inputBinding: {position: 4}  # fourth argument: the ShEx shapes file
+ default: {class: File, location: ../../bh20sequploader/bh20seq-shex.rdf}  # defaults to the shapes file under bh20sequploader/
+
+outputs: []  # no outputs are declared for this tool
diff --git a/workflows/yamlfa2ttl/check_metadata.py b/workflows/yamlfa2ttl/check_metadata.py
new file mode 100644
index 0000000..05494ca
--- /dev/null
+++ b/workflows/yamlfa2ttl/check_metadata.py
@@ -0,0 +1,39 @@
+import sys
+
+import schema_salad.schema
+import schema_salad.jsonld_context
+
+from pyshex.evaluate import evaluate
+
+path_yaml = sys.argv[1]  # first CLI argument: the metadata YAML document to check
+path_schema_yaml = sys.argv[2]  # second CLI argument: local file containing the schema
+path_shex_rdf = sys.argv[3]  # third CLI argument: local file containing the ShEx shapes
+
+with open(path_schema_yaml, "rb") as f:  # read as bytes; decoded explicitly as UTF-8 below
+ cache = {  # local schema text keyed by the schema URL; passed to load_schema as cache= (presumably to avoid fetching the URL; confirm)
+ "https://raw.githubusercontent.com/arvados/bh20-seq-resource/master/bh20sequploader/bh20seq-schema.yml": f.read().decode("utf-8")
+ }
+
+metadata_schema = schema_salad.schema.load_schema(  # load the schema under the same URL used as the cache key
+ "https://raw.githubusercontent.com/arvados/bh20-seq-resource/master/bh20sequploader/bh20seq-schema.yml",
+ cache=cache
+)
+
+(document_loader, avsc_names, schema_metadata, metaschema_loader) = metadata_schema  # load_schema's result is unpacked as a 4-tuple
+
+if not isinstance(avsc_names, schema_salad.avro.schema.Names):  # NOTE(review): schema_salad.avro is not imported explicitly in this script; presumably loaded by the imports above, confirm
+ raise Exception(avsc_names)  # the unexpected value itself becomes the exception payload
+
+with open(path_shex_rdf, "rb") as f:  # read as bytes; decoded explicitly as UTF-8 below
+ shex = f.read().decode("utf-8")
+
+doc, metadata = schema_salad.schema.load_and_validate(document_loader, avsc_names, path_yaml, True)  # load and validate the YAML; the final True is presumably the strict flag (confirm)
+g = schema_salad.jsonld_context.makerdf("workflow", doc, document_loader.ctx)  # graph built from the validated document; evaluated and serialized below
+rslt, reason = evaluate(g, shex, doc["id"], "https://raw.githubusercontent.com/arvados/bh20-seq-resource/master/bh20sequploader/bh20seq-shex.rdf#submissionShape")  # rslt is checked at the end; reason is raised when it is falsy
+
+# As part of QC make sure serialization works too, this will raise
+# an exception if there are invalid URIs.
+g.serialize(format="ntriples")  # return value is discarded; the call is made only so that it can raise
+
+if not rslt:  # evaluation did not succeed
+ raise Exception(reason)  # surface the reason returned by evaluate()
diff --git a/workflows/yamlfa2ttl/yamlfa2ttl.cwl b/workflows/yamlfa2ttl/yamlfa2ttl.cwl
index 143fc9d..2913e99 100644
--- a/workflows/yamlfa2ttl/yamlfa2ttl.cwl
+++ b/workflows/yamlfa2ttl/yamlfa2ttl.cwl
@@ -1,3 +1,10 @@
+# Guix environment: ~/.config/guix/current/bin/guix environment -C guix --ad-hoc cwltool python python-biopython python-requests python-dateutil python-magic ruby
+# Run: cwltool --preserve-environment PYTHONPATH yamlfa2ttl.cwl --path_fasta ~/bh20-seq-resource/example/sequence.fasta

+# Run (no container): cwltool --no-container --preserve-environment GUIX_ENVIRONMENT --preserve-environment PYTHONPATH yamlfa2ttl.cwl --path_fasta ~/bh20-seq-resource/example/sequence.fasta
+
+
+
#!/usr/bin/env cwl-runner
cwlVersion: v1.1
@@ -7,12 +14,16 @@ doc: "Workflow to go from YAML (metadata) + FASTA (sequence) to TTL (metadata)"
inputs:
path_fasta:
type: string
- doc: input fasta to validate
+ doc: input FASTA to validate
format_to_check:
type: string
default: text/fasta
+ path_yaml:
+ type: string
+ doc: input YAML to validate and convert to TTL
+
steps:
check_format:
in:
@@ -29,4 +40,11 @@ steps:
out: []
run: check_sequence.cwl
+ check_metadata:
+ in:
+ path_yaml: path_yaml
+ doc: the input metadata information to put in the knowledge graph
+ out: []
+ run: check_metadata.cwl
+
outputs: []