From d781e42c9adac07253cb928ae66e9b7314710267 Mon Sep 17 00:00:00 2001
From: Peter Amstutz
Date: Mon, 20 Apr 2020 12:55:18 -0400
Subject: Move workflows into main repo

Arvados-DCO-1.1-Signed-off-by: Peter Amstutz
---
 workflows/pangenome-generate/merge-metadata.cwl    | 18 +++++++
 workflows/pangenome-generate/merge-metadata.py     | 17 +++++++
 workflows/pangenome-generate/minimap2.cwl          | 23 +++++++++
 workflows/pangenome-generate/odgi-build.cwl        | 26 ++++++++++
 workflows/pangenome-generate/odgi-viz.cwl          | 25 ++++++++++
 workflows/pangenome-generate/odgi_to_rdf.cwl       | 25 ++++++++++
 .../pangenome-generate/pangenome-generate.cwl      | 57 ++++++++++++++++++++++
 workflows/pangenome-generate/seqkit-rmdup.cwl      | 32 ++++++++++++
 workflows/pangenome-generate/seqwish.cwl           | 29 +++++++++++
 9 files changed, 252 insertions(+)
 create mode 100644 workflows/pangenome-generate/merge-metadata.cwl
 create mode 100644 workflows/pangenome-generate/merge-metadata.py
 create mode 100644 workflows/pangenome-generate/minimap2.cwl
 create mode 100644 workflows/pangenome-generate/odgi-build.cwl
 create mode 100644 workflows/pangenome-generate/odgi-viz.cwl
 create mode 100644 workflows/pangenome-generate/odgi_to_rdf.cwl
 create mode 100644 workflows/pangenome-generate/pangenome-generate.cwl
 create mode 100644 workflows/pangenome-generate/seqkit-rmdup.cwl
 create mode 100644 workflows/pangenome-generate/seqwish.cwl

(limited to 'workflows/pangenome-generate')

diff --git a/workflows/pangenome-generate/merge-metadata.cwl b/workflows/pangenome-generate/merge-metadata.cwl
new file mode 100644
index 0000000..9164c09
--- /dev/null
+++ b/workflows/pangenome-generate/merge-metadata.cwl
@@ -0,0 +1,18 @@
+cwlVersion: v1.1
+class: CommandLineTool
+hints:
+  DockerRequirement:
+    dockerPull: commonworkflowlanguage/cwltool_module
+inputs:
+  metadata: File[]
+  metadataSchema: File
+  subjects: string[]
+outputs:
+  merged: stdout
+stdout: mergedmetadata.ttl
+requirements:
+  InitialWorkDirRequirement:
+    listing:
+      - entry: {$include: merge-metadata.py}
+        entryname: merge-metadata.py
+baseCommand: [python3, merge-metadata.py]
diff --git a/workflows/pangenome-generate/merge-metadata.py b/workflows/pangenome-generate/merge-metadata.py
new file mode 100644
index 0000000..64275b1
--- /dev/null
+++ b/workflows/pangenome-generate/merge-metadata.py
@@ -0,0 +1,17 @@
+import schema_salad.schema
+import schema_salad.jsonld_context
+
+metadataSchema = '$(inputs.metadataSchema.path)'
+metadata = $(inputs.metadata)
+subjects = $(inputs.subjects)
+
+(document_loader,
+ avsc_names,
+ schema_metadata,
+ metaschema_loader) = schema_salad.schema.load_schema(metadataSchema)
+
+for i, m in enumerate(metadata):
+    doc, doc_metadata = schema_salad.schema.load_and_validate(document_loader, avsc_names, m["path"], True)
+    doc["id"] = subjects[i]
+    g = schema_salad.jsonld_context.makerdf(subjects[i], doc, document_loader.ctx)
+    print(g.serialize(format="ntriples").decode("utf-8"))
diff --git a/workflows/pangenome-generate/minimap2.cwl b/workflows/pangenome-generate/minimap2.cwl
new file mode 100644
index 0000000..bf19ef7
--- /dev/null
+++ b/workflows/pangenome-generate/minimap2.cwl
@@ -0,0 +1,23 @@
+cwlVersion: v1.1
+class: CommandLineTool
+inputs:
+  readsFA: File
+outputs:
+  readsPAF: stdout
+requirements:
+  InlineJavascriptRequirement: {}
+hints:
+  DockerRequirement:
+    dockerPull: "quay.io/biocontainers/minimap2:2.17--h8b12597_1"
+  ResourceRequirement:
+    coresMin: 8
+    coresMax: 32
+    ramMin: $(7 * 1024)
+    outdirMin: $(Math.ceil(inputs.readsFA.size/(1024*1024*1024) + 20))
+stdout: $(inputs.readsFA.nameroot).paf
+baseCommand: minimap2
+arguments: [-cx, asm20,
+            -w, "1",
+            -t, $(runtime.cores),
+            $(inputs.readsFA),
+            $(inputs.readsFA)]
diff --git a/workflows/pangenome-generate/odgi-build.cwl b/workflows/pangenome-generate/odgi-build.cwl
new file mode 100644
index 0000000..0bd6a20
--- /dev/null
+++ b/workflows/pangenome-generate/odgi-build.cwl
@@ -0,0 +1,26 @@
+cwlVersion: v1.1
+class: CommandLineTool
+inputs:
+  inputGFA: File
+outputs:
+  odgiGraph:
+    type: File
+    outputBinding:
+      glob: $(inputs.inputGFA.nameroot).odgi
+requirements:
+  InlineJavascriptRequirement: {}
+  ShellCommandRequirement: {}
+hints:
+  DockerRequirement:
+    dockerPull: "quay.io/biocontainers/odgi:v0.3--py37h8b12597_0"
+  ResourceRequirement:
+    coresMin: 4
+    ramMin: $(7 * 1024)
+    outdirMin: $(Math.ceil((inputs.inputGFA.size/(1024*1024*1024)+1) * 2))
+  InitialWorkDirRequirement:
+    listing:
+      - entry: $(inputs.inputGFA)
+        writable: true
+arguments: [odgi, build, -g, $(inputs.inputGFA), -s, -o, -,
+            {shellQuote: false, valueFrom: "|"},
+            odgi, sort, -i, -, -p, s, -o, $(inputs.inputGFA.nameroot).odgi]
diff --git a/workflows/pangenome-generate/odgi-viz.cwl b/workflows/pangenome-generate/odgi-viz.cwl
new file mode 100644
index 0000000..d440fcb
--- /dev/null
+++ b/workflows/pangenome-generate/odgi-viz.cwl
@@ -0,0 +1,25 @@
+cwlVersion: v1.1
+class: CommandLineTool
+inputs:
+  inputODGI: File
+outputs:
+  odgiPNG:
+    type: File
+    outputBinding:
+      glob: $(inputs.inputODGI.nameroot).png
+requirements:
+  InlineJavascriptRequirement: {}
+hints:
+  DockerRequirement:
+    dockerPull: "quay.io/biocontainers/odgi:v0.3--py37h8b12597_0"
+  ResourceRequirement:
+    coresMin: 4
+    ramMin: $(7 * 1024)
+    outdirMin: 1
+baseCommand: [odgi, viz]
+arguments: [-i, $(inputs.inputODGI),
+            -o, $(inputs.inputODGI.nameroot).png,
+            -x, "50000",
+            -y, "500",
+            -R,
+            -P, "4"]
diff --git a/workflows/pangenome-generate/odgi_to_rdf.cwl b/workflows/pangenome-generate/odgi_to_rdf.cwl
new file mode 100644
index 0000000..079d6fb
--- /dev/null
+++ b/workflows/pangenome-generate/odgi_to_rdf.cwl
@@ -0,0 +1,25 @@
+#!/usr/bin/env cwl-runner
+class: CommandLineTool
+cwlVersion: v1.1
+hints:
+  DockerRequirement:
+    dockerPull: spodgi/spodgi
+requirements:
+  InlineJavascriptRequirement: {}
+  ShellCommandRequirement: {}
+inputs:
+  - id: odgi
+    type: File
+  - id: output_name
+    type: string?
+
+stdout: $(inputs.output_name || inputs.odgi.nameroot+'.ttl.xz')
+
+arguments:
+  [odgi_to_rdf.py, $(inputs.odgi), "-",
+   {valueFrom: "|", shellQuote: false},
+   xz, --stdout]
+
+outputs:
+  - id: rdf
+    type: stdout
diff --git a/workflows/pangenome-generate/pangenome-generate.cwl b/workflows/pangenome-generate/pangenome-generate.cwl
new file mode 100644
index 0000000..2710743
--- /dev/null
+++ b/workflows/pangenome-generate/pangenome-generate.cwl
@@ -0,0 +1,57 @@
+cwlVersion: v1.1
+class: Workflow
+inputs:
+  inputReads: File[]
+  metadata: File[]
+  metadataSchema: File
+  subjects: string[]
+outputs:
+  odgiGraph:
+    type: File
+    outputSource: buildGraph/odgiGraph
+  odgiPNG:
+    type: File
+    outputSource: vizGraph/odgiPNG
+  seqwishGFA:
+    type: File
+    outputSource: induceGraph/seqwishGFA
+  odgiRDF:
+    type: File
+    outputSource: odgi2rdf/rdf
+  mergedMetadata:
+    type: File
+    outputSource: mergeMetadata/merged
+steps:
+  dedup:
+    in: {readsFA: inputReads}
+    out: [readsMergeDedup]
+    run: seqkit-rmdup.cwl
+  overlapReads:
+    in: {readsFA: dedup/readsMergeDedup}
+    out: [readsPAF]
+    run: minimap2.cwl
+  induceGraph:
+    in:
+      readsFA: dedup/readsMergeDedup
+      readsPAF: overlapReads/readsPAF
+    out: [seqwishGFA]
+    run: seqwish.cwl
+  buildGraph:
+    in: {inputGFA: induceGraph/seqwishGFA}
+    out: [odgiGraph]
+    run: odgi-build.cwl
+  vizGraph:
+    in: {inputODGI: buildGraph/odgiGraph}
+    out: [odgiPNG]
+    run: odgi-viz.cwl
+  odgi2rdf:
+    in: {odgi: buildGraph/odgiGraph}
+    out: [rdf]
+    run: odgi_to_rdf.cwl
+  mergeMetadata:
+    in:
+      metadata: metadata
+      metadataSchema: metadataSchema
+      subjects: subjects
+    out: [merged]
+    run: merge-metadata.cwl
diff --git a/workflows/pangenome-generate/seqkit-rmdup.cwl b/workflows/pangenome-generate/seqkit-rmdup.cwl
new file mode 100644
index 0000000..d3626f5
--- /dev/null
+++ b/workflows/pangenome-generate/seqkit-rmdup.cwl
@@ -0,0 +1,32 @@
+cwlVersion: v1.1
+class: CommandLineTool
+inputs:
+  readsFA: File[]
+outputs:
+  readsMergeDedup:
+    type: File
+    outputBinding:
+      glob: readsMergeDedup.fasta
+requirements:
+  InlineJavascriptRequirement: {}
+hints:
+  DockerRequirement:
+    dockerPull: "quay.io/biocontainers/seqkit:0.7.1--0"
+  ResourceRequirement:
+    coresMin: 8
+    coresMax: 32
+    ramMin: $(7 * 1024)
+    outdirMin: |
+      ${
+        var sum = 0;
+        for (var i = 0; i < inputs.readsFA.length; i++) {
+          sum += inputs.readsFA[i].size;
+        }
+        return Math.ceil(sum/(1024*1024*1024)+1) + 20;
+      }
+baseCommand: seqkit
+arguments: [rmdup,
+            --by-seq,
+            --ignore-case,
+            -o, readsMergeDedup.fasta,
+            $(inputs.readsFA)]
diff --git a/workflows/pangenome-generate/seqwish.cwl b/workflows/pangenome-generate/seqwish.cwl
new file mode 100644
index 0000000..9f8cb15
--- /dev/null
+++ b/workflows/pangenome-generate/seqwish.cwl
@@ -0,0 +1,29 @@
+cwlVersion: v1.1
+class: CommandLineTool
+inputs:
+  readsFA: File
+  readsPAF: File
+  kmerSize:
+    type: int
+    default: 16
+outputs:
+  seqwishGFA:
+    type: File
+    outputBinding:
+      glob: $(inputs.readsPAF.nameroot).gfa
+requirements:
+  InlineJavascriptRequirement: {}
+hints:
+  DockerRequirement:
+    dockerPull: "quay.io/biocontainers/seqwish:0.4.1--h8b12597_0"
+  ResourceRequirement:
+    coresMin: 4
+    ramMin: $(7 * 1024)
+    outdirMin: $(Math.ceil(inputs.readsFA.size/(1024*1024*1024) + 20))
+stdout: $(inputs.readsFA.nameroot).paf
+baseCommand: seqwish
+arguments: [-t, $(runtime.cores),
+            -k, $(inputs.kmerSize),
+            -s, $(inputs.readsFA),
+            -p, $(inputs.readsPAF),
+            -g, $(inputs.readsPAF.nameroot).gfa]
-- 
cgit v1.2.3