aboutsummaryrefslogtreecommitdiff
path: root/workflows/pangenome-generate
diff options
context:
space:
mode:
authorPeter Amstutz2020-04-20 12:55:18 -0400
committerPeter Amstutz2020-04-20 12:55:18 -0400
commitd781e42c9adac07253cb928ae66e9b7314710267 (patch)
tree4718165cbc069169eaff98080c61c5f07763ffc1 /workflows/pangenome-generate
parent1219eaf496c899f3043b90e30eb956f0f363bfb3 (diff)
downloadbh20-seq-resource-d781e42c9adac07253cb928ae66e9b7314710267.tar.gz
bh20-seq-resource-d781e42c9adac07253cb928ae66e9b7314710267.tar.lz
bh20-seq-resource-d781e42c9adac07253cb928ae66e9b7314710267.zip
Move workflows into main repo
Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz@curii.com>
Diffstat (limited to 'workflows/pangenome-generate')
-rw-r--r--workflows/pangenome-generate/merge-metadata.cwl18
-rw-r--r--workflows/pangenome-generate/merge-metadata.py17
-rw-r--r--workflows/pangenome-generate/minimap2.cwl23
-rw-r--r--workflows/pangenome-generate/odgi-build.cwl26
-rw-r--r--workflows/pangenome-generate/odgi-viz.cwl25
-rw-r--r--workflows/pangenome-generate/odgi_to_rdf.cwl25
-rw-r--r--workflows/pangenome-generate/pangenome-generate.cwl57
-rw-r--r--workflows/pangenome-generate/seqkit-rmdup.cwl32
-rw-r--r--workflows/pangenome-generate/seqwish.cwl29
9 files changed, 252 insertions, 0 deletions
diff --git a/workflows/pangenome-generate/merge-metadata.cwl b/workflows/pangenome-generate/merge-metadata.cwl
new file mode 100644
index 0000000..9164c09
--- /dev/null
+++ b/workflows/pangenome-generate/merge-metadata.cwl
@@ -0,0 +1,18 @@
+cwlVersion: v1.1
+class: CommandLineTool
+hints:
+ DockerRequirement:
+ dockerPull: commonworkflowlanguage/cwltool_module
+inputs:
+ metadata: File[]
+ metadataSchema: File
+ subjects: string[]
+outputs:
+ merged: stdout
+stdout: mergedmetadata.ttl
+requirements:
+ InitialWorkDirRequirement:
+ listing:
+ - entry: {$include: merge-metadata.py}
+ entryname: merge-metadata.py
+baseCommand: [python3, merge-metadata.py]
diff --git a/workflows/pangenome-generate/merge-metadata.py b/workflows/pangenome-generate/merge-metadata.py
new file mode 100644
index 0000000..64275b1
--- /dev/null
+++ b/workflows/pangenome-generate/merge-metadata.py
@@ -0,0 +1,17 @@
+import schema_salad.schema
+import schema_salad.jsonld_context
+
+metadataSchema = '$(inputs.metadataSchema.path)'
+metadata = $(inputs.metadata)
+subjects = $(inputs.subjects)
+
+(document_loader,
+ avsc_names,
+ schema_metadata,
+ metaschema_loader) = schema_salad.schema.load_schema(metadataSchema)
+
+for i, m in enumerate(metadata):
+ doc, metadata = schema_salad.schema.load_and_validate(document_loader, avsc_names, m["path"], True)
+ doc["id"] = subjects[i]
+ g = schema_salad.jsonld_context.makerdf(subjects[i], doc, document_loader.ctx)
+ print(g.serialize(format="ntriples").decode("utf-8"))
diff --git a/workflows/pangenome-generate/minimap2.cwl b/workflows/pangenome-generate/minimap2.cwl
new file mode 100644
index 0000000..bf19ef7
--- /dev/null
+++ b/workflows/pangenome-generate/minimap2.cwl
@@ -0,0 +1,23 @@
+cwlVersion: v1.1
+class: CommandLineTool
+inputs:
+ readsFA: File
+outputs:
+ readsPAF: stdout
+requirements:
+ InlineJavascriptRequirement: {}
+hints:
+ DockerRequirement:
+ dockerPull: "quay.io/biocontainers/minimap2:2.17--h8b12597_1"
+ ResourceRequirement:
+ coresMin: 8
+ coresMax: 32
+ ramMin: $(7 * 1024)
+ outdirMin: $(Math.ceil(inputs.readsFA.size/(1024*1024*1024) + 20))
+stdout: $(inputs.readsFA.nameroot).paf
+baseCommand: minimap2
+arguments: [-cx, asm20,
+ -w, "1",
+ -t, $(runtime.cores),
+ $(inputs.readsFA),
+ $(inputs.readsFA)]
diff --git a/workflows/pangenome-generate/odgi-build.cwl b/workflows/pangenome-generate/odgi-build.cwl
new file mode 100644
index 0000000..0bd6a20
--- /dev/null
+++ b/workflows/pangenome-generate/odgi-build.cwl
@@ -0,0 +1,26 @@
+cwlVersion: v1.1
+class: CommandLineTool
+inputs:
+ inputGFA: File
+outputs:
+ odgiGraph:
+ type: File
+ outputBinding:
+ glob: $(inputs.inputGFA.nameroot).odgi
+requirements:
+ InlineJavascriptRequirement: {}
+ ShellCommandRequirement: {}
+hints:
+ DockerRequirement:
+ dockerPull: "quay.io/biocontainers/odgi:v0.3--py37h8b12597_0"
+ ResourceRequirement:
+ coresMin: 4
+ ramMin: $(7 * 1024)
+ outdirMin: $(Math.ceil((inputs.inputGFA.size/(1024*1024*1024)+1) * 2))
+ InitialWorkDirRequirement:
+ listing:
+ - entry: $(inputs.inputGFA)
+ writable: true
+arguments: [odgi, build, -g, $(inputs.inputGFA), -s, -o, -,
+ {shellQuote: false, valueFrom: "|"},
+ odgi, sort, -i, -, -p, s, -o, $(inputs.inputGFA.nameroot).odgi]
diff --git a/workflows/pangenome-generate/odgi-viz.cwl b/workflows/pangenome-generate/odgi-viz.cwl
new file mode 100644
index 0000000..d440fcb
--- /dev/null
+++ b/workflows/pangenome-generate/odgi-viz.cwl
@@ -0,0 +1,25 @@
+cwlVersion: v1.1
+class: CommandLineTool
+inputs:
+ inputODGI: File
+outputs:
+ odgiPNG:
+ type: File
+ outputBinding:
+ glob: $(inputs.inputODGI.nameroot).png
+requirements:
+ InlineJavascriptRequirement: {}
+hints:
+ DockerRequirement:
+ dockerPull: "quay.io/biocontainers/odgi:v0.3--py37h8b12597_0"
+ ResourceRequirement:
+ coresMin: 4
+ ramMin: $(7 * 1024)
+ outdirMin: 1
+baseCommand: [odgi, viz]
+arguments: [-i, $(inputs.inputODGI),
+ -o, $(inputs.inputODGI.nameroot).png,
+ -x, "50000",
+ -y, "500",
+ -R,
+ -P, "4"]
diff --git a/workflows/pangenome-generate/odgi_to_rdf.cwl b/workflows/pangenome-generate/odgi_to_rdf.cwl
new file mode 100644
index 0000000..079d6fb
--- /dev/null
+++ b/workflows/pangenome-generate/odgi_to_rdf.cwl
@@ -0,0 +1,25 @@
+#!/usr/bin/env cwl-runner
+class: CommandLineTool
+cwlVersion: v1.1
+hints:
+ DockerRequirement:
+ dockerPull: spodgi/spodgi
+requirements:
+ InlineJavascriptRequirement: {}
+ ShellCommandRequirement: {}
+inputs:
+ - id: odgi
+ type: File
+ - id: output_name
+ type: string?
+
+stdout: $(inputs.output_name || inputs.odgi.nameroot+'.ttl.xz')
+
+arguments:
+ [odgi_to_rdf.py, $(inputs.odgi), "-",
+ {valueFrom: "|", shellQuote: false},
+ xz, --stdout]
+
+outputs:
+ - id: rdf
+ type: stdout
diff --git a/workflows/pangenome-generate/pangenome-generate.cwl b/workflows/pangenome-generate/pangenome-generate.cwl
new file mode 100644
index 0000000..2710743
--- /dev/null
+++ b/workflows/pangenome-generate/pangenome-generate.cwl
@@ -0,0 +1,57 @@
+cwlVersion: v1.1
+class: Workflow
+inputs:
+ inputReads: File[]
+ metadata: File[]
+ metadataSchema: File
+ subjects: string[]
+outputs:
+ odgiGraph:
+ type: File
+ outputSource: buildGraph/odgiGraph
+ odgiPNG:
+ type: File
+ outputSource: vizGraph/odgiPNG
+ seqwishGFA:
+ type: File
+ outputSource: induceGraph/seqwishGFA
+ odgiRDF:
+ type: File
+ outputSource: odgi2rdf/rdf
+ mergedMetadata:
+ type: File
+ outputSource: mergeMetadata/merged
+steps:
+ dedup:
+ in: {readsFA: inputReads}
+ out: [readsMergeDedup]
+ run: seqkit-rmdup.cwl
+ overlapReads:
+ in: {readsFA: dedup/readsMergeDedup}
+ out: [readsPAF]
+ run: minimap2.cwl
+ induceGraph:
+ in:
+ readsFA: dedup/readsMergeDedup
+ readsPAF: overlapReads/readsPAF
+ out: [seqwishGFA]
+ run: seqwish.cwl
+ buildGraph:
+ in: {inputGFA: induceGraph/seqwishGFA}
+ out: [odgiGraph]
+ run: odgi-build.cwl
+ vizGraph:
+ in: {inputODGI: buildGraph/odgiGraph}
+ out: [odgiPNG]
+ run: odgi-viz.cwl
+ odgi2rdf:
+ in: {odgi: buildGraph/odgiGraph}
+ out: [rdf]
+ run: odgi_to_rdf.cwl
+ mergeMetadata:
+ in:
+ metadata: metadata
+ metadataSchema: metadataSchema
+ subjects: subjects
+ out: [merged]
+ run: merge-metadata.cwl
diff --git a/workflows/pangenome-generate/seqkit-rmdup.cwl b/workflows/pangenome-generate/seqkit-rmdup.cwl
new file mode 100644
index 0000000..d3626f5
--- /dev/null
+++ b/workflows/pangenome-generate/seqkit-rmdup.cwl
@@ -0,0 +1,32 @@
+cwlVersion: v1.1
+class: CommandLineTool
+inputs:
+ readsFA: File[]
+outputs:
+ readsMergeDedup:
+ type: File
+ outputBinding:
+ glob: readsMergeDedup.fasta
+requirements:
+ InlineJavascriptRequirement: {}
+hints:
+ DockerRequirement:
+ dockerPull: "quay.io/biocontainers/seqkit:0.7.1--0"
+ ResourceRequirement:
+ coresMin: 8
+ coresMax: 32
+ ramMin: $(7 * 1024)
+ outdirMin: |
+ ${
+ var sum = 0;
+ for (var i = 0; i < inputs.readsFA.length; i++) {
+ sum += inputs.readsFA[i].size;
+ }
+ return (sum/(1024*1024*1024)+1) + 20;
+ }
+baseCommand: seqkit
+arguments: [rmdup,
+ --by-seq,
+ --ignore-case,
+ -o, readsMergeDedup.fasta,
+ $(inputs.readsFA)]
diff --git a/workflows/pangenome-generate/seqwish.cwl b/workflows/pangenome-generate/seqwish.cwl
new file mode 100644
index 0000000..9f8cb15
--- /dev/null
+++ b/workflows/pangenome-generate/seqwish.cwl
@@ -0,0 +1,29 @@
+cwlVersion: v1.1
+class: CommandLineTool
+inputs:
+ readsFA: File
+ readsPAF: File
+ kmerSize:
+ type: int
+ default: 16
+outputs:
+ seqwishGFA:
+ type: File
+ outputBinding:
+ glob: $(inputs.readsPAF.nameroot).gfa
+requirements:
+ InlineJavascriptRequirement: {}
+hints:
+ DockerRequirement:
+ dockerPull: "quay.io/biocontainers/seqwish:0.4.1--h8b12597_0"
+ ResourceRequirement:
+ coresMin: 4
+ ramMin: $(7 * 1024)
+ outdirMin: $(Math.ceil(inputs.readsFA.size/(1024*1024*1024) + 20))
+stdout: $(inputs.readsFA.nameroot).paf
+baseCommand: seqwish
+arguments: [-t, $(runtime.cores),
+ -k, $(inputs.kmerSize),
+ -s, $(inputs.readsFA),
+ -p, $(inputs.readsPAF),
+ -g, $(inputs.readsPAF.nameroot).gfa]