about summary refs log tree commit diff
path: root/workflows/pangenome-generate
diff options
context:
space:
mode:
author Peter Amstutz 2020-04-20 12:55:18 -0400
committer Peter Amstutz 2020-04-20 12:55:18 -0400
commit d781e42c9adac07253cb928ae66e9b7314710267 (patch)
tree 4718165cbc069169eaff98080c61c5f07763ffc1 /workflows/pangenome-generate
parent 1219eaf496c899f3043b90e30eb956f0f363bfb3 (diff)
download bh20-seq-resource-d781e42c9adac07253cb928ae66e9b7314710267.tar.gz
bh20-seq-resource-d781e42c9adac07253cb928ae66e9b7314710267.tar.lz
bh20-seq-resource-d781e42c9adac07253cb928ae66e9b7314710267.zip
Move workflows into main repo
Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz@curii.com>
Diffstat (limited to 'workflows/pangenome-generate')
-rw-r--r-- workflows/pangenome-generate/merge-metadata.cwl | 18
-rw-r--r-- workflows/pangenome-generate/merge-metadata.py | 17
-rw-r--r-- workflows/pangenome-generate/minimap2.cwl | 23
-rw-r--r-- workflows/pangenome-generate/odgi-build.cwl | 26
-rw-r--r-- workflows/pangenome-generate/odgi-viz.cwl | 25
-rw-r--r-- workflows/pangenome-generate/odgi_to_rdf.cwl | 25
-rw-r--r-- workflows/pangenome-generate/pangenome-generate.cwl | 57
-rw-r--r-- workflows/pangenome-generate/seqkit-rmdup.cwl | 32
-rw-r--r-- workflows/pangenome-generate/seqwish.cwl | 29
9 files changed, 252 insertions, 0 deletions
diff --git a/workflows/pangenome-generate/merge-metadata.cwl b/workflows/pangenome-generate/merge-metadata.cwl
new file mode 100644
index 0000000..9164c09
--- /dev/null
+++ b/workflows/pangenome-generate/merge-metadata.cwl
@@ -0,0 +1,18 @@
+# Runs merge-metadata.py on the given metadata files; whatever the script prints
+# is captured in mergedmetadata.ttl and returned as `merged`.
+cwlVersion: v1.1
+class: CommandLineTool
+baseCommand: [python3, merge-metadata.py]
+requirements:
+  InitialWorkDirRequirement:  # places the script text in the working directory
+    listing:
+      - entryname: merge-metadata.py
+        entry: {$include: merge-metadata.py}
+hints:
+  DockerRequirement: {dockerPull: commonworkflowlanguage/cwltool_module}
+inputs:
+  metadata: File[]      # documents the script loads and converts
+  metadataSchema: File  # schema the script validates each document against
+  subjects: string[]    # subjects[i] is used as the id for metadata[i]
+stdout: mergedmetadata.ttl
+outputs: {merged: stdout}
diff --git a/workflows/pangenome-generate/merge-metadata.py b/workflows/pangenome-generate/merge-metadata.py
new file mode 100644
index 0000000..64275b1
--- /dev/null
+++ b/workflows/pangenome-generate/merge-metadata.py
@@ -0,0 +1,17 @@
+import schema_salad.schema
+import schema_salad.jsonld_context
+
+metadataSchema = '$(inputs.metadataSchema.path)'
+metadata = $(inputs.metadata)
+subjects = $(inputs.subjects)
+
+(document_loader,
+ avsc_names,
+ schema_metadata,
+ metaschema_loader) = schema_salad.schema.load_schema(metadataSchema)
+
+for i, m in enumerate(metadata):
+    doc, metadata = schema_salad.schema.load_and_validate(document_loader, avsc_names, m["path"], True)
+    doc["id"] = subjects[i]
+    g = schema_salad.jsonld_context.makerdf(subjects[i], doc, document_loader.ctx)
+    print(g.serialize(format="ntriples").decode("utf-8"))
diff --git a/workflows/pangenome-generate/minimap2.cwl b/workflows/pangenome-generate/minimap2.cwl
new file mode 100644
index 0000000..bf19ef7
--- /dev/null
+++ b/workflows/pangenome-generate/minimap2.cwl
@@ -0,0 +1,23 @@
+# Overlaps the reads against themselves: minimap2 receives the same FASTA as both
+# positional arguments and its standard output is captured as <nameroot>.paf.
+# NOTE(review): CWL defines outdirMin in MiB while the value below is derived from a size in GiB; confirm.
+cwlVersion: v1.1
+class: CommandLineTool
+requirements: {InlineJavascriptRequirement: {}}
+hints:
+  ResourceRequirement:
+    coresMin: 8
+    coresMax: 32
+    ramMin: $(7 * 1024)
+    outdirMin: $(Math.ceil(inputs.readsFA.size/(1024*1024*1024) + 20))
+  DockerRequirement:
+    dockerPull: "quay.io/biocontainers/minimap2:2.17--h8b12597_1"
+inputs: {readsFA: File}
+outputs: {readsPAF: stdout}
+stdout: $(inputs.readsFA.nameroot).paf
+baseCommand: minimap2
+arguments: [
+  -cx, asm20, -w, "1",
+  -t, $(runtime.cores),
+  $(inputs.readsFA),
+  $(inputs.readsFA)]
diff --git a/workflows/pangenome-generate/odgi-build.cwl b/workflows/pangenome-generate/odgi-build.cwl
new file mode 100644
index 0000000..0bd6a20
--- /dev/null
+++ b/workflows/pangenome-generate/odgi-build.cwl
@@ -0,0 +1,26 @@
+# Builds an odgi graph from the GFA and pipes it into `odgi sort`, producing <GFA nameroot>.odgi.
+cwlVersion: v1.1
+class: CommandLineTool
+inputs:
+  inputGFA: File
+outputs:
+  odgiGraph:
+    type: File
+    outputBinding: {glob: $(inputs.inputGFA.nameroot).odgi}
+requirements:
+  InlineJavascriptRequirement: {}
+  ShellCommandRequirement: {}  # required for the unquoted "|" in arguments
+hints:
+  DockerRequirement: {dockerPull: "quay.io/biocontainers/odgi:v0.3--py37h8b12597_0"}
+  ResourceRequirement:
+    coresMin: 4
+    ramMin: $(7 * 1024)
+    outdirMin: $(Math.ceil((inputs.inputGFA.size/(1024*1024*1024)+1) * 2))
+  InitialWorkDirRequirement:  # asks for the GFA to be staged writable in the working directory
+    listing:
+      - entry: $(inputs.inputGFA)
+        writable: true
+# The lone "-" arguments are quoted: a bare - directly followed by "," is not a valid YAML plain scalar.
+arguments: [odgi, build, -g, $(inputs.inputGFA), -s, -o, "-",
+            {shellQuote: false, valueFrom: "|"},
+            odgi, sort, -i, "-", -p, s, -o, $(inputs.inputGFA.nameroot).odgi]
diff --git a/workflows/pangenome-generate/odgi-viz.cwl b/workflows/pangenome-generate/odgi-viz.cwl
new file mode 100644
index 0000000..d440fcb
--- /dev/null
+++ b/workflows/pangenome-generate/odgi-viz.cwl
@@ -0,0 +1,25 @@
+# Renders the odgi graph as <nameroot>.png with `odgi viz`.
+# The -x 50000, -y 500, -R and -P 4 options are fixed here rather than exposed
+# as tool inputs.
+cwlVersion: v1.1
+class: CommandLineTool
+requirements: {InlineJavascriptRequirement: {}}
+hints:
+  ResourceRequirement:
+    coresMin: 4
+    ramMin: $(7 * 1024)
+    outdirMin: 1
+  DockerRequirement:
+    dockerPull: "quay.io/biocontainers/odgi:v0.3--py37h8b12597_0"
+inputs: {inputODGI: File}
+outputs:
+  odgiPNG:
+    type: File
+    outputBinding: {glob: $(inputs.inputODGI.nameroot).png}
+baseCommand: [odgi, viz]
+arguments: [
+  -i, $(inputs.inputODGI),
+  -o, $(inputs.inputODGI.nameroot).png,
+  -x, "50000", -y, "500",
+  -R,
+  -P, "4"]
diff --git a/workflows/pangenome-generate/odgi_to_rdf.cwl b/workflows/pangenome-generate/odgi_to_rdf.cwl
new file mode 100644
index 0000000..079d6fb
--- /dev/null
+++ b/workflows/pangenome-generate/odgi_to_rdf.cwl
@@ -0,0 +1,25 @@
+#!/usr/bin/env cwl-runner
+# Runs odgi_to_rdf.py on an odgi graph and pipes its output through xz; the
+# compressed stream is captured from standard output.
+cwlVersion: v1.1
+class: CommandLineTool
+requirements:
+  ShellCommandRequirement: {}
+  InlineJavascriptRequirement: {}
+hints:
+  DockerRequirement:
+    dockerPull: spodgi/spodgi
+inputs:
+  odgi: File
+  output_name: string?
+# Falls back to <odgi nameroot>.ttl.xz when output_name is empty or not given.
+stdout: $(inputs.output_name || inputs.odgi.nameroot+'.ttl.xz')
+arguments:
+  - odgi_to_rdf.py
+  - $(inputs.odgi)
+  - "-"
+  - {valueFrom: "|", shellQuote: false}
+  - xz
+  - --stdout
+outputs:
+  rdf: stdout
diff --git a/workflows/pangenome-generate/pangenome-generate.cwl b/workflows/pangenome-generate/pangenome-generate.cwl
new file mode 100644
index 0000000..2710743
--- /dev/null
+++ b/workflows/pangenome-generate/pangenome-generate.cwl
@@ -0,0 +1,57 @@
+# Pangenome workflow: deduplicate the reads, overlap them with minimap2, induce a
+# graph with seqwish, build and sort it with odgi, then render a PNG and an RDF
+# export of that graph. The metadata files are merged in an independent step.
+cwlVersion: v1.1
+class: Workflow
+inputs:
+  inputReads: File[]      # passed as one array to the dedup step
+  metadata: File[]
+  metadataSchema: File
+  subjects: string[]
+outputs:
+  odgiGraph: {type: File, outputSource: buildGraph/odgiGraph}
+  odgiPNG: {type: File, outputSource: vizGraph/odgiPNG}
+  seqwishGFA: {type: File, outputSource: induceGraph/seqwishGFA}
+  odgiRDF: {type: File, outputSource: odgi2rdf/rdf}
+  mergedMetadata: {type: File, outputSource: mergeMetadata/merged}
+steps:
+  dedup:
+    run: seqkit-rmdup.cwl
+    in:
+      readsFA: inputReads
+    out: [readsMergeDedup]
+  overlapReads:
+    run: minimap2.cwl
+    in:
+      readsFA: dedup/readsMergeDedup
+    out: [readsPAF]
+  induceGraph:
+    run: seqwish.cwl
+    in:
+      readsFA: dedup/readsMergeDedup
+      readsPAF: overlapReads/readsPAF
+    out: [seqwishGFA]
+  buildGraph:
+    run: odgi-build.cwl
+    in:
+      inputGFA: induceGraph/seqwishGFA
+    out: [odgiGraph]
+  # The two steps below both read the graph produced by buildGraph.
+  vizGraph:
+    run: odgi-viz.cwl
+    in:
+      inputODGI: buildGraph/odgiGraph
+    out: [odgiPNG]
+  odgi2rdf:
+    run: odgi_to_rdf.cwl
+    in:
+      odgi: buildGraph/odgiGraph
+    out: [rdf]
+  # Independent of the graph steps: consumes workflow inputs only.
+  mergeMetadata:
+    run: merge-metadata.cwl
+    in:
+      metadata: metadata
+      metadataSchema: metadataSchema
+      subjects: subjects
+    out: [merged]
diff --git a/workflows/pangenome-generate/seqkit-rmdup.cwl b/workflows/pangenome-generate/seqkit-rmdup.cwl
new file mode 100644
index 0000000..d3626f5
--- /dev/null
+++ b/workflows/pangenome-generate/seqkit-rmdup.cwl
@@ -0,0 +1,32 @@
+# Runs `seqkit rmdup` over all input files and writes the result to readsMergeDedup.fasta.
+cwlVersion: v1.1
+class: CommandLineTool
+inputs:
+  readsFA: File[]
+outputs:
+  readsMergeDedup:
+    type: File
+    outputBinding:
+      glob: readsMergeDedup.fasta
+requirements: {InlineJavascriptRequirement: {}}
+hints:
+  DockerRequirement: {dockerPull: "quay.io/biocontainers/seqkit:0.7.1--0"}
+  ResourceRequirement:
+    coresMin: 8
+    coresMax: 32
+    ramMin: $(7 * 1024)
+    # Summed input size in GiB plus a margin; rounded up because outdirMin must be a whole number.
+    outdirMin: |
+      ${
+        var sum = 0;
+        for (var i = 0; i < inputs.readsFA.length; i++) {
+          sum += inputs.readsFA[i].size;
+        }
+        return Math.ceil(sum/(1024*1024*1024) + 1) + 20;
+      }
+baseCommand: seqkit
+arguments: [rmdup,
+            --by-seq,
+            --ignore-case,
+            -o, readsMergeDedup.fasta,
+            $(inputs.readsFA)]
diff --git a/workflows/pangenome-generate/seqwish.cwl b/workflows/pangenome-generate/seqwish.cwl
new file mode 100644
index 0000000..9f8cb15
--- /dev/null
+++ b/workflows/pangenome-generate/seqwish.cwl
@@ -0,0 +1,29 @@
+# Runs seqwish on the reads and their PAF overlaps; the graph is written to <PAF nameroot>.gfa.
+cwlVersion: v1.1
+class: CommandLineTool
+inputs:
+  readsFA: File
+  readsPAF: File
+  kmerSize:
+    type: int
+    default: 16
+outputs:
+  seqwishGFA:
+    type: File
+    outputBinding:
+      glob: $(inputs.readsPAF.nameroot).gfa
+requirements: {InlineJavascriptRequirement: {}}
+hints:
+  DockerRequirement:
+    dockerPull: "quay.io/biocontainers/seqwish:0.4.1--h8b12597_0"
+  ResourceRequirement:
+    coresMin: 4
+    ramMin: $(7 * 1024)
+    outdirMin: $(Math.ceil(inputs.readsFA.size/(1024*1024*1024) + 20))
+stdout: $(inputs.readsPAF.nameroot).seqwish.log  # console output only; the graph itself is written via -g
+baseCommand: seqwish
+arguments: [-t, $(runtime.cores),
+            -k, $(inputs.kmerSize),
+            -s, $(inputs.readsFA),
+            -p, $(inputs.readsPAF),
+            -g, $(inputs.readsPAF.nameroot).gfa]