From 4618a271193fa5cd5cb1e10d6f8acee9c6e8132a Mon Sep 17 00:00:00 2001
From: Michael R. Crusoe
Date: Mon, 4 May 2020 18:37:52 +0200
Subject: add pangenome browser prep

---
 .../pangenome-generate/pangenome-generate.cwl      | 44 ++++++++++++++++++++++
 workflows/tools                                    |  1 +
 2 files changed, 45 insertions(+)
 create mode 160000 workflows/tools

(limited to 'workflows')

diff --git a/workflows/pangenome-generate/pangenome-generate.cwl b/workflows/pangenome-generate/pangenome-generate.cwl
index 6794e2d..05e3511 100644
--- a/workflows/pangenome-generate/pangenome-generate.cwl
+++ b/workflows/pangenome-generate/pangenome-generate.cwl
@@ -1,10 +1,22 @@
+#!/usr/bin/env cwl-runner
 cwlVersion: v1.1
 class: Workflow
+requirements:
+  ScatterFeatureRequirement: {}
+  StepInputExpressionRequirement: {}
 inputs:
   inputReads: File[]
   metadata: File[]
   metadataSchema: File
   subjects: string[]
+  bin_widths:
+    type: int[]
+    default: [ 1, 4, 16, 64, 256, 1000, 4000, 16000]
+    doc: width of each bin in basepairs along the graph vector
+  cells_per_file:
+    type: int
+    default: 100
+    doc: Cells per file on component_segmentation
 outputs:
   odgiGraph:
     type: File
@@ -24,6 +36,12 @@ outputs:
   mergedMetadata:
     type: File
     outputSource: mergeMetadata/merged
+  indexed_paths:
+    type: File
+    outputSource: index_paths/indexed_paths
+  colinear_components:
+    type: File[]
+    outputSource: segment_components/colinear_components
 steps:
   relabel:
     in:
@@ -66,3 +84,29 @@ steps:
       originalLabels: relabel/originalLabels
     out: [merged]
     run: merge-metadata.cwl
+  bin_paths:
+    run: ../tools/odgi/odgi_bin.cwl
+    in:
+      sparse_graph_index: buildGraph/odgiGraph
+      bin_width: bin_widths
+    scatter: bin_width
+    out: [ bins, pangenome_sequence ]
+  index_paths:
+    label: Create path index
+    run : ../tools/odgi/odgi_pathindex.cwl
+    in:
+      sparse_graph_index: buildGraph/odgiGraph
+    out: [ indexed_paths ]
+  segment_components:
+    label: Run component segmentation
+    run: ../tools/graph-genome-segmentation/component_segmentation.cwl
+    in:
+      bins: bin_paths/bins
+      cells_per_file: cells_per_file
+      pangenome_sequence:
+        source: bin_paths/pangenome_sequence
+        valueFrom: $(self[0])
+        # The bin_paths step is scattered over bin_width but always receives the same sparse_graph_index.
+        # The extracted pangenome_sequence depends only on sparse_graph_index, not on bin_width, so every
+        # scatter job emits an identical file; we therefore pass only the first one to this step.
+    out: [ colinear_components ]
diff --git a/workflows/tools b/workflows/tools
new file mode 160000
index 0000000..45c2d6d
--- /dev/null
+++ b/workflows/tools
@@ -0,0 +1 @@
+Subproject commit 45c2d6dab199cb931b28c54863924ff3cf9079c1
-- 
cgit v1.2.3


From 6599feb6a6f8b488bd2d12fcf532c3b8a917a17f Mon Sep 17 00:00:00 2001
From: Michael R. Crusoe
Date: Tue, 5 May 2020 13:11:02 +0200
Subject: move some tools into the shared repo

---
 workflows/pangenome-generate/minimap2.cwl          | 23 -------------
 workflows/pangenome-generate/odgi-viz.cwl          | 25 --------------
 workflows/pangenome-generate/odgi_to_rdf.cwl       |  9 ++---
 .../pangenome-generate/pangenome-generate.cwl      | 39 ++++++++++++++--------
 workflows/pangenome-generate/seqkit-rmdup.cwl      | 37 --------------------
 workflows/tools                                    |  2 +-
 6 files changed, 30 insertions(+), 105 deletions(-)
 delete mode 100644 workflows/pangenome-generate/minimap2.cwl
 delete mode 100644 workflows/pangenome-generate/odgi-viz.cwl
 delete mode 100644 workflows/pangenome-generate/seqkit-rmdup.cwl

(limited to 'workflows')

diff --git a/workflows/pangenome-generate/minimap2.cwl b/workflows/pangenome-generate/minimap2.cwl
deleted file mode 100644
index bf8eb4c..0000000
--- a/workflows/pangenome-generate/minimap2.cwl
+++ /dev/null
@@ -1,23 +0,0 @@
-cwlVersion: v1.1
-class: CommandLineTool
-inputs:
-  readsFA: File
-outputs:
-  readsPAF: stdout
-requirements:
-  InlineJavascriptRequirement: {}
-hints:
-  DockerRequirement:
-    dockerPull: "quay.io/biocontainers/minimap2:2.17--h8b12597_1"
-  ResourceRequirement:
-    coresMin: 8
-    coresMax: 32
-    ramMin: $(15 * 1024)
-    outdirMin: $(Math.ceil(inputs.readsFA.size/(1024*1024*1024) + 20))
-stdout: $(inputs.readsFA.nameroot).paf
-baseCommand: minimap2
-arguments: [-cx, asm20,
-            -w, "1",
-            -t, $(runtime.cores),
-            $(inputs.readsFA),
-            $(inputs.readsFA)]
diff --git a/workflows/pangenome-generate/odgi-viz.cwl b/workflows/pangenome-generate/odgi-viz.cwl
deleted file mode 100644
index d440fcb..0000000
--- a/workflows/pangenome-generate/odgi-viz.cwl
+++ /dev/null
@@ -1,25 +0,0 @@
-cwlVersion: v1.1
-class: CommandLineTool
-inputs:
-  inputODGI: File
-outputs:
-  odgiPNG:
-    type: File
-    outputBinding:
-      glob: $(inputs.inputODGI.nameroot).png
-requirements:
-  InlineJavascriptRequirement: {}
-hints:
-  DockerRequirement:
-    dockerPull: "quay.io/biocontainers/odgi:v0.3--py37h8b12597_0"
-  ResourceRequirement:
-    coresMin: 4
-    ramMin: $(7 * 1024)
-    outdirMin: 1
-baseCommand: [odgi, viz]
-arguments: [-i, $(inputs.inputODGI),
-            -o, $(inputs.inputODGI.nameroot).png,
-            -x, "50000",
-            -y, "500",
-            -R,
-            -P, "4"]
diff --git a/workflows/pangenome-generate/odgi_to_rdf.cwl b/workflows/pangenome-generate/odgi_to_rdf.cwl
index 31ef0c5..96579a3 100644
--- a/workflows/pangenome-generate/odgi_to_rdf.cwl
+++ b/workflows/pangenome-generate/odgi_to_rdf.cwl
@@ -10,10 +10,8 @@ requirements:
   ResourceRequirement:
     ramMin: $((2 * 1024) + 1)
 inputs:
-  - id: odgi
-    type: File
-  - id: output_name
-    type: string?
+  odgi: File
+  output_name: string?
 
 stdout: $(inputs.output_name || inputs.odgi.nameroot+'.ttl.xz')
 
@@ -23,5 +21,4 @@ arguments:
    xz, --stdout]
 
 outputs:
-  - id: rdf
-    type: stdout
+  rdf: stdout
diff --git a/workflows/pangenome-generate/pangenome-generate.cwl b/workflows/pangenome-generate/pangenome-generate.cwl
index 05e3511..51ea6b4 100644
--- a/workflows/pangenome-generate/pangenome-generate.cwl
+++ b/workflows/pangenome-generate/pangenome-generate.cwl
@@ -23,7 +23,7 @@ outputs:
     outputSource: buildGraph/odgiGraph
   odgiPNG:
     type: File
-    outputSource: vizGraph/odgiPNG
+    outputSource: vizGraph/graph_image
   seqwishGFA:
     type: File
     outputSource: induceGraph/seqwishGFA
@@ -32,7 +32,7 @@ outputs:
     outputSource: odgi2rdf/rdf
   readsMergeDedup:
     type: File
-    outputSource: dedup/readsMergeDedup
+    outputSource: dedup/reads_dedup
   mergedMetadata:
     type: File
     outputSource: mergeMetadata/merged
@@ -50,17 +50,21 @@ steps:
     out: [relabeledSeqs, originalLabels]
     run: relabel-seqs.cwl
   dedup:
-    in: {readsFA: relabel/relabeledSeqs}
-    out: [readsMergeDedup, dups]
-    run: seqkit-rmdup.cwl
+    in: {reads: relabel/relabeledSeqs}
+    out: [reads_dedup, dups]
+    run: ../tools/seqkit/seqkit_rmdup.cwl
   overlapReads:
-    in: {readsFA: dedup/readsMergeDedup}
-    out: [readsPAF]
-    run: minimap2.cwl
+    in:
+      target: dedup/reads_dedup
+      query: dedup/reads_dedup
+      outputCIGAR:
+        default: true
+    out: [alignments]
+    run: ../tools/minimap2/minimap2_paf.cwl
   induceGraph:
     in:
-      readsFA: dedup/readsMergeDedup
-      readsPAF: overlapReads/readsPAF
+      readsFA: dedup/reads_dedup
+      readsPAF: overlapReads/alignments
     out: [seqwishGFA]
     run: seqwish.cwl
   buildGraph:
@@ -68,9 +72,18 @@ steps:
     out: [odgiGraph]
     run: odgi-build.cwl
   vizGraph:
-    in: {inputODGI: buildGraph/odgiGraph}
-    out: [odgiPNG]
-    run: odgi-viz.cwl
+    in:
+      sparse_graph_index: buildGraph/odgiGraph
+      width:
+        default: 50000
+      height:
+        default: 500
+      path_per_row:
+        default: true
+      path_height:
+        default: 4
+    out: [graph_image]
+    run: ../tools/odgi/odgi_viz.cwl
   odgi2rdf:
     in: {odgi: buildGraph/odgiGraph}
     out: [rdf]
diff --git a/workflows/pangenome-generate/seqkit-rmdup.cwl b/workflows/pangenome-generate/seqkit-rmdup.cwl
deleted file mode 100644
index 071fa66..0000000
--- a/workflows/pangenome-generate/seqkit-rmdup.cwl
+++ /dev/null
@@ -1,37 +0,0 @@
-cwlVersion: v1.1
-class: CommandLineTool
-inputs:
-  readsFA: File
-outputs:
-  readsMergeDedup:
-    type: File
-    outputBinding:
-      glob: readsMergeDedup.fasta
-  dups:
-    type: File?
-    outputBinding:
-      glob: dups.txt
-requirements:
-  InlineJavascriptRequirement: {}
-hints:
-  DockerRequirement:
-    dockerPull: "quay.io/biocontainers/seqkit:0.7.1--0"
-  ResourceRequirement:
-    coresMin: 8
-    coresMax: 32
-    ramMin: $(7 * 1024)
-    outdirMin: |
-      ${
-        var sum = 0;
-        for (var i = 0; i < inputs.readsFA.length; i++) {
-          sum += inputs.readsFA[i].size;
-        }
-        return (sum/(1024*1024*1024)+1) + 20;
-      }
-baseCommand: seqkit
-arguments: [rmdup,
-            --by-seq,
-            --ignore-case,
-            --dup-num-file, dups.txt,
-            -o, readsMergeDedup.fasta,
-            $(inputs.readsFA)]
diff --git a/workflows/tools b/workflows/tools
index 45c2d6d..2d8523e 160000
--- a/workflows/tools
+++ b/workflows/tools
@@ -1 +1 @@
-Subproject commit 45c2d6dab199cb931b28c54863924ff3cf9079c1
+Subproject commit 2d8523e9a86a9504e77d21f8a878d4f8ef2dac42
-- 
cgit v1.2.3


From ca7df27a6349e7b7009e1d1d5fec372106a072b1 Mon Sep 17 00:00:00 2001
From: Michael R. Crusoe
Date: Tue, 5 May 2020 18:24:56 +0200
Subject: preserve the directory layout of segmentation.py

---
 workflows/pangenome-generate/pangenome-generate.cwl | 2 +-
 workflows/tools                                     | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'workflows')

diff --git a/workflows/pangenome-generate/pangenome-generate.cwl b/workflows/pangenome-generate/pangenome-generate.cwl
index 51ea6b4..1d49904 100644
--- a/workflows/pangenome-generate/pangenome-generate.cwl
+++ b/workflows/pangenome-generate/pangenome-generate.cwl
@@ -40,7 +40,7 @@ outputs:
     type: File
     outputSource: index_paths/indexed_paths
   colinear_components:
-    type: File[]
+    type: Directory
     outputSource: segment_components/colinear_components
 steps:
   relabel:
diff --git a/workflows/tools b/workflows/tools
index 2d8523e..659e174 160000
--- a/workflows/tools
+++ b/workflows/tools
@@ -1 +1 @@
-Subproject commit 2d8523e9a86a9504e77d21f8a878d4f8ef2dac42
+Subproject commit 659e174d0d42ed6b9afd79d9e6f68e225c526d1e
-- 
cgit v1.2.3