about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- workflows/pangenome-generate/arv-main.cwl | 20
-rw-r--r-- workflows/pangenome-generate/dups2metadata.py | 7
-rw-r--r-- workflows/pangenome-generate/odgi-build-from-spoa-gfa.cwl | 16
-rw-r--r-- workflows/pangenome-generate/pangenome-generate_spoa.cwl | 110
-rw-r--r-- workflows/pangenome-generate/sort_fasta_by_quality_and_len.cwl | 3
-rw-r--r-- workflows/pangenome-generate/spoa.cwl | 3
m--------- workflows/tools | 0
7 files changed, 86 insertions, 73 deletions
diff --git a/workflows/pangenome-generate/arv-main.cwl b/workflows/pangenome-generate/arv-main.cwl
index 176cfe7..8d7f83b 100644
--- a/workflows/pangenome-generate/arv-main.cwl
+++ b/workflows/pangenome-generate/arv-main.cwl
@@ -10,9 +10,9 @@ outputs:
   odgiGraph:
     type: File
     outputSource: pangenome-generate/odgiGraph
-  odgiPNG:
-    type: File
-    outputSource: pangenome-generate/odgiPNG
+#  odgiPNG:
+#    type: File
+#    outputSource: pangenome-generate/odgiPNG
   spoaGFA:
     type: File
     outputSource: pangenome-generate/spoaGFA
@@ -25,12 +25,12 @@ outputs:
   mergedMetadata:
     type: File
     outputSource: pangenome-generate/mergedMetadata
-  indexed_paths:
-    type: File
-    outputSource: pangenome-generate/indexed_paths
-  colinear_components:
-    type: Directory
-    outputSource: pangenome-generate/colinear_components
+#  indexed_paths:
+#    type: File
+#    outputSource: pangenome-generate/indexed_paths
+#  colinear_components:
+#    type: Directory
+#    outputSource: pangenome-generate/colinear_components
 steps:
   collect-seqs:
     run: collect-seqs.cwl
@@ -45,4 +45,4 @@ steps:
       seqs: collect-seqs/relabeledSeqs
       metadata: collect-seqs/mergedMetadata
       exclude: exclude
-    out: [odgiGraph, odgiPNG, spoaGFA, odgiRDF, readsMergeDedup, mergedMetadata, indexed_paths, colinear_components]
+    out: [odgiGraph, spoaGFA, odgiRDF, readsMergeDedup, mergedMetadata]
diff --git a/workflows/pangenome-generate/dups2metadata.py b/workflows/pangenome-generate/dups2metadata.py
index 9bda10a..89e7236 100644
--- a/workflows/pangenome-generate/dups2metadata.py
+++ b/workflows/pangenome-generate/dups2metadata.py
@@ -1,17 +1,16 @@
 import sys
+import re
 
 md = open(sys.argv[1], "rt")
 for d in md:
-    print(d)
+    sys.stdout.write(d)
 
 if len(sys.argv) < 3:
     exit(0)
 
 sameseqs = open(sys.argv[2], "rt")
 for d in sameseqs:
-    logging.warn(d)
     g = re.match(r"\d+\t(.*)", d)
-    logging.warn("%s", g.group(1))
     sp = g.group(1).split(",")
     for n in sp[1:]:
-        print("<%s> <http://biohackathon.org/bh20-seq-schema/has_duplicate_sequence> <%s> ." % (n.strip(), sp[0].strip()))
+        sys.stdout.write("<%s> <http://biohackathon.org/bh20-seq-schema/has_duplicate_sequence> <%s> .\n" % (n.strip(), sp[0].strip()))
diff --git a/workflows/pangenome-generate/odgi-build-from-spoa-gfa.cwl b/workflows/pangenome-generate/odgi-build-from-spoa-gfa.cwl
index 2459ce7..1eadc88 100644
--- a/workflows/pangenome-generate/odgi-build-from-spoa-gfa.cwl
+++ b/workflows/pangenome-generate/odgi-build-from-spoa-gfa.cwl
@@ -9,21 +9,21 @@ outputs:
       glob: $(inputs.inputGFA.nameroot).unchop.sorted.odgi
 requirements:
   InlineJavascriptRequirement: {}
-  ShellCommandRequirement: {}
 hints:
   DockerRequirement:
-    dockerPull: "quay.io/biocontainers/odgi:v0.3--py37h8b12597_0"
+    dockerPull: "odgi-bash-binutils:latest"
   ResourceRequirement:
     coresMin: 4
     ramMin: $(7 * 1024)
     outdirMin: $(Math.ceil((inputs.inputGFA.size/(1024*1024*1024)+1) * 2))
   InitialWorkDirRequirement:
+    # Will fail if input file is not writable (odgi bug)
     listing:
       - entry: $(inputs.inputGFA)
         writable: true
-arguments: [odgi, build, -g, $(inputs.inputGFA), -o, -,
-            {shellQuote: false, valueFrom: "|"},
-            odgi, unchop, -i, -, -o, -,
-            {shellQuote: false, valueFrom: "|"},
-            odgi, sort, -i, -, -p, s, -o, $(inputs.inputGFA.nameroot).unchop.sorted.odgi
-           ]
+arguments:
+  - "sh"
+  - "-c"
+  - >-
+    odgi build -g '$(inputs.inputGFA.path)' -o - | odgi unchop -i - -o - |
+    odgi sort -i - -p s -o $(inputs.inputGFA.nameroot).unchop.sorted.odgi
diff --git a/workflows/pangenome-generate/pangenome-generate_spoa.cwl b/workflows/pangenome-generate/pangenome-generate_spoa.cwl
index 33bf64e..ed12254 100644
--- a/workflows/pangenome-generate/pangenome-generate_spoa.cwl
+++ b/workflows/pangenome-generate/pangenome-generate_spoa.cwl
@@ -19,9 +19,9 @@ outputs:
   odgiGraph:
     type: File
     outputSource: buildGraph/odgiGraph
-  odgiPNG:
-    type: File
-    outputSource: vizGraph/graph_image
+#  odgiPNG:
+#    type: File
+#    outputSource: vizGraph/graph_image
   spoaGFA:
     type: File
     outputSource: induceGraph/spoaGFA
@@ -34,16 +34,16 @@ outputs:
   mergedMetadata:
     type: File
     outputSource: dups2metadata/merged
-  indexed_paths:
-    type: File
-    outputSource: index_paths/indexed_paths
-  colinear_components:
-    type: Directory
-    outputSource: segment_components/colinear_components
+#  indexed_paths:
+#    type: File
+#    outputSource: index_paths/indexed_paths
+#  colinear_components:
+#    type: Directory
+#    outputSource: segment_components/colinear_components
 steps:
   dedup_and_sort_by_quality_and_len:
     in: {reads: seqs}
-    out: [reads_dedupped_sorted_by_quality_and_len, dups]
+    out: [sortedReadsFA, dups]
     run: sort_fasta_by_quality_and_len.cwl
   induceGraph:
     in:
@@ -54,19 +54,23 @@ steps:
     in: {inputGFA: induceGraph/spoaGFA}
     out: [odgiGraph]
     run: odgi-build-from-spoa-gfa.cwl
-  vizGraph:
-    in:
-      sparse_graph_index: buildGraph/odgiGraph
-      width:
-        default: 50000
-      height:
-        default: 500
-      path_per_row:
-        default: true
-      path_height:
-        default: 4
-    out: [graph_image]
-    run: ../tools/odgi/odgi_viz.cwl
+  # vizGraph:
+  #   in:
+  #     sparse_graph_index: buildGraph/odgiGraph
+  #     width:
+  #       default: 50000
+  #     height:
+  #       default: 500
+  #     path_per_row:
+  #       default: true
+  #     path_height:
+  #       default: 4
+  #   out: [graph_image]
+  #   requirements:
+  #     ResourceRequirement:
+  #       ramMin: $(15 * 1024)
+  #       outdirMin: 10
+  #   run: ../tools/odgi/odgi_viz.cwl
   odgi2rdf:
     in: {odgi: buildGraph/odgiGraph}
     out: [rdf]
@@ -77,29 +81,37 @@ steps:
       dups: dedup_and_sort_by_quality_and_len/dups
     out: [merged]
     run: dups2metadata.cwl
-  bin_paths:
-    run: ../tools/odgi/odgi_bin.cwl
-    in:
-      sparse_graph_index: buildGraph/odgiGraph
-      bin_width: bin_widths
-    scatter: bin_width
-    out: [ bins, pangenome_sequence ]
-  index_paths:
-    label: Create path index
-    run: ../tools/odgi/odgi_pathindex.cwl
-    in:
-      sparse_graph_index: buildGraph/odgiGraph
-    out: [ indexed_paths ]
-  segment_components:
-    label: Run component segmentation
-    run: ../tools/graph-genome-segmentation/component_segmentation.cwl
-    in:
-      bins: bin_paths/bins
-      cells_per_file: cells_per_file
-      pangenome_sequence:
-        source: bin_paths/pangenome_sequence
-        valueFrom: $(self[0])
-        # the bin_paths step is scattered over the bin_width array, but always using the same sparse_graph_index
-        # the pangenome_sequence that is extracted is exactly the same for the same sparse_graph_index
-        # regardless of bin_width, so we take the first pangenome_sequence as input for this step
-    out: [ colinear_components ]
+  # bin_paths:
+  #   requirements:
+  #     ResourceRequirement:
+  #       ramMin: 3000
+  #       outdirMin: 10
+  #   run: ../tools/odgi/odgi_bin.cwl
+  #   in:
+  #     sparse_graph_index: buildGraph/odgiGraph
+  #     bin_width: bin_widths
+  #   scatter: bin_width
+  #   out: [ bins, pangenome_sequence ]
+  # index_paths:
+  #   label: Create path index
+  #   requirements:
+  #     ResourceRequirement:
+  #       ramMin: 3000
+  #       outdirMin: 10
+  #   run: ../tools/odgi/odgi_pathindex.cwl
+  #   in:
+  #     sparse_graph_index: buildGraph/odgiGraph
+  #   out: [ indexed_paths ]
+  # segment_components:
+  #   label: Run component segmentation
+  #   run: ../tools/graph-genome-segmentation/component_segmentation.cwl
+  #   in:
+  #     bins: bin_paths/bins
+  #     cells_per_file: cells_per_file
+  #     pangenome_sequence:
+  #       source: bin_paths/pangenome_sequence
+  #       valueFrom: $(self[0])
+  #       # the bin_paths step is scattered over the bin_width array, but always using the same sparse_graph_index
+  #       # the pangenome_sequence that is extracted is exactly the same for the same sparse_graph_index
+  #       # regardless of bin_width, so we take the first pangenome_sequence as input for this step
+  #   out: [ colinear_components ]
diff --git a/workflows/pangenome-generate/sort_fasta_by_quality_and_len.cwl b/workflows/pangenome-generate/sort_fasta_by_quality_and_len.cwl
index f8da5d3..9d9b31d 100644
--- a/workflows/pangenome-generate/sort_fasta_by_quality_and_len.cwl
+++ b/workflows/pangenome-generate/sort_fasta_by_quality_and_len.cwl
@@ -16,6 +16,9 @@ stdout: $(inputs.readsFA.nameroot).sorted_by_quality_and_len.fasta
 outputs:
   sortedReadsFA:
     type: stdout
+  dups:
+    type: File
+    outputBinding: {glob: dups.txt}
 requirements:
   InlineJavascriptRequirement: {}
   ShellCommandRequirement: {}
diff --git a/workflows/pangenome-generate/spoa.cwl b/workflows/pangenome-generate/spoa.cwl
index 132633c..150227d 100644
--- a/workflows/pangenome-generate/spoa.cwl
+++ b/workflows/pangenome-generate/spoa.cwl
@@ -10,10 +10,9 @@ outputs:
     type: stdout
 requirements:
   InlineJavascriptRequirement: {}
-  ShellCommandRequirement: {}
 hints:
   DockerRequirement:
-    dockerPull: "quay.io/biocontainers/spoa:3.0.2--hc9558a2_0"
+    dockerPull: "quay.io/biocontainers/spoa:3.4.0--hc9558a2_0"
   ResourceRequirement:
     coresMin: 1
     ramMin: $(15 * 1024)
diff --git a/workflows/tools b/workflows/tools
-Subproject 61ffac1862822f08dc20b6f8e2f22634b986b0b
+Subproject c67c011765bea798a24485cbe0a1c6c59243652