From d1e8809a15ae74d0b847abb80f9f63f53078e1d6 Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Wed, 19 Aug 2020 15:15:57 -0400 Subject: Scaling pangenome generation Arvados-DCO-1.1-Signed-off-by: Peter Amstutz --- .../pangenome-generate/pangenome-generate_spoa.cwl | 110 ++++++++++++--------- 1 file changed, 61 insertions(+), 49 deletions(-) (limited to 'workflows/pangenome-generate/pangenome-generate_spoa.cwl') diff --git a/workflows/pangenome-generate/pangenome-generate_spoa.cwl b/workflows/pangenome-generate/pangenome-generate_spoa.cwl index 33bf64e..ed12254 100644 --- a/workflows/pangenome-generate/pangenome-generate_spoa.cwl +++ b/workflows/pangenome-generate/pangenome-generate_spoa.cwl @@ -19,9 +19,9 @@ outputs: odgiGraph: type: File outputSource: buildGraph/odgiGraph - odgiPNG: - type: File - outputSource: vizGraph/graph_image +# odgiPNG: +# type: File +# outputSource: vizGraph/graph_image spoaGFA: type: File outputSource: induceGraph/spoaGFA @@ -34,16 +34,16 @@ outputs: mergedMetadata: type: File outputSource: dups2metadata/merged - indexed_paths: - type: File - outputSource: index_paths/indexed_paths - colinear_components: - type: Directory - outputSource: segment_components/colinear_components +# indexed_paths: +# type: File +# outputSource: index_paths/indexed_paths +# colinear_components: +# type: Directory +# outputSource: segment_components/colinear_components steps: dedup_and_sort_by_quality_and_len: in: {reads: seqs} - out: [reads_dedupped_sorted_by_quality_and_len, dups] + out: [sortedReadsFA, dups] run: sort_fasta_by_quality_and_len.cwl induceGraph: in: @@ -54,19 +54,23 @@ steps: in: {inputGFA: induceGraph/spoaGFA} out: [odgiGraph] run: odgi-build-from-spoa-gfa.cwl - vizGraph: - in: - sparse_graph_index: buildGraph/odgiGraph - width: - default: 50000 - height: - default: 500 - path_per_row: - default: true - path_height: - default: 4 - out: [graph_image] - run: ../tools/odgi/odgi_viz.cwl + # vizGraph: + # in: + # sparse_graph_index: buildGraph/odgiGraph + # width: + # default: 50000 + # height: + # default: 500 + # path_per_row: + # default: true + # path_height: + # default: 4 + # out: [graph_image] + # requirements: + # ResourceRequirement: + # ramMin: $(15 * 1024) + # outdirMin: 10 + # run: ../tools/odgi/odgi_viz.cwl odgi2rdf: in: {odgi: buildGraph/odgiGraph} out: [rdf] @@ -77,29 +81,37 @@ steps: dups: dedup_and_sort_by_quality_and_len/dups out: [merged] run: dups2metadata.cwl - bin_paths: - run: ../tools/odgi/odgi_bin.cwl - in: - sparse_graph_index: buildGraph/odgiGraph - bin_width: bin_widths - scatter: bin_width - out: [ bins, pangenome_sequence ] - index_paths: - label: Create path index - run: ../tools/odgi/odgi_pathindex.cwl - in: - sparse_graph_index: buildGraph/odgiGraph - out: [ indexed_paths ] - segment_components: - label: Run component segmentation - run: ../tools/graph-genome-segmentation/component_segmentation.cwl - in: - bins: bin_paths/bins - cells_per_file: cells_per_file - pangenome_sequence: - source: bin_paths/pangenome_sequence - valueFrom: $(self[0]) - # the bin_paths step is scattered over the bin_width array, but always using the same sparse_graph_index - # the pangenome_sequence that is extracted is exactly the same for the same sparse_graph_index - # regardless of bin_width, so we take the first pangenome_sequence as input for this step - out: [ colinear_components ] + # bin_paths: + # requirements: + # ResourceRequirement: + # ramMin: 3000 + # outdirMin: 10 + # run: ../tools/odgi/odgi_bin.cwl + # in: + # sparse_graph_index: buildGraph/odgiGraph + # bin_width: bin_widths + # scatter: bin_width + # out: [ bins, pangenome_sequence ] + # index_paths: + # label: Create path index + # requirements: + # ResourceRequirement: + # ramMin: 3000 + # outdirMin: 10 + # run: ../tools/odgi/odgi_pathindex.cwl + # in: + # sparse_graph_index: buildGraph/odgiGraph + # out: [ indexed_paths ] + # segment_components: + # label: Run component segmentation + # run: ../tools/graph-genome-segmentation/component_segmentation.cwl + # in: + # bins: bin_paths/bins + # cells_per_file: cells_per_file + # pangenome_sequence: + # source: bin_paths/pangenome_sequence + # valueFrom: $(self[0]) + # # the bin_paths step is scattered over the bin_width array, but always using the same sparse_graph_index + # # the pangenome_sequence that is extracted is exactly the same for the same sparse_graph_index + # # regardless of bin_width, so we take the first pangenome_sequence as input for this step + # out: [ colinear_components ] -- cgit v1.2.3