From 4618a271193fa5cd5cb1e10d6f8acee9c6e8132a Mon Sep 17 00:00:00 2001
From: Michael R. Crusoe
Date: Mon, 4 May 2020 18:37:52 +0200
Subject: add pangenome browser prep

---
Reviewer notes (text between the "---" line and the first "diff --git" line
is not recorded in the commit):

  * .gitmodules, workflows/tools: registers bio-cwl-tools as a submodule at
    workflows/tools, pinned to commit 45c2d6d. The new workflow steps run
    tool descriptions from it through ../tools/... relative paths.
  * pangenome-generate.cwl: adds the inputs bin_widths and cells_per_file,
    the outputs indexed_paths and colinear_components, and the steps
    bin_paths (scattered over bin_width), index_paths and segment_components.
  * ScatterFeatureRequirement is declared for the scatter on bin_paths;
    StepInputExpressionRequirement for the valueFrom expression on
    segment_components/pangenome_sequence.
  * "run :" in index_paths (space before the colon) is kept exactly as
    committed; it is valid YAML and parses as the key "run".
  * NOTE(review): leading whitespace inside the hunks was reconstructed from
    the hunk line counts and the CWL nesting - confirm against blobs
    c7d7f99 and 05e3511 before applying.
  * NOTE(review): the From: header carries no address in this copy.

 .gitmodules                                        |  3 ++
 .../pangenome-generate/pangenome-generate.cwl      | 44 ++++++++++++++++++++++
 workflows/tools                                    |  1 +
 3 files changed, 48 insertions(+)
 create mode 100644 .gitmodules
 create mode 160000 workflows/tools

diff --git a/.gitmodules b/.gitmodules
new file mode 100644
index 0000000..c7d7f99
--- /dev/null
+++ b/.gitmodules
@@ -0,0 +1,3 @@
+[submodule "workflows/tools"]
+	path = workflows/tools
+	url = https://github.com/common-workflow-library/bio-cwl-tools.git
diff --git a/workflows/pangenome-generate/pangenome-generate.cwl b/workflows/pangenome-generate/pangenome-generate.cwl
index 6794e2d..05e3511 100644
--- a/workflows/pangenome-generate/pangenome-generate.cwl
+++ b/workflows/pangenome-generate/pangenome-generate.cwl
@@ -1,10 +1,22 @@
+#!/usr/bin/env cwl-runner
 cwlVersion: v1.1
 class: Workflow
+requirements:
+  ScatterFeatureRequirement: {}
+  StepInputExpressionRequirement: {}
 inputs:
   inputReads: File[]
   metadata: File[]
   metadataSchema: File
   subjects: string[]
+  bin_widths:
+    type: int[]
+    default: [ 1, 4, 16, 64, 256, 1000, 4000, 16000]
+    doc: width of each bin in basepairs along the graph vector
+  cells_per_file:
+    type: int
+    default: 100
+    doc: Cells per file on component_segmentation
 outputs:
   odgiGraph:
     type: File
@@ -24,6 +36,12 @@ outputs:
   mergedMetadata:
     type: File
     outputSource: mergeMetadata/merged
+  indexed_paths:
+    type: File
+    outputSource: index_paths/indexed_paths
+  colinear_components:
+    type: File[]
+    outputSource: segment_components/colinear_components
 steps:
   relabel:
     in:
@@ -66,3 +84,29 @@ steps:
       originalLabels: relabel/originalLabels
     out: [merged]
     run: merge-metadata.cwl
+  bin_paths:
+    run: ../tools/odgi/odgi_bin.cwl
+    in:
+      sparse_graph_index: buildGraph/odgiGraph
+      bin_width: bin_widths
+    scatter: bin_width
+    out: [ bins, pangenome_sequence ]
+  index_paths:
+    label: Create path index
+    run : ../tools/odgi/odgi_pathindex.cwl
+    in:
+      sparse_graph_index: buildGraph/odgiGraph
+    out: [ indexed_paths ]
+  segment_components:
+    label: Run component segmentation
+    run: ../tools/graph-genome-segmentation/component_segmentation.cwl
+    in:
+      bins: bin_paths/bins
+      cells_per_file: cells_per_file
+      pangenome_sequence:
+        source: bin_paths/pangenome_sequence
+        valueFrom: $(self[0])
+        # the bin_paths step is scattered over the bin_width array, but always using the same sparse_graph_index
+        # the pangenome_sequence that is extracted is exactly the same for the same sparse_graph_index
+        # regardless of bin_width, so we take the first pangenome_sequence as input for this step
+    out: [ colinear_components ]
diff --git a/workflows/tools b/workflows/tools
new file mode 160000
index 0000000..45c2d6d
--- /dev/null
+++ b/workflows/tools
@@ -0,0 +1 @@
+Subproject commit 45c2d6dab199cb931b28c54863924ff3cf9079c1
-- 
cgit v1.2.3