diff options
author | Pjotr Prins | 2020-05-12 13:55:55 -0500 |
---|---|---|
committer | Pjotr Prins | 2020-05-12 13:55:55 -0500 |
commit | 8e9042879db7fe061f3d18737f4102b5fd0b8c65 (patch) | |
tree | 978645b86e805c218451beaf194bfd97f2fbd9d8 | |
parent | 40ca03045bbc4f6fb1258acb6f42a60ee5532e0d (diff) | |
parent | 55bab826fda6c6bc3190476f8e2c61e51ef092d6 (diff) | |
download | bh20-seq-resource-8e9042879db7fe061f3d18737f4102b5fd0b8c65.tar.gz bh20-seq-resource-8e9042879db7fe061f3d18737f4102b5fd0b8c65.tar.lz bh20-seq-resource-8e9042879db7fe061f3d18737f4102b5fd0b8c65.zip |
Merge branch 'master' of github.com:arvados/bh20-seq-resource
-rw-r--r-- | .gitmodules | 3 | ||||
-rw-r--r-- | bh20sequploader/bh20seq-schema.yml | 2 | ||||
-rw-r--r-- | bh20sequploader/bh20seq-shex.rdf | 2 | ||||
-rw-r--r-- | workflows/pangenome-generate/minimap2.cwl | 23 | ||||
-rw-r--r-- | workflows/pangenome-generate/odgi-viz.cwl | 25 | ||||
-rw-r--r-- | workflows/pangenome-generate/odgi_to_rdf.cwl | 9 | ||||
-rw-r--r-- | workflows/pangenome-generate/pangenome-generate.cwl | 83 | ||||
-rw-r--r-- | workflows/pangenome-generate/seqkit-rmdup.cwl | 37 | ||||
m--------- | workflows/tools | 0 |
9 files changed, 78 insertions, 106 deletions
diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000..c7d7f99 --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "workflows/tools"] + path = workflows/tools + url = https://github.com/common-workflow-library/bio-cwl-tools.git diff --git a/bh20sequploader/bh20seq-schema.yml b/bh20sequploader/bh20seq-schema.yml index 99e1a11..8a16bd3 100644 --- a/bh20sequploader/bh20seq-schema.yml +++ b/bh20sequploader/bh20seq-schema.yml @@ -25,7 +25,7 @@ $graph: jsonldPredicate: _id: http://semanticscience.org/resource/SIO_000115 host_sex: - doc: Sex of the host as defined in PATO, expect male () or female () + doc: Sex of the host as defined in PATO, expect Male (http://purl.obolibrary.org/obo/PATO_0000384) or Female (http://purl.obolibrary.org/obo/PATO_0000383) or in Intersex (http://purl.obolibrary.org/obo/PATO_0001340) type: string? jsonldPredicate: _id: http://purl.obolibrary.org/obo/PATO_0000047 diff --git a/bh20sequploader/bh20seq-shex.rdf b/bh20sequploader/bh20seq-shex.rdf index cdf2296..a017805 100644 --- a/bh20sequploader/bh20seq-shex.rdf +++ b/bh20sequploader/bh20seq-shex.rdf @@ -20,7 +20,7 @@ PREFIX wikidata: <http://www.wikidata.org/entity/> :hostShape { efo:EFO_0000532 [ obo:NCBITaxon_~ ] ; sio:SIO_000115 xsd:string ?; - obo:PATO_0000047 [ obo:PATO_0000384 obo:PATO_0000383 ] ?; + obo:PATO_0000047 [ obo:PATO_0000384 obo:PATO_0000383 obo:PATO_0001340] ?; obo:PATO_0000011 xsd:integer ?; obo:NCIT_C42574 [ obo:UO_~ ] ?; obo:NCIT_C25688 [obo:NCIT_C115935 obo:NCIT_C3833 obo:NCIT_C25269 obo:GENEPIO_0002020 obo:GENEPIO_0001849 obo:NCIT_C28554 obo:NCIT_C37987 ] ? ; diff --git a/workflows/pangenome-generate/minimap2.cwl b/workflows/pangenome-generate/minimap2.cwl deleted file mode 100644 index bf8eb4c..0000000 --- a/workflows/pangenome-generate/minimap2.cwl +++ /dev/null @@ -1,23 +0,0 @@ -cwlVersion: v1.1 -class: CommandLineTool -inputs: - readsFA: File -outputs: - readsPAF: stdout -requirements: - InlineJavascriptRequirement: {} -hints: - DockerRequirement: - dockerPull: "quay.io/biocontainers/minimap2:2.17--h8b12597_1" - ResourceRequirement: - coresMin: 8 - coresMax: 32 - ramMin: $(15 * 1024) - outdirMin: $(Math.ceil(inputs.readsFA.size/(1024*1024*1024) + 20)) -stdout: $(inputs.readsFA.nameroot).paf -baseCommand: minimap2 -arguments: [-cx, asm20, - -w, "1", - -t, $(runtime.cores), - $(inputs.readsFA), - $(inputs.readsFA)] diff --git a/workflows/pangenome-generate/odgi-viz.cwl b/workflows/pangenome-generate/odgi-viz.cwl deleted file mode 100644 index d440fcb..0000000 --- a/workflows/pangenome-generate/odgi-viz.cwl +++ /dev/null @@ -1,25 +0,0 @@ -cwlVersion: v1.1 -class: CommandLineTool -inputs: - inputODGI: File -outputs: - odgiPNG: - type: File - outputBinding: - glob: $(inputs.inputODGI.nameroot).png -requirements: - InlineJavascriptRequirement: {} -hints: - DockerRequirement: - dockerPull: "quay.io/biocontainers/odgi:v0.3--py37h8b12597_0" - ResourceRequirement: - coresMin: 4 - ramMin: $(7 * 1024) - outdirMin: 1 -baseCommand: [odgi, viz] -arguments: [-i, $(inputs.inputODGI), - -o, $(inputs.inputODGI.nameroot).png, - -x, "50000", - -y, "500", - -R, - -P, "4"] diff --git a/workflows/pangenome-generate/odgi_to_rdf.cwl b/workflows/pangenome-generate/odgi_to_rdf.cwl index e6a279b..9a2e912 100644 --- a/workflows/pangenome-generate/odgi_to_rdf.cwl +++ b/workflows/pangenome-generate/odgi_to_rdf.cwl @@ -10,10 +10,8 @@ requirements: ResourceRequirement: ramMin: $((2 * 1024) + 1) inputs: - - id: odgi - type: File - - id: output_name - type: string? + odgi: File + output_name: string? stdout: $(inputs.output_name || inputs.odgi.nameroot+'.ttl.xz') @@ -23,5 +21,4 @@ arguments: xz, --stdout] outputs: - - id: rdf - type: stdout + rdf: stdout diff --git a/workflows/pangenome-generate/pangenome-generate.cwl b/workflows/pangenome-generate/pangenome-generate.cwl index 6794e2d..1d49904 100644 --- a/workflows/pangenome-generate/pangenome-generate.cwl +++ b/workflows/pangenome-generate/pangenome-generate.cwl @@ -1,17 +1,29 @@ +#!/usr/bin/env cwl-runner cwlVersion: v1.1 class: Workflow +requirements: + ScatterFeatureRequirement: {} + StepInputExpressionRequirement: {} inputs: inputReads: File[] metadata: File[] metadataSchema: File subjects: string[] + bin_widths: + type: int[] + default: [ 1, 4, 16, 64, 256, 1000, 4000, 16000] + doc: width of each bin in basepairs along the graph vector + cells_per_file: + type: int + default: 100 + doc: Cells per file on component_segmentation outputs: odgiGraph: type: File outputSource: buildGraph/odgiGraph odgiPNG: type: File - outputSource: vizGraph/odgiPNG + outputSource: vizGraph/graph_image seqwishGFA: type: File outputSource: induceGraph/seqwishGFA @@ -20,10 +32,16 @@ outputs: outputSource: odgi2rdf/rdf readsMergeDedup: type: File - outputSource: dedup/readsMergeDedup + outputSource: dedup/reads_dedup mergedMetadata: type: File outputSource: mergeMetadata/merged + indexed_paths: + type: File + outputSource: index_paths/indexed_paths + colinear_components: + type: Directory + outputSource: segment_components/colinear_components steps: relabel: in: @@ -32,17 +50,21 @@ steps: out: [relabeledSeqs, originalLabels] run: relabel-seqs.cwl dedup: - in: {readsFA: relabel/relabeledSeqs} - out: [readsMergeDedup, dups] - run: seqkit-rmdup.cwl + in: {reads: relabel/relabeledSeqs} + out: [reads_dedup, dups] + run: ../tools/seqkit/seqkit_rmdup.cwl overlapReads: - in: {readsFA: dedup/readsMergeDedup} - out: [readsPAF] - run: minimap2.cwl + in: + target: dedup/reads_dedup + query: dedup/reads_dedup + outputCIGAR: + default: true + out: [alignments] + run: ../tools/minimap2/minimap2_paf.cwl induceGraph: in: - readsFA: dedup/readsMergeDedup - readsPAF: overlapReads/readsPAF + readsFA: dedup/reads_dedup + readsPAF: overlapReads/alignments out: [seqwishGFA] run: seqwish.cwl buildGraph: @@ -50,9 +72,18 @@ steps: out: [odgiGraph] run: odgi-build.cwl vizGraph: - in: {inputODGI: buildGraph/odgiGraph} - out: [odgiPNG] - run: odgi-viz.cwl + in: + sparse_graph_index: buildGraph/odgiGraph + width: + default: 50000 + height: + default: 500 + path_per_row: + default: true + path_height: + default: 4 + out: [graph_image] + run: ../tools/odgi/odgi_viz.cwl odgi2rdf: in: {odgi: buildGraph/odgiGraph} out: [rdf] @@ -66,3 +97,29 @@ steps: originalLabels: relabel/originalLabels out: [merged] run: merge-metadata.cwl + bin_paths: + run: ../tools/odgi/odgi_bin.cwl + in: + sparse_graph_index: buildGraph/odgiGraph + bin_width: bin_widths + scatter: bin_width + out: [ bins, pangenome_sequence ] + index_paths: + label: Create path index + run : ../tools/odgi/odgi_pathindex.cwl + in: + sparse_graph_index: buildGraph/odgiGraph + out: [ indexed_paths ] + segment_components: + label: Run component segmentation + run: ../tools/graph-genome-segmentation/component_segmentation.cwl + in: + bins: bin_paths/bins + cells_per_file: cells_per_file + pangenome_sequence: + source: bin_paths/pangenome_sequence + valueFrom: $(self[0]) + # the bin_paths step is scattered over the bin_width array, but always using the same sparse_graph_index + # the pangenome_sequence that is extracted is exactly the same for the same sparse_graph_index + # regardless of bin_width, so we take the first pangenome_sequence as input for this step + out: [ colinear_components ] diff --git a/workflows/pangenome-generate/seqkit-rmdup.cwl b/workflows/pangenome-generate/seqkit-rmdup.cwl deleted file mode 100644 index 071fa66..0000000 --- a/workflows/pangenome-generate/seqkit-rmdup.cwl +++ /dev/null @@ -1,37 +0,0 @@ -cwlVersion: v1.1 -class: CommandLineTool -inputs: - readsFA: File -outputs: - readsMergeDedup: - type: File - outputBinding: - glob: readsMergeDedup.fasta - dups: - type: File? - outputBinding: - glob: dups.txt -requirements: - InlineJavascriptRequirement: {} -hints: - DockerRequirement: - dockerPull: "quay.io/biocontainers/seqkit:0.7.1--0" - ResourceRequirement: - coresMin: 8 - coresMax: 32 - ramMin: $(7 * 1024) - outdirMin: | - ${ - var sum = 0; - for (var i = 0; i < inputs.readsFA.length; i++) { - sum += inputs.readsFA[i].size; - } - return (sum/(1024*1024*1024)+1) + 20; - } -baseCommand: seqkit -arguments: [rmdup, - --by-seq, - --ignore-case, - --dup-num-file, dups.txt, - -o, readsMergeDedup.fasta, - $(inputs.readsFA)] diff --git a/workflows/tools b/workflows/tools new file mode 160000 +Subproject 659e174d0d42ed6b9afd79d9e6f68e225c526d1 |