From d781e42c9adac07253cb928ae66e9b7314710267 Mon Sep 17 00:00:00 2001
From: Peter Amstutz
Date: Mon, 20 Apr 2020 12:55:18 -0400
Subject: Move workflows into main repo

Arvados-DCO-1.1-Signed-off-by: Peter Amstutz
---
Review notes (not part of the original commit message): the line structure of
this patch was restored and the single-line fixes listed below were applied to
the added files. Per-file line counts are unchanged; the abbreviated blob ids
on the "index" lines of the affected files still refer to the original
contents.
 * bcftools-consensus.cwl, bcftools-view-exclude-ref.cwl: the filter
   expression is passed as one argument without literal shell-style quotes
   (CWL arguments are not interpreted by a shell).
 * samtools-faidx.cwl: doc string and log file names no longer say "sort".
 * bwa-index.cwl, bwa-mem.cwl: placeholder "doc: string" replaced.
 * merge-metadata.py: the loop no longer rebinds "metadata".
 * seqkit-rmdup.cwl: outdirMin is rounded up like in the sibling tools.

 workflows/fastq2fasta/bam2fasta.cwl             | 65 ++++++++++++++++++++++
 workflows/fastq2fasta/bcftools-concat.cwl       | 24 ++++++++
 workflows/fastq2fasta/bcftools-consensus.cwl    | 24 ++++++++
 workflows/fastq2fasta/bcftools-index.cwl        | 23 ++++++++
 workflows/fastq2fasta/bcftools-norm.cwl         | 32 +++++++++++
 .../fastq2fasta/bcftools-view-exclude-ref.cwl   | 23 ++++++++
 workflows/fastq2fasta/bcftools-view-qc.cwl      | 24 ++++++++
 workflows/fastq2fasta/bcftools-view.cwl         | 19 +++++++
 workflows/fastq2fasta/bwa-index.cwl             | 32 +++++++++++
 workflows/fastq2fasta/bwa-mem.cwl               | 59 ++++++++++++++++++++
 .../fastq2fasta/fastq2fasta-create-bwaindex.cwl | 41 ++++++++++++++
 workflows/fastq2fasta/fastq2fasta.cwl           | 61 ++++++++++++++++++++
 workflows/fastq2fasta/freebayes.cwl             | 25 +++++++++
 workflows/fastq2fasta/samtools-faidx.cwl        | 39 +++++++++++++
 workflows/fastq2fasta/samtools-sort.cwl         | 41 ++++++++++++++
 workflows/fastq2fasta/samtools-view.cwl         | 63 +++++++++++++++++++++
 workflows/pangenome-generate/merge-metadata.cwl | 18 ++++++
 workflows/pangenome-generate/merge-metadata.py  | 17 ++++++
 workflows/pangenome-generate/minimap2.cwl       | 23 ++++++++
 workflows/pangenome-generate/odgi-build.cwl     | 26 +++++++++
 workflows/pangenome-generate/odgi-viz.cwl       | 25 +++++++++
 workflows/pangenome-generate/odgi_to_rdf.cwl    | 25 +++++++++
 .../pangenome-generate/pangenome-generate.cwl   | 57 +++++++++++++++++++
 workflows/pangenome-generate/seqkit-rmdup.cwl   | 32 +++++++++++
 workflows/pangenome-generate/seqwish.cwl        | 29 ++++++++++
 workflows/update-workflows.sh                   |  3 +
 26 files changed, 850 insertions(+)
 create mode 100644 workflows/fastq2fasta/bam2fasta.cwl
 create mode 100644 workflows/fastq2fasta/bcftools-concat.cwl
 create mode 100644 workflows/fastq2fasta/bcftools-consensus.cwl
 create mode 100644 workflows/fastq2fasta/bcftools-index.cwl
 create mode 100644 workflows/fastq2fasta/bcftools-norm.cwl
 create mode 100644 workflows/fastq2fasta/bcftools-view-exclude-ref.cwl
 create mode 100644 workflows/fastq2fasta/bcftools-view-qc.cwl
 create mode 100644 workflows/fastq2fasta/bcftools-view.cwl
 create mode 100644 workflows/fastq2fasta/bwa-index.cwl
 create mode 100644 workflows/fastq2fasta/bwa-mem.cwl
 create mode 100644 workflows/fastq2fasta/fastq2fasta-create-bwaindex.cwl
 create mode 100644 workflows/fastq2fasta/fastq2fasta.cwl
 create mode 100644 workflows/fastq2fasta/freebayes.cwl
 create mode 100644 workflows/fastq2fasta/samtools-faidx.cwl
 create mode 100644 workflows/fastq2fasta/samtools-sort.cwl
 create mode 100644 workflows/fastq2fasta/samtools-view.cwl
 create mode 100644 workflows/pangenome-generate/merge-metadata.cwl
 create mode 100644 workflows/pangenome-generate/merge-metadata.py
 create mode 100644 workflows/pangenome-generate/minimap2.cwl
 create mode 100644 workflows/pangenome-generate/odgi-build.cwl
 create mode 100644 workflows/pangenome-generate/odgi-viz.cwl
 create mode 100644 workflows/pangenome-generate/odgi_to_rdf.cwl
 create mode 100644 workflows/pangenome-generate/pangenome-generate.cwl
 create mode 100644 workflows/pangenome-generate/seqkit-rmdup.cwl
 create mode 100644 workflows/pangenome-generate/seqwish.cwl
 create mode 100755 workflows/update-workflows.sh

diff --git a/workflows/fastq2fasta/bam2fasta.cwl b/workflows/fastq2fasta/bam2fasta.cwl
new file mode 100644
index 0000000..efe580f
--- /dev/null
+++ b/workflows/fastq2fasta/bam2fasta.cwl
@@ -0,0 +1,65 @@
+# Reference:
+# https://github.com/VGP/vgp-assembly/blob/33cd6236a68a1aee5f282e365dfe6b97e0b4ebb7/pipeline/freebayes-polish/freebayes.sh
+# https://github.com/VGP/vgp-assembly/blob/33cd6236a68a1aee5f282e365dfe6b97e0b4ebb7/pipeline/freebayes-polish/consensus.sh
+class: Workflow
+cwlVersion: v1.1
+id: bam2fasta
+label: bam2fasta
+requirements: []
+
+inputs:
+  bam:
+    type: File
+  fasta:
+    type: File
+  threads:
+    type: int
+    default: 4
+
+outputs:
+  out_fasta:
+    type: File
+    outputSource: bcftools_consensus/out_fasta
+
+steps:
+  freebayes:
+    in:
+      bam: bam
+      ref_fasta: fasta
+    out: [vcf]
+    run: freebayes.cwl
+  bcftools_view_exclude_ref:
+    in:
+      vcf: freebayes/vcf
+      threads: threads
+    out: [bcf]
+    run: bcftools-view-exclude-ref.cwl
+  bcftools_norm:
+    in:
+      ref_fasta: fasta
+      bcf: bcftools_view_exclude_ref/bcf
+      threads: threads
+    out: [normalized_bcf]
+    run: bcftools-norm.cwl
+  bcftools_index_after_normalization:
+    in:
+      bcf: bcftools_norm/normalized_bcf
+    out: [indexed]
+    run: bcftools-index.cwl
+  bcftools_view_qc:
+    in:
+      bcf: bcftools_index_after_normalization/indexed
+      threads: threads
+    out: [vcf]
+    run: bcftools-view-qc.cwl
+  bcftools_index_after_qc:
+    in:
+      bcf: bcftools_view_qc/vcf
+    out: [indexed]
+    run: bcftools-index.cwl
+  bcftools_consensus:
+    in:
+      ref_fasta: fasta
+      vcf: bcftools_index_after_qc/indexed
+    out: [out_fasta]
+    run: bcftools-consensus.cwl
diff --git a/workflows/fastq2fasta/bcftools-concat.cwl b/workflows/fastq2fasta/bcftools-concat.cwl
new file mode 100644
index 0000000..fc33a30
--- /dev/null
+++ b/workflows/fastq2fasta/bcftools-concat.cwl
@@ -0,0 +1,24 @@
+#!/usr/bin/env cwl-runner
+class: CommandLineTool
+cwlVersion: v1.1
+hints:
+  DockerRequirement:
+    dockerPull: "quay.io/biocontainers/bcftools:1.10.2--hd2cd319_0"
+baseCommand: bcftools
+arguments:
+  - concat
+  - -Ou
+  - -o
+  - $(inputs.output_name)
+  - $(inputs.bcf_files)
+inputs:
+  - id: output_name
+    type: string
+    default: "merged.bcf"
+  - id: bcf_files
+    type: File[]
+outputs:
+  - id: merged_bcf
+    type: File
+    outputBinding:
+      glob: "$(inputs.output_name)"
diff --git a/workflows/fastq2fasta/bcftools-consensus.cwl b/workflows/fastq2fasta/bcftools-consensus.cwl
new file mode 100644
index 0000000..c111792
--- /dev/null
+++ b/workflows/fastq2fasta/bcftools-consensus.cwl
@@ -0,0 +1,24 @@
+#!/usr/bin/env cwl-runner
+class: CommandLineTool
+cwlVersion: v1.1
+hints:
+  DockerRequirement:
+    dockerPull: "quay.io/biocontainers/bcftools:1.10.2--hd2cd319_0"
+baseCommand: bcftools
+arguments:
+  - consensus
+  - '-iQUAL > 1 && GT="A"'
+  - -Hla
+  - -f
+  - $(inputs.ref_fasta)
+  - $(inputs.vcf)
+inputs:
+  - id: ref_fasta
+    type: File
+  - id: vcf
+    type: File
+    secondaryFiles: [.csi]
+outputs:
+  - id: out_fasta
+    type: stdout
+stdout: sequence.fasta
diff --git a/workflows/fastq2fasta/bcftools-index.cwl b/workflows/fastq2fasta/bcftools-index.cwl
new file mode 100644
index 0000000..396b680
--- /dev/null
+++ b/workflows/fastq2fasta/bcftools-index.cwl
@@ -0,0 +1,23 @@
+#!/usr/bin/env cwl-runner
+class: CommandLineTool
+cwlVersion: v1.1
+hints:
+  DockerRequirement:
+    dockerPull: "quay.io/biocontainers/bcftools:1.10.2--hd2cd319_0"
+  InitialWorkDirRequirement:
+    listing:
+      - $(inputs.bcf)
+baseCommand: bcftools
+arguments:
+  - index
+  - $(inputs.bcf)
+inputs:
+  - id: bcf
+    type: File
+outputs:
+  - id: indexed
+    type: File
+    outputBinding:
+      glob: "$(inputs.bcf.basename)"
+    secondaryFiles:
+      - .csi
diff --git a/workflows/fastq2fasta/bcftools-norm.cwl b/workflows/fastq2fasta/bcftools-norm.cwl
new file mode 100644
index 0000000..a765e65
--- /dev/null
+++ b/workflows/fastq2fasta/bcftools-norm.cwl
@@ -0,0 +1,32 @@
+#!/usr/bin/env cwl-runner
+class: CommandLineTool
+cwlVersion: v1.1
+hints:
+  DockerRequirement:
+    dockerPull: "quay.io/biocontainers/bcftools:1.10.2--hd2cd319_0"
+baseCommand: bcftools
+arguments:
+  - norm
+  - -Ob
+  - -f
+  - $(inputs.ref_fasta)
+  - -o
+  - $(inputs.output_name)
+  - --threads
+  - $(inputs.threads)
+  - $(inputs.bcf)
+inputs:
+  - id: ref_fasta
+    type: File
+  - id: output_name
+    type: string
+    default: "normalized.bcf"
+  - id: threads
+    type: int
+  - id: bcf
+    type: File
+outputs:
+  - id: normalized_bcf
+    type: File
+    outputBinding:
+      glob: "$(inputs.output_name)"
diff --git a/workflows/fastq2fasta/bcftools-view-exclude-ref.cwl b/workflows/fastq2fasta/bcftools-view-exclude-ref.cwl
new file mode 100644
index 0000000..849bd88
--- /dev/null
+++ b/workflows/fastq2fasta/bcftools-view-exclude-ref.cwl
@@ -0,0 +1,23 @@
+#!/usr/bin/env cwl-runner
+class: CommandLineTool
+cwlVersion: v1.1
+hints:
+  DockerRequirement:
+    dockerPull: "quay.io/biocontainers/bcftools:1.10.2--hd2cd319_0"
+baseCommand: bcftools
+arguments:
+  - view
+  - --no-version
+  - -Ou
+  - '-etype="ref"'
+  - --threads=$(inputs.threads)
+  - $(inputs.vcf)
+inputs:
+  - id: vcf
+    type: File
+  - id: threads
+    type: int
+outputs:
+  - id: bcf
+    type: stdout
+stdout: $(inputs.vcf.nameroot).without-ref.bcf
diff --git a/workflows/fastq2fasta/bcftools-view-qc.cwl b/workflows/fastq2fasta/bcftools-view-qc.cwl
new file mode 100644
index 0000000..477c596
--- /dev/null
+++ b/workflows/fastq2fasta/bcftools-view-qc.cwl
@@ -0,0 +1,24 @@
+#!/usr/bin/env cwl-runner
+class: CommandLineTool
+cwlVersion: v1.1
+hints:
+  DockerRequirement:
+    dockerPull: "quay.io/biocontainers/bcftools:1.10.2--hd2cd319_0"
+baseCommand: bcftools
+arguments:
+  - view
+  - -i
+  - 'QUAL>1 && (GT="AA" || GT="Aa")'
+  - -Oz
+  - --threads=$(inputs.threads)
+  - $(inputs.bcf)
+inputs:
+  - id: threads
+    type: int
+  - id: bcf
+    type: File
+    secondaryFiles: [.csi]
+outputs:
+  - id: vcf
+    type: stdout
+stdout: out.changes.vcf.gz
diff --git a/workflows/fastq2fasta/bcftools-view.cwl b/workflows/fastq2fasta/bcftools-view.cwl
new file mode 100644
index 0000000..2d1a51f
--- /dev/null
+++ b/workflows/fastq2fasta/bcftools-view.cwl
@@ -0,0 +1,19 @@
+#!/usr/bin/env cwl-runner
+class: CommandLineTool
+cwlVersion: v1.1
+hints:
+  DockerRequirement:
+    dockerPull: "quay.io/biocontainers/bcftools:1.10.2--hd2cd319_0"
+baseCommand: bcftools
+arguments:
+  - view
+  - --no-version
+  - -Ou
+  - $(inputs.vcf)
+inputs:
+  - id: vcf
+    type: File
+outputs:
+  - id: bcf
+    type: stdout
+stdout: $(inputs.vcf.nameroot).bcf
diff --git a/workflows/fastq2fasta/bwa-index.cwl b/workflows/fastq2fasta/bwa-index.cwl
new file mode 100644
index 0000000..775ba8d
--- /dev/null
+++ b/workflows/fastq2fasta/bwa-index.cwl
@@ -0,0 +1,32 @@
+#!/usr/bin/env cwl-runner
+cwlVersion: v1.1
+class: CommandLineTool
+doc: "bwa index, build the bwa index files for the given fasta file"
+requirements:
+  DockerRequirement:
+    dockerPull: quay.io/biocontainers/bwa:0.7.17--h84994c4_5
+  InitialWorkDirRequirement:
+    listing:
+      - $(inputs.input_fasta)
+baseCommand: [bwa, index]
+inputs:
+  input_fasta:
+    type: File
+    label: "input fasta file"
+    inputBinding:
+      position: 1
+outputs:
+  indexed_fasta:
+    type: File
+    outputBinding:
+      glob: $(inputs.input_fasta.basename)
+    secondaryFiles:
+      - .amb
+      - .ann
+      - .bwt
+      - .pac
+      - .sa
+  stdout: stdout
+  stderr: stderr
+stdout: bwa-index-stdout.log
+stderr: bwa-index-stderr.log
diff --git a/workflows/fastq2fasta/bwa-mem.cwl b/workflows/fastq2fasta/bwa-mem.cwl
new file mode 100644
index 0000000..195411c
--- /dev/null
+++ b/workflows/fastq2fasta/bwa-mem.cwl
@@ -0,0 +1,59 @@
+#!/usr/bin/env cwl-runner
+cwlVersion: v1.1
+class: CommandLineTool
+doc: "bwa mem, map fastq reads against a bwa-indexed fasta file"
+requirements:
+  DockerRequirement:
+    dockerPull: quay.io/biocontainers/bwa:0.7.17--h84994c4_5
+
+baseCommand: [bwa, mem]
+
+inputs:
+  threads:
+    type: int
+    label: "number of threads"
+    default: 4
+    inputBinding:
+      prefix: -t
+  output_sam:
+    type: string
+    label: "sam file to output results to"
+    default: "out.sam"
+    inputBinding:
+      prefix: -o
+  group_header_line:
+    type: string?
+    label: "read group header line such as '@RG\tID:foo\tSM:bar'"
+    inputBinding:
+      prefix: -R
+  index_base:
+    type: File
+    label: "fasta file for index basename"
+    inputBinding:
+      position: 1
+    secondaryFiles:
+      - .amb
+      - .ann
+      - .bwt
+      - .pac
+      - .sa
+  fastq_forward:
+    type: File
+    label: "input fastq file to map (single-end or forward for pair-end)"
+    inputBinding:
+      position: 2
+  fastq_reverse:
+    type: File?
+    label: "input fastq file to map (reverse for pair-end)"
+    inputBinding:
+      position: 3
+
+outputs:
+  output:
+    type: File
+    outputBinding:
+      glob: "$(inputs.output_sam)"
+  stdout: stdout
+  stderr: stderr
+stdout: bwa-mem-stdout.log
+stderr: bwa-mem-stderr.log
diff --git a/workflows/fastq2fasta/fastq2fasta-create-bwaindex.cwl b/workflows/fastq2fasta/fastq2fasta-create-bwaindex.cwl
new file mode 100644
index 0000000..dab7ff2
--- /dev/null
+++ b/workflows/fastq2fasta/fastq2fasta-create-bwaindex.cwl
@@ -0,0 +1,41 @@
+cwlVersion: v1.1
+class: Workflow
+requirements:
+  SubworkflowFeatureRequirement: {}
+hints:
+  ResourceRequirement:
+    ramMin: 3000
+
+inputs:
+  ref_fasta:
+    type: File
+  fastq_forward:
+    type: File
+  fastq_reverse:
+    type: File?
+  threads:
+    type: int
+    default: 4
+
+outputs:
+  out_fasta:
+    type: File
+    outputSource: fastq2fasta/out_fasta
+
+steps:
+  bwa-index:
+    in: {input_fasta: ref_fasta}
+    out: [indexed_fasta]
+    run: bwa-index.cwl
+  samtools-faidx:
+    in: {input_fasta: bwa-index/indexed_fasta}
+    out: [indexed_fasta]
+    run: samtools-faidx.cwl
+  fastq2fasta:
+    in:
+      fastq_forward: fastq_forward
+      fastq_reverse: fastq_reverse
+      ref_fasta: samtools-faidx/indexed_fasta
+      threads: threads
+    out: [out_fasta]
+    run: fastq2fasta.cwl
diff --git a/workflows/fastq2fasta/fastq2fasta.cwl b/workflows/fastq2fasta/fastq2fasta.cwl
new file mode 100644
index 0000000..0cf5c48
--- /dev/null
+++ b/workflows/fastq2fasta/fastq2fasta.cwl
@@ -0,0 +1,61 @@
+cwlVersion: v1.1
+class: Workflow
+requirements:
+  SubworkflowFeatureRequirement: {}
+hints:
+  ResourceRequirement:
+    ramMin: 3000
+
+inputs:
+  fastq_forward: File
+  fastq_reverse: File?
+  ref_fasta:
+    type: File
+    secondaryFiles:
+      - .amb
+      - .ann
+      - .bwt
+      - .pac
+      - .sa
+      - .fai
+  threads:
+    type: int
+    default: 4
+  metadata: File?
+
+outputs:
+  out_fasta:
+    type: File
+    outputSource: bam2fasta/out_fasta
+  out_metadata:
+    type: File?
+    outputSource: metadata
+
+steps:
+  bwa-mem:
+    in:
+      threads: threads
+      fastq_forward: fastq_forward
+      fastq_reverse: fastq_reverse
+      index_base: ref_fasta
+    out: [output]
+    run: bwa-mem.cwl
+  samtools-view:
+    in:
+      threads: threads
+      input_file: bwa-mem/output
+    out: [bam]
+    run: samtools-view.cwl
+  samtools-sort:
+    in:
+      input_bamfile: samtools-view/bam
+      threads: threads
+    out: [sorted_bam]
+    run: samtools-sort.cwl
+  bam2fasta:
+    in:
+      bam: samtools-sort/sorted_bam
+      fasta: ref_fasta
+      threads: threads
+    out: [out_fasta]
+    run: bam2fasta.cwl
diff --git a/workflows/fastq2fasta/freebayes.cwl b/workflows/fastq2fasta/freebayes.cwl
new file mode 100644
index 0000000..1bf9b2f
--- /dev/null
+++ b/workflows/fastq2fasta/freebayes.cwl
@@ -0,0 +1,25 @@
+#!/usr/bin/env cwl-runner
+class: CommandLineTool
+cwlVersion: v1.1
+hints:
+  DockerRequirement:
+    dockerPull: "quay.io/biocontainers/freebayes:1.3.2--py37hc088bd4_0"
+baseCommand: freebayes
+arguments: [
+  --bam, $(inputs.bam),
+  # --region=$(inputs.contig):1-$(inputs.contig_end)
+  --ploidy, "1",
+  -f, $(inputs.ref_fasta)]
+inputs:
+  - id: bam
+    type: File
+  # - id: contig
+  #   type: string
+  # - id: contig_end
+  #   type: int
+  - id: ref_fasta
+    type: File
+outputs:
+  - id: vcf
+    type: stdout
+stdout: var.vcf
diff --git a/workflows/fastq2fasta/samtools-faidx.cwl b/workflows/fastq2fasta/samtools-faidx.cwl
new file mode 100644
index 0000000..5196607
--- /dev/null
+++ b/workflows/fastq2fasta/samtools-faidx.cwl
@@ -0,0 +1,39 @@
+#!/usr/bin/env cwl-runner
+cwlVersion: v1.0
+class: CommandLineTool
+doc: "samtools faidx, index given fasta file"
+requirements:
+  DockerRequirement:
+    dockerPull: quay.io/biocontainers/samtools:1.9--h8571acd_11
+  InitialWorkDirRequirement:
+    listing:
+      - $(inputs.input_fasta)
+baseCommand: [samtools, faidx]
+inputs:
+  input_fasta:
+    type: File
+    label: "Input fasta"
+    inputBinding:
+      position: 1
+    secondaryFiles:
+      - .amb
+      - .ann
+      - .bwt
+      - .pac
+      - .sa
+outputs:
+  indexed_fasta:
+    type: File
+    outputBinding:
+      glob: "$(inputs.input_fasta.basename)"
+    secondaryFiles:
+      - .amb
+      - .ann
+      - .bwt
+      - .pac
+      - .sa
+      - .fai
+  stdout: stdout
+  stderr: stderr
+stdout: samtools-faidx-stdout.log
+stderr: samtools-faidx-stderr.log
diff --git a/workflows/fastq2fasta/samtools-sort.cwl b/workflows/fastq2fasta/samtools-sort.cwl
new file mode 100644
index 0000000..4c6340c
--- /dev/null
+++ b/workflows/fastq2fasta/samtools-sort.cwl
@@ -0,0 +1,41 @@
+#!/usr/bin/env cwl-runner
+cwlVersion: v1.0
+class: CommandLineTool
+doc: "samtools sort, sort given bam file"
+requirements:
+  DockerRequirement:
+    dockerPull: quay.io/biocontainers/samtools:1.9--h8571acd_11
+baseCommand: [samtools, sort]
+inputs:
+  threads:
+    type: int
+    default: 4
+    inputBinding:
+      prefix: -@
+  tmpfile:
+    type: string
+    default: sort.tmp
+    label: "Write temporary files to PREFIX.nnnn.bam"
+    inputBinding:
+      prefix: -T
+  output_bam:
+    type: string
+    default: aln.sorted.bam
+    label: "Write final output to FILENAME"
+    inputBinding:
+      prefix: -o
+  input_bamfile:
+    type: File
+    label: "Input bamfile"
+    inputBinding:
+      position: 1
+
+outputs:
+  sorted_bam:
+    type: File
+    outputBinding:
+      glob: "$(inputs.output_bam)"
+  stdout: stdout
+  stderr: stderr
+stdout: samtools-sort-stdout.log
+stderr: samtools-sort-stderr.log
diff --git a/workflows/fastq2fasta/samtools-view.cwl b/workflows/fastq2fasta/samtools-view.cwl
new file mode 100644
index 0000000..9f11cc6
--- /dev/null
+++ b/workflows/fastq2fasta/samtools-view.cwl
@@ -0,0 +1,63 @@
+#!/usr/bin/env cwl-runner
+cwlVersion: v1.0
+class: CommandLineTool
+doc: "samtools view to convert sam format to bam format"
+requirements:
+  DockerRequirement:
+    dockerPull: quay.io/biocontainers/samtools:1.9--h8571acd_11
+baseCommand: [samtools, view]
+inputs:
+  threads:
+    type: int
+    label: "Number of additional threads to use"
+    default: 4
+    inputBinding:
+      prefix: -@
+  output_bam:
+    type: boolean
+    label: "output BAM"
+    default: true
+    inputBinding:
+      prefix: -b
+  output_filename:
+    type: string
+    label: "output file name"
+    default: "aln.bam"
+    inputBinding:
+      prefix: -o
+  input_file:
+    type: File
+    label: "input file"
+    inputBinding:
+      position: 1
+  include_header:
+    type: boolean
+    label: "include the header in the output"
+    default: false
+    inputBinding:
+      prefix: -h
+  ignore_previous_version:
+    type: boolean
+    label: "ignored for compatibility with previous samtools versions"
+    default: false
+    inputBinding:
+      prefix: -S
+  filter_alignments:
+    type: string?
+    label: "Do not output alignments with any bits set in INT present in the FLAG field. INT can be specified in hex by beginning with `0x' (i.e. /^0x[0-9A-F]+/) or in octal by beginning with `0' (i.e. /^0[0-7]+/) [0]."
+    inputBinding:
+      prefix: -F
+  skip_alignments:
+    type: int?
+    label: "Skip alignments with MAPQ smaller than INT [0]."
+    inputBinding:
+      prefix: -q
+outputs:
+  bam:
+    type: File
+    outputBinding:
+      glob: "$(inputs.output_filename)"
+  stdout: stdout
+  stderr: stderr
+stdout: samtools-view-stdout.log
+stderr: samtools-view-stderr.log
diff --git a/workflows/pangenome-generate/merge-metadata.cwl b/workflows/pangenome-generate/merge-metadata.cwl
new file mode 100644
index 0000000..9164c09
--- /dev/null
+++ b/workflows/pangenome-generate/merge-metadata.cwl
@@ -0,0 +1,18 @@
+cwlVersion: v1.1
+class: CommandLineTool
+hints:
+  DockerRequirement:
+    dockerPull: commonworkflowlanguage/cwltool_module
+inputs:
+  metadata: File[]
+  metadataSchema: File
+  subjects: string[]
+outputs:
+  merged: stdout
+stdout: mergedmetadata.ttl
+requirements:
+  InitialWorkDirRequirement:
+    listing:
+      - entry: {$include: merge-metadata.py}
+        entryname: merge-metadata.py
+baseCommand: [python3, merge-metadata.py]
diff --git a/workflows/pangenome-generate/merge-metadata.py b/workflows/pangenome-generate/merge-metadata.py
new file mode 100644
index 0000000..64275b1
--- /dev/null
+++ b/workflows/pangenome-generate/merge-metadata.py
@@ -0,0 +1,17 @@
+import schema_salad.schema
+import schema_salad.jsonld_context
+
+metadataSchema = '$(inputs.metadataSchema.path)'
+metadata = $(inputs.metadata)
+subjects = $(inputs.subjects)
+
+(document_loader,
+ avsc_names,
+ schema_metadata,
+ metaschema_loader) = schema_salad.schema.load_schema(metadataSchema)
+
+for i, m in enumerate(metadata):
+    doc, doc_metadata = schema_salad.schema.load_and_validate(document_loader, avsc_names, m["path"], True)
+    doc["id"] = subjects[i]
+    g = schema_salad.jsonld_context.makerdf(subjects[i], doc, document_loader.ctx)
+    print(g.serialize(format="ntriples").decode("utf-8"))
diff --git a/workflows/pangenome-generate/minimap2.cwl b/workflows/pangenome-generate/minimap2.cwl
new file mode 100644
index 0000000..bf19ef7
--- /dev/null
+++ b/workflows/pangenome-generate/minimap2.cwl
@@ -0,0 +1,23 @@
+cwlVersion: v1.1
+class: CommandLineTool
+inputs:
+  readsFA: File
+outputs:
+  readsPAF: stdout
+requirements:
+  InlineJavascriptRequirement: {}
+hints:
+  DockerRequirement:
+    dockerPull: "quay.io/biocontainers/minimap2:2.17--h8b12597_1"
+  ResourceRequirement:
+    coresMin: 8
+    coresMax: 32
+    ramMin: $(7 * 1024)
+    outdirMin: $(Math.ceil(inputs.readsFA.size/(1024*1024*1024) + 20))
+stdout: $(inputs.readsFA.nameroot).paf
+baseCommand: minimap2
+arguments: [-cx, asm20,
+            -w, "1",
+            -t, $(runtime.cores),
+            $(inputs.readsFA),
+            $(inputs.readsFA)]
diff --git a/workflows/pangenome-generate/odgi-build.cwl b/workflows/pangenome-generate/odgi-build.cwl
new file mode 100644
index 0000000..0bd6a20
--- /dev/null
+++ b/workflows/pangenome-generate/odgi-build.cwl
@@ -0,0 +1,26 @@
+cwlVersion: v1.1
+class: CommandLineTool
+inputs:
+  inputGFA: File
+outputs:
+  odgiGraph:
+    type: File
+    outputBinding:
+      glob: $(inputs.inputGFA.nameroot).odgi
+requirements:
+  InlineJavascriptRequirement: {}
+  ShellCommandRequirement: {}
+hints:
+  DockerRequirement:
+    dockerPull: "quay.io/biocontainers/odgi:v0.3--py37h8b12597_0"
+  ResourceRequirement:
+    coresMin: 4
+    ramMin: $(7 * 1024)
+    outdirMin: $(Math.ceil((inputs.inputGFA.size/(1024*1024*1024)+1) * 2))
+  InitialWorkDirRequirement:
+    listing:
+      - entry: $(inputs.inputGFA)
+        writable: true
+arguments: [odgi, build, -g, $(inputs.inputGFA), -s, -o, -,
+            {shellQuote: false, valueFrom: "|"},
+            odgi, sort, -i, -, -p, s, -o, $(inputs.inputGFA.nameroot).odgi]
diff --git a/workflows/pangenome-generate/odgi-viz.cwl b/workflows/pangenome-generate/odgi-viz.cwl
new file mode 100644
index 0000000..d440fcb
--- /dev/null
+++ b/workflows/pangenome-generate/odgi-viz.cwl
@@ -0,0 +1,25 @@
+cwlVersion: v1.1
+class: CommandLineTool
+inputs:
+  inputODGI: File
+outputs:
+  odgiPNG:
+    type: File
+    outputBinding:
+      glob: $(inputs.inputODGI.nameroot).png
+requirements:
+  InlineJavascriptRequirement: {}
+hints:
+  DockerRequirement:
+    dockerPull: "quay.io/biocontainers/odgi:v0.3--py37h8b12597_0"
+  ResourceRequirement:
+    coresMin: 4
+    ramMin: $(7 * 1024)
+    outdirMin: 1
+baseCommand: [odgi, viz]
+arguments: [-i, $(inputs.inputODGI),
+            -o, $(inputs.inputODGI.nameroot).png,
+            -x, "50000",
+            -y, "500",
+            -R,
+            -P, "4"]
diff --git a/workflows/pangenome-generate/odgi_to_rdf.cwl b/workflows/pangenome-generate/odgi_to_rdf.cwl
new file mode 100644
index 0000000..079d6fb
--- /dev/null
+++ b/workflows/pangenome-generate/odgi_to_rdf.cwl
@@ -0,0 +1,25 @@
+#!/usr/bin/env cwl-runner
+class: CommandLineTool
+cwlVersion: v1.1
+hints:
+  DockerRequirement:
+    dockerPull: spodgi/spodgi
+requirements:
+  InlineJavascriptRequirement: {}
+  ShellCommandRequirement: {}
+inputs:
+  - id: odgi
+    type: File
+  - id: output_name
+    type: string?
+
+stdout: $(inputs.output_name || inputs.odgi.nameroot+'.ttl.xz')
+
+arguments:
+  [odgi_to_rdf.py, $(inputs.odgi), "-",
+   {valueFrom: "|", shellQuote: false},
+   xz, --stdout]
+
+outputs:
+  - id: rdf
+    type: stdout
diff --git a/workflows/pangenome-generate/pangenome-generate.cwl b/workflows/pangenome-generate/pangenome-generate.cwl
new file mode 100644
index 0000000..2710743
--- /dev/null
+++ b/workflows/pangenome-generate/pangenome-generate.cwl
@@ -0,0 +1,57 @@
+cwlVersion: v1.1
+class: Workflow
+inputs:
+  inputReads: File[]
+  metadata: File[]
+  metadataSchema: File
+  subjects: string[]
+outputs:
+  odgiGraph:
+    type: File
+    outputSource: buildGraph/odgiGraph
+  odgiPNG:
+    type: File
+    outputSource: vizGraph/odgiPNG
+  seqwishGFA:
+    type: File
+    outputSource: induceGraph/seqwishGFA
+  odgiRDF:
+    type: File
+    outputSource: odgi2rdf/rdf
+  mergedMetadata:
+    type: File
+    outputSource: mergeMetadata/merged
+steps:
+  dedup:
+    in: {readsFA: inputReads}
+    out: [readsMergeDedup]
+    run: seqkit-rmdup.cwl
+  overlapReads:
+    in: {readsFA: dedup/readsMergeDedup}
+    out: [readsPAF]
+    run: minimap2.cwl
+  induceGraph:
+    in:
+      readsFA: dedup/readsMergeDedup
+      readsPAF: overlapReads/readsPAF
+    out: [seqwishGFA]
+    run: seqwish.cwl
+  buildGraph:
+    in: {inputGFA: induceGraph/seqwishGFA}
+    out: [odgiGraph]
+    run: odgi-build.cwl
+  vizGraph:
+    in: {inputODGI: buildGraph/odgiGraph}
+    out: [odgiPNG]
+    run: odgi-viz.cwl
+  odgi2rdf:
+    in: {odgi: buildGraph/odgiGraph}
+    out: [rdf]
+    run: odgi_to_rdf.cwl
+  mergeMetadata:
+    in:
+      metadata: metadata
+      metadataSchema: metadataSchema
+      subjects: subjects
+    out: [merged]
+    run: merge-metadata.cwl
diff --git a/workflows/pangenome-generate/seqkit-rmdup.cwl b/workflows/pangenome-generate/seqkit-rmdup.cwl
new file mode 100644
index 0000000..d3626f5
--- /dev/null
+++ b/workflows/pangenome-generate/seqkit-rmdup.cwl
@@ -0,0 +1,32 @@
+cwlVersion: v1.1
+class: CommandLineTool
+inputs:
+  readsFA: File[]
+outputs:
+  readsMergeDedup:
+    type: File
+    outputBinding:
+      glob: readsMergeDedup.fasta
+requirements:
+  InlineJavascriptRequirement: {}
+hints:
+  DockerRequirement:
+    dockerPull: "quay.io/biocontainers/seqkit:0.7.1--0"
+  ResourceRequirement:
+    coresMin: 8
+    coresMax: 32
+    ramMin: $(7 * 1024)
+    outdirMin: |
+      ${
+        var sum = 0;
+        for (var i = 0; i < inputs.readsFA.length; i++) {
+          sum += inputs.readsFA[i].size;
+        }
+        return Math.ceil(sum/(1024*1024*1024)+1) + 20;
+      }
+baseCommand: seqkit
+arguments: [rmdup,
+            --by-seq,
+            --ignore-case,
+            -o, readsMergeDedup.fasta,
+            $(inputs.readsFA)]
diff --git a/workflows/pangenome-generate/seqwish.cwl b/workflows/pangenome-generate/seqwish.cwl
new file mode 100644
index 0000000..9f8cb15
--- /dev/null
+++ b/workflows/pangenome-generate/seqwish.cwl
@@ -0,0 +1,29 @@
+cwlVersion: v1.1
+class: CommandLineTool
+inputs:
+  readsFA: File
+  readsPAF: File
+  kmerSize:
+    type: int
+    default: 16
+outputs:
+  seqwishGFA:
+    type: File
+    outputBinding:
+      glob: $(inputs.readsPAF.nameroot).gfa
+requirements:
+  InlineJavascriptRequirement: {}
+hints:
+  DockerRequirement:
+    dockerPull: "quay.io/biocontainers/seqwish:0.4.1--h8b12597_0"
+  ResourceRequirement:
+    coresMin: 4
+    ramMin: $(7 * 1024)
+    outdirMin: $(Math.ceil(inputs.readsFA.size/(1024*1024*1024) + 20))
+stdout: $(inputs.readsFA.nameroot).paf
+baseCommand: seqwish
+arguments: [-t, $(runtime.cores),
+            -k, $(inputs.kmerSize),
+            -s, $(inputs.readsFA),
+            -p, $(inputs.readsPAF),
+            -g, $(inputs.readsPAF.nameroot).gfa]
diff --git a/workflows/update-workflows.sh b/workflows/update-workflows.sh
new file mode 100755
index 0000000..ea9e199
--- /dev/null
+++ b/workflows/update-workflows.sh
@@ -0,0 +1,3 @@
+#!/bin/sh
+arvados-cwl-runner --project-uuid=lugli-j7d0g-5hswinmpyho8dju --update-workflow=lugli-7fd4e-2zp9q4jo5xpif9y fastq2fasta/fastq2fasta.cwl
+arvados-cwl-runner --project-uuid=lugli-j7d0g-5hswinmpyho8dju --update-workflow=lugli-7fd4e-mqfu9y3ofnpnho1 pangenome-generate/pangenome-generate.cwl
-- 
cgit v1.2.3