aboutsummaryrefslogtreecommitdiff
path: root/workflows
diff options
context:
space:
mode:
authorPeter Amstutz2020-04-20 12:55:18 -0400
committerPeter Amstutz2020-04-20 12:55:18 -0400
commitd781e42c9adac07253cb928ae66e9b7314710267 (patch)
tree4718165cbc069169eaff98080c61c5f07763ffc1 /workflows
parent1219eaf496c899f3043b90e30eb956f0f363bfb3 (diff)
downloadbh20-seq-resource-d781e42c9adac07253cb928ae66e9b7314710267.tar.gz
bh20-seq-resource-d781e42c9adac07253cb928ae66e9b7314710267.tar.lz
bh20-seq-resource-d781e42c9adac07253cb928ae66e9b7314710267.zip
Move workflows into main repo
Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz@curii.com>
Diffstat (limited to 'workflows')
-rw-r--r--workflows/fastq2fasta/bam2fasta.cwl65
-rw-r--r--workflows/fastq2fasta/bcftools-concat.cwl24
-rw-r--r--workflows/fastq2fasta/bcftools-consensus.cwl24
-rw-r--r--workflows/fastq2fasta/bcftools-index.cwl23
-rw-r--r--workflows/fastq2fasta/bcftools-norm.cwl32
-rw-r--r--workflows/fastq2fasta/bcftools-view-exclude-ref.cwl23
-rw-r--r--workflows/fastq2fasta/bcftools-view-qc.cwl24
-rw-r--r--workflows/fastq2fasta/bcftools-view.cwl19
-rw-r--r--workflows/fastq2fasta/bwa-index.cwl32
-rw-r--r--workflows/fastq2fasta/bwa-mem.cwl59
-rw-r--r--workflows/fastq2fasta/fastq2fasta-create-bwaindex.cwl41
-rw-r--r--workflows/fastq2fasta/fastq2fasta.cwl61
-rw-r--r--workflows/fastq2fasta/freebayes.cwl25
-rw-r--r--workflows/fastq2fasta/samtools-faidx.cwl39
-rw-r--r--workflows/fastq2fasta/samtools-sort.cwl41
-rw-r--r--workflows/fastq2fasta/samtools-view.cwl63
-rw-r--r--workflows/pangenome-generate/merge-metadata.cwl18
-rw-r--r--workflows/pangenome-generate/merge-metadata.py17
-rw-r--r--workflows/pangenome-generate/minimap2.cwl23
-rw-r--r--workflows/pangenome-generate/odgi-build.cwl26
-rw-r--r--workflows/pangenome-generate/odgi-viz.cwl25
-rw-r--r--workflows/pangenome-generate/odgi_to_rdf.cwl25
-rw-r--r--workflows/pangenome-generate/pangenome-generate.cwl57
-rw-r--r--workflows/pangenome-generate/seqkit-rmdup.cwl32
-rw-r--r--workflows/pangenome-generate/seqwish.cwl29
-rwxr-xr-xworkflows/update-workflows.sh3
26 files changed, 850 insertions, 0 deletions
diff --git a/workflows/fastq2fasta/bam2fasta.cwl b/workflows/fastq2fasta/bam2fasta.cwl
new file mode 100644
index 0000000..efe580f
--- /dev/null
+++ b/workflows/fastq2fasta/bam2fasta.cwl
@@ -0,0 +1,65 @@
+# Workflow: turn an alignment (BAM) plus its reference into a consensus FASTA.
+# Step chain: freebayes -> bcftools view (exclude ref) -> bcftools norm ->
+# bcftools index -> bcftools view (QC filter) -> bcftools index ->
+# bcftools consensus.
+#
+# Reference:
+# https://github.com/VGP/vgp-assembly/blob/33cd6236a68a1aee5f282e365dfe6b97e0b4ebb7/pipeline/freebayes-polish/freebayes.sh
+# https://github.com/VGP/vgp-assembly/blob/33cd6236a68a1aee5f282e365dfe6b97e0b4ebb7/pipeline/freebayes-polish/consensus.sh
+cwlVersion: v1.1
+class: Workflow
+id: bam2fasta
+label: bam2fasta
+requirements: []
+
+inputs:
+  bam: File
+  fasta: File
+  threads:
+    type: int
+    default: 4
+
+outputs:
+  out_fasta:
+    type: File
+    outputSource: bcftools_consensus/out_fasta
+
+steps:
+  # Variant calling on the alignment against the reference.
+  freebayes:
+    run: freebayes.cwl
+    in: {bam: bam, ref_fasta: fasta}
+    out: [vcf]
+  bcftools_view_exclude_ref:
+    run: bcftools-view-exclude-ref.cwl
+    in: {vcf: freebayes/vcf, threads: threads}
+    out: [bcf]
+  bcftools_norm:
+    run: bcftools-norm.cwl
+    in:
+      ref_fasta: fasta
+      bcf: bcftools_view_exclude_ref/bcf
+      threads: threads
+    out: [normalized_bcf]
+  # bcftools-view-qc.cwl declares a .csi secondary file on its input, so the
+  # normalized BCF is indexed first.
+  bcftools_index_after_normalization:
+    run: bcftools-index.cwl
+    in: {bcf: bcftools_norm/normalized_bcf}
+    out: [indexed]
+  bcftools_view_qc:
+    run: bcftools-view-qc.cwl
+    in:
+      bcf: bcftools_index_after_normalization/indexed
+      threads: threads
+    out: [vcf]
+  # bcftools-consensus.cwl likewise expects a .csi next to its VCF input.
+  bcftools_index_after_qc:
+    run: bcftools-index.cwl
+    in: {bcf: bcftools_view_qc/vcf}
+    out: [indexed]
+  bcftools_consensus:
+    run: bcftools-consensus.cwl
+    in:
+      ref_fasta: fasta
+      vcf: bcftools_index_after_qc/indexed
+    out: [out_fasta]
diff --git a/workflows/fastq2fasta/bcftools-concat.cwl b/workflows/fastq2fasta/bcftools-concat.cwl
new file mode 100644
index 0000000..fc33a30
--- /dev/null
+++ b/workflows/fastq2fasta/bcftools-concat.cwl
@@ -0,0 +1,24 @@
+#!/usr/bin/env cwl-runner
+# Runs `bcftools concat -Ou` over the files in `bcf_files` and collects the
+# file named by `output_name` (default "merged.bcf") as `merged_bcf`.
+cwlVersion: v1.1
+class: CommandLineTool
+hints:
+  DockerRequirement:
+    dockerPull: "quay.io/biocontainers/bcftools:1.10.2--hd2cd319_0"
+baseCommand: bcftools
+arguments: [concat, -Ou, -o, $(inputs.output_name), $(inputs.bcf_files)]
+inputs:
+  output_name:
+    type: string
+    default: "merged.bcf"
+  bcf_files:
+    type: File[]
+outputs:
+  merged_bcf:
+    type: File
+    outputBinding:
+      glob: "$(inputs.output_name)"
diff --git a/workflows/fastq2fasta/bcftools-consensus.cwl b/workflows/fastq2fasta/bcftools-consensus.cwl
new file mode 100644
index 0000000..c111792
--- /dev/null
+++ b/workflows/fastq2fasta/bcftools-consensus.cwl
@@ -0,0 +1,24 @@
+#!/usr/bin/env cwl-runner
+# Runs `bcftools consensus` on `ref_fasta` with the calls in `vcf` (which must
+# come with a .csi index) and captures stdout as sequence.fasta.
+class: CommandLineTool
+cwlVersion: v1.1
+hints:
+  DockerRequirement:
+    dockerPull: "quay.io/biocontainers/bcftools:1.10.2--hd2cd319_0"
+baseCommand: bcftools
+arguments:
+  - consensus
+  # The include expression is passed as its own argument. CWL executes the
+  # command without a shell, so shell-style quotes glued onto -i would be
+  # handed to bcftools as literal quote characters inside the expression.
+  - -i
+  - 'QUAL > 1 && GT="A"'
+  - -Hla
+  - -f
+  - $(inputs.ref_fasta)
+  - $(inputs.vcf)
+inputs:
+  - id: ref_fasta
+    type: File
+  - id: vcf
+    type: File
+    secondaryFiles: [.csi]
+outputs:
+  - id: out_fasta
+    type: stdout
+stdout: sequence.fasta
diff --git a/workflows/fastq2fasta/bcftools-index.cwl b/workflows/fastq2fasta/bcftools-index.cwl
new file mode 100644
index 0000000..396b680
--- /dev/null
+++ b/workflows/fastq2fasta/bcftools-index.cwl
@@ -0,0 +1,23 @@
+#!/usr/bin/env cwl-runner
+# Runs `bcftools index` on `bcf` and returns the same file together with its
+# .csi index as `indexed`.
+class: CommandLineTool
+cwlVersion: v1.1
+requirements:
+  # Staging the input into the working directory is mandatory, not optional:
+  # the output glob below looks for the input's basename (and the .csi beside
+  # it) in the working directory. As a hint a runner would be free to skip it.
+  InitialWorkDirRequirement:
+    listing:
+      - $(inputs.bcf)
+hints:
+  DockerRequirement:
+    dockerPull: "quay.io/biocontainers/bcftools:1.10.2--hd2cd319_0"
+baseCommand: bcftools
+arguments:
+  - index
+  - $(inputs.bcf)
+inputs:
+  - id: bcf
+    type: File
+outputs:
+  - id: indexed
+    type: File
+    outputBinding:
+      glob: "$(inputs.bcf.basename)"
+    secondaryFiles:
+      - .csi
diff --git a/workflows/fastq2fasta/bcftools-norm.cwl b/workflows/fastq2fasta/bcftools-norm.cwl
new file mode 100644
index 0000000..a765e65
--- /dev/null
+++ b/workflows/fastq2fasta/bcftools-norm.cwl
@@ -0,0 +1,32 @@
+#!/usr/bin/env cwl-runner
+# Runs `bcftools norm -Ob` on `bcf` against `ref_fasta` and collects the file
+# named by `output_name` (default "normalized.bcf") as `normalized_bcf`.
+cwlVersion: v1.1
+class: CommandLineTool
+hints:
+  DockerRequirement:
+    dockerPull: "quay.io/biocontainers/bcftools:1.10.2--hd2cd319_0"
+baseCommand: bcftools
+arguments: [norm, -Ob,
+            -f, $(inputs.ref_fasta),
+            -o, $(inputs.output_name),
+            --threads, $(inputs.threads),
+            $(inputs.bcf)]
+inputs:
+  ref_fasta:
+    type: File
+  output_name:
+    type: string
+    default: "normalized.bcf"
+  threads:
+    type: int
+  bcf:
+    type: File
+outputs:
+  normalized_bcf:
+    type: File
+    outputBinding:
+      glob: "$(inputs.output_name)"
diff --git a/workflows/fastq2fasta/bcftools-view-exclude-ref.cwl b/workflows/fastq2fasta/bcftools-view-exclude-ref.cwl
new file mode 100644
index 0000000..849bd88
--- /dev/null
+++ b/workflows/fastq2fasta/bcftools-view-exclude-ref.cwl
@@ -0,0 +1,23 @@
+#!/usr/bin/env cwl-runner
+# Runs `bcftools view -Ou` on `vcf` with an exclude filter on ref-type records
+# and captures stdout as <vcf nameroot>.without-ref.bcf.
+class: CommandLineTool
+cwlVersion: v1.1
+hints:
+  DockerRequirement:
+    dockerPull: "quay.io/biocontainers/bcftools:1.10.2--hd2cd319_0"
+baseCommand: bcftools
+arguments:
+  - view
+  - --no-version
+  - -Ou
+  # The exclude expression is passed as its own argument. CWL executes the
+  # command without a shell, so shell-style quotes glued onto -e would reach
+  # bcftools as literal characters. The string value is quoted the way
+  # bcftools expressions expect (type="ref").
+  - -e
+  - 'type="ref"'
+  - --threads=$(inputs.threads)
+  - $(inputs.vcf)
+inputs:
+  - id: vcf
+    type: File
+  - id: threads
+    type: int
+outputs:
+  - id: bcf
+    type: stdout
+stdout: $(inputs.vcf.nameroot).without-ref.bcf
diff --git a/workflows/fastq2fasta/bcftools-view-qc.cwl b/workflows/fastq2fasta/bcftools-view-qc.cwl
new file mode 100644
index 0000000..477c596
--- /dev/null
+++ b/workflows/fastq2fasta/bcftools-view-qc.cwl
@@ -0,0 +1,24 @@
+#!/usr/bin/env cwl-runner
+# Runs `bcftools view -Oz` on `bcf` (which must come with a .csi index),
+# keeping only records that match the include expression below, and captures
+# stdout as out.changes.vcf.gz.
+cwlVersion: v1.1
+class: CommandLineTool
+hints:
+  DockerRequirement:
+    dockerPull: "quay.io/biocontainers/bcftools:1.10.2--hd2cd319_0"
+baseCommand: bcftools
+arguments:
+  - view
+  - -i
+  - 'QUAL>1 && (GT="AA" || GT="Aa")'
+  - -Oz
+  - --threads=$(inputs.threads)
+  - $(inputs.bcf)
+inputs:
+  threads:
+    type: int
+  bcf:
+    type: File
+    secondaryFiles: [.csi]
+outputs:
+  vcf:
+    type: stdout
+stdout: out.changes.vcf.gz
diff --git a/workflows/fastq2fasta/bcftools-view.cwl b/workflows/fastq2fasta/bcftools-view.cwl
new file mode 100644
index 0000000..2d1a51f
--- /dev/null
+++ b/workflows/fastq2fasta/bcftools-view.cwl
@@ -0,0 +1,19 @@
+#!/usr/bin/env cwl-runner
+# Runs `bcftools view --no-version -Ou` on `vcf` and captures stdout as
+# <vcf nameroot>.bcf.
+cwlVersion: v1.1
+class: CommandLineTool
+hints:
+  DockerRequirement:
+    dockerPull: "quay.io/biocontainers/bcftools:1.10.2--hd2cd319_0"
+baseCommand: bcftools
+arguments: [view, --no-version, -Ou, $(inputs.vcf)]
+inputs:
+  vcf:
+    type: File
+outputs:
+  bcf:
+    type: stdout
+stdout: $(inputs.vcf.nameroot).bcf
diff --git a/workflows/fastq2fasta/bwa-index.cwl b/workflows/fastq2fasta/bwa-index.cwl
new file mode 100644
index 0000000..775ba8d
--- /dev/null
+++ b/workflows/fastq2fasta/bwa-index.cwl
@@ -0,0 +1,32 @@
+#!/usr/bin/env cwl-runner
+# Runs `bwa index` on a copy of `input_fasta` staged in the working directory
+# and returns that fasta with its .amb/.ann/.bwt/.pac/.sa companions.
+cwlVersion: v1.1
+class: CommandLineTool
+doc: "bwa index, build the bwa index files for the given fasta file"
+requirements:
+  DockerRequirement:
+    dockerPull: quay.io/biocontainers/bwa:0.7.17--h84994c4_5
+  # The fasta is staged into the working directory so that the output glob
+  # (the input's basename) and its secondary files are found there.
+  InitialWorkDirRequirement:
+    listing:
+      - $(inputs.input_fasta)
+baseCommand: [bwa, index]
+inputs:
+  input_fasta:
+    type: File
+    label: "input fasta file"
+    inputBinding:
+      position: 1
+outputs:
+  indexed_fasta:
+    type: File
+    outputBinding:
+      glob: $(inputs.input_fasta.basename)
+    secondaryFiles:
+      - .amb
+      - .ann
+      - .bwt
+      - .pac
+      - .sa
+  stdout: stdout
+  stderr: stderr
+stdout: bwa-index-stdout.log
+stderr: bwa-index-stderr.log
diff --git a/workflows/fastq2fasta/bwa-mem.cwl b/workflows/fastq2fasta/bwa-mem.cwl
new file mode 100644
index 0000000..195411c
--- /dev/null
+++ b/workflows/fastq2fasta/bwa-mem.cwl
@@ -0,0 +1,59 @@
+#!/usr/bin/env cwl-runner
+# Runs `bwa mem` with `index_base` (fasta plus bwa index files) and one or two
+# fastq files, and collects the file named by `output_sam` as `output`.
+cwlVersion: v1.1
+class: CommandLineTool
+doc: "bwa mem, map fastq reads against a bwa-indexed fasta file and write a sam file"
+requirements:
+  DockerRequirement:
+    dockerPull: quay.io/biocontainers/bwa:0.7.17--h84994c4_5
+
+baseCommand: [bwa, mem]
+
+inputs:
+  threads:
+    type: int
+    label: "number of threads"
+    default: 4
+    inputBinding:
+      prefix: -t
+  output_sam:
+    type: string
+    label: "sam file to output results to"
+    default: "out.sam"
+    inputBinding:
+      prefix: -o
+  group_header_line:
+    type: string?
+    # Backslashes are doubled: inside a double-quoted YAML scalar a bare \t
+    # would be turned into a TAB and the example would no longer show the
+    # literal two-character sequence.
+    label: "read group header line such as '@RG\\tID:foo\\tSM:bar'"
+    inputBinding:
+      prefix: -R
+  index_base:
+    type: File
+    label: "fasta file for index basename"
+    inputBinding:
+      position: 1
+    secondaryFiles:
+      - .amb
+      - .ann
+      - .bwt
+      - .pac
+      - .sa
+  fastq_forward:
+    type: File
+    label: "input fastq file to map (single-end or forward for pair-end)"
+    inputBinding:
+      position: 2
+  fastq_reverse:
+    type: File?
+    label: "input fastq file to map (reverse for pair-end)"
+    inputBinding:
+      position: 3
+
+outputs:
+  output:
+    type: File
+    outputBinding:
+      glob: "$(inputs.output_sam)"
+  stdout: stdout
+  stderr: stderr
+stdout: bwa-mem-stdout.log
+stderr: bwa-mem-stderr.log
diff --git a/workflows/fastq2fasta/fastq2fasta-create-bwaindex.cwl b/workflows/fastq2fasta/fastq2fasta-create-bwaindex.cwl
new file mode 100644
index 0000000..dab7ff2
--- /dev/null
+++ b/workflows/fastq2fasta/fastq2fasta-create-bwaindex.cwl
@@ -0,0 +1,41 @@
+# Workflow: same as fastq2fasta.cwl, but starting from a bare reference fasta.
+# The reference is first run through bwa-index.cwl and samtools-faidx.cwl and
+# the result is handed to the fastq2fasta subworkflow.
+class: Workflow
+cwlVersion: v1.1
+requirements:
+  SubworkflowFeatureRequirement: {}
+hints:
+  ResourceRequirement:
+    ramMin: 3000
+
+inputs:
+  ref_fasta: File
+  fastq_forward: File
+  fastq_reverse: File?
+  threads:
+    type: int
+    default: 4
+
+outputs:
+  out_fasta:
+    type: File
+    outputSource: fastq2fasta/out_fasta
+
+steps:
+  bwa-index:
+    run: bwa-index.cwl
+    in:
+      input_fasta: ref_fasta
+    out: [indexed_fasta]
+  # Takes the bwa-indexed fasta so the bwa index files travel along with it.
+  samtools-faidx:
+    run: samtools-faidx.cwl
+    in:
+      input_fasta: bwa-index/indexed_fasta
+    out: [indexed_fasta]
+  fastq2fasta:
+    run: fastq2fasta.cwl
+    in:
+      fastq_forward: fastq_forward
+      fastq_reverse: fastq_reverse
+      ref_fasta: samtools-faidx/indexed_fasta
+      threads: threads
+    out: [out_fasta]
diff --git a/workflows/fastq2fasta/fastq2fasta.cwl b/workflows/fastq2fasta/fastq2fasta.cwl
new file mode 100644
index 0000000..0cf5c48
--- /dev/null
+++ b/workflows/fastq2fasta/fastq2fasta.cwl
@@ -0,0 +1,61 @@
+# Workflow: map fastq reads to an indexed reference and emit a consensus FASTA.
+# Step chain: bwa-mem -> samtools-view -> samtools-sort -> bam2fasta.
+# An optional `metadata` file is passed through unchanged as `out_metadata`.
+class: Workflow
+cwlVersion: v1.1
+requirements:
+  SubworkflowFeatureRequirement: {}
+hints:
+  ResourceRequirement:
+    ramMin: 3000
+
+inputs:
+  fastq_forward:
+    type: File
+  fastq_reverse:
+    type: File?
+  # The reference must arrive with its bwa index files and its .fai index.
+  ref_fasta:
+    type: File
+    secondaryFiles: [.amb, .ann, .bwt, .pac, .sa, .fai]
+  threads:
+    type: int
+    default: 4
+  metadata:
+    type: File?
+
+outputs:
+  out_fasta:
+    type: File
+    outputSource: bam2fasta/out_fasta
+  out_metadata:
+    type: File?
+    outputSource: metadata
+
+steps:
+  bwa-mem:
+    run: bwa-mem.cwl
+    in:
+      threads: threads
+      fastq_forward: fastq_forward
+      fastq_reverse: fastq_reverse
+      index_base: ref_fasta
+    out: [output]
+  samtools-view:
+    run: samtools-view.cwl
+    in: {threads: threads, input_file: bwa-mem/output}
+    out: [bam]
+  samtools-sort:
+    run: samtools-sort.cwl
+    in: {input_bamfile: samtools-view/bam, threads: threads}
+    out: [sorted_bam]
+  bam2fasta:
+    run: bam2fasta.cwl
+    in:
+      bam: samtools-sort/sorted_bam
+      fasta: ref_fasta
+      threads: threads
+    out: [out_fasta]
diff --git a/workflows/fastq2fasta/freebayes.cwl b/workflows/fastq2fasta/freebayes.cwl
new file mode 100644
index 0000000..1bf9b2f
--- /dev/null
+++ b/workflows/fastq2fasta/freebayes.cwl
@@ -0,0 +1,25 @@
+#!/usr/bin/env cwl-runner
+# Runs freebayes with --ploidy 1 on `bam` against `ref_fasta` and captures
+# stdout as var.vcf.
+#
+# Currently disabled: a per-contig restriction
+#   --region=$(inputs.contig):1-$(inputs.contig_end)
+# together with its inputs `contig` (string) and `contig_end` (int).
+cwlVersion: v1.1
+class: CommandLineTool
+hints:
+  DockerRequirement:
+    dockerPull: "quay.io/biocontainers/freebayes:1.3.2--py37hc088bd4_0"
+baseCommand: freebayes
+arguments:
+  - --bam
+  - $(inputs.bam)
+  - --ploidy
+  - "1"
+  - -f
+  - $(inputs.ref_fasta)
+inputs:
+  bam:
+    type: File
+  ref_fasta:
+    type: File
+outputs:
+  vcf:
+    type: stdout
+stdout: var.vcf
diff --git a/workflows/fastq2fasta/samtools-faidx.cwl b/workflows/fastq2fasta/samtools-faidx.cwl
new file mode 100644
index 0000000..5196607
--- /dev/null
+++ b/workflows/fastq2fasta/samtools-faidx.cwl
@@ -0,0 +1,39 @@
+#!/usr/bin/env cwl-runner
+# Runs `samtools faidx` on a copy of `input_fasta` staged in the working
+# directory. The bwa index files that accompany the input are declared as
+# secondary files on both sides so they are carried through to the output,
+# which additionally gains the .fai index.
+cwlVersion: v1.0
+class: CommandLineTool
+doc: "samtools faidx, index given fasta file"
+requirements:
+  DockerRequirement:
+    dockerPull: quay.io/biocontainers/samtools:1.9--h8571acd_11
+  InitialWorkDirRequirement:
+    listing:
+      - $(inputs.input_fasta)
+baseCommand: [samtools, faidx]
+inputs:
+  input_fasta:
+    type: File
+    label: "Input fasta"
+    inputBinding:
+      position: 1
+    secondaryFiles:
+      - .amb
+      - .ann
+      - .bwt
+      - .pac
+      - .sa
+outputs:
+  indexed_fasta:
+    type: File
+    outputBinding:
+      glob: "$(inputs.input_fasta.basename)"
+    secondaryFiles:
+      - .amb
+      - .ann
+      - .bwt
+      - .pac
+      - .sa
+      - .fai
+  stdout: stdout
+  stderr: stderr
+# Log names follow the <tool>-<stream>.log pattern of the sibling tools.
+stdout: samtools-faidx-stdout.log
+stderr: samtools-faidx-stderr.log
diff --git a/workflows/fastq2fasta/samtools-sort.cwl b/workflows/fastq2fasta/samtools-sort.cwl
new file mode 100644
index 0000000..4c6340c
--- /dev/null
+++ b/workflows/fastq2fasta/samtools-sort.cwl
@@ -0,0 +1,41 @@
+#!/usr/bin/env cwl-runner
+# Runs `samtools sort` on `input_bamfile` and collects the file named by
+# `output_bam` (default aln.sorted.bam) as `sorted_bam`.
+class: CommandLineTool
+cwlVersion: v1.0
+doc: "samtools sort, sort given bam file"
+requirements:
+  DockerRequirement:
+    dockerPull: quay.io/biocontainers/samtools:1.9--h8571acd_11
+baseCommand: [samtools, sort]
+inputs:
+  threads:
+    type: int
+    default: 4
+    inputBinding: {prefix: -@}
+  tmpfile:
+    label: "Write temporary files to PREFIX.nnnn.bam"
+    type: string
+    default: sort.tmp
+    inputBinding: {prefix: -T}
+  output_bam:
+    label: "Write final output to FILENAME"
+    type: string
+    default: aln.sorted.bam
+    inputBinding: {prefix: -o}
+  input_bamfile:
+    label: "Input bamfile"
+    type: File
+    inputBinding: {position: 1}
+
+outputs:
+  sorted_bam:
+    type: File
+    outputBinding:
+      glob: "$(inputs.output_bam)"
+  stdout: stdout
+  stderr: stderr
+stdout: samtools-sort-stdout.log
+stderr: samtools-sort-stderr.log
diff --git a/workflows/fastq2fasta/samtools-view.cwl b/workflows/fastq2fasta/samtools-view.cwl
new file mode 100644
index 0000000..9f11cc6
--- /dev/null
+++ b/workflows/fastq2fasta/samtools-view.cwl
@@ -0,0 +1,63 @@
+#!/usr/bin/env cwl-runner
+# Runs `samtools view` on `input_file` and collects the file named by
+# `output_filename` (default "aln.bam") as `bam`. With the defaults below,
+# -b is passed and -h / -S are not.
+class: CommandLineTool
+cwlVersion: v1.0
+doc: "samtools view to convert sam format to bam format"
+requirements:
+  DockerRequirement:
+    dockerPull: quay.io/biocontainers/samtools:1.9--h8571acd_11
+baseCommand: [samtools, view]
+inputs:
+  threads:
+    label: "Number of additional threads to use"
+    type: int
+    default: 4
+    inputBinding:
+      prefix: -@
+  output_bam:
+    label: "output BAM"
+    type: boolean
+    default: true
+    inputBinding:
+      prefix: -b
+  output_filename:
+    label: "output file name"
+    type: string
+    default: "aln.bam"
+    inputBinding:
+      prefix: -o
+  input_file:
+    label: "input file"
+    type: File
+    inputBinding:
+      position: 1
+  include_header:
+    label: "include the header in the output"
+    type: boolean
+    default: false
+    inputBinding:
+      prefix: -h
+  ignore_previous_version:
+    label: "ignored for compatibility with previous samtools versions"
+    type: boolean
+    default: false
+    inputBinding:
+      prefix: -S
+  filter_alignments:
+    label: "Do not output alignments with any bits set in INT present in the FLAG field. INT can be specified in hex by beginning with `0x' (i.e. /^0x[0-9A-F]+/) or in octal by beginning with `0' (i.e. /^0[0-7]+/) [0]."
+    type: string?
+    inputBinding:
+      prefix: -F
+  skip_alignments:
+    label: "Skip alignments with MAPQ smaller than INT [0]."
+    type: int?
+    inputBinding:
+      prefix: -q
+outputs:
+  bam:
+    type: File
+    outputBinding:
+      glob: "$(inputs.output_filename)"
+  stdout: stdout
+  stderr: stderr
+stdout: samtools-view-stdout.log
+stderr: samtools-view-stderr.log
diff --git a/workflows/pangenome-generate/merge-metadata.cwl b/workflows/pangenome-generate/merge-metadata.cwl
new file mode 100644
index 0000000..9164c09
--- /dev/null
+++ b/workflows/pangenome-generate/merge-metadata.cwl
@@ -0,0 +1,18 @@
+# Runs merge-metadata.py under python3 and captures its stdout as
+# mergedmetadata.ttl. The script text is pulled in with $include and written
+# into the working directory as merge-metadata.py.
+class: CommandLineTool
+cwlVersion: v1.1
+requirements:
+  InitialWorkDirRequirement:
+    listing:
+      - entryname: merge-metadata.py
+        entry:
+          $include: merge-metadata.py
+hints:
+  DockerRequirement:
+    dockerPull: commonworkflowlanguage/cwltool_module
+baseCommand: [python3, merge-metadata.py]
+inputs:
+  metadata:
+    type: File[]
+  metadataSchema:
+    type: File
+  subjects:
+    type: string[]
+outputs:
+  merged:
+    type: stdout
+stdout: mergedmetadata.ttl
diff --git a/workflows/pangenome-generate/merge-metadata.py b/workflows/pangenome-generate/merge-metadata.py
new file mode 100644
index 0000000..64275b1
--- /dev/null
+++ b/workflows/pangenome-generate/merge-metadata.py
@@ -0,0 +1,17 @@
+import schema_salad.schema
+import schema_salad.jsonld_context
+
+metadataSchema = '$(inputs.metadataSchema.path)'
+metadata = $(inputs.metadata)
+subjects = $(inputs.subjects)
+
+(document_loader,
+ avsc_names,
+ schema_metadata,
+ metaschema_loader) = schema_salad.schema.load_schema(metadataSchema)
+
+for i, m in enumerate(metadata):
+ doc, metadata = schema_salad.schema.load_and_validate(document_loader, avsc_names, m["path"], True)
+ doc["id"] = subjects[i]
+ g = schema_salad.jsonld_context.makerdf(subjects[i], doc, document_loader.ctx)
+ print(g.serialize(format="ntriples").decode("utf-8"))
diff --git a/workflows/pangenome-generate/minimap2.cwl b/workflows/pangenome-generate/minimap2.cwl
new file mode 100644
index 0000000..bf19ef7
--- /dev/null
+++ b/workflows/pangenome-generate/minimap2.cwl
@@ -0,0 +1,23 @@
+# Runs `minimap2 -cx asm20 -w 1` with `readsFA` given twice on the command
+# line and captures stdout as <readsFA nameroot>.paf.
+class: CommandLineTool
+cwlVersion: v1.1
+requirements:
+  InlineJavascriptRequirement: {}
+hints:
+  DockerRequirement:
+    dockerPull: "quay.io/biocontainers/minimap2:2.17--h8b12597_1"
+  ResourceRequirement:
+    coresMin: 8
+    coresMax: 32
+    ramMin: $(7 * 1024)
+    # Input size in GiB plus 20, rounded up.
+    outdirMin: $(Math.ceil(inputs.readsFA.size/(1024*1024*1024) + 20))
+inputs:
+  readsFA:
+    type: File
+outputs:
+  readsPAF:
+    type: stdout
+stdout: $(inputs.readsFA.nameroot).paf
+baseCommand: minimap2
+arguments:
+  - -cx
+  - asm20
+  - -w
+  - "1"
+  - -t
+  - $(runtime.cores)
+  - $(inputs.readsFA)
+  - $(inputs.readsFA)
diff --git a/workflows/pangenome-generate/odgi-build.cwl b/workflows/pangenome-generate/odgi-build.cwl
new file mode 100644
index 0000000..0bd6a20
--- /dev/null
+++ b/workflows/pangenome-generate/odgi-build.cwl
@@ -0,0 +1,26 @@
+# Runs `odgi build` on `inputGFA` and pipes the result into `odgi sort`,
+# which writes <inputGFA nameroot>.odgi. The pipe needs a shell, hence
+# ShellCommandRequirement and the unquoted "|" argument.
+class: CommandLineTool
+cwlVersion: v1.1
+requirements:
+  InlineJavascriptRequirement: {}
+  ShellCommandRequirement: {}
+hints:
+  DockerRequirement:
+    dockerPull: "quay.io/biocontainers/odgi:v0.3--py37h8b12597_0"
+  ResourceRequirement:
+    coresMin: 4
+    ramMin: $(7 * 1024)
+    # Twice (input size in GiB plus 1), rounded up.
+    outdirMin: $(Math.ceil((inputs.inputGFA.size/(1024*1024*1024)+1) * 2))
+  InitialWorkDirRequirement:
+    listing:
+      - entry: $(inputs.inputGFA)
+        writable: true
+inputs:
+  inputGFA:
+    type: File
+outputs:
+  odgiGraph:
+    type: File
+    outputBinding:
+      glob: $(inputs.inputGFA.nameroot).odgi
+arguments: [odgi, build, -g, $(inputs.inputGFA), -s, -o, -,
+            {shellQuote: false, valueFrom: "|"},
+            odgi, sort, -i, -, -p, s, -o, $(inputs.inputGFA.nameroot).odgi]
diff --git a/workflows/pangenome-generate/odgi-viz.cwl b/workflows/pangenome-generate/odgi-viz.cwl
new file mode 100644
index 0000000..d440fcb
--- /dev/null
+++ b/workflows/pangenome-generate/odgi-viz.cwl
@@ -0,0 +1,25 @@
+# Runs `odgi viz` on `inputODGI` and collects <inputODGI nameroot>.png
+# as `odgiPNG`.
+class: CommandLineTool
+cwlVersion: v1.1
+requirements:
+  InlineJavascriptRequirement: {}
+hints:
+  DockerRequirement:
+    dockerPull: "quay.io/biocontainers/odgi:v0.3--py37h8b12597_0"
+  ResourceRequirement:
+    coresMin: 4
+    ramMin: $(7 * 1024)
+    outdirMin: 1
+inputs:
+  inputODGI:
+    type: File
+outputs:
+  odgiPNG:
+    type: File
+    outputBinding:
+      glob: $(inputs.inputODGI.nameroot).png
+baseCommand: [odgi, viz]
+arguments:
+  - -i
+  - $(inputs.inputODGI)
+  - -o
+  - $(inputs.inputODGI.nameroot).png
+  - -x
+  - "50000"
+  - -y
+  - "500"
+  - -R
+  - -P
+  - "4"
diff --git a/workflows/pangenome-generate/odgi_to_rdf.cwl b/workflows/pangenome-generate/odgi_to_rdf.cwl
new file mode 100644
index 0000000..079d6fb
--- /dev/null
+++ b/workflows/pangenome-generate/odgi_to_rdf.cwl
@@ -0,0 +1,25 @@
+#!/usr/bin/env cwl-runner
+# Runs `odgi_to_rdf.py <odgi> -` piped through `xz --stdout` and captures the
+# compressed stream. The output file is `output_name` when given, otherwise
+# <odgi nameroot>.ttl.xz. The pipe needs a shell, hence
+# ShellCommandRequirement and the unquoted "|" argument.
+cwlVersion: v1.1
+class: CommandLineTool
+requirements:
+  InlineJavascriptRequirement: {}
+  ShellCommandRequirement: {}
+hints:
+  DockerRequirement:
+    dockerPull: spodgi/spodgi
+inputs:
+  odgi:
+    type: File
+  output_name:
+    type: string?
+
+outputs:
+  rdf:
+    type: stdout
+
+stdout: $(inputs.output_name || inputs.odgi.nameroot+'.ttl.xz')
+
+arguments:
+  [odgi_to_rdf.py, $(inputs.odgi), "-",
+   {valueFrom: "|", shellQuote: false},
+   xz, --stdout]
diff --git a/workflows/pangenome-generate/pangenome-generate.cwl b/workflows/pangenome-generate/pangenome-generate.cwl
new file mode 100644
index 0000000..2710743
--- /dev/null
+++ b/workflows/pangenome-generate/pangenome-generate.cwl
@@ -0,0 +1,57 @@
+# Workflow: build a pangenome graph from a set of read/sequence files and
+# merge their metadata.
+# Graph chain: seqkit-rmdup -> minimap2 -> seqwish -> odgi-build, whose graph
+# feeds both odgi-viz and odgi_to_rdf. merge-metadata runs independently of
+# the graph chain.
+class: Workflow
+cwlVersion: v1.1
+inputs:
+  inputReads:
+    type: File[]
+  metadata:
+    type: File[]
+  metadataSchema:
+    type: File
+  subjects:
+    type: string[]
+outputs:
+  odgiGraph:
+    type: File
+    outputSource: buildGraph/odgiGraph
+  odgiPNG:
+    type: File
+    outputSource: vizGraph/odgiPNG
+  seqwishGFA:
+    type: File
+    outputSource: induceGraph/seqwishGFA
+  odgiRDF:
+    type: File
+    outputSource: odgi2rdf/rdf
+  mergedMetadata:
+    type: File
+    outputSource: mergeMetadata/merged
+steps:
+  dedup:
+    run: seqkit-rmdup.cwl
+    in:
+      readsFA: inputReads
+    out: [readsMergeDedup]
+  overlapReads:
+    run: minimap2.cwl
+    in:
+      readsFA: dedup/readsMergeDedup
+    out: [readsPAF]
+  induceGraph:
+    run: seqwish.cwl
+    in:
+      readsFA: dedup/readsMergeDedup
+      readsPAF: overlapReads/readsPAF
+    out: [seqwishGFA]
+  buildGraph:
+    run: odgi-build.cwl
+    in:
+      inputGFA: induceGraph/seqwishGFA
+    out: [odgiGraph]
+  vizGraph:
+    run: odgi-viz.cwl
+    in:
+      inputODGI: buildGraph/odgiGraph
+    out: [odgiPNG]
+  odgi2rdf:
+    run: odgi_to_rdf.cwl
+    in:
+      odgi: buildGraph/odgiGraph
+    out: [rdf]
+  mergeMetadata:
+    run: merge-metadata.cwl
+    in:
+      metadata: metadata
+      metadataSchema: metadataSchema
+      subjects: subjects
+    out: [merged]
diff --git a/workflows/pangenome-generate/seqkit-rmdup.cwl b/workflows/pangenome-generate/seqkit-rmdup.cwl
new file mode 100644
index 0000000..d3626f5
--- /dev/null
+++ b/workflows/pangenome-generate/seqkit-rmdup.cwl
@@ -0,0 +1,32 @@
+# Runs `seqkit rmdup --by-seq --ignore-case` over all files in `readsFA` and
+# collects readsMergeDedup.fasta as `readsMergeDedup`.
+cwlVersion: v1.1
+class: CommandLineTool
+inputs:
+  readsFA: File[]
+outputs:
+  readsMergeDedup:
+    type: File
+    outputBinding:
+      glob: readsMergeDedup.fasta
+requirements:
+  InlineJavascriptRequirement: {}
+hints:
+  DockerRequirement:
+    dockerPull: "quay.io/biocontainers/seqkit:0.7.1--0"
+  ResourceRequirement:
+    coresMin: 8
+    coresMax: 32
+    ramMin: $(7 * 1024)
+    # Total input size in GiB plus 1, rounded up to a whole number as in the
+    # sibling tools (resource requests are integer quantities), plus 20.
+    outdirMin: |
+      ${
+        var sum = 0;
+        for (var i = 0; i < inputs.readsFA.length; i++) {
+          sum += inputs.readsFA[i].size;
+        }
+        return Math.ceil(sum/(1024*1024*1024) + 1) + 20;
+      }
+baseCommand: seqkit
+arguments: [rmdup,
+            --by-seq,
+            --ignore-case,
+            -o, readsMergeDedup.fasta,
+            $(inputs.readsFA)]
diff --git a/workflows/pangenome-generate/seqwish.cwl b/workflows/pangenome-generate/seqwish.cwl
new file mode 100644
index 0000000..9f8cb15
--- /dev/null
+++ b/workflows/pangenome-generate/seqwish.cwl
@@ -0,0 +1,29 @@
+# Runs seqwish on the sequences in `readsFA` and the alignments in `readsPAF`
+# and collects <readsPAF nameroot>.gfa (written via -g) as `seqwishGFA`.
+#
+# No `stdout:` redirect is declared: the graph is written through -g, and the
+# redirect this file used to carry sent the tool's stdout into an undeclared
+# file with a misleading .paf name.
+cwlVersion: v1.1
+class: CommandLineTool
+inputs:
+  readsFA: File
+  readsPAF: File
+  kmerSize:
+    type: int
+    default: 16
+outputs:
+  seqwishGFA:
+    type: File
+    outputBinding:
+      glob: $(inputs.readsPAF.nameroot).gfa
+requirements:
+  InlineJavascriptRequirement: {}
+hints:
+  DockerRequirement:
+    dockerPull: "quay.io/biocontainers/seqwish:0.4.1--h8b12597_0"
+  ResourceRequirement:
+    coresMin: 4
+    ramMin: $(7 * 1024)
+    # Size of readsFA in GiB plus 20, rounded up.
+    outdirMin: $(Math.ceil(inputs.readsFA.size/(1024*1024*1024) + 20))
+baseCommand: seqwish
+arguments: [-t, $(runtime.cores),
+            -k, $(inputs.kmerSize),
+            -s, $(inputs.readsFA),
+            -p, $(inputs.readsPAF),
+            -g, $(inputs.readsPAF.nameroot).gfa]
diff --git a/workflows/update-workflows.sh b/workflows/update-workflows.sh
new file mode 100755
index 0000000..ea9e199
--- /dev/null
+++ b/workflows/update-workflows.sh
@@ -0,0 +1,3 @@
+#!/bin/sh
+arvados-cwl-runner --project-uuid=lugli-j7d0g-5hswinmpyho8dju --update-workflow=lugli-7fd4e-2zp9q4jo5xpif9y fastq2fasta/fastq2fasta.cwl
+arvados-cwl-runner --project-uuid=lugli-j7d0g-5hswinmpyho8dju --update-workflow=lugli-7fd4e-mqfu9y3ofnpnho1 pangenome-generate/pangenome-generate.cwl