From db577d11808d91ff5c0b3643a37b5ce9765afc78 Mon Sep 17 00:00:00 2001 From: Arun Isaac Date: Thu, 25 Feb 2021 14:51:11 +0530 Subject: Rewrite fastq2fasta workflow using clitool-step --- scripts/fastq2fasta.scm | 438 ++++++++++++++++++------------------------------ 1 file changed, 165 insertions(+), 273 deletions(-) diff --git a/scripts/fastq2fasta.scm b/scripts/fastq2fasta.scm index c014852..9880c0e 100644 --- a/scripts/fastq2fasta.scm +++ b/scripts/fastq2fasta.scm @@ -13,280 +13,172 @@ #:default 4)) (define ref-fasta - (input "ref_fasta" #:type 'File)) + (input "ref_fasta" + #:type 'File + #:other '((secondary-files . #(".amb" ".ann" ".bwt" ".pac" ".sa"))))) + +(define output-sam + (input "output_sam" + #:type 'string + #:label "sam file to output results to" + #:default "out.sam")) + +(define group-header-line + (input "group_header_line" + #:type 'string? + #:label "read group header line such as '@RG\tID:foo\tSM:bar'")) + +(define fastq-forward + (input "fastq_forward" + #:type 'File + #:label "input fastq file to map (single-end or forward for pair-end)")) + +(define fastq-reverse + (input "fastq_reverse" + #:type 'File? + #:label "input fastq file to map (reverse for pair-end)")) (define sample-id (input "sample_id" #:type 'string)) -(call-with-output-file "workflows/fastq2fasta/bwa-mem.cwl" - (cut scm->yaml - (clitool - (list "bwa" "mem" - "-t" threads - "-o" (input "output_sam" - #:type 'string - #:label "sam file to output results to" - #:default "out.sam") - "-R" (input "group_header_line" - #:type 'string? - #:label "read group header line such as '@RG\tID:foo\tSM:bar'") - (input "index_base" - #:type 'File - #:label "fasta file for index basename" - #:other '((secondary-files . #(".amb" ".ann" ".bwt" ".pac" ".sa")))) - (input "fastq_forward" - #:type 'File - #:label "input fastq file to map (single-end or forward for pair-end)") - (input "fastq_reverse" - #:type 'File? - #:label "input fastq file to map (reverse for pair-end)")) - #:outputs (list (output "output" - #:type 'File - #:binding '((glob . "$(inputs.output_sam)"))) - (output "stdout" #:type 'stdout) - (output "stderr" #:type 'stderr)) - #:stdout "bwa-mem-stdout.log" - #:stderr "bwa-mem-stderr.log" - #:other '((requirements - (Docker-requirement - (docker-pull . "quay.io/biocontainers/bwa:0.7.17--h84994c4_5"))))) - <>)) - -(call-with-output-file "workflows/fastq2fasta/samtools-view.cwl" - (cut scm->yaml - (clitool - (list "samtools" "view" - "-@" threads - "-b" (input "output_bam" - #:type 'boolean - #:label "output BAM" - #:default #t) - "-o" (input "output_filename" - #:type 'string - #:label "output file name" - #:default "aln.bam") - (input "input_file" - #:type 'File - #:label "input file") - "-h" (input "include_header" - #:type 'boolean - #:label "include the header in the output" - #:default #f) - "-S" (input "ignore_previous_version" - #:type 'boolean - #:label "ignored for compatibility with previous samtools versions" - #:default #f) - "-F" (input "filter_alignments" - #:type 'string? - #:label "Do not output alignments with any bits set in INT present in the FLAG field. INT can be specified in hex by beginning with `0x' (i.e. /^0x[0-9A-F]+/) or in octal by beginning with `0' (i.e. /^0[0-7]+/) [0].") - "-q" (input "skip_alignments" - #:type 'int? - #:label "Skip alignments with MAPQ smaller than INT [0].")) - #:outputs (list (output "bam" - #:type 'File - #:binding '((glob . "$(inputs.output_filename)"))) - (output "stdout" #:type 'stdout) - (output "stderr" #:type 'stderr)) - #:stdout "samtools-view-stdout.log" - #:stderr "samtools-view-stderr.log" - #:other '((doc . "samtools view to convert sam format to bam format") - (requirements - (Docker-requirement - (docker-pull . "quay.io/biocontainers/samtools:1.9--h8571acd_11"))))) - <>)) - -(call-with-output-file "workflows/fastq2fasta/samtools-sort.cwl" - (cut scm->yaml - (clitool - (list "samtools" "sort" - "-@" threads - "-T" (input "tmpfile" - #:type 'string - #:label "Write temporary files to PREFIX.nnnn.bam" - #:default "sort.tmp") - "-o" (input "output_bam" - #:type 'string - #:label "Write final output to FILENAME" - #:default "aln.sorted.bam") - (input "input_bamfile" - #:type 'File - #:label "Input bamfile")) - #:outputs (list (output "sorted_bam" - #:type 'File - #:binding '((glob . "$(inputs.output_bam)"))) - (output "stdout" #:type 'stdout) - (output "stderr" #:type 'stderr)) - #:stdout "samtools-sort-stdout.log" - #:stderr "samtools-sort-stderr.log" - #:other '((doc . "samtools sort, sort given bam file") - (requirements - (Docker-requirement - (docker-pull . "quay.io/biocontainers/samtools:1.9--h8571acd_11"))))) - <>)) - -(call-with-output-file "workflows/fastq2fasta/freebayes.cwl" - (cut scm->yaml - (clitool - (list "freebayes" "--ploidy" "1" - "--bam" (input "bam" #:type 'File) - "-f" ref-fasta) - #:outputs (list (output "vcf" #:type 'stdout)) - #:stdout "var.vcf") - <>)) - -(call-with-output-file "workflows/fastq2fasta/bcftools-view-exclude-ref.cwl" - (cut scm->yaml - (clitool - (list "bcftools" "view" "--no-version" "-Ou" - "-e'type=ref'" "--threads" threads - (input "vcf" #:type 'File)) - #:outputs (list (output "bcf" #:type 'stdout)) - #:stdout "$(inputs.vcf.nameroot).without-ref.bcf") - <>)) - -(call-with-output-file "workflows/fastq2fasta/bcftools-norm.cwl" - (cut scm->yaml - (clitool - (list "bcftools" "norm" "-Ob" - "-f" ref-fasta - "-o" (input "output_name" #:type 'string #:default "normalized.bcf") - "--threads" threads - (input "bcf" #:type 'File)) - #:outputs (list (output "normalized_bcf" - #:type 'File - #:binding '((glob . "$(inputs.output_name)")))) - #:other '((hints - (Docker-requirement - (docker-pull . "quay.io/biocontainers/bcftools:1.10.2--hd2cd319_0"))))) - <>)) - -(call-with-output-file "workflows/fastq2fasta/bcftools-index.cwl" - (cut scm->yaml - (clitool - (list "bcftools" "index" (input "bcf" #:type 'File)) - #:outputs (list (output "indexed" - #:type 'File - #:binding '((glob . "$(inputs.bcf.basename)")) - #:other '((secondary-files . #(".csi"))))) - #:other '((hints - (Docker-requirement - (docker-pull . "quay.io/biocontainers/bcftools:1.10.2--hd2cd319_0")) - (Initial-work-dir-requirement - (listing . #("$(inputs.bcf)")))))) - <>)) - -(call-with-output-file "workflows/fastq2fasta/bcftools-view-qc.cwl" - (cut scm->yaml - (clitool - (list "bcftools" "view" "-i" "'QUAL > 10 && GT=\"a\"'" "-Oz" - "--threads" threads - (input "bcf" - #:type 'File - #:other '((secondary-files . #(".csi"))))) - #:outputs (list (output "vcf" #:type 'stdout)) - #:other '((hints - (Docker-requirement - (docker-pull . "quay.io/biocontainers/bcftools:1.10.2--hd2cd319_0"))))) - <>)) - -(call-with-output-file "workflows/fastq2fasta/bcftools-consensus.cwl" - (cut scm->yaml - (clitool - (list "bcftools" "consensus" - "-i" "'QUAL > 10 && GT=\"a\"'" "-Hla" - "-f" ref-fasta - (input "vcf" - #:type 'File - #:other '((secondary-files . #(".csi"))))) - #:outputs (list (output "out_fasta" #:type 'stdout)) - #:stdout "sequence.fasta" - #:other '((hints - (Docker-requirement - (docker-pull . "quay.io/biocontainers/bcftools:1.10.2--hd2cd319_0"))))) - <>)) - -(call-with-output-file "workflows/fastq2fasta/set-sample-id.cwl" - (cut scm->yaml - (clitool - (list "sed" (input "fasta" #:type 'File)) - #:additional-inputs (list (input "sample_id" #:type 'string)) - #:outputs (list (output "out_fasta" #:type 'stdout)) - #:stdout "sequence.fasta" - #:other '((arguments . #("s/^>.*/>$(inputs.sample_id)/g")))) - <>)) - -(let ((fasta (input "fasta" #:type 'File))) - (call-with-output-file "workflows/fastq2fasta/bam2fasta.cwl" - (cut scm->yaml - (workflow - (list (step "freebayes" "freebayes.cwl" - `((bam . ,(input "bam" #:type 'File)) - (ref_fasta . ,fasta)) - (list "vcf")) - (step "bcftools_view_exclude_ref" "bcftools-view-exclude-ref.cwl" - `((vcf . "freebayes/vcf") - (threads . ,threads)) - (list "bcf")) - (step "bcftools_norm" "bcftools-norm.cwl" - `((ref_fasta . ,fasta) - (bcf . "bcftools_view_exclude_ref/bcf") - (threads . ,threads)) - (list "normalized_bcf")) - (step "bcftools_index_after_normalization" "bcftools-index.cwl" - '((bcf . "bcftools_norm/normalized_bcf")) - (list "indexed")) - (step "bcftools_view_qc" "bcftools-view-qc.cwl" - `((bcf . "bcftools_index_after_normalization/indexed") - (threads . ,threads)) - (list "vcf")) - (step "bcftools_index_after_qc" "bcftools-index.cwl" - '((bcf . "bcftools_view_qc/vcf")) - (list "indexed")) - (step "bcftools_consensus" "bcftools-consensus.cwl" - `((ref_fasta . ,fasta) - (vcf . "bcftools_index_after_qc/indexed")) - (list "out_fasta")) - (step "set_sample_id" "set-sample-id.cwl" - `((fasta . "bcftools_consensus/out_fasta") - (sample_id . ,sample-id)) - (list "out_fasta"))) - (list (workflow-output "out_fasta" - #:type 'File - #:source "set_sample_id/out_fasta"))) - <>))) - -(let ((ref-fasta (input "ref_fasta" - #:type 'File - #:other '((secondary-files . #(".amb" ".ann" ".bwt" ".pac" ".sa" ".fai")))))) - (call-with-output-file "workflows/fastq2fasta/fastq2fasta.cwl" - (cut scm->yaml - (workflow - (list (step "bwa-mem" "bwa-mem.cwl" - `((threads . ,threads) - (fastq_forward . ,(input "fastq_forward" #:type 'File)) - (fastq_reverse . ,(input "fastq_reverse" #:type 'File?)) - (index_base . ,ref-fasta)) - (list "output")) - (step "samtools-view" "samtools-view.cwl" - `((threads . ,threads) - (input_file . "bwa-mem/output")) - (list "bam")) - (step "samtools-sort" "samtools-sort.cwl" - `((input_bamfile . "samtools-view/bam") - (threads . ,threads)) - (list "sorted_bam")) - (step "bam2fasta" "bam2fasta.cwl" - `((bam . "samtools-sort/sorted_bam") - (fasta . ,ref-fasta) - (threads . ,threads) - (sample_id . ,sample-id)) - (list "out_fasta"))) - (list (workflow-output "out_fasta" - #:type 'File - #:source "bam2fasta/out_fasta") - (workflow-output "out_metadata" - #:type 'File? - #:source (input "metadata" #:type 'File?))) - #:other '((requirements (Subworkflow-feature-requirement)) - (hints (Resource-requirement (ram-min . 3000))))) - <>))) +(define bwa-docker + '(Docker-requirement + (docker-pull . "quay.io/biocontainers/bwa:0.7.17--h84994c4_5"))) + +(define samtools-docker + '(Docker-requirement + (docker-pull . "quay.io/biocontainers/samtools:1.9--h8571acd_11"))) + +(define bcftools-docker + '(Docker-requirement + (docker-pull . "quay.io/biocontainers/bcftools:1.10.2--hd2cd319_0"))) + +(define fastq2fasta + (workflow + (list (clitool-step "bwa_mem" + (list "bwa" "mem" + "-t" threads + "-o" output-sam + "-R" group-header-line + ref-fasta + fastq-forward + fastq-reverse) + #:outputs (list (output "output" + #:type 'File + #:binding '((glob . "$(inputs.output_sam)")))) + #:other `((requirements ,bwa-docker))) + (clitool-step "samtools_view" + (list "samtools" "view" + "-b" + "-o" "aln.bam" + "-@" threads + (intermediate (input "input_file" + #:type 'File + #:label "input file") + "bwa_mem/output")) + #:outputs (list (output "bam" + #:type 'File + #:binding '((glob . "aln.bam")))) + #:other `((doc . "samtools view to convert sam format to bam format") + (requirements ,samtools-docker))) + (clitool-step "samtools_sort" + (list "samtools" "sort" + "-T" "sort.tmp" + "-o" "aln.sorted.bam" + "-@" threads + (intermediate (input "input_bamfile" + #:type 'File + #:label "Input bamfile") + "samtools_view/bam")) + #:outputs (list (output "sorted_bam" + #:type 'File + #:binding '((glob . "aln.sorted.bam")))) + #:other `((doc . "samtools sort, sort given bam file") + (requirements ,samtools-docker))) + (clitool-step "freebayes" + (list "freebayes" + "--ploidy" "1" + "--bam" (intermediate (input "bam" #:type 'File) + "samtools_sort/sorted_bam") + "-f" ref-fasta) + #:outputs (list (output "vcf" #:type 'stdout)) + #:stdout "var.vcf") + (clitool-step "bcftools_view_exclude_ref" + (list "bcftools" "view" + "--no-version" "-Ou" + "-e'type=ref'" + "--threads" threads + (intermediate (input "vcf" #:type 'File) + "freebayes/vcf")) + #:outputs (list (output "bcf" #:type 'stdout)) + #:other `((hints ,bcftools-docker))) + (clitool-step "bcftools_norm" + (list "bcftools" "norm" + "-Ob" + "-o" "normalized.bcf" + "-f" ref-fasta + "--threads" threads + (intermediate (input "bcf" #:type 'File) + "bcftools_view_exclude_ref/bcf")) + #:outputs (list (output "normalized_bcf" + #:type 'File + #:binding '((glob . "normalized.bcf")))) + #:other `((hints ,bcftools-docker))) + (clitool-step "bcftools_index_after_normalization" + (list "bcftools" "index" + (intermediate (input "bcf" #:type 'File) + "bcftools_norm/normalized_bcf")) + #:outputs (list (output "indexed" + #:type 'File + #:binding '((glob . "$(inputs.bcf.basename)")) + #:other '((secondary-files . #(".csi"))))) + #:other `((hints ,bcftools-docker + (Initial-work-dir-requirement + (listing . #("$(inputs.bcf)")))))) + (clitool-step "bcftools_view_qc" + (list "bcftools" "view" + "-i" "'QUAL > 10 && GT=\"a\"'" + "-Oz" + "--threads" threads + (intermediate (input "bcf" + #:type 'File + #:other '((secondary-files . #(".csi")))) + "bcftools_index_after_normalization/indexed")) + #:outputs (list (output "vcf" #:type 'stdout)) + #:other `((hints ,bcftools-docker))) + (clitool-step "bcftools_index_after_qc" + (list "bcftools" "index" + (intermediate (input "bcf" #:type 'File) + "bcftools_view_qc/vcf")) + #:outputs (list (output "indexed" + #:type 'File + #:binding '((glob . "$(inputs.bcf.basename)")) + #:other '((secondary-files . #(".csi"))))) + #:other `((hints ,bcftools-docker + (Initial-work-dir-requirement + (listing . #("$(inputs.bcf)")))))) + (clitool-step "bcftools_consensus" + (list "bcftools" "consensus" + "-i" "'QUAL > 10 && GT=\"a\"'" + "-Hla" + "-f" ref-fasta + (intermediate (input "vcf" + #:type 'File + #:other '((secondary-files . #(".csi")))) + "bcftools_index_after_qc/indexed")) + #:outputs (list (output "out_fasta" #:type 'stdout)) + #:stdout "sequence.fasta" + #:other `((hints ,bcftools-docker))) + (clitool-step "set_sample_id" + (list "sed" (intermediate (input "fasta" #:type 'File) + "bcftools_consensus/out_fasta")) + #:additional-inputs (list sample-id) + #:outputs (list (output "out_fasta" #:type 'stdout)) + #:stdout "sequence.fasta" + #:other '((arguments . #("s/^>.*/>$(inputs.sample_id)/g"))))) + (list (workflow-output "out_fasta" + #:type 'File + #:source "set_sample_id/out_fasta")))) + +(call-with-output-file "workflows/fastq2fasta/fastq2fasta.cwl" + (cut scm->yaml fastq2fasta <>)) -- cgit v1.2.3