about summary refs log tree commit diff
path: root/scripts/fastq2fasta.scm
diff options
context:
space:
mode:
author Arun Isaac 2021-02-25 14:51:11 +0530
committer Arun Isaac 2021-02-25 14:52:58 +0530
commit db577d11808d91ff5c0b3643a37b5ce9765afc78 (patch)
tree ce3f214144f347e2115a5fb975d86ba32e7a84e7 /scripts/fastq2fasta.scm
parent 950898df6928c0cdec4f40238eb4da4ec6901bde (diff)
download bh20-seq-resource-db577d11808d91ff5c0b3643a37b5ce9765afc78.tar.gz
bh20-seq-resource-db577d11808d91ff5c0b3643a37b5ce9765afc78.tar.lz
bh20-seq-resource-db577d11808d91ff5c0b3643a37b5ce9765afc78.zip
Rewrite fastq2fasta workflow using clitool-step
Diffstat (limited to 'scripts/fastq2fasta.scm')
-rw-r--r-- scripts/fastq2fasta.scm 438
1 files changed, 165 insertions, 273 deletions
diff --git a/scripts/fastq2fasta.scm b/scripts/fastq2fasta.scm
index c014852..9880c0e 100644
--- a/scripts/fastq2fasta.scm
+++ b/scripts/fastq2fasta.scm
@@ -13,280 +13,172 @@
#:default 4))
(define ref-fasta
- (input "ref_fasta" #:type 'File))
+ (input "ref_fasta" ; reference fasta; referenced by the bwa_mem, freebayes, bcftools_norm and bcftools_consensus steps
+ #:type 'File ; NOTE(review): the removed workflow-level "ref_fasta" input also listed ".fai" as a secondary file -- confirm dropping it is intended
+ #:other '((secondary-files . #(".amb" ".ann" ".bwt" ".pac" ".sa"))))) ; same extensions the removed "index_base" input of bwa-mem.cwl declared
+
+(define output-sam ; workflow input: name of the SAM file written by the bwa_mem step
+ (input "output_sam"
+ #:type 'string
+ #:label "sam file to output results to"
+ #:default "out.sam")) ; passed to "bwa mem -o" and reused as that step's output glob via $(inputs.output_sam)
+
+(define group-header-line ; workflow input: read group header, passed to "bwa mem -R"
+ (input "group_header_line"
+ #:type 'string? ; presumably CWL's optional-string shorthand -- no default is given here
+ #:label "read group header line such as '@RG\tID:foo\tSM:bar'"))
+
+(define fastq-forward ; workflow input: first fastq file on the "bwa mem" command line, right after the reference
+ (input "fastq_forward"
+ #:type 'File
+ #:label "input fastq file to map (single-end or forward for pair-end)"))
+
+(define fastq-reverse ; workflow input: second fastq file, last argument on the "bwa mem" command line
+ (input "fastq_reverse"
+ #:type 'File? ; presumably optional (CWL "?" shorthand), matching the single-end case named in the labels
+ #:label "input fastq file to map (reverse for pair-end)"))
(define sample-id
(input "sample_id" #:type 'string))
-(call-with-output-file "workflows/fastq2fasta/bwa-mem.cwl"
- (cut scm->yaml
- (clitool
- (list "bwa" "mem"
- "-t" threads
- "-o" (input "output_sam"
- #:type 'string
- #:label "sam file to output results to"
- #:default "out.sam")
- "-R" (input "group_header_line"
- #:type 'string?
- #:label "read group header line such as '@RG\tID:foo\tSM:bar'")
- (input "index_base"
- #:type 'File
- #:label "fasta file for index basename"
- #:other '((secondary-files . #(".amb" ".ann" ".bwt" ".pac" ".sa"))))
- (input "fastq_forward"
- #:type 'File
- #:label "input fastq file to map (single-end or forward for pair-end)")
- (input "fastq_reverse"
- #:type 'File?
- #:label "input fastq file to map (reverse for pair-end)"))
- #:outputs (list (output "output"
- #:type 'File
- #:binding '((glob . "$(inputs.output_sam)")))
- (output "stdout" #:type 'stdout)
- (output "stderr" #:type 'stderr))
- #:stdout "bwa-mem-stdout.log"
- #:stderr "bwa-mem-stderr.log"
- #:other '((requirements
- (Docker-requirement
- (docker-pull . "quay.io/biocontainers/bwa:0.7.17--h84994c4_5")))))
- <>))
-
-(call-with-output-file "workflows/fastq2fasta/samtools-view.cwl"
- (cut scm->yaml
- (clitool
- (list "samtools" "view"
- "-@" threads
- "-b" (input "output_bam"
- #:type 'boolean
- #:label "output BAM"
- #:default #t)
- "-o" (input "output_filename"
- #:type 'string
- #:label "output file name"
- #:default "aln.bam")
- (input "input_file"
- #:type 'File
- #:label "input file")
- "-h" (input "include_header"
- #:type 'boolean
- #:label "include the header in the output"
- #:default #f)
- "-S" (input "ignore_previous_version"
- #:type 'boolean
- #:label "ignored for compatibility with previous samtools versions"
- #:default #f)
- "-F" (input "filter_alignments"
- #:type 'string?
- #:label "Do not output alignments with any bits set in INT present in the FLAG field. INT can be specified in hex by beginning with `0x' (i.e. /^0x[0-9A-F]+/) or in octal by beginning with `0' (i.e. /^0[0-7]+/) [0].")
- "-q" (input "skip_alignments"
- #:type 'int?
- #:label "Skip alignments with MAPQ smaller than INT [0]."))
- #:outputs (list (output "bam"
- #:type 'File
- #:binding '((glob . "$(inputs.output_filename)")))
- (output "stdout" #:type 'stdout)
- (output "stderr" #:type 'stderr))
- #:stdout "samtools-view-stdout.log"
- #:stderr "samtools-view-stderr.log"
- #:other '((doc . "samtools view to convert sam format to bam format")
- (requirements
- (Docker-requirement
- (docker-pull . "quay.io/biocontainers/samtools:1.9--h8571acd_11")))))
- <>))
-
-(call-with-output-file "workflows/fastq2fasta/samtools-sort.cwl"
- (cut scm->yaml
- (clitool
- (list "samtools" "sort"
- "-@" threads
- "-T" (input "tmpfile"
- #:type 'string
- #:label "Write temporary files to PREFIX.nnnn.bam"
- #:default "sort.tmp")
- "-o" (input "output_bam"
- #:type 'string
- #:label "Write final output to FILENAME"
- #:default "aln.sorted.bam")
- (input "input_bamfile"
- #:type 'File
- #:label "Input bamfile"))
- #:outputs (list (output "sorted_bam"
- #:type 'File
- #:binding '((glob . "$(inputs.output_bam)")))
- (output "stdout" #:type 'stdout)
- (output "stderr" #:type 'stderr))
- #:stdout "samtools-sort-stdout.log"
- #:stderr "samtools-sort-stderr.log"
- #:other '((doc . "samtools sort, sort given bam file")
- (requirements
- (Docker-requirement
- (docker-pull . "quay.io/biocontainers/samtools:1.9--h8571acd_11")))))
- <>))
-
-(call-with-output-file "workflows/fastq2fasta/freebayes.cwl"
- (cut scm->yaml
- (clitool
- (list "freebayes" "--ploidy" "1"
- "--bam" (input "bam" #:type 'File)
- "-f" ref-fasta)
- #:outputs (list (output "vcf" #:type 'stdout))
- #:stdout "var.vcf")
- <>))
-
-(call-with-output-file "workflows/fastq2fasta/bcftools-view-exclude-ref.cwl"
- (cut scm->yaml
- (clitool
- (list "bcftools" "view" "--no-version" "-Ou"
- "-e'type=ref'" "--threads" threads
- (input "vcf" #:type 'File))
- #:outputs (list (output "bcf" #:type 'stdout))
- #:stdout "$(inputs.vcf.nameroot).without-ref.bcf")
- <>))
-
-(call-with-output-file "workflows/fastq2fasta/bcftools-norm.cwl"
- (cut scm->yaml
- (clitool
- (list "bcftools" "norm" "-Ob"
- "-f" ref-fasta
- "-o" (input "output_name" #:type 'string #:default "normalized.bcf")
- "--threads" threads
- (input "bcf" #:type 'File))
- #:outputs (list (output "normalized_bcf"
- #:type 'File
- #:binding '((glob . "$(inputs.output_name)"))))
- #:other '((hints
- (Docker-requirement
- (docker-pull . "quay.io/biocontainers/bcftools:1.10.2--hd2cd319_0")))))
- <>))
-
-(call-with-output-file "workflows/fastq2fasta/bcftools-index.cwl"
- (cut scm->yaml
- (clitool
- (list "bcftools" "index" (input "bcf" #:type 'File))
- #:outputs (list (output "indexed"
- #:type 'File
- #:binding '((glob . "$(inputs.bcf.basename)"))
- #:other '((secondary-files . #(".csi")))))
- #:other '((hints
- (Docker-requirement
- (docker-pull . "quay.io/biocontainers/bcftools:1.10.2--hd2cd319_0"))
- (Initial-work-dir-requirement
- (listing . #("$(inputs.bcf)"))))))
- <>))
-
-(call-with-output-file "workflows/fastq2fasta/bcftools-view-qc.cwl"
- (cut scm->yaml
- (clitool
- (list "bcftools" "view" "-i" "'QUAL > 10 && GT=\"a\"'" "-Oz"
- "--threads" threads
- (input "bcf"
- #:type 'File
- #:other '((secondary-files . #(".csi")))))
- #:outputs (list (output "vcf" #:type 'stdout))
- #:other '((hints
- (Docker-requirement
- (docker-pull . "quay.io/biocontainers/bcftools:1.10.2--hd2cd319_0")))))
- <>))
-
-(call-with-output-file "workflows/fastq2fasta/bcftools-consensus.cwl"
- (cut scm->yaml
- (clitool
- (list "bcftools" "consensus"
- "-i" "'QUAL > 10 && GT=\"a\"'" "-Hla"
- "-f" ref-fasta
- (input "vcf"
- #:type 'File
- #:other '((secondary-files . #(".csi")))))
- #:outputs (list (output "out_fasta" #:type 'stdout))
- #:stdout "sequence.fasta"
- #:other '((hints
- (Docker-requirement
- (docker-pull . "quay.io/biocontainers/bcftools:1.10.2--hd2cd319_0")))))
- <>))
-
-(call-with-output-file "workflows/fastq2fasta/set-sample-id.cwl"
- (cut scm->yaml
- (clitool
- (list "sed" (input "fasta" #:type 'File))
- #:additional-inputs (list (input "sample_id" #:type 'string))
- #:outputs (list (output "out_fasta" #:type 'stdout))
- #:stdout "sequence.fasta"
- #:other '((arguments . #("s/^>.*/>$(inputs.sample_id)/g"))))
- <>))
-
-(let ((fasta (input "fasta" #:type 'File)))
- (call-with-output-file "workflows/fastq2fasta/bam2fasta.cwl"
- (cut scm->yaml
- (workflow
- (list (step "freebayes" "freebayes.cwl"
- `((bam . ,(input "bam" #:type 'File))
- (ref_fasta . ,fasta))
- (list "vcf"))
- (step "bcftools_view_exclude_ref" "bcftools-view-exclude-ref.cwl"
- `((vcf . "freebayes/vcf")
- (threads . ,threads))
- (list "bcf"))
- (step "bcftools_norm" "bcftools-norm.cwl"
- `((ref_fasta . ,fasta)
- (bcf . "bcftools_view_exclude_ref/bcf")
- (threads . ,threads))
- (list "normalized_bcf"))
- (step "bcftools_index_after_normalization" "bcftools-index.cwl"
- '((bcf . "bcftools_norm/normalized_bcf"))
- (list "indexed"))
- (step "bcftools_view_qc" "bcftools-view-qc.cwl"
- `((bcf . "bcftools_index_after_normalization/indexed")
- (threads . ,threads))
- (list "vcf"))
- (step "bcftools_index_after_qc" "bcftools-index.cwl"
- '((bcf . "bcftools_view_qc/vcf"))
- (list "indexed"))
- (step "bcftools_consensus" "bcftools-consensus.cwl"
- `((ref_fasta . ,fasta)
- (vcf . "bcftools_index_after_qc/indexed"))
- (list "out_fasta"))
- (step "set_sample_id" "set-sample-id.cwl"
- `((fasta . "bcftools_consensus/out_fasta")
- (sample_id . ,sample-id))
- (list "out_fasta")))
- (list (workflow-output "out_fasta"
- #:type 'File
- #:source "set_sample_id/out_fasta")))
- <>)))
-
-(let ((ref-fasta (input "ref_fasta"
- #:type 'File
- #:other '((secondary-files . #(".amb" ".ann" ".bwt" ".pac" ".sa" ".fai"))))))
- (call-with-output-file "workflows/fastq2fasta/fastq2fasta.cwl"
- (cut scm->yaml
- (workflow
- (list (step "bwa-mem" "bwa-mem.cwl"
- `((threads . ,threads)
- (fastq_forward . ,(input "fastq_forward" #:type 'File))
- (fastq_reverse . ,(input "fastq_reverse" #:type 'File?))
- (index_base . ,ref-fasta))
- (list "output"))
- (step "samtools-view" "samtools-view.cwl"
- `((threads . ,threads)
- (input_file . "bwa-mem/output"))
- (list "bam"))
- (step "samtools-sort" "samtools-sort.cwl"
- `((input_bamfile . "samtools-view/bam")
- (threads . ,threads))
- (list "sorted_bam"))
- (step "bam2fasta" "bam2fasta.cwl"
- `((bam . "samtools-sort/sorted_bam")
- (fasta . ,ref-fasta)
- (threads . ,threads)
- (sample_id . ,sample-id))
- (list "out_fasta")))
- (list (workflow-output "out_fasta"
- #:type 'File
- #:source "bam2fasta/out_fasta")
- (workflow-output "out_metadata"
- #:type 'File?
- #:source (input "metadata" #:type 'File?)))
- #:other '((requirements (Subworkflow-feature-requirement))
- (hints (Resource-requirement (ram-min . 3000)))))
- <>)))
+(define bwa-docker ; container spec shared via unquote; spliced into the bwa_mem step under "requirements"
+ '(Docker-requirement
+ (docker-pull . "quay.io/biocontainers/bwa:0.7.17--h84994c4_5"))) ; same image the removed bwa-mem.cwl generator used
+
+(define samtools-docker ; container spec spliced into the samtools_view and samtools_sort steps under "requirements"
+ '(Docker-requirement
+ (docker-pull . "quay.io/biocontainers/samtools:1.9--h8571acd_11"))) ; same image the removed samtools-*.cwl generators used
+
+(define bcftools-docker ; container spec spliced into every bcftools_* step, there under "hints" rather than "requirements"
+ '(Docker-requirement
+ (docker-pull . "quay.io/biocontainers/bcftools:1.10.2--hd2cd319_0"))) ; same image the removed bcftools-*.cwl generators used
+
+(define fastq2fasta ; the whole pipeline as one workflow value: bwa mem -> samtools view/sort -> freebayes -> bcftools steps -> sed
+ (workflow
+ (list (clitool-step "bwa_mem" ; step: map the fastq reads against ref-fasta
+ (list "bwa" "mem"
+ "-t" threads
+ "-o" output-sam
+ "-R" group-header-line
+ ref-fasta
+ fastq-forward
+ fastq-reverse)
+ #:outputs (list (output "output"
+ #:type 'File
+ #:binding '((glob . "$(inputs.output_sam)")))) ; collects the file named by the -o argument
+ #:other `((requirements ,bwa-docker)))
+ (clitool-step "samtools_view" ; step: run "samtools view -b" on the bwa_mem output, writing aln.bam
+ (list "samtools" "view"
+ "-b"
+ "-o" "aln.bam" ; file name is now a fixed string; the removed samtools-view.cwl exposed it as an input
+ "-@" threads
+ (intermediate (input "input_file" ; NOTE(review): intermediate presumably binds this step input to the named upstream "step/output" -- confirm in ccwl
+ #:type 'File
+ #:label "input file")
+ "bwa_mem/output"))
+ #:outputs (list (output "bam"
+ #:type 'File
+ #:binding '((glob . "aln.bam")))) ; must stay in sync with the -o value above
+ #:other `((doc . "samtools view to convert sam format to bam format")
+ (requirements ,samtools-docker)))
+ (clitool-step "samtools_sort" ; step: sort the BAM from samtools_view into aln.sorted.bam
+ (list "samtools" "sort"
+ "-T" "sort.tmp"
+ "-o" "aln.sorted.bam"
+ "-@" threads
+ (intermediate (input "input_bamfile"
+ #:type 'File
+ #:label "Input bamfile")
+ "samtools_view/bam"))
+ #:outputs (list (output "sorted_bam"
+ #:type 'File
+ #:binding '((glob . "aln.sorted.bam")))) ; must stay in sync with the -o value above
+ #:other `((doc . "samtools sort, sort given bam file")
+ (requirements ,samtools-docker)))
+ (clitool-step "freebayes" ; step: run freebayes on the sorted BAM; no Docker entry is attached to this step
+ (list "freebayes"
+ "--ploidy" "1"
+ "--bam" (intermediate (input "bam" #:type 'File)
+ "samtools_sort/sorted_bam")
+ "-f" ref-fasta)
+ #:outputs (list (output "vcf" #:type 'stdout)) ; the step output is whatever freebayes prints on stdout
+ #:stdout "var.vcf") ; stdout is captured under this file name
+ (clitool-step "bcftools_view_exclude_ref" ; step: "bcftools view" with the exclude expression type=ref
+ (list "bcftools" "view"
+ "--no-version" "-Ou"
+ "-e'type=ref'" ; NOTE(review): the single quotes are inside the argument string -- confirm they do not reach bcftools literally
+ "--threads" threads
+ (intermediate (input "vcf" #:type 'File)
+ "freebayes/vcf"))
+ #:outputs (list (output "bcf" #:type 'stdout)) ; no #:stdout name here; the removed version used "$(inputs.vcf.nameroot).without-ref.bcf"
+ #:other `((hints ,bcftools-docker)))
+ (clitool-step "bcftools_norm" ; step: "bcftools norm" against ref-fasta, writing normalized.bcf
+ (list "bcftools" "norm"
+ "-Ob"
+ "-o" "normalized.bcf" ; fixed file name; the removed bcftools-norm.cwl exposed it as the "output_name" input
+ "-f" ref-fasta
+ "--threads" threads
+ (intermediate (input "bcf" #:type 'File)
+ "bcftools_view_exclude_ref/bcf"))
+ #:outputs (list (output "normalized_bcf"
+ #:type 'File
+ #:binding '((glob . "normalized.bcf")))) ; must stay in sync with the -o value above
+ #:other `((hints ,bcftools-docker)))
+ (clitool-step "bcftools_index_after_normalization" ; step: "bcftools index" on the normalized BCF
+ (list "bcftools" "index"
+ (intermediate (input "bcf" #:type 'File)
+ "bcftools_norm/normalized_bcf"))
+ #:outputs (list (output "indexed" ; output is the input file itself (globbed by basename) plus its ".csi" secondary file
+ #:type 'File
+ #:binding '((glob . "$(inputs.bcf.basename)"))
+ #:other '((secondary-files . #(".csi")))))
+ #:other `((hints ,bcftools-docker
+ (Initial-work-dir-requirement ; lists the input bcf in the working directory, presumably so the index is created next to it
+ (listing . #("$(inputs.bcf)"))))))
+ (clitool-step "bcftools_view_qc" ; step: "bcftools view" with the include expression QUAL > 10 && GT="a"
+ (list "bcftools" "view"
+ "-i" "'QUAL > 10 && GT=\"a\"'" ; NOTE(review): outer single quotes are part of the argument string -- confirm intended
+ "-Oz"
+ "--threads" threads
+ (intermediate (input "bcf"
+ #:type 'File
+ #:other '((secondary-files . #(".csi")))) ; expects the index produced by the previous step
+ "bcftools_index_after_normalization/indexed"))
+ #:outputs (list (output "vcf" #:type 'stdout))
+ #:other `((hints ,bcftools-docker)))
+ (clitool-step "bcftools_index_after_qc" ; step: same indexing recipe as bcftools_index_after_normalization, applied to the QC output
+ (list "bcftools" "index"
+ (intermediate (input "bcf" #:type 'File)
+ "bcftools_view_qc/vcf"))
+ #:outputs (list (output "indexed"
+ #:type 'File
+ #:binding '((glob . "$(inputs.bcf.basename)"))
+ #:other '((secondary-files . #(".csi")))))
+ #:other `((hints ,bcftools-docker
+ (Initial-work-dir-requirement
+ (listing . #("$(inputs.bcf)"))))))
+ (clitool-step "bcftools_consensus" ; step: "bcftools consensus" over ref-fasta and the indexed QC output
+ (list "bcftools" "consensus"
+ "-i" "'QUAL > 10 && GT=\"a\"'" ; same expression string as in bcftools_view_qc
+ "-Hla"
+ "-f" ref-fasta
+ (intermediate (input "vcf"
+ #:type 'File
+ #:other '((secondary-files . #(".csi"))))
+ "bcftools_index_after_qc/indexed"))
+ #:outputs (list (output "out_fasta" #:type 'stdout))
+ #:stdout "sequence.fasta" ; consensus is captured from stdout under this name
+ #:other `((hints ,bcftools-docker)))
+ (clitool-step "set_sample_id" ; step: run sed over the consensus fasta; no Docker entry is attached to this step
+ (list "sed" (intermediate (input "fasta" #:type 'File)
+ "bcftools_consensus/out_fasta"))
+ #:additional-inputs (list sample-id) ; sample-id is not on the command list; it is only referenced from the sed expression below
+ #:outputs (list (output "out_fasta" #:type 'stdout))
+ #:stdout "sequence.fasta"
+ #:other '((arguments . #("s/^>.*/>$(inputs.sample_id)/g"))))) ; sed script: replace each line starting with ">" by ">" followed by the sample id
+ (list (workflow-output "out_fasta" ; the only workflow output: the fasta emitted by set_sample_id
+ #:type 'File
+ #:source "set_sample_id/out_fasta"))))
+
+(call-with-output-file "workflows/fastq2fasta/fastq2fasta.cwl" ; only file written by the added code; the removed code wrote one .cwl per tool plus two workflows
+ (cut scm->yaml fastq2fasta <>)) ; cut leaves the <> slot for the port that call-with-output-file supplies