about summary refs log tree commit diff
diff options
context:
space:
mode:
author: Arun Isaac, 2021-02-21 19:45:49 +0530
committer: Arun Isaac, 2021-02-21 19:45:49 +0530
commit: 85c63fecd1eb20aebb7ec8ce3137af253619b01b (patch)
tree: 52263e1612ab298fe5410782847c5523020b95d1
parent: 2ed13ca5e4c779b59d04e574a496b6cb69bcba44 (diff)
download: bh20-seq-resource-85c63fecd1eb20aebb7ec8ce3137af253619b01b.tar.gz
bh20-seq-resource-85c63fecd1eb20aebb7ec8ce3137af253619b01b.tar.lz
bh20-seq-resource-85c63fecd1eb20aebb7ec8ce3137af253619b01b.zip
Implement fastq2fasta workflows in scheme
-rw-r--r-- scripts/fastq2fasta.scm 294
1 files changed, 294 insertions, 0 deletions
diff --git a/scripts/fastq2fasta.scm b/scripts/fastq2fasta.scm
new file mode 100644
index 0000000..9eaa88f
--- /dev/null
+++ b/scripts/fastq2fasta.scm
@@ -0,0 +1,294 @@
+;;
+;; Generate the CWL files of the fastq2fasta workflow
+;;
+
+(add-to-load-path "./scripts")
+
+(use-modules (srfi srfi-26)
+             (generate-cwl)
+             (yaml))
+
+;; Shared "threads" input (int, default 4); passed to the tools below
+;; through their -t, -@ and --threads options.
+(define threads
+  (input "threads" #:type 'int
+         #:label "number of threads" #:default 4))
+
+;; Shared "ref_fasta" File input, the -f argument of freebayes and bcftools.
+(define ref-fasta (input "ref_fasta" #:type 'File))
+
+;; Shared "sample_id" string input, handed to the workflow steps below.
+(define sample-id (input "sample_id" #:type 'string))
+
+;; Write bwa-mem.cwl, the command line tool wrapping "bwa mem".  The file
+;; named by the output_sam input is globbed as the "output" output;
+;; #:stdout and #:stderr name the two log files.
+(call-with-output-file "workflows/fastq2fasta/bwa-mem.cwl"
+  (lambda (port)
+    (scm->yaml
+     (clitool
+      (list "bwa" "mem"
+            "-t" threads
+            "-o" (input "output_sam" #:type 'string
+                        #:label "sam file to output results to"
+                        #:default "out.sam")
+            ;; The backslashes are doubled so that the label shows the two
+            ;; characters \t a user types for bwa, not literal TABs.
+            "-R" (input "group_header_line" #:type 'string?
+                        #:label "read group header line such as '@RG\\tID:foo\\tSM:bar'")
+            (input "index_base" #:type 'File
+                   #:label "fasta file for index basename"
+                   #:other '((secondary-files . #(".amb" ".ann" ".bwt" ".pac" ".sa"))))
+            (input "fastq_forward" #:type 'File
+                   #:label "input fastq file to map (single-end or forward for pair-end)")
+            (input "fastq_reverse" #:type 'File?
+                   #:label "input fastq file to map (reverse for pair-end)"))
+      #:outputs (list (output "output" #:type 'File
+                              #:binding '((glob . "$(inputs.output_sam)")))
+                      (output "stdout" #:type 'stdout)
+                      (output "stderr" #:type 'stderr))
+      #:stdout "bwa-mem-stdout.log"
+      #:stderr "bwa-mem-stderr.log"
+      #:other '((requirements
+                 (Docker-requirement
+                  (docker-pull . "quay.io/biocontainers/bwa:0.7.17--h84994c4_5")))))
+     port)))
+
+;; Write samtools-view.cwl, the command line tool wrapping "samtools
+;; view".  output_bam defaults to #t and output_filename to "aln.bam";
+;; the file named by output_filename is globbed as the "bam" output.
+;; #:stdout and #:stderr name the samtools-view-*.log files.
+(call-with-output-file "workflows/fastq2fasta/samtools-view.cwl"
+  (lambda (port)
+    (scm->yaml
+     (clitool
+      (list "samtools" "view"
+            "-@" threads
+            "-b" (input "output_bam" #:type 'boolean
+                        #:label "output BAM"
+                        #:default #t)
+            "-o" (input "output_filename" #:type 'string
+                        #:label "output file name"
+                        #:default "aln.bam")
+            ;; NOTE(review): this positional input sits between the
+            ;; option arguments; whether the generated command keeps
+            ;; this order depends on clitool -- confirm.
+            (input "input_file" #:type 'File
+                   #:label "input file")
+            "-h" (input "include_header" #:type 'boolean
+                        #:label "include the header in the output"
+                        #:default #f)
+            "-S" (input "ignore_previous_version" #:type 'boolean
+                        #:label "ignored for compatibility with previous samtools versions"
+                        #:default #f)
+            "-F" (input "filter_alignments" #:type 'string?
+                        #:label "Do not output alignments with any bits set in INT present in the FLAG field. INT can be specified in hex by beginning with `0x' (i.e. /^0x[0-9A-F]+/) or in octal by beginning with `0' (i.e. /^0[0-7]+/) [0].")
+            "-q" (input "skip_alignments" #:type 'int?
+                        #:label "Skip alignments with MAPQ smaller than INT [0]."))
+      #:outputs (list (output "bam" #:type 'File
+                              #:binding '((glob . "$(inputs.output_filename)")))
+                      (output "stdout" #:type 'stdout)
+                      (output "stderr" #:type 'stderr))
+      #:stdout "samtools-view-stdout.log"
+      #:stderr "samtools-view-stderr.log"
+      #:other '((doc . "samtools view to convert sam format to bam format")
+                (requirements
+                 (Docker-requirement
+                  (docker-pull . "quay.io/biocontainers/samtools:1.9--h8571acd_11")))))
+     port)))
+
+;; Write samtools-sort.cwl, the command line tool wrapping "samtools
+;; sort".  The file named by the output_bam input (default
+;; "aln.sorted.bam") is globbed as the "sorted_bam" output.
+(call-with-output-file "workflows/fastq2fasta/samtools-sort.cwl"
+  (lambda (port)
+    (scm->yaml
+     (clitool
+      (list "samtools" "sort"
+            "-@" threads
+            "-T" (input "tmpfile" #:type 'string
+                        #:label "Write temporary files to PREFIX.nnnn.bam"
+                        #:default "sort.tmp")
+            "-o" (input "output_bam" #:type 'string
+                        #:label "Write final output to FILENAME"
+                        #:default "aln.sorted.bam")
+            (input "input_bamfile" #:type 'File
+                   #:label "Input bamfile"))
+      #:outputs (list (output "sorted_bam" #:type 'File
+                              #:binding '((glob . "$(inputs.output_bam)")))
+                      (output "stdout" #:type 'stdout)
+                      (output "stderr" #:type 'stderr))
+      #:stdout "samtools-sort-stdout.log"
+      #:stderr "samtools-sort-stderr.log"
+      #:other '((doc . "samtools sort, sort given bam file")
+                (requirements
+                 (Docker-requirement
+                  (docker-pull . "quay.io/biocontainers/samtools:1.9--h8571acd_11")))))
+     port)))
+
+;; Write freebayes.cwl: "freebayes --ploidy 1"; stdout is the "vcf" output.
+;; NOTE(review): unlike most tools in this file, no Docker image is pinned.
+(call-with-output-file "workflows/fastq2fasta/freebayes.cwl"
+  (lambda (port)
+    (scm->yaml
+     (clitool (list "freebayes" "--ploidy" "1"
+                    "--bam" (input "bam" #:type 'File) "-f" ref-fasta)
+              #:outputs (list (output "vcf" #:type 'stdout)) #:stdout "var.vcf")
+     port)))
+
+;; Tool: bcftools view.  NOTE(review): -e'type=ref' keeps its quotes -- confirm.
+(call-with-output-file "workflows/fastq2fasta/bcftools-view-exclude-ref.cwl"
+  (lambda (port)
+    (scm->yaml
+     (clitool (list "bcftools" "view" "--no-version" "-Ou" "-e'type=ref'"
+                    "--threads" threads (input "vcf" #:type 'File))
+              #:outputs (list (output "bcf" #:type 'stdout))
+              #:stdout "$(inputs.vcf.nameroot).without-ref.bcf")
+     port)))
+
+;; Write bcftools-norm.cwl: "bcftools norm -Ob" with -f ref_fasta; the
+;; file named by output_name is globbed as "normalized_bcf".
+(call-with-output-file "workflows/fastq2fasta/bcftools-norm.cwl"
+  (lambda (port)
+    (scm->yaml
+     (clitool
+      (list "bcftools" "norm" "-Ob" "-f" ref-fasta
+            "-o" (input "output_name" #:type 'string #:default "normalized.bcf")
+            "--threads" threads (input "bcf" #:type 'File))
+      #:outputs (list (output "normalized_bcf" #:type 'File
+                              #:binding '((glob . "$(inputs.output_name)"))))
+      #:other '((hints
+                 (Docker-requirement
+                  (docker-pull . "quay.io/biocontainers/bcftools:1.10.2--hd2cd319_0")))))
+     port)))
+
+;; Write bcftools-index.cwl: "bcftools index" on bcf, which is listed in
+;; the initial work dir; "indexed" globs that file with a .csi secondary.
+(call-with-output-file "workflows/fastq2fasta/bcftools-index.cwl"
+  (lambda (port)
+    (scm->yaml
+     (clitool (list "bcftools" "index" (input "bcf" #:type 'File))
+              #:outputs (list (output "indexed" #:type 'File
+                                      #:binding '((glob . "$(inputs.bcf.basename)"))
+                                      #:other '((secondary-files . #(".csi")))))
+              #:other '((hints
+                         (Docker-requirement
+                          (docker-pull . "quay.io/biocontainers/bcftools:1.10.2--hd2cd319_0"))
+                         (Initial-work-dir-requirement (listing . #("$(inputs.bcf)"))))))
+     port)))
+
+;; Write bcftools-view-qc.cwl: "bcftools view -i ... -Oz"; stdout is "vcf".
+;; NOTE(review): the single quotes are part of the -i argument -- confirm.
+(call-with-output-file "workflows/fastq2fasta/bcftools-view-qc.cwl"
+  (lambda (port)
+    (scm->yaml
+     (clitool (list "bcftools" "view" "-i" "'QUAL > 10 && GT=\"a\"'" "-Oz"
+                    "--threads" threads
+                    (input "bcf" #:type 'File #:other '((secondary-files . #(".csi")))))
+              #:outputs (list (output "vcf" #:type 'stdout))
+              #:other '((hints
+                         (Docker-requirement
+                          (docker-pull . "quay.io/biocontainers/bcftools:1.10.2--hd2cd319_0")))))
+     port)))
+
+;; Write bcftools-consensus.cwl: "bcftools consensus"; stdout is "out_fasta".
+;; NOTE(review): the single quotes are part of the -i argument -- confirm.
+(call-with-output-file "workflows/fastq2fasta/bcftools-consensus.cwl"
+  (lambda (port)
+    (scm->yaml
+     (clitool (list "bcftools" "consensus"
+                    "-i" "'QUAL > 10 && GT=\"a\"'" "-Hla"
+                    "-f" ref-fasta
+                    (input "vcf" #:type 'File #:other '((secondary-files . #(".csi")))))
+              #:outputs (list (output "out_fasta" #:type 'stdout))
+              #:stdout "sequence.fasta"
+              #:other '((hints
+                         (Docker-requirement
+                          (docker-pull . "quay.io/biocontainers/bcftools:1.10.2--hd2cd319_0")))))
+     port)))
+
+;; Write set-sample-id.cwl: sed replaces each ">" header line by ">sample_id".
+(call-with-output-file "workflows/fastq2fasta/set-sample-id.cwl"
+  (lambda (port)
+    (scm->yaml
+     (clitool (list "sed" (input "fasta" #:type 'File))
+              #:additional-inputs (list (input "sample_id" #:type 'string))
+              #:outputs (list (output "out_fasta" #:type 'stdout)) #:stdout "sequence.fasta"
+              #:other '((arguments . #("s/^>.*/>$(inputs.sample_id)/g"))))
+     port)))
+
+;; Write bam2fasta.cwl, a workflow over the tool files written above:
+;; freebayes -> bcftools view (exclude ref) -> norm -> index -> view
+;; (QC) -> index -> consensus -> set sample id.  Strings of the form
+;; "step/output" presumably refer to an earlier step's output; the other
+;; values are inputs.  Its single output is set_sample_id/out_fasta.
+(let ((fasta (input "fasta" #:type 'File)))
+  (call-with-output-file "workflows/fastq2fasta/bam2fasta.cwl"
+    (lambda (port)
+      (scm->yaml
+       (workflow
+        (list (step "freebayes" "freebayes.cwl"
+                    `((bam . ,(input "bam" #:type 'File)) (ref_fasta . ,fasta))
+                    (list "vcf"))
+              (step "bcftools_view_exclude_ref" "bcftools-view-exclude-ref.cwl"
+                    `((vcf . "freebayes/vcf") (threads . ,threads))
+                    (list "bcf"))
+              (step "bcftools_norm" "bcftools-norm.cwl"
+                    `((ref_fasta . ,fasta)
+                      (bcf . "bcftools_view_exclude_ref/bcf")
+                      (threads . ,threads))
+                    (list "normalized_bcf"))
+              (step "bcftools_index_after_normalization" "bcftools-index.cwl"
+                    '((bcf . "bcftools_norm/normalized_bcf"))
+                    (list "indexed"))
+              (step "bcftools_view_qc" "bcftools-view-qc.cwl"
+                    `((bcf . "bcftools_index_after_normalization/indexed") (threads . ,threads))
+                    (list "vcf"))
+              (step "bcftools_index_after_qc" "bcftools-index.cwl"
+                    '((bcf . "bcftools_view_qc/vcf"))
+                    (list "indexed"))
+              (step "bcftools_consensus" "bcftools-consensus.cwl"
+                    `((ref_fasta . ,fasta) (vcf . "bcftools_index_after_qc/indexed"))
+                    (list "out_fasta"))
+              (step "set_sample_id" "set-sample-id.cwl"
+                    `((fasta . "bcftools_consensus/out_fasta") (sample_id . ,sample-id))
+                    (list "out_fasta")))
+        (list (workflow-output "out_fasta" #:type 'File
+                               #:source "set_sample_id/out_fasta")))
+       port))))
+
+;; Write fastq2fasta.cwl, the top-level workflow: bwa-mem ->
+;; samtools-view -> samtools-sort -> bam2fasta (itself a workflow).  The
+;; local ref-fasta shadows the global one and adds secondary files; the
+;; optional "metadata" input is the source of the "out_metadata" output.
+(let ((ref-fasta (input "ref_fasta" #:type 'File
+                        #:other '((secondary-files . #(".amb" ".ann" ".bwt" ".pac" ".sa" ".fai"))))))
+  (call-with-output-file "workflows/fastq2fasta/fastq2fasta.cwl"
+    (lambda (port)
+      (scm->yaml
+       (workflow
+        (list (step "bwa-mem" "bwa-mem.cwl"
+                    `((threads . ,threads)
+                      (fastq_forward . ,(input "fastq_forward" #:type 'File))
+                      (fastq_reverse . ,(input "fastq_reverse" #:type 'File?))
+                      (index_base . ,ref-fasta))
+                    (list "output"))
+              (step "samtools-view" "samtools-view.cwl"
+                    `((threads . ,threads) (input_file . "bwa-mem/output"))
+                    (list "bam"))
+              (step "samtools-sort" "samtools-sort.cwl"
+                    `((input_bamfile . "samtools-view/bam") (threads . ,threads))
+                    (list "sorted_bam"))
+              (step "bam2fasta" "bam2fasta.cwl"
+                    `((bam . "samtools-sort/sorted_bam")
+                      (fasta . ,ref-fasta)
+                      (threads . ,threads)
+                      (sample_id . ,sample-id))
+                    (list "out_fasta")))
+        (list (workflow-output "out_fasta" #:type 'File
+                               #:source "bam2fasta/out_fasta")
+              (workflow-output "out_metadata" #:type 'File?
+                               #:source (input "metadata" #:type 'File?)))
+        #:other '((requirements (Subworkflow-feature-requirement))
+                  (hints (Resource-requirement (ram-min . 3000)))))
+       port))))