about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- scripts/fastq2fasta.scm 438
1 files changed, 165 insertions, 273 deletions
diff --git a/scripts/fastq2fasta.scm b/scripts/fastq2fasta.scm
index c014852..9880c0e 100644
--- a/scripts/fastq2fasta.scm
+++ b/scripts/fastq2fasta.scm
@@ -13,280 +13,172 @@
          #:default 4))
 
 (define ref-fasta
-  (input "ref_fasta" #:type 'File))
+  (input "ref_fasta"  ; reference FASTA shared by bwa mem, freebayes and bcftools norm/consensus
+         #:type 'File
+         #:other '((secondary-files . #(".amb" ".ann" ".bwt" ".pac" ".sa" ".fai")))))  ; .fai restored: the replaced workflow declared it, and this input is now also given via -f to freebayes and bcftools
+
+(define output-sam  ; file name given to `bwa mem -o`; the bwa_mem step globs it as its output
+  (input "output_sam"
+         #:type 'string
+         #:label "sam file to output results to"
+         #:default "out.sam"))
+
+(define group-header-line  ; value given to `bwa mem -R`; type string? (nullable in CWL type syntax)
+  (input "group_header_line"
+         #:type 'string?
+         #:label "read group header line such as '@RG\tID:foo\tSM:bar'"))
+
+(define fastq-forward  ; FASTQ passed to bwa mem right after ref-fasta
+  (input "fastq_forward"
+         #:type 'File
+         #:label "input fastq file to map (single-end or forward for pair-end)"))
+
+(define fastq-reverse  ; last positional argument of bwa mem; type File? (nullable in CWL type syntax)
+  (input "fastq_reverse"
+         #:type 'File?
+         #:label "input fastq file to map (reverse for pair-end)"))
 
 (define sample-id
   (input "sample_id" #:type 'string))
 
-(call-with-output-file "workflows/fastq2fasta/bwa-mem.cwl"
-  (cut scm->yaml
-    (clitool
-     (list "bwa" "mem"
-           "-t" threads
-           "-o" (input "output_sam"
-                       #:type 'string
-                       #:label "sam file to output results to"
-                       #:default "out.sam")
-           "-R" (input "group_header_line"
-                       #:type 'string?
-                       #:label "read group header line such as '@RG\tID:foo\tSM:bar'")
-           (input "index_base"
-                  #:type 'File
-                  #:label "fasta file for index basename"
-                  #:other '((secondary-files . #(".amb" ".ann" ".bwt" ".pac" ".sa"))))
-           (input "fastq_forward"
-                  #:type 'File
-                  #:label "input fastq file to map (single-end or forward for pair-end)")
-           (input "fastq_reverse"
-                  #:type 'File?
-                  #:label "input fastq file to map (reverse for pair-end)"))
-     #:outputs (list (output "output"
-                             #:type 'File
-                             #:binding '((glob . "$(inputs.output_sam)")))
-                     (output "stdout" #:type 'stdout)
-                     (output "stderr" #:type 'stderr))
-     #:stdout "bwa-mem-stdout.log"
-     #:stderr "bwa-mem-stderr.log"
-     #:other '((requirements
-                (Docker-requirement
-                 (docker-pull . "quay.io/biocontainers/bwa:0.7.17--h84994c4_5")))))
-    <>))
-
-(call-with-output-file "workflows/fastq2fasta/samtools-view.cwl"
-  (cut scm->yaml
-    (clitool
-     (list "samtools" "view"
-           "-@" threads
-           "-b" (input "output_bam"
-                       #:type 'boolean
-                       #:label "output BAM"
-                       #:default #t)
-           "-o" (input "output_filename"
-                       #:type 'string
-                       #:label "output file name"
-                       #:default "aln.bam")
-           (input "input_file"
-                  #:type 'File
-                  #:label "input file")
-           "-h" (input "include_header"
-                       #:type 'boolean
-                       #:label "include the header in the output"
-                       #:default #f)
-           "-S" (input "ignore_previous_version"
-                       #:type 'boolean
-                       #:label "ignored for compatibility with previous samtools versions"
-                       #:default #f)
-           "-F" (input "filter_alignments"
-                       #:type 'string?
-                       #:label "Do not output alignments with any bits set in INT present in the FLAG field. INT can be specified in hex by beginning with `0x' (i.e. /^0x[0-9A-F]+/) or in octal by beginning with `0' (i.e. /^0[0-7]+/) [0].")
-           "-q" (input "skip_alignments"
-                       #:type 'int?
-                       #:label "Skip alignments with MAPQ smaller than INT [0]."))
-     #:outputs (list (output "bam"
-                             #:type 'File
-                             #:binding '((glob . "$(inputs.output_filename)")))
-                     (output "stdout" #:type 'stdout)
-                     (output "stderr" #:type 'stderr))
-     #:stdout "samtools-view-stdout.log"
-     #:stderr "samtools-view-stderr.log"
-     #:other '((doc . "samtools view to convert sam format to bam format")
-               (requirements
-                (Docker-requirement
-                 (docker-pull . "quay.io/biocontainers/samtools:1.9--h8571acd_11")))))
-    <>))
-
-(call-with-output-file "workflows/fastq2fasta/samtools-sort.cwl"
-  (cut scm->yaml
-    (clitool
-     (list "samtools" "sort"
-           "-@" threads
-           "-T" (input "tmpfile"
-                       #:type 'string
-                       #:label "Write temporary files to PREFIX.nnnn.bam"
-                       #:default "sort.tmp")
-           "-o" (input "output_bam"
-                       #:type 'string
-                       #:label "Write final output to FILENAME"
-                       #:default "aln.sorted.bam")
-           (input "input_bamfile"
-                  #:type 'File
-                  #:label "Input bamfile"))
-     #:outputs (list (output "sorted_bam"
-                             #:type 'File
-                             #:binding '((glob . "$(inputs.output_bam)")))
-                     (output "stdout" #:type 'stdout)
-                     (output "stderr" #:type 'stderr))
-     #:stdout "samtools-sort-stdout.log"
-     #:stderr "samtools-sort-stderr.log"
-     #:other '((doc . "samtools sort, sort given bam file")
-               (requirements
-                (Docker-requirement
-                 (docker-pull . "quay.io/biocontainers/samtools:1.9--h8571acd_11")))))
-    <>))
-
-(call-with-output-file "workflows/fastq2fasta/freebayes.cwl"
-  (cut scm->yaml
-    (clitool
-     (list "freebayes" "--ploidy" "1"
-           "--bam" (input "bam" #:type 'File)
-           "-f" ref-fasta)
-     #:outputs (list (output "vcf" #:type 'stdout))
-     #:stdout "var.vcf")
-   <>))
-
-(call-with-output-file "workflows/fastq2fasta/bcftools-view-exclude-ref.cwl"
-  (cut scm->yaml
-    (clitool
-     (list "bcftools" "view" "--no-version" "-Ou"
-           "-e'type=ref'" "--threads" threads
-           (input "vcf" #:type 'File))
-     #:outputs (list (output "bcf" #:type 'stdout))
-     #:stdout "$(inputs.vcf.nameroot).without-ref.bcf")
-    <>))
-
-(call-with-output-file "workflows/fastq2fasta/bcftools-norm.cwl"
-  (cut scm->yaml
-    (clitool
-     (list "bcftools" "norm" "-Ob"
-           "-f" ref-fasta
-           "-o" (input "output_name" #:type 'string #:default "normalized.bcf")
-           "--threads" threads
-           (input "bcf" #:type 'File))
-     #:outputs (list (output "normalized_bcf"
-                             #:type 'File
-                             #:binding '((glob . "$(inputs.output_name)"))))
-     #:other '((hints
-                (Docker-requirement
-                 (docker-pull . "quay.io/biocontainers/bcftools:1.10.2--hd2cd319_0")))))
-    <>))
-
-(call-with-output-file "workflows/fastq2fasta/bcftools-index.cwl"
-  (cut scm->yaml
-    (clitool
-     (list "bcftools" "index" (input "bcf" #:type 'File))
-     #:outputs (list (output "indexed"
-                             #:type 'File
-                             #:binding '((glob . "$(inputs.bcf.basename)"))
-                             #:other '((secondary-files . #(".csi")))))
-     #:other '((hints
-                (Docker-requirement
-                 (docker-pull . "quay.io/biocontainers/bcftools:1.10.2--hd2cd319_0"))
-                (Initial-work-dir-requirement
-                 (listing . #("$(inputs.bcf)"))))))
-    <>))
-
-(call-with-output-file "workflows/fastq2fasta/bcftools-view-qc.cwl"
-  (cut scm->yaml
-    (clitool
-     (list "bcftools" "view" "-i" "'QUAL > 10 && GT=\"a\"'" "-Oz"
-           "--threads" threads
-           (input "bcf"
-                  #:type 'File
-                  #:other '((secondary-files . #(".csi")))))
-     #:outputs (list (output "vcf" #:type 'stdout))
-     #:other '((hints
-                (Docker-requirement
-                 (docker-pull . "quay.io/biocontainers/bcftools:1.10.2--hd2cd319_0")))))
-    <>))
-
-(call-with-output-file "workflows/fastq2fasta/bcftools-consensus.cwl"
-  (cut scm->yaml
-    (clitool
-     (list "bcftools" "consensus"
-           "-i" "'QUAL > 10 && GT=\"a\"'" "-Hla"
-           "-f" ref-fasta
-           (input "vcf"
-                  #:type 'File
-                  #:other '((secondary-files . #(".csi")))))
-     #:outputs (list (output "out_fasta" #:type 'stdout))
-     #:stdout "sequence.fasta"
-     #:other '((hints
-                (Docker-requirement
-                 (docker-pull . "quay.io/biocontainers/bcftools:1.10.2--hd2cd319_0")))))
-    <>))
-
-(call-with-output-file "workflows/fastq2fasta/set-sample-id.cwl"
-  (cut scm->yaml
-    (clitool
-     (list "sed" (input "fasta" #:type 'File))
-     #:additional-inputs (list (input "sample_id" #:type 'string))
-     #:outputs (list (output "out_fasta" #:type 'stdout))
-     #:stdout "sequence.fasta"
-     #:other '((arguments . #("s/^>.*/>$(inputs.sample_id)/g"))))
-    <>))
-
-(let ((fasta (input "fasta" #:type 'File)))
-  (call-with-output-file "workflows/fastq2fasta/bam2fasta.cwl"
-    (cut scm->yaml
-      (workflow
-       (list (step "freebayes" "freebayes.cwl"
-                   `((bam . ,(input "bam" #:type 'File))
-                     (ref_fasta . ,fasta))
-                   (list "vcf"))
-             (step "bcftools_view_exclude_ref" "bcftools-view-exclude-ref.cwl"
-                   `((vcf . "freebayes/vcf")
-                     (threads . ,threads))
-                   (list "bcf"))
-             (step "bcftools_norm" "bcftools-norm.cwl"
-                   `((ref_fasta . ,fasta)
-                     (bcf . "bcftools_view_exclude_ref/bcf")
-                     (threads . ,threads))
-                   (list "normalized_bcf"))
-             (step "bcftools_index_after_normalization" "bcftools-index.cwl"
-                   '((bcf . "bcftools_norm/normalized_bcf"))
-                   (list "indexed"))
-             (step "bcftools_view_qc" "bcftools-view-qc.cwl"
-                   `((bcf . "bcftools_index_after_normalization/indexed")
-                     (threads . ,threads))
-                   (list "vcf"))
-             (step "bcftools_index_after_qc" "bcftools-index.cwl"
-                   '((bcf . "bcftools_view_qc/vcf"))
-                   (list "indexed"))
-             (step "bcftools_consensus" "bcftools-consensus.cwl"
-                   `((ref_fasta . ,fasta)
-                     (vcf . "bcftools_index_after_qc/indexed"))
-                   (list "out_fasta"))
-             (step "set_sample_id" "set-sample-id.cwl"
-                   `((fasta . "bcftools_consensus/out_fasta")
-                     (sample_id . ,sample-id))
-                   (list "out_fasta")))
-       (list (workflow-output "out_fasta"
-                              #:type 'File
-                              #:source "set_sample_id/out_fasta")))
-      <>)))
-
-(let ((ref-fasta (input "ref_fasta"
-                        #:type 'File
-                        #:other '((secondary-files . #(".amb" ".ann" ".bwt" ".pac" ".sa" ".fai"))))))
-  (call-with-output-file "workflows/fastq2fasta/fastq2fasta.cwl"
-    (cut scm->yaml
-      (workflow
-       (list (step "bwa-mem" "bwa-mem.cwl"
-                   `((threads . ,threads)
-                     (fastq_forward . ,(input "fastq_forward" #:type 'File))
-                     (fastq_reverse . ,(input "fastq_reverse" #:type 'File?))
-                     (index_base . ,ref-fasta))
-                   (list "output"))
-             (step "samtools-view" "samtools-view.cwl"
-                   `((threads . ,threads)
-                     (input_file . "bwa-mem/output"))
-                   (list "bam"))
-             (step "samtools-sort" "samtools-sort.cwl"
-                   `((input_bamfile . "samtools-view/bam")
-                     (threads . ,threads))
-                   (list "sorted_bam"))
-             (step "bam2fasta" "bam2fasta.cwl"
-                   `((bam . "samtools-sort/sorted_bam")
-                     (fasta . ,ref-fasta)
-                     (threads . ,threads)
-                     (sample_id . ,sample-id))
-                   (list "out_fasta")))
-       (list (workflow-output "out_fasta"
-                              #:type 'File
-                              #:source "bam2fasta/out_fasta")
-             (workflow-output "out_metadata"
-                              #:type 'File?
-                              #:source (input "metadata" #:type 'File?)))
-       #:other '((requirements (Subworkflow-feature-requirement))
-                 (hints (Resource-requirement (ram-min . 3000)))))
-      <>)))
+(define bwa-docker  ; Docker-requirement entry naming the bwa image; spliced into the bwa_mem step's requirements
+  '(Docker-requirement
+    (docker-pull . "quay.io/biocontainers/bwa:0.7.17--h84994c4_5")))
+
+(define samtools-docker  ; Docker-requirement entry shared by the samtools_view and samtools_sort steps (as requirements)
+  '(Docker-requirement
+    (docker-pull . "quay.io/biocontainers/samtools:1.9--h8571acd_11")))
+
+(define bcftools-docker  ; Docker-requirement entry shared by every bcftools step below (placed under hints, not requirements)
+  '(Docker-requirement
+    (docker-pull . "quay.io/biocontainers/bcftools:1.10.2--hd2cd319_0")))
+
+(define fastq2fasta  ; the FASTQ -> FASTA pipeline expressed as one workflow of inline command-line steps
+  (workflow  ; NOTE(review): (intermediate INPUT "step/output") appears to feed INPUT from that earlier step's output -- confirm against ccwl
+   (list (clitool-step "bwa_mem"  ; bwa mem against ref-fasta; the file named by output_sam is collected as "output"
+                       (list "bwa" "mem"
+                             "-t" threads
+                             "-o" output-sam
+                             "-R" group-header-line
+                             ref-fasta
+                             fastq-forward
+                             fastq-reverse)
+                       #:outputs (list (output "output"
+                                               #:type 'File
+                                               #:binding '((glob . "$(inputs.output_sam)"))))
+                       #:other `((requirements ,bwa-docker)))
+         (clitool-step "samtools_view"  ; convert bwa_mem/output (SAM) into aln.bam
+                       (list "samtools" "view"
+                             "-b"
+                             "-o" "aln.bam"
+                             "-@" threads
+                             (intermediate (input "input_file"
+                                                  #:type 'File
+                                                  #:label "input file")
+                                           "bwa_mem/output"))
+                       #:outputs (list (output "bam"
+                                               #:type 'File
+                                               #:binding '((glob . "aln.bam"))))
+                       #:other `((doc . "samtools view to convert sam format to bam format")
+                                 (requirements ,samtools-docker)))
+         (clitool-step "samtools_sort"  ; sort samtools_view/bam into aln.sorted.bam
+                       (list "samtools" "sort"
+                             "-T" "sort.tmp"
+                             "-o" "aln.sorted.bam"
+                             "-@" threads
+                             (intermediate (input "input_bamfile"
+                                                  #:type 'File
+                                                  #:label "Input bamfile")
+                                           "samtools_view/bam"))
+                       #:outputs (list (output "sorted_bam"
+                                               #:type 'File
+                                               #:binding '((glob . "aln.sorted.bam"))))
+                       #:other `((doc . "samtools sort, sort given bam file")
+                                 (requirements ,samtools-docker)))
+         (clitool-step "freebayes"  ; freebayes --ploidy 1 on the sorted BAM; stdout is captured as var.vcf
+                       (list "freebayes"
+                             "--ploidy" "1"
+                             "--bam" (intermediate (input "bam" #:type 'File)
+                                                   "samtools_sort/sorted_bam")
+                             "-f" ref-fasta)
+                       #:outputs (list (output "vcf" #:type 'stdout))
+                       #:stdout "var.vcf")
+         (clitool-step "bcftools_view_exclude_ref"  ; bcftools view -e'type=ref' on freebayes/vcf; stdout is the "bcf" output
+                       (list "bcftools" "view"
+                             "--no-version" "-Ou"
+                             "-e'type=ref'"
+                             "--threads" threads
+                             (intermediate (input "vcf" #:type 'File)
+                                           "freebayes/vcf"))
+                       #:outputs (list (output "bcf" #:type 'stdout))
+                       #:other `((hints ,bcftools-docker)))
+         (clitool-step "bcftools_norm"  ; bcftools norm against ref-fasta, written to normalized.bcf
+                       (list "bcftools" "norm"
+                             "-Ob"
+                             "-o" "normalized.bcf"
+                             "-f" ref-fasta
+                             "--threads" threads
+                             (intermediate (input "bcf" #:type 'File)
+                                           "bcftools_view_exclude_ref/bcf"))
+                       #:outputs (list (output "normalized_bcf"
+                                               #:type 'File
+                                               #:binding '((glob . "normalized.bcf"))))
+                       #:other `((hints ,bcftools-docker)))
+         (clitool-step "bcftools_index_after_normalization"  ; bcftools index on normalized_bcf; output is that same file plus its .csi
+                       (list "bcftools" "index"
+                             (intermediate (input "bcf" #:type 'File)
+                                           "bcftools_norm/normalized_bcf"))
+                       #:outputs (list (output "indexed"
+                                               #:type 'File
+                                               #:binding '((glob . "$(inputs.bcf.basename)"))
+                                               #:other '((secondary-files . #(".csi")))))
+                       #:other `((hints ,bcftools-docker
+                                        (Initial-work-dir-requirement
+                                         (listing . #("$(inputs.bcf)"))))))
+         (clitool-step "bcftools_view_qc"  ; bcftools view -i 'QUAL > 10 && GT="a"' -Oz on the indexed BCF; stdout is "vcf"
+                       (list "bcftools" "view"
+                             "-i" "'QUAL > 10 && GT=\"a\"'"
+                             "-Oz"
+                             "--threads" threads
+                             (intermediate (input "bcf"
+                                                  #:type 'File
+                                                  #:other '((secondary-files . #(".csi"))))
+                                           "bcftools_index_after_normalization/indexed"))
+                       #:outputs (list (output "vcf" #:type 'stdout))
+                       #:other `((hints ,bcftools-docker)))
+         (clitool-step "bcftools_index_after_qc"  ; same indexing command, applied to bcftools_view_qc/vcf
+                       (list "bcftools" "index"
+                             (intermediate (input "bcf" #:type 'File)
+                                           "bcftools_view_qc/vcf"))
+                       #:outputs (list (output "indexed"
+                                               #:type 'File
+                                               #:binding '((glob . "$(inputs.bcf.basename)"))
+                                               #:other '((secondary-files . #(".csi")))))
+                       #:other `((hints ,bcftools-docker
+                                        (Initial-work-dir-requirement
+                                         (listing . #("$(inputs.bcf)"))))))
+         (clitool-step "bcftools_consensus"  ; bcftools consensus over ref-fasta with the indexed VCF; stdout is captured as sequence.fasta
+                       (list "bcftools" "consensus"
+                             "-i" "'QUAL > 10 && GT=\"a\"'"
+                             "-Hla"
+                             "-f" ref-fasta
+                             (intermediate (input "vcf"
+                                                  #:type 'File
+                                                  #:other '((secondary-files . #(".csi"))))
+                                           "bcftools_index_after_qc/indexed"))
+                       #:outputs (list (output "out_fasta" #:type 'stdout))
+                       #:stdout "sequence.fasta"
+                       #:other `((hints ,bcftools-docker)))
+         (clitool-step "set_sample_id"  ; sed rewrites each line starting with ">" to ">" followed by sample_id
+                       (list "sed" (intermediate (input "fasta" #:type 'File)
+                                                 "bcftools_consensus/out_fasta"))
+                       #:additional-inputs (list sample-id)
+                       #:outputs (list (output "out_fasta" #:type 'stdout))
+                       #:stdout "sequence.fasta"
+                       #:other '((arguments . #("s/^>.*/>$(inputs.sample_id)/g")))))
+   (list (workflow-output "out_fasta"  ; the only output this workflow declares
+                          #:type 'File
+                          #:source "set_sample_id/out_fasta"))))
+
+(call-with-output-file "workflows/fastq2fasta/fastq2fasta.cwl"  ; write the fastq2fasta workflow to this .cwl file
+  (cut scm->yaml fastq2fasta <>))  ; <> is the port supplied by call-with-output-file