about summary refs log tree commit diff
path: root/scripts
diff options
context:
space:
mode:
author Arun Isaac 2021-03-07 04:00:51 +0530
committer Arun Isaac 2021-03-07 04:00:51 +0530
commit 3050817d690f200076964b620064f93c9bd48d3a (patch)
tree 485725e3f552fb0fc71d60a7caa11d1947ac6dea /scripts
parent 68f8201eb21d9acfbf4a6853da19b374b4dec10c (diff)
download bh20-seq-resource-3050817d690f200076964b620064f93c9bd48d3a.tar.gz
bh20-seq-resource-3050817d690f200076964b620064f93c9bd48d3a.tar.lz
bh20-seq-resource-3050817d690f200076964b620064f93c9bd48d3a.zip
Rewrite fastq2fasta.scm using new ccwl API
Diffstat (limited to 'scripts')
-rw-r--r-- scripts/fastq2fasta.scm 211
1 files changed, 65 insertions, 146 deletions
diff --git a/scripts/fastq2fasta.scm b/scripts/fastq2fasta.scm
index 9880c0e..ef4682c 100644
--- a/scripts/fastq2fasta.scm
+++ b/scripts/fastq2fasta.scm
@@ -2,9 +2,7 @@
 ;; fastq2fasta.cwl workflow
 ;;
 
-(use-modules (srfi srfi-26)
-             (generate-cwl generate-cwl)
-             (generate-cwl yaml))
+(use-modules (ccwl ccwl))
 
 (define threads
   (input "threads"
@@ -23,11 +21,6 @@
          #:label "sam file to output results to"
          #:default "out.sam"))
 
-(define group-header-line
-  (input "group_header_line"
-         #:type 'string?
-         #:label "read group header line such as '@RG\tID:foo\tSM:bar'"))
-
 (define fastq-forward
   (input "fastq_forward"
          #:type 'File
@@ -41,144 +34,70 @@
 (define sample-id
   (input "sample_id" #:type 'string))
 
-(define bwa-docker
-  '(Docker-requirement
-    (docker-pull . "quay.io/biocontainers/bwa:0.7.17--h84994c4_5")))
-
-(define samtools-docker
-  '(Docker-requirement
-    (docker-pull . "quay.io/biocontainers/samtools:1.9--h8571acd_11")))
-
-(define bcftools-docker
-  '(Docker-requirement
-    (docker-pull . "quay.io/biocontainers/bcftools:1.10.2--hd2cd319_0")))
-
 (define fastq2fasta
-  (workflow
-   (list (clitool-step "bwa_mem"
-                       (list "bwa" "mem"
-                             "-t" threads
-                             "-o" output-sam
-                             "-R" group-header-line
-                             ref-fasta
-                             fastq-forward
-                             fastq-reverse)
-                       #:outputs (list (output "output"
-                                               #:type 'File
-                                               #:binding '((glob . "$(inputs.output_sam)"))))
-                       #:other `((requirements ,bwa-docker)))
-         (clitool-step "samtools_view"
-                       (list "samtools" "view"
-                             "-b"
-                             "-o" "aln.bam"
-                             "-@" threads
-                             (intermediate (input "input_file"
-                                                  #:type 'File
-                                                  #:label "input file")
-                                           "bwa_mem/output"))
-                       #:outputs (list (output "bam"
-                                               #:type 'File
-                                               #:binding '((glob . "aln.bam"))))
-                       #:other `((doc . "samtools view to convert sam format to bam format")
-                                 (requirements ,samtools-docker)))
-         (clitool-step "samtools_sort"
-                       (list "samtools" "sort"
-                             "-T" "sort.tmp"
-                             "-o" "aln.sorted.bam"
-                             "-@" threads
-                             (intermediate (input "input_bamfile"
-                                                  #:type 'File
-                                                  #:label "Input bamfile")
-                                           "samtools_view/bam"))
-                       #:outputs (list (output "sorted_bam"
-                                               #:type 'File
-                                               #:binding '((glob . "aln.sorted.bam"))))
-                       #:other `((doc . "samtools sort, sort given bam file")
-                                 (requirements ,samtools-docker)))
-         (clitool-step "freebayes"
-                       (list "freebayes"
-                             "--ploidy" "1"
-                             "--bam" (intermediate (input "bam" #:type 'File)
-                                                   "samtools_sort/sorted_bam")
-                             "-f" ref-fasta)
-                       #:outputs (list (output "vcf" #:type 'stdout))
-                       #:stdout "var.vcf")
-         (clitool-step "bcftools_view_exclude_ref"
-                       (list "bcftools" "view"
-                             "--no-version" "-Ou"
-                             "-e'type=ref'"
-                             "--threads" threads
-                             (intermediate (input "vcf" #:type 'File)
-                                           "freebayes/vcf"))
-                       #:outputs (list (output "bcf" #:type 'stdout))
-                       #:other `((hints ,bcftools-docker)))
-         (clitool-step "bcftools_norm"
-                       (list "bcftools" "norm"
-                             "-Ob"
-                             "-o" "normalized.bcf"
-                             "-f" ref-fasta
-                             "--threads" threads
-                             (intermediate (input "bcf" #:type 'File)
-                                           "bcftools_view_exclude_ref/bcf"))
-                       #:outputs (list (output "normalized_bcf"
-                                               #:type 'File
-                                               #:binding '((glob . "normalized.bcf"))))
-                       #:other `((hints ,bcftools-docker)))
-         (clitool-step "bcftools_index_after_normalization"
-                       (list "bcftools" "index"
-                             (intermediate (input "bcf" #:type 'File)
-                                           "bcftools_norm/normalized_bcf"))
-                       #:outputs (list (output "indexed"
-                                               #:type 'File
-                                               #:binding '((glob . "$(inputs.bcf.basename)"))
-                                               #:other '((secondary-files . #(".csi")))))
-                       #:other `((hints ,bcftools-docker
-                                        (Initial-work-dir-requirement
-                                         (listing . #("$(inputs.bcf)"))))))
-         (clitool-step "bcftools_view_qc"
-                       (list "bcftools" "view"
-                             "-i" "'QUAL > 10 && GT=\"a\"'"
-                             "-Oz"
-                             "--threads" threads
-                             (intermediate (input "bcf"
-                                                  #:type 'File
-                                                  #:other '((secondary-files . #(".csi"))))
-                                           "bcftools_index_after_normalization/indexed"))
-                       #:outputs (list (output "vcf" #:type 'stdout))
-                       #:other `((hints ,bcftools-docker)))
-         (clitool-step "bcftools_index_after_qc"
-                       (list "bcftools" "index"
-                             (intermediate (input "bcf" #:type 'File)
-                                           "bcftools_view_qc/vcf"))
-                       #:outputs (list (output "indexed"
-                                               #:type 'File
-                                               #:binding '((glob . "$(inputs.bcf.basename)"))
-                                               #:other '((secondary-files . #(".csi")))))
-                       #:other `((hints ,bcftools-docker
-                                        (Initial-work-dir-requirement
-                                         (listing . #("$(inputs.bcf)"))))))
-         (clitool-step "bcftools_consensus"
-                       (list "bcftools" "consensus"
-                             "-i" "'QUAL > 10 && GT=\"a\"'"
-                             "-Hla"
-                             "-f" ref-fasta
-                             (intermediate (input "vcf"
-                                                  #:type 'File
-                                                  #:other '((secondary-files . #(".csi"))))
-                                           "bcftools_index_after_qc/indexed"))
-                       #:outputs (list (output "out_fasta" #:type 'stdout))
-                       #:stdout "sequence.fasta"
-                       #:other `((hints ,bcftools-docker)))
-         (clitool-step "set_sample_id"
-                       (list "sed" (intermediate (input "fasta" #:type 'File)
-                                                 "bcftools_consensus/out_fasta"))
-                       #:additional-inputs (list sample-id)
-                       #:outputs (list (output "out_fasta" #:type 'stdout))
-                       #:stdout "sequence.fasta"
-                       #:other '((arguments . #("s/^>.*/>$(inputs.sample_id)/g")))))
-   (list (workflow-output "out_fasta"
+  (workflow "fastq2fasta"
+            (list (pipeline "bwa_mem_to_normalized"
+                            (list (command "bwa_mem"
+                                           (list "bwa" "mem" "-t" threads
+                                                 ref-fasta fastq-forward fastq-reverse))
+                                  (command "samtools_view"
+                                           (list "samtools" "view" "-b" "-@" threads "-"))
+                                  (command "samtools_sort"
+                                           (list "samtools" "sort" "-T" "sort.tmp" "-@" threads "-"))
+                                  (command "freebayes"
+                                           (list "freebayes" "--ploidy" "1" "--stdin" "-f" ref-fasta))
+                                  (command "bcftools_view_exclude_ref"
+                                           (list "bcftools" "view"
+                                                 "--no-version" "-Ou" "-e'type=ref'"
+                                                 "--threads" threads "-"))
+                                  (command "bcftools_norm"
+                                           (list "bcftools" "norm" "-Ob"
+                                                 "-f" ref-fasta "--threads" threads "-")
+                                           #:other '((stdout . "normalized.bcf"))))
+                            (list (output "normalized"
+                                          #:type 'File
+                                          #:source "bcftools_norm/stdout")))
+                  (command "bcftools_index_after_normalization"
+                           (list "bcftools" "index" (input "normalized" #:type 'File))
+                           #:outputs (list (output "index_after_normalization"
+                                                   #:type 'File
+                                                   #:binding '((glob . "$(inputs.normalized.basename)"))
+                                                   #:other '((secondary-files . #(".csi")))))
+                           #:other `((hints (Initial-work-dir-requirement
+                                             (listing . #("$(inputs.normalized)"))))))
+                  (command "bcftools_view_qc"
+                           (list "bcftools" "view"
+                                 "-i" "'QUAL > 10 && GT=\"a\"'"
+                                 "-Oz" "--threads" threads
+                                 (input "index_after_normalization"
+                                        #:type 'File
+                                        #:other '((secondary-files . #(".csi")))))
+                           #:outputs (list (output "bcftools_view_qc_output_vcf" #:type 'stdout))
+                           #:other '((stdout . "bcftools_view_output.vcf.gz")))
+                  (command "bcftools_index_after_qc"
+                           (list "bcftools" "index" (input "bcftools_view_qc_output_vcf" #:type 'File))
+                           #:outputs (list (output "index_after_qc"
+                                                   #:type 'File
+                                                   #:binding '((glob . "$(inputs.bcftools_view_qc_output_vcf.basename)"))
+                                                   #:other '((secondary-files . #(".csi")))))
+                           #:other `((hints (Initial-work-dir-requirement
+                                             (listing . #("$(inputs.bcftools_view_qc_output_vcf)"))))))
+                  (pipeline "consensus"
+                            (list (command "bcftools_consensus"
+                                           (list "bcftools" "consensus"
+                                                 "-i" "'QUAL > 10 && GT=\"a\"'"
+                                                 "-Hla" "-f" ref-fasta
+                                                 (input "index_after_qc"
+                                                        #:type 'File
+                                                        #:other '((secondary-files . #(".csi"))))))
+                                  (command "set_sample_id"
+                                           (list "sed" "s/^>.*/>$(inputs.sample_id)/g")
+                                           #:additional-inputs (list sample-id)))
+                            (list (output "out_fasta"
+                                          #:type 'File
+                                          #:source "set_sample_id/stdout"))))
+            (list (output "out_fasta"
                           #:type 'File
-                          #:source "set_sample_id/out_fasta"))))
+                          #:source "consensus/out_fasta"))))
 
-(call-with-output-file "workflows/fastq2fasta/fastq2fasta.cwl"
-  (cut scm->yaml fastq2fasta <>))
+(write-cwl fastq2fasta "workflows/fastq2fasta/fastq2fasta.cwl")