aboutsummaryrefslogtreecommitdiff
path: root/scripts/variant-calling-bowtie-bwa-mem.scm
blob: 59c13377a384bf3ea824d2296f1481ec21295965 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
;; fastq sensitive short read SARS-CoV-2 mutation calling

(use-modules (ccwl ccwl))

(define fastq-1
  (input "fastq_1" #:type 'File))

(define fastq-2
  (input "fastq_2" #:type 'File))

(define bowtie2-index
  (input "bowtie2_index"
         #:type 'File
         #:other '((secondary-files . #(".1.bt2" ".2.bt2" ".3.bt2" ".4.bt2"
                                        ".rev.1.bt2" ".rev.2.bt2")))))

(define bwa-mem-index
  (input "bwa_mem_index"
         #:type 'File
         #:other '((secondary-files . #(".amb" ".ann" ".bwt" ".pac" ".sa")))))

(define reference
  (input "reference"
         #:type 'File
         #:other '((secondary-files . #(".fai")))))

(define (bam2vcf prefix)
  (workflow (string-append prefix "_bam2vcf")
            (list (pipeline "samtools"
                            (list (command "samtools_view"
                                           (list "samtools" "view" "-h" "-b" "-F" "4"
                                                 (input (string-append prefix "_input") #:type 'File)))
                                  (command "samtools_sort"
                                           (list "samtools" "sort" "-n"))
                                  (command "samtools_fixmate"
                                           (list "samtools" "fixmate" "-m" "-" "-"))
                                  (command "samtools_sort2"
                                           (list "samtools" "sort" "-"))
                                  (command "samtools_markdup"
                                           (list "samtools" "markdup" "-" "-")
                                           #:other '((stdout . "dedup.bam"))))
                            (list (output "dedup_bam"
                                          #:type 'File
                                          #:source "samtools_markdup/stdout")))
                  (command "samtools_index"
                           (list "samtools" "index"
                                 (input "dedup_bam" #:type 'File))
                           #:outputs (list (output "indexed_dedup_bam"
                                                   #:type 'File
                                                   #:binding '((glob . "$(inputs.dedup_bam.basename)"))
                                                   #:other '((secondary-files . #(".bai")))))
                           #:other `((hints (Initial-work-dir-requirement
                                             (listing . #("$(inputs.dedup_bam)"))))))
                  (pipeline "freebayes"
                            (list (command "freebayes"
                                           (list "freebayes" "--ploidy" "1"
                                                 "--bam" (input "indexed_dedup_bam"
                                                                #:type 'File
                                                                #:other '((secondary-files . #("bai"))))
                                                 "-f" reference))
                                  (command "bcftools_view"
                                           (list "bcftools" "view"
                                                 "-i" "QUAL > 10"
                                                 "-Oz")
                                           #:other `((stdout . ,(string-append prefix "_output.vcf.gz")))))
                            (list (output "vcf"
                                          #:type 'File
                                          #:source "bcftools_view/stdout")))
                  (command "tabix"
                           (list "tabix" (input "vcf" #:type 'File))
                           #:outputs (list (output "indexed_vcf" #:type 'File
                                                   #:binding '((glob . "$(inputs.vcf.basename)"))
                                                   #:other '((secondary-files . #(".tbi")))))
                           #:other `((hints (Initial-work-dir-requirement
                                             (listing . #("$(inputs.vcf)")))))))
            (list (output (string-append prefix "_output")
                          #:type 'File
                          #:source "tabix/indexed_vcf"))))

(define variant-calling
  (workflow "variant-calling"
            (list (command "bowtie"
                           (list "bowtie2" "-p" "16"
                                 "-x" bowtie2-index
                                 "-1" fastq-1
                                 "-2" fastq-2)
                           #:outputs (list (output "bowtie_input" #:type 'stdout)))
                  (bam2vcf "bowtie")
                  (command "bwa_mem"
                           (list "bwa" "mem" "-K" "100000000" "-v" "3" "-t" "16"
                                 "-Y" bwa-mem-index fastq-1 fastq-2)
                           #:outputs (list (output "bwa_mem_input" #:type 'stdout)))
                  (bam2vcf "bwa_mem"))
            (list (output "bowtie_output"
                          #:type 'File
                          #:source "bowtie_bam2vcf/bowtie_output")
                  (output "bwa_mem_output"
                          #:type 'File
                          #:source "bwa_mem_bam2vcf/bwa_mem_output"))))

(write-cwl variant-calling
           "workflows/variant-calling-bowtie-bwa-mem.cwl")