aboutsummaryrefslogtreecommitdiff
path: root/scripts
diff options
context:
space:
mode:
authorArun Isaac2021-03-07 03:55:54 +0530
committerArun Isaac2021-03-07 03:55:54 +0530
commit68f8201eb21d9acfbf4a6853da19b374b4dec10c (patch)
treeb43d943bb569ecb49c93df95f3662e4f73e86cc2 /scripts
parentdb577d11808d91ff5c0b3643a37b5ce9765afc78 (diff)
downloadbh20-seq-resource-68f8201eb21d9acfbf4a6853da19b374b4dec10c.tar.gz
bh20-seq-resource-68f8201eb21d9acfbf4a6853da19b374b4dec10c.tar.lz
bh20-seq-resource-68f8201eb21d9acfbf4a6853da19b374b4dec10c.zip
Implement bowtie, bwa-mem variant calling in ccwl
Diffstat (limited to 'scripts')
-rw-r--r--scripts/variant-calling-bowtie-bwa-mem.scm102
1 files changed, 102 insertions, 0 deletions
diff --git a/scripts/variant-calling-bowtie-bwa-mem.scm b/scripts/variant-calling-bowtie-bwa-mem.scm
new file mode 100644
index 0000000..59c1337
--- /dev/null
+++ b/scripts/variant-calling-bowtie-bwa-mem.scm
@@ -0,0 +1,102 @@
+;; fastq sensitive short read SARS-CoV-2 mutation calling
+
+(use-modules (ccwl ccwl))
+
+(define fastq-1
+ (input "fastq_1" #:type 'File))
+
+(define fastq-2
+ (input "fastq_2" #:type 'File))
+
+(define bowtie2-index
+ (input "bowtie2_index"
+ #:type 'File
+ #:other '((secondary-files . #(".1.bt2" ".2.bt2" ".3.bt2" ".4.bt2"
+ ".rev.1.bt2" ".rev.2.bt2")))))
+
+(define bwa-mem-index
+ (input "bwa_mem_index"
+ #:type 'File
+ #:other '((secondary-files . #(".amb" ".ann" ".bwt" ".pac" ".sa")))))
+
+(define reference
+ (input "reference"
+ #:type 'File
+ #:other '((secondary-files . #(".fai")))))
+
+(define (bam2vcf prefix)
+ (workflow (string-append prefix "_bam2vcf")
+ (list (pipeline "samtools"
+ (list (command "samtools_view"
+ (list "samtools" "view" "-h" "-b" "-F" "4"
+ (input (string-append prefix "_input") #:type 'File)))
+ (command "samtools_sort"
+ (list "samtools" "sort" "-n"))
+ (command "samtools_fixmate"
+ (list "samtools" "fixmate" "-m" "-" "-"))
+ (command "samtools_sort2"
+ (list "samtools" "sort" "-"))
+ (command "samtools_markdup"
+ (list "samtools" "markdup" "-" "-")
+ #:other '((stdout . "dedup.bam"))))
+ (list (output "dedup_bam"
+ #:type 'File
+ #:source "samtools_markdup/stdout")))
+ (command "samtools_index"
+ (list "samtools" "index"
+ (input "dedup_bam" #:type 'File))
+ #:outputs (list (output "indexed_dedup_bam"
+ #:type 'File
+ #:binding '((glob . "$(inputs.dedup_bam.basename)"))
+ #:other '((secondary-files . #(".bai")))))
+ #:other `((hints (Initial-work-dir-requirement
+ (listing . #("$(inputs.dedup_bam)"))))))
+ (pipeline "freebayes"
+ (list (command "freebayes"
+ (list "freebayes" "--ploidy" "1"
+ "--bam" (input "indexed_dedup_bam"
+ #:type 'File
+ #:other '((secondary-files . #("bai"))))
+ "-f" reference))
+ (command "bcftools_view"
+ (list "bcftools" "view"
+ "-i" "QUAL > 10"
+ "-Oz")
+ #:other `((stdout . ,(string-append prefix "_output.vcf.gz")))))
+ (list (output "vcf"
+ #:type 'File
+ #:source "bcftools_view/stdout")))
+ (command "tabix"
+ (list "tabix" (input "vcf" #:type 'File))
+ #:outputs (list (output "indexed_vcf" #:type 'File
+ #:binding '((glob . "$(inputs.vcf.basename)"))
+ #:other '((secondary-files . #(".tbi")))))
+ #:other `((hints (Initial-work-dir-requirement
+ (listing . #("$(inputs.vcf)")))))))
+ (list (output (string-append prefix "_output")
+ #:type 'File
+ #:source "tabix/indexed_vcf"))))
+
+(define variant-calling
+ (workflow "variant-calling"
+ (list (command "bowtie"
+ (list "bowtie2" "-p" "16"
+ "-x" bowtie2-index
+ "-1" fastq-1
+ "-2" fastq-2)
+ #:outputs (list (output "bowtie_input" #:type 'stdout)))
+ (bam2vcf "bowtie")
+ (command "bwa_mem"
+ (list "bwa" "mem" "-K" "100000000" "-v" "3" "-t" "16"
+ "-Y" bwa-mem-index fastq-1 fastq-2)
+ #:outputs (list (output "bwa_mem_input" #:type 'stdout)))
+ (bam2vcf "bwa_mem"))
+ (list (output "bowtie_output"
+ #:type 'File
+ #:source "bowtie_bam2vcf/bowtie_output")
+ (output "bwa_mem_output"
+ #:type 'File
+ #:source "bwa_mem_bam2vcf/bwa_mem_output"))))
+
+(write-cwl variant-calling
+ "workflows/variant-calling-bowtie-bwa-mem.cwl")