From 68f8201eb21d9acfbf4a6853da19b374b4dec10c Mon Sep 17 00:00:00 2001 From: Arun Isaac Date: Sun, 7 Mar 2021 03:55:54 +0530 Subject: Implement bowtie, bwa-mem variant calling in ccwl --- scripts/variant-calling-bowtie-bwa-mem.scm | 102 +++++++++++++++++++++++++++++ 1 file changed, 102 insertions(+) create mode 100644 scripts/variant-calling-bowtie-bwa-mem.scm diff --git a/scripts/variant-calling-bowtie-bwa-mem.scm b/scripts/variant-calling-bowtie-bwa-mem.scm new file mode 100644 index 0000000..59c1337 --- /dev/null +++ b/scripts/variant-calling-bowtie-bwa-mem.scm @@ -0,0 +1,102 @@ +;; fastq sensitive short read SARS-CoV-2 mutation calling + +(use-modules (ccwl ccwl)) + +(define fastq-1 + (input "fastq_1" #:type 'File)) + +(define fastq-2 + (input "fastq_2" #:type 'File)) + +(define bowtie2-index + (input "bowtie2_index" + #:type 'File + #:other '((secondary-files . #(".1.bt2" ".2.bt2" ".3.bt2" ".4.bt2" + ".rev.1.bt2" ".rev.2.bt2"))))) + +(define bwa-mem-index + (input "bwa_mem_index" + #:type 'File + #:other '((secondary-files . #(".amb" ".ann" ".bwt" ".pac" ".sa"))))) + +(define reference + (input "reference" + #:type 'File + #:other '((secondary-files . #(".fai"))))) + +(define (bam2vcf prefix) + (workflow (string-append prefix "_bam2vcf") + (list (pipeline "samtools" + (list (command "samtools_view" + (list "samtools" "view" "-h" "-b" "-F" "4" + (input (string-append prefix "_input") #:type 'File))) + (command "samtools_sort" + (list "samtools" "sort" "-n")) + (command "samtools_fixmate" + (list "samtools" "fixmate" "-m" "-" "-")) + (command "samtools_sort2" + (list "samtools" "sort" "-")) + (command "samtools_markdup" + (list "samtools" "markdup" "-" "-") + #:other '((stdout . "dedup.bam")))) + (list (output "dedup_bam" + #:type 'File + #:source "samtools_markdup/stdout"))) + (command "samtools_index" + (list "samtools" "index" + (input "dedup_bam" #:type 'File)) + #:outputs (list (output "indexed_dedup_bam" + #:type 'File + #:binding '((glob . "$(inputs.dedup_bam.basename)")) + #:other '((secondary-files . #(".bai"))))) + #:other `((hints (Initial-work-dir-requirement + (listing . #("$(inputs.dedup_bam)")))))) + (pipeline "freebayes" + (list (command "freebayes" + (list "freebayes" "--ploidy" "1" + "--bam" (input "indexed_dedup_bam" + #:type 'File + #:other '((secondary-files . #("bai")))) + "-f" reference)) + (command "bcftools_view" + (list "bcftools" "view" + "-i" "QUAL > 10" + "-Oz") + #:other `((stdout . ,(string-append prefix "_output.vcf.gz"))))) + (list (output "vcf" + #:type 'File + #:source "bcftools_view/stdout"))) + (command "tabix" + (list "tabix" (input "vcf" #:type 'File)) + #:outputs (list (output "indexed_vcf" #:type 'File + #:binding '((glob . "$(inputs.vcf.basename)")) + #:other '((secondary-files . #(".tbi"))))) + #:other `((hints (Initial-work-dir-requirement + (listing . #("$(inputs.vcf)"))))))) + (list (output (string-append prefix "_output") + #:type 'File + #:source "tabix/indexed_vcf")))) + +(define variant-calling + (workflow "variant-calling" + (list (command "bowtie" + (list "bowtie2" "-p" "16" + "-x" bowtie2-index + "-1" fastq-1 + "-2" fastq-2) + #:outputs (list (output "bowtie_input" #:type 'stdout))) + (bam2vcf "bowtie") + (command "bwa_mem" + (list "bwa" "mem" "-K" "100000000" "-v" "3" "-t" "16" + "-Y" bwa-mem-index fastq-1 fastq-2) + #:outputs (list (output "bwa_mem_input" #:type 'stdout))) + (bam2vcf "bwa_mem")) + (list (output "bowtie_output" + #:type 'File + #:source "bowtie_bam2vcf/bowtie_output") + (output "bwa_mem_output" + #:type 'File + #:source "bwa_mem_bam2vcf/bwa_mem_output")))) + +(write-cwl variant-calling + "workflows/variant-calling-bowtie-bwa-mem.cwl") -- cgit v1.2.3