about | summary | refs | log | tree | commit | diff
path: root/scripts/fastq2fasta.scm
diff options
context:
space:
mode:
author Arun Isaac 2021-03-07 04:00:51 +0530
committer Arun Isaac 2021-03-07 04:00:51 +0530
commit 3050817d690f200076964b620064f93c9bd48d3a (patch)
tree 485725e3f552fb0fc71d60a7caa11d1947ac6dea /scripts/fastq2fasta.scm
parent 68f8201eb21d9acfbf4a6853da19b374b4dec10c (diff)
download bh20-seq-resource-3050817d690f200076964b620064f93c9bd48d3a.tar.gz
bh20-seq-resource-3050817d690f200076964b620064f93c9bd48d3a.tar.lz
bh20-seq-resource-3050817d690f200076964b620064f93c9bd48d3a.zip
Rewrite fastq2fasta.scm using new ccwl API
Diffstat (limited to 'scripts/fastq2fasta.scm')
-rw-r--r-- scripts/fastq2fasta.scm 211
1 files changed, 65 insertions, 146 deletions
diff --git a/scripts/fastq2fasta.scm b/scripts/fastq2fasta.scm
index 9880c0e..ef4682c 100644
--- a/scripts/fastq2fasta.scm
+++ b/scripts/fastq2fasta.scm
@@ -2,9 +2,7 @@
;; fastq2fasta.cwl workflow
;;
-(use-modules (srfi srfi-26)
- (generate-cwl generate-cwl)
- (generate-cwl yaml))
+(use-modules (ccwl ccwl))
(define threads
(input "threads"
@@ -23,11 +21,6 @@
#:label "sam file to output results to"
#:default "out.sam"))
-(define group-header-line
- (input "group_header_line"
- #:type 'string?
- #:label "read group header line such as '@RG\tID:foo\tSM:bar'"))
-
(define fastq-forward
(input "fastq_forward"
#:type 'File
@@ -41,144 +34,70 @@
(define sample-id
(input "sample_id" #:type 'string))
-(define bwa-docker
- '(Docker-requirement
- (docker-pull . "quay.io/biocontainers/bwa:0.7.17--h84994c4_5")))
-
-(define samtools-docker
- '(Docker-requirement
- (docker-pull . "quay.io/biocontainers/samtools:1.9--h8571acd_11")))
-
-(define bcftools-docker
- '(Docker-requirement
- (docker-pull . "quay.io/biocontainers/bcftools:1.10.2--hd2cd319_0")))
-
(define fastq2fasta
- (workflow
- (list (clitool-step "bwa_mem"
- (list "bwa" "mem"
- "-t" threads
- "-o" output-sam
- "-R" group-header-line
- ref-fasta
- fastq-forward
- fastq-reverse)
- #:outputs (list (output "output"
- #:type 'File
- #:binding '((glob . "$(inputs.output_sam)"))))
- #:other `((requirements ,bwa-docker)))
- (clitool-step "samtools_view"
- (list "samtools" "view"
- "-b"
- "-o" "aln.bam"
- "-@" threads
- (intermediate (input "input_file"
- #:type 'File
- #:label "input file")
- "bwa_mem/output"))
- #:outputs (list (output "bam"
- #:type 'File
- #:binding '((glob . "aln.bam"))))
- #:other `((doc . "samtools view to convert sam format to bam format")
- (requirements ,samtools-docker)))
- (clitool-step "samtools_sort"
- (list "samtools" "sort"
- "-T" "sort.tmp"
- "-o" "aln.sorted.bam"
- "-@" threads
- (intermediate (input "input_bamfile"
- #:type 'File
- #:label "Input bamfile")
- "samtools_view/bam"))
- #:outputs (list (output "sorted_bam"
- #:type 'File
- #:binding '((glob . "aln.sorted.bam"))))
- #:other `((doc . "samtools sort, sort given bam file")
- (requirements ,samtools-docker)))
- (clitool-step "freebayes"
- (list "freebayes"
- "--ploidy" "1"
- "--bam" (intermediate (input "bam" #:type 'File)
- "samtools_sort/sorted_bam")
- "-f" ref-fasta)
- #:outputs (list (output "vcf" #:type 'stdout))
- #:stdout "var.vcf")
- (clitool-step "bcftools_view_exclude_ref"
- (list "bcftools" "view"
- "--no-version" "-Ou"
- "-e'type=ref'"
- "--threads" threads
- (intermediate (input "vcf" #:type 'File)
- "freebayes/vcf"))
- #:outputs (list (output "bcf" #:type 'stdout))
- #:other `((hints ,bcftools-docker)))
- (clitool-step "bcftools_norm"
- (list "bcftools" "norm"
- "-Ob"
- "-o" "normalized.bcf"
- "-f" ref-fasta
- "--threads" threads
- (intermediate (input "bcf" #:type 'File)
- "bcftools_view_exclude_ref/bcf"))
- #:outputs (list (output "normalized_bcf"
- #:type 'File
- #:binding '((glob . "normalized.bcf"))))
- #:other `((hints ,bcftools-docker)))
- (clitool-step "bcftools_index_after_normalization"
- (list "bcftools" "index"
- (intermediate (input "bcf" #:type 'File)
- "bcftools_norm/normalized_bcf"))
- #:outputs (list (output "indexed"
- #:type 'File
- #:binding '((glob . "$(inputs.bcf.basename)"))
- #:other '((secondary-files . #(".csi")))))
- #:other `((hints ,bcftools-docker
- (Initial-work-dir-requirement
- (listing . #("$(inputs.bcf)"))))))
- (clitool-step "bcftools_view_qc"
- (list "bcftools" "view"
- "-i" "'QUAL > 10 && GT=\"a\"'"
- "-Oz"
- "--threads" threads
- (intermediate (input "bcf"
- #:type 'File
- #:other '((secondary-files . #(".csi"))))
- "bcftools_index_after_normalization/indexed"))
- #:outputs (list (output "vcf" #:type 'stdout))
- #:other `((hints ,bcftools-docker)))
- (clitool-step "bcftools_index_after_qc"
- (list "bcftools" "index"
- (intermediate (input "bcf" #:type 'File)
- "bcftools_view_qc/vcf"))
- #:outputs (list (output "indexed"
- #:type 'File
- #:binding '((glob . "$(inputs.bcf.basename)"))
- #:other '((secondary-files . #(".csi")))))
- #:other `((hints ,bcftools-docker
- (Initial-work-dir-requirement
- (listing . #("$(inputs.bcf)"))))))
- (clitool-step "bcftools_consensus"
- (list "bcftools" "consensus"
- "-i" "'QUAL > 10 && GT=\"a\"'"
- "-Hla"
- "-f" ref-fasta
- (intermediate (input "vcf"
- #:type 'File
- #:other '((secondary-files . #(".csi"))))
- "bcftools_index_after_qc/indexed"))
- #:outputs (list (output "out_fasta" #:type 'stdout))
- #:stdout "sequence.fasta"
- #:other `((hints ,bcftools-docker)))
- (clitool-step "set_sample_id"
- (list "sed" (intermediate (input "fasta" #:type 'File)
- "bcftools_consensus/out_fasta"))
- #:additional-inputs (list sample-id)
- #:outputs (list (output "out_fasta" #:type 'stdout))
- #:stdout "sequence.fasta"
- #:other '((arguments . #("s/^>.*/>$(inputs.sample_id)/g")))))
- (list (workflow-output "out_fasta"
+ (workflow "fastq2fasta"
+ (list (pipeline "bwa_mem_to_normalized"
+ (list (command "bwa_mem"
+ (list "bwa" "mem" "-t" threads
+ ref-fasta fastq-forward fastq-reverse))
+ (command "samtools_view"
+ (list "samtools" "view" "-b" "-@" threads "-"))
+ (command "samtools_sort"
+ (list "samtools" "sort" "-T" "sort.tmp" "-@" threads "-"))
+ (command "freebayes"
+ (list "freebayes" "--ploidy" "1" "--stdin" "-f" ref-fasta))
+ (command "bcftools_view_exclude_ref"
+ (list "bcftools" "view"
+ "--no-version" "-Ou" "-e'type=ref'"
+ "--threads" threads "-"))
+ (command "bcftools_norm"
+ (list "bcftools" "norm" "-Ob"
+ "-f" ref-fasta "--threads" threads "-")
+ #:other '((stdout . "normalized.bcf"))))
+ (list (output "normalized"
+ #:type 'File
+ #:source "bcftools_norm/stdout")))
+ (command "bcftools_index_after_normalization"
+ (list "bcftools" "index" (input "normalized" #:type 'File))
+ #:outputs (list (output "index_after_normalization"
+ #:type 'File
+ #:binding '((glob . "$(inputs.normalized.basename)"))
+ #:other '((secondary-files . #(".csi")))))
+ #:other `((hints (Initial-work-dir-requirement
+ (listing . #("$(inputs.normalized)"))))))
+ (command "bcftools_view_qc"
+ (list "bcftools" "view"
+ "-i" "'QUAL > 10 && GT=\"a\"'"
+ "-Oz" "--threads" threads
+ (input "index_after_normalization"
+ #:type 'File
+ #:other '((secondary-files . #(".csi")))))
+ #:outputs (list (output "bcftools_view_qc_output_vcf" #:type 'stdout))
+ #:other '((stdout . "bcftools_view_output.vcf.gz")))
+ (command "bcftools_index_after_qc"
+ (list "bcftools" "index" (input "bcftools_view_qc_output_vcf" #:type 'File))
+ #:outputs (list (output "index_after_qc"
+ #:type 'File
+ #:binding '((glob . "$(inputs.bcftools_view_qc_output_vcf.basename)"))
+ #:other '((secondary-files . #(".csi")))))
+ #:other `((hints (Initial-work-dir-requirement
+ (listing . #("$(inputs.bcftools_view_qc_output_vcf)"))))))
+ (pipeline "consensus"
+ (list (command "bcftools_consensus"
+ (list "bcftools" "consensus"
+ "-i" "'QUAL > 10 && GT=\"a\"'"
+ "-Hla" "-f" ref-fasta
+ (input "index_after_qc"
+ #:type 'File
+ #:other '((secondary-files . #(".csi"))))))
+ (command "set_sample_id"
+ (list "sed" "s/^>.*/>$(inputs.sample_id)/g")
+ #:additional-inputs (list sample-id)))
+ (list (output "out_fasta"
+ #:type 'File
+ #:source "set_sample_id/stdout"))))
+ (list (output "out_fasta"
#:type 'File
- #:source "set_sample_id/out_fasta"))))
+ #:source "consensus/out_fasta"))))
-(call-with-output-file "workflows/fastq2fasta/fastq2fasta.cwl"
- (cut scm->yaml fastq2fasta <>))
+(write-cwl fastq2fasta "workflows/fastq2fasta/fastq2fasta.cwl")