From 3050817d690f200076964b620064f93c9bd48d3a Mon Sep 17 00:00:00 2001 From: Arun Isaac Date: Sun, 7 Mar 2021 04:00:51 +0530 Subject: Rewrite fastq2fasta.scm using new ccwl API --- scripts/fastq2fasta.scm | 211 +++++++++++++++--------------------------------- 1 file changed, 65 insertions(+), 146 deletions(-) diff --git a/scripts/fastq2fasta.scm b/scripts/fastq2fasta.scm index 9880c0e..ef4682c 100644 --- a/scripts/fastq2fasta.scm +++ b/scripts/fastq2fasta.scm @@ -2,9 +2,7 @@ ;; fastq2fasta.cwl workflow ;; -(use-modules (srfi srfi-26) - (generate-cwl generate-cwl) - (generate-cwl yaml)) +(use-modules (ccwl ccwl)) (define threads (input "threads" @@ -23,11 +21,6 @@ #:label "sam file to output results to" #:default "out.sam")) -(define group-header-line - (input "group_header_line" - #:type 'string? - #:label "read group header line such as '@RG\tID:foo\tSM:bar'")) - (define fastq-forward (input "fastq_forward" #:type 'File @@ -41,144 +34,70 @@ (define sample-id (input "sample_id" #:type 'string)) -(define bwa-docker - '(Docker-requirement - (docker-pull . "quay.io/biocontainers/bwa:0.7.17--h84994c4_5"))) - -(define samtools-docker - '(Docker-requirement - (docker-pull . "quay.io/biocontainers/samtools:1.9--h8571acd_11"))) - -(define bcftools-docker - '(Docker-requirement - (docker-pull . "quay.io/biocontainers/bcftools:1.10.2--hd2cd319_0"))) - (define fastq2fasta - (workflow - (list (clitool-step "bwa_mem" - (list "bwa" "mem" - "-t" threads - "-o" output-sam - "-R" group-header-line - ref-fasta - fastq-forward - fastq-reverse) - #:outputs (list (output "output" - #:type 'File - #:binding '((glob . "$(inputs.output_sam)")))) - #:other `((requirements ,bwa-docker))) - (clitool-step "samtools_view" - (list "samtools" "view" - "-b" - "-o" "aln.bam" - "-@" threads - (intermediate (input "input_file" - #:type 'File - #:label "input file") - "bwa_mem/output")) - #:outputs (list (output "bam" - #:type 'File - #:binding '((glob . "aln.bam")))) - #:other `((doc . "samtools view to convert sam format to bam format") - (requirements ,samtools-docker))) - (clitool-step "samtools_sort" - (list "samtools" "sort" - "-T" "sort.tmp" - "-o" "aln.sorted.bam" - "-@" threads - (intermediate (input "input_bamfile" - #:type 'File - #:label "Input bamfile") - "samtools_view/bam")) - #:outputs (list (output "sorted_bam" - #:type 'File - #:binding '((glob . "aln.sorted.bam")))) - #:other `((doc . "samtools sort, sort given bam file") - (requirements ,samtools-docker))) - (clitool-step "freebayes" - (list "freebayes" - "--ploidy" "1" - "--bam" (intermediate (input "bam" #:type 'File) - "samtools_sort/sorted_bam") - "-f" ref-fasta) - #:outputs (list (output "vcf" #:type 'stdout)) - #:stdout "var.vcf") - (clitool-step "bcftools_view_exclude_ref" - (list "bcftools" "view" - "--no-version" "-Ou" - "-e'type=ref'" - "--threads" threads - (intermediate (input "vcf" #:type 'File) - "freebayes/vcf")) - #:outputs (list (output "bcf" #:type 'stdout)) - #:other `((hints ,bcftools-docker))) - (clitool-step "bcftools_norm" - (list "bcftools" "norm" - "-Ob" - "-o" "normalized.bcf" - "-f" ref-fasta - "--threads" threads - (intermediate (input "bcf" #:type 'File) - "bcftools_view_exclude_ref/bcf")) - #:outputs (list (output "normalized_bcf" - #:type 'File - #:binding '((glob . "normalized.bcf")))) - #:other `((hints ,bcftools-docker))) - (clitool-step "bcftools_index_after_normalization" - (list "bcftools" "index" - (intermediate (input "bcf" #:type 'File) - "bcftools_norm/normalized_bcf")) - #:outputs (list (output "indexed" - #:type 'File - #:binding '((glob . "$(inputs.bcf.basename)")) - #:other '((secondary-files . #(".csi"))))) - #:other `((hints ,bcftools-docker - (Initial-work-dir-requirement - (listing . #("$(inputs.bcf)")))))) - (clitool-step "bcftools_view_qc" - (list "bcftools" "view" - "-i" "'QUAL > 10 && GT=\"a\"'" - "-Oz" - "--threads" threads - (intermediate (input "bcf" - #:type 'File - #:other '((secondary-files . #(".csi")))) - "bcftools_index_after_normalization/indexed")) - #:outputs (list (output "vcf" #:type 'stdout)) - #:other `((hints ,bcftools-docker))) - (clitool-step "bcftools_index_after_qc" - (list "bcftools" "index" - (intermediate (input "bcf" #:type 'File) - "bcftools_view_qc/vcf")) - #:outputs (list (output "indexed" - #:type 'File - #:binding '((glob . "$(inputs.bcf.basename)")) - #:other '((secondary-files . #(".csi"))))) - #:other `((hints ,bcftools-docker - (Initial-work-dir-requirement - (listing . #("$(inputs.bcf)")))))) - (clitool-step "bcftools_consensus" - (list "bcftools" "consensus" - "-i" "'QUAL > 10 && GT=\"a\"'" - "-Hla" - "-f" ref-fasta - (intermediate (input "vcf" - #:type 'File - #:other '((secondary-files . #(".csi")))) - "bcftools_index_after_qc/indexed")) - #:outputs (list (output "out_fasta" #:type 'stdout)) - #:stdout "sequence.fasta" - #:other `((hints ,bcftools-docker))) - (clitool-step "set_sample_id" - (list "sed" (intermediate (input "fasta" #:type 'File) - "bcftools_consensus/out_fasta")) - #:additional-inputs (list sample-id) - #:outputs (list (output "out_fasta" #:type 'stdout)) - #:stdout "sequence.fasta" - #:other '((arguments . #("s/^>.*/>$(inputs.sample_id)/g"))))) - (list (workflow-output "out_fasta" + (workflow "fastq2fasta" + (list (pipeline "bwa_mem_to_normalized" + (list (command "bwa_mem" + (list "bwa" "mem" "-t" threads + ref-fasta fastq-forward fastq-reverse)) + (command "samtools_view" + (list "samtools" "view" "-b" "-@" threads "-")) + (command "samtools_sort" + (list "samtools" "sort" "-T" "sort.tmp" "-@" threads "-")) + (command "freebayes" + (list "freebayes" "--ploidy" "1" "--stdin" "-f" ref-fasta)) + (command "bcftools_view_exclude_ref" + (list "bcftools" "view" + "--no-version" "-Ou" "-e'type=ref'" + "--threads" threads "-")) + (command "bcftools_norm" + (list "bcftools" "norm" "-Ob" + "-f" ref-fasta "--threads" threads "-") + #:other '((stdout . "normalized.bcf")))) + (list (output "normalized" + #:type 'File + #:source "bcftools_norm/stdout"))) + (command "bcftools_index_after_normalization" + (list "bcftools" "index" (input "normalized" #:type 'File)) + #:outputs (list (output "index_after_normalization" + #:type 'File + #:binding '((glob . "$(inputs.normalized.basename)")) + #:other '((secondary-files . #(".csi"))))) + #:other `((hints (Initial-work-dir-requirement + (listing . #("$(inputs.normalized)")))))) + (command "bcftools_view_qc" + (list "bcftools" "view" + "-i" "'QUAL > 10 && GT=\"a\"'" + "-Oz" "--threads" threads + (input "index_after_normalization" + #:type 'File + #:other '((secondary-files . #(".csi"))))) + #:outputs (list (output "bcftools_view_qc_output_vcf" #:type 'stdout)) + #:other '((stdout . "bcftools_view_output.vcf.gz"))) + (command "bcftools_index_after_qc" + (list "bcftools" "index" (input "bcftools_view_qc_output_vcf" #:type 'File)) + #:outputs (list (output "index_after_qc" + #:type 'File + #:binding '((glob . "$(inputs.bcftools_view_qc_output_vcf.basename)")) + #:other '((secondary-files . #(".csi"))))) + #:other `((hints (Initial-work-dir-requirement + (listing . #("$(inputs.bcftools_view_qc_output_vcf)")))))) + (pipeline "consensus" + (list (command "bcftools_consensus" + (list "bcftools" "consensus" + "-i" "'QUAL > 10 && GT=\"a\"'" + "-Hla" "-f" ref-fasta + (input "index_after_qc" + #:type 'File + #:other '((secondary-files . #(".csi")))))) + (command "set_sample_id" + (list "sed" "s/^>.*/>$(inputs.sample_id)/g") + #:additional-inputs (list sample-id))) + (list (output "out_fasta" + #:type 'File + #:source "set_sample_id/stdout")))) + (list (output "out_fasta" #:type 'File - #:source "set_sample_id/out_fasta")))) + #:source "consensus/out_fasta")))) -(call-with-output-file "workflows/fastq2fasta/fastq2fasta.cwl" - (cut scm->yaml fastq2fasta <>)) +(write-cwl fastq2fasta "workflows/fastq2fasta/fastq2fasta.cwl") -- cgit v1.2.3