;;
;; fastq2fasta.cwl workflow
;;
;; Generates the CWL tool and workflow descriptions of the fastq2fasta
;; pipeline and writes each of them as YAML below workflows/fastq2fasta/.
;;
;; Both the module load path ("./scripts") and the output paths are
;; relative, so the script is expected to be run from the directory that
;; contains scripts/ and workflows/.
;;
;; Provenance: scripts/fastq2fasta.scm as introduced by commit
;; 85c63fecd1eb20aebb7ec8ce3137af253619b01b
;; (Arun Isaac, Sun, 21 Feb 2021 19:45:49 +0530,
;; "Implement fastq2fasta workflows in scheme").
;;

;; Must precede use-modules so that the project-local modules are found.
(add-to-load-path "./scripts")

(use-modules (srfi srfi-26)
             (generate-cwl)
             (yaml))

;;
;; Output helper
;;

;; Directory that receives every generated .cwl file.
(define output-directory "workflows/fastq2fasta")

;; Serialize DOCUMENT with scm->yaml into FILENAME inside
;; output-directory.
;;
;; DOCUMENT is an ordinary argument and is therefore fully built before
;; the file is opened: if building a description fails, a previously
;; generated file is left untouched instead of being truncated.
(define (write-cwl filename document)
  (call-with-output-file (string-append output-directory "/" filename)
    (cut scm->yaml document <>)))

;;
;; Inputs shared by several tools and workflows
;;

(define threads
  (input "threads"
         #:type 'int
         #:label "number of threads"
         #:default 4))

(define ref-fasta
  (input "ref_fasta" #:type 'File))

(define sample-id
  (input "sample_id" #:type 'string))

;;
;; Container pins shared by several tools, named once so that all tools
;; of one suite stay on the same image.
;;

(define samtools-docker
  '(Docker-requirement
    (docker-pull . "quay.io/biocontainers/samtools:1.9--h8571acd_11")))

(define bcftools-docker
  '(Docker-requirement
    (docker-pull . "quay.io/biocontainers/bcftools:1.10.2--hd2cd319_0")))

;;
;; Command line tools
;;

;; bwa mem: map the reads against the indexed reference, writing SAM.
(write-cwl "bwa-mem.cwl"
  (clitool
   (list "bwa" "mem"
         "-t" threads
         "-o" (input "output_sam"
                     #:type 'string
                     #:label "sam file to output results to"
                     #:default "out.sam")
         "-R" (input "group_header_line"
                     #:type 'string?
                     #:label "read group header line such as '@RG\tID:foo\tSM:bar'")
         (input "index_base"
                #:type 'File
                #:label "fasta file for index basename"
                #:other '((secondary-files . #(".amb" ".ann" ".bwt" ".pac" ".sa"))))
         (input "fastq_forward"
                #:type 'File
                #:label "input fastq file to map (single-end or forward for pair-end)")
         (input "fastq_reverse"
                #:type 'File?
                #:label "input fastq file to map (reverse for pair-end)"))
   #:outputs (list (output "output"
                           #:type 'File
                           #:binding '((glob . "$(inputs.output_sam)")))
                   (output "stdout" #:type 'stdout)
                   (output "stderr" #:type 'stderr))
   #:stdout "bwa-mem-stdout.log"
   #:stderr "bwa-mem-stderr.log"
   #:other '((requirements
              (Docker-requirement
               (docker-pull . "quay.io/biocontainers/bwa:0.7.17--h84994c4_5"))))))

;; samtools view: convert SAM to BAM.
(write-cwl "samtools-view.cwl"
  (clitool
   (list "samtools" "view"
         "-@" threads
         "-b" (input "output_bam"
                     #:type 'boolean
                     #:label "output BAM"
                     #:default #t)
         "-o" (input "output_filename"
                     #:type 'string
                     #:label "output file name"
                     #:default "aln.bam")
         (input "input_file"
                #:type 'File
                #:label "input file")
         "-h" (input "include_header"
                     #:type 'boolean
                     #:label "include the header in the output"
                     #:default #f)
         "-S" (input "ignore_previous_version"
                     #:type 'boolean
                     #:label "ignored for compatibility with previous samtools versions"
                     #:default #f)
         "-F" (input "filter_alignments"
                     #:type 'string?
                     #:label "Do not output alignments with any bits set in INT present in the FLAG field. INT can be specified in hex by beginning with `0x' (i.e. /^0x[0-9A-F]+/) or in octal by beginning with `0' (i.e. /^0[0-7]+/) [0].")
         "-q" (input "skip_alignments"
                     #:type 'int?
                     #:label "Skip alignments with MAPQ smaller than INT [0]."))
   #:outputs (list (output "bam"
                           #:type 'File
                           #:binding '((glob . "$(inputs.output_filename)")))
                   (output "stdout" #:type 'stdout)
                   (output "stderr" #:type 'stderr))
   #:stdout "samtools-view-stdout.log"
   #:stderr "samtools-view-stderr.log"
   #:other `((doc . "samtools view to convert sam format to bam format")
             (requirements ,samtools-docker))))

;; samtools sort: sort the BAM file.
(write-cwl "samtools-sort.cwl"
  (clitool
   (list "samtools" "sort"
         "-@" threads
         "-T" (input "tmpfile"
                     #:type 'string
                     #:label "Write temporary files to PREFIX.nnnn.bam"
                     #:default "sort.tmp")
         "-o" (input "output_bam"
                     #:type 'string
                     #:label "Write final output to FILENAME"
                     #:default "aln.sorted.bam")
         (input "input_bamfile"
                #:type 'File
                #:label "Input bamfile"))
   #:outputs (list (output "sorted_bam"
                           #:type 'File
                           #:binding '((glob . "$(inputs.output_bam)")))
                   (output "stdout" #:type 'stdout)
                   (output "stderr" #:type 'stderr))
   #:stdout "samtools-sort-stdout.log"
   #:stderr "samtools-sort-stderr.log"
   #:other `((doc . "samtools sort, sort given bam file")
             (requirements ,samtools-docker))))

;; freebayes: call variants against the reference; the VCF is captured
;; from stdout.  No container is pinned for this tool.
(write-cwl "freebayes.cwl"
  (clitool
   (list "freebayes" "--ploidy" "1"
         "--bam" (input "bam" #:type 'File)
         "-f" ref-fasta)
   #:outputs (list (output "vcf" #:type 'stdout))
   #:stdout "var.vcf"))

;; bcftools view: drop reference-type records; the BCF is captured from
;; stdout.
;;
;; The Docker hint was missing here although every other bcftools tool in
;; this file pins the image; it is added so that the whole bcftools chain
;; runs on the same version.  It is a hint, not a requirement.
;;
;; NOTE(review): the single quotes in "-e'type=ref'" are part of the
;; argument string.  CWL does not pass arguments through a shell unless
;; ShellCommandRequirement is used, so they presumably reach bcftools
;; literally -- confirm against what clitool generates.
(write-cwl "bcftools-view-exclude-ref.cwl"
  (clitool
   (list "bcftools" "view" "--no-version" "-Ou"
         "-e'type=ref'" "--threads" threads
         (input "vcf" #:type 'File))
   #:outputs (list (output "bcf" #:type 'stdout))
   #:stdout "$(inputs.vcf.nameroot).without-ref.bcf"
   #:other `((hints ,bcftools-docker))))

;; bcftools norm: normalize the calls against the reference.
(write-cwl "bcftools-norm.cwl"
  (clitool
   (list "bcftools" "norm" "-Ob"
         "-f" ref-fasta
         "-o" (input "output_name" #:type 'string #:default "normalized.bcf")
         "--threads" threads
         (input "bcf" #:type 'File))
   #:outputs (list (output "normalized_bcf"
                           #:type 'File
                           #:binding '((glob . "$(inputs.output_name)"))))
   #:other `((hints ,bcftools-docker))))

;; bcftools index: index the given file.  The input is listed in the
;; initial work directory and the output globs the input's basename with
;; a ".csi" secondary file.
(write-cwl "bcftools-index.cwl"
  (clitool
   (list "bcftools" "index" (input "bcf" #:type 'File))
   #:outputs (list (output "indexed"
                           #:type 'File
                           #:binding '((glob . "$(inputs.bcf.basename)"))
                           #:other '((secondary-files . #(".csi")))))
   #:other `((hints
              ,bcftools-docker
              (Initial-work-dir-requirement
               (listing . #("$(inputs.bcf)")))))))

;; bcftools view: quality filter; the compressed VCF is captured from
;; stdout (no stdout file name is given for this tool).
;;
;; NOTE(review): the filter expression string contains literal single
;; quotes; see the note on bcftools-view-exclude-ref.cwl -- confirm they
;; are intended to reach bcftools.
(write-cwl "bcftools-view-qc.cwl"
  (clitool
   (list "bcftools" "view" "-i" "'QUAL > 10 && GT=\"a\"'" "-Oz"
         "--threads" threads
         (input "bcf"
                #:type 'File
                #:other '((secondary-files . #(".csi")))))
   #:outputs (list (output "vcf" #:type 'stdout))
   #:other `((hints ,bcftools-docker))))

;; bcftools consensus: apply the filtered calls to the reference; the
;; FASTA is captured from stdout.
;;
;; NOTE(review): same literal single quotes in the "-i" expression as in
;; bcftools-view-qc.cwl -- confirm.
(write-cwl "bcftools-consensus.cwl"
  (clitool
   (list "bcftools" "consensus"
         "-i" "'QUAL > 10 && GT=\"a\"'" "-Hla"
         "-f" ref-fasta
         (input "vcf"
                #:type 'File
                #:other '((secondary-files . #(".csi")))))
   #:outputs (list (output "out_fasta" #:type 'stdout))
   #:stdout "sequence.fasta"
   #:other `((hints ,bcftools-docker))))

;; sed: replace every FASTA header line with ">" followed by the sample
;; id.  sample_id is only referenced from the sed expression, so it is
;; declared as an additional input; the shared sample-id definition is
;; reused instead of declaring the same input a second time.
(write-cwl "set-sample-id.cwl"
  (clitool
   (list "sed" (input "fasta" #:type 'File))
   #:additional-inputs (list sample-id)
   #:outputs (list (output "out_fasta" #:type 'stdout))
   #:stdout "sequence.fasta"
   #:other '((arguments . #("s/^>.*/>$(inputs.sample_id)/g")))))

;;
;; Workflows
;;

;; bam2fasta: sorted BAM -> variant calls -> normalized and filtered
;; calls -> consensus FASTA with the sample id as header.
(let ((fasta (input "fasta" #:type 'File)))
  (write-cwl "bam2fasta.cwl"
    (workflow
     (list (step "freebayes" "freebayes.cwl"
                 `((bam . ,(input "bam" #:type 'File))
                   (ref_fasta . ,fasta))
                 (list "vcf"))
           (step "bcftools_view_exclude_ref" "bcftools-view-exclude-ref.cwl"
                 `((vcf . "freebayes/vcf")
                   (threads . ,threads))
                 (list "bcf"))
           (step "bcftools_norm" "bcftools-norm.cwl"
                 `((ref_fasta . ,fasta)
                   (bcf . "bcftools_view_exclude_ref/bcf")
                   (threads . ,threads))
                 (list "normalized_bcf"))
           (step "bcftools_index_after_normalization" "bcftools-index.cwl"
                 '((bcf . "bcftools_norm/normalized_bcf"))
                 (list "indexed"))
           (step "bcftools_view_qc" "bcftools-view-qc.cwl"
                 `((bcf . "bcftools_index_after_normalization/indexed")
                   (threads . ,threads))
                 (list "vcf"))
           (step "bcftools_index_after_qc" "bcftools-index.cwl"
                 '((bcf . "bcftools_view_qc/vcf"))
                 (list "indexed"))
           (step "bcftools_consensus" "bcftools-consensus.cwl"
                 `((ref_fasta . ,fasta)
                   (vcf . "bcftools_index_after_qc/indexed"))
                 (list "out_fasta"))
           (step "set_sample_id" "set-sample-id.cwl"
                 `((fasta . "bcftools_consensus/out_fasta")
                   (sample_id . ,sample-id))
                 (list "out_fasta")))
     (list (workflow-output "out_fasta"
                            #:type 'File
                            #:source "set_sample_id/out_fasta")))))

;; fastq2fasta: top-level workflow, reads -> alignment -> sorted BAM ->
;; bam2fasta.  The reference input used here additionally declares the
;; index files as secondary files, hence the separate local definition
;; (named differently from the top-level ref-fasta to avoid shadowing).
(let ((indexed-ref-fasta
       (input "ref_fasta"
              #:type 'File
              #:other '((secondary-files
                         . #(".amb" ".ann" ".bwt" ".pac" ".sa" ".fai"))))))
  (write-cwl "fastq2fasta.cwl"
    (workflow
     (list (step "bwa-mem" "bwa-mem.cwl"
                 `((threads . ,threads)
                   (fastq_forward . ,(input "fastq_forward" #:type 'File))
                   (fastq_reverse . ,(input "fastq_reverse" #:type 'File?))
                   (index_base . ,indexed-ref-fasta))
                 (list "output"))
           (step "samtools-view" "samtools-view.cwl"
                 `((threads . ,threads)
                   (input_file . "bwa-mem/output"))
                 (list "bam"))
           (step "samtools-sort" "samtools-sort.cwl"
                 `((input_bamfile . "samtools-view/bam")
                   (threads . ,threads))
                 (list "sorted_bam"))
           (step "bam2fasta" "bam2fasta.cwl"
                 `((bam . "samtools-sort/sorted_bam")
                   (fasta . ,indexed-ref-fasta)
                   (threads . ,threads)
                   (sample_id . ,sample-id))
                 (list "out_fasta")))
     (list (workflow-output "out_fasta"
                            #:type 'File
                            #:source "bam2fasta/out_fasta")
           ;; The optional metadata file is passed straight through from
           ;; a workflow input to a workflow output.
           (workflow-output "out_metadata"
                            #:type 'File?
                            #:source (input "metadata" #:type 'File?)))
     #:other '((requirements (Subworkflow-feature-requirement))
               (hints (Resource-requirement (ram-min . 3000)))))))