;;
;; fastq2fasta.cwl workflow
;;
;; Running this script writes one CWL file per command-line tool plus
;; two workflow files (bam2fasta.cwl and fastq2fasta.cwl) under
;; workflows/fastq2fasta/.  Each file is produced by building a
;; description with the generate-cwl procedures and serialising it
;; with scm->yaml.
;;

(use-modules (srfi srfi-26)
             (generate-cwl generate-cwl)
             (generate-cwl yaml))

;; Container images.  Each image is named once here so that a version
;; bump touches a single line instead of every tool that uses it.
(define bwa-image
  "quay.io/biocontainers/bwa:0.7.17--h84994c4_5")

(define samtools-image
  "quay.io/biocontainers/samtools:1.9--h8571acd_11")

(define bcftools-image
  "quay.io/biocontainers/bcftools:1.10.2--hd2cd319_0")

(define (pull-image-requirement image)
  "Return the Docker-requirement entry whose docker-pull field is IMAGE."
  `(Docker-requirement (docker-pull . ,image)))

;; Inputs shared by several tools and workflows below.
(define threads
  (input "threads"
         #:type 'int
         #:label "number of threads"
         #:default 4))

(define ref-fasta
  (input "ref_fasta" #:type 'File))

(define sample-id
  (input "sample_id" #:type 'string))

;; bwa-mem.cwl: command-line tool for `bwa mem`.
(call-with-output-file "workflows/fastq2fasta/bwa-mem.cwl"
  (cut scm->yaml
       (clitool
        (list "bwa" "mem"
              "-t" threads
              "-o" (input "output_sam"
                          #:type 'string
                          #:label "sam file to output results to"
                          #:default "out.sam")
              ;; The backslashes are doubled because Scheme reads \t
              ;; inside a string literal as a TAB character; the label
              ;; is meant to show the two characters backslash and t.
              "-R" (input "group_header_line"
                          #:type 'string?
                          #:label "read group header line such as '@RG\\tID:foo\\tSM:bar'")
              (input "index_base"
                     #:type 'File
                     #:label "fasta file for index basename"
                     #:other '((secondary-files . #(".amb" ".ann" ".bwt" ".pac" ".sa"))))
              (input "fastq_forward"
                     #:type 'File
                     #:label "input fastq file to map (single-end or forward for pair-end)")
              (input "fastq_reverse"
                     #:type 'File?
                     #:label "input fastq file to map (reverse for pair-end)"))
        #:outputs (list (output "output"
                                #:type 'File
                                #:binding '((glob . "$(inputs.output_sam)")))
                        (output "stdout" #:type 'stdout)
                        (output "stderr" #:type 'stderr))
        #:stdout "bwa-mem-stdout.log"
        #:stderr "bwa-mem-stderr.log"
        #:other `((requirements ,(pull-image-requirement bwa-image))))
       <>))

;; samtools-view.cwl: command-line tool for `samtools view`.
(call-with-output-file "workflows/fastq2fasta/samtools-view.cwl"
  (cut scm->yaml
       (clitool
        (list "samtools" "view"
              "-@" threads
              "-b" (input "output_bam"
                          #:type 'boolean
                          #:label "output BAM"
                          #:default #t)
              "-o" (input "output_filename"
                          #:type 'string
                          #:label "output file name"
                          #:default "aln.bam")
              (input "input_file"
                     #:type 'File
                     #:label "input file")
              "-h" (input "include_header"
                          #:type 'boolean
                          #:label "include the header in the output"
                          #:default #f)
              "-S" (input "ignore_previous_version"
                          #:type 'boolean
                          #:label "ignored for compatibility with previous samtools versions"
                          #:default #f)
              "-F" (input "filter_alignments"
                          #:type 'string?
                          #:label "Do not output alignments with any bits set in INT present in the FLAG field. INT can be specified in hex by beginning with `0x' (i.e. /^0x[0-9A-F]+/) or in octal by beginning with `0' (i.e. /^0[0-7]+/) [0].")
              "-q" (input "skip_alignments"
                          #:type 'int?
                          #:label "Skip alignments with MAPQ smaller than INT [0]."))
        #:outputs (list (output "bam"
                                #:type 'File
                                #:binding '((glob . "$(inputs.output_filename)")))
                        (output "stdout" #:type 'stdout)
                        (output "stderr" #:type 'stderr))
        #:stdout "samtools-view-stdout.log"
        #:stderr "samtools-view-stderr.log"
        #:other `((doc . "samtools view to convert sam format to bam format")
                  (requirements ,(pull-image-requirement samtools-image))))
       <>))

;; samtools-sort.cwl: command-line tool for `samtools sort`.
(call-with-output-file "workflows/fastq2fasta/samtools-sort.cwl"
  (cut scm->yaml
       (clitool
        (list "samtools" "sort"
              "-@" threads
              "-T" (input "tmpfile"
                          #:type 'string
                          #:label "Write temporary files to PREFIX.nnnn.bam"
                          #:default "sort.tmp")
              "-o" (input "output_bam"
                          #:type 'string
                          #:label "Write final output to FILENAME"
                          #:default "aln.sorted.bam")
              (input "input_bamfile"
                     #:type 'File
                     #:label "Input bamfile"))
        #:outputs (list (output "sorted_bam"
                                #:type 'File
                                #:binding '((glob . "$(inputs.output_bam)")))
                        (output "stdout" #:type 'stdout)
                        (output "stderr" #:type 'stderr))
        #:stdout "samtools-sort-stdout.log"
        #:stderr "samtools-sort-stderr.log"
        #:other `((doc . "samtools sort, sort given bam file")
                  (requirements ,(pull-image-requirement samtools-image))))
       <>))

;; freebayes.cwl: command-line tool for `freebayes --ploidy 1`; its
;; standard output is captured as var.vcf.
(call-with-output-file "workflows/fastq2fasta/freebayes.cwl"
  (cut scm->yaml
       (clitool
        (list "freebayes"
              "--ploidy" "1"
              "--bam" (input "bam" #:type 'File)
              "-f" ref-fasta)
        #:outputs (list (output "vcf" #:type 'stdout))
        #:stdout "var.vcf")
       <>))

;; bcftools-view-exclude-ref.cwl: command-line tool for
;; `bcftools view -e'type=ref'`; standard output is captured under a
;; name derived from the input file's nameroot.
(call-with-output-file "workflows/fastq2fasta/bcftools-view-exclude-ref.cwl"
  (cut scm->yaml
       (clitool
        (list "bcftools" "view"
              "--no-version"
              "-Ou"
              "-e'type=ref'"
              "--threads" threads
              (input "vcf" #:type 'File))
        #:outputs (list (output "bcf" #:type 'stdout))
        #:stdout "$(inputs.vcf.nameroot).without-ref.bcf")
       <>))

;; bcftools-norm.cwl: command-line tool for `bcftools norm`.
(call-with-output-file "workflows/fastq2fasta/bcftools-norm.cwl"
  (cut scm->yaml
       (clitool
        (list "bcftools" "norm"
              "-Ob"
              "-f" ref-fasta
              "-o" (input "output_name"
                          #:type 'string
                          #:default "normalized.bcf")
              "--threads" threads
              (input "bcf" #:type 'File))
        #:outputs (list (output "normalized_bcf"
                                #:type 'File
                                #:binding '((glob . "$(inputs.output_name)"))))
        #:other `((hints ,(pull-image-requirement bcftools-image))))
       <>))

;; bcftools-index.cwl: command-line tool for `bcftools index`.  The
;; input file is listed in the initial work directory and returned as
;; the output together with a ".csi" secondary file.
(call-with-output-file "workflows/fastq2fasta/bcftools-index.cwl"
  (cut scm->yaml
       (clitool
        (list "bcftools" "index"
              (input "bcf" #:type 'File))
        #:outputs (list (output "indexed"
                                #:type 'File
                                #:binding '((glob . "$(inputs.bcf.basename)"))
                                #:other '((secondary-files . #(".csi")))))
        #:other `((hints ,(pull-image-requirement bcftools-image)
                         (Initial-work-dir-requirement
                          (listing . #("$(inputs.bcf)"))))))
       <>))

;; bcftools-view-qc.cwl: command-line tool for `bcftools view` with an
;; include expression on QUAL and GT.
(call-with-output-file "workflows/fastq2fasta/bcftools-view-qc.cwl"
  (cut scm->yaml
       (clitool
        (list "bcftools" "view"
              "-i" "'QUAL > 10 && GT=\"a\"'"
              "-Oz"
              "--threads" threads
              (input "bcf"
                     #:type 'File
                     #:other '((secondary-files . #(".csi")))))
        #:outputs (list (output "vcf" #:type 'stdout))
        #:other `((hints ,(pull-image-requirement bcftools-image))))
       <>))

;; bcftools-consensus.cwl: command-line tool for `bcftools consensus`;
;; standard output is captured as sequence.fasta.
(call-with-output-file "workflows/fastq2fasta/bcftools-consensus.cwl"
  (cut scm->yaml
       (clitool
        (list "bcftools" "consensus"
              "-i" "'QUAL > 10 && GT=\"a\"'"
              "-Hla"
              "-f" ref-fasta
              (input "vcf"
                     #:type 'File
                     #:other '((secondary-files . #(".csi")))))
        #:outputs (list (output "out_fasta" #:type 'stdout))
        #:stdout "sequence.fasta"
        #:other `((hints ,(pull-image-requirement bcftools-image))))
       <>))

;; set-sample-id.cwl: command-line tool for `sed` with a substitution
;; that rewrites lines starting with ">" to ">" followed by the
;; sample_id input; standard output is captured as sequence.fasta.
(call-with-output-file "workflows/fastq2fasta/set-sample-id.cwl"
  (cut scm->yaml
       (clitool
        (list "sed"
              (input "fasta" #:type 'File))
        #:additional-inputs (list (input "sample_id" #:type 'string))
        #:outputs (list (output "out_fasta" #:type 'stdout))
        #:stdout "sequence.fasta"
        #:other '((arguments . #("s/^>.*/>$(inputs.sample_id)/g"))))
       <>))

;; bam2fasta.cwl: workflow chaining freebayes, the bcftools steps and
;; set-sample-id.  Each step consumes the named output of the step
;; before it; the workflow output is set_sample_id/out_fasta.
(let ((fasta (input "fasta" #:type 'File)))
  (call-with-output-file "workflows/fastq2fasta/bam2fasta.cwl"
    (cut scm->yaml
         (workflow
          (list (step "freebayes" "freebayes.cwl"
                      `((bam . ,(input "bam" #:type 'File))
                        (ref_fasta . ,fasta))
                      (list "vcf"))
                (step "bcftools_view_exclude_ref" "bcftools-view-exclude-ref.cwl"
                      `((vcf . "freebayes/vcf")
                        (threads . ,threads))
                      (list "bcf"))
                (step "bcftools_norm" "bcftools-norm.cwl"
                      `((ref_fasta . ,fasta)
                        (bcf . "bcftools_view_exclude_ref/bcf")
                        (threads . ,threads))
                      (list "normalized_bcf"))
                (step "bcftools_index_after_normalization" "bcftools-index.cwl"
                      '((bcf . "bcftools_norm/normalized_bcf"))
                      (list "indexed"))
                (step "bcftools_view_qc" "bcftools-view-qc.cwl"
                      `((bcf . "bcftools_index_after_normalization/indexed")
                        (threads . ,threads))
                      (list "vcf"))
                (step "bcftools_index_after_qc" "bcftools-index.cwl"
                      '((bcf . "bcftools_view_qc/vcf"))
                      (list "indexed"))
                (step "bcftools_consensus" "bcftools-consensus.cwl"
                      `((ref_fasta . ,fasta)
                        (vcf . "bcftools_index_after_qc/indexed"))
                      (list "out_fasta"))
                (step "set_sample_id" "set-sample-id.cwl"
                      `((fasta . "bcftools_consensus/out_fasta")
                        (sample_id . ,sample-id))
                      (list "out_fasta")))
          (list (workflow-output "out_fasta"
                                 #:type 'File
                                 #:source "set_sample_id/out_fasta")))
         <>)))

;; fastq2fasta.cwl: top-level workflow running bwa-mem, samtools-view,
;; samtools-sort and the bam2fasta sub-workflow.  The local ref-fasta
;; shadows the top-level one so that this workflow's ref_fasta input
;; carries the listed secondary files.
(let ((ref-fasta (input "ref_fasta"
                        #:type 'File
                        #:other '((secondary-files . #(".amb" ".ann" ".bwt" ".pac" ".sa" ".fai"))))))
  (call-with-output-file "workflows/fastq2fasta/fastq2fasta.cwl"
    (cut scm->yaml
         (workflow
          (list (step "bwa-mem" "bwa-mem.cwl"
                      `((threads . ,threads)
                        (fastq_forward . ,(input "fastq_forward" #:type 'File))
                        (fastq_reverse . ,(input "fastq_reverse" #:type 'File?))
                        (index_base . ,ref-fasta))
                      (list "output"))
                (step "samtools-view" "samtools-view.cwl"
                      `((threads . ,threads)
                        (input_file . "bwa-mem/output"))
                      (list "bam"))
                (step "samtools-sort" "samtools-sort.cwl"
                      `((input_bamfile . "samtools-view/bam")
                        (threads . ,threads))
                      (list "sorted_bam"))
                (step "bam2fasta" "bam2fasta.cwl"
                      `((bam . "samtools-sort/sorted_bam")
                        (fasta . ,ref-fasta)
                        (threads . ,threads)
                        (sample_id . ,sample-id))
                      (list "out_fasta")))
          (list (workflow-output "out_fasta"
                                 #:type 'File
                                 #:source "bam2fasta/out_fasta")
                (workflow-output "out_metadata"
                                 #:type 'File?
                                 #:source (input "metadata" #:type 'File?)))
          #:other '((requirements (Subworkflow-feature-requirement))
                    (hints (Resource-requirement (ram-min . 3000)))))
         <>)))