;;
;; fastq2fasta.cwl workflow
;;
;; Builds the fastq2fasta workflow description with the generate-cwl
;; DSL and writes it out as YAML to
;; workflows/fastq2fasta/fastq2fasta.cwl.
;;
;; The header comment must stay on its own lines: a `;' comment runs to
;; the end of the physical line, so any code sharing a line with it is
;; silently commented out.
;;

(use-modules (srfi srfi-26)
             (generate-cwl generate-cwl)
             (generate-cwl yaml))

;;;
;;; Workflow inputs
;;;

(define threads
  (input "threads"
         #:type 'int
         #:label "number of threads"
         #:default 4))

;; Reference sequence; the listed secondary files are the extensions
;; declared alongside it (.amb, .ann, .bwt, .pac, .sa).
(define ref-fasta
  (input "ref_fasta"
         #:type 'File
         #:other '((secondary-files . #(".amb" ".ann" ".bwt" ".pac" ".sa")))))

(define output-sam
  (input "output_sam"
         #:type 'string
         #:label "sam file to output results to"
         #:default "out.sam"))

;; NOTE(review): `\t' inside a Scheme string literal is a TAB character,
;; so this label carries real tabs rather than the two-character
;; sequence backslash-t.  Confirm that is what should appear in the
;; generated label.
(define group-header-line
  (input "group_header_line"
         #:type 'string?
         #:label "read group header line such as '@RG\tID:foo\tSM:bar'"))

(define fastq-forward
  (input "fastq_forward"
         #:type 'File
         #:label "input fastq file to map (single-end or forward for pair-end)"))

(define fastq-reverse
  (input "fastq_reverse"
         #:type 'File?
         #:label "input fastq file to map (reverse for pair-end)"))

(define sample-id
  (input "sample_id"
         #:type 'string))

;;;
;;; Container images, spliced into the steps below as either
;;; `requirements' or `hints'
;;;

(define bwa-docker
  '(Docker-requirement
    (docker-pull . "quay.io/biocontainers/bwa:0.7.17--h84994c4_5")))

(define samtools-docker
  '(Docker-requirement
    (docker-pull . "quay.io/biocontainers/samtools:1.9--h8571acd_11")))

(define bcftools-docker
  '(Docker-requirement
    (docker-pull . "quay.io/biocontainers/bcftools:1.10.2--hd2cd319_0")))

;;;
;;; Workflow
;;;

;; The workflow is a list of command-line tool steps followed by a list
;; of workflow outputs.  Each `intermediate' pairs a step-local input
;; with a "step/output" string naming an output of an earlier step.
(define fastq2fasta
  (workflow
   (list
    ;; NOTE(review): group_header_line is optional (string?), but "-R"
    ;; is an unconditional literal argument.  Confirm what command line
    ;; is generated when group_header_line is not supplied.
    (clitool-step "bwa_mem"
                  (list "bwa" "mem"
                        "-t" threads
                        "-o" output-sam
                        "-R" group-header-line
                        ref-fasta
                        fastq-forward
                        fastq-reverse)
                  #:outputs (list (output "output"
                                          #:type 'File
                                          #:binding '((glob . "$(inputs.output_sam)"))))
                  #:other `((requirements ,bwa-docker)))

    (clitool-step "samtools_view"
                  (list "samtools" "view" "-b"
                        "-o" "aln.bam"
                        "-@" threads
                        (intermediate (input "input_file"
                                             #:type 'File
                                             #:label "input file")
                                      "bwa_mem/output"))
                  #:outputs (list (output "bam"
                                          #:type 'File
                                          #:binding '((glob . "aln.bam"))))
                  #:other `((doc . "samtools view to convert sam format to bam format")
                            (requirements ,samtools-docker)))

    (clitool-step "samtools_sort"
                  (list "samtools" "sort"
                        "-T" "sort.tmp"
                        "-o" "aln.sorted.bam"
                        "-@" threads
                        (intermediate (input "input_bamfile"
                                             #:type 'File
                                             #:label "Input bamfile")
                                      "samtools_view/bam"))
                  #:outputs (list (output "sorted_bam"
                                          #:type 'File
                                          #:binding '((glob . "aln.sorted.bam"))))
                  #:other `((doc . "samtools sort, sort given bam file")
                            (requirements ,samtools-docker)))

    ;; NOTE(review): this is the only tool step that declares neither a
    ;; Docker requirement nor a Docker hint -- confirm that is intended.
    (clitool-step "freebayes"
                  (list "freebayes"
                        "--ploidy" "1"
                        "--bam" (intermediate (input "bam" #:type 'File)
                                              "samtools_sort/sorted_bam")
                        "-f" ref-fasta)
                  #:outputs (list (output "vcf" #:type 'stdout))
                  #:stdout "var.vcf")

    (clitool-step "bcftools_view_exclude_ref"
                  (list "bcftools" "view" "--no-version" "-Ou"
                        "-e'type=ref'"
                        "--threads" threads
                        (intermediate (input "vcf" #:type 'File)
                                      "freebayes/vcf"))
                  #:outputs (list (output "bcf" #:type 'stdout))
                  #:other `((hints ,bcftools-docker)))

    (clitool-step "bcftools_norm"
                  (list "bcftools" "norm" "-Ob"
                        "-o" "normalized.bcf"
                        "-f" ref-fasta
                        "--threads" threads
                        (intermediate (input "bcf" #:type 'File)
                                      "bcftools_view_exclude_ref/bcf"))
                  #:outputs (list (output "normalized_bcf"
                                          #:type 'File
                                          #:binding '((glob . "normalized.bcf"))))
                  #:other `((hints ,bcftools-docker)))

    ;; The input file is listed under Initial-work-dir-requirement and
    ;; the output globs for the input's own basename, with ".csi"
    ;; declared as a secondary file.
    (clitool-step "bcftools_index_after_normalization"
                  (list "bcftools" "index"
                        (intermediate (input "bcf" #:type 'File)
                                      "bcftools_norm/normalized_bcf"))
                  #:outputs (list (output "indexed"
                                          #:type 'File
                                          #:binding '((glob . "$(inputs.bcf.basename)"))
                                          #:other '((secondary-files . #(".csi")))))
                  #:other `((hints ,bcftools-docker
                                   (Initial-work-dir-requirement
                                    (listing . #("$(inputs.bcf)"))))))

    (clitool-step "bcftools_view_qc"
                  (list "bcftools" "view"
                        "-i" "'QUAL > 10 && GT=\"a\"'"
                        "-Oz"
                        "--threads" threads
                        (intermediate (input "bcf"
                                             #:type 'File
                                             #:other '((secondary-files . #(".csi"))))
                                      "bcftools_index_after_normalization/indexed"))
                  #:outputs (list (output "vcf" #:type 'stdout))
                  #:other `((hints ,bcftools-docker)))

    ;; Same indexing arrangement as bcftools_index_after_normalization,
    ;; applied to the output of bcftools_view_qc.
    (clitool-step "bcftools_index_after_qc"
                  (list "bcftools" "index"
                        (intermediate (input "bcf" #:type 'File)
                                      "bcftools_view_qc/vcf"))
                  #:outputs (list (output "indexed"
                                          #:type 'File
                                          #:binding '((glob . "$(inputs.bcf.basename)"))
                                          #:other '((secondary-files . #(".csi")))))
                  #:other `((hints ,bcftools-docker
                                   (Initial-work-dir-requirement
                                    (listing . #("$(inputs.bcf)"))))))

    (clitool-step "bcftools_consensus"
                  (list "bcftools" "consensus"
                        "-i" "'QUAL > 10 && GT=\"a\"'"
                        "-Hla"
                        "-f" ref-fasta
                        (intermediate (input "vcf"
                                             #:type 'File
                                             #:other '((secondary-files . #(".csi"))))
                                      "bcftools_index_after_qc/indexed"))
                  #:outputs (list (output "out_fasta" #:type 'stdout))
                  #:stdout "sequence.fasta"
                  #:other `((hints ,bcftools-docker)))

    ;; The sed expression replaces every line starting with ">" by
    ;; ">" followed by the sample_id input.  sample_id is not part of
    ;; the command list, so it is passed through #:additional-inputs.
    (clitool-step "set_sample_id"
                  (list "sed"
                        (intermediate (input "fasta" #:type 'File)
                                      "bcftools_consensus/out_fasta"))
                  #:additional-inputs (list sample-id)
                  #:outputs (list (output "out_fasta" #:type 'stdout))
                  #:stdout "sequence.fasta"
                  #:other '((arguments . #("s/^>.*/>$(inputs.sample_id)/g")))))

   (list (workflow-output "out_fasta"
                          #:type 'File
                          #:source "set_sample_id/out_fasta"))))

;; Serialise the workflow: `cut' builds a one-argument procedure that
;; receives the output port and calls (scm->yaml fastq2fasta port).
(call-with-output-file "workflows/fastq2fasta/fastq2fasta.cwl"
  (cut scm->yaml fastq2fasta <>))