aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Arun Isaac 2021-02-21 19:45:49 +0530
committer Arun Isaac 2021-02-21 19:45:49 +0530
commit 85c63fecd1eb20aebb7ec8ce3137af253619b01b (patch)
tree 52263e1612ab298fe5410782847c5523020b95d1
parent 2ed13ca5e4c779b59d04e574a496b6cb69bcba44 (diff)
download bh20-seq-resource-85c63fecd1eb20aebb7ec8ce3137af253619b01b.tar.gz
bh20-seq-resource-85c63fecd1eb20aebb7ec8ce3137af253619b01b.tar.lz
bh20-seq-resource-85c63fecd1eb20aebb7ec8ce3137af253619b01b.zip
Implement fastq2fasta workflows in scheme
-rw-r--r-- scripts/fastq2fasta.scm 294
1 files changed, 294 insertions, 0 deletions
diff --git a/scripts/fastq2fasta.scm b/scripts/fastq2fasta.scm
new file mode 100644
index 0000000..9eaa88f
--- /dev/null
+++ b/scripts/fastq2fasta.scm
@@ -0,0 +1,294 @@
+;;
+;; Generate the CWL tool and workflow descriptions under workflows/fastq2fasta
+;;
+
+(add-to-load-path "./scripts")
+
+(use-modules (srfi srfi-26)
+ (generate-cwl)
+ (yaml))
+
+;; Input named "threads": an int labelled "number of threads" whose
+;; default is 4.  This one object is reused wherever a tool description
+;; or workflow step in this script refers to `threads'.
+(define threads
+  (input "threads"
+         #:type 'int #:label "number of threads" #:default 4))
+
+;; Input named "ref_fasta" of type File; it is passed after "-f" in the
+;; freebayes, bcftools-norm and bcftools-consensus tool descriptions.
+(define ref-fasta (input "ref_fasta" #:type 'File))
+
+;; Input named "sample_id" of type string; it is wired into the
+;; set_sample_id and bam2fasta workflow steps.
+(define sample-id (input "sample_id" #:type 'string))
+
+;; Write workflows/fastq2fasta/bwa-mem.cwl: the YAML form of a command-line
+;; tool description for `bwa mem'.  The "output" File is collected by
+;; globbing the name given in the output_sam input; the tool's stdout and
+;; stderr are named bwa-mem-stdout.log and bwa-mem-stderr.log.
+;; NOTE(review): a string placed directly before an input (e.g. "-t"
+;; before `threads') is presumably used by clitool as that input's flag --
+;; confirm in (generate-cwl).
+(call-with-output-file "workflows/fastq2fasta/bwa-mem.cwl"
+  (lambda (port)
+    (scm->yaml
+     (clitool
+      (list "bwa" "mem"
+            "-t" threads
+            "-o" (input "output_sam" #:type 'string
+                        #:label "sam file to output results to"
+                        #:default "out.sam")
+            "-R" (input "group_header_line" #:type 'string?
+                        #:label "read group header line such as '@RG\tID:foo\tSM:bar'")
+            ;; No flag string precedes the remaining three inputs.
+            (input "index_base" #:type 'File
+                   #:label "fasta file for index basename"
+                   #:other '((secondary-files . #(".amb" ".ann" ".bwt" ".pac" ".sa"))))
+            (input "fastq_forward" #:type 'File
+                   #:label "input fastq file to map (single-end or forward for pair-end)")
+            (input "fastq_reverse" #:type 'File?
+                   #:label "input fastq file to map (reverse for pair-end)"))
+      #:outputs (list (output "output" #:type 'File
+                              #:binding '((glob . "$(inputs.output_sam)")))
+                      (output "stdout" #:type 'stdout)
+                      (output "stderr" #:type 'stderr))
+      #:stdout "bwa-mem-stdout.log"
+      #:stderr "bwa-mem-stderr.log"
+      #:other '((requirements
+                 (Docker-requirement
+                  (docker-pull . "quay.io/biocontainers/bwa:0.7.17--h84994c4_5")))))
+     port)))
+
+;; Write workflows/fastq2fasta/samtools-view.cwl: the YAML form of a tool
+;; description for `samtools view' (its doc string says it converts sam
+;; format to bam format).  The "bam" File is collected by globbing the
+;; name given in the output_filename input; stdout and stderr are named
+;; samtools-view-stdout.log and samtools-view-stderr.log.
+(call-with-output-file "workflows/fastq2fasta/samtools-view.cwl"
+  (lambda (port)
+    (scm->yaml
+     (clitool
+      (list "samtools" "view"
+            "-@" threads
+            "-b" (input "output_bam" #:type 'boolean
+                        #:label "output BAM"
+                        #:default #t)
+            "-o" (input "output_filename" #:type 'string
+                        #:label "output file name"
+                        #:default "aln.bam")
+            ;; The input file is listed here, between the "-o" and "-h"
+            ;; entries; the element order is kept exactly as it was.
+            (input "input_file" #:type 'File
+                   #:label "input file")
+            "-h" (input "include_header" #:type 'boolean
+                        #:label "include the header in the output"
+                        #:default #f)
+            "-S" (input "ignore_previous_version" #:type 'boolean
+                        #:label "ignored for compatibility with previous samtools versions"
+                        #:default #f)
+            "-F" (input "filter_alignments" #:type 'string?
+                        #:label "Do not output alignments with any bits set in INT present in the FLAG field. INT can be specified in hex by beginning with `0x' (i.e. /^0x[0-9A-F]+/) or in octal by beginning with `0' (i.e. /^0[0-7]+/) [0].")
+            "-q" (input "skip_alignments" #:type 'int?
+                        #:label "Skip alignments with MAPQ smaller than INT [0]."))
+      #:outputs (list (output "bam" #:type 'File
+                              #:binding '((glob . "$(inputs.output_filename)")))
+                      (output "stdout" #:type 'stdout)
+                      (output "stderr" #:type 'stderr))
+      #:stdout "samtools-view-stdout.log"
+      #:stderr "samtools-view-stderr.log"
+      #:other '((doc . "samtools view to convert sam format to bam format")
+                (requirements
+                 (Docker-requirement
+                  (docker-pull . "quay.io/biocontainers/samtools:1.9--h8571acd_11")))))
+     port)))
+
+;; Write workflows/fastq2fasta/samtools-sort.cwl: the YAML form of a tool
+;; description for `samtools sort' (doc string: "sort given bam file").
+;; The "sorted_bam" File is collected by globbing the name given in the
+;; output_bam input; stdout and stderr are named
+;; samtools-sort-stdout.log and samtools-sort-stderr.log.
+(call-with-output-file "workflows/fastq2fasta/samtools-sort.cwl"
+  (lambda (port)
+    (scm->yaml
+     (clitool
+      (list "samtools" "sort"
+            "-@" threads
+            "-T" (input "tmpfile" #:type 'string
+                        #:label "Write temporary files to PREFIX.nnnn.bam"
+                        #:default "sort.tmp")
+            "-o" (input "output_bam" #:type 'string
+                        #:label "Write final output to FILENAME"
+                        #:default "aln.sorted.bam")
+            (input "input_bamfile" #:type 'File
+                   #:label "Input bamfile"))
+      #:outputs (list (output "sorted_bam" #:type 'File
+                              #:binding '((glob . "$(inputs.output_bam)")))
+                      (output "stdout" #:type 'stdout)
+                      (output "stderr" #:type 'stderr))
+      #:stdout "samtools-sort-stdout.log"
+      #:stderr "samtools-sort-stderr.log"
+      #:other '((doc . "samtools sort, sort given bam file")
+                (requirements
+                 (Docker-requirement
+                  (docker-pull . "quay.io/biocontainers/samtools:1.9--h8571acd_11")))))
+     port)))
+
+;; Write workflows/fastq2fasta/freebayes.cwl: the YAML form of a tool
+;; description for `freebayes --ploidy 1', taking a "bam" File after
+;; "--bam" and the shared ref_fasta input after "-f".  The tool's stdout is
+;; its "vcf" output and is named var.vcf.
+(call-with-output-file "workflows/fastq2fasta/freebayes.cwl"
+  (lambda (port)
+    (scm->yaml
+     (clitool
+      (list "freebayes" "--ploidy" "1"
+            "--bam" (input "bam" #:type 'File)
+            "-f" ref-fasta)
+      #:outputs (list (output "vcf" #:type 'stdout))
+      #:stdout "var.vcf")
+     port)))
+
+;; Write workflows/fastq2fasta/bcftools-view-exclude-ref.cwl: the YAML form
+;; of a tool description for `bcftools view --no-version -Ou -e'type=ref''
+;; run with the shared threads input on a "vcf" File.  The tool's stdout is
+;; its "bcf" output and is named after the input file:
+;; $(inputs.vcf.nameroot).without-ref.bcf.
+(call-with-output-file "workflows/fastq2fasta/bcftools-view-exclude-ref.cwl"
+  (lambda (port)
+    (scm->yaml
+     (clitool
+      (list "bcftools" "view" "--no-version" "-Ou"
+            "-e'type=ref'" "--threads" threads
+            (input "vcf" #:type 'File))
+      #:outputs (list (output "bcf" #:type 'stdout))
+      #:stdout "$(inputs.vcf.nameroot).without-ref.bcf"
+      ;; Hint the same bcftools image that the bcftools norm, index,
+      ;; view-qc and consensus descriptions in this script hint, so this
+      ;; step does not silently fall back to whatever bcftools the host
+      ;; provides.  It is a hint, not a requirement, so runners without
+      ;; Docker support are unaffected.
+      #:other '((hints
+                 (Docker-requirement
+                  (docker-pull . "quay.io/biocontainers/bcftools:1.10.2--hd2cd319_0")))))
+     port)))
+
+;; Write workflows/fastq2fasta/bcftools-norm.cwl: the YAML form of a tool
+;; description for `bcftools norm -Ob', taking the shared ref_fasta input
+;; after "-f", an output_name string (default "normalized.bcf") after
+;; "-o", the shared threads input and a "bcf" File.  The "normalized_bcf"
+;; File is collected by globbing the output_name input.
+(call-with-output-file "workflows/fastq2fasta/bcftools-norm.cwl"
+  (lambda (port)
+    (scm->yaml
+     (clitool
+      (list "bcftools" "norm" "-Ob"
+            "-f" ref-fasta
+            "-o" (input "output_name" #:type 'string #:default "normalized.bcf")
+            "--threads" threads
+            (input "bcf" #:type 'File))
+      #:outputs (list (output "normalized_bcf" #:type 'File
+                              #:binding '((glob . "$(inputs.output_name)"))))
+      #:other '((hints
+                 (Docker-requirement
+                  (docker-pull . "quay.io/biocontainers/bcftools:1.10.2--hd2cd319_0")))))
+     port)))
+
+;; Write workflows/fastq2fasta/bcftools-index.cwl: the YAML form of a tool
+;; description for `bcftools index' on a "bcf" File.  The "indexed" output
+;; globs the input's own basename and declares a ".csi" secondary file.
+;; NOTE(review): the input is listed under Initial-work-dir-requirement,
+;; presumably so the index is created beside a staged copy of the input
+;; and both can be collected together -- confirm.
+(call-with-output-file "workflows/fastq2fasta/bcftools-index.cwl"
+  (lambda (port)
+    (scm->yaml
+     (clitool
+      (list "bcftools" "index" (input "bcf" #:type 'File))
+      #:outputs (list (output "indexed" #:type 'File
+                              #:binding '((glob . "$(inputs.bcf.basename)"))
+                              #:other '((secondary-files . #(".csi")))))
+      #:other '((hints
+                 (Docker-requirement
+                  (docker-pull . "quay.io/biocontainers/bcftools:1.10.2--hd2cd319_0"))
+                 (Initial-work-dir-requirement
+                  (listing . #("$(inputs.bcf)"))))))
+     port)))
+
+;; Write workflows/fastq2fasta/bcftools-view-qc.cwl: the YAML form of a
+;; tool description for `bcftools view -i 'QUAL > 10 && GT="a"' -Oz' run
+;; with the shared threads input on a "bcf" File that carries a ".csi"
+;; secondary file.  The tool's stdout is its "vcf" output.
+(call-with-output-file "workflows/fastq2fasta/bcftools-view-qc.cwl"
+  (lambda (port)
+    (scm->yaml
+     (clitool
+      (list "bcftools" "view" "-i" "'QUAL > 10 && GT=\"a\"'" "-Oz"
+            "--threads" threads
+            (input "bcf" #:type 'File
+                   #:other '((secondary-files . #(".csi")))))
+      #:outputs (list (output "vcf" #:type 'stdout))
+      ;; Name the captured stdout, as every other stdout-typed tool in
+      ;; this script does.  Without a name the CWL runner chooses a random
+      ;; file name, which makes the step's output object differ from run
+      ;; to run and hides that -Oz produces a compressed VCF.
+      #:stdout "out.changes.vcf.gz"
+      #:other '((hints
+                 (Docker-requirement
+                  (docker-pull . "quay.io/biocontainers/bcftools:1.10.2--hd2cd319_0")))))
+     port)))
+
+;; Write workflows/fastq2fasta/bcftools-consensus.cwl: the YAML form of a
+;; tool description for `bcftools consensus -i 'QUAL > 10 && GT="a"' -Hla',
+;; taking the shared ref_fasta input after "-f" and a "vcf" File that
+;; carries a ".csi" secondary file.  The tool's stdout is its "out_fasta"
+;; output and is named sequence.fasta.
+(call-with-output-file "workflows/fastq2fasta/bcftools-consensus.cwl"
+  (lambda (port)
+    (scm->yaml
+     (clitool
+      (list "bcftools" "consensus"
+            "-i" "'QUAL > 10 && GT=\"a\"'" "-Hla"
+            "-f" ref-fasta
+            (input "vcf" #:type 'File
+                   #:other '((secondary-files . #(".csi")))))
+      #:outputs (list (output "out_fasta" #:type 'stdout))
+      #:stdout "sequence.fasta"
+      #:other '((hints
+                 (Docker-requirement
+                  (docker-pull . "quay.io/biocontainers/bcftools:1.10.2--hd2cd319_0")))))
+     port)))
+
+;; Write workflows/fastq2fasta/set-sample-id.cwl: the YAML form of a tool
+;; description that runs `sed' on a "fasta" File with the expression
+;; s/^>.*/>$(inputs.sample_id)/g supplied through `arguments'.  sample_id
+;; is declared under #:additional-inputs because it appears only inside
+;; that expression, not in the command list.  The tool's stdout is its
+;; "out_fasta" output and is named sequence.fasta.
+(call-with-output-file "workflows/fastq2fasta/set-sample-id.cwl"
+  (lambda (port)
+    (scm->yaml
+     (clitool
+      (list "sed" (input "fasta" #:type 'File))
+      #:additional-inputs (list (input "sample_id" #:type 'string))
+      #:outputs (list (output "out_fasta" #:type 'stdout))
+      #:stdout "sequence.fasta"
+      #:other '((arguments . #("s/^>.*/>$(inputs.sample_id)/g"))))
+     port)))
+
+;; Write workflows/fastq2fasta/bam2fasta.cwl: the YAML form of a workflow
+;; made of eight steps chained through "step/output" source strings:
+;; freebayes -> bcftools_view_exclude_ref -> bcftools_norm ->
+;; bcftools_index_after_normalization -> bcftools_view_qc ->
+;; bcftools_index_after_qc -> bcftools_consensus -> set_sample_id.
+;; Each step takes an id, the CWL file it runs, an alist mapping the
+;; tool's input names to an input object or a source string, and the list
+;; of output names it exposes.  The workflow's single output "out_fasta"
+;; comes from set_sample_id/out_fasta.
+(let ((fasta (input "fasta" #:type 'File)))
+  (call-with-output-file "workflows/fastq2fasta/bam2fasta.cwl"
+    (lambda (port)
+      (scm->yaml
+       (workflow
+        (list (step "freebayes" "freebayes.cwl"
+                    (list (cons 'bam (input "bam" #:type 'File))
+                          (cons 'ref_fasta fasta))
+                    (list "vcf"))
+              (step "bcftools_view_exclude_ref" "bcftools-view-exclude-ref.cwl"
+                    (list (cons 'vcf "freebayes/vcf")
+                          (cons 'threads threads))
+                    (list "bcf"))
+              (step "bcftools_norm" "bcftools-norm.cwl"
+                    (list (cons 'ref_fasta fasta)
+                          (cons 'bcf "bcftools_view_exclude_ref/bcf")
+                          (cons 'threads threads))
+                    (list "normalized_bcf"))
+              (step "bcftools_index_after_normalization" "bcftools-index.cwl"
+                    (list (cons 'bcf "bcftools_norm/normalized_bcf"))
+                    (list "indexed"))
+              (step "bcftools_view_qc" "bcftools-view-qc.cwl"
+                    (list (cons 'bcf "bcftools_index_after_normalization/indexed")
+                          (cons 'threads threads))
+                    (list "vcf"))
+              (step "bcftools_index_after_qc" "bcftools-index.cwl"
+                    (list (cons 'bcf "bcftools_view_qc/vcf"))
+                    (list "indexed"))
+              (step "bcftools_consensus" "bcftools-consensus.cwl"
+                    (list (cons 'ref_fasta fasta)
+                          (cons 'vcf "bcftools_index_after_qc/indexed"))
+                    (list "out_fasta"))
+              (step "set_sample_id" "set-sample-id.cwl"
+                    (list (cons 'fasta "bcftools_consensus/out_fasta")
+                          (cons 'sample_id sample-id))
+                    (list "out_fasta")))
+        (list (workflow-output "out_fasta"
+                               #:type 'File
+                               #:source "set_sample_id/out_fasta")))
+       port))))
+
+;; Write workflows/fastq2fasta/fastq2fasta.cwl: the YAML form of the
+;; workflow that chains four steps -- bwa-mem, samtools-view,
+;; samtools-sort and bam2fasta (itself a workflow file, hence the
+;; Subworkflow-feature-requirement).  Inside this form `ref-fasta' is
+;; rebound to a "ref_fasta" File input that additionally lists the
+;; secondary files .amb .ann .bwt .pac .sa .fai, shadowing the top-level
+;; definition.  Outputs: "out_fasta" from bam2fasta/out_fasta, and
+;; "out_metadata", whose source is the optional "metadata" File input.
+(let ((ref-fasta (input "ref_fasta"
+                        #:type 'File
+                        #:other '((secondary-files . #(".amb" ".ann" ".bwt" ".pac" ".sa" ".fai"))))))
+  (call-with-output-file "workflows/fastq2fasta/fastq2fasta.cwl"
+    (lambda (port)
+      (scm->yaml
+       (workflow
+        (list (step "bwa-mem" "bwa-mem.cwl"
+                    (list (cons 'threads threads)
+                          (cons 'fastq_forward (input "fastq_forward" #:type 'File))
+                          (cons 'fastq_reverse (input "fastq_reverse" #:type 'File?))
+                          (cons 'index_base ref-fasta))
+                    (list "output"))
+              (step "samtools-view" "samtools-view.cwl"
+                    (list (cons 'threads threads)
+                          (cons 'input_file "bwa-mem/output"))
+                    (list "bam"))
+              (step "samtools-sort" "samtools-sort.cwl"
+                    (list (cons 'input_bamfile "samtools-view/bam")
+                          (cons 'threads threads))
+                    (list "sorted_bam"))
+              (step "bam2fasta" "bam2fasta.cwl"
+                    (list (cons 'bam "samtools-sort/sorted_bam")
+                          (cons 'fasta ref-fasta)
+                          (cons 'threads threads)
+                          (cons 'sample_id sample-id))
+                    (list "out_fasta")))
+        (list (workflow-output "out_fasta"
+                               #:type 'File
+                               #:source "bam2fasta/out_fasta")
+              (workflow-output "out_metadata"
+                               #:type 'File?
+                               #:source (input "metadata" #:type 'File?)))
+        #:other '((requirements (Subworkflow-feature-requirement))
+                  (hints (Resource-requirement (ram-min . 3000)))))
+       port))))