aboutsummaryrefslogtreecommitdiff
path: root/scripts/fastq2fasta.scm
blob: ef4682cb8a19ed796189ac6f590cfc0364e61547 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
;;
;; fastq2fasta.cwl workflow
;;

(use-modules (ccwl ccwl))

(define threads
  (input "threads"
         #:type 'int
         #:label "number of threads"
         #:default 4))

(define ref-fasta
  (input "ref_fasta"
         #:type 'File
         #:other '((secondary-files . #(".amb" ".ann" ".bwt" ".pac" ".sa")))))

(define output-sam
  (input "output_sam"
         #:type 'string
         #:label "sam file to output results to"
         #:default "out.sam"))

(define fastq-forward
  (input "fastq_forward"
         #:type 'File
         #:label "input fastq file to map (single-end or forward for pair-end)"))

(define fastq-reverse
  (input "fastq_reverse"
         #:type 'File?
         #:label "input fastq file to map (reverse for pair-end)"))

(define sample-id
  (input "sample_id" #:type 'string))

(define fastq2fasta
  (workflow "fastq2fasta"
            (list (pipeline "bwa_mem_to_normalized"
                            (list (command "bwa_mem"
                                           (list "bwa" "mem" "-t" threads
                                                 ref-fasta fastq-forward fastq-reverse))
                                  (command "samtools_view"
                                           (list "samtools" "view" "-b" "-@" threads "-"))
                                  (command "samtools_sort"
                                           (list "samtools" "sort" "-T" "sort.tmp" "-@" threads "-"))
                                  (command "freebayes"
                                           (list "freebayes" "--ploidy" "1" "--stdin" "-f" ref-fasta))
                                  (command "bcftools_view_exclude_ref"
                                           (list "bcftools" "view"
                                                 "--no-version" "-Ou" "-e'type=ref'"
                                                 "--threads" threads "-"))
                                  (command "bcftools_norm"
                                           (list "bcftools" "norm" "-Ob"
                                                 "-f" ref-fasta "--threads" threads "-")
                                           #:other '((stdout . "normalized.bcf"))))
                            (list (output "normalized"
                                          #:type 'File
                                          #:source "bcftools_norm/stdout")))
                  (command "bcftools_index_after_normalization"
                           (list "bcftools" "index" (input "normalized" #:type 'File))
                           #:outputs (list (output "index_after_normalization"
                                                   #:type 'File
                                                   #:binding '((glob . "$(inputs.normalized.basename)"))
                                                   #:other '((secondary-files . #(".csi")))))
                           #:other `((hints (Initial-work-dir-requirement
                                             (listing . #("$(inputs.normalized)"))))))
                  (command "bcftools_view_qc"
                           (list "bcftools" "view"
                                 "-i" "'QUAL > 10 && GT=\"a\"'"
                                 "-Oz" "--threads" threads
                                 (input "index_after_normalization"
                                        #:type 'File
                                        #:other '((secondary-files . #(".csi")))))
                           #:outputs (list (output "bcftools_view_qc_output_vcf" #:type 'stdout))
                           #:other '((stdout . "bcftools_view_output.vcf.gz")))
                  (command "bcftools_index_after_qc"
                           (list "bcftools" "index" (input "bcftools_view_qc_output_vcf" #:type 'File))
                           #:outputs (list (output "index_after_qc"
                                                   #:type 'File
                                                   #:binding '((glob . "$(inputs.bcftools_view_qc_output_vcf.basename)"))
                                                   #:other '((secondary-files . #(".csi")))))
                           #:other `((hints (Initial-work-dir-requirement
                                             (listing . #("$(inputs.bcftools_view_qc_output_vcf)"))))))
                  (pipeline "consensus"
                            (list (command "bcftools_consensus"
                                           (list "bcftools" "consensus"
                                                 "-i" "'QUAL > 10 && GT=\"a\"'"
                                                 "-Hla" "-f" ref-fasta
                                                 (input "index_after_qc"
                                                        #:type 'File
                                                        #:other '((secondary-files . #(".csi"))))))
                                  (command "set_sample_id"
                                           (list "sed" "s/^>.*/>$(inputs.sample_id)/g")
                                           #:additional-inputs (list sample-id)))
                            (list (output "out_fasta"
                                          #:type 'File
                                          #:source "set_sample_id/stdout"))))
            (list (output "out_fasta"
                          #:type 'File
                          #:source "consensus/out_fasta"))))

(write-cwl fastq2fasta "workflows/fastq2fasta/fastq2fasta.cwl")