aboutsummaryrefslogtreecommitdiff
path: root/workflows
diff options
context:
space:
mode:
author Peter Amstutz 2020-04-20 13:41:56 -0400
committer Peter Amstutz 2020-04-20 13:41:56 -0400
commit d29dfd593233541b85c1cefb239650279d57d59f (patch)
tree 51fa615a05a2606021bdf8c537ad8b36ae4e0a8c /workflows
parent d781e42c9adac07253cb928ae66e9b7314710267 (diff)
download bh20-seq-resource-d29dfd593233541b85c1cefb239650279d57d59f.tar.gz
bh20-seq-resource-d29dfd593233541b85c1cefb239650279d57d59f.tar.lz
bh20-seq-resource-d29dfd593233541b85c1cefb239650279d57d59f.zip
Relabel sequences to match metadata subjects.
Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz@curii.com>
Diffstat (limited to 'workflows')
-rw-r--r-- workflows/pangenome-generate/pangenome-generate.cwl 12
-rw-r--r-- workflows/pangenome-generate/relabel-seqs.cwl 19
-rw-r--r-- workflows/pangenome-generate/relabel-seqs.py 13
-rw-r--r-- workflows/pangenome-generate/seqkit-rmdup.cwl 5
4 files changed, 48 insertions, 1 deletion
diff --git a/workflows/pangenome-generate/pangenome-generate.cwl b/workflows/pangenome-generate/pangenome-generate.cwl
index 2710743..896f936 100644
--- a/workflows/pangenome-generate/pangenome-generate.cwl
+++ b/workflows/pangenome-generate/pangenome-generate.cwl
@@ -22,8 +22,18 @@ outputs:
type: File
outputSource: mergeMetadata/merged
steps:
+ relabel:
+ in:
+ readsFA: inputReads
+ subjects: subjects
+ out: [relabeledSeqs]
+ run: relabel-seqs.cwl
+ common:
+ in: {readsFA: relabel/relabeledSeqs}
+ out: [duplicatedReads]
+ run: seqkit-common.cwl
dedup:
- in: {readsFA: inputReads}
+ in: {readsFA: relabel/relabeledSeqs}
out: [readsMergeDedup]
run: seqkit-rmdup.cwl
overlapReads:
diff --git a/workflows/pangenome-generate/relabel-seqs.cwl b/workflows/pangenome-generate/relabel-seqs.cwl
new file mode 100644
index 0000000..b5b7231
--- /dev/null
+++ b/workflows/pangenome-generate/relabel-seqs.cwl
@@ -0,0 +1,19 @@
+cwlVersion: v1.1
+class: CommandLineTool
+inputs:
+ readsFA: File[]
+ subjects: string[]
+outputs:
+ relabeledSeqs:
+ type: stdout
+requirements:
+ InlineJavascriptRequirement: {}
+ InitialWorkDirRequirement:
+ listing:
+ - entry: {$include: relabel-seqs.py}
+ entryname: relabel-seqs.py
+hints:
+ DockerRequirement:
+ dockerPull: commonworkflowlanguage/cwltool_module
+stdout: relabeledSeqs.fasta
+baseCommand: [python, relabel-seqs.py]
diff --git a/workflows/pangenome-generate/relabel-seqs.py b/workflows/pangenome-generate/relabel-seqs.py
new file mode 100644
index 0000000..32f2386
--- /dev/null
+++ b/workflows/pangenome-generate/relabel-seqs.py
@@ -0,0 +1,13 @@
+import sys
+
+reads = $(inputs.readsFA)
+subjects = $(inputs.subjects)
+
+for i, r in enumerate(reads):
+ with open(r["path"], "rt") as fa:
+ fa.readline()
+ print(">"+subjects[i])
+ data = fa.read(8096)
+ while data:
+ sys.stdout.write(data)
+ data = fa.read(8096)
diff --git a/workflows/pangenome-generate/seqkit-rmdup.cwl b/workflows/pangenome-generate/seqkit-rmdup.cwl
index d3626f5..07184c3 100644
--- a/workflows/pangenome-generate/seqkit-rmdup.cwl
+++ b/workflows/pangenome-generate/seqkit-rmdup.cwl
@@ -7,6 +7,10 @@ outputs:
type: File
outputBinding:
glob: readsMergeDedup.fasta
+ dups:
+ type: File
+ outputBinding:
+ glob: dups.txt
requirements:
InlineJavascriptRequirement: {}
hints:
@@ -28,5 +32,6 @@ baseCommand: seqkit
arguments: [rmdup,
--by-seq,
--ignore-case,
+ --dup-num-file, dups.txt,
-o, readsMergeDedup.fasta,
$(inputs.readsFA)]