aboutsummaryrefslogtreecommitdiff
path: root/workflows/pangenome-generate/pangenome-generate.cwl
diff options
context:
space:
mode:
author Peter Amstutz 2020-04-20 14:57:25 -0400
committer Peter Amstutz 2020-04-20 14:57:25 -0400
commit 9ddcfeacb3191638f42b08af999889d867f0f81c (patch)
tree 4cfe4c2b1df38bf6e5c79f5f8c0700407f76a472 /workflows/pangenome-generate/pangenome-generate.cwl
parent d29dfd593233541b85c1cefb239650279d57d59f (diff)
download bh20-seq-resource-9ddcfeacb3191638f42b08af999889d867f0f81c.tar.gz
bh20-seq-resource-9ddcfeacb3191638f42b08af999889d867f0f81c.tar.lz
bh20-seq-resource-9ddcfeacb3191638f42b08af999889d867f0f81c.zip
Better handling of duplicate sequences
Also save original fasta label in metadata
Diffstat (limited to 'workflows/pangenome-generate/pangenome-generate.cwl')
-rw-r--r-- workflows/pangenome-generate/pangenome-generate.cwl 10
1 file changed, 4 insertions, 6 deletions
diff --git a/workflows/pangenome-generate/pangenome-generate.cwl b/workflows/pangenome-generate/pangenome-generate.cwl
index 896f936..0cb1368 100644
--- a/workflows/pangenome-generate/pangenome-generate.cwl
+++ b/workflows/pangenome-generate/pangenome-generate.cwl
@@ -26,15 +26,11 @@ steps:
in:
readsFA: inputReads
subjects: subjects
- out: [relabeledSeqs]
+ out: [relabeledSeqs, originalLabels]
run: relabel-seqs.cwl
- common:
- in: {readsFA: relabel/relabeledSeqs}
- out: [duplicatedReads]
- run: seqkit-common.cwl
dedup:
in: {readsFA: relabel/relabeledSeqs}
- out: [readsMergeDedup]
+ out: [readsMergeDedup, dups]
run: seqkit-rmdup.cwl
overlapReads:
in: {readsFA: dedup/readsMergeDedup}
@@ -63,5 +59,7 @@ steps:
metadata: metadata
metadataSchema: metadataSchema
subjects: subjects
+ dups: dedup/dups
+ originalLabels: relabel/originalLabels
out: [merged]
run: merge-metadata.cwl