From 9ddcfeacb3191638f42b08af999889d867f0f81c Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Mon, 20 Apr 2020 14:57:25 -0400 Subject: Better handling of duplicate sequences Also save original fasta label in metadata --- workflows/pangenome-generate/pangenome-generate.cwl | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'workflows/pangenome-generate/pangenome-generate.cwl') diff --git a/workflows/pangenome-generate/pangenome-generate.cwl b/workflows/pangenome-generate/pangenome-generate.cwl index 896f936..0cb1368 100644 --- a/workflows/pangenome-generate/pangenome-generate.cwl +++ b/workflows/pangenome-generate/pangenome-generate.cwl @@ -26,15 +26,11 @@ steps: in: readsFA: inputReads subjects: subjects - out: [relabeledSeqs] + out: [relabeledSeqs, originalLabels] run: relabel-seqs.cwl - common: - in: {readsFA: relabel/relabeledSeqs} - out: [duplicatedReads] - run: seqkit-common.cwl dedup: in: {readsFA: relabel/relabeledSeqs} - out: [readsMergeDedup] + out: [readsMergeDedup, dups] run: seqkit-rmdup.cwl overlapReads: in: {readsFA: dedup/readsMergeDedup} @@ -63,5 +59,7 @@ steps: metadata: metadata metadataSchema: metadataSchema subjects: subjects + dups: dedup/dups + originalLabels: relabel/originalLabels out: [merged] run: merge-metadata.cwl -- cgit v1.2.3