about | summary | refs | log | tree | commit | diff
path: root/workflows/pangenome-generate/relabel-seqs.py
diff options
context:
space:
mode:
author Peter Amstutz 2020-04-21 16:20:10 -0400
committer Peter Amstutz 2020-04-21 16:20:10 -0400
commit 61726edb9293fe529e6efbe5bb6f1cc953bb3c4e (patch)
tree 49637a78fb9196221d4d6589c2e763b08687aadf /workflows/pangenome-generate/relabel-seqs.py
parent f4c3da88c1233802fea46cc972a81dc3b5b51185 (diff)
download bh20-seq-resource-61726edb9293fe529e6efbe5bb6f1cc953bb3c4e.tar.gz
bh20-seq-resource-61726edb9293fe529e6efbe5bb6f1cc953bb3c4e.tar.lz
bh20-seq-resource-61726edb9293fe529e6efbe5bb6f1cc953bb3c4e.zip
Workaround CWL limit by chunking file list
Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz@curii.com>
Diffstat (limited to 'workflows/pangenome-generate/relabel-seqs.py')
-rw-r--r-- workflows/pangenome-generate/relabel-seqs.py 18
1 files changed, 10 insertions, 8 deletions
diff --git a/workflows/pangenome-generate/relabel-seqs.py b/workflows/pangenome-generate/relabel-seqs.py
index 970540f..6b022a0 100644
--- a/workflows/pangenome-generate/relabel-seqs.py
+++ b/workflows/pangenome-generate/relabel-seqs.py
@@ -1,15 +1,17 @@
import os
import json
-reads = []
-b = 1
-while os.path.exists("block%i" % b):
- with open("block%i" % b) as f:
- reads.extend(json.load(f))
- b += 1
+def readitems(stem):
+    """Collect the JSON contents of the numbered files "<stem>1", "<stem>2", ...
+
+    File names are relative, so they resolve against the current working
+    directory. Each file's parsed JSON value is appended element-wise via
+    list.extend (presumably every file holds a JSON array -- confirm
+    against the workflow step that writes the chunks).
+
+    Returns a single list with the elements of every file, in file order;
+    an empty list when "<stem>1" does not exist.
+    """
+    items = []
+    b = 1
+    # Scanning stops at the first index with no file, so a gap in the
+    # numbering hides every later chunk.
+    while os.path.exists("%s%i" % (stem, b)):
+        with open("%s%i" % (stem, b)) as f:
+            items.extend(json.load(f))
+        b += 1
+    return items
-with open("subjects") as f:
- subjects = json.load(f)
+reads = readitems("block")
+subjects = readitems("subs")
relabeled_fasta = open("relabeledSeqs.fasta", "wt")
original_labels = open("originalLabels.ttl", "wt")