about summary refs log tree commit diff
path: root/workflows/pangenome-generate/relabel-seqs.py
diff options
context:
space:
mode:
author Peter Amstutz 2020-04-21 16:20:10 -0400
committer Peter Amstutz 2020-04-21 16:20:10 -0400
commit 61726edb9293fe529e6efbe5bb6f1cc953bb3c4e (patch)
tree 49637a78fb9196221d4d6589c2e763b08687aadf /workflows/pangenome-generate/relabel-seqs.py
parent f4c3da88c1233802fea46cc972a81dc3b5b51185 (diff)
download bh20-seq-resource-61726edb9293fe529e6efbe5bb6f1cc953bb3c4e.tar.gz
bh20-seq-resource-61726edb9293fe529e6efbe5bb6f1cc953bb3c4e.tar.lz
bh20-seq-resource-61726edb9293fe529e6efbe5bb6f1cc953bb3c4e.zip
Workaround CWL limit by chunking file list
Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz@curii.com>
Diffstat (limited to 'workflows/pangenome-generate/relabel-seqs.py')
-rw-r--r-- workflows/pangenome-generate/relabel-seqs.py 18
1 files changed, 10 insertions, 8 deletions
diff --git a/workflows/pangenome-generate/relabel-seqs.py b/workflows/pangenome-generate/relabel-seqs.py
index 970540f..6b022a0 100644
--- a/workflows/pangenome-generate/relabel-seqs.py
+++ b/workflows/pangenome-generate/relabel-seqs.py
@@ -1,15 +1,17 @@
 import os
 import json
 
-reads = []
-b = 1
-while os.path.exists("block%i" % b):
-    with open("block%i" % b) as f:
-        reads.extend(json.load(f))
-    b += 1
+def readitems(stem):
+    items = []
+    b = 1
+    while os.path.exists("%s%i" % (stem, b)):
+        with open("%s%i" % (stem, b)) as f:
+            items.extend(json.load(f))
+        b += 1
+    return items
 
-with open("subjects") as f:
-    subjects = json.load(f)
+reads = readitems("block")
+subjects = readitems("subs")
 
 relabeled_fasta = open("relabeledSeqs.fasta", "wt")
 original_labels = open("originalLabels.ttl", "wt")