aboutsummaryrefslogtreecommitdiff
path: root/workflows/pangenome-generate/relabel-seqs.py
diff options
context:
space:
mode:
author: Peter Amstutz, 2020-04-20 13:41:56 -0400
committer: Peter Amstutz, 2020-04-20 13:41:56 -0400
commit: d29dfd593233541b85c1cefb239650279d57d59f (patch)
tree: 51fa615a05a2606021bdf8c537ad8b36ae4e0a8c /workflows/pangenome-generate/relabel-seqs.py
parent: d781e42c9adac07253cb928ae66e9b7314710267 (diff)
download: bh20-seq-resource-d29dfd593233541b85c1cefb239650279d57d59f.tar.gz
bh20-seq-resource-d29dfd593233541b85c1cefb239650279d57d59f.tar.lz
bh20-seq-resource-d29dfd593233541b85c1cefb239650279d57d59f.zip
Relabel sequences to match metadata subjects.
Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz@curii.com>
Diffstat (limited to 'workflows/pangenome-generate/relabel-seqs.py')
-rw-r--r-- workflows/pangenome-generate/relabel-seqs.py 13
1 files changed, 13 insertions, 0 deletions
diff --git a/workflows/pangenome-generate/relabel-seqs.py b/workflows/pangenome-generate/relabel-seqs.py
new file mode 100644
index 0000000..32f2386
--- /dev/null
+++ b/workflows/pangenome-generate/relabel-seqs.py
@@ -0,0 +1,13 @@
+import sys
+
+reads = $(inputs.readsFA)
+subjects = $(inputs.subjects)
+
+for i, r in enumerate(reads):
+ with open(r["path"], "rt") as fa:
+ fa.readline()
+ print(">"+subjects[i])
+ data = fa.read(8096)
+ while data:
+ sys.stdout.write(data)
+ data = fa.read(8096)