aboutsummaryrefslogtreecommitdiff
path: root/workflows/pangenome-generate/relabel-seqs.py
diff options
context:
space:
mode:
author LLTommy 2020-04-23 20:18:36 +0200
committer GitHub 2020-04-23 20:18:36 +0200
commit 7049cd5d29acd601ccbbc9d04f001b84a51e9bd5 (patch)
tree 34a1254d81c2e526427fedb1deaa9f8441e8b260 /workflows/pangenome-generate/relabel-seqs.py
parent f38b9c6f22b82327df9648938a5a4bcf863d8c41 (diff)
parent c7612e7eda5cd38bfbb2d293bebf732893a41b6c (diff)
download bh20-seq-resource-7049cd5d29acd601ccbbc9d04f001b84a51e9bd5.tar.gz
bh20-seq-resource-7049cd5d29acd601ccbbc9d04f001b84a51e9bd5.tar.lz
bh20-seq-resource-7049cd5d29acd601ccbbc9d04f001b84a51e9bd5.zip
Merge branch 'master' into patch-3
Diffstat (limited to 'workflows/pangenome-generate/relabel-seqs.py')
-rw-r--r-- workflows/pangenome-generate/relabel-seqs.py 30
1 files changed, 30 insertions, 0 deletions
diff --git a/workflows/pangenome-generate/relabel-seqs.py b/workflows/pangenome-generate/relabel-seqs.py
new file mode 100644
index 0000000..6b022a0
--- /dev/null
+++ b/workflows/pangenome-generate/relabel-seqs.py
@@ -0,0 +1,30 @@
+import os
+import json
+
def readitems(stem):
    """Load and concatenate the JSON arrays stored in a numbered file series.

    The files are named ``<stem>1``, ``<stem>2``, ... and are read in
    ascending order.  Reading stops at the first number for which no file
    exists, so any later files after a gap are ignored.

    Args:
        stem: Path prefix to which the 1-based file number is appended.

    Returns:
        One list holding the elements of every file's JSON array, in file
        order.  The list is empty when ``<stem>1`` does not exist.
    """
    items = []
    b = 1
    path = "%s%i" % (stem, b)
    while os.path.exists(path):
        # Binary mode: json.load detects the UTF-8/16/32 encoding itself, so
        # the result does not depend on the locale's default text encoding.
        with open(path, "rb") as f:
            items.extend(json.load(f))
        b += 1
        path = "%s%i" % (stem, b)
    return items
+
def _escape_turtle_literal(text):
    """Escape *text* for use inside a double-quoted Turtle string literal."""
    # Backslashes go first so the ones added for the quotes are not doubled.
    return text.replace("\\", "\\\\").replace('"', '\\"')


def relabel(reads, subjects, relabeled_fasta, original_labels):
    """Rewrite each FASTA file under a new label and record the old label.

    For every entry of *reads* the file at ``entry["path"]`` is opened, its
    first line is taken as the original label, and the remainder is copied
    to *relabeled_fasta* under a header made from the matching subject.

    Args:
        reads: Sequence of mappings, each with a ``"path"`` key naming a
            FASTA file.
        subjects: Sequence of strings; ``subjects[i]`` becomes the new label
            of ``reads[i]`` and the subject of its Turtle statement.
        relabeled_fasta: Writable text file receiving the relabeled FASTA.
        original_labels: Writable text file receiving one Turtle statement
            per input with the original label as a string literal.

    Raises:
        IndexError: If *subjects* has fewer entries than *reads*.
    """
    for i, r in enumerate(reads):
        with open(r["path"], "rt") as fa:
            # First line is the header; drop its first character (the ">"
            # marker in FASTA) and the surrounding whitespace.
            label = fa.readline()
            original_labels.write(
                "<%s> <http://biohackathon.org/bh20-seq-schema/original_fasta_label> \"%s\" .\n"
                % (subjects[i], _escape_turtle_literal(label[1:].strip()))
            )
            relabeled_fasta.write(">" + subjects[i] + "\n")

            # Reset per file: a file with nothing after its header must not
            # hit an unbound name or inherit the previous file's state.  The
            # header just written already ends the line, hence True.
            endswithnewline = True
            for data in iter(lambda: fa.read(8096), ""):
                relabeled_fasta.write(data)
                endswithnewline = data.endswith("\n")
            # Make sure the next record's header starts on its own line.
            if not endswithnewline:
                relabeled_fasta.write("\n")


if __name__ == "__main__":
    reads = readitems("block")
    subjects = readitems("subs")

    # Context managers guarantee both outputs are flushed and closed, even
    # when an input file turns out to be unreadable.
    with open("relabeledSeqs.fasta", "wt") as relabeled_fasta, \
            open("originalLabels.ttl", "wt") as original_labels:
        relabel(reads, subjects, relabeled_fasta, original_labels)