diff options
author | LLTommy | 2020-04-23 20:18:36 +0200 |
---|---|---|
committer | GitHub | 2020-04-23 20:18:36 +0200 |
commit | 7049cd5d29acd601ccbbc9d04f001b84a51e9bd5 (patch) | |
tree | 34a1254d81c2e526427fedb1deaa9f8441e8b260 /workflows/pangenome-generate/relabel-seqs.py | |
parent | f38b9c6f22b82327df9648938a5a4bcf863d8c41 (diff) | |
parent | c7612e7eda5cd38bfbb2d293bebf732893a41b6c (diff) | |
download | bh20-seq-resource-7049cd5d29acd601ccbbc9d04f001b84a51e9bd5.tar.gz bh20-seq-resource-7049cd5d29acd601ccbbc9d04f001b84a51e9bd5.tar.lz bh20-seq-resource-7049cd5d29acd601ccbbc9d04f001b84a51e9bd5.zip |
Merge branch 'master' into patch-3
Diffstat (limited to 'workflows/pangenome-generate/relabel-seqs.py')
-rw-r--r-- | workflows/pangenome-generate/relabel-seqs.py | 30 |
1 files changed, 30 insertions, 0 deletions
import os
import json


def readitems(stem):
    """Load and concatenate the JSON lists stored in numbered files.

    Files are looked up as ``<stem>1``, ``<stem>2``, ... and reading stops at
    the first number for which no file exists.  The parsed content of every
    file is appended to the result with ``list.extend``, so each file is
    expected to hold a JSON array.

    Returns the combined list (empty when ``<stem>1`` does not exist).
    """
    items = []
    b = 1
    path = "%s%i" % (stem, b)
    while os.path.exists(path):
        with open(path) as f:
            items.extend(json.load(f))
        b += 1
        path = "%s%i" % (stem, b)
    return items


def relabel_sequences(reads, subjects, relabeled_fasta, original_labels):
    """Rewrite each FASTA file so its header is the matching subject id.

    reads           -- list of dicts; ``reads[i]["path"]`` is a FASTA file.
    subjects        -- list of subject identifiers, indexed in parallel with
                       ``reads`` (an IndexError is raised if it is shorter).
    relabeled_fasta -- writable text file receiving ``>subject`` followed by
                       the sequence data of every input file.
    original_labels -- writable text file receiving one Turtle triple per
                       input that records the original FASTA header text.
    """
    for i, r in enumerate(reads):
        with open(r["path"], "rt") as fa:
            # First line is the FASTA header; drop the leading ">" marker.
            label = fa.readline()[1:].strip()
            # Escape backslashes before quotes, otherwise a label that
            # already contains a backslash yields an invalid Turtle literal.
            escaped = label.replace("\\", "\\\\").replace('"', '\\"')
            original_labels.write("<%s> <http://biohackathon.org/bh20-seq-schema/original_fasta_label> \"%s\" .\n" % (subjects[i], escaped))
            relabeled_fasta.write(">" + subjects[i] + "\n")

            # The header line written above ends with a newline, so a record
            # without any sequence data needs no extra terminator.  Resetting
            # the flag per file also avoids reusing the previous file's state.
            endswithnewline = True
            data = fa.read(8096)
            while data:
                relabeled_fasta.write(data)
                endswithnewline = data.endswith("\n")
                data = fa.read(8096)
            if not endswithnewline:
                # Keep the next ">" header on a line of its own.
                relabeled_fasta.write("\n")


def main():
    """Relabel the sequences listed in ``block*`` using the ids in ``subs*``.

    Writes ``relabeledSeqs.fasta`` and ``originalLabels.ttl`` in the current
    working directory.
    """
    reads = readitems("block")
    subjects = readitems("subs")

    # Context managers guarantee both outputs are flushed and closed, even
    # when an input file is missing or malformed.
    with open("relabeledSeqs.fasta", "wt") as relabeled_fasta, \
            open("originalLabels.ttl", "wt") as original_labels:
        relabel_sequences(reads, subjects, relabeled_fasta, original_labels)


if __name__ == "__main__":
    main()