diff options
Diffstat (limited to 'workflows/pangenome-generate/relabel-seqs.py')
-rw-r--r-- | workflows/pangenome-generate/relabel-seqs.py | 22 |
1 file changed, 17 insertions, 5 deletions
diff --git a/workflows/pangenome-generate/relabel-seqs.py b/workflows/pangenome-generate/relabel-seqs.py
index 6b022a0..25b4a08 100644
--- a/workflows/pangenome-generate/relabel-seqs.py
+++ b/workflows/pangenome-generate/relabel-seqs.py
@@ -1,5 +1,6 @@
 import os
 import json
+import sys
 
 def readitems(stem):
     items = []
@@ -16,15 +17,26 @@ subjects = readitems("subs")
 relabeled_fasta = open("relabeledSeqs.fasta", "wt")
 original_labels = open("originalLabels.ttl", "wt")
 
+blacklist = set()
+if len(sys.argv) > 1:
+    with open(sys.argv[1]) as bl:
+        for l in bl:
+            blacklist.add(l.strip())
+
 for i, r in enumerate(reads):
     with open(r["path"], "rt") as fa:
-        label = fa.readline()
-        original_labels.write("<%s> <http://biohackathon.org/bh20-seq-schema/original_fasta_label> \"%s\" .\n" % (subjects[i], label[1:].strip().replace('"', '\\"')))
-        relabeled_fasta.write(">"+subjects[i]+"\n")
+        label = fa.readline().strip()
+        original_labels.write("<%s> <http://biohackathon.org/bh20-seq-schema/original_fasta_label> \"%s\" .\n" % (subjects[i], label[1:].replace('"', '\\"')))
+        skip = (subjects[i] in blacklist or label[1:] in blacklist)
+        if skip:
+            original_labels.write("<%s> <http://biohackathon.org/bh20-seq-schema/excluded_from_graph> \"true\"^^<http://www.w3.org/2001/XMLSchema#boolean> .\n" % (subjects[i]))
+        if not skip:
+            relabeled_fasta.write(">"+subjects[i]+"\n")
         data = fa.read(8096)
         while data:
-            relabeled_fasta.write(data)
+            if not skip:
+                relabeled_fasta.write(data)
             endswithnewline = data.endswith("\n")
             data = fa.read(8096)
-        if not endswithnewline:
+        if not skip and not endswithnewline:
             relabeled_fasta.write("\n")