# path: root/workflows/pangenome-generate/dups2metadata.py
# blob: 89e72366bbf57809c72f3fae45840010abd6bfbd (plain)
import sys
import re

# Usage: dups2metadata.py METADATA [SAMESEQS]
#
# Copies METADATA to stdout unchanged.  When SAMESEQS is given, each of its
# lines ("<digits><TAB><id>,<id>,...") is turned into one
# has_duplicate_sequence triple per identifier after the first, pointing
# back at the first identifier on that line.

DUPLICATE_PREDICATE = "http://biohackathon.org/bh20-seq-schema/has_duplicate_sequence"

# Compiled once instead of being looked up again for every input line.
_SAMESEQS_LINE = re.compile(r"\d+\t(.*)")


def duplicate_triples(lines):
    """Yield one triple line for every duplicate listed in *lines*.

    Each non-blank line must start with digits followed by a tab and a
    comma-separated list of identifiers.  The first identifier is the
    object of the triple; every following identifier becomes the subject
    of its own triple.  Lines listing a single identifier yield nothing.

    Args:
        lines: iterable of text lines (for example an open text file).

    Yields:
        Newline-terminated strings of the form
        ``<dup> <DUPLICATE_PREDICATE> <first> .``

    Raises:
        ValueError: if a non-blank line does not match the expected
            ``<digits><TAB><ids>`` layout.
    """
    for lineno, line in enumerate(lines, start=1):
        if not line.strip():
            # Tolerate blank lines (e.g. a trailing newline at end of file)
            # instead of crashing on a failed regex match.
            continue
        match = _SAMESEQS_LINE.match(line)
        if match is None:
            raise ValueError(
                "line %d: expected '<count>\\t<id>,<id>,...', got %r"
                % (lineno, line)
            )
        ids = [name.strip() for name in match.group(1).split(",")]
        first = ids[0]
        for dup in ids[1:]:
            yield "<%s> <%s> <%s> .\n" % (dup, DUPLICATE_PREDICATE, first)


def main(argv):
    """Run the script with an ``sys.argv``-style argument list.

    Args:
        argv: ``[program, metadata_path]`` or
            ``[program, metadata_path, sameseqs_path]``.

    Returns:
        Process exit status: 0 on success, 1 when the metadata path is
        missing from *argv*.
    """
    if len(argv) < 2:
        sys.stderr.write("usage: %s METADATA [SAMESEQS]\n" % argv[0])
        return 1

    # Pass the existing metadata through untouched; ``with`` guarantees the
    # file handle is closed even if writing to stdout fails.
    with open(argv[1], "rt") as metadata:
        for line in metadata:
            sys.stdout.write(line)

    if len(argv) < 3:
        return 0

    with open(argv[2], "rt") as sameseqs:
        sys.stdout.writelines(duplicate_triples(sameseqs))
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))