aboutsummaryrefslogtreecommitdiff
path: root/workflows/pangenome-generate/dups2metadata.py
diff options
context:
space:
mode:
author Peter Amstutz 2020-08-19 15:19:07 -0400
committer Peter Amstutz 2020-08-19 16:31:38 -0400
commit b3d2ccf951903ac0b7d717357fb1cccca26fbd15 (patch)
tree 92adcacbd208546bf3f063e6c25765e660972244 /workflows/pangenome-generate/dups2metadata.py
parent 592c921a3223c03d8a22f7a852641ac5d753fb31 (diff)
download bh20-seq-resource-b3d2ccf951903ac0b7d717357fb1cccca26fbd15.tar.gz
bh20-seq-resource-b3d2ccf951903ac0b7d717357fb1cccca26fbd15.tar.lz
bh20-seq-resource-b3d2ccf951903ac0b7d717357fb1cccca26fbd15.zip
Consolidate steps to scale graph generation workflow
Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz@curii.com>
Diffstat (limited to 'workflows/pangenome-generate/dups2metadata.py')
-rw-r--r-- workflows/pangenome-generate/dups2metadata.py 17
1 files changed, 17 insertions, 0 deletions
diff --git a/workflows/pangenome-generate/dups2metadata.py b/workflows/pangenome-generate/dups2metadata.py
new file mode 100644
index 0000000..9bda10a
--- /dev/null
+++ b/workflows/pangenome-generate/dups2metadata.py
@@ -0,0 +1,17 @@
"""Copy a metadata file to stdout and append duplicate-sequence triples.

Usage: dups2metadata.py METADATA [SAMESEQS]

METADATA is echoed to standard output line by line.  If SAMESEQS is given,
each of its lines is expected to look like ``<digits><TAB><id>,<id>,...``;
for every id after the first, one triple linking it to the first id through
the ``has_duplicate_sequence`` predicate is printed.
"""
import logging
import re
import sys

DUPLICATE_PREDICATE = "http://biohackathon.org/bh20-seq-schema/has_duplicate_sequence"

# A leading run of digits (presumably a count -- confirm against the tool
# that produces the file), a tab, then the comma-separated identifier list.
_DUP_LINE = re.compile(r"\d+\t(.*)")


def duplicate_triples(line):
    """Return the duplicate-sequence triples described by one SAMESEQS line.

    The first identifier in the comma-separated list becomes the object of
    every triple; each remaining identifier becomes a subject.  Whitespace
    around identifiers is stripped.

    Returns an empty list when the line does not match the expected
    ``<digits><TAB><list>`` layout or names only a single identifier.
    """
    match = _DUP_LINE.match(line)
    if match is None:
        return []
    first, *others = match.group(1).split(",")
    first = first.strip()
    return [
        "<%s> <%s> <%s> ." % (other.strip(), DUPLICATE_PREDICATE, first)
        for other in others
    ]


def main(argv):
    """Run the script with *argv* (same layout as ``sys.argv``).

    Returns the process exit status: 0 on success, 2 on a usage error.
    """
    if len(argv) < 2:
        sys.stderr.write("usage: %s METADATA [SAMESEQS]\n" % (argv[0] if argv else "dups2metadata.py"))
        return 2

    with open(argv[1], "rt") as metadata:
        for line in metadata:
            # Lines already carry their newline; write them verbatim instead
            # of print(), which would double-space the output.  Only a final
            # unterminated line needs a newline added so that appended
            # triples start on their own line.
            sys.stdout.write(line if line.endswith("\n") else line + "\n")

    if len(argv) < 3:
        return 0

    with open(argv[2], "rt") as sameseqs:
        for line in sameseqs:
            logging.warning("%s", line)
            match = _DUP_LINE.match(line)
            if match is None:
                # Blank or malformed line: report it and keep going rather
                # than crashing on a missing match object.
                logging.warning("skipping unrecognized line: %r", line)
                continue
            logging.warning("%s", match.group(1))
            for triple in duplicate_triples(line):
                print(triple)
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))