about | summary | refs | log | tree | commit | diff
path: root/workflows/pangenome-generate/dups2metadata.py
diff options
context:
space:
mode:
author: Peter Amstutz, 2020-08-19 15:15:57 -0400
committer: Peter Amstutz, 2020-08-19 16:31:38 -0400
commit: d1e8809a15ae74d0b847abb80f9f63f53078e1d6 (patch)
tree: 77a62801b9e8928889d85dadccf1ffce85b2e0cb /workflows/pangenome-generate/dups2metadata.py
parent: b3d2ccf951903ac0b7d717357fb1cccca26fbd15 (diff)
download: bh20-seq-resource-d1e8809a15ae74d0b847abb80f9f63f53078e1d6.tar.gz
bh20-seq-resource-d1e8809a15ae74d0b847abb80f9f63f53078e1d6.tar.lz
bh20-seq-resource-d1e8809a15ae74d0b847abb80f9f63f53078e1d6.zip
Scaling pangenome generation
Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz@curii.com>
Diffstat (limited to 'workflows/pangenome-generate/dups2metadata.py')
-rw-r--r-- workflows/pangenome-generate/dups2metadata.py 7
1 files changed, 3 insertions, 4 deletions
diff --git a/workflows/pangenome-generate/dups2metadata.py b/workflows/pangenome-generate/dups2metadata.py
index 9bda10a..89e7236 100644
--- a/workflows/pangenome-generate/dups2metadata.py
+++ b/workflows/pangenome-generate/dups2metadata.py
@@ -1,17 +1,16 @@
import sys
+import re
md = open(sys.argv[1], "rt")
for d in md:
- print(d)
+ sys.stdout.write(d)
if len(sys.argv) < 3:
exit(0)
sameseqs = open(sys.argv[2], "rt")
for d in sameseqs:
- logging.warn(d)
g = re.match(r"\d+\t(.*)", d)
- logging.warn("%s", g.group(1))
sp = g.group(1).split(",")
for n in sp[1:]:
- print("<%s> <http://biohackathon.org/bh20-seq-schema/has_duplicate_sequence> <%s> ." % (n.strip(), sp[0].strip()))
+ sys.stdout.write("<%s> <http://biohackathon.org/bh20-seq-schema/has_duplicate_sequence> <%s> .\n" % (n.strip(), sp[0].strip()))