"""Copy a metadata file to stdout and append duplicate-sequence statements.

Usage: dups2metadata.py METADATA [SAMESEQS]

Every line of METADATA is copied to standard output.  When SAMESEQS is
given, each of its lines is expected to look like
``<digits><TAB>id1,id2,...``; for every id after the first, one line of the
form ``<idN> <id1> .`` is printed.
"""

# Provenance: workflows/pangenome-generate/dups2metadata.py, introduced by
# commit b3d2ccf951903ac0b7d717357fb1cccca26fbd15
# ("Consolidate steps to scale graph generation workflow",
# Peter Amstutz, Wed, 19 Aug 2020 15:19:07 -0400,
# Arvados-DCO-1.1-Signed-off-by: Peter Amstutz).

import logging
import re
import sys

# One listing line: a run of digits, a tab, then the comma-separated ids.
# "." does not match a newline, so the captured group excludes the line end.
_DUP_LINE = re.compile(r"\d+\t(.*)")


def duplicate_statements(line):
    """Return the statements described by one line of the SAMESEQS listing.

    Args:
        line: A raw line such as ``"3\\tidA,idB,idC\\n"``.

    Returns:
        A list with one ``"<idN> <id1> ."`` string for every id after the
        first (whitespace around each id is stripped).  The list is empty
        when the line names a single id, is blank, or does not match the
        expected ``<digits><TAB>ids`` layout.
    """
    found = _DUP_LINE.match(line)
    if found is None:
        # A blank or malformed line used to crash with AttributeError;
        # report anything that is not just whitespace and carry on.
        if line.strip():
            logging.warning("skipping malformed duplicates line: %r", line)
        return []

    id_list = found.group(1)
    logging.warning("%s", id_list)
    ids = [item.strip() for item in id_list.split(",")]
    first = ids[0]
    # NOTE(review): the statement has only two terms; if these lines are
    # meant to be RDF triples, a predicate IRI may be missing -- confirm.
    return ["<%s> <%s> ." % (other, first) for other in ids[1:]]


def main(argv):
    """Run the script with an ``sys.argv``-style argument list.

    Args:
        argv: ``argv[1]`` is the path of the file to copy to stdout;
            the optional ``argv[2]`` is the path of the duplicates listing.

    Returns:
        The process exit status (always 0 on normal completion).

    Raises:
        IndexError: If ``argv[1]`` is missing.
        OSError: If an input file cannot be opened.
    """
    with open(argv[1], "rt") as metadata:
        for line in metadata:
            # The line already carries its newline; strip it so print()
            # does not insert a blank line after every copied line, while
            # still guaranteeing the final line is newline-terminated.
            print(line.rstrip("\n"))

    if len(argv) < 3:
        return 0

    with open(argv[2], "rt") as sameseqs:
        for line in sameseqs:
            logging.warning("%s", line)
            for statement in duplicate_statements(line):
                print(statement)
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))