"""Extract a subset of the all-sequences FASTA by running a SPARQL query.

Usage:
    from_sparql.py <graph.nt> <sequences.fasta> <sparql-query>

The graph file is parsed as N-Triples, the query text is evaluated against
it, and the first column of every result row is handed to
``samtools faidx <sequences.fasta> <value>`` as the name of the sequence to
extract.  The ``samtools`` child processes inherit this process's stdout, so
whatever they print ends up on the script's stdout.
"""
# Provenance: workflows/pangenome-generate/from_sparql.py, introduced in
# commit 0b0fb1c8a68df989bb2e1f593d717ac62e31d952 (Peter Amstutz, 2020-10-19).
# Arvados-DCO-1.1-Signed-off-by: Peter Amstutz
import subprocess
import sys

USAGE = "usage: from_sparql.py <graph.nt> <sequences.fasta> <sparql-query>"


def main(argv):
    """Run the query and extract every selected sequence with samtools.

    Args:
        argv: Full argument vector (``sys.argv`` style).  ``argv[1]`` is the
            path of the N-Triples graph, ``argv[2]`` the FASTA file passed to
            ``samtools faidx``, and ``argv[3]`` the SPARQL query text itself.
            Extra trailing arguments are ignored.

    Returns:
        Process exit status: 0 when every ``samtools`` call succeeded, 1 when
        at least one of them exited non-zero, 2 when arguments are missing.
    """
    if len(argv) < 4:
        print(USAGE, file=sys.stderr)
        return 2

    # Imported here so a usage error is reported without needing (or paying
    # the import cost of) rdflib.
    from rdflib import Graph

    graph_path, fasta_path, query = argv[1:4]

    graph = Graph()
    graph.parse(graph_path, format="nt")

    failures = 0
    for row in graph.query(query):
        region = row[0]
        if region is None:
            # Unbound variable in this row: there is no sequence name to
            # look up, and passing None to subprocess would raise TypeError.
            continue
        proc = subprocess.run(["samtools", "faidx", fasta_path, str(region)])
        if proc.returncode != 0:
            # Keep going so the remaining sequences are still extracted, but
            # remember the failure so the caller does not mistake a partial
            # FASTA for a complete one.
            failures += 1
            print(
                f"samtools faidx failed for {region!s} "
                f"(exit status {proc.returncode})",
                file=sys.stderr,
            )

    return 1 if failures else 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))