aboutsummaryrefslogtreecommitdiff
path: root/workflows/pangenome-generate
diff options
context:
space:
mode:
author: Peter Amstutz, 2020-10-19 21:04:19 -0400
committer: Peter Amstutz, 2020-11-09 16:45:33 -0500
commit: 0b0fb1c8a68df989bb2e1f593d717ac62e31d952 (patch)
tree: 0237427fed0c4919870c39a5026f5d3af52ed1e9 /workflows/pangenome-generate
parent: b311e2ec0f1d02cf16152855dd8bdd760ed4578b (diff)
download: bh20-seq-resource-0b0fb1c8a68df989bb2e1f593d717ac62e31d952.tar.gz
bh20-seq-resource-0b0fb1c8a68df989bb2e1f593d717ac62e31d952.tar.lz
bh20-seq-resource-0b0fb1c8a68df989bb2e1f593d717ac62e31d952.zip
Extract subset of the all-sequences fasta by running a sparql query.
Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz@curii.com>
Diffstat (limited to 'workflows/pangenome-generate')
-rw-r--r-- workflows/pangenome-generate/from_sparql.cwl | 23
-rw-r--r-- workflows/pangenome-generate/from_sparql.py | 8
2 files changed, 31 insertions, 0 deletions
diff --git a/workflows/pangenome-generate/from_sparql.cwl b/workflows/pangenome-generate/from_sparql.cwl
new file mode 100644
index 0000000..5bc0792
--- /dev/null
+++ b/workflows/pangenome-generate/from_sparql.cwl
@@ -0,0 +1,23 @@
+cwlVersion: v1.1
+class: CommandLineTool  # runs from_sparql.py and captures its standard output as a FASTA file
+requirements:
+ DockerRequirement:
+ dockerFile: |
+ FROM debian:10
+ RUN apt-get update && apt-get -yq --no-install-recommends install samtools python3-rdflib
+ dockerImageId: rdflib-and-samtools  # image id for the container described by dockerFile
+inputs:
+ script:  # the Python program to run; defaults to from_sparql.py
+ type: File
+ default:
+ class: File
+ location: from_sparql.py
+ metadata: File  # 1st script argument; from_sparql.py parses it as N-Triples
+ fasta:  # 2nd script argument; the FASTA that samtools faidx reads from
+ type: File
+ secondaryFiles: [.fai]  # .fai index file accompanying the FASTA
+ query: string  # 3rd script argument; the query text run against the metadata graph
+stdout: selected.fasta  # standard output of the command is written to this file
+outputs:
+ selected: stdout  # the captured selected.fasta
+arguments: [python3, $(inputs.script), $(inputs.metadata), $(inputs.fasta), $(inputs.query)]  # full command line, in positional order
diff --git a/workflows/pangenome-generate/from_sparql.py b/workflows/pangenome-generate/from_sparql.py
new file mode 100644
index 0000000..4610cad
--- /dev/null
+++ b/workflows/pangenome-generate/from_sparql.py
@@ -0,0 +1,8 @@
+from rdflib import Graph
+import sys
+import subprocess
+g = Graph()
+g.parse(sys.argv[1], format="nt")
+res = g.query(sys.argv[3])
+for r in res:
+ subprocess.run(["samtools", "faidx", sys.argv[2], r[0]])