aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Peter Amstutz 2020-11-18 17:03:01 -0500
committer Peter Amstutz 2020-11-18 17:05:12 -0500
commit 81daa0acc4c4a0827e1c1198f054f4d4dacd1daf (patch)
tree b1370c876394e36cb65947dbb37b094536d117ed
parent ee8ea88dd6b372143c6b08f56856666ce2d0b622 (diff)
download bh20-seq-resource-81daa0acc4c4a0827e1c1198f054f4d4dacd1daf.tar.gz
bh20-seq-resource-81daa0acc4c4a0827e1c1198f054f4d4dacd1daf.tar.lz
bh20-seq-resource-81daa0acc4c4a0827e1c1198f054f4d4dacd1daf.zip
Add query-to-gfa workflow
Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz@curii.com>
-rw-r--r-- workflows/pangenome-generate/arvados-and-samtools-dockerfile/1078ECD7.key 30
-rw-r--r-- workflows/pangenome-generate/arvados-and-samtools-dockerfile/Dockerfile 10
-rw-r--r-- workflows/pangenome-generate/collect-seqs.cwl 3
-rw-r--r-- workflows/pangenome-generate/collect-seqs.py 2
-rw-r--r-- workflows/pangenome-generate/pangenome-generate_spoa.cwl 54
-rw-r--r-- workflows/pangenome-generate/query-to-gfa.cwl 32
6 files changed, 103 insertions, 28 deletions
diff --git a/workflows/pangenome-generate/arvados-and-samtools-dockerfile/1078ECD7.key b/workflows/pangenome-generate/arvados-and-samtools-dockerfile/1078ECD7.key
new file mode 100644
index 0000000..edc62f4
--- /dev/null
+++ b/workflows/pangenome-generate/arvados-and-samtools-dockerfile/1078ECD7.key
@@ -0,0 +1,30 @@
+-----BEGIN PGP PUBLIC KEY BLOCK-----
+
+mQENBEzhgeoBCAChhoK1dqpWzNyDWqRGEvdFdkJaA9D2HRwKPfBfjAoePX6ZyrpA
+ItlUsvt/8s/DRiTiPEFQR4S7VqocmU6whJc3gDEGyOM6b1NF873lIfSVwUoE42QE
+a76dO8woOYgLUyxu2mKG+bJgGMumjBJt6ZOndYVjTYB/7sEeVxwmMVulfZe0s6zg
+ut0+SoTYg2R36qIqeIcWllYt97sEYnyy1qXMis4/3IZnuWkS/frsPR3aeUI4W+o2
+NDN1kj49+LMe7Fb5b7jZY08rZbAWXi1rU1hQx4jC9RvYqlT4HNld4Bn7os1IvOOA
+wNiR0oiVdiuDbBxcMvRPktxMrFVjowusRLq/ABEBAAG0PUN1cm92ZXJzZSwgSW5j
+IEF1dG9tYXRpYyBTaWduaW5nIEtleSA8c3lzYWRtaW5AY3Vyb3ZlcnNlLmNvbT6J
+ATgEEwECACIFAlNgYIECGwMGCwkIBwMCBhUIAgkKCwQWAgMBAh4BAheAAAoJEFcW
+WREQeOzXPkEH/jQJDIYI1dxWcYiA+hczmpaZvN2/pc/kwIW/6a03+6zqmSNkebOE
+TgoDILacSYc17hy20R1/rWyUstOMKcEgFDBlSehhHyl0f7q/w7d8Ais6MabzsPfx
+IceJpsjUg87+BR7qWhgQ0sxmtIF2TKuTFLs+nkGsgSsiBOEF4NvHxuj3HD4y8F27
+HNqrkqwjLS8xJwwH5Gp2uMEVr1AXIH3iSRjJ8X124s8iEP97Q/3IazoYRf9/MCSm
+QEx8KzxwDX6t4bW6O4D01K+e9gdkTY70dcMgJoqm5IsX7yxjEubiOunphtlJnZ9d
+Oi1yBN5UM3pWKAdcfRj4rcfV9Simvpx9av+5AQ0ETOGB6gEIAMAA0HVMG0BbdnU7
+wWgl5eFdT0AUSrXK/WdcKqVEGGv+c68NETSHWZOJX7O46Eao4gY4cTYprVMBzxpY
+/BtQSYLpE0HLvBc1fcFd61Yz4H/9rGSNY0GcIQEbOjbJY5mr8qFsQ1K/mAf3aUL3
+b6ni4sHVicRiRr0Gl4Ihorlskpfu1SHs/C5tvTSVNF9p4vtl5892y1yILQeVpcBs
+NCR7MUpdS49xCpvnAWsDZX+ij6LTR3lzCm/ZLCg4gNuZkjgU9oqVfGkqysW7WZ8S
+OLvzAwUw7i1EIFX8q6QdudGoezxz8m8OgZM1v8AFpYEKlhEPf1W0MSfaRDwrj866
+8nCLruEAEQEAAYkBHwQYAQIACQUCTOGB6gIbDAAKCRBXFlkREHjs199EB/4+p0G1
+3PHxt6rLWSCGXobDOu4ZOA/qnv0D/JhOLroFds5TzQv6vnS8eAkhCTjHVA+b58cm
+kXpI0oYcD4ZP+KK1CHKq2rGfwou7HfAF+icnNqYkeBOkjjbCgkvBlcCInuAuU8JX
+DZMkfFk52+eBKwTjS/J/fQp0vDru8bHLp98WgdRHWfJQ3mc3gz4A5sR6zhrGPW6/
+ssnROS4dC2Ohp35GpgN1KjD3EmEw5RoSBYlyrARCaMsivgIKMxGUEyFZWhuJt3N1
+2MTddRwz28hbmYCi+MzHYDbRv+cSyUDmvXaWhfkNKBepClBA1rTWBcldit5vvlqr
+yPet6wIKrtLGhAqZ
+=CLkG
+-----END PGP PUBLIC KEY BLOCK-----
diff --git a/workflows/pangenome-generate/arvados-and-samtools-dockerfile/Dockerfile b/workflows/pangenome-generate/arvados-and-samtools-dockerfile/Dockerfile
new file mode 100644
index 0000000..6f857ac
--- /dev/null
+++ b/workflows/pangenome-generate/arvados-and-samtools-dockerfile/Dockerfile
@@ -0,0 +1,10 @@
+# Debian 10 image with samtools and the python3-python-client package from apt.arvados.org.
+FROM debian:10
+ENV DEBIAN_FRONTEND=noninteractive
+COPY 1078ECD7.key /tmp/
+# Each install shares a layer with its own "apt-get update" so a cached, stale package index is never reused.
+RUN apt-get update -q && apt-get install -yq --no-install-recommends gnupg
+RUN apt-key add /tmp/1078ECD7.key
+RUN echo 'deb http://apt.arvados.org/ buster main' > /etc/apt/sources.list.d/apt.arvados.org-stable.list
+RUN apt-get update -q && apt-get install -yq --no-install-recommends samtools python3-python-client && rm -rf /var/lib/apt/lists/*
+RUN rm -f /usr/bin/python /usr/bin/python3 && ln -s /usr/share/python3/dist/python3-python-client/bin/python /usr/bin/python && ln -s /usr/share/python3/dist/python3-python-client/bin/python /usr/bin/python3
diff --git a/workflows/pangenome-generate/collect-seqs.cwl b/workflows/pangenome-generate/collect-seqs.cwl
index 635108f..268a08c 100644
--- a/workflows/pangenome-generate/collect-seqs.cwl
+++ b/workflows/pangenome-generate/collect-seqs.cwl
@@ -8,7 +8,7 @@ requirements:
arv:RuntimeConstraints:
outputDirType: keep_output_dir
DockerRequirement:
- dockerPull: arvados/jobs:2.0.3
+ dockerImageId: arvados-and-samtools
WorkReuse:
enableReuse: false
ResourceRequirement:
@@ -36,6 +36,7 @@ outputs:
type: File
outputBinding:
glob: relabeledSeqs.fasta
+ secondaryFiles: [.fai]
mergedMetadata:
type: File
outputBinding:
diff --git a/workflows/pangenome-generate/collect-seqs.py b/workflows/pangenome-generate/collect-seqs.py
index cb5bc33..9a89549 100644
--- a/workflows/pangenome-generate/collect-seqs.py
+++ b/workflows/pangenome-generate/collect-seqs.py
@@ -3,6 +3,7 @@ import arvados
import json
import shutil
import logging
+import subprocess
import arvados.collection
import ruamel.yaml
import schema_salad.schema
@@ -71,5 +72,6 @@ for item in validated:
except Exception as e:
logging.exception("Error processing collection %s" % uuid)
+subprocess.run(["samtools", "faidx", "relabeledSeqs.fasta"], check=True)  # index the FASTA; raise CalledProcessError if samtools exits non-zero
shutil.rmtree(".cache")
diff --git a/workflows/pangenome-generate/pangenome-generate_spoa.cwl b/workflows/pangenome-generate/pangenome-generate_spoa.cwl
index fdf3d9f..a640d15 100644
--- a/workflows/pangenome-generate/pangenome-generate_spoa.cwl
+++ b/workflows/pangenome-generate/pangenome-generate_spoa.cwl
@@ -19,15 +19,15 @@ outputs:
odgiGraph:
type: File
outputSource: buildGraph/odgiGraph
-# odgiPNG:
-# type: File
-# outputSource: vizGraph/graph_image
+ odgiPNG:
+ type: File
+ outputSource: vizGraph/graph_image
spoaGFA:
type: File
outputSource: induceGraph/spoaGFA
- odgiRDF:
- type: File
- outputSource: odgi2rdf/rdf
+# odgiRDF:
+# type: File
+# outputSource: odgi2rdf/rdf
readsMergeDedupSortedByQualAndLen:
type: File
outputSource: dedup_and_sort_by_quality_and_len/sortedReadsFA
@@ -54,27 +54,27 @@ steps:
in: {inputGFA: induceGraph/spoaGFA}
out: [odgiGraph]
run: odgi-build-from-spoa-gfa.cwl
- # vizGraph:
- # in:
- # sparse_graph_index: buildGraph/odgiGraph
- # width:
- # default: 50000
- # height:
- # default: 500
- # path_per_row:
- # default: true
- # path_height:
- # default: 4
- # out: [graph_image]
- # requirements:
- # ResourceRequirement:
- # ramMin: $(15 * 1024)
- # outdirMin: 10
- # run: ../tools/odgi/odgi_viz.cwl
- odgi2rdf:
- in: {odgi: buildGraph/odgiGraph}
- out: [rdf]
- run: odgi_to_rdf.cwl
+ vizGraph:
+ in:
+ sparse_graph_index: buildGraph/odgiGraph
+ width:
+ default: 50000
+ height:
+ default: 500
+ path_per_row:
+ default: true
+ path_height:
+ default: 4
+ out: [graph_image]
+ requirements:
+ ResourceRequirement:
+ ramMin: $(15 * 1024)
+ outdirMin: 10
+ run: ../tools/odgi/odgi_viz.cwl
+ # odgi2rdf:
+ # in: {odgi: buildGraph/odgiGraph}
+ # out: [rdf]
+ # run: odgi_to_rdf.cwl
dups2metadata:
in:
metadata: metadata
diff --git a/workflows/pangenome-generate/query-to-gfa.cwl b/workflows/pangenome-generate/query-to-gfa.cwl
new file mode 100644
index 0000000..2f3a20a
--- /dev/null
+++ b/workflows/pangenome-generate/query-to-gfa.cwl
@@ -0,0 +1,32 @@
+cwlVersion: v1.1
+class: Workflow  # runs from_sparql.cwl, then pangenome-generate_spoa.cwl on its 'selected' output
+requirements:
+  SubworkflowFeatureRequirement: {}  # the make-gfa step runs a nested workflow
+inputs:
+  metadata: File
+  fasta:
+    type: File
+    secondaryFiles: [.fai]  # a .fai file must accompany the FASTA
+  query: string
+outputs:  # every output below is forwarded unchanged from the make-gfa step
+  odgiGraph:
+    type: File
+    outputSource: make-gfa/odgiGraph
+  spoaGFA:
+    type: File
+    outputSource: make-gfa/spoaGFA
+  readsMergeDedupSortedByQualAndLen:
+    type: File
+    outputSource: make-gfa/readsMergeDedupSortedByQualAndLen
+  mergedMetadata:
+    type: File
+    outputSource: make-gfa/mergedMetadata
+steps:
+  get-subset:  # passes metadata, query and fasta to from_sparql.cwl
+    run: from_sparql.cwl
+    in: {metadata: metadata, query: query, fasta: fasta}
+    out: [selected]
+  make-gfa:  # feeds get-subset/selected in as 'seqs'
+    run: pangenome-generate_spoa.cwl
+    in: {metadata: metadata, seqs: get-subset/selected}
+    out: [odgiGraph, spoaGFA, readsMergeDedupSortedByQualAndLen, mergedMetadata]