From 81daa0acc4c4a0827e1c1198f054f4d4dacd1daf Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Wed, 18 Nov 2020 17:03:01 -0500 Subject: Add query-to-gfa workflow Arvados-DCO-1.1-Signed-off-by: Peter Amstutz --- .../arvados-and-samtools-dockerfile/1078ECD7.key | 30 ++++++++++++ .../arvados-and-samtools-dockerfile/Dockerfile | 10 ++++ workflows/pangenome-generate/collect-seqs.cwl | 3 +- workflows/pangenome-generate/collect-seqs.py | 2 + .../pangenome-generate/pangenome-generate_spoa.cwl | 54 +++++++++++----------- workflows/pangenome-generate/query-to-gfa.cwl | 32 +++++++++++++ 6 files changed, 103 insertions(+), 28 deletions(-) create mode 100644 workflows/pangenome-generate/arvados-and-samtools-dockerfile/1078ECD7.key create mode 100644 workflows/pangenome-generate/arvados-and-samtools-dockerfile/Dockerfile create mode 100644 workflows/pangenome-generate/query-to-gfa.cwl diff --git a/workflows/pangenome-generate/arvados-and-samtools-dockerfile/1078ECD7.key b/workflows/pangenome-generate/arvados-and-samtools-dockerfile/1078ECD7.key new file mode 100644 index 0000000..edc62f4 --- /dev/null +++ b/workflows/pangenome-generate/arvados-and-samtools-dockerfile/1078ECD7.key @@ -0,0 +1,30 @@ +-----BEGIN PGP PUBLIC KEY BLOCK----- + +mQENBEzhgeoBCAChhoK1dqpWzNyDWqRGEvdFdkJaA9D2HRwKPfBfjAoePX6ZyrpA +ItlUsvt/8s/DRiTiPEFQR4S7VqocmU6whJc3gDEGyOM6b1NF873lIfSVwUoE42QE +a76dO8woOYgLUyxu2mKG+bJgGMumjBJt6ZOndYVjTYB/7sEeVxwmMVulfZe0s6zg +ut0+SoTYg2R36qIqeIcWllYt97sEYnyy1qXMis4/3IZnuWkS/frsPR3aeUI4W+o2 +NDN1kj49+LMe7Fb5b7jZY08rZbAWXi1rU1hQx4jC9RvYqlT4HNld4Bn7os1IvOOA +wNiR0oiVdiuDbBxcMvRPktxMrFVjowusRLq/ABEBAAG0PUN1cm92ZXJzZSwgSW5j +IEF1dG9tYXRpYyBTaWduaW5nIEtleSA8c3lzYWRtaW5AY3Vyb3ZlcnNlLmNvbT6J +ATgEEwECACIFAlNgYIECGwMGCwkIBwMCBhUIAgkKCwQWAgMBAh4BAheAAAoJEFcW +WREQeOzXPkEH/jQJDIYI1dxWcYiA+hczmpaZvN2/pc/kwIW/6a03+6zqmSNkebOE +TgoDILacSYc17hy20R1/rWyUstOMKcEgFDBlSehhHyl0f7q/w7d8Ais6MabzsPfx +IceJpsjUg87+BR7qWhgQ0sxmtIF2TKuTFLs+nkGsgSsiBOEF4NvHxuj3HD4y8F27 +HNqrkqwjLS8xJwwH5Gp2uMEVr1AXIH3iSRjJ8X124s8iEP97Q/3IazoYRf9/MCSm +QEx8KzxwDX6t4bW6O4D01K+e9gdkTY70dcMgJoqm5IsX7yxjEubiOunphtlJnZ9d +Oi1yBN5UM3pWKAdcfRj4rcfV9Simvpx9av+5AQ0ETOGB6gEIAMAA0HVMG0BbdnU7 +wWgl5eFdT0AUSrXK/WdcKqVEGGv+c68NETSHWZOJX7O46Eao4gY4cTYprVMBzxpY +/BtQSYLpE0HLvBc1fcFd61Yz4H/9rGSNY0GcIQEbOjbJY5mr8qFsQ1K/mAf3aUL3 +b6ni4sHVicRiRr0Gl4Ihorlskpfu1SHs/C5tvTSVNF9p4vtl5892y1yILQeVpcBs +NCR7MUpdS49xCpvnAWsDZX+ij6LTR3lzCm/ZLCg4gNuZkjgU9oqVfGkqysW7WZ8S +OLvzAwUw7i1EIFX8q6QdudGoezxz8m8OgZM1v8AFpYEKlhEPf1W0MSfaRDwrj866 +8nCLruEAEQEAAYkBHwQYAQIACQUCTOGB6gIbDAAKCRBXFlkREHjs199EB/4+p0G1 +3PHxt6rLWSCGXobDOu4ZOA/qnv0D/JhOLroFds5TzQv6vnS8eAkhCTjHVA+b58cm +kXpI0oYcD4ZP+KK1CHKq2rGfwou7HfAF+icnNqYkeBOkjjbCgkvBlcCInuAuU8JX +DZMkfFk52+eBKwTjS/J/fQp0vDru8bHLp98WgdRHWfJQ3mc3gz4A5sR6zhrGPW6/ +ssnROS4dC2Ohp35GpgN1KjD3EmEw5RoSBYlyrARCaMsivgIKMxGUEyFZWhuJt3N1 +2MTddRwz28hbmYCi+MzHYDbRv+cSyUDmvXaWhfkNKBepClBA1rTWBcldit5vvlqr +yPet6wIKrtLGhAqZ +=CLkG +-----END PGP PUBLIC KEY BLOCK----- diff --git a/workflows/pangenome-generate/arvados-and-samtools-dockerfile/Dockerfile b/workflows/pangenome-generate/arvados-and-samtools-dockerfile/Dockerfile new file mode 100644 index 0000000..6f857ac --- /dev/null +++ b/workflows/pangenome-generate/arvados-and-samtools-dockerfile/Dockerfile @@ -0,0 +1,10 @@ +FROM debian:10 +ENV DEBIAN_FRONTEND noninteractive +RUN apt-get update -q +RUN apt-get install -yq --no-install-recommends gnupg +ADD 1078ECD7.key /tmp/ +RUN cat /tmp/1078ECD7.key | apt-key add - +RUN echo 'deb http://apt.arvados.org/ buster main' > /etc/apt/sources.list.d/apt.arvados.org-stable.list +RUN apt-get update -q && apt-get install -yq --no-install-recommends samtools python3-python-client +RUN rm -f /usr/bin/python && ln -s /usr/share/python3/dist/python3-python-client/bin/python /usr/bin/python +RUN rm -f /usr/bin/python3 && ln -s /usr/share/python3/dist/python3-python-client/bin/python /usr/bin/python3 diff --git a/workflows/pangenome-generate/collect-seqs.cwl b/workflows/pangenome-generate/collect-seqs.cwl index 635108f..268a08c 100644 --- a/workflows/pangenome-generate/collect-seqs.cwl +++ b/workflows/pangenome-generate/collect-seqs.cwl @@ -8,7 +8,7 @@ requirements: arv:RuntimeConstraints: outputDirType: keep_output_dir DockerRequirement: - dockerPull: arvados/jobs:2.0.3 + dockerImageId: arvados-and-samtools WorkReuse: enableReuse: false ResourceRequirement: @@ -36,6 +36,7 @@ outputs: type: File outputBinding: glob: relabeledSeqs.fasta + secondaryFiles: [.fai] mergedMetadata: type: File outputBinding: diff --git a/workflows/pangenome-generate/collect-seqs.py b/workflows/pangenome-generate/collect-seqs.py index cb5bc33..9a89549 100644 --- a/workflows/pangenome-generate/collect-seqs.py +++ b/workflows/pangenome-generate/collect-seqs.py @@ -3,6 +3,7 @@ import arvados import json import shutil import logging +import subprocess import arvados.collection import ruamel.yaml import schema_salad.schema @@ -71,5 +72,6 @@ for item in validated: except Exception as e: logging.exception("Error processing collection %s" % uuid) +subprocess.run(["samtools", "faidx", "relabeledSeqs.fasta"]) shutil.rmtree(".cache") diff --git a/workflows/pangenome-generate/pangenome-generate_spoa.cwl b/workflows/pangenome-generate/pangenome-generate_spoa.cwl index fdf3d9f..a640d15 100644 --- a/workflows/pangenome-generate/pangenome-generate_spoa.cwl +++ b/workflows/pangenome-generate/pangenome-generate_spoa.cwl @@ -19,15 +19,15 @@ outputs: odgiGraph: type: File outputSource: buildGraph/odgiGraph -# odgiPNG: -# type: File -# outputSource: vizGraph/graph_image + odgiPNG: + type: File + outputSource: vizGraph/graph_image spoaGFA: type: File outputSource: induceGraph/spoaGFA - odgiRDF: - type: File - outputSource: odgi2rdf/rdf +# odgiRDF: +# type: File +# outputSource: odgi2rdf/rdf readsMergeDedupSortedByQualAndLen: type: File outputSource: dedup_and_sort_by_quality_and_len/sortedReadsFA @@ -54,27 +54,27 @@ steps: in: {inputGFA: induceGraph/spoaGFA} out: [odgiGraph] run: odgi-build-from-spoa-gfa.cwl - # vizGraph: - # in: - # sparse_graph_index: buildGraph/odgiGraph - # width: - # default: 50000 - # height: - # default: 500 - # path_per_row: - # default: true - # path_height: - # default: 4 - # out: [graph_image] - # requirements: - # ResourceRequirement: - # ramMin: $(15 * 1024) - # outdirMin: 10 - # run: ../tools/odgi/odgi_viz.cwl - odgi2rdf: - in: {odgi: buildGraph/odgiGraph} - out: [rdf] - run: odgi_to_rdf.cwl + vizGraph: + in: + sparse_graph_index: buildGraph/odgiGraph + width: + default: 50000 + height: + default: 500 + path_per_row: + default: true + path_height: + default: 4 + out: [graph_image] + requirements: + ResourceRequirement: + ramMin: $(15 * 1024) + outdirMin: 10 + run: ../tools/odgi/odgi_viz.cwl + # odgi2rdf: + # in: {odgi: buildGraph/odgiGraph} + # out: [rdf] + # run: odgi_to_rdf.cwl dups2metadata: in: metadata: metadata diff --git a/workflows/pangenome-generate/query-to-gfa.cwl b/workflows/pangenome-generate/query-to-gfa.cwl new file mode 100644 index 0000000..2f3a20a --- /dev/null +++ b/workflows/pangenome-generate/query-to-gfa.cwl @@ -0,0 +1,32 @@ +cwlVersion: v1.1 +class: Workflow +requirements: + SubworkflowFeatureRequirement: {} +inputs: + metadata: File + fasta: + type: File + secondaryFiles: [.fai] + query: string +outputs: + odgiGraph: + type: File + outputSource: make-gfa/odgiGraph + spoaGFA: + type: File + outputSource: make-gfa/spoaGFA + readsMergeDedupSortedByQualAndLen: + type: File + outputSource: make-gfa/readsMergeDedupSortedByQualAndLen + mergedMetadata: + type: File + outputSource: make-gfa/mergedMetadata +steps: + get-subset: + run: from_sparql.cwl + in: {metadata: metadata, query: query, fasta: fasta} + out: [selected] + make-gfa: + run: pangenome-generate_spoa.cwl + in: {metadata: metadata, seqs: get-subset/selected} + out: [odgiGraph, spoaGFA, readsMergeDedupSortedByQualAndLen, mergedMetadata] -- cgit v1.2.3