diff options
author | lltommy | 2020-11-11 09:56:12 +0100 |
---|---|---|
committer | lltommy | 2020-11-11 09:56:12 +0100 |
commit | d6aa323b6fc7a82e45cc1df51fc72c2d547146eb (patch) | |
tree | 6e8b77bde4dc34fab3fa8804906f3cb821f61dae /workflows | |
parent | c5fe5de7e4c77bfb48b1ae2f662c2d9cc120c06e (diff) | |
parent | c872248e43c1c66e5fed8ef341f7b4ac21d63e6f (diff) | |
download | bh20-seq-resource-d6aa323b6fc7a82e45cc1df51fc72c2d547146eb.tar.gz bh20-seq-resource-d6aa323b6fc7a82e45cc1df51fc72c2d547146eb.tar.lz bh20-seq-resource-d6aa323b6fc7a82e45cc1df51fc72c2d547146eb.zip |
Merge branch 'master' of https://github.com/arvados/bh20-seq-resource
Diffstat (limited to 'workflows')
-rw-r--r-- | workflows/pangenome-generate/arv-main.cwl | 2 | ||||
-rw-r--r-- | workflows/pangenome-generate/collect-seqs.cwl | 2 | ||||
-rw-r--r-- | workflows/pangenome-generate/collect-seqs.py | 5 | ||||
-rw-r--r-- | workflows/pangenome-generate/from_sparql.cwl | 23 | ||||
-rw-r--r-- | workflows/pangenome-generate/from_sparql.py | 8 | ||||
-rwxr-xr-x | workflows/update-workflows.sh | 2 |
6 files changed, 38 insertions, 4 deletions
diff --git a/workflows/pangenome-generate/arv-main.cwl b/workflows/pangenome-generate/arv-main.cwl index dae47e6..1d71ee3 100644 --- a/workflows/pangenome-generate/arv-main.cwl +++ b/workflows/pangenome-generate/arv-main.cwl @@ -36,7 +36,7 @@ steps: run: collect-seqs.cwl in: src_project: src_project - schema: metadataSchema + metadataSchema: metadataSchema exclude: exclude out: [relabeledSeqs, mergedMetadata] pangenome-generate: diff --git a/workflows/pangenome-generate/collect-seqs.cwl b/workflows/pangenome-generate/collect-seqs.cwl index 3511df1..635108f 100644 --- a/workflows/pangenome-generate/collect-seqs.cwl +++ b/workflows/pangenome-generate/collect-seqs.cwl @@ -25,7 +25,7 @@ inputs: src_project: type: string inputBinding: {position: 2} - schema: + metadataSchema: type: File inputBinding: {position: 3} exclude: diff --git a/workflows/pangenome-generate/collect-seqs.py b/workflows/pangenome-generate/collect-seqs.py index af4a0dc..225a61f 100644 --- a/workflows/pangenome-generate/collect-seqs.py +++ b/workflows/pangenome-generate/collect-seqs.py @@ -36,11 +36,14 @@ if len(sys.argv) > 3: for item in validated: pdh = item["portable_data_hash"] + uuid = item["uuid"] with arvados.collection.CollectionReader(pdh, api_client=api, keep_client=keepclient) as col: with col.open("sequence.fasta", "rt") as fa: - subject = "http://collections.lugli.arvadosapi.com/c=%s/sequence.fasta" % pdh + subject = "http://covid19.genenetwork.org/resource/%s" % uuid label = fa.readline().strip() merged_metadata.write("<%s> <http://biohackathon.org/bh20-seq-schema/original_fasta_label> \"%s\" .\n" % (subject, label[1:].replace('"', '\\"'))) + merged_metadata.write("<%s> <http://biohackathon.org/bh20-seq-schema/collection_pdh> \"%s\" .\n" % (subject, pdh)) + merged_metadata.write("<%s> <http://biohackathon.org/bh20-seq-schema/collection_version> \"%s\" .\n" % (subject, item["version"])) skip = (subject in blacklist or label[1:] in blacklist) if skip: merged_metadata.write("<%s> <http://biohackathon.org/bh20-seq-schema/excluded_from_graph> \"true\"^^<http://www.w3.org/2001/XMLSchema#boolean> .\n" % subject) diff --git a/workflows/pangenome-generate/from_sparql.cwl b/workflows/pangenome-generate/from_sparql.cwl new file mode 100644 index 0000000..5bc0792 --- /dev/null +++ b/workflows/pangenome-generate/from_sparql.cwl @@ -0,0 +1,23 @@ +cwlVersion: v1.1 +class: CommandLineTool +requirements: + DockerRequirement: + dockerFile: | + FROM debian:10 + RUN apt-get update && apt-get -yq --no-install-recommends install samtools python3-rdflib + dockerImageId: rdflib-and-samtools +inputs: + script: + type: File + default: + class: File + location: from_sparql.py + metadata: File + fasta: + type: File + secondaryFiles: [.fai] + query: string +stdout: selected.fasta +outputs: + selected: stdout +arguments: [python3, $(inputs.script), $(inputs.metadata), $(inputs.fasta), $(inputs.query)] diff --git a/workflows/pangenome-generate/from_sparql.py b/workflows/pangenome-generate/from_sparql.py new file mode 100644 index 0000000..4610cad --- /dev/null +++ b/workflows/pangenome-generate/from_sparql.py @@ -0,0 +1,8 @@ +from rdflib import Graph +import sys +import subprocess +g = Graph() +g.parse(sys.argv[1], format="nt") +res = g.query(sys.argv[3]) +for r in res: + subprocess.run(["samtools", "faidx", sys.argv[2], r[0]]) diff --git a/workflows/update-workflows.sh b/workflows/update-workflows.sh index 3b69a58..5182ec4 100755 --- a/workflows/update-workflows.sh +++ b/workflows/update-workflows.sh @@ -1,3 +1,3 @@ #!/bin/sh arvados-cwl-runner --project-uuid=lugli-j7d0g-5hswinmpyho8dju --update-workflow=lugli-7fd4e-2zp9q4jo5xpif9y fastq2fasta/fastq2fasta.cwl -arvados-cwl-runner --project-uuid=lugli-j7d0g-5hswinmpyho8dju --update-workflow=lugli-7fd4e-mqfu9y3ofnpnho1 pangenome-generate/arv-main.cwl +arvados-cwl-runner --project-uuid=lugli-j7d0g-5hswinmpyho8dju --update-workflow=lugli-7fd4e-mqfu9y3ofnpnho1 pangenome-generate/collect-seqs.cwl |