about summary refs log tree commit diff
path: root/workflows
diff options
context:
space:
mode:
Diffstat (limited to 'workflows')
-rw-r--r--  workflows/pangenome-generate/arv-main.cwl      |  2
-rw-r--r--  workflows/pangenome-generate/collect-seqs.cwl  |  2
-rw-r--r--  workflows/pangenome-generate/collect-seqs.py   |  2
-rw-r--r--  workflows/pangenome-generate/from_sparql.cwl   | 23
-rw-r--r--  workflows/pangenome-generate/from_sparql.py    |  8
-rwxr-xr-x  workflows/update-workflows.sh                  |  2
6 files changed, 35 insertions, 4 deletions
diff --git a/workflows/pangenome-generate/arv-main.cwl b/workflows/pangenome-generate/arv-main.cwl
index dae47e6..1d71ee3 100644
--- a/workflows/pangenome-generate/arv-main.cwl
+++ b/workflows/pangenome-generate/arv-main.cwl
@@ -36,7 +36,7 @@ steps:
     run: collect-seqs.cwl
     in:
       src_project: src_project
-      schema: metadataSchema
+      metadataSchema: metadataSchema
       exclude: exclude
     out: [relabeledSeqs, mergedMetadata]
   pangenome-generate:
diff --git a/workflows/pangenome-generate/collect-seqs.cwl b/workflows/pangenome-generate/collect-seqs.cwl
index 3511df1..635108f 100644
--- a/workflows/pangenome-generate/collect-seqs.cwl
+++ b/workflows/pangenome-generate/collect-seqs.cwl
@@ -25,7 +25,7 @@ inputs:
   src_project:
     type: string
     inputBinding: {position: 2}
-  schema:
+  metadataSchema:
     type: File
     inputBinding: {position: 3}
   exclude:
diff --git a/workflows/pangenome-generate/collect-seqs.py b/workflows/pangenome-generate/collect-seqs.py
index af4a0dc..1a0807c 100644
--- a/workflows/pangenome-generate/collect-seqs.py
+++ b/workflows/pangenome-generate/collect-seqs.py
@@ -38,7 +38,7 @@ for item in validated:
     pdh = item["portable_data_hash"]
     with arvados.collection.CollectionReader(pdh, api_client=api, keep_client=keepclient) as col:
         with col.open("sequence.fasta", "rt") as fa:
-            subject = "http://collections.lugli.arvadosapi.com/c=%s/sequence.fasta" % pdh
+            subject = "http://covid19.genenetwork.org/resource/%s" % pdh
             label = fa.readline().strip()
             merged_metadata.write("<%s> <http://biohackathon.org/bh20-seq-schema/original_fasta_label> \"%s\" .\n" % (subject, label[1:].replace('"', '\\"')))
             skip = (subject in blacklist or label[1:] in blacklist)
diff --git a/workflows/pangenome-generate/from_sparql.cwl b/workflows/pangenome-generate/from_sparql.cwl
new file mode 100644
index 0000000..5bc0792
--- /dev/null
+++ b/workflows/pangenome-generate/from_sparql.cwl
@@ -0,0 +1,23 @@
+cwlVersion: v1.1
+class: CommandLineTool
+requirements:
+  DockerRequirement:
+    dockerFile: |
+      FROM debian:10
+      RUN apt-get update && apt-get -yq --no-install-recommends install samtools python3-rdflib
+    dockerImageId: rdflib-and-samtools
+inputs:
+  script:
+    type: File
+    default:
+      class: File
+      location: from_sparql.py
+  metadata: File
+  fasta:
+    type: File
+    secondaryFiles: [.fai]
+  query: string
+stdout: selected.fasta
+outputs:
+  selected: stdout
+arguments: [python3, $(inputs.script), $(inputs.metadata), $(inputs.fasta), $(inputs.query)]
diff --git a/workflows/pangenome-generate/from_sparql.py b/workflows/pangenome-generate/from_sparql.py
new file mode 100644
index 0000000..4610cad
--- /dev/null
+++ b/workflows/pangenome-generate/from_sparql.py
@@ -0,0 +1,8 @@
+from rdflib import Graph
+import sys
+import subprocess
+g = Graph()
+g.parse(sys.argv[1], format="nt")
+res = g.query(sys.argv[3])
+for r in res:
+    subprocess.run(["samtools", "faidx", sys.argv[2], r[0]])
diff --git a/workflows/update-workflows.sh b/workflows/update-workflows.sh
index 3b69a58..5182ec4 100755
--- a/workflows/update-workflows.sh
+++ b/workflows/update-workflows.sh
@@ -1,3 +1,3 @@
 #!/bin/sh
 arvados-cwl-runner --project-uuid=lugli-j7d0g-5hswinmpyho8dju --update-workflow=lugli-7fd4e-2zp9q4jo5xpif9y fastq2fasta/fastq2fasta.cwl
-arvados-cwl-runner --project-uuid=lugli-j7d0g-5hswinmpyho8dju --update-workflow=lugli-7fd4e-mqfu9y3ofnpnho1 pangenome-generate/arv-main.cwl
+arvados-cwl-runner --project-uuid=lugli-j7d0g-5hswinmpyho8dju --update-workflow=lugli-7fd4e-mqfu9y3ofnpnho1 pangenome-generate/collect-seqs.cwl