about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Peter Amstutz 2020-11-09 17:20:27 -0500
committer Peter Amstutz 2020-11-09 17:20:27 -0500
commit 1082b907d816f5da52aba6233073737632d0242f (patch)
tree 8ad59d268234bbea42b3d6b42bd2d352eb56df28
parent b5143c79de268b844f3a6a63d92c6389b047f35e (diff)
download bh20-seq-resource-1082b907d816f5da52aba6233073737632d0242f.tar.gz
bh20-seq-resource-1082b907d816f5da52aba6233073737632d0242f.tar.lz
bh20-seq-resource-1082b907d816f5da52aba6233073737632d0242f.zip
Make resource link work for both portable data hashes and sample id
Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz@curii.com>
-rw-r--r-- bh20simplewebuploader/main.py 14
-rw-r--r-- workflows/pangenome-generate/collect-seqs.py 2
2 files changed, 15 insertions, 1 deletions
diff --git a/bh20simplewebuploader/main.py b/bh20simplewebuploader/main.py
index 73503b4..405544c 100644
--- a/bh20simplewebuploader/main.py
+++ b/bh20simplewebuploader/main.py
@@ -675,13 +675,27 @@ sparqlURL='http://sparql.genenetwork.org/sparql/'
@app.route('/resource/<id>')
def resource(id):
"""Get a COVID19 resource using identifier"""
+
query=f"""
PREFIX pubseq: <http://biohackathon.org/bh20-seq-schema#MainSchema/>
PREFIX sio: <http://semanticscience.org/resource/>
select distinct ?sample ?geoname ?date ?source ?geo ?sampletype ?institute ?sequenceuri
+where {{
{{
?sample sio:SIO_000115 "{id}" .
?sequenceuri pubseq:sample ?sample .
+}}
+union
+{{
+ <http://collections.lugli.arvadosapi.com/c={id}/sequence.fasta> pubseq:sample ?sample .
+ ?sequenceuri pubseq:sample ?sample .
+}}
+union
+{{
+ <http://covid19.genenetwork.org/resource/{id}> pubseq:sample ?sample .
+ ?sequenceuri pubseq:sample ?sample .
+}}
+
?sample <http://purl.obolibrary.org/obo/GAZ_00000448> ?geo .
?geo rdfs:label ?geoname .
?sample <http://ncicb.nci.nih.gov/xml/owl/EVS/Thesaurus.owl#C25164> ?date .
diff --git a/workflows/pangenome-generate/collect-seqs.py b/workflows/pangenome-generate/collect-seqs.py
index af4a0dc..1a0807c 100644
--- a/workflows/pangenome-generate/collect-seqs.py
+++ b/workflows/pangenome-generate/collect-seqs.py
@@ -38,7 +38,7 @@ for item in validated:
pdh = item["portable_data_hash"]
with arvados.collection.CollectionReader(pdh, api_client=api, keep_client=keepclient) as col:
with col.open("sequence.fasta", "rt") as fa:
- subject = "http://collections.lugli.arvadosapi.com/c=%s/sequence.fasta" % pdh
+ subject = "http://covid19.genenetwork.org/resource/%s" % pdh
label = fa.readline().strip()
merged_metadata.write("<%s> <http://biohackathon.org/bh20-seq-schema/original_fasta_label> \"%s\" .\n" % (subject, label[1:].replace('"', '\\"')))
skip = (subject in blacklist or label[1:] in blacklist)