about | summary | refs | log | tree | commit | diff
path: root/bh20simplewebuploader
diff options
context:
space:
mode:
author: Peter Amstutz, 2020-11-09 17:32:05 -0500
committer: GitHub, 2020-11-09 17:32:05 -0500
commit: 74d46196ae69c0c557f64593910ada48d84b3654 (patch)
tree: 57c7df946e1aa1f2054eb49379439ebee4d4c0e6 /bh20simplewebuploader
parent: b311e2ec0f1d02cf16152855dd8bdd760ed4578b (diff)
parent: 98a80bd64d8a495b8fddffdef6e07e5a3fbea1e3 (diff)
download: bh20-seq-resource-74d46196ae69c0c557f64593910ada48d84b3654.tar.gz
bh20-seq-resource-74d46196ae69c0c557f64593910ada48d84b3654.tar.lz
bh20-seq-resource-74d46196ae69c0c557f64593910ada48d84b3654.zip
Merge pull request #105 from arvados/fasta-subset-from-query
Extract a subset of the all-sequences FASTA by running a SPARQL query. Also includes some tweaks to the permalinks.
Diffstat (limited to 'bh20simplewebuploader')
-rw-r--r-- bh20simplewebuploader/main.py 32
-rw-r--r-- bh20simplewebuploader/templates/resource.html 4
2 files changed, 31 insertions, 5 deletions
diff --git a/bh20simplewebuploader/main.py b/bh20simplewebuploader/main.py
index 73503b4..51048a4 100644
--- a/bh20simplewebuploader/main.py
+++ b/bh20simplewebuploader/main.py
@@ -675,13 +675,27 @@ sparqlURL='http://sparql.genenetwork.org/sparql/'
@app.route('/resource/<id>')
def resource(id):
"""Get a COVID19 resource using identifier"""
+
query=f"""
PREFIX pubseq: <http://biohackathon.org/bh20-seq-schema#MainSchema/>
PREFIX sio: <http://semanticscience.org/resource/>
select distinct ?sample ?geoname ?date ?source ?geo ?sampletype ?institute ?sequenceuri
+where {{
{{
?sample sio:SIO_000115 "{id}" .
?sequenceuri pubseq:sample ?sample .
+}}
+union
+{{
+ <http://collections.lugli.arvadosapi.com/c={id}/sequence.fasta> pubseq:sample ?sample .
+ ?sequenceuri pubseq:sample ?sample .
+}}
+union
+{{
+ <http://covid19.genenetwork.org/resource/{id}> pubseq:sample ?sample .
+ ?sequenceuri pubseq:sample ?sample .
+}}
+
?sample <http://purl.obolibrary.org/obo/GAZ_00000448> ?geo .
?geo rdfs:label ?geoname .
?sample <http://ncicb.nci.nih.gov/xml/owl/EVS/Thesaurus.owl#C25164> ?date .
@@ -699,8 +713,9 @@ select distinct ?sample ?geoname ?date ?source ?geo ?sampletype ?institute ?sequ
logging.info("^^^^^^^^^^^^^^^^^^^^^^^^^^^^")
# return jsonify({'sequences': int(result[0]["num"]["value"])})
sequenceuri=sample['sequenceuri']['value']
- collectionuri=sequenceuri.split('sequence.fasta')[0]
- metauri=collectionuri+'metadata.yaml'
+ m = re.match(r"http://collections.lugli.arvadosapi.com/c=([^/]*)/sequence.fasta|http://covid19.genenetwork.org/resource/(.*)", sequenceuri)
+ fastauri = "http://collections.lugli.arvadosapi.com/c=%s/sequence.fasta" % m.group(1)
+ metauri = "http://collections.lugli.arvadosapi.com/c=%s/metadata.yaml" % m.group(1)
locationuri=sample['geo']['value']
location=sample['geoname']['value']
date=sample['date']['value']
@@ -715,7 +730,18 @@ select distinct ?sample ?geoname ?date ?source ?geo ?sampletype ?institute ?sequ
institute=''
if 'institute' in sample:
institute=sample['institute']['value']
- return render_template('permalink.html',id=id,menu='',uri=f"http://covid19.genenetwork.org/resource/{id}",sequenceuri=sequenceuri,locationuri=locationuri,location=location,date=date,source=source,sampletype=sampletype,institute=institute,collectionuri=collectionuri,metauri=metauri)
+ return render_template('permalink.html',
+ id=id,
+ menu='',
+ uri=f"http://covid19.genenetwork.org/resource/{id}",
+ sequenceuri=fastauri,
+ locationuri=locationuri,
+ location=location,
+ date=date,
+ source=source,
+ sampletype=sampletype,
+ institute=institute,
+ metauri=metauri)
# http://covid19.genenetwork.org/location?label=http://www.wikidata.org/entity/Q114
# http://localhost:5067/location?label=http://www.wikidata.org/entity/Q114
diff --git a/bh20simplewebuploader/templates/resource.html b/bh20simplewebuploader/templates/resource.html
index fc52f13..4c50fb9 100644
--- a/bh20simplewebuploader/templates/resource.html
+++ b/bh20simplewebuploader/templates/resource.html
@@ -10,8 +10,8 @@
<p><a href="https://workbench.lugli.arvadosapi.com/projects/lugli-j7d0g-5ct8p1i1wrgyjvp#Data_collections">All sequences project</a></p>
<p><a href="https://workbench.lugli.arvadosapi.com/collections/lugli-4zz18-z513nlpqm03hpca/relabeledSeqs.sorted_by_quality_and_len.fasta">All sequences (FASTA) relabled and deduplicated</a></p>
<p><a href="https://workbench.lugli.arvadosapi.com/collections/lugli-4zz18-z513nlpqm03hpca/mergedmetadata.ttl">Metadata (RDF) for all sequences</a></p>
- <p><a href="https://workbench.lugli.arvadosapi.com/collections/lugli-4zz18-z513nlpqm03hpca/relabeledSeqs.sorted_by_quality_and_len.g6.gfa">All sequences in Graphical Fragment Assembly (GFA)</a> - <a href="https://github.com/GFA-spec/GFA-spec">More about GFA</a></p>
- <p><a href="https://workbench.lugli.arvadosapi.com/collections/lugli-4zz18-z513nlpqm03hpca/relabeledSeqs.sorted_by_quality_and_len.g6.unchop.sorted.odgi">All sequences in Optimized Dynamic Genome/Graph Implementation (ODGI)</a> - <a href="https://github.com/vgteam/odgi">More about ODGI</a></p>
+ <!-- <p><a href="https://workbench.lugli.arvadosapi.com/collections/lugli-4zz18-z513nlpqm03hpca/relabeledSeqs.sorted_by_quality_and_len.g6.gfa">All sequences in Graphical Fragment Assembly (GFA)</a> - <a href="https://github.com/GFA-spec/GFA-spec">More about GFA</a></p> -->
+ <!-- <p><a href="https://workbench.lugli.arvadosapi.com/collections/lugli-4zz18-z513nlpqm03hpca/relabeledSeqs.sorted_by_quality_and_len.g6.unchop.sorted.odgi">All sequences in Optimized Dynamic Genome/Graph Implementation (ODGI)</a> - <a href="https://github.com/vgteam/odgi">More about ODGI</a></p> -->
<!-- <p><a href="https://workbench.lugli.arvadosapi.com/collections/lugli-4zz18-z513nlpqm03hpca/relabeledSeqs_dedup_relabeledSeqs_dedup.ttl.xz">All sequences in RDF using spodgi</a> - <a href="https://github.com/pangenome/spodgi">More about spodgi</a></p> -->