diff options
author | Pjotr Prins | 2020-07-21 09:28:43 +0100 |
---|---|---|
committer | Pjotr Prins | 2020-07-21 09:28:43 +0100 |
commit | 56b5c444fd10cc569c4c0d7b76d034799ce679f9 (patch) | |
tree | 2ca3fdf27884c4dca9178a80f5916d04550209ea | |
parent | 2e96d0d87abd6357868114b0b59ee66b08985235 (diff) | |
download | bh20-seq-resource-56b5c444fd10cc569c4c0d7b76d034799ce679f9.tar.gz bh20-seq-resource-56b5c444fd10cc569c4c0d7b76d034799ce679f9.tar.lz bh20-seq-resource-56b5c444fd10cc569c4c0d7b76d034799ce679f9.zip |
Working on search
-rw-r--r-- | bh20simplewebuploader/api.py | 37 | ||||
-rw-r--r-- | bh20simplewebuploader/main.py | 26 | ||||
-rw-r--r-- | bh20simplewebuploader/static/main.js | 4 | ||||
-rw-r--r-- | doc/web/export.org | 3 | ||||
-rw-r--r-- | test/rest-api.org | 35 |
5 files changed, 68 insertions, 37 deletions
diff --git a/bh20simplewebuploader/api.py b/bh20simplewebuploader/api.py index 8bd1a22..29fa2b8 100644 --- a/bh20simplewebuploader/api.py +++ b/bh20simplewebuploader/api.py @@ -1,10 +1,11 @@ # Public API for PubSeq -import sys +import os import requests +import sys from flask import Flask, request, redirect, send_file, send_from_directory, render_template, jsonify -from bh20simplewebuploader.main import app +from bh20simplewebuploader.main import app, baseURL @app.route('/api/version') def version(): @@ -14,3 +15,35 @@ def version(): def ebi_sample(id): page = render_template('ebi-sample.xml',**locals()) return page + +@app.route('/api/search', methods=['GET']) +def search(): + """ + Execute a 'global search' + """ + s = request.args.get('s') + if s == "": + s = "MT326090.1" + query = """ + PREFIX pubseq: <http://biohackathon.org/bh20-seq-schema#MainSchema/> + PREFIX sio: <http://semanticscience.org/resource/> + PREFIX edam: <http://edamontology.org/> + select distinct ?id ?seq ?info + { + ?sample sio:SIO_000115 "%s" . + ?sample sio:SIO_000115 ?id . + ?seq pubseq:sample ?sample . + ?sample edam:data_2091 ?info . 
+ } limit 100 + """ % s + payload = {'query': query, 'format': 'json'} + r = requests.get(baseURL, params=payload) + result = r.json()['results']['bindings'] + # metadata = file.name(seq)+"/metadata.yaml" + print(result) + return jsonify([{ + 'id': x['id']['value'], + 'fasta': x['seq']['value'], + 'collection': os.path.dirname(x['seq']['value']), + 'info': x['info']['value'], + } for x in result]) diff --git a/bh20simplewebuploader/main.py b/bh20simplewebuploader/main.py index c306749..62ec5cd 100644 --- a/bh20simplewebuploader/main.py +++ b/bh20simplewebuploader/main.py @@ -694,32 +694,6 @@ def getCountDB(): # print(result, file=sys.stderr) return jsonify({'sequences': int(result[0]["num"]["value"])}) -# Execute a 'global search' -@app.route('/api/search', methods=['GET']) -def search(): - s = request.args.get('s') - query = """ - PREFIX pubseq: <http://biohackathon.org/bh20-seq-schema#MainSchema/> - PREFIX sio: <http://semanticscience.org/resource/> - PREFIX edam: <http://edamontology.org/> - select distinct ?id ?seq ?info - { - ?sample sio:SIO_000115 "%s" . - ?sample sio:SIO_000115 ?id . - ?seq pubseq:sample ?sample . - ?sample edam:data_2091 ?info . 
- } limit 100 - """ % s - payload = {'query': query, 'format': 'json'} - r = requests.get(baseURL, params=payload) - result = r.json()['results']['bindings'] - print(result,file=sys.stderr); - return jsonify([{ - 'id': x['id']['value'], - 'seq': x['seq']['value'], - 'info': x['info']['value'], - } for x in result]) - @app.route('/api/getAllaccessions', methods=['GET']) def getAllaccessions(): query="""SELECT DISTINCT ?fasta ?value WHERE {?fasta ?x[ <http://edamontology.org/data_2091> ?value ]}""" diff --git a/bh20simplewebuploader/static/main.js b/bh20simplewebuploader/static/main.js index a12311e..c0bc23f 100644 --- a/bh20simplewebuploader/static/main.js +++ b/bh20simplewebuploader/static/main.js @@ -19,11 +19,11 @@ function toDIVTable(rows) { html = '<div class="rTable">'; rows.forEach(row => { id = row['id']; - seq = row['seq']; info = row['info']; html += '<div class="rTableRow">'; html += cell('<a href="'+info+'">'+id+'</a>'); - html += cell('<a href="'+seq+'">FASTA</a>'); + html += cell('<a href="'+row['collection']+'">Collection</a>'); + html += cell('<a href="'+row['fasta']+'">FASTA</a>'); html += cell('<a href="/api/ebi/sample-'+id+'.xml">EBI/ENA export XML</a>'); html += '</div>'; }); diff --git a/doc/web/export.org b/doc/web/export.org index aa7d680..d76434e 100644 --- a/doc/web/export.org +++ b/doc/web/export.org @@ -28,5 +28,4 @@ is a query REST API - PubSeq exports its own Uploading data to EBI/ENA with PubSeq is described [[http://covid19.genenetwork.org/blog?id=using-covid-19-pubseq-part6][here]]. -To export, -first search for an uploaded entry through its identifier: +To export, first search for an uploaded entry through its identifier: diff --git a/test/rest-api.org b/test/rest-api.org index 31fc792..1930d8b 100644 --- a/test/rest-api.org +++ b/test/rest-api.org @@ -10,8 +10,6 @@ #+HTML_HEAD: <link rel="Blog stylesheet" type="text/css" href="blog.css" /> - - * PubSeq REST API Here we document the public REST API that comes with PubSeq. 
The tests @@ -38,7 +36,9 @@ The Python3 version is #+begin_src python :session :exports both import requests -response = requests.get("http://covid19.genenetwork.org/api/version") +baseURL="http://localhost:5000" # for development +# baseURL="http://covid19.genenetwork.org" +response = requests.get(baseURL+"/api/version") response_body = response.json() assert response_body["service"] == "PubSeq", "PubSeq API not found" response_body @@ -47,9 +47,31 @@ response_body #+RESULTS: | service | : | PubSeq | version | : | 0.1 | +** Search for an entry + +When you use the search box on PubSeq it queries the REST end point +for information on the search items. For example + +#+begin_src python :session :exports both +requests.get(baseURL+"/api/search?s=MT326090.1").json() +#+end_src + +#+RESULTS: +| collection | : | http://collections.lugli.arvadosapi.com/c=10eaef75e0b875f81aa1f411c75370cf+126 | fasta | : | http://collections.lugli.arvadosapi.com/c=10eaef75e0b875f81aa1f411c75370cf+126/sequence.fasta | id | : | MT326090.1 | info | : | http://identifiers.org/insdc/MT326090.1#sequence | +| collection | : | http://collections.lugli.arvadosapi.com/c=5a4c815f3e076ad7760a91864c39dd07+126 | fasta | : | http://collections.lugli.arvadosapi.com/c=5a4c815f3e076ad7760a91864c39dd07+126/sequence.fasta | id | : | MT326090.1 | info | : | http://identifiers.org/insdc/MT326090.1#sequence | + +where collection is the raw uploaded data. The hash value in ~c=~ is +computed on the contents of the Arvados keep [[https://doc.arvados.org/v2.0/user/tutorials/tutorial-keep-mount-gnu-linux.html][collection]] and effectively +acts as a deduplication uuid. + ** Fetch metadata +#+begin_src python :session :exports both +#+end_src + + + ** Fetch EBI XML PubSeq provides an API that is used to export formats that are @@ -57,14 +79,14 @@ suitable for uploading data to EBI/ENA from our [[http://covid19.genenetwork.org documented [[http://covid19.genenetwork.org/blog?id=using-covid-19-pubseq-part6][here]]. 
#+begin_src python :session :exports both -requests.get("http://covid19.genenetwork.org/api/ebi/sample-MT32690.1.xml").text +requests.get(baseURL+"/api/ebi/sample-MT326090.1.xml").text #+end_src #+RESULTS: #+begin_example <?xml version="1.0" encoding="UTF-8"?> <SAMPLE_SET> - <SAMPLE alias="MT32690.1" center_name="COVID-19 PubSeq"> + <SAMPLE alias="MT326090.1" center_name="COVID-19 PubSeq"> <TITLE>COVID-19 PubSeq Sample</TITLE> <SAMPLE_NAME> <TAXON_ID>2697049</TAXON_ID> @@ -125,6 +147,7 @@ block with C-c C-c. You may need to set 'org-babel-load-languages '((python . t))) (setq org-babel-python-command "python3") +(setq org-babel-eval-verbose t) #+end_src #+RESULTS: @@ -133,3 +156,5 @@ block with C-c C-c. You may need to set To skip confirmations you may also want to set : (setq org-confirm-babel-evaluate nil) + +To see output of the interpreter, open the *Python* buffer. |