aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPjotr Prins2020-07-21 09:28:43 +0100
committerPjotr Prins2020-07-21 09:28:43 +0100
commit56b5c444fd10cc569c4c0d7b76d034799ce679f9 (patch)
tree2ca3fdf27884c4dca9178a80f5916d04550209ea
parent2e96d0d87abd6357868114b0b59ee66b08985235 (diff)
downloadbh20-seq-resource-56b5c444fd10cc569c4c0d7b76d034799ce679f9.tar.gz
bh20-seq-resource-56b5c444fd10cc569c4c0d7b76d034799ce679f9.tar.lz
bh20-seq-resource-56b5c444fd10cc569c4c0d7b76d034799ce679f9.zip
Working on search
-rw-r--r--bh20simplewebuploader/api.py37
-rw-r--r--bh20simplewebuploader/main.py26
-rw-r--r--bh20simplewebuploader/static/main.js4
-rw-r--r--doc/web/export.org3
-rw-r--r--test/rest-api.org35
5 files changed, 68 insertions, 37 deletions
diff --git a/bh20simplewebuploader/api.py b/bh20simplewebuploader/api.py
index 8bd1a22..29fa2b8 100644
--- a/bh20simplewebuploader/api.py
+++ b/bh20simplewebuploader/api.py
@@ -1,10 +1,11 @@
# Public API for PubSeq
-import sys
+import os
import requests
+import sys
from flask import Flask, request, redirect, send_file, send_from_directory, render_template, jsonify
-from bh20simplewebuploader.main import app
+from bh20simplewebuploader.main import app, baseURL
@app.route('/api/version')
def version():
@@ -14,3 +15,35 @@ def version():
def ebi_sample(id):
page = render_template('ebi-sample.xml',**locals())
return page
+
def _escape_sparql_literal(text):
    """Escape *text* for use inside a double-quoted SPARQL string literal."""
    # The backslash must be handled first so the escapes added afterwards
    # are not themselves doubled.
    for raw, escaped in (('\\', '\\\\'), ('"', '\\"'),
                         ('\n', '\\n'), ('\r', '\\r')):
        text = text.replace(raw, escaped)
    return text


@app.route('/api/search', methods=['GET'])
def search():
    """
    Execute a 'global search'

    Reads the search term from the ``s`` query parameter, looks up samples
    carrying that identifier by sending a SPARQL query to ``baseURL``, and
    returns a JSON list with one object per match holding the keys ``id``,
    ``fasta``, ``collection`` and ``info``.  ``collection`` is the ``fasta``
    URL with its last path component removed.

    When ``s`` is missing or empty the search falls back to the sample
    identifier "MT326090.1".
    """
    s = request.args.get('s')
    # request.args.get() yields None when the parameter is absent; treat
    # that the same as an empty string instead of searching for "None".
    if not s:
        s = "MT326090.1"
    # The term comes straight from the client, so escape it before placing
    # it between the double quotes of the SPARQL literal below.
    term = _escape_sparql_literal(s)
    query = """
    PREFIX pubseq: <http://biohackathon.org/bh20-seq-schema#MainSchema/>
    PREFIX sio: <http://semanticscience.org/resource/>
    PREFIX edam: <http://edamontology.org/>
    select distinct ?id ?seq ?info
    {
    ?sample sio:SIO_000115 "%s" .
    ?sample sio:SIO_000115 ?id .
    ?seq pubseq:sample ?sample .
    ?sample edam:data_2091 ?info .
    } limit 100
    """ % term
    payload = {'query': query, 'format': 'json'}
    r = requests.get(baseURL, params=payload)
    result = r.json()['results']['bindings']
    # metadata = file.name(seq)+"/metadata.yaml"
    print(result)
    return jsonify([{
        'id': x['id']['value'],
        'fasta': x['seq']['value'],
        'collection': os.path.dirname(x['seq']['value']),
        'info': x['info']['value'],
    } for x in result])
diff --git a/bh20simplewebuploader/main.py b/bh20simplewebuploader/main.py
index c306749..62ec5cd 100644
--- a/bh20simplewebuploader/main.py
+++ b/bh20simplewebuploader/main.py
@@ -694,32 +694,6 @@ def getCountDB():
# print(result, file=sys.stderr)
return jsonify({'sequences': int(result[0]["num"]["value"])})
-# Execute a 'global search'
-@app.route('/api/search', methods=['GET'])
-def search():
- s = request.args.get('s')
- query = """
- PREFIX pubseq: <http://biohackathon.org/bh20-seq-schema#MainSchema/>
- PREFIX sio: <http://semanticscience.org/resource/>
- PREFIX edam: <http://edamontology.org/>
- select distinct ?id ?seq ?info
- {
- ?sample sio:SIO_000115 "%s" .
- ?sample sio:SIO_000115 ?id .
- ?seq pubseq:sample ?sample .
- ?sample edam:data_2091 ?info .
- } limit 100
- """ % s
- payload = {'query': query, 'format': 'json'}
- r = requests.get(baseURL, params=payload)
- result = r.json()['results']['bindings']
- print(result,file=sys.stderr);
- return jsonify([{
- 'id': x['id']['value'],
- 'seq': x['seq']['value'],
- 'info': x['info']['value'],
- } for x in result])
-
@app.route('/api/getAllaccessions', methods=['GET'])
def getAllaccessions():
query="""SELECT DISTINCT ?fasta ?value WHERE {?fasta ?x[ <http://edamontology.org/data_2091> ?value ]}"""
diff --git a/bh20simplewebuploader/static/main.js b/bh20simplewebuploader/static/main.js
index a12311e..c0bc23f 100644
--- a/bh20simplewebuploader/static/main.js
+++ b/bh20simplewebuploader/static/main.js
@@ -19,11 +19,11 @@ function toDIVTable(rows) {
html = '<div class="rTable">';
rows.forEach(row => {
id = row['id'];
- seq = row['seq'];
info = row['info'];
html += '<div class="rTableRow">';
html += cell('<a href="'+info+'">'+id+'</a>');
- html += cell('<a href="'+seq+'">FASTA</a>');
+ html += cell('<a href="'+row['collection']+'">Collection</a>');
+ html += cell('<a href="'+row['fasta']+'">FASTA</a>');
html += cell('<a href="/api/ebi/sample-'+id+'.xml">EBI/ENA export XML</a>');
html += '</div>';
});
diff --git a/doc/web/export.org b/doc/web/export.org
index aa7d680..d76434e 100644
--- a/doc/web/export.org
+++ b/doc/web/export.org
@@ -28,5 +28,4 @@ is a query REST API - PubSeq exports its own
Uploading data to EBI/ENA with PubSeq is described [[http://covid19.genenetwork.org/blog?id=using-covid-19-pubseq-part6][here]].
-To export,
-first search for an uploaded entry through its identifier:
+To export, first search for an uploaded entry through its identifier:
diff --git a/test/rest-api.org b/test/rest-api.org
index 31fc792..1930d8b 100644
--- a/test/rest-api.org
+++ b/test/rest-api.org
@@ -10,8 +10,6 @@
#+HTML_HEAD: <link rel="Blog stylesheet" type="text/css" href="blog.css" />
-
-
* PubSeq REST API
Here we document the public REST API that comes with PubSeq. The tests
@@ -38,7 +36,9 @@ The Python3 version is
#+begin_src python :session :exports both
import requests
-response = requests.get("http://covid19.genenetwork.org/api/version")
+baseURL="http://localhost:5000" # for development
+# baseURL="http://covid19.genenetwork.org"
+response = requests.get(baseURL+"/api/version")
response_body = response.json()
assert response_body["service"] == "PubSeq", "PubSeq API not found"
response_body
@@ -47,9 +47,31 @@ response_body
#+RESULTS:
| service | : | PubSeq | version | : | 0.1 |
+** Search for an entry
+
+When you use the search box on PubSeq, it queries the REST endpoint
+for information on the search items. For example:
+
+#+begin_src python :session :exports both
+requests.get(baseURL+"/api/search?s=MT326090.1").json()
+#+end_src
+
+#+RESULTS:
+| collection | : | http://collections.lugli.arvadosapi.com/c=10eaef75e0b875f81aa1f411c75370cf+126 | fasta | : | http://collections.lugli.arvadosapi.com/c=10eaef75e0b875f81aa1f411c75370cf+126/sequence.fasta | id | : | MT326090.1 | info | : | http://identifiers.org/insdc/MT326090.1#sequence |
+| collection | : | http://collections.lugli.arvadosapi.com/c=5a4c815f3e076ad7760a91864c39dd07+126 | fasta | : | http://collections.lugli.arvadosapi.com/c=5a4c815f3e076ad7760a91864c39dd07+126/sequence.fasta | id | : | MT326090.1 | info | : | http://identifiers.org/insdc/MT326090.1#sequence |
+
+where collection is the raw uploaded data. The hash value in ~c=~ is
+computed on the contents of the Arvados keep [[https://doc.arvados.org/v2.0/user/tutorials/tutorial-keep-mount-gnu-linux.html][collection]] and effectively
+acts as a deduplication uuid.
+
** Fetch metadata
+#+begin_src python :session :exports both
+#+end_src
+
+
+
** Fetch EBI XML
PubSeq provides an API that is used to export formats that are
@@ -57,14 +79,14 @@ suitable for uploading data to EBI/ENA from our [[http://covid19.genenetwork.org
documented [[http://covid19.genenetwork.org/blog?id=using-covid-19-pubseq-part6][here]].
#+begin_src python :session :exports both
-requests.get("http://covid19.genenetwork.org/api/ebi/sample-MT32690.1.xml").text
+requests.get(baseURL+"/api/ebi/sample-MT326090.1.xml").text
#+end_src
#+RESULTS:
#+begin_example
<?xml version="1.0" encoding="UTF-8"?>
<SAMPLE_SET>
- <SAMPLE alias="MT32690.1" center_name="COVID-19 PubSeq">
+ <SAMPLE alias="MT326090.1" center_name="COVID-19 PubSeq">
<TITLE>COVID-19 PubSeq Sample</TITLE>
<SAMPLE_NAME>
<TAXON_ID>2697049</TAXON_ID>
@@ -125,6 +147,7 @@ block with C-c C-c. You may need to set
'org-babel-load-languages
'((python . t)))
(setq org-babel-python-command "python3")
+(setq org-babel-eval-verbose t)
#+end_src
#+RESULTS:
@@ -133,3 +156,5 @@ block with C-c C-c. You may need to set
To skip confirmations you may also want to set
: (setq org-confirm-babel-evaluate nil)
+
+To see the output of the interpreter, open the *Python* buffer.