From 56b5c444fd10cc569c4c0d7b76d034799ce679f9 Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Tue, 21 Jul 2020 09:28:43 +0100 Subject: Working on search --- bh20simplewebuploader/api.py | 37 ++++++++++++++++++++++++++++++++++-- bh20simplewebuploader/main.py | 26 ------------------------- bh20simplewebuploader/static/main.js | 4 ++-- doc/web/export.org | 3 +-- test/rest-api.org | 35 +++++++++++++++++++++++++++++----- 5 files changed, 68 insertions(+), 37 deletions(-) diff --git a/bh20simplewebuploader/api.py b/bh20simplewebuploader/api.py index 8bd1a22..29fa2b8 100644 --- a/bh20simplewebuploader/api.py +++ b/bh20simplewebuploader/api.py @@ -1,10 +1,11 @@ # Public API for PubSeq -import sys +import os import requests +import sys from flask import Flask, request, redirect, send_file, send_from_directory, render_template, jsonify -from bh20simplewebuploader.main import app +from bh20simplewebuploader.main import app, baseURL @app.route('/api/version') def version(): @@ -14,3 +15,35 @@ def version(): def ebi_sample(id): page = render_template('ebi-sample.xml',**locals()) return page + +@app.route('/api/search', methods=['GET']) +def search(): + """ + Execute a 'global search' + """ + s = request.args.get('s') + if s == "": + s = "MT326090.1" + query = """ + PREFIX pubseq: + PREFIX sio: + PREFIX edam: + select distinct ?id ?seq ?info + { + ?sample sio:SIO_000115 "%s" . + ?sample sio:SIO_000115 ?id . + ?seq pubseq:sample ?sample . + ?sample edam:data_2091 ?info . 
+ } limit 100 + """ % s + payload = {'query': query, 'format': 'json'} + r = requests.get(baseURL, params=payload) + result = r.json()['results']['bindings'] + # metadata = file.name(seq)+"/metadata.yaml" + print(result) + return jsonify([{ + 'id': x['id']['value'], + 'fasta': x['seq']['value'], + 'collection': os.path.dirname(x['seq']['value']), + 'info': x['info']['value'], + } for x in result]) diff --git a/bh20simplewebuploader/main.py b/bh20simplewebuploader/main.py index c306749..62ec5cd 100644 --- a/bh20simplewebuploader/main.py +++ b/bh20simplewebuploader/main.py @@ -694,32 +694,6 @@ def getCountDB(): # print(result, file=sys.stderr) return jsonify({'sequences': int(result[0]["num"]["value"])}) -# Execute a 'global search' -@app.route('/api/search', methods=['GET']) -def search(): - s = request.args.get('s') - query = """ - PREFIX pubseq: - PREFIX sio: - PREFIX edam: - select distinct ?id ?seq ?info - { - ?sample sio:SIO_000115 "%s" . - ?sample sio:SIO_000115 ?id . - ?seq pubseq:sample ?sample . - ?sample edam:data_2091 ?info . - } limit 100 - """ % s - payload = {'query': query, 'format': 'json'} - r = requests.get(baseURL, params=payload) - result = r.json()['results']['bindings'] - print(result,file=sys.stderr); - return jsonify([{ - 'id': x['id']['value'], - 'seq': x['seq']['value'], - 'info': x['info']['value'], - } for x in result]) - @app.route('/api/getAllaccessions', methods=['GET']) def getAllaccessions(): query="""SELECT DISTINCT ?fasta ?value WHERE {?fasta ?x[ ?value ]}""" diff --git a/bh20simplewebuploader/static/main.js b/bh20simplewebuploader/static/main.js index a12311e..c0bc23f 100644 --- a/bh20simplewebuploader/static/main.js +++ b/bh20simplewebuploader/static/main.js @@ -19,11 +19,11 @@ function toDIVTable(rows) { html = '
'; rows.forEach(row => { id = row['id']; - seq = row['seq']; info = row['info']; html += '
'; html += cell(''+id+''); - html += cell('FASTA'); + html += cell('Collection'); + html += cell('FASTA'); html += cell('EBI/ENA export XML'); html += '
'; }); diff --git a/doc/web/export.org b/doc/web/export.org index aa7d680..d76434e 100644 --- a/doc/web/export.org +++ b/doc/web/export.org @@ -28,5 +28,4 @@ is a query REST API - PubSeq exports its own Uploading data to EBI/ENA with PubSeq is described [[http://covid19.genenetwork.org/blog?id=using-covid-19-pubseq-part6][here]]. -To export, -first search for an uploaded entry through its identifier: +To export, first search for an uploaded entry through its identifier: diff --git a/test/rest-api.org b/test/rest-api.org index 31fc792..1930d8b 100644 --- a/test/rest-api.org +++ b/test/rest-api.org @@ -10,8 +10,6 @@ #+HTML_HEAD: - - * PubSeq REST API Here we document the public REST API that comes with PubSeq. The tests @@ -38,7 +36,9 @@ The Python3 version is #+begin_src python :session :exports both import requests -response = requests.get("http://covid19.genenetwork.org/api/version") +baseURL="http://localhost:5000" # for development +# baseURL="http://covid19.genenetwork.org" +response = requests.get(baseURL+"/api/version") response_body = response.json() assert response_body["service"] == "PubSeq", "PubSeq API not found" response_body @@ -47,9 +47,31 @@ response_body #+RESULTS: | service | : | PubSeq | version | : | 0.1 | +** Search for an entry + +When you use the search box on PubSeq it queries the REST end point +for information on the search items. 
For example + +#+begin_src python :session :exports both +requests.get(baseURL+"/api/search?s=MT326090.1").json() +#+end_src + +#+RESULTS: +| collection | : | http://collections.lugli.arvadosapi.com/c=10eaef75e0b875f81aa1f411c75370cf+126 | fasta | : | http://collections.lugli.arvadosapi.com/c=10eaef75e0b875f81aa1f411c75370cf+126/sequence.fasta | id | : | MT326090.1 | info | : | http://identifiers.org/insdc/MT326090.1#sequence | +| collection | : | http://collections.lugli.arvadosapi.com/c=5a4c815f3e076ad7760a91864c39dd07+126 | fasta | : | http://collections.lugli.arvadosapi.com/c=5a4c815f3e076ad7760a91864c39dd07+126/sequence.fasta | id | : | MT326090.1 | info | : | http://identifiers.org/insdc/MT326090.1#sequence | + +where collection is the raw uploaded data. The hash value in ~c=~ is +computed on the contents of the Arvados keep [[https://doc.arvados.org/v2.0/user/tutorials/tutorial-keep-mount-gnu-linux.html][collection]] and effectively +acts as a deduplication uuid. + ** Fetch metadata +#+begin_src python :session :exports both +#+end_src + + + ** Fetch EBI XML PubSeq provides an API that is used to export formats that are @@ -57,14 +79,14 @@ suitable for uploading data to EBI/ENA from our [[http://covid19.genenetwork.org documented [[http://covid19.genenetwork.org/blog?id=using-covid-19-pubseq-part6][here]]. #+begin_src python :session :exports both -requests.get("http://covid19.genenetwork.org/api/ebi/sample-MT32690.1.xml").text +requests.get(baseURL+"/api/ebi/sample-MT326090.1.xml").text #+end_src #+RESULTS: #+begin_example - + COVID-19 PubSeq Sample 2697049 @@ -125,6 +147,7 @@ block with C-c C-c. You may need to set 'org-babel-load-languages '((python . t))) (setq org-babel-python-command "python3") +(setq org-babel-eval-verbose t) #+end_src #+RESULTS: @@ -133,3 +156,5 @@ block with C-c C-c. You may need to set To skip confirmations you may also want to set : (setq org-confirm-babel-evaluate nil) + +To see output of the interpreter, open the *Python* buffer. 
-- cgit v1.2.3