Working on search

author: Pjotr Prins 2020-07-21 09:28:43 +0100
committer: Pjotr Prins 2020-07-21 09:28:43 +0100
commit: 56b5c444fd10cc569c4c0d7b76d034799ce679f9 (patch)
tree: 2ca3fdf27884c4dca9178a80f5916d04550209ea
parent: 2e96d0d87abd6357868114b0b59ee66b08985235 (diff)
download: bh20-seq-resource-56b5c444fd10cc569c4c0d7b76d034799ce679f9.tar.gz
bh20-seq-resource-56b5c444fd10cc569c4c0d7b76d034799ce679f9.tar.lz
bh20-seq-resource-56b5c444fd10cc569c4c0d7b76d034799ce679f9.zip
5 files changed, 68 insertions, 37 deletions
diff --git a/bh20simplewebuploader/api.py b/bh20simplewebuploader/api.py
index 8bd1a22..29fa2b8 100644
--- a/bh20simplewebuploader/api.py
+++ b/bh20simplewebuploader/api.py
@@ -1,10 +1,11 @@
 # Public API for PubSeq
 
-import sys
+import os
 import requests
+import sys
 
 from flask import Flask, request, redirect, send_file, send_from_directory, render_template, jsonify
-from bh20simplewebuploader.main import app
+from bh20simplewebuploader.main import app, baseURL
 
 @app.route('/api/version')
 def version():
@@ -14,3 +15,35 @@ def version():
 def ebi_sample(id):
     page = render_template('ebi-sample.xml',**locals())
     return page
+
+@app.route('/api/search', methods=['GET'])
+def search():
+    """
+    Execute a 'global search' for the sample id passed as ?s=<id>.
+    Returns a JSON list of objects with the keys id, fasta, collection and info.
+    """
+    s = request.args.get('s') or "MT326090.1"  # default when 's' is missing or empty
+    # Escape \ and " so the term cannot break out of the SPARQL string literal
+    s = s.replace('\\', '\\\\').replace('"', '\\"')
+    query = """
+    PREFIX pubseq: <http://biohackathon.org/bh20-seq-schema#MainSchema/>
+    PREFIX sio: <http://semanticscience.org/resource/>
+    PREFIX edam: <http://edamontology.org/>
+    select distinct ?id ?seq ?info
+    {
+    ?sample sio:SIO_000115 "%s" .
+    ?sample sio:SIO_000115 ?id .
+    ?seq pubseq:sample ?sample .
+    ?sample edam:data_2091 ?info .
+    } limit 100
+    """ % s
+    r = requests.get(baseURL, params={'query': query, 'format': 'json'})
+    result = r.json()['results']['bindings']
+    # metadata = file.name(seq)+"/metadata.yaml"
+    print(result, file=sys.stderr)  # debug output goes to stderr, not stdout
+    return jsonify([{
+        'id': x['id']['value'],
+        'fasta': x['seq']['value'],
+        'collection': os.path.dirname(x['seq']['value']),
+        'info': x['info']['value'],
+    } for x in result])
diff --git a/bh20simplewebuploader/main.py b/bh20simplewebuploader/main.py
index c306749..62ec5cd 100644
--- a/bh20simplewebuploader/main.py
+++ b/bh20simplewebuploader/main.py
@@ -694,32 +694,6 @@ def getCountDB():
     # print(result, file=sys.stderr)
     return jsonify({'sequences': int(result[0]["num"]["value"])})
 
-# Execute a 'global search'
-@app.route('/api/search', methods=['GET'])
-def search():
-    s = request.args.get('s')
-    query = """
-    PREFIX pubseq: <http://biohackathon.org/bh20-seq-schema#MainSchema/>
-    PREFIX sio: <http://semanticscience.org/resource/>
-    PREFIX edam: <http://edamontology.org/>
-    select distinct ?id ?seq ?info
-    {
-    ?sample sio:SIO_000115 "%s" .
-    ?sample sio:SIO_000115 ?id .
-    ?seq pubseq:sample ?sample .
-    ?sample edam:data_2091 ?info .
-    } limit 100
-    """ % s
-    payload = {'query': query, 'format': 'json'}
-    r = requests.get(baseURL, params=payload)
-    result = r.json()['results']['bindings']
-    print(result,file=sys.stderr);
-    return jsonify([{
-        'id': x['id']['value'],
-        'seq': x['seq']['value'],
-        'info': x['info']['value'],
-    } for x in result])
-
 @app.route('/api/getAllaccessions', methods=['GET'])
 def getAllaccessions():
     query="""SELECT DISTINCT ?fasta ?value WHERE {?fasta ?x[ <http://edamontology.org/data_2091> ?value ]}"""
diff --git a/bh20simplewebuploader/static/main.js b/bh20simplewebuploader/static/main.js
index a12311e..c0bc23f 100644
--- a/bh20simplewebuploader/static/main.js
+++ b/bh20simplewebuploader/static/main.js
@@ -19,11 +19,11 @@ function toDIVTable(rows) {
         html = '<div class="rTable">';
         rows.forEach(row => {
             id = row['id'];
-            seq = row['seq'];
             info = row['info'];
             html += '<div class="rTableRow">';
             html += cell('<a href="'+info+'">'+id+'</a>');
-            html += cell('<a href="'+seq+'">FASTA</a>');
+            html += cell('<a href="'+row['collection']+'">Collection</a>');
+            html += cell('<a href="'+row['fasta']+'">FASTA</a>');
             html += cell('<a href="/api/ebi/sample-'+id+'.xml">EBI/ENA export XML</a>');
             html += '</div>';
         });
diff --git a/doc/web/export.org b/doc/web/export.org
index aa7d680..d76434e 100644
--- a/doc/web/export.org
+++ b/doc/web/export.org
@@ -28,5 +28,4 @@ is a query REST API - PubSeq exports its own
 
 Uploading data to EBI/ENA with PubSeq is described [[http://covid19.genenetwork.org/blog?id=using-covid-19-pubseq-part6][here]].
 
-To export,
-first search for an uploaded entry through its identifier:
+To export, first search for an uploaded entry through its identifier:
diff --git a/test/rest-api.org b/test/rest-api.org
index 31fc792..1930d8b 100644
--- a/test/rest-api.org
+++ b/test/rest-api.org
@@ -10,8 +10,6 @@
 
 #+HTML_HEAD: <link rel="Blog stylesheet" type="text/css" href="blog.css" />
 
-
-
 * PubSeq REST API
 
 Here we document the public REST API that comes with PubSeq. The tests
@@ -38,7 +36,9 @@ The Python3 version is
 
 #+begin_src python :session :exports both
 import requests
-response = requests.get("http://covid19.genenetwork.org/api/version")
+baseURL="http://localhost:5000" # for development
+# baseURL="http://covid19.genenetwork.org"
+response = requests.get(baseURL+"/api/version")
 response_body = response.json()
 assert response_body["service"] == "PubSeq", "PubSeq API not found"
 response_body
@@ -47,9 +47,31 @@ response_body
 #+RESULTS:
 | service | : | PubSeq | version | : | 0.1 |
 
+** Search for an entry
+
+When you use the search box on PubSeq it queries the REST end point
+for information on the search items. For example
+
+#+begin_src python :session :exports both
+requests.get(baseURL+"/api/search?s=MT326090.1").json()
+#+end_src
+
+#+RESULTS:
+| collection | : | http://collections.lugli.arvadosapi.com/c=10eaef75e0b875f81aa1f411c75370cf+126 | fasta | : | http://collections.lugli.arvadosapi.com/c=10eaef75e0b875f81aa1f411c75370cf+126/sequence.fasta | id | : | MT326090.1 | info | : | http://identifiers.org/insdc/MT326090.1#sequence |
+| collection | : | http://collections.lugli.arvadosapi.com/c=5a4c815f3e076ad7760a91864c39dd07+126 | fasta | : | http://collections.lugli.arvadosapi.com/c=5a4c815f3e076ad7760a91864c39dd07+126/sequence.fasta | id | : | MT326090.1 | info | : | http://identifiers.org/insdc/MT326090.1#sequence |
+
+where collection is the raw uploaded data. The hash value in ~c=~ is
+computed on the contents of the Arvados keep [[https://doc.arvados.org/v2.0/user/tutorials/tutorial-keep-mount-gnu-linux.html][collection]] and effectively
+acts as a deduplication uuid.
+
 ** Fetch metadata
 
 
+#+begin_src python :session :exports both
+#+end_src
+
+
+
 ** Fetch EBI XML
 
 PubSeq provides an API that is used to export formats that are
@@ -57,14 +79,14 @@ suitable for uploading data to EBI/ENA from our [[http://covid19.genenetwork.org
 documented [[http://covid19.genenetwork.org/blog?id=using-covid-19-pubseq-part6][here]].
 
 #+begin_src python :session :exports both
-requests.get("http://covid19.genenetwork.org/api/ebi/sample-MT32690.1.xml").text
+requests.get(baseURL+"/api/ebi/sample-MT326090.1.xml").text
 #+end_src
 
 #+RESULTS:
 #+begin_example
 <?xml version="1.0" encoding="UTF-8"?>
 <SAMPLE_SET>
-  <SAMPLE alias="MT32690.1" center_name="COVID-19 PubSeq">
+  <SAMPLE alias="MT326090.1" center_name="COVID-19 PubSeq">
     <TITLE>COVID-19 PubSeq Sample</TITLE>
     <SAMPLE_NAME>
       <TAXON_ID>2697049</TAXON_ID>
@@ -125,6 +147,7 @@ block with C-c C-c. You may need to set
  'org-babel-load-languages
  '((python . t)))
 (setq org-babel-python-command "python3")
+(setq org-babel-eval-verbose t)
 #+end_src
 
 #+RESULTS:
@@ -133,3 +156,5 @@ block with C-c C-c. You may need to set
 To skip confirmations you may also want to set
 
 : (setq org-confirm-babel-evaluate nil)
+
+To see the output of the interpreter, open the *Python* buffer.
author	Pjotr Prins	2020-07-21 09:28:43 +0100
committer	Pjotr Prins	2020-07-21 09:28:43 +0100
commit	56b5c444fd10cc569c4c0d7b76d034799ce679f9 (patch)
tree	2ca3fdf27884c4dca9178a80f5916d04550209ea
parent	2e96d0d87abd6357868114b0b59ee66b08985235 (diff)
download	bh20-seq-resource-56b5c444fd10cc569c4c0d7b76d034799ce679f9.tar.gz bh20-seq-resource-56b5c444fd10cc569c4c0d7b76d034799ce679f9.tar.lz bh20-seq-resource-56b5c444fd10cc569c4c0d7b76d034799ce679f9.zip