Add sequencer JSON/XML output

author: Pjotr Prins 2020-07-21 11:05:00 +0100
committer: Pjotr Prins 2020-07-21 11:05:00 +0100
commit: dceac420dffa3aa74ef49a50d9be01e450e9d339 (patch)
tree: 1d9e1d4f02c1b1b086494c6659d2fff06c77547e
parent: 56b5c444fd10cc569c4c0d7b76d034799ce679f9 (diff)
download: bh20-seq-resource-dceac420dffa3aa74ef49a50d9be01e450e9d339.tar.gz
bh20-seq-resource-dceac420dffa3aa74ef49a50d9be01e450e9d339.tar.lz
bh20-seq-resource-dceac420dffa3aa74ef49a50d9be01e450e9d339.zip
3 files changed, 52 insertions, 6 deletions
diff --git a/bh20simplewebuploader/api.py b/bh20simplewebuploader/api.py
index 29fa2b8..ff82b72 100644
--- a/bh20simplewebuploader/api.py
+++ b/bh20simplewebuploader/api.py
@@ -7,13 +7,51 @@ import sys
 from flask import Flask, request, redirect, send_file, send_from_directory, render_template, jsonify
 from bh20simplewebuploader.main import app, baseURL
 
+# Helper functions
+
+def fetch_sample_metadata(id):
+    query = """
+    PREFIX pubseq: <http://biohackathon.org/bh20-seq-schema#MainSchema/>
+    PREFIX sio: <http://semanticscience.org/resource/>
+    PREFIX edam: <http://edamontology.org/>
+    PREFIX efo: <http://www.ebi.ac.uk/efo/>
+    select distinct ?id ?seq ?info ?sequencer
+    {
+    ?sample sio:SIO_000115 "%s" .
+    ?sample sio:SIO_000115 ?id .
+    ?seq pubseq:technology ?tech .
+    ?seq pubseq:sample ?sample .
+    ?sample edam:data_2091 ?info .
+    ?tech efo:EFO_0002699 ?sequencer .
+    } limit 5
+    """ % id
+    payload = {'query': query, 'format': 'json'}
+    r = requests.get(baseURL, params=payload)
+    return r.json()['results']['bindings']
+
+# Main API routes
+
 @app.route('/api/version')
 def version():
     return jsonify({ 'service': 'PubSeq', 'version': 0.10 })
 
+@app.route('/api/sample/<id>.json')
+def sample(id):
+    # metadata = file.name(seq)+"/metadata.yaml"
+    meta = fetch_sample_metadata(id)
+    return jsonify([{
+        'id': x['id']['value'],
+        'fasta': x['seq']['value'],
+        'collection': os.path.dirname(x['seq']['value']),
+        'info': x['info']['value'],
+        'sequencer': x['sequencer']['value'],
+    } for x in meta])
+
 @app.route('/api/ebi/sample-<id>.xml', methods=['GET'])
 def ebi_sample(id):
-    page = render_template('ebi-sample.xml',**locals())
+    meta = fetch_sample_metadata(id)[0]
+    print("HERE",meta,file=sys.stderr)
+    page = render_template('ebi-sample.xml',sampleid=id,sequencer=meta['sequencer']['value'])
     return page
 
 @app.route('/api/search', methods=['GET'])
diff --git a/bh20simplewebuploader/templates/ebi-sample.xml b/bh20simplewebuploader/templates/ebi-sample.xml
index 441e29e..0b6b39e 100644
--- a/bh20simplewebuploader/templates/ebi-sample.xml
+++ b/bh20simplewebuploader/templates/ebi-sample.xml
@@ -1,6 +1,6 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <SAMPLE_SET>
-  <SAMPLE alias="{{ id }}" center_name="COVID-19 PubSeq">
+  <SAMPLE alias="{{ sampleid }}" center_name="COVID-19 PubSeq">
     <TITLE>COVID-19 PubSeq Sample</TITLE>
     <SAMPLE_NAME>
       <TAXON_ID>2697049</TAXON_ID>
@@ -14,7 +14,7 @@
       </SAMPLE_ATTRIBUTE>
       <SAMPLE_ATTRIBUTE>
         <TAG>sequencing method</TAG>
-        <VALUE>{{ sequence_method }}</VALUE>
+        <VALUE>{{ sequencer }}</VALUE>
       </SAMPLE_ATTRIBUTE>
       <SAMPLE_ATTRIBUTE>
         <TAG>collection date</TAG>
diff --git a/test/rest-api.org b/test/rest-api.org
index 1930d8b..6dd6616 100644
--- a/test/rest-api.org
+++ b/test/rest-api.org
@@ -53,12 +53,11 @@ When you use the search box on PubSeq it queries the REST end point
 for information on the search items. For example
 
 #+begin_src python :session :exports both
-requests.get(baseURL+"/api/search?s=MT326090.1").json()
+requests.get(baseURL+"/api/search?s=MT533203.1").json()
 #+end_src
 
 #+RESULTS:
-| collection | : | http://collections.lugli.arvadosapi.com/c=10eaef75e0b875f81aa1f411c75370cf+126 | fasta | : | http://collections.lugli.arvadosapi.com/c=10eaef75e0b875f81aa1f411c75370cf+126/sequence.fasta | id | : | MT326090.1 | info | : | http://identifiers.org/insdc/MT326090.1#sequence |
-| collection | : | http://collections.lugli.arvadosapi.com/c=5a4c815f3e076ad7760a91864c39dd07+126 | fasta | : | http://collections.lugli.arvadosapi.com/c=5a4c815f3e076ad7760a91864c39dd07+126/sequence.fasta | id | : | MT326090.1 | info | : | http://identifiers.org/insdc/MT326090.1#sequence |
+| collection | : | http://collections.lugli.arvadosapi.com/c=0015b0d65dfd2e82bb3cee4436bf2893+126 | fasta | : | http://collections.lugli.arvadosapi.com/c=0015b0d65dfd2e82bb3cee4436bf2893+126/sequence.fasta | id | : | MT533203.1 | info | : | http://identifiers.org/insdc/MT533203.1#sequence |
 
 where collection is the raw uploaded data. The hash value in ~c=~ is
 computed on the contents of the Arvados keep [[https://doc.arvados.org/v2.0/user/tutorials/tutorial-keep-mount-gnu-linux.html][collection]] and effectively
@@ -66,10 +65,19 @@ acts as a deduplication uuid.
 
 ** Fetch metadata
 
+Using the above collection link you can fetch the metadata in JSON as it
+was originally uploaded from the ShEx expression, e.g. using
+https://collections.lugli.arvadosapi.com/c=0015b0d65dfd2e82bb3cee4436bf2893+126/
+
+But it is better to use the more advanced sample metadata fetcher
+because it does a bit more in terms of expansion:
 
 #+begin_src python :session :exports both
+requests.get(baseURL+"/api/sample/MT533203.1.json").json()
 #+end_src
 
+#+RESULTS:
+
 
 
 ** Fetch EBI XML
author	Pjotr Prins	2020-07-21 11:05:00 +0100
committer	Pjotr Prins	2020-07-21 11:05:00 +0100
commit	dceac420dffa3aa74ef49a50d9be01e450e9d339 (patch)
tree	1d9e1d4f02c1b1b086494c6659d2fff06c77547e
parent	56b5c444fd10cc569c4c0d7b76d034799ce679f9 (diff)
download	bh20-seq-resource-dceac420dffa3aa74ef49a50d9be01e450e9d339.tar.gz bh20-seq-resource-dceac420dffa3aa74ef49a50d9be01e450e9d339.tar.lz bh20-seq-resource-dceac420dffa3aa74ef49a50d9be01e450e9d339.zip