diff options
author | Pjotr Prins | 2021-01-05 10:42:16 +0000 |
---|---|---|
committer | Pjotr Prins | 2021-01-05 10:42:16 +0000 |
commit | b26133cdaff3bba64c4fc294510b16d57030d071 (patch) | |
tree | 46816a066e528e7ba824817ae830b42692b7438a | |
parent | 9d75ce088e6388bf23ae077fd06b2a3f51be1bda (diff) | |
download | bh20-seq-resource-b26133cdaff3bba64c4fc294510b16d57030d071.tar.gz bh20-seq-resource-b26133cdaff3bba64c4fc294510b16d57030d071.tar.lz bh20-seq-resource-b26133cdaff3bba64c4fc294510b16d57030d071.zip |
api: more cleanup
-rw-r--r-- | bh20simplewebuploader/api.py | 122 | ||||
-rw-r--r-- | bh20simplewebuploader/main.py | 3 |
2 files changed, 61 insertions, 64 deletions
diff --git a/bh20simplewebuploader/api.py b/bh20simplewebuploader/api.py index 11c74f2..761ad03 100644 --- a/bh20simplewebuploader/api.py +++ b/bh20simplewebuploader/api.py @@ -3,6 +3,7 @@ import os import requests import sys +import types from flask import Flask, request, redirect, send_file, send_from_directory, render_template, jsonify from bh20simplewebuploader.main import app, sparqlURL @@ -12,14 +13,16 @@ ARVADOS="https://collections.lugli.arvadosapi.com/c=" # Helper functions -def fetch_sample_metadata(id): - query = """ +def fetch_sample(id, query=None): + default_query = """ + PREFIX pubseq: <http://biohackathon.org/bh20-seq-schema#MainSchema/> PREFIX sio: <http://semanticscience.org/resource/> PREFIX edam: <http://edamontology.org/> PREFIX efo: <http://www.ebi.ac.uk/efo/> PREFIX evs: <http://ncicb.nci.nih.gov/xml/owl/EVS/Thesaurus.owl#> PREFIX obo: <http://purl.obolibrary.org/obo/> + select distinct ?id ?seq ?date ?info ?specimen ?sequencer ?mapper { ?sample sio:SIO_000115 "%s" ; @@ -27,15 +30,49 @@ def fetch_sample_metadata(id): evs:C25164 ?date . ?seq pubseq:technology ?tech ; pubseq:sample ?sample . - ?tech efo:EFO_0002699 ?mapper ; - obo:OBI_0600047 ?sequencer . + optional { ?tech efo:EFO_0002699 ?mapper } . + optional { ?tech obo:OBI_0600047 ?sequencer . } optional { ?sample edam:data_2091 ?info } . optional { ?sample obo:OBI_0001479 ?specimen } . } limit 5 + """ % id + if not query: query = default_query + print(query) payload = {'query': query, 'format': 'json'} r = requests.get(sparqlURL, params=payload) - return r.json()['results']['bindings'] + res = r.json() + print(res) + return res['results']['bindings'],res['head']['vars'] + +def fetch_one_sample(id, query=None): + """Get the top sample and return a SimpleNamespace""" + + result,varlist = fetch_sample(id,query) + h = {} + row = result[0] + for key in varlist: + if key in row: + h[key] = row[key]['value'] + print(h) + h['arv_id'] = os.path.basename(h['seq']) + return types.SimpleNamespace(**h) + +def fetch_one_record(id): + m = fetch_one_sample(id) + arv_id = m.arv_id + rec = { "id": id, + 'arv_id': arv_id, + "permalink": PUBSEQ+'/resource/'+id, + "collection": m.seq, + 'collection_date': m.date, + 'fasta': ARVADOS+arv_id+'/sequence.fasta', + 'metadata': ARVADOS+arv_id+'/metadata.yaml', + } + h = m.__dict__ # for optional items + if 'mapper' in h: rec['mapper'] = m.mapper + if 'sequencer' in h: rec['sequencer']= m.sequencer + return rec # Main API routes @@ -52,74 +89,33 @@ notably: permalink, original metadata record and the fasta data. curl http://localhost:5067/api/sample/MT533203.1.json -[ - { - "collection": "http://covid19.genenetwork.org/resource/lugli-4zz18-uovend31hdwa5ks", - "date": "2020-04-27", - "fasta": "https://collections.lugli.arvadosapi.com/c=lugli-4zz18-uovend31hdwa5ks/sequence.fasta", - "id": "MT533203.1", - "info": "http://identifiers.org/insdc/MT533203.1#sequence", - "mapper": "minimap v. 2.17", - "metadata": "https://collections.lugli.arvadosapi.com/c=lugli-4zz18-uovend31hdwa5ks/metadata.yaml", - "permalink": "http://covid19.genenetwork.org/resource/MT533203.1", - "sequencer": "http://www.ebi.ac.uk/efo/EFO_0008632", - "specimen": "http://purl.obolibrary.org/obo/NCIT_C155831" - } -] - +{ + "id": "MT533203.1", + "permalink": "http://covid19.genenetwork.org/resource/MT533203.1", + "collection": "http://covid19.genenetwork.org/resource/lugli-4zz18-uovend31hdwa5ks", + "collection_date": "2020-04-27", + "fasta": "https://collections.lugli.arvadosapi.com/c=lugli-4zz18-uovend31hdwa5ks/sequence.fasta", + "metadata": "https://collections.lugli.arvadosapi.com/c=lugli-4zz18-uovend31hdwa5ks/metadata.yaml", + "mapper": "minimap v. 2.17", + "sequencer": "http://www.ebi.ac.uk/efo/EFO_0008632" +} """ - # metadata = file.name(seq)+"/metadata.yaml" - meta = fetch_sample_metadata(id) - print(meta) - # http://collections.lugli.arvadosapi.com/c=lugli-4zz18-uovend31hdwa5ks/metadata.yaml - return jsonify([{ - 'id': x['id']['value'], - 'collection': x['seq']['value'], - 'permalink': PUBSEQ+'/resource/'+x['id']['value'], - 'fasta': ARVADOS+os.path.basename(x['seq']['value'])+'/sequence.fasta', - 'metadata': ARVADOS+os.path.basename(x['seq']['value'])+'/metadata.yaml', - 'date': x['date']['value'], - 'info': x['info']['value'], - 'specimen': x['specimen']['value'], - 'sequencer': x['sequencer']['value'], - 'mapper': x['mapper']['value'], - } for x in meta]) + + return jsonify([fetch_one_record(id)]) @app.route('/api/ebi/sample-<id>.xml', methods=['GET']) def ebi_sample(id): - meta = fetch_sample_metadata(id)[0] + meta,varlist = fetch_sample(id)[0] page = render_template('ebi-sample.xml',sampleid=id,sequencer=meta['sequencer']['value'],date=meta['date']['value'],specimen=meta['specimen']['value']) return page @app.route('/api/search', methods=['GET']) def search(): """ - Execute a 'global search' + Execute a 'global search'. Currently just duplicates fetch one + sample. Should be more flexible FIXME. """ s = request.args.get('s') - if s == "": - s = "MT326090.1" - query = """ - PREFIX pubseq: <http://biohackathon.org/bh20-seq-schema#MainSchema/> - PREFIX sio: <http://semanticscience.org/resource/> - PREFIX edam: <http://edamontology.org/> - select distinct ?id ?seq ?info - { - ?sample sio:SIO_000115 "%s" . - ?sample sio:SIO_000115 ?id . - ?seq pubseq:sample ?sample . - ?sample edam:data_2091 ?info . - } limit 100 - """ % s - payload = {'query': query, 'format': 'json'} - r = requests.get(sparqlURL, params=payload) - result = r.json()['results']['bindings'] - # metadata = file.name(seq)+"/metadata.yaml" - print(result) - return jsonify([{ - 'id': x['id']['value'], - 'fasta': x['seq']['value'], - 'collection': os.path.dirname(x['seq']['value']), - 'info': x['info']['value'], - } for x in result]) + if s == "": s = "MT326090.1" + return jsonify([fetch_one_record(s)]) diff --git a/bh20simplewebuploader/main.py b/bh20simplewebuploader/main.py index b620946..b4b72d2 100644 --- a/bh20simplewebuploader/main.py +++ b/bh20simplewebuploader/main.py @@ -34,6 +34,7 @@ if not os.path.isfile('bh20sequploader/main.py'): print("WARNING: run FLASK from the root of the source repository!", file=sys.stderr) app = Flask(__name__, static_url_path='/static', static_folder='static') +app.config['JSON_SORT_KEYS'] = False # Limit file upload size. We shouldn't be working with anything over 1 MB; these are small genomes. # We will enforce the limit ourselves and set a higher safety limit here. @@ -252,7 +253,7 @@ FORM_ITEMS = load_schema_generate_form() def get_feed_items(name, start=0, stop=9): redis_client = redis.Redis(host=os.environ.get('HOST', 'localhost'), port=os.environ.get('PORT', 6379), - db=os.environ.get('REDIS_DB', 0)) + db=os.environ.get('REDIS_DB', 0)) feed_items = [] try: for el in redis_client.zrevrange(name, start, stop): |