Merge branch 'master' into yamlfa2ttl

author: AndreaGuarracino 2021-01-07 23:50:01 +0100
committer: AndreaGuarracino 2021-01-07 23:50:01 +0100
commit: 4d841d279b2bf73da2ba815d53863c7f2861c956 (patch)
tree: 83b9ad136dabacbf7ed54e19b2db6df348bef904 /bh20simplewebuploader
parent: 141e619929cee17018417d71111063015e73c366 (diff)
parent: c080c3cffedcc0cc99496b5e70fcfdf998978f16 (diff)
download: bh20-seq-resource-4d841d279b2bf73da2ba815d53863c7f2861c956.tar.gz
bh20-seq-resource-4d841d279b2bf73da2ba815d53863c7f2861c956.tar.lz
bh20-seq-resource-4d841d279b2bf73da2ba815d53863c7f2861c956.zip
4 files changed, 161 insertions, 53 deletions
diff --git a/bh20simplewebuploader/api.py b/bh20simplewebuploader/api.py
index b1b505f..761ad03 100644
--- a/bh20simplewebuploader/api.py
+++ b/bh20simplewebuploader/api.py
@@ -3,20 +3,26 @@
 import os
 import requests
 import sys
+import types
 
 from flask import Flask, request, redirect, send_file, send_from_directory, render_template, jsonify
 from bh20simplewebuploader.main import app, sparqlURL
 
+PUBSEQ="http://covid19.genenetwork.org"
+ARVADOS="https://collections.lugli.arvadosapi.com/c="
+
 # Helper functions
 
-def fetch_sample_metadata(id):
-    query = """
+def fetch_sample(id, query=None):
+    default_query = """
+
     PREFIX pubseq: <http://biohackathon.org/bh20-seq-schema#MainSchema/>
     PREFIX sio: <http://semanticscience.org/resource/>
     PREFIX edam: <http://edamontology.org/>
     PREFIX efo: <http://www.ebi.ac.uk/efo/>
     PREFIX evs: <http://ncicb.nci.nih.gov/xml/owl/EVS/Thesaurus.owl#>
     PREFIX obo: <http://purl.obolibrary.org/obo/>
+
     select distinct ?id ?seq ?date ?info ?specimen ?sequencer ?mapper
     {
       ?sample sio:SIO_000115 "%s" ;
@@ -24,15 +30,49 @@ def fetch_sample_metadata(id):
               evs:C25164 ?date .
       ?seq    pubseq:technology ?tech ;
               pubseq:sample ?sample .
-      ?tech   efo:EFO_0002699 ?mapper ;
-              obo:OBI_0600047 ?sequencer .
+      optional { ?tech   efo:EFO_0002699 ?mapper } .
+      optional { ?tech   obo:OBI_0600047 ?sequencer . }
       optional { ?sample edam:data_2091 ?info } .
       optional { ?sample obo:OBI_0001479 ?specimen } .
     } limit 5
+
     """ % id
+    if not query: query = default_query
+    print(query)
     payload = {'query': query, 'format': 'json'}
     r = requests.get(sparqlURL, params=payload)
-    return r.json()['results']['bindings']
+    res = r.json()
+    print(res)
+    return res['results']['bindings'],res['head']['vars']
+
+def fetch_one_sample(id, query=None):
+    """Return the first result row for `id` as a SimpleNamespace of plain values, plus `arv_id` (basename of `seq`)."""
+
+    result,varlist = fetch_sample(id,query)
+    h = {}
+    row = result[0]  # only the first row is used; fails with IndexError if `result` is empty
+    for key in varlist:
+        if key in row:  # skip variables that have no binding in this row
+            h[key] = row[key]['value']
+    print(h)
+    h['arv_id'] = os.path.basename(h['seq'])  # requires a `seq` binding; keeps its last path component
+    return types.SimpleNamespace(**h)
+
+def fetch_one_record(id):  # build the record dict returned by the JSON API for one sample id
+    m = fetch_one_sample(id)
+    arv_id = m.arv_id
+    rec = { "id": id,
+            'arv_id': arv_id,
+            "permalink": PUBSEQ+'/resource/'+id,
+            "collection": m.seq,
+            'collection_date': m.date,
+            'fasta': ARVADOS+arv_id+'/sequence.fasta',  # file URLs are the ARVADOS prefix + arv_id + file name
+            'metadata': ARVADOS+arv_id+'/metadata.yaml',
+    }
+    h = m.__dict__ # attribute dict of the namespace, used to test for the optional items below
+    if 'mapper' in h: rec['mapper'] = m.mapper
+    if 'sequencer' in h: rec['sequencer']= m.sequencer
+    return rec
 
 # Main API routes
 
@@ -42,54 +82,40 @@ def version():
 
 @app.route('/api/sample/<id>.json')
 def sample(id):
-    # metadata = file.name(seq)+"/metadata.yaml"
-    meta = fetch_sample_metadata(id)
-    print(meta)
-    return jsonify([{
-        'id': x['id']['value'],
-        'fasta': x['seq']['value'],
-        'collection': os.path.dirname(x['seq']['value']),
-        'date': x['date']['value'],
-        'info': x['info']['value'],
-        'specimen': x['specimen']['value'],
-        'sequencer': x['sequencer']['value'],
-        'mapper': x['mapper']['value'],
-    } for x in meta])
+    """
+
+API sample should return a record pointing to other resources,
+notably: permalink, original metadata record and the fasta
+data. The response is a one-element JSON list wrapping that record; the record also has an "arv_id" key.
+
+curl http://localhost:5067/api/sample/MT533203.1.json
+{
+  "id": "MT533203.1",
+  "permalink": "http://covid19.genenetwork.org/resource/MT533203.1",
+  "collection": "http://covid19.genenetwork.org/resource/lugli-4zz18-uovend31hdwa5ks",
+  "collection_date": "2020-04-27",
+  "fasta": "https://collections.lugli.arvadosapi.com/c=lugli-4zz18-uovend31hdwa5ks/sequence.fasta",
+  "metadata": "https://collections.lugli.arvadosapi.com/c=lugli-4zz18-uovend31hdwa5ks/metadata.yaml",
+  "mapper": "minimap v. 2.17",
+  "sequencer": "http://www.ebi.ac.uk/efo/EFO_0008632"
+}
+
+"""
+
+    return jsonify([fetch_one_record(id)])
 
 @app.route('/api/ebi/sample-<id>.xml', methods=['GET'])
 def ebi_sample(id):
-    meta = fetch_sample_metadata(id)[0]
+    meta = fetch_sample(id)[0][0]  # fetch_sample returns (rows, vars); use the first row of the rows list
     page = render_template('ebi-sample.xml',sampleid=id,sequencer=meta['sequencer']['value'],date=meta['date']['value'],specimen=meta['specimen']['value'])
     return page
 
 @app.route('/api/search', methods=['GET'])
 def search():
     """
-    Execute a 'global search'
+    Execute a 'global search'. Currently just duplicates fetch one
+    sample. Should be more flexible FIXME.
     """
     s = request.args.get('s')
-    if s == "":
-        s = "MT326090.1"
-    query = """
-    PREFIX pubseq: <http://biohackathon.org/bh20-seq-schema#MainSchema/>
-    PREFIX sio: <http://semanticscience.org/resource/>
-    PREFIX edam: <http://edamontology.org/>
-    select distinct ?id ?seq ?info
-    {
-    ?sample sio:SIO_000115 "%s" .
-    ?sample sio:SIO_000115 ?id .
-    ?seq pubseq:sample ?sample .
-    ?sample edam:data_2091 ?info .
-    } limit 100
-    """ % s
-    payload = {'query': query, 'format': 'json'}
-    r = requests.get(sparqlURL, params=payload)
-    result = r.json()['results']['bindings']
-    # metadata = file.name(seq)+"/metadata.yaml"
-    print(result)
-    return jsonify([{
-        'id': x['id']['value'],
-        'fasta': x['seq']['value'],
-        'collection': os.path.dirname(x['seq']['value']),
-        'info': x['info']['value'],
-    } for x in result])
+    if not s: s = "MT326090.1"  # default sample; also covers a missing ?s= (args.get returns None)
+    return jsonify([fetch_one_record(s)])
diff --git a/bh20simplewebuploader/main.py b/bh20simplewebuploader/main.py
index b620946..504f03c 100644
--- a/bh20simplewebuploader/main.py
+++ b/bh20simplewebuploader/main.py
@@ -34,6 +34,7 @@ if not os.path.isfile('bh20sequploader/main.py'):
     print("WARNING: run FLASK from the root of the source repository!", file=sys.stderr)
 
 app = Flask(__name__, static_url_path='/static', static_folder='static')
+app.config['JSON_SORT_KEYS'] = False
 
 # Limit file upload size. We shouldn't be working with anything over 1 MB; these are small genomes.
 # We will enforce the limit ourselves and set a higher safety limit here.
@@ -252,7 +253,7 @@ FORM_ITEMS = load_schema_generate_form()
 def get_feed_items(name, start=0, stop=9):
     redis_client = redis.Redis(host=os.environ.get('HOST', 'localhost'),
                                port=os.environ.get('PORT', 6379),
-                               db=os.environ.get('REDIS_DB', 0))    
+                               db=os.environ.get('REDIS_DB', 0))
     feed_items = []
     try:
         for el in redis_client.zrevrange(name, start, stop):
@@ -272,12 +273,23 @@ def send_home():
     """
     Send the front page.
     """
+    (tweets,
+     commits,
+     pubmed_articles,
+     arxiv_articles) = [get_feed_items(x) for x in ["bh20-tweet-score:",
+                                                    "bh20-commit-score:",
+                                                    "bh20-pubmed-score:",
+                                                    "bh20-arxiv-score:"]]
     return render_template(
         'home.html', menu='HOME',
-        tweets=get_feed_items("bh20-tweet-score:"),
-        commits=get_feed_items("bh20-commit-score:"),
-        pubmed_articles=get_feed_items("bh20-pubmed-score:"),
-        arxiv_articles=get_feed_items("bh20-arxiv-score:"),
+        all_items=list(itertools.chain(tweets,
+                                       commits,
+                                       pubmed_articles,
+                                       arxiv_articles)),
+        tweets=tweets,
+        commits=commits,
+        pubmed_articles=pubmed_articles,
+        arxiv_articles=arxiv_articles,
         load_map=True)
 
 
@@ -750,8 +762,8 @@ union
     # http://covid19.genenetwork.org/resource/lugli-4zz18-gx0ifousk9yu0ql
     m = re.match(r"http://collections.lugli.arvadosapi.com/c=([^/]*)/sequence.fasta|http://covid19.genenetwork.org/resource/(.*)", sequenceuri)
     collection = m.group(1) or m.group(2)
-    fastauri = f"http://collections.lugli.arvadosapi.com/c={collection}/sequence.fasta"
-    metauri = f"http://collections.lugli.arvadosapi.com/c={collection}/metadata.yaml"
+    fastauri = f"https://collections.lugli.arvadosapi.com/c={collection}/sequence.fasta"
+    metauri = f"https://collections.lugli.arvadosapi.com/c={collection}/metadata.yaml"
     locationuri=sample['geo']['value']
     location=sample['geoname']['value']
     date=sample['date']['value']
diff --git a/bh20simplewebuploader/static/main.css b/bh20simplewebuploader/static/main.css
index fbc721e..e2f0c83 100644
--- a/bh20simplewebuploader/static/main.css
+++ b/bh20simplewebuploader/static/main.css
@@ -567,6 +567,7 @@ input[name="feed-tabs"] ~ .tab {
     display: none;
 }
 
+#tab-all-items:checked ~ .tab.content-all-items,
 #tab-pubmed-articles:checked ~ .tab.content-pubmed-articles,
 #tab-arxiv-articles:checked ~ .tab.content-arxiv-articles,
 #tab-tweets:checked ~ .tab.content-tweets,
diff --git a/bh20simplewebuploader/templates/home.html b/bh20simplewebuploader/templates/home.html
index a880f81..23f48bf 100644
--- a/bh20simplewebuploader/templates/home.html
+++ b/bh20simplewebuploader/templates/home.html
@@ -29,7 +29,9 @@
                       </div>
 
                       <div id="feed">
-                          <input name="feed-tabs" type="radio" id="tab-arxiv-articles" checked/>
+                          <input name="feed-tabs" type="radio" id="tab-all-items" checked/>
+                          <label for="tab-all-items">All Items</label>
+                          <input name="feed-tabs" type="radio" id="tab-arxiv-articles"/>
                           <label for="tab-arxiv-articles">Arxiv</label>
                           <input name="feed-tabs" type="radio" id="tab-pubmed-articles"/>
                           <label for="tab-pubmed-articles">Pubmed</label>
@@ -37,6 +39,73 @@
                           <label for="tab-tweets">Tweets</label>
                           <input name="feed-tabs" type="radio" id="tab-commits"/>
                           <label for="tab-commits">Commits</label>
+                          <ul class="tab content-all-items">
+                              <!-- Begin News -->
+                                  {% if all_items %}
+                                  {% for item in all_items|sort(reverse=true, attribute="score")%}
+                                  <li>
+                                      {% if item['authors'] %}
+                                      <!-- Arxiv article -->
+                                      <p>
+                                          <b>[arxiv]</b>
+                                          <a href="{{ item['url'] }}" target="_blank">
+                                              {{item['title']}}
+                                          </a>
+                                          <br/>
+                                          <b>Authors:</b> {{ item['authors'] }}
+                                          <br/>
+                                          <b>Abstract:</b> {{ item['abstract']}}...
+                                          <br/>
+                                          <b>Submitted:</b> {{ item['submission']}}
+                                      </p>
+
+                                      {% elif item['full-authors'] %}
+                                      <!-- Pubmed Article -->
+                                      <p><b>[Pubmed]:</b>
+                                          <a href="https://pubmed.ncbi.nlm.nih.gov/{{ item['docsum-pmid'] }}" target="_blank"><b>Summary:</b>
+                                              {{ item['summary'] }}
+                                          </a> <br/>
+                                          <b>Full Authors:</b> {{ item['full-authors'] }} <br/>
+                                          <b>Short Authors:</b> {{ item['short-authors'] }} <br/>
+                                          <b>Citation:</b> {{ item['citation'] }} <br/>
+                                          <b>Short Journal Citation:</b> {{ item['short-journal-citation'] }} <br/>
+                                      </p>
+
+                                      {% elif item['tweet'] %}
+                                      <!-- Tweets -->
+                                      <p>
+                                          <b>[Tweet]:</b>
+                                          {{ item['tweet']|urlize(40, target="_blank")}}
+                                          <small>
+                                              <a href="{{ item['url'] }}" target="_blank">source</a></small>
+                                          <br/>
+                                          by {{ item['author'] }}
+                                          <br/>
+                                          <small>{{ item['timeposted'] }}</small>
+                                      </p>
+
+                                      {% elif item['repository-url'] %}
+                                      <!-- Commits -->
+                                      <p>
+                                          <b>[Commit]:</b>
+                                          <a href="{{ item.url }}" target="_blank">
+                                              {{ item.hash.split(":")[-1][:7] }}: {{ item.content }}
+                                          </a>
+                                          <br/>
+                                          <small>
+                                              <a href="{{ item['repository-url'] }}" target="_blank"> {{ item.author }}/{{ item.repository }}</a>
+                                              on {{ item.timeposted }}
+                                          </small>
+                                      </p>
+                                      {% endif %}
+                                  </li>
+                                  {%endfor%}
+
+                                  {% else %}
+                                  There are no items to display :(
+                                  {% endif %}
+                              <!-- End News -->
+                          </ul>
                           <ul class="tab content-arxiv-articles">
                               {% if arxiv_articles %}
                               {% for article in arxiv_articles|sort(reverse=true, attribute="score")%}
author	AndreaGuarracino	2021-01-07 23:50:01 +0100
committer	AndreaGuarracino	2021-01-07 23:50:01 +0100
commit	4d841d279b2bf73da2ba815d53863c7f2861c956 (patch)
tree	83b9ad136dabacbf7ed54e19b2db6df348bef904 /bh20simplewebuploader
parent	141e619929cee17018417d71111063015e73c366 (diff)
parent	c080c3cffedcc0cc99496b5e70fcfdf998978f16 (diff)
download	bh20-seq-resource-4d841d279b2bf73da2ba815d53863c7f2861c956.tar.gz bh20-seq-resource-4d841d279b2bf73da2ba815d53863c7f2861c956.tar.lz bh20-seq-resource-4d841d279b2bf73da2ba815d53863c7f2861c956.zip