about summary refs log tree commit diff
path: root/bh20simplewebuploader
diff options
context:
space:
mode:
authorPeter Amstutz2020-07-08 17:14:46 -0400
committerGitHub2020-07-08 17:14:46 -0400
commit6e0f9f18167377bac073d7715b89e7ddbf1fe72d (patch)
tree1b72a737b50e60346aefaf009ac2488d45c8abe0 /bh20simplewebuploader
parent6fa25708b46a590be82a6b84266c0a3f25a0d890 (diff)
parente821857e7a9403739f321feb7418d33d6bd8b2c7 (diff)
downloadbh20-seq-resource-6e0f9f18167377bac073d7715b89e7ddbf1fe72d.tar.gz
bh20-seq-resource-6e0f9f18167377bac073d7715b89e7ddbf1fe72d.tar.lz
bh20-seq-resource-6e0f9f18167377bac073d7715b89e7ddbf1fe72d.zip
Merge pull request #92 from arvados/upload-download-status
Split upload tab.  Add upload status tab.  Also a bunch of QC and uploader improvements.
Diffstat (limited to 'bh20simplewebuploader')
-rw-r--r--bh20simplewebuploader/main.py119
-rw-r--r--bh20simplewebuploader/static/main.css26
-rw-r--r--bh20simplewebuploader/templates/footer.html14
-rw-r--r--bh20simplewebuploader/templates/form.html49
-rw-r--r--bh20simplewebuploader/templates/home.html50
-rw-r--r--bh20simplewebuploader/templates/menu.html4
-rw-r--r--bh20simplewebuploader/templates/resource.html27
-rw-r--r--bh20simplewebuploader/templates/status.html17
8 files changed, 233 insertions, 73 deletions
diff --git a/bh20simplewebuploader/main.py b/bh20simplewebuploader/main.py
index 3100dfd..9132453 100644
--- a/bh20simplewebuploader/main.py
+++ b/bh20simplewebuploader/main.py
@@ -13,12 +13,20 @@ import pkg_resources
 from flask import Flask, request, redirect, send_file, send_from_directory, render_template, jsonify
 import os.path
 import requests
+import io
+import arvados
+from markupsafe import Markup
+
+ARVADOS_API = 'lugli.arvadosapi.com'
+ANONYMOUS_TOKEN = '5o42qdxpxp5cj15jqjf7vnxx5xduhm4ret703suuoa3ivfglfh'
+UPLOADER_PROJECT = 'lugli-j7d0g-n5clictpuvwk8aa'
+VALIDATED_PROJECT = 'lugli-j7d0g-5ct8p1i1wrgyjvp'
 
 logging.basicConfig(level=logging.DEBUG)
 log = logging.getLogger(__name__ )
 log.debug("Entering web uploader")
 
-if not os.path.isfile('bh20sequploader/mainx.py'):
+if not os.path.isfile('bh20sequploader/main.py'):
     print("WARNING: run FLASK from the root of the source repository!", file=sys.stderr)
 
 app = Flask(__name__, static_url_path='/static', static_folder='static')
@@ -224,12 +232,21 @@ METADATA_OPTION_DEFINITIONS = yaml.safe_load(pkg_resources.resource_stream("bh20
 FORM_ITEMS = generate_form(METADATA_SCHEMA, METADATA_OPTION_DEFINITIONS)
 
 @app.route('/')
+def send_home():
+    """
+    Render the landing page (home.html) with the HOME menu entry selected.
+    """
+
+    page = render_template('home.html', menu='HOME')
+    return page
+
+
+@app.route('/upload')
 def send_form():
     """
-    Send the file upload form/front page.
+    Send the file upload form.
     """
 
-    return render_template('form.html', fields=FORM_ITEMS, menu='HOME')
+    return render_template('form.html', fields=FORM_ITEMS, menu='UPLOAD')
 
 class FileTooBigError(RuntimeError):
     """
@@ -405,7 +422,7 @@ def receive_files():
 
         # Try and upload files to Arvados using the sequence uploader CLI
 
-        cmd = ['python3','bh20sequploader/main.py', fasta_dest, metadata_dest]
+        cmd = ['python3','bh20sequploader/main.py', metadata_dest, fasta_dest]
         print(" ".join(cmd),file=sys.stderr)
         result = subprocess.run(cmd,
             stdout=subprocess.PIPE, stderr=subprocess.PIPE)
@@ -439,7 +456,83 @@ def get_html_body(fn):
 @app.route('/download')
 def download_page():
+    """
+    Render resource.html under the DOWNLOAD menu entry, embedding whatever
+    get_html_body() returns for doc/web/download.html.
+    """
     buf = get_html_body('doc/web/download.html')
-    return render_template('about.html',menu='DOWNLOAD',embed=buf)
+    return render_template('resource.html',menu='DOWNLOAD',embed=buf)
+
+def pending_table(output, items):
+    """
+    Write an HTML table listing the uploads whose status is "pending".
+
+    output -- object with a write() method that receives the HTML text
+    items -- iterable of dicts holding "status" and "uuid" and, when
+             available, "sequence_label"; entries with any other status
+             are skipped
+    """
+    output.write(
+"""
+<table>
+<tr><th>Collection</th>
+<th>Sequence label</th></tr>
+""")
+    for r in items:
+        if r["status"] != "pending":
+            continue
+        # Escape the uuid as well: it is interpolated into an attribute and
+        # status_page() wraps the finished text in Markup (i.e. trusts it).
+        uuid = Markup.escape(r["uuid"])
+        output.write("<tr>")
+        output.write("<td><a href='https://workbench.lugli.arvadosapi.com/collections/%s'>%s</a></td>" % (uuid, uuid))
+        # A collection without a sequence_label gets an empty cell rather
+        # than a KeyError that would take down the whole page.
+        output.write("<td>%s</td>" % Markup.escape(r.get("sequence_label", "")))
+        output.write("</tr>")
+    output.write(
+"""
+</table>
+""")
+
+def rejected_table(output, items):
+    """
+    Write an HTML table listing the uploads whose status is "rejected".
+
+    output -- object with a write() method that receives the HTML text
+    items -- iterable of dicts holding "status" and "uuid" and, when
+             available, "sequence_label" and "errors" (a list of
+             strings); entries with any other status are skipped
+    """
+    output.write(
+"""
+<table>
+<tr><th>Collection</th>
+<th>Sequence label</th>
+<th>Errors</th></tr>
+""")
+    for r in items:
+        if r["status"] != "rejected":
+            continue
+        # Escape the uuid as well: it is interpolated into an attribute and
+        # status_page() wraps the finished text in Markup (i.e. trusts it).
+        uuid = Markup.escape(r["uuid"])
+        output.write("<tr>")
+        output.write("<td><a href='https://workbench.lugli.arvadosapi.com/collections/%s'>%s</a></td>" % (uuid, uuid))
+        # A collection without a sequence_label gets an empty cell rather
+        # than a KeyError that would take down the whole page.
+        output.write("<td>%s</td>" % Markup.escape(r.get("sequence_label", "")))
+        # One error per line inside the <pre> cell.
+        output.write("<td><pre>%s</pre></td>" % Markup.escape("\n".join(r.get("errors", []))))
+        output.write("</tr>")
+    output.write(
+"""
+</table>
+""")
+
+
+@app.route('/status')
+def status_page():
+    """
+    Processing status
+
+    Lists the collections owned by UPLOADER_PROJECT and tallies them by
+    their "status" property (a collection without one counts as
+    "pending").  The "passed" figure is the items_available value
+    reported for collections owned by VALIDATED_PROJECT.  Renders
+    status.html with the three summary links followed by the pending
+    and rejected tables.
+    """
+
+    api = arvados.api(host=ARVADOS_API, token=ANONYMOUS_TOKEN)
+    pending = arvados.util.list_all(api.collections().list, filters=[["owner_uuid", "=", UPLOADER_PROJECT]])
+    out = []
+    status = {}
+    for p in pending:
+        prop = p["properties"]
+        out.append(prop)
+        prop.setdefault("status", "pending")
+        # Copy the collection-level fields the table writers need.
+        prop["created_at"] = p["created_at"]
+        prop["uuid"] = p["uuid"]
+        status[prop["status"]] = status.get(prop["status"], 0) + 1
+
+    output = io.StringIO()
+
+    # limit=1: only the items_available total is used, not the items.
+    validated = api.collections().list(filters=[["owner_uuid", "=", VALIDATED_PROJECT]], limit=1).execute()
+    status["passed"] = validated["items_available"]
+
+    for key, href in (("passed", "/download"), ("pending", "#pending"), ("rejected", "#rejected")):
+        output.write("<p><a href='%s'>%s sequences QC %s</a></p>" % (href, status.get(key, 0), key))
+
+    # The fragment ids sit on the headings themselves; the previous
+    # "<a id=...>" wrappers were never closed.
+    output.write("<h1 id='pending'>Pending</h1>")
+    pending_table(output, out)
+
+    output.write("<h1 id='rejected'>Rejected</h1>")
+    rejected_table(output, out)
+
+    return render_template('status.html', table=Markup(output.getvalue()), menu='STATUS')
 
 @app.route('/demo')
 def demo_page():
@@ -474,20 +567,10 @@ baseURL='http://sparql.genenetwork.org/sparql/'
 
 @app.route('/api/getCount', methods=['GET'])
 def getCount():
-    query="""
-PREFIX pubseq: <http://biohackathon.org/bh20-seq-schema#MainSchema/>
-select (COUNT(distinct ?dataset) as ?num)
-{
-   ?dataset pubseq:submitter ?id .
-   ?id ?p ?submitter
-}
-"""
-    payload = {'query': query, 'format': 'json'}
-    r = requests.get(baseURL, params=payload)
-    result = r.json()['results']['bindings']
-    # [{'num': {'type': 'typed-literal', 'datatype': 'http://www.w3.org/2001/XMLSchema#integer', 'value': '1352'}}]
-    # print(result, file=sys.stderr)
-    return jsonify({'sequences': int(result[0]["num"]["value"])})
+    """
+    Return, as JSON {"sequences": <n>}, the items_available figure that
+    Arvados reports for collections owned by VALIDATED_PROJECT.
+    """
+    api = arvados.api(host=ARVADOS_API, token=ANONYMOUS_TOKEN)
+    # limit=1: only the items_available total is used, not the items.
+    c = api.collections().list(filters=[["owner_uuid", "=", VALIDATED_PROJECT]], limit=1).execute()
+
+    return jsonify({'sequences': c["items_available"]})
 
 @app.route('/api/getAllaccessions', methods=['GET'])
 def getAllaccessions():
diff --git a/bh20simplewebuploader/static/main.css b/bh20simplewebuploader/static/main.css
index 5a9f231..b9b27f4 100644
--- a/bh20simplewebuploader/static/main.css
+++ b/bh20simplewebuploader/static/main.css
@@ -168,11 +168,11 @@ span.dropt:hover {text-decoration: none; background: #ffffff; z-index: 6; }
     grid-template-rows: auto;
     row-gap:5px;
     grid-template-areas:
-        "a a b b"
-            "a a c c"
-            "a a d d"
-            "e e e e"
-            "f f f f";
+        "b b a a"
+	"b b c c"
+        "b b d d"
+        "e e e e"
+        "f f f f";
     grid-auto-flow: column;
 }
 
@@ -361,3 +361,19 @@ footer {
 .blog-table-body {
     display: table-row-group;
 }
+
+/* Layout for the block wrapping the status/resource page content. */
+div.status {
+    margin: 1em;
+}
+
+.status table {
+    display: table;
+    width: 100%;
+}
+
+/* Both selectors are scoped to .status: a bare "th" in the list would
+   restyle every table header on the site. */
+.status td, .status th {
+    padding-left: 1em;
+    padding-right: 1em;
+    vertical-align: top;
+    border-bottom: 1px solid #ddd;
+}
diff --git a/bh20simplewebuploader/templates/footer.html b/bh20simplewebuploader/templates/footer.html
index 9326b1e..a1dd4fd 100644
--- a/bh20simplewebuploader/templates/footer.html
+++ b/bh20simplewebuploader/templates/footer.html
@@ -41,3 +41,17 @@
   </div>
 </section>
 <script type="text/javascript" src="/static/main.js"></script>
+
+<script type="text/javascript">
+  // Once the DOM is ready, fetch the sequence count from /api/getCount and
+  // append it to the element with id "Counter".
+  document.addEventListener("DOMContentLoaded", function(){
+      var span = document.getElementById("Counter");
+      if (!span) {
+          // This footer is included by several templates; do nothing on
+          // a page that has no counter element.
+          return;
+      }
+      fetch("/api/getCount")
+          .then((resp) => resp.json())
+          .then(function (data) {
+              var txt = document.createTextNode(data["sequences"]);
+              span.appendChild(txt);
+          })
+          .catch(function (err) {
+              // Leave the counter empty, but make the failure visible.
+              console.error("Could not fetch the sequence count", err);
+          });
+  });
+</script>
diff --git a/bh20simplewebuploader/templates/form.html b/bh20simplewebuploader/templates/form.html
index 0ad2080..b9b3776 100644
--- a/bh20simplewebuploader/templates/form.html
+++ b/bh20simplewebuploader/templates/form.html
@@ -7,43 +7,6 @@
 
         <section>
             <form action="/submit" method="POST" enctype="multipart/form-data" id="main_form" class="grid-container">
-                <div class="intro">
-                    <p>
-                        Make your sequence
-                        data <a href="https://en.wikipedia.org/wiki/FAIR_data">FAIR</a>. Upload
-                        your SARS-CoV-2 sequence (FASTA or FASTQ
-                        formats) with metadata (JSONLD) to
-                        the <a href="/about">public sequence
-                        resource</a>. The upload will trigger a
-                        recompute with all available sequences into a
-                        Pangenome available for
-                        <a href="/download">download</a>!
-                    </p>
-                    <p>
-                        Your uploaded sequence will automatically be
-                        processed and incorporated into the public
-                        pangenome with metadata using worklows from
-                        the High Performance Open Biology Lab
-                        defined <a href="https://github.com/hpobio-lab/viral-analysis/tree/master/cwl/pangenome-generate">here</a>. All
-                        data is published under
-                        a <a href="https://creativecommons.org/licenses/by/4.0/">Creative
-                        Commons license</a> You can take the published
-                        (GFA/RDF/FASTA) data and store it in a triple
-                        store for further processing.  Clinical
-                        data can be stored
-                        securely
-                        at <a href="https://redcap-covid19.elixir-luxembourg.org/redcap/">REDCap</a>.
-                    </p>
-                    <p>
-                      Note that form fields contain
-                      web <a href="https://en.wikipedia.org/wiki/Web_Ontology_Language">ontology
-                      URI's</a>
-                      for <a href="https://en.wikipedia.org/wiki/Wikipedia:Disambiguation">disambiguation</a>
-                      and machine readable metadata. For examples of
-                      use, see the <a href="/blog">BLOG</a>.
-                    </p>
-                </div>
-
                 <div class="fasta-file-select">
                     <h2><svg class="bi bi-cloud-upload" width="1.2em" height="1.2em" viewBox="0 0 16 16" fill="currentColor" xmlns="http://www.w3.org/2000/svg">
                         <path d="M4.887 6.2l-.964-.165A2.5 2.5 0 103.5 11H6v1H3.5a3.5 3.5 0 11.59-6.95 5.002 5.002 0 119.804 1.98A2.501 2.501 0 0113.5 12H10v-1h3.5a1.5 1.5 0 00.237-2.981L12.7 7.854l.216-1.028a4 4 0 10-7.843-1.587l-.185.96z"/>
@@ -160,18 +123,6 @@
 
   setMode()
 
-  document.addEventListener("DOMContentLoaded", function(){
-      var count = fetch("/api/getCount")
-          .then((resp) => resp.json())
-          .then(function (data) {
-              count = data["sequences"];
-              console.log(count);
-              span = document.getElementById("Counter");
-              txt = document.createTextNode(count);
-              span.appendChild(txt);
-      });
-});
-
 </script>
 
    </body>
diff --git a/bh20simplewebuploader/templates/home.html b/bh20simplewebuploader/templates/home.html
new file mode 100644
index 0000000..b90a18d
--- /dev/null
+++ b/bh20simplewebuploader/templates/home.html
@@ -0,0 +1,50 @@
+<!DOCTYPE html>
+<html>
+  {% include 'header.html' %}
+    <body>
+      {% include 'banner.html' %}
+      {% include 'menu.html' %}
+
+      <section>
+                <div class="intro">
+                    <p>
+                        Make your sequence
+                        data <a href="https://en.wikipedia.org/wiki/FAIR_data">FAIR</a>. Upload
+                        your SARS-CoV-2 sequence (FASTA or FASTQ
+                        formats) with metadata (JSONLD) to
+                        the <a href="/about">public sequence
+                        resource</a>. The upload will trigger a
+                        recompute with all available sequences into a
+                        Pangenome available for
+                        <a href="/download">download</a>!
+                    </p>
+                    <p>
+                        Your uploaded sequence will automatically be
+                        processed and incorporated into the public
+                        pangenome with metadata using workflows from
+                        the High Performance Open Biology Lab
+                        defined <a href="https://github.com/hpobio-lab/viral-analysis/tree/master/cwl/pangenome-generate">here</a>. All
+                        data is published under
+                        a <a href="https://creativecommons.org/licenses/by/4.0/">Creative
+                        Commons license</a>. You can take the published
+                        (GFA/RDF/FASTA) data and store it in a triple
+                        store for further processing.  Clinical
+                        data can be stored
+                        securely
+                        at <a href="https://redcap-covid19.elixir-luxembourg.org/redcap/">REDCap</a>.
+                    </p>
+                    <p>
+                      Note that form fields contain
+                      web <a href="https://en.wikipedia.org/wiki/Web_Ontology_Language">ontology
+                      URIs</a>
+                      for <a href="https://en.wikipedia.org/wiki/Wikipedia:Disambiguation">disambiguation</a>
+                      and machine readable metadata. For examples of
+                      use, see the <a href="/blog">BLOG</a>.
+                    </p>
+                </div>
+        </section>
+
+{% include 'footer.html' %}
+
+   </body>
+</html>
diff --git a/bh20simplewebuploader/templates/menu.html b/bh20simplewebuploader/templates/menu.html
index 6f97e19..0f6003f 100644
--- a/bh20simplewebuploader/templates/menu.html
+++ b/bh20simplewebuploader/templates/menu.html
@@ -1,7 +1,9 @@
 <section class="menu">
   <div class="topnav" id="myTopnav">
-    <a href="/" class="{{ 'active' if menu=='HOME' }}">COVID-19</a>
+    <a href="/" class="{{ 'active' if menu=='HOME' }}">PUBSEQ</a>
     <a href="/download" class="{{ 'active' if menu=='DOWNLOAD' }}">DOWNLOAD</a>
+    <a href="/upload" class="{{ 'active' if menu=='UPLOAD' }}">UPLOAD</a>
+    <a href="/status" class="{{ 'active' if menu=='STATUS' }}">STATUS</a>
     <a href="/demo" class="{{ 'active' if menu=='DEMO' }}">DEMO</a>
     <a href="/blog" class="{{ 'active' if menu=='BLOG' }}">BLOG</a>
     <a href="/about" class="{{ 'active' if menu=='ABOUT' }}">ABOUT</a>
diff --git a/bh20simplewebuploader/templates/resource.html b/bh20simplewebuploader/templates/resource.html
new file mode 100644
index 0000000..91b6c20
--- /dev/null
+++ b/bh20simplewebuploader/templates/resource.html
@@ -0,0 +1,27 @@
+<!DOCTYPE html>
+<html>
+  {% include 'header.html' %}
+    <body>
+      {% include 'banner.html' %}
+      {% include 'menu.html' %}
+
+      <div class="status">
+	<p><img src="https://workbench.lugli.arvadosapi.com/collections/lugli-4zz18-z513nlpqm03hpca/relabeledSeqs_dedup_relabeledSeqs_dedup.png" height="300px"></p>
+	<p><a href="https://workbench.lugli.arvadosapi.com/projects/lugli-j7d0g-5ct8p1i1wrgyjvp#Data_collections">All sequences project</a></p>
+	<p><a href="https://workbench.lugli.arvadosapi.com/collections/lugli-4zz18-z513nlpqm03hpca/relabeledSeqs_dedup.fasta">All sequences (FASTA) relabeled and deduplicated</a></p>
+	<p><a href="https://workbench.lugli.arvadosapi.com/collections/lugli-4zz18-z513nlpqm03hpca/mergedmetadata.ttl">Metadata (RDF) for all sequences</a></p>
+	<p><a href="https://workbench.lugli.arvadosapi.com/collections/lugli-4zz18-z513nlpqm03hpca/relabeledSeqs_dedup_relabeledSeqs_dedup.gfa">All sequences in Graphical Fragment Assembly (GFA)</a> - <a href="https://github.com/GFA-spec/GFA-spec">More about GFA</a></p>
+	<p><a href="https://workbench.lugli.arvadosapi.com/collections/lugli-4zz18-z513nlpqm03hpca/relabeledSeqs_dedup_relabeledSeqs_dedup.gfa">All sequences in Optimized Dynamic Genome/Graph Implementation (ODGI)</a> - <a href="https://github.com/vgteam/odgi">More about ODGI</a></p>
+	<p><a href="https://workbench.lugli.arvadosapi.com/collections/lugli-4zz18-z513nlpqm03hpca/relabeledSeqs_dedup_relabeledSeqs_dedup.ttl.xz">All sequences in RDF using spodgi</a> - <a href="https://github.com/pangenome/spodgi">More about spodgi</a></p>
+
+
+	<p><a href="http://sparql.genenetwork.org/sparql/">SPARQL endpoint</a> - <a href="http://sparql.genenetwork.org/sparql/?default-graph-uri=&query=SELECT+DISTINCT+%3Ffasta+%3Fvalue+WHERE+%7B%3Ffasta+%3Fx%5B+%3Chttp%3A%2F%2Fedamontology.org%2Fdata_2091%3E+%3Fvalue+%5D%7D%0D%0A&format=text%2Fhtml&timeout=0&debug=on&run=+Run+Query+">Sample query for accessions</a></p>
+
+	{{ embed|safe }}
+
+	</div>
+
+{% include 'footer.html' %}
+
+   </body>
+</html>
diff --git a/bh20simplewebuploader/templates/status.html b/bh20simplewebuploader/templates/status.html
new file mode 100644
index 0000000..a1cf28f
--- /dev/null
+++ b/bh20simplewebuploader/templates/status.html
@@ -0,0 +1,17 @@
+<!DOCTYPE html>
+<html>
+  {% include 'header.html' %}
+    <body>
+      {% include 'banner.html' %}
+      {% include 'menu.html' %}
+
+      <h1>Sequence upload processing status</h1>
+
+        <div class="status">
+	  {{ table }}
+        </div>
+
+{% include 'footer.html' %}
+
+   </body>
+</html>