aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAdam Novak2020-04-09 12:43:42 -0700
committerAdam Novak2020-04-09 12:43:42 -0700
commit03e857c1a477b04db11cf610760b1f2db7b859c5 (patch)
treeadb9a7ff2331faa8109c827ded3eb72cb3e3dbff
parent60420f991a5bd3502bc6b89747d408da0d922839 (diff)
downloadbh20-seq-resource-03e857c1a477b04db11cf610760b1f2db7b859c5.tar.gz
bh20-seq-resource-03e857c1a477b04db11cf610760b1f2db7b859c5.tar.lz
bh20-seq-resource-03e857c1a477b04db11cf610760b1f2db7b859c5.zip
Add auto-generated fillable metadata form
-rw-r--r--__pycache__/main.cpython-36.pycbin2716 -> 6764 bytes
-rw-r--r--main.py191
-rw-r--r--pages/index.html29
-rw-r--r--templates/form.html95
4 files changed, 272 insertions, 43 deletions
diff --git a/__pycache__/main.cpython-36.pyc b/__pycache__/main.cpython-36.pyc
index 250c562..0f929ad 100644
--- a/__pycache__/main.cpython-36.pyc
+++ b/__pycache__/main.cpython-36.pyc
Binary files differ
diff --git a/main.py b/main.py
index 630669c..d0f2793 100644
--- a/main.py
+++ b/main.py
@@ -1,7 +1,12 @@
+import collections
import tempfile
import shutil
import subprocess
import os
+import re
+import string
+import yaml
+import urllib.request
from flask import Flask, request, redirect, send_file, send_from_directory, render_template
app = Flask(__name__, static_url_path='/static', static_folder='static')
@@ -16,12 +21,118 @@ def handle_large_file(e):
return (render_template('error.html',
error_message="One of your files is too large. The maximum file size is 1 megabyte."), 413)
+
def type_to_heading(type_name):
    """
    Turn a type name like "sampleSchema" from the metadata schema into a
    human-readable heading like "Sample".

    The name is split at camel-case word boundaries, any word equal to
    "schema" (compared case-insensitively) is dropped, and the first letter
    of each remaining word is upper-cased. A run of capitals is kept together
    as one word, so "sampleIDSchema" becomes "Sample ID" and not "Sample I D".

    Returns the words joined by single spaces; this is the empty string if
    nothing is left after filtering.
    """

    # Insert a space at every word boundary:
    #  - a capital that follows anything other than a capital or whitespace
    #  - the last capital of an acronym when a lower-case letter follows it
    # re.sub (rather than re.split) is used because it accepts zero-width
    # matches on every Python 3 version.
    spaced = re.sub(r'(?<=[^A-Z\s])(?=[A-Z])|(?<=[A-Z])(?=[A-Z][a-z])', ' ', type_name)
    # str.split() never yields empty strings, so only "schema" needs filtering
    words = [word for word in spaced.split() if word.lower() != 'schema']
    # Upper-case only the first letter so acronyms keep their capitals
    return ' '.join(word[:1].upper() + word[1:] for word in words)
+
def name_to_label(field_name):
    """
    Turn a field name like "host_health_status" from the metadata schema into
    a human-readable label like "Host Health Status".

    Underscores are treated as word separators, each word is capitalized
    (first letter upper-case, the rest lower-case), and the words are joined
    with single spaces.
    """

    words = field_name.replace('_', ' ').split()
    return ' '.join(word.capitalize() for word in words)
+
def generate_form(schema):
    """
    Linearize the schema into a list of dicts describing the form.

    Each dict either has a 'heading' (in which case we put a heading for a
    form section in the template) or an 'id', 'label', 'type', and 'required'
    (in which case we make a form field in the template). Field dicts also
    carry a 'ref_url' when the schema gives the field a
    jsonldPredicate with an '_id'.

    schema is the parsed metadata schema: a dict whose '$graph' entry lists
    the record types, one of which is flagged with 'documentRoot'.

    Returns an empty list if the schema has no document root.

    Raises NotImplementedError if a field has a type that is neither a known
    record type nor one of the supported primitive types.
    """

    # Schema primitive type -> HTML input type we know how to render
    html_types = {'string': 'text', 'int': 'number'}

    # Index components by type name, and find the root along the way
    by_name = {}
    root_name = None
    for component in schema.get('$graph', []):
        component_name = component.get('name', None)
        if isinstance(component_name, str):
            # Remember how to map back from the name to the component
            by_name[component_name] = component
        if component.get('documentRoot', False):
            # Whichever one is flagged is the root
            root_name = component_name

    def walk_fields(type_name, parent_keys=('metadata',), subtree_optional=False):
        """
        Do a traversal of the component tree.
        Yield a bunch of form item dicts, in order.
        Form IDs are .-separated keypaths for where they are in the structure.
        parent_keys is the path of field names to where we are in the root record's document tree.
        subtree_optional is True when some enclosing record field was optional,
        in which case nothing below it is marked required.
        """

        # Work on a private list so the (immutable) default is never shared
        path = list(parent_keys)

        if len(path) > 1:
            # First make a heading, if we aren't the very root of the form
            yield {'heading': type_to_heading(type_name)}

        for field_name, field_type in by_name.get(type_name, {}).get('fields', {}).items():
            ref_url = None
            if isinstance(field_type, dict):
                # Long-form field: see if it has a more info/what goes here URL
                ref_url = field_type.get('jsonldPredicate', {}).get('_id', None)
                # Grab out its type field
                field_type = field_type.get('type', '')

            if not isinstance(field_type, str):
                # Unions, arrays and other structured types are not supported;
                # report them the same way as any other unsupported type
                raise NotImplementedError('Unimplemented field type {} in {} in metadata schema'.format(field_type, type_name))

            # A trailing ? marks the field as optional
            optional = field_type.endswith('?')
            if optional:
                field_type = field_type[:-1]

            if field_type in by_name:
                # This is a subrecord. We need to recurse
                yield from walk_fields(field_type, path + [field_name], subtree_optional or optional)
                continue

            if field_type not in html_types:
                raise NotImplementedError('Unimplemented field type {} in {} in metadata schema'.format(field_type, type_name))

            record = {
                'id': '.'.join(path + [field_name]),
                'label': name_to_label(field_name),
                'required': not optional and not subtree_optional,
                'type': html_types[field_type],  # HTML input type
            }
            if ref_url:
                record['ref_url'] = ref_url
            yield record

    return list(walk_fields(root_name))
+
# At startup, we need to load the current metadata schema so we can make a form for it.
# The download happens inside a with block so the HTTP response is closed
# as soon as the YAML has been parsed.
with urllib.request.urlopen('https://raw.githubusercontent.com/arvados/bh20-seq-resource/master/bh20sequploader/bh20seq-schema.yml') as schema_response:
    METADATA_SCHEMA = yaml.safe_load(schema_response)
FORM_ITEMS = generate_form(METADATA_SCHEMA)
+
@app.route('/')
def send_form():
"""
Send the file upload form/front page.
"""
- return send_from_directory('pages', 'index.html')
+
+ return render_template('form.html', fields=FORM_ITEMS)
class FileTooBigError(RuntimeError):
"""
@@ -46,6 +157,20 @@ def copy_with_limit(in_file, out_file, limit=1024*1024):
buf = in_file.read(buf_size)
bytes_used += len(buf)
def parse_input(input_string, html_type):
    """
    Convert the raw string submitted for a form input into a Python value.

    html_type is the HTML input type the value came from: 'text' values are
    returned unchanged and 'number' values are converted with int().

    Raise ValueError if something does not parse.
    Raise NotImplementedError if we forgot to implement a type.
    """

    if html_type == 'text':
        # Text needs no conversion
        return input_string
    if html_type != 'number':
        raise NotImplementedError('Unimplemented input type: {}'.format(html_type))
    return int(input_string)
@app.route('/submit', methods=['POST'])
def recieve_files():
@@ -55,30 +180,68 @@ def recieve_files():
# We're going to work in one directory per request
dest_dir = tempfile.mkdtemp()
+ fasta_dest = os.path.join(dest_dir, 'fasta.fa')
+ metadata_dest = os.path.join(dest_dir, 'metadata.json')
try:
-
- print(request)
- print(request.files)
-
if 'fasta' not in request.files:
return (render_template('error.html',
error_message="You did not include a FASTA file."), 403)
- if 'metadata' not in request.files:
- return (render_template('error.html',
- error_message="You did not include a metadata file."), 403)
-
- fasta_dest = os.path.join(dest_dir, 'fasta.fa')
- metadata_dest = os.path.join(dest_dir, 'metadata.json')
-
try:
with open(fasta_dest, 'wb') as out_stream:
copy_with_limit(request.files.get('fasta').stream, out_stream)
- with open(metadata_dest, 'wb') as out_stream:
- copy_with_limit(request.files.get('metadata').stream, out_stream)
except FileTooBigError as e:
# Delegate to the 413 error handler
return handle_large_file(e)
+ if request.form.get('metadata_type', None) == 'upload':
+ if 'metadata' not in request.files:
+ return (render_template('error.html',
+ error_message="You did not include a metadata file."), 403)
+ try:
+ with open(metadata_dest, 'wb') as out_stream:
+ copy_with_limit(request.files.get('metadata').stream, out_stream)
+ except FileTooBigError as e:
+ # Delegate to the 413 error handler
+ return handle_large_file(e)
+ elif request.form.get('metadata_type', None) == 'fill':
+ # Build a metadata dict
+ metadata = {}
+
+ for item in FORM_ITEMS:
+ # Pull all the field values we wanted from the form
+ if 'heading' in item:
+ continue
+
+ if item['id'] in request.form and len(request.form[item['id']]) > 0:
+ # We have this thing. Make a place in the dict tree for it.
+ parts = item['id'].split('.')
+ key = parts[-1]
+ # Remove leading 'metadata'
+ path = parts[1:-1]
+ dest_dict = metadata
+ for parent in path:
+ if parent not in dest_dict:
+ dest_dict[parent] = {}
+ dest_dict = dest_dict[parent]
+
+ try:
+ # Now finally add the item
+ dest_dict[key] = parse_input(request.form[item['id']], item['type'])
+ except ValueError:
+ # We don't like that input
+ return (render_template('error.html',
+ error_message="You provided an unacceptable value for the metadata item {}".format(item['id'])), 403)
+ elif item['required']:
+ return (render_template('error.html',
+ error_message="You omitted the required metadata item {}".format(item['id'])), 403)
+
+ # Now serialize the file with all the items
+ with open(metadata_dest, 'w') as out_stream:
+ yaml.dump(metadata, out_stream)
+ else:
+ return (render_template('error.html',
+ error_message="You did not include metadata."), 403)
+
# Try and upload files to Arvados
result = subprocess.run(['bh20-seq-uploader', fasta_dest, metadata_dest],
stdout=subprocess.PIPE, stderr=subprocess.PIPE)
diff --git a/pages/index.html b/pages/index.html
deleted file mode 100644
index 543ab7d..0000000
--- a/pages/index.html
+++ /dev/null
@@ -1,29 +0,0 @@
-<!DOCTYPE html>
-<html>
- <head>
- <meta charset="UTF-8">
- <meta name="viewport" content="width=device-width, initial-scale=1">
- <title>Simple Web Uploader for Public SARS-CoV-2 Sequence Resource</title>
- </head>
- <body>
- <h1>Simple Web Uploader for Public SARS-CoV-2 Sequence Resource</h1>
- <hr>
- <p>
- This tool can be used to upload sequenced genomes of SARS-CoV-2 samples to the <a href="https://workbench.lugli.arvadosapi.com/collections/lugli-4zz18-z513nlpqm03hpca">Public SARS-CoV-2 Sequence Resource</a>. Your uploaded sequence will automatically be processed and incorporated into the public pangenome.
- </p>
- <hr>
- <form action="/submit" method="POST" enctype="multipart/form-data">
- <label for="fasta">Select FASTA file for assembled genome (max 1MB):</label>
- <br>
- <input type="file" id="fasta" name="fasta" accept=".fa,.fasta,.fna">
- <br>
- <label for="metadata">Select JSON metadata file following <a href="https://raw.githubusercontent.com/arvados/bh20-seq-resource/master/bh20sequploader/bh20seq-schema.yml">this schema</a> (max 1MB):</label>
- <br>
- <input type="file" id="metadata" name="metadata" accept=".json">
- <br>
- <input type="submit" value="Add to Pangenome">
- </form>
- <hr>
- <small><a href="https://github.com/adamnovak/bh20-simple-web-uploader">Source</a> &middot; Made for <a href="https://github.com/virtual-biohackathons/covid-19-bh20">COVID-19-BH20</a></small>
- </body>
-</html>
diff --git a/templates/form.html b/templates/form.html
new file mode 100644
index 0000000..ec54de5
--- /dev/null
+++ b/templates/form.html
@@ -0,0 +1,95 @@
+<!DOCTYPE html>
+<html>
+ <head>
+ <meta charset="UTF-8">
+ <meta name="viewport" content="width=device-width, initial-scale=1">
+ <title>Simple Web Uploader for Public SARS-CoV-2 Sequence Resource</title>
+ </head>
+ <body>
+ <h1>Simple Web Uploader for Public SARS-CoV-2 Sequence Resource</h1>
+ <hr>
+ <p>
+ This tool can be used to upload sequenced genomes of SARS-CoV-2 samples to the <a href="https://workbench.lugli.arvadosapi.com/collections/lugli-4zz18-z513nlpqm03hpca">Public SARS-CoV-2 Sequence Resource</a>. Your uploaded sequence will automatically be processed and incorporated into the public pangenome.
+ </p>
+ <hr>
+ <form action="/submit" method="POST" enctype="multipart/form-data" id="main_form">
+ <label for="fasta">Select FASTA file for assembled genome (max 1MB):</label>
+ <br>
+ <input type="file" id="fasta" name="fasta" accept=".fa,.fasta,.fna" required>
+ <br>
+
+ <label>Select metadata submission method:</label>
+ <br>
+ <input type="radio" id="metadata_upload" name="metadata_type" value="upload" onchange="setMode()" checked required>
+ <label for="metadata_upload">Upload metadata file</label>
+ <br>
+ <input type="radio" id="metadata_form" name="metadata_type" value="fill" onchange="setMode()" required>
+ <label for="metadata_form">Fill in metadata manually</label>
+ <br>
+
+ <div id="metadata_upload_form_spot">
+ <div id="metadata_upload_form">
+ <label for="metadata">Select JSON or YAML metadata file following <a href="https://raw.githubusercontent.com/arvados/bh20-seq-resource/master/bh20sequploader/bh20seq-schema.yml">this schema</a> (max 1MB):</label>
+ <br>
+ <input type="file" id="metadata" name="metadata" accept=".json,.yml,.yaml" required>
+ <br>
+ </div>
+ </div>
+
+ <div id="metadata_fill_form_spot">
+ <div id="metadata_fill_form">
+ {% for record in fields %}
+ {% if 'heading' in record %}
+ <h4>{{ record['heading'] }}</h4>
+ {% else %}
+ <label for="{{ record['id'] }}">
+ {{ record['label'] }}
+ {{ "*" if record['required'] else "" }}
+ {% if 'ref_url' in record %}
+ <a href="{{ record['ref_url'] }}" title="More Info" target="_blank">?</a>
+ {% endif %}
+ </label>
+ <br>
+ <input type="{{ record['type'] }}" id="{{ record['id'] }}" name="{{ record['id'] }}" {{ "required" if record['required'] else "" }}>
+ <br>
+ {% endif %}
+ {% endfor %}
+ </div>
+ </div>
+
+ <input type="submit" value="Add to Pangenome">
+ </form>
+ <hr>
+ <small><a href="https://github.com/adamnovak/bh20-simple-web-uploader">Source</a> &middot; Made for <a href="https://github.com/virtual-biohackathons/covid-19-bh20">COVID-19-BH20</a></small>
+ <script type="text/javascript">
+ let uploadForm = document.getElementById('metadata_upload_form')
+ let uploadFormSpot = document.getElementById('metadata_upload_form_spot')
+ let fillForm = document.getElementById('metadata_fill_form')
+ let fillFormSpot = document.getElementById('metadata_fill_form_spot')
+
+ function setUploadMode() {
+ // Make the upload form the one in use
+ uploadFormSpot.appendChild(uploadForm)
+ fillFormSpot.removeChild(fillForm)
+ }
+
+ function setFillMode() {
+ // Make the fillable form the one in use
+ uploadFormSpot.removeChild(uploadForm)
+ fillFormSpot.appendChild(fillForm)
+ }
+
+ function setMode() {
+ // Pick mode based on radio
+ if (document.getElementById('metadata_upload').checked) {
+ setUploadMode()
+ } else {
+ setFillMode()
+ }
+ }
+
+ // Start in mode appropriate to selected form item
+ setMode()
+ </script>
+ </body>
+</html>