diff options
author | Adam Novak | 2020-04-09 12:43:42 -0700 |
---|---|---|
committer | Adam Novak | 2020-04-09 12:43:42 -0700 |
commit | 03e857c1a477b04db11cf610760b1f2db7b859c5 (patch) | |
tree | adb9a7ff2331faa8109c827ded3eb72cb3e3dbff | |
parent | 60420f991a5bd3502bc6b89747d408da0d922839 (diff) | |
download | bh20-seq-resource-03e857c1a477b04db11cf610760b1f2db7b859c5.tar.gz bh20-seq-resource-03e857c1a477b04db11cf610760b1f2db7b859c5.tar.lz bh20-seq-resource-03e857c1a477b04db11cf610760b1f2db7b859c5.zip |
Add auto-generated fillable metadata form
-rw-r--r-- | __pycache__/main.cpython-36.pyc | bin | 2716 -> 6764 bytes | |||
-rw-r--r-- | main.py | 191 | ||||
-rw-r--r-- | pages/index.html | 29 | ||||
-rw-r--r-- | templates/form.html | 95 |
4 files changed, 272 insertions, 43 deletions
diff --git a/__pycache__/main.cpython-36.pyc b/__pycache__/main.cpython-36.pyc Binary files differindex 250c562..0f929ad 100644 --- a/__pycache__/main.cpython-36.pyc +++ b/__pycache__/main.cpython-36.pyc @@ -1,7 +1,12 @@ +import collections import tempfile import shutil import subprocess import os +import re +import string +import yaml +import urllib.request from flask import Flask, request, redirect, send_file, send_from_directory, render_template app = Flask(__name__, static_url_path='/static', static_folder='static') @@ -16,12 +21,118 @@ def handle_large_file(e): return (render_template('error.html', error_message="One of your files is too large. The maximum file size is 1 megabyte."), 413) + +def type_to_heading(type_name): + """ + Turn a type name like "sampleSchema" from the metadata schema into a human-readable heading. + """ + + # Remove camel case + decamel = re.sub('([A-Z])', r' \1', type_name) + # Split + parts = decamel.split() + # Capitalize words and remove unwanted components + filtered = [part.capitalize() for part in parts if (part.lower() != 'schema' and part != '')] + # Reassemble + return ' '.join(filtered) + +def name_to_label(field_name): + """ + Turn a field name like "host_health_status" from the metadata schema into a human-readable label. + """ + + return string.capwords(field_name.replace('_', ' ')) + +def generate_form(schema): + """ + Linearize the schema and send a bunch of dicts. + Each dict either has a 'heading' (in which case we put a heading for a + form section in the template) or an 'id', 'label', 'type', and 'required' + (in which case we make a form field in the template). 
+ """ + # Get the list of form components, one of which is the root + components = schema.get('$graph', []) + + # Find the root + root_name = None + # And also index components by type name + by_name = {} + for component in components: + # Get the name of each + component_name = component.get('name', None) + if isinstance(component_name, str): + # And remember how to map back from it + by_name[component_name] = component + if component.get('documentRoot', False): + # Find whichever one is the root + root_name = component_name + + + def walk_fields(type_name, parent_keys=['metadata'], subtree_optional=False): + """ + Do a traversal of the component tree. + Yield a bunch of form item dicts, in order. + Form IDs are .-separated keypaths for where they are in the structure. + parent_keys is the path of field names to where we are in the root record's document tree. + """ + + if len(parent_keys) > 1: + # First make a heading, if we aren't the very root of the form + yield {'heading': type_to_heading(type_name)} + + for field_name, field_type in by_name.get(type_name, {}).get('fields', {}).items(): + # For each field + + ref_url = None + if not isinstance(field_type, str): + # If the type isn't a string + # See if it has a more info/what goes here URL + ref_url = field_type.get('jsonldPredicate', {}).get('_id', None) + # Grab out its type field + field_type = field_type.get('type', '') + + # Decide if the field is optional (type ends in ?) + optional = False + if len(field_type) > 0 and field_type[-1] == '?': + # It's optional + optional = True + # Drop the ? + field_type = field_type[:-1] + + if field_type in by_name: + # This is a subrecord. 
We need to recurse + for item in walk_fields(field_type, parent_keys + [field_name], subtree_optional or optional): + yield item + else: + # We know how to make a string input + record = {} + record['id'] = '.'.join(parent_keys + [field_name]) + record['label'] = name_to_label(field_name) + record['required'] = not optional and not subtree_optional + if ref_url: + record['ref_url'] = ref_url + if field_type == 'string': + record['type'] = 'text' # HTML input type + elif field_type == 'int': + record['type'] = 'number' + else: + raise NotImplementedError('Unimplemented field type {} in {} in metadata schema'.format(field_type, type_name)) + yield record + + return list(walk_fields(root_name)) + +# At startup, we need to load the current metadata schema so we can make a form for it +METADATA_SCHEMA = yaml.safe_load(urllib.request.urlopen('https://raw.githubusercontent.com/arvados/bh20-seq-resource/master/bh20sequploader/bh20seq-schema.yml')) +FORM_ITEMS = generate_form(METADATA_SCHEMA) + @app.route('/') def send_form(): """ Send the file upload form/front page. """ - return send_from_directory('pages', 'index.html') + + return render_template('form.html', fields=FORM_ITEMS) class FileTooBigError(RuntimeError): """ @@ -46,6 +157,20 @@ def copy_with_limit(in_file, out_file, limit=1024*1024): buf = in_file.read(buf_size) bytes_used += len(buf) +def parse_input(input_string, html_type): + """ + Parse an input from the given HTML input type into a useful Python type. + + Raise ValueError if something does not parse. + Raise NotImplementedError if we forgot to implement a type. 
+ """ + + if html_type == 'text': + return input_string + elif html_type == 'number': + return int(input_string) + else: + raise NotImplementedError('Unimplemented input type: {}'.format(html_type)) @app.route('/submit', methods=['POST']) def recieve_files(): @@ -55,30 +180,68 @@ def recieve_files(): # We're going to work in one directory per request dest_dir = tempfile.mkdtemp() + fasta_dest = os.path.join(dest_dir, 'fasta.fa') + metadata_dest = os.path.join(dest_dir, 'metadata.json') try: - - print(request) - print(request.files) - if 'fasta' not in request.files: return (render_template('error.html', error_message="You did not include a FASTA file."), 403) - if 'metadata' not in request.files: - return (render_template('error.html', - error_message="You did not include a metadata file."), 403) - - fasta_dest = os.path.join(dest_dir, 'fasta.fa') - metadata_dest = os.path.join(dest_dir, 'metadata.json') - try: with open(fasta_dest, 'wb') as out_stream: copy_with_limit(request.files.get('fasta').stream, out_stream) - with open(metadata_dest, 'wb') as out_stream: - copy_with_limit(request.files.get('metadata').stream, out_stream) except FileTooBigError as e: # Delegate to the 413 error handler return handle_large_file(e) + if request.form.get('metadata_type', None) == 'upload': + if 'metadata' not in request.files: + return (render_template('error.html', + error_message="You did not include a metadata file."), 403) + try: + with open(metadata_dest, 'wb') as out_stream: + copy_with_limit(request.files.get('metadata').stream, out_stream) + except FileTooBigError as e: + # Delegate to the 413 error handler + return handle_large_file(e) + elif request.form.get('metadata_type', None) == 'fill': + # Build a metadata dict + metadata = {} + + for item in FORM_ITEMS: + # Pull all the field values we wanted from the form + if 'heading' in item: + continue + + if item['id'] in request.form and len(request.form[item['id']]) > 0: + # We have this thing. 
Make a place in the dict tree for it. + parts = item['id'].split('.') + key = parts[-1] + # Remove leading 'metadata' + path = parts[1:-1] + dest_dict = metadata + for parent in path: + if parent not in dest_dict: + dest_dict[parent] = {} + dest_dict = dest_dict[parent] + + try: + # Now finally add the item + dest_dict[key] = parse_input(request.form[item['id']], item['type']) + except ValueError: + # We don't like that input + return (render_template('error.html', + error_message="You provided an unacceptable value for the metadata item {}".format(item['id'])), 403) + elif item['required']: + return (render_template('error.html', + error_message="You omitted the required metadata item {}".format(item['id'])), 403) + + # Now serialize the file with all the items + with open(metadata_dest, 'w') as out_stream: + yaml.dump(metadata, out_stream) + else: + return (render_template('error.html', + error_message="You did not include metadata."), 403) + # Try and upload files to Arvados result = subprocess.run(['bh20-seq-uploader', fasta_dest, metadata_dest], stdout=subprocess.PIPE, stderr=subprocess.PIPE) diff --git a/pages/index.html b/pages/index.html deleted file mode 100644 index 543ab7d..0000000 --- a/pages/index.html +++ /dev/null @@ -1,29 +0,0 @@ -<!DOCTYPE html> -<html> - <head> - <meta charset="UTF-8"> - <meta name="viewport" content="width=device-width, initial-scale=1"> - <title>Simple Web Uploader for Public SARS-CoV-2 Sequence Resource</title> - </head> - <body> - <h1>Simple Web Uploader for Public SARS-CoV-2 Sequence Resource</h1> - <hr> - <p> - This tool can be used to upload sequenced genomes of SARS-CoV-2 samples to the <a href="https://workbench.lugli.arvadosapi.com/collections/lugli-4zz18-z513nlpqm03hpca">Public SARS-CoV-2 Sequence Resource</a>. Your uploaded sequence will automatically be processed and incorporated into the public pangenome. 
- </p> - <hr> - <form action="/submit" method="POST" enctype="multipart/form-data"> - <label for="fasta">Select FASTA file for assembled genome (max 1MB):</label> - <br> - <input type="file" id="fasta" name="fasta" accept=".fa,.fasta,.fna"> - <br> - <label for="metadata">Select JSON metadata file following <a href="https://raw.githubusercontent.com/arvados/bh20-seq-resource/master/bh20sequploader/bh20seq-schema.yml">this schema</a> (max 1MB):</label> - <br> - <input type="file" id="metadata" name="metadata" accept=".json"> - <br> - <input type="submit" value="Add to Pangenome"> - </form> - <hr> - <small><a href="https://github.com/adamnovak/bh20-simple-web-uploader">Source</a> · Made for <a href="https://github.com/virtual-biohackathons/covid-19-bh20">COVID-19-BH20</a></small> - </body> -</html> diff --git a/templates/form.html b/templates/form.html new file mode 100644 index 0000000..ec54de5 --- /dev/null +++ b/templates/form.html @@ -0,0 +1,95 @@ +<!DOCTYPE html> +<html> + <head> + <meta charset="UTF-8"> + <meta name="viewport" content="width=device-width, initial-scale=1"> + <title>Simple Web Uploader for Public SARS-CoV-2 Sequence Resource</title> + </head> + <body> + <h1>Simple Web Uploader for Public SARS-CoV-2 Sequence Resource</h1> + <hr> + <p> + This tool can be used to upload sequenced genomes of SARS-CoV-2 samples to the <a href="https://workbench.lugli.arvadosapi.com/collections/lugli-4zz18-z513nlpqm03hpca">Public SARS-CoV-2 Sequence Resource</a>. Your uploaded sequence will automatically be processed and incorporated into the public pangenome. 
+ </p> + <hr> + <form action="/submit" method="POST" enctype="multipart/form-data" id="main_form"> + <label for="fasta">Select FASTA file for assembled genome (max 1MB):</label> + <br> + <input type="file" id="fasta" name="fasta" accept=".fa,.fasta,.fna" required> + <br> + + <label>Select metadata submission method:</label> + <br> + <input type="radio" id="metadata_upload" name="metadata_type" value="upload" onchange="setMode()" checked required> + <label for="metadata_upload">Upload metadata file</label> + <br> + <input type="radio" id="metadata_form" name="metadata_type" value="fill" onchange="setMode()" required> + <label for="metadata_form">Fill in metadata manually</label> + <br> + + <div id="metadata_upload_form_spot"> + <div id="metadata_upload_form"> + <label for="metadata">Select JSON or YAML metadata file following <a href="https://raw.githubusercontent.com/arvados/bh20-seq-resource/master/bh20sequploader/bh20seq-schema.yml">this schema</a> (max 1MB):</label> + <br> + <input type="file" id="metadata" name="metadata" accept=".json,.yml,.yaml" required> + <br> + </div> + </div> + + <div id="metadata_fill_form_spot"> + <div id="metadata_fill_form"> + {% for record in fields %} + {% if 'heading' in record %} + <h4>{{ record['heading'] }}</h4> + {% else %} + <label for="{{ record['id'] }}"> + {{ record['label'] }} + {{ "*" if record['required'] else "" }} + {% if 'ref_url' in record %} + <a href="{{ record['ref_url'] }}" title="More Info" target="_blank">?</a> + {% endif %} + </label> + <br> + <input type="{{ record['type'] }}" id="{{ record['id'] }}" name="{{ record['id'] }}" {{ "required" if record['required'] else "" }}> + <br> + {% endif %} + {% endfor %} + </div> + </div> + + <input type="submit" value="Add to Pangenome"> + </form> + <hr> + <small><a href="https://github.com/adamnovak/bh20-simple-web-uploader">Source</a> · Made for <a href="https://github.com/virtual-biohackathons/covid-19-bh20">COVID-19-BH20</a></small> + <script type="text/javascript"> 
+ let uploadForm = document.getElementById('metadata_upload_form') + let uploadFormSpot = document.getElementById('metadata_upload_form_spot') + let fillForm = document.getElementById('metadata_fill_form') + let fillFormSpot = document.getElementById('metadata_fill_form_spot') + + function setUploadMode() { + // Make the upload form the one in use + uploadFormSpot.appendChild(uploadForm) + fillFormSpot.removeChild(fillForm) + } + + function setFillMode() { + // Make the fillable form the one in use + uploadFormSpot.removeChild(uploadForm) + fillFormSpot.appendChild(fillForm) + } + + function setMode() { + // Pick mode based on radio + if (document.getElementById('metadata_upload').checked) { + setUploadMode() + } else { + setFillMode() + } + } + + // Start in mode appropriate to selected form item + setMode() + </script> + </body> +</html> |