From 414c308b8860d1b20481a2ec3b2f6381e4f6061b Mon Sep 17 00:00:00 2001 From: Adam Novak Date: Wed, 8 Apr 2020 14:11:39 -0700 Subject: Initial commit of working frontend --- __pycache__/main.cpython-36.pyc | Bin 0 -> 2716 bytes main.py | 98 ++++++++++++++++++++++++++++++++++++++++ pages/index.html | 28 ++++++++++++ templates/error.html | 19 ++++++++ templates/success.html | 24 ++++++++++ 5 files changed, 169 insertions(+) create mode 100644 __pycache__/main.cpython-36.pyc create mode 100644 main.py create mode 100644 pages/index.html create mode 100644 templates/error.html create mode 100644 templates/success.html diff --git a/__pycache__/main.cpython-36.pyc b/__pycache__/main.cpython-36.pyc new file mode 100644 index 0000000..250c562 Binary files /dev/null and b/__pycache__/main.cpython-36.pyc differ diff --git a/main.py b/main.py new file mode 100644 index 0000000..630669c --- /dev/null +++ b/main.py @@ -0,0 +1,98 @@ +import tempfile +import shutil +import subprocess +import os +from flask import Flask, request, redirect, send_file, send_from_directory, render_template + +app = Flask(__name__, static_url_path='/static', static_folder='static') + +# Limit file upload size. We shouldn't be working with anything over 1 MB; these are small genomes. +# We will enforce the limit ourselves and set a higher safety limit here. +app.config['MAX_CONTENT_LENGTH'] = 50 * 1024 * 1024 + +# When a file is too big we get a 413. +@app.errorhandler(413) +def handle_large_file(e): + return (render_template('error.html', + error_message="One of your files is too large. The maximum file size is 1 megabyte."), 413) + +@app.route('/') +def send_form(): + """ + Send the file upload form/front page. + """ + return send_from_directory('pages', 'index.html') + +class FileTooBigError(RuntimeError): + """ + Raised when the user gives a file that is too large. + """ + pass + +def copy_with_limit(in_file, out_file, limit=1024*1024): + """ + Copy a file stream, and raise FileTooBigError if the file is too big. + """ + + bytes_used = 0 + buf_size = 65536 + + buf = in_file.read(buf_size) + bytes_used += len(buf) + while buf: + if bytes_used > limit: + raise FileTooBigError('Hit file length limit') + out_file.write(buf) + buf = in_file.read(buf_size) + bytes_used += len(buf) + + +@app.route('/submit', methods=['POST']) +def recieve_files(): + """ + Recieve the uploaded files. + """ + + # We're going to work in one directory per request + dest_dir = tempfile.mkdtemp() + try: + + print(request) + print(request.files) + + if 'fasta' not in request.files: + return (render_template('error.html', + error_message="You did not include a FASTA file."), 403) + if 'metadata' not in request.files: + return (render_template('error.html', + error_message="You did not include a metadata file."), 403) + + fasta_dest = os.path.join(dest_dir, 'fasta.fa') + metadata_dest = os.path.join(dest_dir, 'metadata.json') + + try: + with open(fasta_dest, 'wb') as out_stream: + copy_with_limit(request.files.get('fasta').stream, out_stream) + with open(metadata_dest, 'wb') as out_stream: + copy_with_limit(request.files.get('metadata').stream, out_stream) + except FileTooBigError as e: + # Delegate to the 413 error handler + return handle_large_file(e) + + # Try and upload files to Arvados + result = subprocess.run(['bh20-seq-uploader', fasta_dest, metadata_dest], + stdout=subprocess.PIPE, stderr=subprocess.PIPE) + + if result.returncode != 0: + # It didn't work. Complain. + error_message="Upload failed. Uploader returned {} and said:\n{}".format(result.returncode, result.stderr) + return (render_template('error.html', error_message=error_message), 403) + else: + # It worked. Say so. + return render_template('success.html', log=result.stdout.decode('utf-8', errors='replace')) + finally: + shutil.rmtree(dest_dir) + + + + diff --git a/pages/index.html b/pages/index.html new file mode 100644 index 0000000..2269791 --- /dev/null +++ b/pages/index.html @@ -0,0 +1,28 @@ + + + + + + Simple Web Uploader for Public SARS-CoV-2 Sequence Resource + + +

Simple Web Uploader for Public SARS-CoV-2 Sequence Resource

+
+

+ This tool can be used to upload sequenced genomes of SARS-CoV-2 samples to the Public SARS-CoV-2 Sequence Resource. Your uploaded sequence will automatically be processed and incorporated into the public pangenome. +

+
+
+ +
+ +
+ +
+ +
+ +
+
+ + diff --git a/templates/error.html b/templates/error.html new file mode 100644 index 0000000..c2ab0a4 --- /dev/null +++ b/templates/error.html @@ -0,0 +1,19 @@ + + + + + + Upload Failed + + +

Upload Failed

+
+

+ Your upload has failed. {{error_message}} +

+

+ Click here to try again. +

+
+ + diff --git a/templates/success.html b/templates/success.html new file mode 100644 index 0000000..1be7861 --- /dev/null +++ b/templates/success.html @@ -0,0 +1,24 @@ + + + + + + Upload Successful + + +

Upload Successful

+
+

+ Your files have been uploaded. They should soon appear as part of the Public SARS-CoV-2 Sequence Resource. +

+

+ The upload log was: +

+
{{log}}
+
+

+ Click here to upload more files. +

+
+ + -- cgit v1.2.3 From ce80c29ef5c93aed80ab3b98a3c2eedb740e32b6 Mon Sep 17 00:00:00 2001 From: Adam Novak Date: Wed, 8 Apr 2020 15:07:39 -0700 Subject: Don't assert that the metadata is really JSON-LD --- pages/index.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pages/index.html b/pages/index.html index 2269791..c2e5b64 100644 --- a/pages/index.html +++ b/pages/index.html @@ -17,7 +17,7 @@

- +

-- cgit v1.2.3 From 60420f991a5bd3502bc6b89747d408da0d922839 Mon Sep 17 00:00:00 2001 From: Adam Novak Date: Wed, 8 Apr 2020 15:11:51 -0700 Subject: Add context links --- pages/index.html | 1 + 1 file changed, 1 insertion(+) diff --git a/pages/index.html b/pages/index.html index c2e5b64..543ab7d 100644 --- a/pages/index.html +++ b/pages/index.html @@ -24,5 +24,6 @@
+ Source · Made for COVID-19-BH20 -- cgit v1.2.3 From 03e857c1a477b04db11cf610760b1f2db7b859c5 Mon Sep 17 00:00:00 2001 From: Adam Novak Date: Thu, 9 Apr 2020 12:43:42 -0700 Subject: Add auto-generated fillable metadata form --- __pycache__/main.cpython-36.pyc | Bin 2716 -> 6764 bytes main.py | 191 +++++++++++++++++++++++++++++++++++++--- pages/index.html | 29 ------ templates/form.html | 95 ++++++++++++++++++++ 4 files changed, 272 insertions(+), 43 deletions(-) delete mode 100644 pages/index.html create mode 100644 templates/form.html diff --git a/__pycache__/main.cpython-36.pyc b/__pycache__/main.cpython-36.pyc index 250c562..0f929ad 100644 Binary files a/__pycache__/main.cpython-36.pyc and b/__pycache__/main.cpython-36.pyc differ diff --git a/main.py b/main.py index 630669c..d0f2793 100644 --- a/main.py +++ b/main.py @@ -1,7 +1,12 @@ +import collections import tempfile import shutil import subprocess import os +import re +import string +import yaml +import urllib.request from flask import Flask, request, redirect, send_file, send_from_directory, render_template app = Flask(__name__, static_url_path='/static', static_folder='static') @@ -16,12 +21,118 @@ def handle_large_file(e): return (render_template('error.html', error_message="One of your files is too large. The maximum file size is 1 megabyte."), 413) + +def type_to_heading(type_name): + """ + Turn a type name like "sampleSchema" from the metadata schema into a human-readable heading. + """ + + # Remove camel case + decamel = re.sub('([A-Z])', r' \1', type_name) + # Split + parts = decamel.split() + # Capitalize words and remove unwanted components + filtered = [part.capitalize() for part in parts if (part.lower() != 'schema' and part != '')] + # Reassemble + return ' '.join(filtered) + +def name_to_label(field_name): + """ + Turn a filed name like "host_health_status" from the metadata schema into a human-readable label. + """ + + return string.capwords(field_name.replace('_', ' ')) + +def generate_form(schema): + """ + Linearize the schema and send a bunch of dicts. + Each dict either has a 'heading' (in which case we put a heading for a + form section in the template) or an 'id', 'label', 'type', and 'required' + (in which case we make a form field in the template). + """ + + # Get the list of form components, one of which is the root + components = schema.get('$graph', []) + + # Find the root + root_name = None + # And also index components by type name + by_name = {} + for component in components: + # Get the name of each + component_name = component.get('name', None) + if isinstance(component_name, str): + # And remember how to map back form it + by_name[component_name] = component + if component.get('documentRoot', False): + # Find whichever one is the root + root_name = component_name + + + def walk_fields(type_name, parent_keys=['metadata'], subtree_optional=False): + """ + Do a traversal of the component tree. + Yield a bunch of form item dicts, in order. + Form IDs are .-separated keypaths for where they are in the structure. + parent_keys is the path of field names to where we are in the root record's document tree. + """ + + if len(parent_keys) > 1: + # First make a heading, if we aren't the very root of the form + yield {'heading': type_to_heading(type_name)} + + for field_name, field_type in by_name.get(type_name, {}).get('fields', {}).items(): + # For each field + + ref_url = None + if not isinstance(field_type, str): + # If the type isn't a string + # See if it has a more info/what goes here URL + ref_url = field_type.get('jsonldPredicate', {}).get('_id', None) + # Grab out its type field + field_type = field_type.get('type', '') + + # Decide if the field is optional (type ends in ?) + optional = False + if len(field_type) > 0 and field_type[-1] == '?': + # It's optional + optional = True + # Drop the ? + field_type = field_type[:-1] + + if field_type in by_name: + # This is a subrecord. We need to recurse + for item in walk_fields(field_type, parent_keys + [field_name], subtree_optional or optional): + yield item + else: + # We know how to make a string input + record = {} + record['id'] = '.'.join(parent_keys + [field_name]) + record['label'] = name_to_label(field_name) + record['required'] = not optional and not subtree_optional + if ref_url: + record['ref_url'] = ref_url + if field_type == 'string': + record['type'] = 'text' # HTML input type + elif field_type == 'int': + record['type'] = 'number' + else: + raise NotImplementedError('Unimplemented field type {} in {} in metadata schema'.format(field_type, type_name)) + yield record + + return list(walk_fields(root_name)) + +# At startup, we need to load the current metadata schema so we can make a form for it +METADATA_SCHEMA = yaml.safe_load(urllib.request.urlopen('https://raw.githubusercontent.com/arvados/bh20-seq-resource/master/bh20sequploader/bh20seq-schema.yml')) +FORM_ITEMS = generate_form(METADATA_SCHEMA) + @app.route('/') def send_form(): """ Send the file upload form/front page. """ - return send_from_directory('pages', 'index.html') + + return render_template('form.html', fields=FORM_ITEMS) class FileTooBigError(RuntimeError): """ @@ -46,6 +157,20 @@ def copy_with_limit(in_file, out_file, limit=1024*1024): buf = in_file.read(buf_size) bytes_used += len(buf) +def parse_input(input_string, html_type): + """ + Parse an input from the given HTML input type into a useful Python type. + + Raise ValueError if something does not parse. + Raise NotImplementedError if we forgot to implement a type. + """ + + if html_type == 'text': + return input_string + elif html_type == 'number': + return int(input_string) + else: + raise NotImplementedError('Unimplemented input type: {}'.format(html_type)) @app.route('/submit', methods=['POST']) def recieve_files(): @@ -55,30 +180,68 @@ def recieve_files(): # We're going to work in one directory per request dest_dir = tempfile.mkdtemp() + fasta_dest = os.path.join(dest_dir, 'fasta.fa') + metadata_dest = os.path.join(dest_dir, 'metadata.json') try: - - print(request) - print(request.files) - if 'fasta' not in request.files: return (render_template('error.html', error_message="You did not include a FASTA file."), 403) - if 'metadata' not in request.files: - return (render_template('error.html', - error_message="You did not include a metadata file."), 403) - - fasta_dest = os.path.join(dest_dir, 'fasta.fa') - metadata_dest = os.path.join(dest_dir, 'metadata.json') - try: with open(fasta_dest, 'wb') as out_stream: copy_with_limit(request.files.get('fasta').stream, out_stream) - with open(metadata_dest, 'wb') as out_stream: - copy_with_limit(request.files.get('metadata').stream, out_stream) except FileTooBigError as e: # Delegate to the 413 error handler return handle_large_file(e) + if request.form.get('metadata_type', None) == 'upload': + if 'metadata' not in request.files: + return (render_template('error.html', + error_message="You did not include a metadata file."), 403) + try: + with open(metadata_dest, 'wb') as out_stream: + copy_with_limit(request.files.get('metadata').stream, out_stream) + except FileTooBigError as e: + # Delegate to the 413 error handler + return handle_large_file(e) + elif request.form.get('metadata_type', None) == 'fill': + # Build a metadata dict + metadata = {} + + for item in FORM_ITEMS: + # Pull all the field values we wanted from the form + if 'heading' in item: + continue + + if item['id'] in request.form and len(request.form[item['id']]) > 0: + # We have this thing. Make a place in the dict tree for it. + parts = item['id'].split('.') + key = parts[-1] + # Remove leading 'metadata' + path = parts[1:-1] + dest_dict = metadata + for parent in path: + if parent not in dest_dict: + dest_dict[parent] = {} + dest_dict = dest_dict[parent] + + try: + # Now finally add the item + dest_dict[key] = parse_input(request.form[item['id']], item['type']) + except ValueError: + # We don't like that input + return (render_template('error.html', + error_message="You provided an unacceptable value for the metadata item {}".format(item['id'])), 403) + elif item['required']: + return (render_template('error.html', + error_message="You omitted the required metadata item {}".format(item['id'])), 403) + + # Now serialize the file with all the items + with open(metadata_dest, 'w') as out_stream: + yaml.dump(metadata, out_stream) + else: + return (render_template('error.html', + error_message="You did not include metadata."), 403) + # Try and upload files to Arvados result = subprocess.run(['bh20-seq-uploader', fasta_dest, metadata_dest], stdout=subprocess.PIPE, stderr=subprocess.PIPE) diff --git a/pages/index.html b/pages/index.html deleted file mode 100644 index 543ab7d..0000000 --- a/pages/index.html +++ /dev/null @@ -1,29 +0,0 @@ - - - - - - Simple Web Uploader for Public SARS-CoV-2 Sequence Resource - - -

Simple Web Uploader for Public SARS-CoV-2 Sequence Resource

-
-

- This tool can be used to upload sequenced genomes of SARS-CoV-2 samples to the Public SARS-CoV-2 Sequence Resource. Your uploaded sequence will automatically be processed and incorporated into the public pangenome. -

-
-
- -
- -
- -
- -
- -
-
- Source · Made for COVID-19-BH20 - - diff --git a/templates/form.html b/templates/form.html new file mode 100644 index 0000000..ec54de5 --- /dev/null +++ b/templates/form.html @@ -0,0 +1,95 @@ + + + + + + Simple Web Uploader for Public SARS-CoV-2 Sequence Resource + + +

Simple Web Uploader for Public SARS-CoV-2 Sequence Resource

+
+

+ This tool can be used to upload sequenced genomes of SARS-CoV-2 samples to the Public SARS-CoV-2 Sequence Resource. Your uploaded sequence will automatically be processed and incorporated into the public pangenome. +

+
+
+ +
+ +
+ + +
+ + +
+ + +
+ +
+
+ +
+ +
+
+
+ +
+
+ {% for record in fields %} + {% if 'heading' in record %} +

{{ record['heading'] }}

+ {% else %} + +
+ +
+ {% endif %} + {% endfor %} +
+
+ + +
+
+ Source · Made for COVID-19-BH20 + + + -- cgit v1.2.3 From 51b5686f1df140628f1b39ecf40b45fbc0d0a59a Mon Sep 17 00:00:00 2001 From: Adam Novak Date: Thu, 9 Apr 2020 12:45:10 -0700 Subject: Don't include pyc --- __pycache__/main.cpython-36.pyc | Bin 6764 -> 0 bytes 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 __pycache__/main.cpython-36.pyc diff --git a/__pycache__/main.cpython-36.pyc b/__pycache__/main.cpython-36.pyc deleted file mode 100644 index 0f929ad..0000000 Binary files a/__pycache__/main.cpython-36.pyc and /dev/null differ -- cgit v1.2.3 From 062230b12bb71c4b906318f1de3d67c0fd26f3ba Mon Sep 17 00:00:00 2001 From: Adam Novak Date: Thu, 9 Apr 2020 12:57:49 -0700 Subject: Make schema link nicer and add example files --- templates/form.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/templates/form.html b/templates/form.html index ec54de5..4ad41e2 100644 --- a/templates/form.html +++ b/templates/form.html @@ -29,7 +29,7 @@
- +

-- cgit v1.2.3 From b71cbe74aca99426872447b6dd343a962fe0a528 Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Thu, 9 Apr 2020 16:25:34 -0500 Subject: Spacing and typo --- main.py | 70 +++++++++++++++++++++++++++++++---------------------------------- 1 file changed, 33 insertions(+), 37 deletions(-) diff --git a/main.py b/main.py index d0f2793..0d9b37a 100644 --- a/main.py +++ b/main.py @@ -26,7 +26,7 @@ def type_to_heading(type_name): """ Turn a type name like "sampleSchema" from the metadata schema into a human-readable heading. """ - + # Remove camel case decamel = re.sub('([A-Z])', r' \1', type_name) # Split @@ -35,12 +35,12 @@ def type_to_heading(type_name): filtered = [part.capitalize() for part in parts if (part.lower() != 'schema' and part != '')] # Reassemble return ' '.join(filtered) - + def name_to_label(field_name): """ Turn a filed name like "host_health_status" from the metadata schema into a human-readable label. """ - + return string.capwords(field_name.replace('_', ' ')) def generate_form(schema): @@ -50,10 +50,10 @@ def generate_form(schema): form section in the template) or an 'id', 'label', 'type', and 'required' (in which case we make a form field in the template). """ - + # Get the list of form components, one of which is the root components = schema.get('$graph', []) - + # Find the root root_name = None # And also index components by type name @@ -67,8 +67,8 @@ def generate_form(schema): if component.get('documentRoot', False): # Find whichever one is the root root_name = component_name - - + + def walk_fields(type_name, parent_keys=['metadata'], subtree_optional=False): """ Do a traversal of the component tree. @@ -76,14 +76,14 @@ def generate_form(schema): Form IDs are .-separated keypaths for where they are in the structure. parent_keys is the path of field names to where we are in the root record's document tree. """ - + if len(parent_keys) > 1: # First make a heading, if we aren't the very root of the form yield {'heading': type_to_heading(type_name)} - + for field_name, field_type in by_name.get(type_name, {}).get('fields', {}).items(): # For each field - + ref_url = None if not isinstance(field_type, str): # If the type isn't a string @@ -91,7 +91,7 @@ def generate_form(schema): ref_url = field_type.get('jsonldPredicate', {}).get('_id', None) # Grab out its type field field_type = field_type.get('type', '') - + # Decide if the field is optional (type ends in ?) optional = False if len(field_type) > 0 and field_type[-1] == '?': @@ -99,7 +99,7 @@ def generate_form(schema): optional = True # Drop the ? field_type = field_type[:-1] - + if field_type in by_name: # This is a subrecord. We need to recurse for item in walk_fields(field_type, parent_keys + [field_name], subtree_optional or optional): @@ -119,9 +119,9 @@ def generate_form(schema): else: raise NotImplementedError('Unimplemented field type {} in {} in metadata schema'.format(field_type, type_name)) yield record - + return list(walk_fields(root_name)) - + # At startup, we need to load the current metadata schema so we can make a form for it METADATA_SCHEMA = yaml.safe_load(urllib.request.urlopen('https://raw.githubusercontent.com/arvados/bh20-seq-resource/master/bh20sequploader/bh20seq-schema.yml')) FORM_ITEMS = generate_form(METADATA_SCHEMA) @@ -131,23 +131,23 @@ def send_form(): """ Send the file upload form/front page. """ - + return render_template('form.html', fields=FORM_ITEMS) - + class FileTooBigError(RuntimeError): """ Raised when the user gives a file that is too large. """ pass - + def copy_with_limit(in_file, out_file, limit=1024*1024): """ Copy a file stream, and raise FileTooBigError if the file is too big. """ - + bytes_used = 0 buf_size = 65536 - + buf = in_file.read(buf_size) bytes_used += len(buf) while buf: @@ -156,28 +156,28 @@ def copy_with_limit(in_file, out_file, limit=1024*1024): out_file.write(buf) buf = in_file.read(buf_size) bytes_used += len(buf) - + def parse_input(input_string, html_type): """ Parse an input from the given HTML input type into a useful Python type. - + Raise ValueError if something does not parse. Raise NotImplementedError if we forgot to implement a type. """ - + if html_type == 'text': return input_string elif html_type == 'number': return int(input_string) else: raise NotImplementedError('Unimplemented input type: {}'.format(html_type)) - + @app.route('/submit', methods=['POST']) -def recieve_files(): +def receive_files(): """ - Recieve the uploaded files. + Receive the uploaded files. """ - + # We're going to work in one directory per request dest_dir = tempfile.mkdtemp() fasta_dest = os.path.join(dest_dir, 'fasta.fa') @@ -192,7 +192,7 @@ def recieve_files(): except FileTooBigError as e: # Delegate to the 413 error handler return handle_large_file(e) - + if request.form.get('metadata_type', None) == 'upload': if 'metadata' not in request.files: return (render_template('error.html', @@ -206,12 +206,12 @@ def recieve_files(): elif request.form.get('metadata_type', None) == 'fill': # Build a metadata dict metadata = {} - + for item in FORM_ITEMS: # Pull all the field values we wanted from the form if 'heading' in item: continue - + if item['id'] in request.form and len(request.form[item['id']]) > 0: # We have this thing. Make a place in the dict tree for it. parts = item['id'].split('.') @@ -223,7 +223,7 @@ def recieve_files(): if parent not in dest_dict: dest_dict[parent] = {} dest_dict = dest_dict[parent] - + try: # Now finally add the item dest_dict[key] = parse_input(request.form[item['id']], item['type']) @@ -234,18 +234,18 @@ def recieve_files(): elif item['required']: return (render_template('error.html', error_message="You omitted the required metadata item {}".format(item['id'])), 403) - + # Now serialize the file with all the items with open(metadata_dest, 'w') as out_stream: yaml.dump(metadata, out_stream) else: return (render_template('error.html', error_message="You did not include metadata."), 403) - + # Try and upload files to Arvados result = subprocess.run(['bh20-seq-uploader', fasta_dest, metadata_dest], stdout=subprocess.PIPE, stderr=subprocess.PIPE) - + if result.returncode != 0: # It didn't work. Complain. error_message="Upload failed. Uploader returned {} and said:\n{}".format(result.returncode, result.stderr) @@ -255,7 +255,3 @@ def recieve_files(): return render_template('success.html', log=result.stdout.decode('utf-8', errors='replace')) finally: shutil.rmtree(dest_dir) - - - - -- cgit v1.2.3 From 02615e46e56376302ef99f7223f447a070248214 Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Thu, 9 Apr 2020 17:11:25 -0500 Subject: Notes --- main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main.py b/main.py index 0d9b37a..b4e8681 100644 --- a/main.py +++ b/main.py @@ -242,7 +242,7 @@ def receive_files(): return (render_template('error.html', error_message="You did not include metadata."), 403) - # Try and upload files to Arvados + # Try and upload files to Arvados using the sequence uploader CLI result = subprocess.run(['bh20-seq-uploader', fasta_dest, metadata_dest], stdout=subprocess.PIPE, stderr=subprocess.PIPE) -- cgit v1.2.3 From 7d26be925f37b1f98cac23b018dd1a72fa506a3f Mon Sep 17 00:00:00 2001 From: Adam Novak Date: Thu, 9 Apr 2020 15:41:29 -0700 Subject: Put back in directory --- bh20simplewebuploader/main.py | 257 +++++++++++++++++++++++++++ bh20simplewebuploader/templates/error.html | 19 ++ bh20simplewebuploader/templates/form.html | 95 ++++++++++ bh20simplewebuploader/templates/success.html | 24 +++ main.py | 257 --------------------------- templates/error.html | 19 -- templates/form.html | 95 ---------- templates/success.html | 24 --- 8 files changed, 395 insertions(+), 395 deletions(-) create mode 100644 bh20simplewebuploader/main.py create mode 100644 bh20simplewebuploader/templates/error.html create mode 100644 bh20simplewebuploader/templates/form.html create mode 100644 bh20simplewebuploader/templates/success.html delete mode 100644 main.py delete mode 100644 templates/error.html delete mode 100644 templates/form.html delete mode 100644 templates/success.html diff --git a/bh20simplewebuploader/main.py b/bh20simplewebuploader/main.py new file mode 100644 index 0000000..b4e8681 --- /dev/null +++ b/bh20simplewebuploader/main.py @@ -0,0 +1,257 @@ +import collections +import tempfile +import shutil +import subprocess +import os +import re +import string +import yaml +import urllib.request +from flask import Flask, request, redirect, send_file, send_from_directory, render_template + +app = Flask(__name__, static_url_path='/static', static_folder='static') + +# Limit file upload size. We shouldn't be working with anything over 1 MB; these are small genomes. +# We will enforce the limit ourselves and set a higher safety limit here. +app.config['MAX_CONTENT_LENGTH'] = 50 * 1024 * 1024 + +# When a file is too big we get a 413. +@app.errorhandler(413) +def handle_large_file(e): + return (render_template('error.html', + error_message="One of your files is too large. The maximum file size is 1 megabyte."), 413) + + +def type_to_heading(type_name): + """ + Turn a type name like "sampleSchema" from the metadata schema into a human-readable heading. + """ + + # Remove camel case + decamel = re.sub('([A-Z])', r' \1', type_name) + # Split + parts = decamel.split() + # Capitalize words and remove unwanted components + filtered = [part.capitalize() for part in parts if (part.lower() != 'schema' and part != '')] + # Reassemble + return ' '.join(filtered) + +def name_to_label(field_name): + """ + Turn a filed name like "host_health_status" from the metadata schema into a human-readable label. + """ + + return string.capwords(field_name.replace('_', ' ')) + +def generate_form(schema): + """ + Linearize the schema and send a bunch of dicts. + Each dict either has a 'heading' (in which case we put a heading for a + form section in the template) or an 'id', 'label', 'type', and 'required' + (in which case we make a form field in the template). + """ + + # Get the list of form components, one of which is the root + components = schema.get('$graph', []) + + # Find the root + root_name = None + # And also index components by type name + by_name = {} + for component in components: + # Get the name of each + component_name = component.get('name', None) + if isinstance(component_name, str): + # And remember how to map back form it + by_name[component_name] = component + if component.get('documentRoot', False): + # Find whichever one is the root + root_name = component_name + + + def walk_fields(type_name, parent_keys=['metadata'], subtree_optional=False): + """ + Do a traversal of the component tree. + Yield a bunch of form item dicts, in order. + Form IDs are .-separated keypaths for where they are in the structure. + parent_keys is the path of field names to where we are in the root record's document tree. + """ + + if len(parent_keys) > 1: + # First make a heading, if we aren't the very root of the form + yield {'heading': type_to_heading(type_name)} + + for field_name, field_type in by_name.get(type_name, {}).get('fields', {}).items(): + # For each field + + ref_url = None + if not isinstance(field_type, str): + # If the type isn't a string + # See if it has a more info/what goes here URL + ref_url = field_type.get('jsonldPredicate', {}).get('_id', None) + # Grab out its type field + field_type = field_type.get('type', '') + + # Decide if the field is optional (type ends in ?) + optional = False + if len(field_type) > 0 and field_type[-1] == '?': + # It's optional + optional = True + # Drop the ? + field_type = field_type[:-1] + + if field_type in by_name: + # This is a subrecord. We need to recurse + for item in walk_fields(field_type, parent_keys + [field_name], subtree_optional or optional): + yield item + else: + # We know how to make a string input + record = {} + record['id'] = '.'.join(parent_keys + [field_name]) + record['label'] = name_to_label(field_name) + record['required'] = not optional and not subtree_optional + if ref_url: + record['ref_url'] = ref_url + if field_type == 'string': + record['type'] = 'text' # HTML input type + elif field_type == 'int': + record['type'] = 'number' + else: + raise NotImplementedError('Unimplemented field type {} in {} in metadata schema'.format(field_type, type_name)) + yield record + + return list(walk_fields(root_name)) + +# At startup, we need to load the current metadata schema so we can make a form for it +METADATA_SCHEMA = yaml.safe_load(urllib.request.urlopen('https://raw.githubusercontent.com/arvados/bh20-seq-resource/master/bh20sequploader/bh20seq-schema.yml')) +FORM_ITEMS = generate_form(METADATA_SCHEMA) + +@app.route('/') +def send_form(): + """ + Send the file upload form/front page. + """ + + return render_template('form.html', fields=FORM_ITEMS) + +class FileTooBigError(RuntimeError): + """ + Raised when the user gives a file that is too large. + """ + pass + +def copy_with_limit(in_file, out_file, limit=1024*1024): + """ + Copy a file stream, and raise FileTooBigError if the file is too big. + """ + + bytes_used = 0 + buf_size = 65536 + + buf = in_file.read(buf_size) + bytes_used += len(buf) + while buf: + if bytes_used > limit: + raise FileTooBigError('Hit file length limit') + out_file.write(buf) + buf = in_file.read(buf_size) + bytes_used += len(buf) + +def parse_input(input_string, html_type): + """ + Parse an input from the given HTML input type into a useful Python type. + + Raise ValueError if something does not parse. + Raise NotImplementedError if we forgot to implement a type. + """ + + if html_type == 'text': + return input_string + elif html_type == 'number': + return int(input_string) + else: + raise NotImplementedError('Unimplemented input type: {}'.format(html_type)) + +@app.route('/submit', methods=['POST']) +def receive_files(): + """ + Receive the uploaded files. + """ + + # We're going to work in one directory per request + dest_dir = tempfile.mkdtemp() + fasta_dest = os.path.join(dest_dir, 'fasta.fa') + metadata_dest = os.path.join(dest_dir, 'metadata.json') + try: + if 'fasta' not in request.files: + return (render_template('error.html', + error_message="You did not include a FASTA file."), 403) + try: + with open(fasta_dest, 'wb') as out_stream: + copy_with_limit(request.files.get('fasta').stream, out_stream) + except FileTooBigError as e: + # Delegate to the 413 error handler + return handle_large_file(e) + + if request.form.get('metadata_type', None) == 'upload': + if 'metadata' not in request.files: + return (render_template('error.html', + error_message="You did not include a metadata file."), 403) + try: + with open(metadata_dest, 'wb') as out_stream: + copy_with_limit(request.files.get('metadata').stream, out_stream) + except FileTooBigError as e: + # Delegate to the 413 error handler + return handle_large_file(e) + elif request.form.get('metadata_type', None) == 'fill': + # Build a metadata dict + metadata = {} + + for item in FORM_ITEMS: + # Pull all the field values we wanted from the form + if 'heading' in item: + continue + + if item['id'] in request.form and len(request.form[item['id']]) > 0: + # We have this thing. Make a place in the dict tree for it. + parts = item['id'].split('.') + key = parts[-1] + # Remove leading 'metadata' + path = parts[1:-1] + dest_dict = metadata + for parent in path: + if parent not in dest_dict: + dest_dict[parent] = {} + dest_dict = dest_dict[parent] + + try: + # Now finally add the item + dest_dict[key] = parse_input(request.form[item['id']], item['type']) + except ValueError: + # We don't like that input + return (render_template('error.html', + error_message="You provided an unacceptable value for the metadata item {}".format(item['id'])), 403) + elif item['required']: + return (render_template('error.html', + error_message="You omitted the required metadata item {}".format(item['id'])), 403) + + # Now serialize the file with all the items + with open(metadata_dest, 'w') as out_stream: + yaml.dump(metadata, out_stream) + else: + return (render_template('error.html', + error_message="You did not include metadata."), 403) + + # Try and upload files to Arvados using the sequence uploader CLI + result = subprocess.run(['bh20-seq-uploader', fasta_dest, metadata_dest], + stdout=subprocess.PIPE, stderr=subprocess.PIPE) + + if result.returncode != 0: + # It didn't work. Complain. + error_message="Upload failed. Uploader returned {} and said:\n{}".format(result.returncode, result.stderr) + return (render_template('error.html', error_message=error_message), 403) + else: + # It worked. Say so. + return render_template('success.html', log=result.stdout.decode('utf-8', errors='replace')) + finally: + shutil.rmtree(dest_dir) diff --git a/bh20simplewebuploader/templates/error.html b/bh20simplewebuploader/templates/error.html new file mode 100644 index 0000000..c2ab0a4 --- /dev/null +++ b/bh20simplewebuploader/templates/error.html @@ -0,0 +1,19 @@ + + + + + + Upload Failed + + +

Upload Failed

+
+

+ Your upload has failed. {{error_message}} +

+

+ Click here to try again. +

+
+ + diff --git a/bh20simplewebuploader/templates/form.html b/bh20simplewebuploader/templates/form.html new file mode 100644 index 0000000..4ad41e2 --- /dev/null +++ b/bh20simplewebuploader/templates/form.html @@ -0,0 +1,95 @@ + + + + + + Simple Web Uploader for Public SARS-CoV-2 Sequence Resource + + +

Simple Web Uploader for Public SARS-CoV-2 Sequence Resource

+
+

+ This tool can be used to upload sequenced genomes of SARS-CoV-2 samples to the Public SARS-CoV-2 Sequence Resource. Your uploaded sequence will automatically be processed and incorporated into the public pangenome. +

+
+
+ +
+ +
+ + +
+ + +
+ + +
+ +
+
+ +
+ +
+
+
+ +
+
+ {% for record in fields %} + {% if 'heading' in record %} +

{{ record['heading'] }}

+ {% else %} + +
+ +
+ {% endif %} + {% endfor %} +
+
+ + +
+
+ Source · Made for COVID-19-BH20 + + + diff --git a/bh20simplewebuploader/templates/success.html b/bh20simplewebuploader/templates/success.html new file mode 100644 index 0000000..1be7861 --- /dev/null +++ b/bh20simplewebuploader/templates/success.html @@ -0,0 +1,24 @@ + + + + + + Upload Successful + + +

Upload Successful

+
+

+ Your files have been uploaded. They should soon appear as part of the Public SARS-CoV-2 Sequence Resource. +

+

+ The upload log was: +

+
{{log}}
+
+

+ Click here to upload more files. +

+
+ + diff --git a/main.py b/main.py deleted file mode 100644 index b4e8681..0000000 --- a/main.py +++ /dev/null @@ -1,257 +0,0 @@ -import collections -import tempfile -import shutil -import subprocess -import os -import re -import string -import yaml -import urllib.request -from flask import Flask, request, redirect, send_file, send_from_directory, render_template - -app = Flask(__name__, static_url_path='/static', static_folder='static') - -# Limit file upload size. We shouldn't be working with anything over 1 MB; these are small genomes. -# We will enforce the limit ourselves and set a higher safety limit here. -app.config['MAX_CONTENT_LENGTH'] = 50 * 1024 * 1024 - -# When a file is too big we get a 413. -@app.errorhandler(413) -def handle_large_file(e): - return (render_template('error.html', - error_message="One of your files is too large. The maximum file size is 1 megabyte."), 413) - - -def type_to_heading(type_name): - """ - Turn a type name like "sampleSchema" from the metadata schema into a human-readable heading. - """ - - # Remove camel case - decamel = re.sub('([A-Z])', r' \1', type_name) - # Split - parts = decamel.split() - # Capitalize words and remove unwanted components - filtered = [part.capitalize() for part in parts if (part.lower() != 'schema' and part != '')] - # Reassemble - return ' '.join(filtered) - -def name_to_label(field_name): - """ - Turn a filed name like "host_health_status" from the metadata schema into a human-readable label. - """ - - return string.capwords(field_name.replace('_', ' ')) - -def generate_form(schema): - """ - Linearize the schema and send a bunch of dicts. - Each dict either has a 'heading' (in which case we put a heading for a - form section in the template) or an 'id', 'label', 'type', and 'required' - (in which case we make a form field in the template). - """ - - # Get the list of form components, one of which is the root - components = schema.get('$graph', []) - - # Find the root - root_name = None - # And also index components by type name - by_name = {} - for component in components: - # Get the name of each - component_name = component.get('name', None) - if isinstance(component_name, str): - # And remember how to map back form it - by_name[component_name] = component - if component.get('documentRoot', False): - # Find whichever one is the root - root_name = component_name - - - def walk_fields(type_name, parent_keys=['metadata'], subtree_optional=False): - """ - Do a traversal of the component tree. - Yield a bunch of form item dicts, in order. - Form IDs are .-separated keypaths for where they are in the structure. - parent_keys is the path of field names to where we are in the root record's document tree. - """ - - if len(parent_keys) > 1: - # First make a heading, if we aren't the very root of the form - yield {'heading': type_to_heading(type_name)} - - for field_name, field_type in by_name.get(type_name, {}).get('fields', {}).items(): - # For each field - - ref_url = None - if not isinstance(field_type, str): - # If the type isn't a string - # See if it has a more info/what goes here URL - ref_url = field_type.get('jsonldPredicate', {}).get('_id', None) - # Grab out its type field - field_type = field_type.get('type', '') - - # Decide if the field is optional (type ends in ?) - optional = False - if len(field_type) > 0 and field_type[-1] == '?': - # It's optional - optional = True - # Drop the ? - field_type = field_type[:-1] - - if field_type in by_name: - # This is a subrecord. We need to recurse - for item in walk_fields(field_type, parent_keys + [field_name], subtree_optional or optional): - yield item - else: - # We know how to make a string input - record = {} - record['id'] = '.'.join(parent_keys + [field_name]) - record['label'] = name_to_label(field_name) - record['required'] = not optional and not subtree_optional - if ref_url: - record['ref_url'] = ref_url - if field_type == 'string': - record['type'] = 'text' # HTML input type - elif field_type == 'int': - record['type'] = 'number' - else: - raise NotImplementedError('Unimplemented field type {} in {} in metadata schema'.format(field_type, type_name)) - yield record - - return list(walk_fields(root_name)) - -# At startup, we need to load the current metadata schema so we can make a form for it -METADATA_SCHEMA = yaml.safe_load(urllib.request.urlopen('https://raw.githubusercontent.com/arvados/bh20-seq-resource/master/bh20sequploader/bh20seq-schema.yml')) -FORM_ITEMS = generate_form(METADATA_SCHEMA) - -@app.route('/') -def send_form(): - """ - Send the file upload form/front page. - """ - - return render_template('form.html', fields=FORM_ITEMS) - -class FileTooBigError(RuntimeError): - """ - Raised when the user gives a file that is too large. - """ - pass - -def copy_with_limit(in_file, out_file, limit=1024*1024): - """ - Copy a file stream, and raise FileTooBigError if the file is too big. - """ - - bytes_used = 0 - buf_size = 65536 - - buf = in_file.read(buf_size) - bytes_used += len(buf) - while buf: - if bytes_used > limit: - raise FileTooBigError('Hit file length limit') - out_file.write(buf) - buf = in_file.read(buf_size) - bytes_used += len(buf) - -def parse_input(input_string, html_type): - """ - Parse an input from the given HTML input type into a useful Python type. - - Raise ValueError if something does not parse. - Raise NotImplementedError if we forgot to implement a type. - """ - - if html_type == 'text': - return input_string - elif html_type == 'number': - return int(input_string) - else: - raise NotImplementedError('Unimplemented input type: {}'.format(html_type)) - -@app.route('/submit', methods=['POST']) -def receive_files(): - """ - Receive the uploaded files. - """ - - # We're going to work in one directory per request - dest_dir = tempfile.mkdtemp() - fasta_dest = os.path.join(dest_dir, 'fasta.fa') - metadata_dest = os.path.join(dest_dir, 'metadata.json') - try: - if 'fasta' not in request.files: - return (render_template('error.html', - error_message="You did not include a FASTA file."), 403) - try: - with open(fasta_dest, 'wb') as out_stream: - copy_with_limit(request.files.get('fasta').stream, out_stream) - except FileTooBigError as e: - # Delegate to the 413 error handler - return handle_large_file(e) - - if request.form.get('metadata_type', None) == 'upload': - if 'metadata' not in request.files: - return (render_template('error.html', - error_message="You did not include a metadata file."), 403) - try: - with open(metadata_dest, 'wb') as out_stream: - copy_with_limit(request.files.get('metadata').stream, out_stream) - except FileTooBigError as e: - # Delegate to the 413 error handler - return handle_large_file(e) - elif request.form.get('metadata_type', None) == 'fill': - # Build a metadata dict - metadata = {} - - for item in FORM_ITEMS: - # Pull all the field values we wanted from the form - if 'heading' in item: - continue - - if item['id'] in request.form and len(request.form[item['id']]) > 0: - # We have this thing. Make a place in the dict tree for it. - parts = item['id'].split('.') - key = parts[-1] - # Remove leading 'metadata' - path = parts[1:-1] - dest_dict = metadata - for parent in path: - if parent not in dest_dict: - dest_dict[parent] = {} - dest_dict = dest_dict[parent] - - try: - # Now finally add the item - dest_dict[key] = parse_input(request.form[item['id']], item['type']) - except ValueError: - # We don't like that input - return (render_template('error.html', - error_message="You provided an unacceptable value for the metadata item {}".format(item['id'])), 403) - elif item['required']: - return (render_template('error.html', - error_message="You omitted the required metadata item {}".format(item['id'])), 403) - - # Now serialize the file with all the items - with open(metadata_dest, 'w') as out_stream: - yaml.dump(metadata, out_stream) - else: - return (render_template('error.html', - error_message="You did not include metadata."), 403) - - # Try and upload files to Arvados using the sequence uploader CLI - result = subprocess.run(['bh20-seq-uploader', fasta_dest, metadata_dest], - stdout=subprocess.PIPE, stderr=subprocess.PIPE) - - if result.returncode != 0: - # It didn't work. Complain. - error_message="Upload failed. Uploader returned {} and said:\n{}".format(result.returncode, result.stderr) - return (render_template('error.html', error_message=error_message), 403) - else: - # It worked. Say so. - return render_template('success.html', log=result.stdout.decode('utf-8', errors='replace')) - finally: - shutil.rmtree(dest_dir) diff --git a/templates/error.html b/templates/error.html deleted file mode 100644 index c2ab0a4..0000000 --- a/templates/error.html +++ /dev/null @@ -1,19 +0,0 @@ - - - - - - Upload Failed - - -

Upload Failed

-
-

- Your upload has failed. {{error_message}} -

-

- Click here to try again. -

-
- - diff --git a/templates/form.html b/templates/form.html deleted file mode 100644 index 4ad41e2..0000000 --- a/templates/form.html +++ /dev/null @@ -1,95 +0,0 @@ - - - - - - Simple Web Uploader for Public SARS-CoV-2 Sequence Resource - - -

Simple Web Uploader for Public SARS-CoV-2 Sequence Resource

-
-

- This tool can be used to upload sequenced genomes of SARS-CoV-2 samples to the Public SARS-CoV-2 Sequence Resource. Your uploaded sequence will automatically be processed and incorporated into the public pangenome. -

-
-
- -
- -
- - -
- - -
- - -
- -
-
- -
- -
-
-
- -
-
- {% for record in fields %} - {% if 'heading' in record %} -

{{ record['heading'] }}

- {% else %} - -
- -
- {% endif %} - {% endfor %} -
-
- - -
-
- Source · Made for COVID-19-BH20 - - - diff --git a/templates/success.html b/templates/success.html deleted file mode 100644 index 1be7861..0000000 --- a/templates/success.html +++ /dev/null @@ -1,24 +0,0 @@ - - - - - - Upload Successful - - -

Upload Successful

-
-

- Your files have been uploaded. They should soon appear as part of the Public SARS-CoV-2 Sequence Resource. -

-

- The upload log was: -

-
{{log}}
-
-

- Click here to upload more files. -

-
- - -- cgit v1.2.3