aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Dockerfile19
-rw-r--r--README.md31
-rw-r--r--bh20sequploader/bh20seq-schema.yml145
-rw-r--r--bh20sequploader/main.py2
-rw-r--r--bh20simplewebuploader/main.py261
-rw-r--r--bh20simplewebuploader/templates/error.html19
-rw-r--r--bh20simplewebuploader/templates/form.html95
-rw-r--r--bh20simplewebuploader/templates/success.html24
-rw-r--r--doc/INSTALL.md32
-rw-r--r--example/metadata.yaml4
-rw-r--r--example/minimal_example.yaml6
-rw-r--r--setup.py6
12 files changed, 610 insertions, 34 deletions
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..43fa8f2
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,19 @@
+# Dockerfile for containerizing the web interface
+FROM python:3.6-jessie
+WORKDIR /app
+
+# gunicorn is the WSGI server started by CMD below
+RUN pip3 install gunicorn
+
+ADD LICENSE /app/
+ADD gittaggers.py /app/
+ADD setup.py /app/
+ADD README.md /app/
+ADD example /app/example
+# Every package named in setup.py goes into its own directory of the same name
+ADD bh20seqanalyzer /app/bh20seqanalyzer
+ADD bh20sequploader /app/bh20sequploader
+ADD bh20simplewebuploader /app/bh20simplewebuploader
+
+# The "web" extra brings in flask and pyyaml, which bh20simplewebuploader/main.py imports
+RUN pip3 install -e '.[web]'
+
+ENV PORT 8080
+CMD ["gunicorn", "-w", "4", "-b", "0.0.0.0:8080", "bh20simplewebuploader.main:app"]
diff --git a/README.md b/README.md
index 3a8e5f0..8a5a6dd 100644
--- a/README.md
+++ b/README.md
@@ -2,7 +2,7 @@
This repository provides a sequence uploader for the COVID-19 Virtual Biohackathon's Public Sequence Resource project. You can use it to upload the genomes of SARS-CoV-2 samples to make them publicly and freely available to other researchers.
-To get started, first [install the uploader](#installation), and use the `bh20-seq-uploader` command to [uplaod your data](#usage).
+To get started, first [install the uploader](#installation), and use the `bh20-seq-uploader` command to [upload your data](#usage).
# Installation
@@ -149,3 +149,32 @@ Here we convert such a pipeline into the Common Workflow Language (CWL) and
sources can be found [here](https://github.com/hpobio-lab/viral-analysis/tree/master/cwl/pangenome-generate).
For more information on building pangenome models, [see this wiki page](https://github.com/virtual-biohackathons/covid-19-bh20/wiki/Pangenome#pangenome-model-from-available-genomes).
+
+# Web Interface
+
+This project comes with a simple web server that lets you use the sequence uploader from a browser. It will work as long as you install the package with the `web` extra.
+
+To run it locally:
+
+```
+virtualenv --python python3 venv
+. venv/bin/activate
+pip install -e .[web]
+env FLASK_APP=bh20simplewebuploader/main.py flask run
+```
+
+Then visit [http://127.0.0.1:5000/](http://127.0.0.1:5000/).
+
+## Production
+
+For production deployment, you can use [gunicorn](https://flask.palletsprojects.com/en/1.1.x/deploying/wsgi-standalone/#gunicorn):
+
+```
+pip3 install gunicorn
+gunicorn bh20simplewebuploader.main:app
+```
+
+This runs on [http://127.0.0.1:8000/](http://127.0.0.1:8000/) by default, but can be adjusted with various [gunicorn options](http://docs.gunicorn.org/en/latest/run.html#commonly-used-arguments).
+
+
+
diff --git a/bh20sequploader/bh20seq-schema.yml b/bh20sequploader/bh20seq-schema.yml
index 5c962d1..cf9b015 100644
--- a/bh20sequploader/bh20seq-schema.yml
+++ b/bh20sequploader/bh20seq-schema.yml
@@ -3,6 +3,10 @@ $namespaces:
sch: https://schema.org/
efo: http://www.ebi.ac.uk/efo/
obo: http://purl.obolibrary.org/obo/
+ sio: http://semanticscience.org/resource/
+ edam: http://edamontology.org/
+ evs: http://ncicb.nci.nih.gov/xml/owl/EVS/Thesaurus.owl#
+
$graph:
- name: hostSchema
@@ -12,37 +16,93 @@ $graph:
type: string
jsonldPredicate:
_id: http://www.ebi.ac.uk/efo/EFO_0000532
- host_id: string
- host_common_name: string?
- host_sex: string?
- host_age: int?
- host_age_unit: string?
- host_health_status: string?
+ host_id:
+ type: string
+ jsonldPredicate:
+ _id: http://semanticscience.org/resource/SIO_000115
+ host_common_name:
+ type: string?
+ jsonldPredicate:
+ _id: http://purl.obolibrary.org/obo/NOMEN_0000037
+ host_sex:
+ type: string
+ jsonldPredicate:
+ _id: http://purl.obolibrary.org/obo/PATO_0000047
+ host_age:
+ type: int?
+ jsonldPredicate:
+ _id: http://purl.obolibrary.org/obo/PATO_0000011
+ host_age_unit:
+ type: string?
+ jsonldPredicate:
+ _id: http://purl.obolibrary.org/obo/UO_0000036
+ host_health_status:
+ type: string?
+ jsonldPredicate:
+ _id: http://purl.obolibrary.org/obo/NCIT_C25688
host_treatment:
type: string?
jsonldPredicate:
_id: http://www.ebi.ac.uk/efo/EFO_0000727
- additional_host_information: string?
+ host_vaccination:
+ type: string?
+ jsonldPredicate:
+ _id: http://purl.obolibrary.org/obo/VO_0000001
+ additional_host_information:
+ type: string?
+ jsonldPredicate:
+ _id: http://semanticscience.org/resource/SIO_001167
- name: sampleSchema
type: record
fields:
- collector_name: string
- collecting_institution: string
- specimen_source: string?
- collection_date: string?
+ collector_name:
+ type: string
+ jsonldPredicate:
+ _id: http://purl.obolibrary.org/obo/OBI_0001895
+ collecting_institution:
+ type: string
+ jsonldPredicate:
+ _id: http://semanticscience.org/resource/SIO_001167
+ specimen_source:
+ type: string?
+ jsonldPredicate:
+ _id: http://purl.obolibrary.org/obo/OBI_0001479
+ collection_date:
+ type: string?
+ jsonldPredicate:
+ _id: http://ncicb.nci.nih.gov/xml/owl/EVS/Thesaurus.owl#C25164
collection_location:
type: string?
jsonldPredicate:
- _id: https://schema.org/fromLocation
- sample_storage_conditions: string?
- additional_collection_information: string?
+ _id: http://purl.obolibrary.org/obo/GAZ_00000448
+ sample_storage_conditions:
+ type: string?
+ jsonldPredicate:
+ _id: http://purl.obolibrary.org/obo/OBI_0001472
+ additional_collection_information:
+ type: string?
+ jsonldPredicate:
+ _id: http://semanticscience.org/resource/SIO_001167
+ sample_id:
+ type: string
+ jsonldPredicate:
+ _id: http://semanticscience.org/resource/SIO_000115
+ source_database_accession:
+ type: string?
+ jsonldPredicate:
+ _id: http://edamontology.org/data_2091
- name: virusSchema
type: record
fields:
- virus_species: string?
- virus_strain: string?
+ virus_species:
+ type: string?
+ jsonldPredicate:
+ _id: http://edamontology.org/data_1875
+ virus_strain:
+ type: string?
+ jsonldPredicate:
+ _id: http://semanticscience.org/resource/SIO_010055
- name: technologySchema
type: record
@@ -50,7 +110,7 @@ $graph:
sample_sequencing_technology:
type: string
jsonldPredicate:
- _id: http://www.ebi.ac.uk/efo/EFO_0000532
+ _id: http://purl.obolibrary.org/obo/OBI_0600047
sequence_assembly_method:
type: string?
jsonldPredicate:
@@ -63,14 +123,42 @@ $graph:
- name: submitterSchema
type: record
fields:
- submitter_name: string
- submitter_address: string?
- originating_lab: string
- lab_address: string?
- provider_sample_id: string?
- submitter_sample_id: string?
- authors: string?
- submitter_id: string?
+ submitter_name:
+ type: string
+ jsonldPredicate:
+ _id: http://semanticscience.org/resource/SIO_000116
+ submitter_date:
+ type: string
+ jsonldPredicate:
+ _id: http://purl.obolibrary.org/obo/NCIT_C94162
+ submitter_address:
+ type: string?
+ jsonldPredicate:
+ _id: http://semanticscience.org/resource/SIO_000172
+ originating_lab:
+ type: string
+ jsonldPredicate:
+ _id: http://purl.obolibrary.org/obo/NCIT_C37984
+ lab_address:
+ type: string?
+ jsonldPredicate:
+ # NOTE(review): OBI_0600047 is also the term given to sample_sequencing_technology in technologySchema - confirm it is the intended term for lab_address
+ _id: http://purl.obolibrary.org/obo/OBI_0600047
+ provider_sample_id:
+ type: string?
+ jsonldPredicate:
+ _id: http://purl.obolibrary.org/obo/NCIT_C37900
+ submitter_sample_id:
+ type: string?
+ jsonldPredicate:
+ _id: http://www.ebi.ac.uk/efo/EFO_0001741
+ authors:
+ type: string?
+ jsonldPredicate:
+ _id: http://purl.obolibrary.org/obo/NCIT_C42781
+ submitter_id:
+ type: string?
+ jsonldPredicate:
+ _id: http://semanticscience.org/resource/SIO_000115
- name: MainSchema
type: record
@@ -81,9 +169,14 @@ $graph:
virus: virusSchema?
technology: technologySchema
submitter: submitterSchema
+ submission:
+ type: string
+ jsonldPredicate:
+ _id: "@id"
+ #_type: "@id"
sequencefile:
doc: The subject (eg the fasta/fastq file) that this metadata describes
type: string?
jsonldPredicate:
_id: "@id"
- _type: "@id"
+ _type: "@id" \ No newline at end of file
diff --git a/bh20sequploader/main.py b/bh20sequploader/main.py
index 1d5b9c3..bfb8c51 100644
--- a/bh20sequploader/main.py
+++ b/bh20sequploader/main.py
@@ -8,7 +8,7 @@ from pathlib import Path
import urllib.request
import socket
import getpass
-import qc_metadata
+from qc_metadata import qc_metadata
ARVADOS_API_HOST='lugli.arvadosapi.com'
ARVADOS_API_TOKEN='2fbebpmbo3rw3x05ueu2i6nx70zhrsb1p22ycu3ry34m4x4462'
diff --git a/bh20simplewebuploader/main.py b/bh20simplewebuploader/main.py
new file mode 100644
index 0000000..bfc7762
--- /dev/null
+++ b/bh20simplewebuploader/main.py
@@ -0,0 +1,261 @@
+import collections
+import tempfile
+import shutil
+import subprocess
+import os
+import re
+import string
+import yaml
+import urllib.request
+from flask import Flask, request, redirect, send_file, send_from_directory, render_template
+
+app = Flask(__name__, static_url_path='/static', static_folder='static')
+
+# Limit file upload size. We shouldn't be working with anything over 1 MB; these are small genomes.
+# We will enforce the limit ourselves and set a higher safety limit here.
+app.config['MAX_CONTENT_LENGTH'] = 50 * 1024 * 1024
+
+# When a file is too big we get a 413.
+@app.errorhandler(413)
+def handle_large_file(e):
+    """
+    Render the error page for an oversized upload, together with HTTP status 413.
+
+    Registered as the app's handler for 413 errors. The argument e is accepted but not used.
+    """
+
+    too_big_message = "One of your files is too large. The maximum file size is 1 megabyte."
+    error_page = render_template('error.html', error_message=too_big_message)
+    return (error_page, 413)
+
+
+def type_to_heading(type_name):
+    """
+    Build a human-readable section heading from a schema type name such as "sampleSchema".
+
+    The name is broken before every capital letter, any word equal to "schema"
+    (ignoring case) is dropped, and the remaining words are capitalized and
+    joined with single spaces.
+    """
+
+    heading_words = []
+    # Put a space before each capital so split() separates the camel-case words
+    for word in re.sub('([A-Z])', r' \1', type_name).split():
+        if word.lower() == 'schema':
+            # Not wanted in the heading
+            continue
+        heading_words.append(word.capitalize())
+    return ' '.join(heading_words)
+
+def name_to_label(field_name):
+    """
+    Build a human-readable label from a schema field name such as "host_health_status".
+
+    Underscores become spaces and each resulting word is capitalized.
+    """
+
+    spaced = field_name.replace('_', ' ')
+    return ' '.join(word.capitalize() for word in spaced.split())
+
+def generate_form(schema):
+    """
+    Linearize the metadata schema into a flat, ordered list of form item dicts.
+
+    Each dict either has a 'heading' (in which case the template puts a heading
+    for a form section) or an 'id', 'label', 'type', and 'required' (in which
+    case the template makes a form field). A field dict also has a 'ref_url'
+    when the schema gives the field an http(s) jsonldPredicate.
+
+    schema is the parsed schema document; its '$graph' list holds the record
+    types, and the one marked 'documentRoot' is where the traversal starts.
+
+    Raises NotImplementedError for a field whose type is neither a known
+    record type nor 'string' or 'int' (each optionally followed by '?').
+    """
+
+    # Index the record types by name, and note which one is the document root
+    root_name = None
+    by_name = {}
+    for component in schema.get('$graph', []):
+        component_name = component.get('name', None)
+        if isinstance(component_name, str):
+            # Remember how to map back from the name to the component
+            by_name[component_name] = component
+        if component.get('documentRoot', False):
+            root_name = component_name
+
+    def walk_fields(type_name, parent_keys=('metadata',), subtree_optional=False):
+        """
+        Do a traversal of the component tree.
+        Yield a bunch of form item dicts, in order.
+        Form IDs are .-separated keypaths for where they are in the structure.
+        parent_keys is the path of field names to where we are in the root record's document tree.
+        subtree_optional is True when some enclosing record field was optional.
+        """
+
+        parent_keys = list(parent_keys)
+
+        if len(parent_keys) > 1:
+            # First make a heading, if we aren't the very root of the form
+            yield {'heading': type_to_heading(type_name)}
+
+        for field_name, field_type in by_name.get(type_name, {}).get('fields', {}).items():
+            ref_url = None
+            if not isinstance(field_type, str):
+                # Long form: a mapping with a 'type' and maybe a 'jsonldPredicate'.
+                # The predicate is either a bare string or a mapping with an '_id'.
+                predicate = field_type.get('jsonldPredicate', {})
+                if isinstance(predicate, str):
+                    ref_url = predicate
+                else:
+                    ref_url = predicate.get('_id', None)
+                # Grab out its type field
+                field_type = field_type.get('type', '')
+
+            if not (isinstance(ref_url, str) and ref_url.startswith(('http://', 'https://'))):
+                # Predicates like "@id" are JSON-LD keywords, not pages we can link to
+                ref_url = None
+
+            if not isinstance(field_type, str):
+                # Anything but a plain type name is not something we can make an input for
+                raise NotImplementedError('Unimplemented field type {} in {} in metadata schema'.format(field_type, type_name))
+
+            # The field is optional when its type ends in ?
+            optional = field_type.endswith('?')
+            if optional:
+                # Drop the ?
+                field_type = field_type[:-1]
+
+            if field_type in by_name:
+                # This is a subrecord. We need to recurse
+                for item in walk_fields(field_type, parent_keys + [field_name], subtree_optional or optional):
+                    yield item
+                continue
+
+            # Otherwise it has to be a type we know how to make an input for
+            record = {}
+            record['id'] = '.'.join(parent_keys + [field_name])
+            record['label'] = name_to_label(field_name)
+            record['required'] = not optional and not subtree_optional
+            if ref_url:
+                record['ref_url'] = ref_url
+            if field_type == 'string':
+                record['type'] = 'text'  # HTML input type
+            elif field_type == 'int':
+                record['type'] = 'number'
+            else:
+                raise NotImplementedError('Unimplemented field type {} in {} in metadata schema'.format(field_type, type_name))
+            yield record
+
+    return list(walk_fields(root_name))
+
+# At startup, we need to load the current metadata schema so we can make a form for it.
+# The schema is fetched over HTTPS when this module is imported; the response is
+# closed as soon as it has been parsed.
+with urllib.request.urlopen('https://raw.githubusercontent.com/arvados/bh20-seq-resource/master/bh20sequploader/bh20seq-schema.yml') as schema_response:
+    METADATA_SCHEMA = yaml.safe_load(schema_response)
+FORM_ITEMS = generate_form(METADATA_SCHEMA)
+
+@app.route('/')
+def send_form():
+    """
+    Serve the front page: the upload form, rendered with the generated metadata fields.
+    """
+
+    front_page = render_template('form.html', fields=FORM_ITEMS)
+    return front_page
+
+class FileTooBigError(RuntimeError):
+    """
+    Signals that a file supplied by the user exceeds the allowed size.
+    """
+
+def copy_with_limit(in_file, out_file, limit=1024*1024):
+    """
+    Copy in_file to out_file in 64 KiB chunks, giving up once more than limit has been read.
+
+    Raises FileTooBigError as soon as the running total exceeds limit; the
+    chunk that crosses the limit is not written.
+    """
+
+    chunk_size = 65536
+    total_read = 0
+
+    while True:
+        chunk = in_file.read(chunk_size)
+        if not chunk:
+            # End of input
+            break
+        total_read += len(chunk)
+        if total_read > limit:
+            raise FileTooBigError('Hit file length limit')
+        out_file.write(chunk)
+
+def parse_input(input_string, html_type):
+    """
+    Convert the text of a form input into a Python value, based on the HTML input type.
+
+    'text' inputs are returned unchanged; 'number' inputs are converted with int().
+
+    Raise ValueError if a 'number' input is not a valid integer.
+    Raise NotImplementedError for any other HTML input type.
+    """
+
+    if html_type == 'text':
+        return input_string
+    if html_type == 'number':
+        return int(input_string)
+    raise NotImplementedError('Unimplemented input type: {}'.format(html_type))
+
+@app.route('/submit', methods=['POST'])
+def receive_files():
+    """
+    Receive the uploaded files and hand them to the sequence uploader CLI.
+
+    The request must carry a 'fasta' file. The metadata comes either from an
+    uploaded 'metadata' file (metadata_type 'upload') or from the generated
+    form fields listed in FORM_ITEMS (metadata_type 'fill'), which are
+    assembled into a nested dict and written out with yaml.dump.
+
+    Returns the rendered success page, or a rendered error page paired with
+    an HTTP status code. The per-request temporary directory is always removed.
+    """
+
+    # We're going to work in one directory per request
+    dest_dir = tempfile.mkdtemp()
+    fasta_dest = os.path.join(dest_dir, 'fasta.fa')
+    metadata_dest = os.path.join(dest_dir, 'metadata.json')
+    try:
+        if 'fasta' not in request.files:
+            return (render_template('error.html',
+                                    error_message="You did not include a FASTA file."), 403)
+        try:
+            with open(fasta_dest, 'wb') as out_stream:
+                copy_with_limit(request.files.get('fasta').stream, out_stream)
+        except FileTooBigError as e:
+            # Delegate to the 413 error handler
+            return handle_large_file(e)
+
+        metadata_type = request.form.get('metadata_type', None)
+        if metadata_type == 'upload':
+            if 'metadata' not in request.files:
+                return (render_template('error.html',
+                                        error_message="You did not include a metadata file."), 403)
+            try:
+                with open(metadata_dest, 'wb') as out_stream:
+                    copy_with_limit(request.files.get('metadata').stream, out_stream)
+            except FileTooBigError as e:
+                # Delegate to the 413 error handler
+                return handle_large_file(e)
+        elif metadata_type == 'fill':
+            # Build a metadata dict
+            metadata = {}
+
+            for item in FORM_ITEMS:
+                # Pull all the field values we wanted from the form
+                if 'heading' in item:
+                    continue
+
+                if item['id'] in request.form and len(request.form[item['id']]) > 0:
+                    # We have this thing. Make a place in the dict tree for it.
+                    parts = item['id'].split('.')
+                    key = parts[-1]
+                    # Remove leading 'metadata'
+                    path = parts[1:-1]
+                    dest_dict = metadata
+                    for parent in path:
+                        if parent not in dest_dict:
+                            dest_dict[parent] = {}
+                        dest_dict = dest_dict[parent]
+
+                    try:
+                        # Now finally add the item
+                        dest_dict[key] = parse_input(request.form[item['id']], item['type'])
+                    except ValueError:
+                        # We don't like that input
+                        return (render_template('error.html',
+                                                error_message="You provided an unacceptable value for the metadata item {}".format(item['id'])), 403)
+                elif item['required']:
+                    return (render_template('error.html',
+                                            error_message="You omitted the required metadata item {}".format(item['id'])), 403)
+
+            # Now serialize the file with all the items
+            with open(metadata_dest, 'w') as out_stream:
+                yaml.dump(metadata, out_stream)
+        else:
+            return (render_template('error.html',
+                                    error_message="You did not include metadata."), 403)
+
+        # Try and upload files to Arvados using the sequence uploader CLI
+        result = subprocess.run(['bh20-seq-uploader', fasta_dest, metadata_dest],
+                                stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+
+        if result.returncode != 0:
+            # It didn't work. Complain.
+            # stderr was captured as bytes; decode it so the page shows text rather than a b'...' repr
+            uploader_errors = result.stderr.decode('utf-8', errors='replace')
+            error_message = "Upload failed. Uploader returned {} and said:\n{}".format(result.returncode, uploader_errors)
+            return (render_template('error.html', error_message=error_message), 403)
+
+        # It worked. Say so.
+        return render_template('success.html', log=result.stdout.decode('utf-8', errors='replace'))
+    finally:
+        shutil.rmtree(dest_dir)
diff --git a/bh20simplewebuploader/templates/error.html b/bh20simplewebuploader/templates/error.html
new file mode 100644
index 0000000..c2ab0a4
--- /dev/null
+++ b/bh20simplewebuploader/templates/error.html
@@ -0,0 +1,19 @@
+<!DOCTYPE html>
+<html>
+ <head>
+ <meta charset="UTF-8">
+ <meta name="viewport" content="width=device-width, initial-scale=1">
+ <title>Upload Failed</title>
+ </head>
+ <body>
+ <h1>Upload Failed</h1>
+ <hr>
+ <p>
+ Your upload has failed. {{error_message}}
+ </p>
+ <p>
+ <a href="/">Click here to try again.</a>
+ </p>
+ <hr>
+ </body>
+</html>
diff --git a/bh20simplewebuploader/templates/form.html b/bh20simplewebuploader/templates/form.html
new file mode 100644
index 0000000..2934a7c
--- /dev/null
+++ b/bh20simplewebuploader/templates/form.html
@@ -0,0 +1,95 @@
+<!DOCTYPE html>
+<html>
+ <head>
+ <meta charset="UTF-8">
+ <meta name="viewport" content="width=device-width, initial-scale=1">
+ <title>Simple Web Uploader for Public SARS-CoV-2 Sequence Resource</title>
+ </head>
+ <body>
+ <h1>Simple Web Uploader for Public SARS-CoV-2 Sequence Resource</h1>
+ <hr>
+ <p>
+ This tool can be used to upload sequenced genomes of SARS-CoV-2 samples to the <a href="https://workbench.lugli.arvadosapi.com/collections/lugli-4zz18-z513nlpqm03hpca">Public SARS-CoV-2 Sequence Resource</a>. Your uploaded sequence will automatically be processed and incorporated into the public pangenome.
+ </p>
+ <hr>
+ <form action="/submit" method="POST" enctype="multipart/form-data" id="main_form">
+ <label for="fasta">Select FASTA file for assembled genome (max 1MB):</label>
+ <br>
+ <input type="file" id="fasta" name="fasta" accept=".fa,.fasta,.fna" required>
+ <br>
+
+ <label>Select metadata submission method:</label>
+ <br>
+ <input type="radio" id="metadata_upload" name="metadata_type" value="upload" onchange="setMode()" checked required>
+ <label for="metadata_upload">Upload metadata file</label>
+ <br>
+ <input type="radio" id="metadata_form" name="metadata_type" value="fill" onchange="setMode()" required>
+ <label for="metadata_form">Fill in metadata manually</label>
+ <br>
+
+ <div id="metadata_upload_form_spot">
+ <div id="metadata_upload_form">
+ <label for="metadata">Select JSON or YAML metadata file following <a href="https://github.com/arvados/bh20-seq-resource/blob/master/bh20sequploader/bh20seq-schema.yml" target="_blank">this schema</a> (<a href="https://github.com/arvados/bh20-seq-resource/blob/master/example/metadata.yaml" target="_blank">Example 1</a>, <a href="https://github.com/arvados/bh20-seq-resource/blob/master/example/minimal_example.yaml" target="_blank">Example 2</a>, max 1MB):</label>
+ <br>
+ <input type="file" id="metadata" name="metadata" accept=".json,.yml,.yaml" required>
+ <br>
+ </div>
+ </div>
+
+ <div id="metadata_fill_form_spot">
+ <div id="metadata_fill_form">
+ {% for record in fields %}
+ {% if 'heading' in record %}
+ <h4>{{ record['heading'] }}</h4>
+ {% else %}
+ <label for="{{ record['id'] }}">
+ {{ record['label'] }}
+ {{ "*" if record['required'] else "" }}
+ {% if 'ref_url' in record %}
+ <a href="{{ record['ref_url'] }}" title="More Info" target="_blank">?</a>
+ {% endif %}
+ </label>
+ <br>
+ <input type="{{ record['type'] }}" id="{{ record['id'] }}" name="{{ record['id'] }}" {{ "required" if record['required'] else "" }}>
+ <br>
+ {% endif %}
+ {% endfor %}
+ </div>
+ </div>
+
+ <input type="submit" value="Add to Pangenome">
+ </form>
+ <hr>
+ <small><a href="https://github.com/arvados/bh20-seq-resource">Source</a> &middot; Made for <a href="https://github.com/virtual-biohackathons/covid-19-bh20">COVID-19-BH20</a></small>
+ <script type="text/javascript">
+      // The two metadata sub-forms, and the placeholder elements that hold them
+      let uploadForm = document.getElementById('metadata_upload_form')
+      let uploadFormSpot = document.getElementById('metadata_upload_form_spot')
+      let fillForm = document.getElementById('metadata_fill_form')
+      let fillFormSpot = document.getElementById('metadata_fill_form_spot')
+
+      function showOnly(activeSpot, activeForm, idleSpot, idleForm) {
+        // Detach the idle sub-form and attach the active one.
+        // Safe to call repeatedly: each step is skipped when already done.
+        // NOTE(review): detaching presumably keeps the idle form's required inputs from blocking submission - confirm
+        if (idleForm.parentNode === idleSpot) {
+          idleSpot.removeChild(idleForm)
+        }
+        if (activeForm.parentNode !== activeSpot) {
+          activeSpot.appendChild(activeForm)
+        }
+      }
+
+      function setUploadMode() {
+        // Make the upload form the one in use
+        showOnly(uploadFormSpot, uploadForm, fillFormSpot, fillForm)
+      }
+
+      function setFillMode() {
+        // Make the fillable form the one in use
+        showOnly(fillFormSpot, fillForm, uploadFormSpot, uploadForm)
+      }
+
+      function setMode() {
+        // Pick mode based on radio
+        if (document.getElementById('metadata_upload').checked) {
+          setUploadMode()
+        } else {
+          setFillMode()
+        }
+      }
+
+      // Start in mode appropriate to selected form item
+      setMode()
+ </script>
+ </body>
+</html>
diff --git a/bh20simplewebuploader/templates/success.html b/bh20simplewebuploader/templates/success.html
new file mode 100644
index 0000000..1be7861
--- /dev/null
+++ b/bh20simplewebuploader/templates/success.html
@@ -0,0 +1,24 @@
+<!DOCTYPE html>
+<html>
+ <head>
+ <meta charset="UTF-8">
+ <meta name="viewport" content="width=device-width, initial-scale=1">
+ <title>Upload Successful</title>
+ </head>
+ <body>
+ <h1>Upload Successful</h1>
+ <hr>
+ <p>
+ Your files have been uploaded. They should soon appear as part of the <a href="https://workbench.lugli.arvadosapi.com/collections/lugli-4zz18-z513nlpqm03hpca">Public SARS-CoV-2 Sequence Resource</a>.
+ </p>
+ <p>
+ The upload log was:
+ </p>
+ <pre>{{log}}</pre>
+ <hr>
+ <p>
+ <a href="/">Click here to upload more files.</a>
+ </p>
+ <hr>
+ </body>
+</html>
diff --git a/doc/INSTALL.md b/doc/INSTALL.md
index c5c486c..5e9e7e9 100644
--- a/doc/INSTALL.md
+++ b/doc/INSTALL.md
@@ -4,12 +4,14 @@ Other options for running this tool.
## GNU Guix
-Another way to install this tool is inside a [GNU Guix Environment](https://guix.gnu.org/manual/en/html_node/Invoking-guix-environment.html), which can handle installing dependencies for you even when you don't have root access on an Ubuntu system.
+### Running the CLI uploader
-1. **Set up and enter a container with the necessary dependencies.** After installing Guix as `~/opt/guix/bin/guix`, run:
+Another way to install this tool is inside a [GNU Guix Environment](https://guix.gnu.org/manual/en/html_node/Invoking-guix-environment.html), which can handle installing dependencies for you.
+
+1. **Set up and enter a Guix environment with the necessary dependencies.** After installing Guix, run:
```sh
-~/opt/guix/bin/guix environment -C guix --ad-hoc git python openssl python-pycurl nss-certs
+guix environment -C guix --ad-hoc git python openssl python-pycurl nss-certs
```
2. **Install the tool.** From there you can follow the [user installation instructions](#installation-with-pip3---user). In brief:
@@ -27,5 +29,27 @@ arvados-python-client-2.0.1 ciso8601-2.1.3 future-0.18.2 google-api-python-clien
3. Run the tool directly with
```sh
-~/opt/guix/bin/guix environment guix --ad-hoc git python openssl python-pycurl nss-certs -- python3 bh20sequploader/main.py
+guix environment guix --ad-hoc git python openssl python-pycurl nss-certs -- python3 bh20sequploader/main.py
+```
+
+### Using the Web Uploader
+
+To run the web uploader in a GNU Guix environment:
+
+```
+guix environment guix --ad-hoc git python python-flask python-pyyaml nss-certs --network openssl -- env FLASK_APP=bh20simplewebuploader/main.py flask run
```
+
+The containerized version looks like
+
+```
+guix environment -C guix --ad-hoc git python python-flask python-pyyaml nss-certs --network openssl
+```
+
+and
+
+```
+env FLASK_APP=bh20simplewebuploader/main.py flask run
+```
+
+WIP: add gunicorn container
diff --git a/example/metadata.yaml b/example/metadata.yaml
index 41ff93e..a2f6e57 100644
--- a/example/metadata.yaml
+++ b/example/metadata.yaml
@@ -1,3 +1,5 @@
+submission: publicSequenceResource
+
host:
host_id: XX1
host_species: string
@@ -10,6 +12,7 @@ host:
additional_host_information: string
sample:
+ sample_id: XXX
collector_name: XXX
collecting_institution: XXX
specimen_source: XXX
@@ -36,3 +39,4 @@ submitter:
submitter_sample_id: string
authors: testAuthor
submitter_id: X12
+ submitter_date: Subdate
diff --git a/example/minimal_example.yaml b/example/minimal_example.yaml
index 201b080..f312ab7 100644
--- a/example/minimal_example.yaml
+++ b/example/minimal_example.yaml
@@ -1,8 +1,11 @@
+submission: publicSequenceResource
+
host:
host_id: XX
host_species: string
sample:
+ sample_id: XXX
collector_name: XXX
collecting_institution: XXX
@@ -11,4 +14,5 @@ technology:
submitter:
submitter_name: tester
- originating_lab: testLab \ No newline at end of file
+ originating_lab: testLab
+ submitter_date: Subdate \ No newline at end of file
diff --git a/setup.py b/setup.py
index 48c25aa..41ace7b 100644
--- a/setup.py
+++ b/setup.py
@@ -16,6 +16,7 @@ except ImportError:
tagger = egg_info_cmd.egg_info
install_requires = ["arvados-python-client", "schema-salad"]
+web_requires = ["flask", "pyyaml"]
needs_pytest = {"pytest", "test", "ptr"}.intersection(sys.argv)
pytest_runner = ["pytest < 6", "pytest-runner < 5"] if needs_pytest else []
@@ -29,9 +30,12 @@ setup(
author="Peter Amstutz",
author_email="peter.amstutz@curii.com",
license="Apache 2.0",
- packages=["bh20sequploader", "bh20seqanalyzer"],
+ packages=["bh20sequploader", "bh20seqanalyzer", "bh20simplewebuploader"],
package_data={"bh20sequploader": ["bh20seq-schema.yml"]},
install_requires=install_requires,
+ extras_require={
+ 'web': web_requires
+ },
setup_requires=[] + pytest_runner,
tests_require=["pytest<5"],
entry_points={