From 10ccb97cab69cb704c154387d544a74cd38d3cdf Mon Sep 17 00:00:00 2001 From: Michael R. Crusoe Date: Thu, 9 Apr 2020 09:15:27 +0200 Subject: update my affiliation & ORCID --- paper/paper.md | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/paper/paper.md b/paper/paper.md index caa9903..212858f 100644 --- a/paper/paper.md +++ b/paper/paper.md @@ -19,9 +19,9 @@ authors: - name: Erik Garrison orcid: 0000 affiliation: 5 - - name: Michael Crusoe - orcid: 0000 - affiliation: 6 + - name: Michael R. Crusoe + orcid: 0000-0002-2961-9670 + affiliation: 6, 2 - name: Rutger Vos orcid: 0000 affiliation: 7 @@ -34,6 +34,8 @@ affiliations: index: 1 - name: Curii, Boston, USA index: 2 + - name: Department of Computer Science, Faculty of Sciences, Vrije Universiteit Amsterdam, The Netherlands + index: 6 date: 11 April 2020 bibliography: paper.bib --- -- cgit v1.2.3 From fc872f15da426926414fb7629bf6660d9880ed1e Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Fri, 10 Apr 2020 17:16:35 -0500 Subject: Draft --- paper/paper.bib | 16 ++++++ paper/paper.md | 160 +++++++++++++++++++++++++++++++++++++++++++++++--------- 2 files changed, 151 insertions(+), 25 deletions(-) diff --git a/paper/paper.bib b/paper/paper.bib index e69de29..bcb9c0b 100644 --- a/paper/paper.bib +++ b/paper/paper.bib @@ -0,0 +1,16 @@ +@book{CWL, +title = "Common Workflow Language, v1.0", +abstract = "The Common Workflow Language (CWL) is an informal, multi-vendor working group consisting of various organizations and individuals that have an interest in portability of data analysis workflows. Our goal is to create specifications that enable data scientists to describe analysis tools and workflows that are powerful, easy to use, portable, and support reproducibility.CWL builds on technologies such as JSON-LD and Avro for data modeling and Docker for portable runtime environments. 
CWL is designed to express workflows for data-intensive science, such as Bioinformatics, Medical Imaging, Chemistry, Physics, and Astronomy.This is v1.0 of the CWL tool and workflow specification, released on 2016-07-08", +keywords = "cwl, workflow, specification", +author = "Brad Chapman and John Chilton and Michael Heuer and Andrey Kartashov and Dan Leehr and Herv{\'e} M{\'e}nager and Maya Nedeljkovich and Matt Scales and Stian Soiland-Reyes and Luka Stojanovic", +editor = "Peter Amstutz and Crusoe, {Michael R.} and Nebojša Tijanić", +note = "Specification, product of the Common Workflow Language working group. http://www.commonwl.org/v1.0/", +year = "2016", +month = "7", +day = "8", +doi = "10.6084/m9.figshare.3115156.v2", +language = "English", +publisher = "figshare", +address = "United States", + +} \ No newline at end of file diff --git a/paper/paper.md b/paper/paper.md index caa9903..813c91b 100644 --- a/paper/paper.md +++ b/paper/paper.md @@ -1,8 +1,9 @@ --- -title: 'Public Sequence Resource for COVID-19' +title: 'CPSR: COVID-19 Public Sequence Resource' +title_short: 'CPSR: COVID-19 Public Sequence Resource' tags: - Sequencing - - COVID + - COVID-19 authors: - name: Pjotr Prins orcid: 0000-0002-8021-9162 @@ -25,16 +26,30 @@ authors: - name: Rutger Vos orcid: 0000 affiliation: 7 - - Michael Heuer + - name: Michael Heuer orcid: 0000 affiliation: 8 - + - name: Adam Novak + orcid: 0000 + affiliation: 9 + - name: Alex Kanitz + orcid: 0000 + affiliation: 10 + - name: Jerven Bolleman + orcid: 0000 + affiliation: 11 + - name: Joep de Ligt + orcid: 0000 + affiliation: 12 affiliations: - name: Department of Genetics, Genomics and Informatics, The University of Tennessee Health Science Center, Memphis, TN, USA. 
index: 1 - name: Curii, Boston, USA index: 2 date: 11 April 2020 +event: COVID2020 +group: Public Sequence Uploader +authors_short: Pjotr Prins & Peter Amstutz \emph{et al.} bibliography: paper.bib --- @@ -49,13 +64,48 @@ pasting above link (or yours) with https://github.com/biohackrxiv/bhxiv-gen-pdf +Note that author order will change! + --> # Introduction -As part of the one week COVID-19 Biohackathion 2020, we formed a -working group on creating a public sequence resource for Corona virus. - +As part of the COVID-19 BioHackathon 2020 we formed a working +group to create a COVID-19 Public Sequence Resource (CPSR) for +Corona virus sequences. The general idea was to create a +repository that has a low barrier to entry for uploading sequence +data using best practices. I.e., data published with a creative +commons 4.0 (CC-4.0) license with metadata using state-of-the-art +standards and, perhaps most importantly, providing standardized +workflows that get triggered on upload, so that results are +immediately available in standardized data formats. + +Existing data repositories for viral data include GISAID, EBI ENA +and NCBI. These repositories allow for free sharing of data, but +do not add value in terms of running immediate +computations. Also, GISAID, at this point, has the most complete +collection of genetic sequence data of influenza viruses and +related clinical and epidemiological data through its +database. But, due to a restricted license, data submitted to +GISAID cannot be used for online web services and on-the-fly +computation. In addition, GISAID requires registration, which can take weeks, +and, painfully, forces users to download sequences one at a time +to do any type of analysis. In our opinion this does not fit a +pandemic scenario where fast turnaround times are key and data +analysis has to be agile. 
+ +We managed to create a useful sequence uploader utility within +one week by leveraging existing technologies, such as the Arvados +Cloud platform [@Arvados], the Common Workflow Language (CWL) +[@CWL], Docker images built with Debian packages, and the many +free and open source software packages that are available for +bioinformatics. + +The source code for the CLI uploader and web uploader can be +found [here](https://github.com/arvados/bh20-seq-resource) +(FIXME: we'll have a full page). The CWL workflow definitions can +be found [here](https://github.com/hpobio-lab/viral-analysis) and +on CWL hub (FIXME). + +We aim to add more workflows to CPSR, for example to prepare +sequence data for submission to other public repositories, such +as EBI ENA and GISAID. This will allow researchers to share data +in multiple systems without pain, circumventing current sharing +restrictions. + +# Acknowledgements + +We thank the COVID-19 BioHackathon 2020 and ELIXIR for creating a +unique event that triggered many collaborations. We thank Curii +Corporation for their financial support for creating and running +Arvados instances. We thank Amazon AWS for their financial +support to run COVID-19 workflows. We also want to thank the +other working groups in the BioHackathon who generously +contributed ontologies, workflows and software. 
+ # References -- cgit v1.2.3 From dcd7f12d10e7f6399a0d515606148f85358d9dc7 Mon Sep 17 00:00:00 2001 From: Michael L Heuer Date: Fri, 10 Apr 2020 17:52:45 -0500 Subject: Add author and affiliation --- paper/paper.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/paper/paper.md b/paper/paper.md index 813c91b..bc7e835 100644 --- a/paper/paper.md +++ b/paper/paper.md @@ -27,7 +27,7 @@ authors: orcid: 0000 affiliation: 7 - name: Michael Heuer - orcid: 0000 + orcid: 0000-0002-9052-6000 affiliation: 8 - name: Adam Novak orcid: 0000 @@ -46,6 +46,8 @@ affiliations: index: 1 - name: Curii, Boston, USA index: 2 + - name: RISE Lab, University of California Berkeley, Berkeley, CA, USA. + index: 8 date: 11 April 2020 event: COVID2020 group: Public Sequence Uploader -- cgit v1.2.3 From 89f996912240cfb2f5adcf95f401dd59319dac3b Mon Sep 17 00:00:00 2001 From: Adam Novak Date: Fri, 10 Apr 2020 16:28:08 -0700 Subject: Add affiliation info --- paper/paper.md | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/paper/paper.md b/paper/paper.md index 813c91b..b789f60 100644 --- a/paper/paper.md +++ b/paper/paper.md @@ -29,8 +29,8 @@ authors: - name: Michael Heuer orcid: 0000 affiliation: 8 - - name: Adam Novak - orcid: 0000 + - name: Adam M Novak + orcid: 0000-0001-5828-047X affiliation: 9 - name: Alex Kanitz orcid: 0000 @@ -46,6 +46,8 @@ affiliations: index: 1 - name: Curii, Boston, USA index: 2 + - name: UC Santa Cruz Genomics Institute, University of California, Santa Cruz, CA 95064, USA. 
+ index: 9 date: 11 April 2020 event: COVID2020 group: Public Sequence Uploader -- cgit v1.2.3 From fcd45e42942750950076553ac995d738c863aa7a Mon Sep 17 00:00:00 2001 From: Adam Novak Date: Fri, 10 Apr 2020 16:30:21 -0700 Subject: Grab Erik --- paper/paper.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/paper/paper.md b/paper/paper.md index b789f60..e7678dc 100644 --- a/paper/paper.md +++ b/paper/paper.md @@ -31,7 +31,7 @@ authors: affiliation: 8 - name: Adam M Novak orcid: 0000-0001-5828-047X - affiliation: 9 + affiliation: 5 - name: Alex Kanitz orcid: 0000 affiliation: 10 @@ -47,7 +47,7 @@ affiliations: - name: Curii, Boston, USA index: 2 - name: UC Santa Cruz Genomics Institute, University of California, Santa Cruz, CA 95064, USA. - index: 9 + index: 5 date: 11 April 2020 event: COVID2020 group: Public Sequence Uploader -- cgit v1.2.3 From d2698e74c6f5f2977568005232f1b76142ee217f Mon Sep 17 00:00:00 2001 From: BonfaceKilz Date: Sat, 11 Apr 2020 02:32:02 +0300 Subject: Add custom css styling - Update font - Make layout look more aesthetic --- bh20simplewebuploader/templates/form.html | 259 +++++++++++++++++++++--------- 1 file changed, 187 insertions(+), 72 deletions(-) diff --git a/bh20simplewebuploader/templates/form.html b/bh20simplewebuploader/templates/form.html index 2934a7c..6720ec7 100644 --- a/bh20simplewebuploader/templates/form.html +++ b/bh20simplewebuploader/templates/form.html @@ -1,95 +1,210 @@ + + + Simple Web Uploader for Public SARS-CoV-2 Sequence Resource

Simple Web Uploader for Public SARS-CoV-2 Sequence Resource


-

- This tool can be used to upload sequenced genomes of SARS-CoV-2 samples to the Public SARS-CoV-2 Sequence Resource. Your uploaded sequence will automatically be processed and incorporated into the public pangenome. -

-
-
- -
- -
- - -
- - -
- - -
- -
-
- +
+ +

+ This tool can be used to upload sequenced genomes of SARS-CoV-2 samples to the Public SARS-CoV-2 Sequence Resource. Your uploaded sequence will automatically be processed and incorporated into the public pangenome. +

+
+ +
+ +
+
+ + -
- -
-
- {% for record in fields %} + +
+
+ +
+ +
+
+
+ +
+
+ {{ record }} + {% for record in fields %} + {% if 'heading' in record %} -

{{ record['heading'] }}

+ {% if loop.index > 1 %} +
+ {% endif %} +
+

{{ record['heading'] }}

{% else %} - -
- -
+ + {% endif %} + {% if loop.index == loop.length %} +
+ {% endif %} {% endfor %}
-
- - - +
+ + + + +
- Source · Made for COVID-19-BH20 + Source · Made for COVID-19-BH20 + -- cgit v1.2.3 From a0feaff212f5e4c030dc231a23f8df704ac3aa53 Mon Sep 17 00:00:00 2001 From: Adam Novak Date: Fri, 10 Apr 2020 16:37:48 -0700 Subject: Fix README typos --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 8a5a6dd..2a1657d 100644 --- a/README.md +++ b/README.md @@ -59,7 +59,7 @@ sudo apt install -y virtualenv git libcurl4-openssl-dev build-essential python3- pip3 install --user git+https://github.com/arvados/bh20-seq-resource.git@master ``` -3. **Make sure the tool is on your `PATH`.** THe `pip3` command will install the uploader in `.local/bin` inside your home directory. Your shell may not know to look for commands there by default. To fix this for the terminal you currently have open, run: +3. **Make sure the tool is on your `PATH`.** The `pip3` command will install the uploader in `.local/bin` inside your home directory. Your shell may not know to look for commands there by default. To fix this for the terminal you currently have open, run: ```sh export PATH=$PATH:$HOME/.local/bin @@ -174,7 +174,7 @@ pip3 install gunicorn gunicorn bh20simplewebuploader.main:app ``` -This runs on [http://127.0.0.1:8000/](http://127.0.0.1:8000/) by default, but can be adjusted with various [gunicorn options](http://docs.gunicorn.org/en/latest/run.html#commonly-used-arguments) +This runs on [http://127.0.0.1:8000/](http://127.0.0.1:8000/) by default, but can be adjusted with various [gunicorn options](http://docs.gunicorn.org/en/latest/run.html#commonly-used-arguments). 
-- cgit v1.2.3 From 04912c58cdab27962429be56971afde189d702c4 Mon Sep 17 00:00:00 2001 From: Adam Novak Date: Fri, 10 Apr 2020 16:43:05 -0700 Subject: Clarify supported formats and make web UI take FASTQ --- README.md | 4 ++-- bh20simplewebuploader/main.py | 6 ++++-- bh20simplewebuploader/templates/form.html | 4 ++-- 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 2a1657d..db4fe52 100644 --- a/README.md +++ b/README.md @@ -126,10 +126,10 @@ For running/developing the uploader with GNU Guix see [INSTALL.md](./doc/INSTALL # Usage -Run the uploader with a FASTA file and accompanying metadata file in [JSON-LD format](https://json-ld.org/): +Run the uploader with a FASTA or FASTQ file and accompanying metadata file in JSON or YAML: ```sh -bh20-seq-uploader example/sequence.fasta example/metadata.json +bh20-seq-uploader example/sequence.fasta example/metadata.yaml ``` ## Workflow for Generating a Pangenome diff --git a/bh20simplewebuploader/main.py b/bh20simplewebuploader/main.py index bfc7762..383ef84 100644 --- a/bh20simplewebuploader/main.py +++ b/bh20simplewebuploader/main.py @@ -184,15 +184,17 @@ def receive_files(): # We're going to work in one directory per request dest_dir = tempfile.mkdtemp() + # The uploader will happily accept a FASTQ with this name fasta_dest = os.path.join(dest_dir, 'fasta.fa') metadata_dest = os.path.join(dest_dir, 'metadata.json') try: if 'fasta' not in request.files: return (render_template('error.html', - error_message="You did not include a FASTA file."), 403) + error_message="You did not include a FASTA or FASTQ file."), 403) try: with open(fasta_dest, 'wb') as out_stream: - copy_with_limit(request.files.get('fasta').stream, out_stream) + # Use a plausible file size limit for a little FASTQ + copy_with_limit(request.files.get('fasta').stream, out_stream, limit=50*1024*1024) except FileTooBigError as e: # Delegate to the 413 error handler return handle_large_file(e) diff --git 
a/bh20simplewebuploader/templates/form.html b/bh20simplewebuploader/templates/form.html index 2934a7c..e722ab3 100644 --- a/bh20simplewebuploader/templates/form.html +++ b/bh20simplewebuploader/templates/form.html @@ -13,9 +13,9 @@


- +
- +
-- cgit v1.2.3 From 54efd8a9e08032929f3f779c438f51472b5aabda Mon Sep 17 00:00:00 2001 From: BonfaceKilz Date: Sat, 11 Apr 2020 02:56:20 +0300 Subject: Make CSS responsive for smaller screens --- bh20simplewebuploader/templates/form.html | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/bh20simplewebuploader/templates/form.html b/bh20simplewebuploader/templates/form.html index 6720ec7..8ce6f6f 100644 --- a/bh20simplewebuploader/templates/form.html +++ b/bh20simplewebuploader/templates/form.html @@ -99,6 +99,13 @@ width: 17em; justify-self: center; } + + @media only screen and (max-device-width: 480px) { + .grid-container { + display: flex; + flex-direction: column; + } + } -- cgit v1.2.3 From a764f0496f47f5c93f2a6915d00c8a1e89df0712 Mon Sep 17 00:00:00 2001 From: Adam Novak Date: Fri, 10 Apr 2020 17:13:03 -0700 Subject: Fix source link again --- bh20simplewebuploader/templates/form.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bh20simplewebuploader/templates/form.html b/bh20simplewebuploader/templates/form.html index e870b66..986581f 100644 --- a/bh20simplewebuploader/templates/form.html +++ b/bh20simplewebuploader/templates/form.html @@ -174,7 +174,7 @@

- Source · Made for COVID-19-BH20 + Source · Made for COVID-19-BH20