diff options
author | Pjotr Prins | 2020-05-30 18:13:48 -0500 |
---|---|---|
committer | Pjotr Prins | 2020-05-30 18:13:48 -0500 |
commit | 264be797c55aaff6eb9639d5a15d9081e2256253 (patch) | |
tree | 1ee90ad507d3faec99b50a74536dd9f6d1f094e4 /doc/blog/using-covid-19-pubseq-part2.html | |
parent | ac7a79bb2aa6480a2ee3e881732ae314e8ccbf7d (diff) | |
download | bh20-seq-resource-264be797c55aaff6eb9639d5a15d9081e2256253.tar.gz bh20-seq-resource-264be797c55aaff6eb9639d5a15d9081e2256253.tar.lz bh20-seq-resource-264be797c55aaff6eb9639d5a15d9081e2256253.zip |
BLOG
Diffstat (limited to 'doc/blog/using-covid-19-pubseq-part2.html')
-rw-r--r-- | doc/blog/using-covid-19-pubseq-part2.html | 394 |
1 files changed, 394 insertions, 0 deletions
diff --git a/doc/blog/using-covid-19-pubseq-part2.html b/doc/blog/using-covid-19-pubseq-part2.html new file mode 100644 index 0000000..c047441 --- /dev/null +++ b/doc/blog/using-covid-19-pubseq-part2.html @@ -0,0 +1,394 @@ +<?xml version="1.0" encoding="utf-8"?> +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" +"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> +<html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en"> +<head> +<!-- 2020-05-30 Sat 11:50 --> +<meta http-equiv="Content-Type" content="text/html;charset=utf-8" /> +<meta name="viewport" content="width=device-width, initial-scale=1" /> +<title>COVID-19 PubSeq (part 2)</title> +<meta name="generator" content="Org mode" /> +<meta name="author" content="Pjotr Prins" /> +<style type="text/css"> + <!--/*--><![CDATA[/*><!--*/ + .title { text-align: center; + margin-bottom: .2em; } + .subtitle { text-align: center; + font-size: medium; + font-weight: bold; + margin-top:0; } + .todo { font-family: monospace; color: red; } + .done { font-family: monospace; color: green; } + .priority { font-family: monospace; color: orange; } + .tag { background-color: #eee; font-family: monospace; + padding: 2px; font-size: 80%; font-weight: normal; } + .timestamp { color: #bebebe; } + .timestamp-kwd { color: #5f9ea0; } + .org-right { margin-left: auto; margin-right: 0px; text-align: right; } + .org-left { margin-left: 0px; margin-right: auto; text-align: left; } + .org-center { margin-left: auto; margin-right: auto; text-align: center; } + .underline { text-decoration: underline; } + #postamble p, #preamble p { font-size: 90%; margin: .2em; } + p.verse { margin-left: 3%; } + pre { + border: 1px solid #ccc; + box-shadow: 3px 3px 3px #eee; + padding: 8pt; + font-family: monospace; + overflow: auto; + margin: 1.2em; + } + pre.src { + position: relative; + overflow: visible; + padding-top: 1.2em; + } + pre.src:before { + display: none; + position: absolute; + background-color: white; + top: -10px; + right: 
10px; + padding: 3px; + border: 1px solid black; + } + pre.src:hover:before { display: inline;} + /* Languages per Org manual */ + pre.src-asymptote:before { content: 'Asymptote'; } + pre.src-awk:before { content: 'Awk'; } + pre.src-C:before { content: 'C'; } + /* pre.src-C++ doesn't work in CSS */ + pre.src-clojure:before { content: 'Clojure'; } + pre.src-css:before { content: 'CSS'; } + pre.src-D:before { content: 'D'; } + pre.src-ditaa:before { content: 'ditaa'; } + pre.src-dot:before { content: 'Graphviz'; } + pre.src-calc:before { content: 'Emacs Calc'; } + pre.src-emacs-lisp:before { content: 'Emacs Lisp'; } + pre.src-fortran:before { content: 'Fortran'; } + pre.src-gnuplot:before { content: 'gnuplot'; } + pre.src-haskell:before { content: 'Haskell'; } + pre.src-hledger:before { content: 'hledger'; } + pre.src-java:before { content: 'Java'; } + pre.src-js:before { content: 'Javascript'; } + pre.src-latex:before { content: 'LaTeX'; } + pre.src-ledger:before { content: 'Ledger'; } + pre.src-lisp:before { content: 'Lisp'; } + pre.src-lilypond:before { content: 'Lilypond'; } + pre.src-lua:before { content: 'Lua'; } + pre.src-matlab:before { content: 'MATLAB'; } + pre.src-mscgen:before { content: 'Mscgen'; } + pre.src-ocaml:before { content: 'Objective Caml'; } + pre.src-octave:before { content: 'Octave'; } + pre.src-org:before { content: 'Org mode'; } + pre.src-oz:before { content: 'OZ'; } + pre.src-plantuml:before { content: 'Plantuml'; } + pre.src-processing:before { content: 'Processing.js'; } + pre.src-python:before { content: 'Python'; } + pre.src-R:before { content: 'R'; } + pre.src-ruby:before { content: 'Ruby'; } + pre.src-sass:before { content: 'Sass'; } + pre.src-scheme:before { content: 'Scheme'; } + pre.src-screen:before { content: 'Gnu Screen'; } + pre.src-sed:before { content: 'Sed'; } + pre.src-sh:before { content: 'shell'; } + pre.src-sql:before { content: 'SQL'; } + pre.src-sqlite:before { content: 'SQLite'; } + /* additional languages in 
org.el's org-babel-load-languages alist */ + pre.src-forth:before { content: 'Forth'; } + pre.src-io:before { content: 'IO'; } + pre.src-J:before { content: 'J'; } + pre.src-makefile:before { content: 'Makefile'; } + pre.src-maxima:before { content: 'Maxima'; } + pre.src-perl:before { content: 'Perl'; } + pre.src-picolisp:before { content: 'Pico Lisp'; } + pre.src-scala:before { content: 'Scala'; } + pre.src-shell:before { content: 'Shell Script'; } + pre.src-ebnf2ps:before { content: 'ebfn2ps'; } + /* additional language identifiers per "defun org-babel-execute" + in ob-*.el */ + pre.src-cpp:before { content: 'C++'; } + pre.src-abc:before { content: 'ABC'; } + pre.src-coq:before { content: 'Coq'; } + pre.src-groovy:before { content: 'Groovy'; } + /* additional language identifiers from org-babel-shell-names in + ob-shell.el: ob-shell is the only babel language using a lambda to put + the execution function name together. */ + pre.src-bash:before { content: 'bash'; } + pre.src-csh:before { content: 'csh'; } + pre.src-ash:before { content: 'ash'; } + pre.src-dash:before { content: 'dash'; } + pre.src-ksh:before { content: 'ksh'; } + pre.src-mksh:before { content: 'mksh'; } + pre.src-posh:before { content: 'posh'; } + /* Additional Emacs modes also supported by the LaTeX listings package */ + pre.src-ada:before { content: 'Ada'; } + pre.src-asm:before { content: 'Assembler'; } + pre.src-caml:before { content: 'Caml'; } + pre.src-delphi:before { content: 'Delphi'; } + pre.src-html:before { content: 'HTML'; } + pre.src-idl:before { content: 'IDL'; } + pre.src-mercury:before { content: 'Mercury'; } + pre.src-metapost:before { content: 'MetaPost'; } + pre.src-modula-2:before { content: 'Modula-2'; } + pre.src-pascal:before { content: 'Pascal'; } + pre.src-ps:before { content: 'PostScript'; } + pre.src-prolog:before { content: 'Prolog'; } + pre.src-simula:before { content: 'Simula'; } + pre.src-tcl:before { content: 'tcl'; } + pre.src-tex:before { content: 'TeX'; } + 
pre.src-plain-tex:before { content: 'Plain TeX'; } + pre.src-verilog:before { content: 'Verilog'; } + pre.src-vhdl:before { content: 'VHDL'; } + pre.src-xml:before { content: 'XML'; } + pre.src-nxml:before { content: 'XML'; } + /* add a generic configuration mode; LaTeX export needs an additional + (add-to-list 'org-latex-listings-langs '(conf " ")) in .emacs */ + pre.src-conf:before { content: 'Configuration File'; } + + table { border-collapse:collapse; } + caption.t-above { caption-side: top; } + caption.t-bottom { caption-side: bottom; } + td, th { vertical-align:top; } + th.org-right { text-align: center; } + th.org-left { text-align: center; } + th.org-center { text-align: center; } + td.org-right { text-align: right; } + td.org-left { text-align: left; } + td.org-center { text-align: center; } + dt { font-weight: bold; } + .footpara { display: inline; } + .footdef { margin-bottom: 1em; } + .figure { padding: 1em; } + .figure p { text-align: center; } + .equation-container { + display: table; + text-align: center; + width: 100%; + } + .equation { + vertical-align: middle; + } + .equation-label { + display: table-cell; + text-align: right; + vertical-align: middle; + } + .inlinetask { + padding: 10px; + border: 2px solid gray; + margin: 10px; + background: #ffffcc; + } + #org-div-home-and-up + { text-align: right; font-size: 70%; white-space: nowrap; } + textarea { overflow-x: auto; } + .linenr { font-size: smaller } + .code-highlighted { background-color: #ffff00; } + .org-info-js_info-navigation { border-style: none; } + #org-info-js_console-label + { font-size: 10px; font-weight: bold; white-space: nowrap; } + .org-info-js_search-highlight + { background-color: #ffff00; color: #000000; font-weight: bold; } + .org-svg { width: 90%; } + /*]]>*/--> +</style> +<link rel="Blog stylesheet" type="text/css" href="blog.css" /> +<script type="text/javascript"> +/* +@licstart The following is the entire license notice for the +JavaScript code in this tag. 
+ +Copyright (C) 2012-2020 Free Software Foundation, Inc. + +The JavaScript code in this tag is free software: you can +redistribute it and/or modify it under the terms of the GNU +General Public License (GNU GPL) as published by the Free Software +Foundation, either version 3 of the License, or (at your option) +any later version. The code is distributed WITHOUT ANY WARRANTY; +without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU GPL for more details. + +As additional permission under GNU GPL version 3 section 7, you +may distribute non-source (e.g., minimized or compacted) forms of +that code without the copy of the GNU GPL normally required by +section 4, provided you include this license notice and a URL +through which recipients can access the Corresponding Source. + + +@licend The above is the entire license notice +for the JavaScript code in this tag. +*/ +<!--/*--><![CDATA[/*><!--*/ + function CodeHighlightOn(elem, id) + { + var target = document.getElementById(id); + if(null != target) { + elem.cacheClassElem = elem.className; + elem.cacheClassTarget = target.className; + target.className = "code-highlighted"; + elem.className = "code-highlighted"; + } + } + function CodeHighlightOff(elem, id) + { + var target = document.getElementById(id); + if(elem.cacheClassElem) + elem.className = elem.cacheClassElem; + if(elem.cacheClassTarget) + target.className = elem.cacheClassTarget; + } +/*]]>*///--> +</script> +</head> +<body> +<div id="org-div-home-and-up"> + <a accesskey="h" href=""> UP </a> + | + <a accesskey="H" href="http://covid19.genenetwork.org"> HOME </a> +</div><div id="content"> +<h1 class="title">COVID-19 PubSeq (part 2)</h1> +<div id="table-of-contents"> +<h2>Table of Contents</h2> +<div id="text-table-of-contents"> +<ul> +<li><a href="#org7942167">1. Finding output of workflows</a></li> +<li><a href="#org0022bbe">2. Introduction</a></li> +<li><a href="#org3929710">3. 
The Arvados file interface</a></li> +<li><a href="#orgc4dba6e">4. Using the Arvados API</a></li> +</ul> +</div> +</div> +<p> +As part of the COVID-19 Biohackathon 2020 we formed a working group to +create a COVID-19 Public Sequence Resource (COVID-19 PubSeq) for +coronavirus sequences. The general idea is to create a repository +that has a low barrier to entry for uploading sequence data using best +practices. I.e., data published with a Creative Commons 4.0 (CC-4.0) +license with metadata using state-of-the-art standards and, perhaps +most importantly, providing standardised workflows that get triggered +on upload, so that results are immediately available in standardised +data formats. +</p> + +<div id="outline-container-org7942167" class="outline-2"> +<h2 id="org7942167"><span class="section-number-2">1</span> Finding output of workflows</h2> +<div class="outline-text-2" id="text-1"> +<p> +As part of the COVID-19 Biohackathon 2020 we formed a working group to +create a COVID-19 Public Sequence Resource (COVID-19 PubSeq) for +coronavirus sequences. The general idea is to create a repository +that has a low barrier to entry for uploading sequence data using best +practices. I.e., data published with a Creative Commons 4.0 (CC-4.0) +license with metadata using state-of-the-art standards and, perhaps +most importantly, providing standardised workflows that get triggered +on upload, so that results are immediately available in standardised +data formats. +</p> +</div> +</div> + +<div id="outline-container-org0022bbe" class="outline-2"> +<h2 id="org0022bbe"><span class="section-number-2">2</span> Introduction</h2> +<div class="outline-text-2" id="text-2"> +<p> +We are using Arvados to run Common Workflow Language (CWL) pipelines. 
+The most recent output is on display on a <a href="https://workbench.lugli.arvadosapi.com/collections/lugli-4zz18-z513nlpqm03hpca">web page</a> (with time stamp) +and a full list is generated <a href="https://collections.lugli.arvadosapi.com/c=lugli-4zz18-z513nlpqm03hpca/">here</a>. That is a nice start, but for +most users we need a dedicated and themed results page. People don't +want to wade through thousands of output files! +</p> +</div> +</div> + +<div id="outline-container-org3929710" class="outline-2"> +<h2 id="org3929710"><span class="section-number-2">3</span> The Arvados file interface</h2> +<div class="outline-text-2" id="text-3"> +<p> +Arvados has a web server, but it also has a REST API and associated +command line tools. We are already using the <a href="https://github.com/arvados/bh20-seq-resource/blob/master/bh20sequploader/main.py#L27">API</a> to upload data. If +you follow the pip or <a href="../INSTALL.md">../INSTALL.md</a> GNU Guix instructions for +installing the Arvados API you'll find the following command line tools +(also documented <a href="https://doc.arvados.org/v2.0/sdk/cli/subcommands.html">here</a>): +</p> + +<table border="2" cellspacing="0" cellpadding="6" rules="groups" frame="hsides"> + + +<colgroup> +<col class="org-left" /> + +<col class="org-left" /> +</colgroup> +<thead> +<tr> +<th scope="col" class="org-left">Command</th> +<th scope="col" class="org-left">Description</th> +</tr> +</thead> +<tbody> +<tr> +<td class="org-left">arv-ls</td> +<td class="org-left">list files in Arvados</td> +</tr> + +<tr> +<td class="org-left">arv-put</td> +<td class="org-left">upload a file to Arvados</td> +</tr> + +<tr> +<td class="org-left">arv-get</td> +<td class="org-left">get a textual representation of Arvados objects from the command line. The output can be limited to a subset of the object’s fields. 
This command can be used with only the knowledge of an object’s UUID</td> +</tr> +</tbody> +</table> + +<p> +Now, this is a public instance so we can use the tokens from +the <a href="https://github.com/arvados/bh20-seq-resource/blob/master/bh20sequploader/main.py#L16">uploader</a>. +</p> + +<div class="SOURCE"> +<p> +export ARVADOS_API_HOST='lugli.arvadosapi.com' +export ARVADOS_API_TOKEN='2fbebpmbo3rw3x05ueu2i6nx70zhrsb1p22ycu3ry34m4x4462' +arv-ls lugli-4zz18-z513nlpqm03hpca +</p> + +</div> + +<p> +will list all files (the UUID we got from the Arvados results page). To +get the UUID of the files +</p> + +<div class="SOURCE"> +<p> +curl <a href="https://lugli.arvadosapi.com/arvados/v1/config">https://lugli.arvadosapi.com/arvados/v1/config</a> | jq .Users.AnonymousUserToken +env ARVADOS_API_TOKEN=5o42qdxpxp5cj15jqjf7vnxx5xduhm4ret703suuoa3ivfglfh \ + arv-get lugli-4zz18-z513nlpqm03hpca +</p> + +</div> + +<p> +and fetch one listed JSON file <code>chunk001_bin4000.schematic.json</code> with +its listed UUID: +</p> + +<pre class="example"> +arv-get 2be6af7b4741f2a5c5f8ff2bc6152d73+1955623+Ab9ad65d7fe958a053b3a57d545839de18290843a@5ed7f3c5 +</pre> +</div> +</div> + +<div id="outline-container-orgc4dba6e" class="outline-2"> +<h2 id="orgc4dba6e"><span class="section-number-2">4</span> Using the Arvados API</h2> +</div> +</div> +<div id="postamble" class="status"> +<hr /><small>Created by <a href="http://thebird.nl/">Pjotr Prins</a> (pjotr.public768 at thebird 'dot' nl) using Emacs org-mode and a healthy dose of Lisp!<br />Modified 2020-05-30 Sat 11:50</small>. +</div> +</body> +</html> |