aboutsummaryrefslogtreecommitdiff
path: root/doc/blog
diff options
context:
space:
mode:
Diffstat (limited to 'doc/blog')
-rw-r--r--doc/blog/using-covid-19-pubseq-part2.html394
-rw-r--r--doc/blog/using-covid-19-pubseq-part3.html296
-rw-r--r--doc/blog/using-covid-19-pubseq-part3.org116
-rw-r--r--doc/blog/using-covid-19-pubseq-part4.html266
-rw-r--r--doc/blog/using-covid-19-pubseq-part4.org3
-rw-r--r--doc/blog/using-covid-19-pubseq-part5.html277
-rw-r--r--doc/blog/using-covid-19-pubseq-part5.org17
7 files changed, 1260 insertions, 109 deletions
diff --git a/doc/blog/using-covid-19-pubseq-part2.html b/doc/blog/using-covid-19-pubseq-part2.html
new file mode 100644
index 0000000..c047441
--- /dev/null
+++ b/doc/blog/using-covid-19-pubseq-part2.html
@@ -0,0 +1,394 @@
+<?xml version="1.0" encoding="utf-8"?>
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
+"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en">
+<head>
+<!-- 2020-05-30 Sat 11:50 -->
+<meta http-equiv="Content-Type" content="text/html;charset=utf-8" />
+<meta name="viewport" content="width=device-width, initial-scale=1" />
+<title>COVID-19 PubSeq (part 2)</title>
+<meta name="generator" content="Org mode" />
+<meta name="author" content="Pjotr Prins" />
+<style type="text/css">
+ <!--/*--><![CDATA[/*><!--*/
+ .title { text-align: center;
+ margin-bottom: .2em; }
+ .subtitle { text-align: center;
+ font-size: medium;
+ font-weight: bold;
+ margin-top:0; }
+ .todo { font-family: monospace; color: red; }
+ .done { font-family: monospace; color: green; }
+ .priority { font-family: monospace; color: orange; }
+ .tag { background-color: #eee; font-family: monospace;
+ padding: 2px; font-size: 80%; font-weight: normal; }
+ .timestamp { color: #bebebe; }
+ .timestamp-kwd { color: #5f9ea0; }
+ .org-right { margin-left: auto; margin-right: 0px; text-align: right; }
+ .org-left { margin-left: 0px; margin-right: auto; text-align: left; }
+ .org-center { margin-left: auto; margin-right: auto; text-align: center; }
+ .underline { text-decoration: underline; }
+ #postamble p, #preamble p { font-size: 90%; margin: .2em; }
+ p.verse { margin-left: 3%; }
+ pre {
+ border: 1px solid #ccc;
+ box-shadow: 3px 3px 3px #eee;
+ padding: 8pt;
+ font-family: monospace;
+ overflow: auto;
+ margin: 1.2em;
+ }
+ pre.src {
+ position: relative;
+ overflow: visible;
+ padding-top: 1.2em;
+ }
+ pre.src:before {
+ display: none;
+ position: absolute;
+ background-color: white;
+ top: -10px;
+ right: 10px;
+ padding: 3px;
+ border: 1px solid black;
+ }
+ pre.src:hover:before { display: inline;}
+ /* Languages per Org manual */
+ pre.src-asymptote:before { content: 'Asymptote'; }
+ pre.src-awk:before { content: 'Awk'; }
+ pre.src-C:before { content: 'C'; }
+ /* pre.src-C++ doesn't work in CSS */
+ pre.src-clojure:before { content: 'Clojure'; }
+ pre.src-css:before { content: 'CSS'; }
+ pre.src-D:before { content: 'D'; }
+ pre.src-ditaa:before { content: 'ditaa'; }
+ pre.src-dot:before { content: 'Graphviz'; }
+ pre.src-calc:before { content: 'Emacs Calc'; }
+ pre.src-emacs-lisp:before { content: 'Emacs Lisp'; }
+ pre.src-fortran:before { content: 'Fortran'; }
+ pre.src-gnuplot:before { content: 'gnuplot'; }
+ pre.src-haskell:before { content: 'Haskell'; }
+ pre.src-hledger:before { content: 'hledger'; }
+ pre.src-java:before { content: 'Java'; }
+ pre.src-js:before { content: 'Javascript'; }
+ pre.src-latex:before { content: 'LaTeX'; }
+ pre.src-ledger:before { content: 'Ledger'; }
+ pre.src-lisp:before { content: 'Lisp'; }
+ pre.src-lilypond:before { content: 'Lilypond'; }
+ pre.src-lua:before { content: 'Lua'; }
+ pre.src-matlab:before { content: 'MATLAB'; }
+ pre.src-mscgen:before { content: 'Mscgen'; }
+ pre.src-ocaml:before { content: 'Objective Caml'; }
+ pre.src-octave:before { content: 'Octave'; }
+ pre.src-org:before { content: 'Org mode'; }
+ pre.src-oz:before { content: 'OZ'; }
+ pre.src-plantuml:before { content: 'Plantuml'; }
+ pre.src-processing:before { content: 'Processing.js'; }
+ pre.src-python:before { content: 'Python'; }
+ pre.src-R:before { content: 'R'; }
+ pre.src-ruby:before { content: 'Ruby'; }
+ pre.src-sass:before { content: 'Sass'; }
+ pre.src-scheme:before { content: 'Scheme'; }
+ pre.src-screen:before { content: 'Gnu Screen'; }
+ pre.src-sed:before { content: 'Sed'; }
+ pre.src-sh:before { content: 'shell'; }
+ pre.src-sql:before { content: 'SQL'; }
+ pre.src-sqlite:before { content: 'SQLite'; }
+ /* additional languages in org.el's org-babel-load-languages alist */
+ pre.src-forth:before { content: 'Forth'; }
+ pre.src-io:before { content: 'IO'; }
+ pre.src-J:before { content: 'J'; }
+ pre.src-makefile:before { content: 'Makefile'; }
+ pre.src-maxima:before { content: 'Maxima'; }
+ pre.src-perl:before { content: 'Perl'; }
+ pre.src-picolisp:before { content: 'Pico Lisp'; }
+ pre.src-scala:before { content: 'Scala'; }
+ pre.src-shell:before { content: 'Shell Script'; }
+ pre.src-ebnf2ps:before { content: 'ebfn2ps'; }
+ /* additional language identifiers per "defun org-babel-execute"
+ in ob-*.el */
+ pre.src-cpp:before { content: 'C++'; }
+ pre.src-abc:before { content: 'ABC'; }
+ pre.src-coq:before { content: 'Coq'; }
+ pre.src-groovy:before { content: 'Groovy'; }
+ /* additional language identifiers from org-babel-shell-names in
+ ob-shell.el: ob-shell is the only babel language using a lambda to put
+ the execution function name together. */
+ pre.src-bash:before { content: 'bash'; }
+ pre.src-csh:before { content: 'csh'; }
+ pre.src-ash:before { content: 'ash'; }
+ pre.src-dash:before { content: 'dash'; }
+ pre.src-ksh:before { content: 'ksh'; }
+ pre.src-mksh:before { content: 'mksh'; }
+ pre.src-posh:before { content: 'posh'; }
+ /* Additional Emacs modes also supported by the LaTeX listings package */
+ pre.src-ada:before { content: 'Ada'; }
+ pre.src-asm:before { content: 'Assembler'; }
+ pre.src-caml:before { content: 'Caml'; }
+ pre.src-delphi:before { content: 'Delphi'; }
+ pre.src-html:before { content: 'HTML'; }
+ pre.src-idl:before { content: 'IDL'; }
+ pre.src-mercury:before { content: 'Mercury'; }
+ pre.src-metapost:before { content: 'MetaPost'; }
+ pre.src-modula-2:before { content: 'Modula-2'; }
+ pre.src-pascal:before { content: 'Pascal'; }
+ pre.src-ps:before { content: 'PostScript'; }
+ pre.src-prolog:before { content: 'Prolog'; }
+ pre.src-simula:before { content: 'Simula'; }
+ pre.src-tcl:before { content: 'tcl'; }
+ pre.src-tex:before { content: 'TeX'; }
+ pre.src-plain-tex:before { content: 'Plain TeX'; }
+ pre.src-verilog:before { content: 'Verilog'; }
+ pre.src-vhdl:before { content: 'VHDL'; }
+ pre.src-xml:before { content: 'XML'; }
+ pre.src-nxml:before { content: 'XML'; }
+ /* add a generic configuration mode; LaTeX export needs an additional
+ (add-to-list 'org-latex-listings-langs '(conf " ")) in .emacs */
+ pre.src-conf:before { content: 'Configuration File'; }
+
+ table { border-collapse:collapse; }
+ caption.t-above { caption-side: top; }
+ caption.t-bottom { caption-side: bottom; }
+ td, th { vertical-align:top; }
+ th.org-right { text-align: center; }
+ th.org-left { text-align: center; }
+ th.org-center { text-align: center; }
+ td.org-right { text-align: right; }
+ td.org-left { text-align: left; }
+ td.org-center { text-align: center; }
+ dt { font-weight: bold; }
+ .footpara { display: inline; }
+ .footdef { margin-bottom: 1em; }
+ .figure { padding: 1em; }
+ .figure p { text-align: center; }
+ .equation-container {
+ display: table;
+ text-align: center;
+ width: 100%;
+ }
+ .equation {
+ vertical-align: middle;
+ }
+ .equation-label {
+ display: table-cell;
+ text-align: right;
+ vertical-align: middle;
+ }
+ .inlinetask {
+ padding: 10px;
+ border: 2px solid gray;
+ margin: 10px;
+ background: #ffffcc;
+ }
+ #org-div-home-and-up
+ { text-align: right; font-size: 70%; white-space: nowrap; }
+ textarea { overflow-x: auto; }
+ .linenr { font-size: smaller }
+ .code-highlighted { background-color: #ffff00; }
+ .org-info-js_info-navigation { border-style: none; }
+ #org-info-js_console-label
+ { font-size: 10px; font-weight: bold; white-space: nowrap; }
+ .org-info-js_search-highlight
+ { background-color: #ffff00; color: #000000; font-weight: bold; }
+ .org-svg { width: 90%; }
+ /*]]>*/-->
+</style>
+<link rel="Blog stylesheet" type="text/css" href="blog.css" />
+<script type="text/javascript">
+/*
+@licstart The following is the entire license notice for the
+JavaScript code in this tag.
+
+Copyright (C) 2012-2020 Free Software Foundation, Inc.
+
+The JavaScript code in this tag is free software: you can
+redistribute it and/or modify it under the terms of the GNU
+General Public License (GNU GPL) as published by the Free Software
+Foundation, either version 3 of the License, or (at your option)
+any later version. The code is distributed WITHOUT ANY WARRANTY;
+without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU GPL for more details.
+
+As additional permission under GNU GPL version 3 section 7, you
+may distribute non-source (e.g., minimized or compacted) forms of
+that code without the copy of the GNU GPL normally required by
+section 4, provided you include this license notice and a URL
+through which recipients can access the Corresponding Source.
+
+
+@licend The above is the entire license notice
+for the JavaScript code in this tag.
+*/
+<!--/*--><![CDATA[/*><!--*/
+ function CodeHighlightOn(elem, id)
+ {
+ var target = document.getElementById(id);
+ if(null != target) {
+ elem.cacheClassElem = elem.className;
+ elem.cacheClassTarget = target.className;
+ target.className = "code-highlighted";
+ elem.className = "code-highlighted";
+ }
+ }
+ function CodeHighlightOff(elem, id)
+ {
+ var target = document.getElementById(id);
+ if(elem.cacheClassElem)
+ elem.className = elem.cacheClassElem;
+ if(elem.cacheClassTarget)
+ target.className = elem.cacheClassTarget;
+ }
+/*]]>*///-->
+</script>
+</head>
+<body>
+<div id="org-div-home-and-up">
+ <a accesskey="h" href=""> UP </a>
+ |
+ <a accesskey="H" href="http://covid19.genenetwork.org"> HOME </a>
+</div><div id="content">
+<h1 class="title">COVID-19 PubSeq (part 2)</h1>
+<div id="table-of-contents">
+<h2>Table of Contents</h2>
+<div id="text-table-of-contents">
+<ul>
+<li><a href="#org7942167">1. Finding output of workflows</a></li>
+<li><a href="#org0022bbe">2. Introduction</a></li>
+<li><a href="#org3929710">3. The Arvados file interface</a></li>
+<li><a href="#orgc4dba6e">4. Using the Arvados API</a></li>
+</ul>
+</div>
+</div>
+<p>
+As part of the COVID-19 Biohackathon 2020 we formed a working group to
+create a COVID-19 Public Sequence Resource (COVID-19 PubSeq) for
+Corona virus sequences. The general idea is to create a repository
+that has a low barrier to entry for uploading sequence data using best
+practices. I.e., data published with a creative commons 4.0 (CC-4.0)
+license with metadata using state-of-the-art standards and, perhaps
+most importantly, providing standardised workflows that get triggered
+on upload, so that results are immediately available in standardised
+data formats.
+</p>
+
+<div id="outline-container-org7942167" class="outline-2">
+<h2 id="org7942167"><span class="section-number-2">1</span> Finding output of workflows</h2>
+<div class="outline-text-2" id="text-1">
+<p>
+As part of the COVID-19 Biohackathon 2020 we formed a working group to
+create a COVID-19 Public Sequence Resource (COVID-19 PubSeq) for
+Corona virus sequences. The general idea is to create a repository
+that has a low barrier to entry for uploading sequence data using best
+practices. I.e., data published with a creative commons 4.0 (CC-4.0)
+license with metadata using state-of-the-art standards and, perhaps
+most importantly, providing standardised workflows that get triggered
+on upload, so that results are immediately available in standardised
+data formats.
+</p>
+</div>
+</div>
+
+<div id="outline-container-org0022bbe" class="outline-2">
+<h2 id="org0022bbe"><span class="section-number-2">2</span> Introduction</h2>
+<div class="outline-text-2" id="text-2">
+<p>
+We are using Arvados to run common workflow language (CWL) pipelines.
+The most recent output is on display on a <a href="https://workbench.lugli.arvadosapi.com/collections/lugli-4zz18-z513nlpqm03hpca">web page</a> (with time stamp)
+and a full list is generated <a href="https://collections.lugli.arvadosapi.com/c=lugli-4zz18-z513nlpqm03hpca/">here</a>. This is a nice start, but for
+most users we need a dedicated and themed results page. People don't
+want to wade through thousands of output files!
+</p>
+</div>
+</div>
+
+<div id="outline-container-org3929710" class="outline-2">
+<h2 id="org3929710"><span class="section-number-2">3</span> The Arvados file interface</h2>
+<div class="outline-text-2" id="text-3">
+<p>
+Arvados has the web server, but it also has a REST API and associated
+command line tools. We are already using the <a href="https://github.com/arvados/bh20-seq-resource/blob/master/bh20sequploader/main.py#L27">API</a> to upload data. If
+you follow the pip or GNU Guix instructions in <a href="../INSTALL.md">../INSTALL.md</a> for
+installing the Arvados API you'll find the following command line tools
+(also documented <a href="https://doc.arvados.org/v2.0/sdk/cli/subcommands.html">here</a>):
+</p>
+
+<table border="2" cellspacing="0" cellpadding="6" rules="groups" frame="hsides">
+
+
+<colgroup>
+<col class="org-left" />
+
+<col class="org-left" />
+</colgroup>
+<thead>
+<tr>
+<th scope="col" class="org-left">Command</th>
+<th scope="col" class="org-left">Description</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td class="org-left">arv-ls</td>
+<td class="org-left">list files in Arvados</td>
+</tr>
+
+<tr>
+<td class="org-left">arv-put</td>
+<td class="org-left">upload a file to Arvados</td>
+</tr>
+
+<tr>
+<td class="org-left">arv-get</td>
+<td class="org-left">get a textual representation of Arvados objects from the command line. The output can be limited to a subset of the object’s fields. This command can be used with only the knowledge of an object’s UUID</td>
+</tr>
+</tbody>
+</table>
+
+<p>
+Now, this is a public instance so we can use the tokens from
+the <a href="https://github.com/arvados/bh20-seq-resource/blob/master/bh20sequploader/main.py#L16">uploader</a>.
+</p>
+
+<div class="SOURCE">
+<pre class="example">
+export ARVADOS_API_HOST='lugli.arvadosapi.com'
+export ARVADOS_API_TOKEN='2fbebpmbo3rw3x05ueu2i6nx70zhrsb1p22ycu3ry34m4x4462'
+arv-ls lugli-4zz18-z513nlpqm03hpca
+</pre>
+
+</div>
+
+<p>
+will list all files (the UUID we got from the Arvados results page). To
+get the UUID of the files
+</p>
+
+<div class="SOURCE">
+<pre class="example">
+curl <a href="https://lugli.arvadosapi.com/arvados/v1/config">https://lugli.arvadosapi.com/arvados/v1/config</a> | jq .Users.AnonymousUserToken
+env ARVADOS_API_TOKEN=5o42qdxpxp5cj15jqjf7vnxx5xduhm4ret703suuoa3ivfglfh \
+ arv-get lugli-4zz18-z513nlpqm03hpca
+</pre>
+
+</div>
+
+<p>
+and fetch one listed JSON file <code>chunk001_bin4000.schematic.json</code> with
+its listed UUID:
+</p>
+
+<pre class="example">
+arv-get 2be6af7b4741f2a5c5f8ff2bc6152d73+1955623+Ab9ad65d7fe958a053b3a57d545839de18290843a@5ed7f3c5
+</pre>
+</div>
+</div>
+
+<div id="outline-container-orgc4dba6e" class="outline-2">
+<h2 id="orgc4dba6e"><span class="section-number-2">4</span> Using the Arvados API</h2>
+</div>
+</div>
+<div id="postamble" class="status">
+<hr /><small>Created by <a href="http://thebird.nl/">Pjotr Prins</a> (pjotr.public768 at thebird 'dot' nl) using Emacs org-mode and a healthy dose of Lisp!<br />Modified 2020-05-30 Sat 11:50</small>.
+</div>
+</body>
+</html>
diff --git a/doc/blog/using-covid-19-pubseq-part3.html b/doc/blog/using-covid-19-pubseq-part3.html
index 4132784..91879b0 100644
--- a/doc/blog/using-covid-19-pubseq-part3.html
+++ b/doc/blog/using-covid-19-pubseq-part3.html
@@ -3,7 +3,7 @@
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en">
<head>
-<!-- 2020-05-30 Sat 10:45 -->
+<!-- 2020-05-30 Sat 18:12 -->
<meta http-equiv="Content-Type" content="text/html;charset=utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1" />
<title>COVID-19 PubSeq Uploading Data (part 3)</title>
@@ -248,64 +248,62 @@ for the JavaScript code in this tag.
<h2>Table of Contents</h2>
<div id="text-table-of-contents">
<ul>
-<li><a href="#org7fda7c8">1. Uploading Data</a></li>
-<li><a href="#orgb062ac0">2. Introduction</a></li>
-<li><a href="#org4061598">3. Step 1: Upload sequence</a></li>
-<li><a href="#org51d80f8">4. Step 2: Add metadata</a>
+<li><a href="#org193669a">1. Uploading Data</a></li>
+<li><a href="#orgc6b3a47">2. Step 1: Upload sequence</a></li>
+<li><a href="#org9c08714">3. Step 2: Add metadata</a>
<ul>
-<li><a href="#orgbb8f0bb">4.1. Obligatory fields</a>
+<li><a href="#org4c2e907">3.1. Obligatory fields</a>
<ul>
-<li><a href="#org0e615dc">4.1.1. Sample ID (sample<sub>id</sub>)</a></li>
-<li><a href="#org4d5308a">4.1.2. Collection date</a></li>
-<li><a href="#org429f153">4.1.3. Collection location</a></li>
-<li><a href="#orgbd7fa51">4.1.4. Sequencing technology</a></li>
-<li><a href="#orgc3b424f">4.1.5. Authors</a></li>
+<li><a href="#orgdddcb2e">3.1.1. Sample ID (sample<sub>id</sub>)</a></li>
+<li><a href="#orge9c2e76">3.1.2. Collection date</a></li>
+<li><a href="#org62c55ce">3.1.3. Collection location</a></li>
+<li><a href="#org460b377">3.1.4. Sequencing technology</a></li>
+<li><a href="#org77b1e14">3.1.5. Authors</a></li>
</ul>
</li>
-<li><a href="#org5c01347">4.2. Optional fields</a>
+<li><a href="#org3cb346f">3.2. Optional fields</a>
<ul>
-<li><a href="#org7fc5461">4.2.1. Host information</a></li>
-<li><a href="#org140c8b5">4.2.2. Collecting institution</a></li>
-<li><a href="#orgf231cf9">4.2.3. Specimen source</a></li>
-<li><a href="#org74de839">4.2.4. Source database accession</a></li>
-<li><a href="#org8927a67">4.2.5. Strain name</a></li>
+<li><a href="#orgb0cffbb">3.2.1. Host information</a></li>
+<li><a href="#orgd2a43a6">3.2.2. Collecting institution</a></li>
+<li><a href="#org8d5bcf7">3.2.3. Specimen source</a></li>
+<li><a href="#org86b21b2">3.2.4. Source database accession</a></li>
+<li><a href="#org771ea66">3.2.5. Strain name</a></li>
</ul>
</li>
</ul>
</li>
-<li><a href="#org38d48d8">5. Step 3: Submit to COVID-19 PubSeq</a></li>
-<li><a href="#org5ec1337">6. Step 4: Check output</a>
+<li><a href="#org7d281f5">4. Step 3: Submit to COVID-19 PubSeq</a>
<ul>
-<li><a href="#org070e13e">6.1. Trouble shooting</a></li>
+<li><a href="#orgdf0f02d">4.1. Trouble shooting</a></li>
+</ul>
+</li>
+<li><a href="#org29f8a92">5. Step 4: Check output</a></li>
+<li><a href="#orgf493854">6. Bulk sequence uploader</a>
+<ul>
+<li><a href="#org37fadbc">6.1. Run the uploader (CLI)</a></li>
+<li><a href="#org39adf09">6.2. Example: uploading bulk GenBank sequences</a></li>
</ul>
</li>
</ul>
</div>
</div>
-<div id="outline-container-org7fda7c8" class="outline-2">
-<h2 id="org7fda7c8"><span class="section-number-2">1</span> Uploading Data</h2>
-<div class="outline-text-2" id="text-1">
-<p>
-<i>Work in progress!</i>
-</p>
-</div>
-</div>
-<div id="outline-container-orgb062ac0" class="outline-2">
-<h2 id="orgb062ac0"><span class="section-number-2">2</span> Introduction</h2>
-<div class="outline-text-2" id="text-2">
+
+<div id="outline-container-org193669a" class="outline-2">
+<h2 id="org193669a"><span class="section-number-2">1</span> Uploading Data</h2>
+<div class="outline-text-2" id="text-1">
<p>
The COVID-19 PubSeq allows you to upload your SARS-Cov-2 strains to a
-public resource for global comparisons. Compute it triggered on
-upload. Read the <a href="./about">ABOUT</a> page for more information.
+public resource for global comparisons. A recompute of the pangenome
+gets triggered on upload. Read the <a href="./about">ABOUT</a> page for more information.
</p>
</div>
</div>
-<div id="outline-container-org4061598" class="outline-2">
-<h2 id="org4061598"><span class="section-number-2">3</span> Step 1: Upload sequence</h2>
-<div class="outline-text-2" id="text-3">
+<div id="outline-container-orgc6b3a47" class="outline-2">
+<h2 id="orgc6b3a47"><span class="section-number-2">2</span> Step 1: Upload sequence</h2>
+<div class="outline-text-2" id="text-2">
<p>
To upload a sequence in the <a href="http://covid19.genenetwork.org/">web upload page</a> hit the browse button and
select the FASTA file on your local hard disk.
@@ -332,9 +330,9 @@ an improved pangenome.
</div>
</div>
-<div id="outline-container-org51d80f8" class="outline-2">
-<h2 id="org51d80f8"><span class="section-number-2">4</span> Step 2: Add metadata</h2>
-<div class="outline-text-2" id="text-4">
+<div id="outline-container-org9c08714" class="outline-2">
+<h2 id="org9c08714"><span class="section-number-2">3</span> Step 2: Add metadata</h2>
+<div class="outline-text-2" id="text-3">
<p>
The <a href="./">web upload page</a> contains fields for adding metadata. Metadata is
not only important for attribution, is also important for
@@ -348,7 +346,7 @@ A number of fields are obligatory: sample id, date, location,
technology and authors. The others are optional, but it is valuable to
enter them when information is available. Metadata is defined in this
<a href="https://github.com/arvados/bh20-seq-resource/blob/master/bh20sequploader/bh20seq-schema.yml">schema</a>. From this schema we generate the input form. Note that
-opitional fields have a question mark in the <code>type</code>. You can add
+optional fields have a question mark in the <code>type</code>. You can add
metadata yourself, btw, because this is a public resource! See also
<a href="./blog?id=using-covid-19-pubseq-part5">Modify metadata</a> for more information.
</p>
@@ -359,13 +357,13 @@ the web form. Here we add some extra information.
</p>
</div>
-<div id="outline-container-orgbb8f0bb" class="outline-3">
-<h3 id="orgbb8f0bb"><span class="section-number-3">4.1</span> Obligatory fields</h3>
-<div class="outline-text-3" id="text-4-1">
+<div id="outline-container-org4c2e907" class="outline-3">
+<h3 id="org4c2e907"><span class="section-number-3">3.1</span> Obligatory fields</h3>
+<div class="outline-text-3" id="text-3-1">
</div>
-<div id="outline-container-org0e615dc" class="outline-4">
-<h4 id="org0e615dc"><span class="section-number-4">4.1.1</span> Sample ID (sample<sub>id</sub>)</h4>
-<div class="outline-text-4" id="text-4-1-1">
+<div id="outline-container-orgdddcb2e" class="outline-4">
+<h4 id="orgdddcb2e"><span class="section-number-4">3.1.1</span> Sample ID (sample<sub>id</sub>)</h4>
+<div class="outline-text-4" id="text-3-1-1">
<p>
This is a string field that defines a unique sample identifier by the
submitter. In addition to sample<sub>id</sub> we also have host<sub>id</sub>,
@@ -382,37 +380,37 @@ Here we add the GenBank ID MT536190.1.
</div>
</div>
-<div id="outline-container-org4d5308a" class="outline-4">
-<h4 id="org4d5308a"><span class="section-number-4">4.1.2</span> Collection date</h4>
-<div class="outline-text-4" id="text-4-1-2">
+<div id="outline-container-orge9c2e76" class="outline-4">
+<h4 id="orge9c2e76"><span class="section-number-4">3.1.2</span> Collection date</h4>
+<div class="outline-text-4" id="text-3-1-2">
<p>
Estimated collection date. The GenBank page says April 6, 2020.
</p>
</div>
</div>
-<div id="outline-container-org429f153" class="outline-4">
-<h4 id="org429f153"><span class="section-number-4">4.1.3</span> Collection location</h4>
-<div class="outline-text-4" id="text-4-1-3">
+<div id="outline-container-org62c55ce" class="outline-4">
+<h4 id="org62c55ce"><span class="section-number-4">3.1.3</span> Collection location</h4>
+<div class="outline-text-4" id="text-3-1-3">
<p>
-A search on wikidata says Los Angelos is
+A search on wikidata says Los Angeles is
<a href="https://www.wikidata.org/entity/Q65">https://www.wikidata.org/entity/Q65</a>
</p>
</div>
</div>
-<div id="outline-container-orgbd7fa51" class="outline-4">
-<h4 id="orgbd7fa51"><span class="section-number-4">4.1.4</span> Sequencing technology</h4>
-<div class="outline-text-4" id="text-4-1-4">
+<div id="outline-container-org460b377" class="outline-4">
+<h4 id="org460b377"><span class="section-number-4">3.1.4</span> Sequencing technology</h4>
+<div class="outline-text-4" id="text-3-1-4">
<p>
GenBank entry says Illumina, so we can fill that in
</p>
</div>
</div>
-<div id="outline-container-orgc3b424f" class="outline-4">
-<h4 id="orgc3b424f"><span class="section-number-4">4.1.5</span> Authors</h4>
-<div class="outline-text-4" id="text-4-1-5">
+<div id="outline-container-org77b1e14" class="outline-4">
+<h4 id="org77b1e14"><span class="section-number-4">3.1.5</span> Authors</h4>
+<div class="outline-text-4" id="text-3-1-5">
<p>
GenBank entry says 'Lamers,S., Nolan,D.J., Rose,R., Cross,S., Moraga
Amador,D., Yang,T., Caruso,L., Navia,W., Von Borstel,L., Hui Zhou,X.,
@@ -422,17 +420,17 @@ Freehan,A. and Garcia-Diaz,J.', so we can fill that in.
</div>
</div>
-<div id="outline-container-org5c01347" class="outline-3">
-<h3 id="org5c01347"><span class="section-number-3">4.2</span> Optional fields</h3>
-<div class="outline-text-3" id="text-4-2">
+<div id="outline-container-org3cb346f" class="outline-3">
+<h3 id="org3cb346f"><span class="section-number-3">3.2</span> Optional fields</h3>
+<div class="outline-text-3" id="text-3-2">
<p>
All other fields are optional. But let's see what we can add.
</p>
</div>
-<div id="outline-container-org7fc5461" class="outline-4">
-<h4 id="org7fc5461"><span class="section-number-4">4.2.1</span> Host information</h4>
-<div class="outline-text-4" id="text-4-2-1">
+<div id="outline-container-orgb0cffbb" class="outline-4">
+<h4 id="orgb0cffbb"><span class="section-number-4">3.2.1</span> Host information</h4>
+<div class="outline-text-4" id="text-3-2-1">
<p>
Sadly, not much is known about the host from GenBank. A little
sleuthing renders an interesting paper by some of the authors titled
@@ -445,27 +443,27 @@ did to the person and what the person was like (say age group).
</div>
</div>
-<div id="outline-container-org140c8b5" class="outline-4">
-<h4 id="org140c8b5"><span class="section-number-4">4.2.2</span> Collecting institution</h4>
-<div class="outline-text-4" id="text-4-2-2">
+<div id="outline-container-orgd2a43a6" class="outline-4">
+<h4 id="orgd2a43a6"><span class="section-number-4">3.2.2</span> Collecting institution</h4>
+<div class="outline-text-4" id="text-3-2-2">
<p>
We can fill that in.
</p>
</div>
</div>
-<div id="outline-container-orgf231cf9" class="outline-4">
-<h4 id="orgf231cf9"><span class="section-number-4">4.2.3</span> Specimen source</h4>
-<div class="outline-text-4" id="text-4-2-3">
+<div id="outline-container-org8d5bcf7" class="outline-4">
+<h4 id="org8d5bcf7"><span class="section-number-4">3.2.3</span> Specimen source</h4>
+<div class="outline-text-4" id="text-3-2-3">
<p>
We have that: nasopharyngeal swab
</p>
</div>
</div>
-<div id="outline-container-org74de839" class="outline-4">
-<h4 id="org74de839"><span class="section-number-4">4.2.4</span> Source database accession</h4>
-<div class="outline-text-4" id="text-4-2-4">
+<div id="outline-container-org86b21b2" class="outline-4">
+<h4 id="org86b21b2"><span class="section-number-4">3.2.4</span> Source database accession</h4>
+<div class="outline-text-4" id="text-3-2-4">
<p>
Genbank which is <a href="http://identifiers.org/insdc/MT536190.1#sequence">http://identifiers.org/insdc/MT536190.1#sequence</a>.
Note we plug in our own identifier MT536190.1.
@@ -473,9 +471,9 @@ Note we plug in our own identifier MT536190.1.
</div>
</div>
-<div id="outline-container-org8927a67" class="outline-4">
-<h4 id="org8927a67"><span class="section-number-4">4.2.5</span> Strain name</h4>
-<div class="outline-text-4" id="text-4-2-5">
+<div id="outline-container-org771ea66" class="outline-4">
+<h4 id="org771ea66"><span class="section-number-4">3.2.5</span> Strain name</h4>
+<div class="outline-text-4" id="text-3-2-5">
<p>
SARS-CoV-2/human/USA/LA-BIE-070/2020
</p>
@@ -484,20 +482,36 @@ SARS-CoV-2/human/USA/LA-BIE-070/2020
</div>
</div>
-<div id="outline-container-org38d48d8" class="outline-2">
-<h2 id="org38d48d8"><span class="section-number-2">5</span> Step 3: Submit to COVID-19 PubSeq</h2>
-<div class="outline-text-2" id="text-5">
+<div id="outline-container-org7d281f5" class="outline-2">
+<h2 id="org7d281f5"><span class="section-number-2">4</span> Step 3: Submit to COVID-19 PubSeq</h2>
+<div class="outline-text-2" id="text-4">
<p>
Once you have the sequence and the metadata together, hit
the 'Add to Pangenome' button. The data will be checked,
submitted and the workflows should kick in!
</p>
</div>
+
+
+<div id="outline-container-orgdf0f02d" class="outline-3">
+<h3 id="orgdf0f02d"><span class="section-number-3">4.1</span> Trouble shooting</h3>
+<div class="outline-text-3" id="text-4-1">
+<p>
+We got an error saying: {"stem": "<a href="http://www.wikidata.org/entity/">http://www.wikidata.org/entity/</a>",&#x2026;
+which means that our location field was not formed correctly! After
+fixing it to look like <a href="http://www.wikidata.org/entity/Q65">http://www.wikidata.org/entity/Q65</a> (note http
+instead of https and entity instead of wiki) the submission went
+through. Reload the page (it won't empty the fields) to re-enable the
+submit button.
+</p>
+</div>
+</div>
</div>
-<div id="outline-container-org5ec1337" class="outline-2">
-<h2 id="org5ec1337"><span class="section-number-2">6</span> Step 4: Check output</h2>
-<div class="outline-text-2" id="text-6">
+
+<div id="outline-container-org29f8a92" class="outline-2">
+<h2 id="org29f8a92"><span class="section-number-2">5</span> Step 4: Check output</h2>
+<div class="outline-text-2" id="text-5">
<p>
The current pipeline takes 5.5 hours to complete! Once it completes
the updated data can be checked on the <a href="./download">DOWNLOAD</a> page. After completion
@@ -505,24 +519,122 @@ of above output this <a href="http://sparql.genenetwork.org/sparql/?default-grap
in.
</p>
</div>
+</div>
+
+<div id="outline-container-orgf493854" class="outline-2">
+<h2 id="orgf493854"><span class="section-number-2">6</span> Bulk sequence uploader</h2>
+<div class="outline-text-2" id="text-6">
+<p>
+Above steps require a manual upload of one sequence with metadata.
+What if you have a number of sequences you want to upload in bulk?
+For this we have a command line version of the uploader that can
+directly submit to COVID-19 PubSeq. It accepts a FASTA sequence
+file and associated metadata in <a href="https://github.com/arvados/bh20-seq-resource/blob/master/example/maximum_metadata_example.yaml">YAML</a> format. The YAML matches
+the web form and gets validated against the same <a href="https://github.com/arvados/bh20-seq-resource/blob/master/bh20sequploader/bh20seq-schema.yml">schema</a>. The YAML
+that you need to create/generate for your samples looks like
+</p>
+
+<div class="org-src-container">
+<pre class="src src-json">id: placeholder
+
+host:
+ host_id: XX<span style="color: #8bc34a;">1</span>
+ host_species: http://purl.obolibrary.org/obo/NCBITaxon_<span style="color: #8bc34a;">9606</span>
+ host_sex: http://purl.obolibrary.org/obo/PATO_<span style="color: #8bc34a;">0000384</span>
+ host_age: <span style="color: #8bc34a;">20</span>
+ host_age_unit: http://purl.obolibrary.org/obo/UO_<span style="color: #8bc34a;">0000036</span>
+ host_health_status: http://purl.obolibrary.org/obo/NCIT_C<span style="color: #8bc34a;">25269</span>
+ host_treatment: Process in which the act is intended to modify or alter host status <span style="color: #e91e63;">(</span>Compounds<span style="color: #e91e63;">)</span>
+ host_vaccination: <span style="color: #e91e63;">[</span>vaccines<span style="color: #8bc34a;">1</span>,vaccine<span style="color: #8bc34a;">2</span><span style="color: #e91e63;">]</span>
+ ethnicity: http://purl.obolibrary.org/obo/HANCESTRO_<span style="color: #8bc34a;">0010</span>
+ additional_host_information: Optional free text field for additional information
+
+sample:
+ sample_id: Id of the sample as defined by the submitter
+ collector_name: Name of the person that took the sample
+ collecting_institution: Institute that was responsible of sampling
+ specimen_source: <span style="color: #e91e63;">[</span>http://purl.obolibrary.org/obo/NCIT_C<span style="color: #8bc34a;">155831</span>,http://purl.obolibrary.org/obo/NCIT_C<span style="color: #8bc34a;">155835</span>]
+ collection_date: <span style="color: #9ccc65;">"2020-01-01"</span>
+ collection_location: http://www.wikidata.org/entity/Q<span style="color: #8bc34a;">148</span>
+ sample_storage_conditions: frozen specimen
+ source_database_accession: <span style="color: #2196F3;">[</span>http://identifiers.org/insdc/LC<span style="color: #8bc34a;">522350.1</span>#sequence]
+ additional_collection_information: Optional free text field for additional information
+
+virus:
+ virus_species: http://purl.obolibrary.org/obo/NCBITaxon_<span style="color: #8bc34a;">2697049</span>
+ virus_strain: SARS-CoV-<span style="color: #8bc34a;">2</span>/human/CHN/HS_<span style="color: #8bc34a;">8</span>/<span style="color: #8bc34a;">2020</span>
+
+technology:
+ sample_sequencing_technology: <span style="color: #EF6C00;">[</span>http://www.ebi.ac.uk/efo/EFO_<span style="color: #8bc34a;">0009173</span>,http://www.ebi.ac.uk/efo/EFO_<span style="color: #8bc34a;">0009173</span>]
+ sequence_assembly_method: Protocol used for assembly
+ sequencing_coverage: <span style="color: #B388FF;">[</span><span style="color: #8bc34a;">70.0</span>, <span style="color: #8bc34a;">100.0</span><span style="color: #B388FF;">]</span>
+ additional_technology_information: Optional free text field for additional information
+
+submitter:
+ authors: <span style="color: #B388FF;">[</span>John Doe, Joe Boe, Jonny Oe<span style="color: #B388FF;">]</span>
+ submitter_name: <span style="color: #B388FF;">[</span>John Doe<span style="color: #B388FF;">]</span>
+ submitter_address: John Doe's address
+ originating_lab: John Doe kitchen
+ lab_address: John Doe's address
+ provider_sample_id: XXX<span style="color: #8bc34a;">1</span>
+ submitter_sample_id: XXX<span style="color: #8bc34a;">2</span>
+ publication: PMID<span style="color: #8bc34a;">00001113</span>
+ submitter_orcid: <span style="color: #B388FF;">[</span>https://orcid.org/<span style="color: #8bc34a;">0000</span>-<span style="color: #8bc34a;">0000</span>-<span style="color: #8bc34a;">0000</span>-<span style="color: #8bc34a;">0000</span>,https://orcid.org/<span style="color: #8bc34a;">0000</span>-<span style="color: #8bc34a;">0000</span>-<span style="color: #8bc34a;">0000</span>-<span style="color: #8bc34a;">0001</span>]
+ additional_submitter_information: Optional free text field for additional information
+</pre>
+</div>
+</div>
-<div id="outline-container-org070e13e" class="outline-3">
-<h3 id="org070e13e"><span class="section-number-3">6.1</span> Trouble shooting</h3>
+<div id="outline-container-org37fadbc" class="outline-3">
+<h3 id="org37fadbc"><span class="section-number-3">6.1</span> Run the uploader (CLI)</h3>
<div class="outline-text-3" id="text-6-1">
<p>
-We got an error saying: {"stem": "<a href="http://www.wikidata.org/entity/">http://www.wikidata.org/entity/</a>",&#x2026;
-which means that our location field was not formed correctly! After
-fixing it to look like <a href="http://www.wikidata.org/entity/Q65">http://www.wikidata.org/entity/Q65</a> (note http
-instead on https and entity instead of wiki) the submission went
-through. Reload the page (it won't empty the fields) to re-enable the
-submit button.
+After installing with pip you should be
+able to run
+</p>
+
+<pre class="example">
+bh20sequploader sequence.fasta metadata.yaml
+</pre>
+
+
+
+<p>
+Alternatively the script can be installed from <a href="https://github.com/arvados/bh20-seq-resource#installation">github</a>. Run on the
+command line
+</p>
+
+<pre class="example">
+python3 bh20sequploader/main.py example/sequence.fasta example/maximum_metadata_example.yaml
+</pre>
+
+
+<p>
+after installing dependencies (also described in <a href="https://github.com/arvados/bh20-seq-resource/blob/master/doc/INSTALL.md">INSTALL</a> with the GNU
+Guix package manager).
+</p>
+
+<p>
+The web interface uses this exact same script so it should just work
+(TM).
+</p>
+</div>
+</div>
+
+<div id="outline-container-org39adf09" class="outline-3">
+<h3 id="org39adf09"><span class="section-number-3">6.2</span> Example: uploading bulk GenBank sequences</h3>
+<div class="outline-text-3" id="text-6-2">
+<p>
+We also use above script to bulk upload GenBank sequences with a <a href="https://github.com/arvados/bh20-seq-resource/blob/master/scripts/from_genbank_to_fasta_and_yaml.py">FASTA
+and YAML</a> extractor specific for GenBank. This means that the steps we
+took above for uploading a GenBank sequence are already automated.
</p>
</div>
</div>
</div>
</div>
<div id="postamble" class="status">
-<hr><small>Created by <a href="http://thebird.nl/">Pjotr Prins</a> (pjotr.public768 at thebird 'dot' nl) using Emacs org-mode and a healthy dose of Lisp!<br />Modified 2020-05-30 Sat 10:44</small>.
+<hr /><small>Created by <a href="http://thebird.nl/">Pjotr Prins</a> (pjotr.public768 at thebird 'dot' nl) using Emacs org-mode and a healthy dose of Lisp!<br />Modified 2020-05-30 Sat 18:12</small>.
</div>
</body>
</html>
diff --git a/doc/blog/using-covid-19-pubseq-part3.org b/doc/blog/using-covid-19-pubseq-part3.org
index 4dd3078..03f37ab 100644
--- a/doc/blog/using-covid-19-pubseq-part3.org
+++ b/doc/blog/using-covid-19-pubseq-part3.org
@@ -6,26 +6,26 @@
#+HTML_HEAD: <link rel="Blog stylesheet" type="text/css" href="blog.css" />
-* Uploading Data
-/Work in progress!/
* Table of Contents :TOC:noexport:
- [[#uploading-data][Uploading Data]]
- - [[#introduction][Introduction]]
- [[#step-1-upload-sequence][Step 1: Upload sequence]]
- [[#step-2-add-metadata][Step 2: Add metadata]]
- [[#obligatory-fields][Obligatory fields]]
- [[#optional-fields][Optional fields]]
- [[#step-3-submit-to-covid-19-pubseq][Step 3: Submit to COVID-19 PubSeq]]
- - [[#step-4-check-output][Step 4: Check output]]
- [[#trouble-shooting][Trouble shooting]]
+ - [[#step-4-check-output][Step 4: Check output]]
+ - [[#bulk-sequence-uploader][Bulk sequence uploader]]
+ - [[#run-the-uploader-cli][Run the uploader (CLI)]]
+ - [[#example-uploading-bulk-genbank-sequences][Example: uploading bulk GenBank sequences]]
-* Introduction
+* Uploading Data
The COVID-19 PubSeq allows you to upload your SARS-Cov-2 strains to a
-public resource for global comparisons. Compute it triggered on
-upload. Read the [[./about][ABOUT]] page for more information.
+public resource for global comparisons. A recompute of the pangenome
+gets triggered on upload. Read the [[./about][ABOUT]] page for more information.
* Step 1: Upload sequence
@@ -59,7 +59,7 @@ A number of fields are obligatory: sample id, date, location,
technology and authors. The others are optional, but it is valuable to
enter them when information is available. Metadata is defined in this
[[https://github.com/arvados/bh20-seq-resource/blob/master/bh20sequploader/bh20seq-schema.yml][schema]]. From this schema we generate the input form. Note that
-opitional fields have a question mark in the ~type~. You can add
+optional fields have a question mark in the ~type~. You can add
metadata yourself, btw, because this is a public resource! See also
[[./blog?id=using-covid-19-pubseq-part5][Modify metadata]] for more information.
@@ -86,7 +86,7 @@ Estimated collection date. The GenBank page says April 6, 2020.
*** Collection location
-A search on wikidata says Los Angelos is
+A search on wikidata says Los Angeles is
https://www.wikidata.org/entity/Q65
*** Sequencing technology
@@ -136,12 +136,6 @@ Once you have the sequence and the metadata together, hit
the 'Add to Pangenome' button. The data will be checked,
submitted and the workflows should kick in!
-* Step 4: Check output
-
-The current pipeline takes 5.5 hours to complete! Once it completes
-the updated data can be checked on the [[./download][DOWNLOAD]] page. After completion
-of above output this [[http://sparql.genenetwork.org/sparql/?default-graph-uri=&query=PREFIX+pubseq%3A+%3Chttp%3A%2F%2Fbiohackathon.org%2Fbh20-seq-schema%23MainSchema%2F%3E%0D%0APREFIX+sio%3A+%3Chttp%3A%2F%2Fsemanticscience.org%2Fresource%2F%3E%0D%0Aselect+distinct+%3Fsample+%3Fp+%3Fo%0D%0A%7B%0D%0A+++%3Fsample+sio%3ASIO_000115+%22MT536190.1%22+.%0D%0A+++%3Fsample+%3Fp+%3Fo+.%0D%0A%7D&format=text%2Fhtml&timeout=0&debug=on&run=+Run+Query+][SPARQL query]] shows some of the metadata we put
-in.
** Trouble shooting
@@ -151,3 +145,95 @@ fixing it to look like http://www.wikidata.org/entity/Q65 (note http
instead on https and entity instead of wiki) the submission went
through. Reload the page (it won't empty the fields) to re-enable the
submit button.
+
+
+* Step 4: Check output
+
+The current pipeline takes 5.5 hours to complete! Once it completes
+the updated data can be checked on the [[./download][DOWNLOAD]] page. After completion
+of above output this [[http://sparql.genenetwork.org/sparql/?default-graph-uri=&query=PREFIX+pubseq%3A+%3Chttp%3A%2F%2Fbiohackathon.org%2Fbh20-seq-schema%23MainSchema%2F%3E%0D%0APREFIX+sio%3A+%3Chttp%3A%2F%2Fsemanticscience.org%2Fresource%2F%3E%0D%0Aselect+distinct+%3Fsample+%3Fp+%3Fo%0D%0A%7B%0D%0A+++%3Fsample+sio%3ASIO_000115+%22MT536190.1%22+.%0D%0A+++%3Fsample+%3Fp+%3Fo+.%0D%0A%7D&format=text%2Fhtml&timeout=0&debug=on&run=+Run+Query+][SPARQL query]] shows some of the metadata we put
+in.
+
+* Bulk sequence uploader
+
+Above steps require a manual upload of one sequence with metadata.
+What if you have a number of sequences you want to upload in bulk?
+For this we have a command line version of the uploader that can
+directly submit to COVID-19 PubSeq. It accepts a FASTA sequence
+file and associated metadata in [[https://github.com/arvados/bh20-seq-resource/blob/master/example/maximum_metadata_example.yaml][YAML]] format. The YAML matches
+the web form and gets validated against the same [[https://github.com/arvados/bh20-seq-resource/blob/master/bh20sequploader/bh20seq-schema.yml][schema]]. The YAML
+that you need to create/generate for your samples looks like
+
+#+begin_src yaml
+id: placeholder
+
+host:
+ host_id: XX1
+ host_species: http://purl.obolibrary.org/obo/NCBITaxon_9606
+ host_sex: http://purl.obolibrary.org/obo/PATO_0000384
+ host_age: 20
+ host_age_unit: http://purl.obolibrary.org/obo/UO_0000036
+ host_health_status: http://purl.obolibrary.org/obo/NCIT_C25269
+ host_treatment: Process in which the act is intended to modify or alter host status (Compounds)
+ host_vaccination: [vaccines1,vaccine2]
+ ethnicity: http://purl.obolibrary.org/obo/HANCESTRO_0010
+ additional_host_information: Optional free text field for additional information
+
+sample:
+ sample_id: Id of the sample as defined by the submitter
+ collector_name: Name of the person that took the sample
+ collecting_institution: Institute that was responsible of sampling
+ specimen_source: [http://purl.obolibrary.org/obo/NCIT_C155831,http://purl.obolibrary.org/obo/NCIT_C155835]
+ collection_date: "2020-01-01"
+ collection_location: http://www.wikidata.org/entity/Q148
+ sample_storage_conditions: frozen specimen
+ source_database_accession: [http://identifiers.org/insdc/LC522350.1#sequence]
+ additional_collection_information: Optional free text field for additional information
+
+virus:
+ virus_species: http://purl.obolibrary.org/obo/NCBITaxon_2697049
+ virus_strain: SARS-CoV-2/human/CHN/HS_8/2020
+
+technology:
+ sample_sequencing_technology: [http://www.ebi.ac.uk/efo/EFO_0009173,http://www.ebi.ac.uk/efo/EFO_0009173]
+ sequence_assembly_method: Protocol used for assembly
+ sequencing_coverage: [70.0, 100.0]
+ additional_technology_information: Optional free text field for additional information
+
+submitter:
+ authors: [John Doe, Joe Boe, Jonny Oe]
+ submitter_name: [John Doe]
+ submitter_address: John Doe's address
+ originating_lab: John Doe kitchen
+ lab_address: John Doe's address
+ provider_sample_id: XXX1
+ submitter_sample_id: XXX2
+ publication: PMID00001113
+ submitter_orcid: [https://orcid.org/0000-0000-0000-0000,https://orcid.org/0000-0000-0000-0001]
+ additional_submitter_information: Optional free text field for additional information
+#+end_src
+
+** Run the uploader (CLI)
+
+After installing with pip you should be
+able to run
+
+: bh20sequploader sequence.fasta metadata.yaml
+
+
+Alternatively the script can be installed from [[https://github.com/arvados/bh20-seq-resource#installation][github]]. Run on the
+command line
+
+: python3 bh20sequploader/main.py example/sequence.fasta example/maximum_metadata_example.yaml
+
+after installing dependencies (also described in [[https://github.com/arvados/bh20-seq-resource/blob/master/doc/INSTALL.md][INSTALL]] with the GNU
+Guix package manager).
+
+The web interface uses this exact same script so it should just work
+(TM).
+
+** Example: uploading bulk GenBank sequences
+
+We also use above script to bulk upload GenBank sequences with a [[https://github.com/arvados/bh20-seq-resource/blob/master/scripts/from_genbank_to_fasta_and_yaml.py][FASTA
+and YAML]] extractor specific for GenBank. This means that the steps we
+took above for uploading a GenBank sequence are already automated.
diff --git a/doc/blog/using-covid-19-pubseq-part4.html b/doc/blog/using-covid-19-pubseq-part4.html
new file mode 100644
index 0000000..67d299e
--- /dev/null
+++ b/doc/blog/using-covid-19-pubseq-part4.html
@@ -0,0 +1,266 @@
+<?xml version="1.0" encoding="utf-8"?>
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
+"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en">
+<head>
+<!-- 2020-05-30 Sat 11:52 -->
+<meta http-equiv="Content-Type" content="text/html;charset=utf-8" />
+<meta name="viewport" content="width=device-width, initial-scale=1" />
+<title>COVID-19 PubSeq (part 4)</title>
+<meta name="generator" content="Org mode" />
+<meta name="author" content="Pjotr Prins" />
+<style type="text/css">
+ <!--/*--><![CDATA[/*><!--*/
+ .title { text-align: center;
+ margin-bottom: .2em; }
+ .subtitle { text-align: center;
+ font-size: medium;
+ font-weight: bold;
+ margin-top:0; }
+ .todo { font-family: monospace; color: red; }
+ .done { font-family: monospace; color: green; }
+ .priority { font-family: monospace; color: orange; }
+ .tag { background-color: #eee; font-family: monospace;
+ padding: 2px; font-size: 80%; font-weight: normal; }
+ .timestamp { color: #bebebe; }
+ .timestamp-kwd { color: #5f9ea0; }
+ .org-right { margin-left: auto; margin-right: 0px; text-align: right; }
+ .org-left { margin-left: 0px; margin-right: auto; text-align: left; }
+ .org-center { margin-left: auto; margin-right: auto; text-align: center; }
+ .underline { text-decoration: underline; }
+ #postamble p, #preamble p { font-size: 90%; margin: .2em; }
+ p.verse { margin-left: 3%; }
+ pre {
+ border: 1px solid #ccc;
+ box-shadow: 3px 3px 3px #eee;
+ padding: 8pt;
+ font-family: monospace;
+ overflow: auto;
+ margin: 1.2em;
+ }
+ pre.src {
+ position: relative;
+ overflow: visible;
+ padding-top: 1.2em;
+ }
+ pre.src:before {
+ display: none;
+ position: absolute;
+ background-color: white;
+ top: -10px;
+ right: 10px;
+ padding: 3px;
+ border: 1px solid black;
+ }
+ pre.src:hover:before { display: inline;}
+ /* Languages per Org manual */
+ pre.src-asymptote:before { content: 'Asymptote'; }
+ pre.src-awk:before { content: 'Awk'; }
+ pre.src-C:before { content: 'C'; }
+ /* pre.src-C++ doesn't work in CSS */
+ pre.src-clojure:before { content: 'Clojure'; }
+ pre.src-css:before { content: 'CSS'; }
+ pre.src-D:before { content: 'D'; }
+ pre.src-ditaa:before { content: 'ditaa'; }
+ pre.src-dot:before { content: 'Graphviz'; }
+ pre.src-calc:before { content: 'Emacs Calc'; }
+ pre.src-emacs-lisp:before { content: 'Emacs Lisp'; }
+ pre.src-fortran:before { content: 'Fortran'; }
+ pre.src-gnuplot:before { content: 'gnuplot'; }
+ pre.src-haskell:before { content: 'Haskell'; }
+ pre.src-hledger:before { content: 'hledger'; }
+ pre.src-java:before { content: 'Java'; }
+ pre.src-js:before { content: 'Javascript'; }
+ pre.src-latex:before { content: 'LaTeX'; }
+ pre.src-ledger:before { content: 'Ledger'; }
+ pre.src-lisp:before { content: 'Lisp'; }
+ pre.src-lilypond:before { content: 'Lilypond'; }
+ pre.src-lua:before { content: 'Lua'; }
+ pre.src-matlab:before { content: 'MATLAB'; }
+ pre.src-mscgen:before { content: 'Mscgen'; }
+ pre.src-ocaml:before { content: 'Objective Caml'; }
+ pre.src-octave:before { content: 'Octave'; }
+ pre.src-org:before { content: 'Org mode'; }
+ pre.src-oz:before { content: 'OZ'; }
+ pre.src-plantuml:before { content: 'Plantuml'; }
+ pre.src-processing:before { content: 'Processing.js'; }
+ pre.src-python:before { content: 'Python'; }
+ pre.src-R:before { content: 'R'; }
+ pre.src-ruby:before { content: 'Ruby'; }
+ pre.src-sass:before { content: 'Sass'; }
+ pre.src-scheme:before { content: 'Scheme'; }
+ pre.src-screen:before { content: 'Gnu Screen'; }
+ pre.src-sed:before { content: 'Sed'; }
+ pre.src-sh:before { content: 'shell'; }
+ pre.src-sql:before { content: 'SQL'; }
+ pre.src-sqlite:before { content: 'SQLite'; }
+ /* additional languages in org.el's org-babel-load-languages alist */
+ pre.src-forth:before { content: 'Forth'; }
+ pre.src-io:before { content: 'IO'; }
+ pre.src-J:before { content: 'J'; }
+ pre.src-makefile:before { content: 'Makefile'; }
+ pre.src-maxima:before { content: 'Maxima'; }
+ pre.src-perl:before { content: 'Perl'; }
+ pre.src-picolisp:before { content: 'Pico Lisp'; }
+ pre.src-scala:before { content: 'Scala'; }
+ pre.src-shell:before { content: 'Shell Script'; }
+ pre.src-ebnf2ps:before { content: 'ebfn2ps'; }
+ /* additional language identifiers per "defun org-babel-execute"
+ in ob-*.el */
+ pre.src-cpp:before { content: 'C++'; }
+ pre.src-abc:before { content: 'ABC'; }
+ pre.src-coq:before { content: 'Coq'; }
+ pre.src-groovy:before { content: 'Groovy'; }
+ /* additional language identifiers from org-babel-shell-names in
+ ob-shell.el: ob-shell is the only babel language using a lambda to put
+ the execution function name together. */
+ pre.src-bash:before { content: 'bash'; }
+ pre.src-csh:before { content: 'csh'; }
+ pre.src-ash:before { content: 'ash'; }
+ pre.src-dash:before { content: 'dash'; }
+ pre.src-ksh:before { content: 'ksh'; }
+ pre.src-mksh:before { content: 'mksh'; }
+ pre.src-posh:before { content: 'posh'; }
+ /* Additional Emacs modes also supported by the LaTeX listings package */
+ pre.src-ada:before { content: 'Ada'; }
+ pre.src-asm:before { content: 'Assembler'; }
+ pre.src-caml:before { content: 'Caml'; }
+ pre.src-delphi:before { content: 'Delphi'; }
+ pre.src-html:before { content: 'HTML'; }
+ pre.src-idl:before { content: 'IDL'; }
+ pre.src-mercury:before { content: 'Mercury'; }
+ pre.src-metapost:before { content: 'MetaPost'; }
+ pre.src-modula-2:before { content: 'Modula-2'; }
+ pre.src-pascal:before { content: 'Pascal'; }
+ pre.src-ps:before { content: 'PostScript'; }
+ pre.src-prolog:before { content: 'Prolog'; }
+ pre.src-simula:before { content: 'Simula'; }
+ pre.src-tcl:before { content: 'tcl'; }
+ pre.src-tex:before { content: 'TeX'; }
+ pre.src-plain-tex:before { content: 'Plain TeX'; }
+ pre.src-verilog:before { content: 'Verilog'; }
+ pre.src-vhdl:before { content: 'VHDL'; }
+ pre.src-xml:before { content: 'XML'; }
+ pre.src-nxml:before { content: 'XML'; }
+ /* add a generic configuration mode; LaTeX export needs an additional
+ (add-to-list 'org-latex-listings-langs '(conf " ")) in .emacs */
+ pre.src-conf:before { content: 'Configuration File'; }
+
+ table { border-collapse:collapse; }
+ caption.t-above { caption-side: top; }
+ caption.t-bottom { caption-side: bottom; }
+ td, th { vertical-align:top; }
+ th.org-right { text-align: center; }
+ th.org-left { text-align: center; }
+ th.org-center { text-align: center; }
+ td.org-right { text-align: right; }
+ td.org-left { text-align: left; }
+ td.org-center { text-align: center; }
+ dt { font-weight: bold; }
+ .footpara { display: inline; }
+ .footdef { margin-bottom: 1em; }
+ .figure { padding: 1em; }
+ .figure p { text-align: center; }
+ .equation-container {
+ display: table;
+ text-align: center;
+ width: 100%;
+ }
+ .equation {
+ vertical-align: middle;
+ }
+ .equation-label {
+ display: table-cell;
+ text-align: right;
+ vertical-align: middle;
+ }
+ .inlinetask {
+ padding: 10px;
+ border: 2px solid gray;
+ margin: 10px;
+ background: #ffffcc;
+ }
+ #org-div-home-and-up
+ { text-align: right; font-size: 70%; white-space: nowrap; }
+ textarea { overflow-x: auto; }
+ .linenr { font-size: smaller }
+ .code-highlighted { background-color: #ffff00; }
+ .org-info-js_info-navigation { border-style: none; }
+ #org-info-js_console-label
+ { font-size: 10px; font-weight: bold; white-space: nowrap; }
+ .org-info-js_search-highlight
+ { background-color: #ffff00; color: #000000; font-weight: bold; }
+ .org-svg { width: 90%; }
+ /*]]>*/-->
+</style>
+<script type="text/javascript">
+/*
+@licstart The following is the entire license notice for the
+JavaScript code in this tag.
+
+Copyright (C) 2012-2020 Free Software Foundation, Inc.
+
+The JavaScript code in this tag is free software: you can
+redistribute it and/or modify it under the terms of the GNU
+General Public License (GNU GPL) as published by the Free Software
+Foundation, either version 3 of the License, or (at your option)
+any later version. The code is distributed WITHOUT ANY WARRANTY;
+without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU GPL for more details.
+
+As additional permission under GNU GPL version 3 section 7, you
+may distribute non-source (e.g., minimized or compacted) forms of
+that code without the copy of the GNU GPL normally required by
+section 4, provided you include this license notice and a URL
+through which recipients can access the Corresponding Source.
+
+
+@licend The above is the entire license notice
+for the JavaScript code in this tag.
+*/
+<!--/*--><![CDATA[/*><!--*/
+ function CodeHighlightOn(elem, id)
+ {
+   // Highlight `elem` together with the element whose id is `id`.
+   // Both original class names are stored on `elem` so that
+   // CodeHighlightOff can put them back afterwards. Nothing happens
+   // when no element with that id exists.
+   var target = document.getElementById(id);
+   if (null == target) {
+     return;
+   }
+   var highlighted = "code-highlighted";
+   elem.cacheClassElem = elem.className;
+   elem.cacheClassTarget = target.className;
+   target.className = highlighted;
+   elem.className = highlighted;
+ }
+ function CodeHighlightOff(elem, id)
+ {
+   // Undo CodeHighlightOn: restore the class names cached on `elem`
+   // for `elem` itself and for the element whose id is `id`.
+   var target = document.getElementById(id);
+   // Test for "was cached" rather than truthiness: an element that had
+   // no class caches "" and must still get that empty class back,
+   // otherwise it stays highlighted.
+   if (typeof elem.cacheClassElem !== "undefined")
+     elem.className = elem.cacheClassElem;
+   // The lookup can return null (unknown id); skip the target then
+   // instead of throwing on a property write to null.
+   if (null != target && typeof elem.cacheClassTarget !== "undefined")
+     target.className = elem.cacheClassTarget;
+ }
+/*]]>*///-->
+</script>
+</head>
+<body>
+<div id="content">
+<div id="table-of-contents">
+<h2>Table of Contents</h2>
+<div id="text-table-of-contents">
+<ul>
+<li><a href="#orgda6f48c">1. Modify Workflow</a></li>
+</ul>
+</div>
+</div>
+<div id="outline-container-orgda6f48c" class="outline-2">
+<h2 id="orgda6f48c"><span class="section-number-2">1</span> Modify Workflow</h2>
+<div class="outline-text-2" id="text-1">
+<p>
+<i>Work in progress!</i>
+</p>
+</div>
+</div>
+</div>
+<div id="postamble" class="status">
+<hr /><small>Created by <a href="http://thebird.nl/">Pjotr Prins</a> (pjotr.public768 at thebird 'dot' nl) using Emacs org-mode and a healthy dose of Lisp!<br />Modified 2020-05-30 Sat 11:52</small>.
+</div>
+</body>
+</html>
diff --git a/doc/blog/using-covid-19-pubseq-part4.org b/doc/blog/using-covid-19-pubseq-part4.org
index c147ba3..58a1f56 100644
--- a/doc/blog/using-covid-19-pubseq-part4.org
+++ b/doc/blog/using-covid-19-pubseq-part4.org
@@ -1,2 +1,3 @@
-/Work in progress!/
+* Modify Workflow
+/Work in progress!/
diff --git a/doc/blog/using-covid-19-pubseq-part5.html b/doc/blog/using-covid-19-pubseq-part5.html
new file mode 100644
index 0000000..30a3f83
--- /dev/null
+++ b/doc/blog/using-covid-19-pubseq-part5.html
@@ -0,0 +1,277 @@
+<?xml version="1.0" encoding="utf-8"?>
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
+"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en">
+<head>
+<!-- 2020-05-30 Sat 11:59 -->
+<meta http-equiv="Content-Type" content="text/html;charset=utf-8" />
+<meta name="viewport" content="width=device-width, initial-scale=1" />
+<title>COVID-19 PubSeq (part 5)</title>
+<meta name="generator" content="Org mode" />
+<meta name="author" content="Pjotr Prins" />
+<style type="text/css">
+ <!--/*--><![CDATA[/*><!--*/
+ .title { text-align: center;
+ margin-bottom: .2em; }
+ .subtitle { text-align: center;
+ font-size: medium;
+ font-weight: bold;
+ margin-top:0; }
+ .todo { font-family: monospace; color: red; }
+ .done { font-family: monospace; color: green; }
+ .priority { font-family: monospace; color: orange; }
+ .tag { background-color: #eee; font-family: monospace;
+ padding: 2px; font-size: 80%; font-weight: normal; }
+ .timestamp { color: #bebebe; }
+ .timestamp-kwd { color: #5f9ea0; }
+ .org-right { margin-left: auto; margin-right: 0px; text-align: right; }
+ .org-left { margin-left: 0px; margin-right: auto; text-align: left; }
+ .org-center { margin-left: auto; margin-right: auto; text-align: center; }
+ .underline { text-decoration: underline; }
+ #postamble p, #preamble p { font-size: 90%; margin: .2em; }
+ p.verse { margin-left: 3%; }
+ pre {
+ border: 1px solid #ccc;
+ box-shadow: 3px 3px 3px #eee;
+ padding: 8pt;
+ font-family: monospace;
+ overflow: auto;
+ margin: 1.2em;
+ }
+ pre.src {
+ position: relative;
+ overflow: visible;
+ padding-top: 1.2em;
+ }
+ pre.src:before {
+ display: none;
+ position: absolute;
+ background-color: white;
+ top: -10px;
+ right: 10px;
+ padding: 3px;
+ border: 1px solid black;
+ }
+ pre.src:hover:before { display: inline;}
+ /* Languages per Org manual */
+ pre.src-asymptote:before { content: 'Asymptote'; }
+ pre.src-awk:before { content: 'Awk'; }
+ pre.src-C:before { content: 'C'; }
+ /* pre.src-C++ doesn't work in CSS */
+ pre.src-clojure:before { content: 'Clojure'; }
+ pre.src-css:before { content: 'CSS'; }
+ pre.src-D:before { content: 'D'; }
+ pre.src-ditaa:before { content: 'ditaa'; }
+ pre.src-dot:before { content: 'Graphviz'; }
+ pre.src-calc:before { content: 'Emacs Calc'; }
+ pre.src-emacs-lisp:before { content: 'Emacs Lisp'; }
+ pre.src-fortran:before { content: 'Fortran'; }
+ pre.src-gnuplot:before { content: 'gnuplot'; }
+ pre.src-haskell:before { content: 'Haskell'; }
+ pre.src-hledger:before { content: 'hledger'; }
+ pre.src-java:before { content: 'Java'; }
+ pre.src-js:before { content: 'Javascript'; }
+ pre.src-latex:before { content: 'LaTeX'; }
+ pre.src-ledger:before { content: 'Ledger'; }
+ pre.src-lisp:before { content: 'Lisp'; }
+ pre.src-lilypond:before { content: 'Lilypond'; }
+ pre.src-lua:before { content: 'Lua'; }
+ pre.src-matlab:before { content: 'MATLAB'; }
+ pre.src-mscgen:before { content: 'Mscgen'; }
+ pre.src-ocaml:before { content: 'Objective Caml'; }
+ pre.src-octave:before { content: 'Octave'; }
+ pre.src-org:before { content: 'Org mode'; }
+ pre.src-oz:before { content: 'OZ'; }
+ pre.src-plantuml:before { content: 'Plantuml'; }
+ pre.src-processing:before { content: 'Processing.js'; }
+ pre.src-python:before { content: 'Python'; }
+ pre.src-R:before { content: 'R'; }
+ pre.src-ruby:before { content: 'Ruby'; }
+ pre.src-sass:before { content: 'Sass'; }
+ pre.src-scheme:before { content: 'Scheme'; }
+ pre.src-screen:before { content: 'Gnu Screen'; }
+ pre.src-sed:before { content: 'Sed'; }
+ pre.src-sh:before { content: 'shell'; }
+ pre.src-sql:before { content: 'SQL'; }
+ pre.src-sqlite:before { content: 'SQLite'; }
+ /* additional languages in org.el's org-babel-load-languages alist */
+ pre.src-forth:before { content: 'Forth'; }
+ pre.src-io:before { content: 'IO'; }
+ pre.src-J:before { content: 'J'; }
+ pre.src-makefile:before { content: 'Makefile'; }
+ pre.src-maxima:before { content: 'Maxima'; }
+ pre.src-perl:before { content: 'Perl'; }
+ pre.src-picolisp:before { content: 'Pico Lisp'; }
+ pre.src-scala:before { content: 'Scala'; }
+ pre.src-shell:before { content: 'Shell Script'; }
+ pre.src-ebnf2ps:before { content: 'ebfn2ps'; }
+ /* additional language identifiers per "defun org-babel-execute"
+ in ob-*.el */
+ pre.src-cpp:before { content: 'C++'; }
+ pre.src-abc:before { content: 'ABC'; }
+ pre.src-coq:before { content: 'Coq'; }
+ pre.src-groovy:before { content: 'Groovy'; }
+ /* additional language identifiers from org-babel-shell-names in
+ ob-shell.el: ob-shell is the only babel language using a lambda to put
+ the execution function name together. */
+ pre.src-bash:before { content: 'bash'; }
+ pre.src-csh:before { content: 'csh'; }
+ pre.src-ash:before { content: 'ash'; }
+ pre.src-dash:before { content: 'dash'; }
+ pre.src-ksh:before { content: 'ksh'; }
+ pre.src-mksh:before { content: 'mksh'; }
+ pre.src-posh:before { content: 'posh'; }
+ /* Additional Emacs modes also supported by the LaTeX listings package */
+ pre.src-ada:before { content: 'Ada'; }
+ pre.src-asm:before { content: 'Assembler'; }
+ pre.src-caml:before { content: 'Caml'; }
+ pre.src-delphi:before { content: 'Delphi'; }
+ pre.src-html:before { content: 'HTML'; }
+ pre.src-idl:before { content: 'IDL'; }
+ pre.src-mercury:before { content: 'Mercury'; }
+ pre.src-metapost:before { content: 'MetaPost'; }
+ pre.src-modula-2:before { content: 'Modula-2'; }
+ pre.src-pascal:before { content: 'Pascal'; }
+ pre.src-ps:before { content: 'PostScript'; }
+ pre.src-prolog:before { content: 'Prolog'; }
+ pre.src-simula:before { content: 'Simula'; }
+ pre.src-tcl:before { content: 'tcl'; }
+ pre.src-tex:before { content: 'TeX'; }
+ pre.src-plain-tex:before { content: 'Plain TeX'; }
+ pre.src-verilog:before { content: 'Verilog'; }
+ pre.src-vhdl:before { content: 'VHDL'; }
+ pre.src-xml:before { content: 'XML'; }
+ pre.src-nxml:before { content: 'XML'; }
+ /* add a generic configuration mode; LaTeX export needs an additional
+ (add-to-list 'org-latex-listings-langs '(conf " ")) in .emacs */
+ pre.src-conf:before { content: 'Configuration File'; }
+
+ table { border-collapse:collapse; }
+ caption.t-above { caption-side: top; }
+ caption.t-bottom { caption-side: bottom; }
+ td, th { vertical-align:top; }
+ th.org-right { text-align: center; }
+ th.org-left { text-align: center; }
+ th.org-center { text-align: center; }
+ td.org-right { text-align: right; }
+ td.org-left { text-align: left; }
+ td.org-center { text-align: center; }
+ dt { font-weight: bold; }
+ .footpara { display: inline; }
+ .footdef { margin-bottom: 1em; }
+ .figure { padding: 1em; }
+ .figure p { text-align: center; }
+ .equation-container {
+ display: table;
+ text-align: center;
+ width: 100%;
+ }
+ .equation {
+ vertical-align: middle;
+ }
+ .equation-label {
+ display: table-cell;
+ text-align: right;
+ vertical-align: middle;
+ }
+ .inlinetask {
+ padding: 10px;
+ border: 2px solid gray;
+ margin: 10px;
+ background: #ffffcc;
+ }
+ #org-div-home-and-up
+ { text-align: right; font-size: 70%; white-space: nowrap; }
+ textarea { overflow-x: auto; }
+ .linenr { font-size: smaller }
+ .code-highlighted { background-color: #ffff00; }
+ .org-info-js_info-navigation { border-style: none; }
+ #org-info-js_console-label
+ { font-size: 10px; font-weight: bold; white-space: nowrap; }
+ .org-info-js_search-highlight
+ { background-color: #ffff00; color: #000000; font-weight: bold; }
+ .org-svg { width: 90%; }
+ /*]]>*/-->
+</style>
+<script type="text/javascript">
+/*
+@licstart The following is the entire license notice for the
+JavaScript code in this tag.
+
+Copyright (C) 2012-2020 Free Software Foundation, Inc.
+
+The JavaScript code in this tag is free software: you can
+redistribute it and/or modify it under the terms of the GNU
+General Public License (GNU GPL) as published by the Free Software
+Foundation, either version 3 of the License, or (at your option)
+any later version. The code is distributed WITHOUT ANY WARRANTY;
+without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU GPL for more details.
+
+As additional permission under GNU GPL version 3 section 7, you
+may distribute non-source (e.g., minimized or compacted) forms of
+that code without the copy of the GNU GPL normally required by
+section 4, provided you include this license notice and a URL
+through which recipients can access the Corresponding Source.
+
+
+@licend The above is the entire license notice
+for the JavaScript code in this tag.
+*/
+<!--/*--><![CDATA[/*><!--*/
+ function CodeHighlightOn(elem, id)
+ {
+ var target = document.getElementById(id);
+ if(null != target) {
+ elem.cacheClassElem = elem.className;
+ elem.cacheClassTarget = target.className;
+ target.className = "code-highlighted";
+ elem.className = "code-highlighted";
+ }
+ }
+ function CodeHighlightOff(elem, id)
+ {
+ var target = document.getElementById(id);
+ if(elem.cacheClassElem)
+ elem.className = elem.cacheClassElem;
+ if(elem.cacheClassTarget)
+ target.className = elem.cacheClassTarget;
+ }
+/*]]>*///-->
+</script>
+</head>
+<body>
+<div id="content">
+<div id="table-of-contents">
+<h2>Table of Contents</h2>
+<div id="text-table-of-contents">
+<ul>
+<li><a href="#org31c224e">1. Modify Metadata</a></li>
+</ul>
+</div>
+</div>
+<div id="outline-container-org31c224e" class="outline-2">
+<h2 id="org31c224e"><span class="section-number-2">1</span> Modify Metadata</h2>
+<div class="outline-text-2" id="text-1">
+<p>
+The public sequence resource uses multiple data formats listed on the
+<a href="./download">DOWNLOAD</a> page. One of the most exciting features is the full support
+for RDF and semantic web/linked data ontologies. This technology
+allows for querying data in unprescribed ways - that is, you can
+formulate your own queries without dealing with a preset model of that
+data (so typical of CSV files and SQL tables). Examples of exploring
+data are listed <a href="./blog?id=using-covid-19-pubseq-part1">here</a>.
+</p>
+
+<p>
+In this BLOG we are going to look at the metadata entered on the
+<a href="./">COVID-19 PubSeq</a> website (or command line client).
+</p>
+</div>
+</div>
+</div>
+<div id="postamble" class="status">
+<hr /><small>Created by <a href="http://thebird.nl/">Pjotr Prins</a> (pjotr.public768 at thebird 'dot' nl) using Emacs org-mode and a healthy dose of Lisp!<br />Modified 2020-05-30 Sat 11:59</small>.
+</div>
+</body>
+</html>
diff --git a/doc/blog/using-covid-19-pubseq-part5.org b/doc/blog/using-covid-19-pubseq-part5.org
index c147ba3..8d7504e 100644
--- a/doc/blog/using-covid-19-pubseq-part5.org
+++ b/doc/blog/using-covid-19-pubseq-part5.org
@@ -1,2 +1,17 @@
-/Work in progress!/
+* Modify Metadata
+The public sequence resource uses multiple data formats listed on the
+[[./download][DOWNLOAD]] page. One of the most exciting features is the full support
+for RDF and semantic web/linked data ontologies. This technology
+allows for querying data in unprescribed ways - that is, you can
+formulate your own queries without dealing with a preset model of that
+data (so typical of CSV files and SQL tables). Examples of exploring
+data are listed [[./blog?id=using-covid-19-pubseq-part1][here]].
+
+In this BLOG we are going to look at the metadata entered on the
+[[./][COVID-19 PubSeq]] website (or command line client). It is important to
+understand that both you and we can change that information.
+
+* What is the schema?
+
+* How is the website generated?