18 files changed, 383 insertions, 190 deletions
diff --git a/bh20sequploader/bh20seq-options.yml b/bh20sequploader/bh20seq-options.yml
index c553f41..d1ea398 100644
--- a/bh20sequploader/bh20seq-options.yml
+++ b/bh20sequploader/bh20seq-options.yml
@@ -3,6 +3,10 @@
 # being unique or at least using the same options in different containing
 # types.
 
+license_type:
+  CC0 Public Domain Dedication: http://creativecommons.org/publicdomain/zero/1.0/
+  CC-BY-4.0 Attribution 4.0 International: http://creativecommons.org/licenses/by/4.0/
+
 host_age_unit:
   Years: http://purl.obolibrary.org/obo/UO_0000036
   Months: http://purl.obolibrary.org/obo/UO_0000035
diff --git a/bh20sequploader/bh20seq-schema.yml b/bh20sequploader/bh20seq-schema.yml
index a8ab920..29ac22c 100644
--- a/bh20sequploader/bh20seq-schema.yml
+++ b/bh20sequploader/bh20seq-schema.yml
@@ -1,5 +1,6 @@
 $base: http://biohackathon.org/bh20-seq-schema
 $namespaces:
+  cc:  http://creativecommons.org/ns#
   sch: https://schema.org/
   efo: http://www.ebi.ac.uk/efo/
   obo: http://purl.obolibrary.org/obo/
@@ -9,6 +10,30 @@ $namespaces:
 
 $graph:
 
+- name: licenseSchema
+  type: record
+  fields:
+    license_type:
+      doc: License types as defined in https://wiki.creativecommons.org/images/d/d6/Ccrel-1.0.pdf
+      type: string
+      jsonldPredicate:
+          _id: https://creativecommons.org/ns#License
+    title:
+      doc: Attribution title related to data license
+      type: string?
+      jsonldPredicate:
+          _id: http://semanticscience.org/resource/SIO_001167
+    attribution_url:
+      doc: Attribution URL related to data license
+      type: string?
+      jsonldPredicate:
+          _id: https://creativecommons.org/ns#Work
+    attribution_source:
+      doc: Attribution source URL related to data license
+      type: string?
+      jsonldPredicate:
+          _id: https://creativecommons.org/ns#Work
+
 - name: hostSchema
   type: record
   fields:
@@ -233,6 +258,7 @@ $graph:
     virus: virusSchema
     technology: technologySchema
     submitter: submitterSchema
+    license: licenseSchema
     id:
       doc: The subject (eg the fasta/fastq file) that the metadata describes
       type: string
diff --git a/bh20simplewebuploader/main.py b/bh20simplewebuploader/main.py
index 9132453..8a6794e 100644
--- a/bh20simplewebuploader/main.py
+++ b/bh20simplewebuploader/main.py
@@ -47,6 +47,7 @@ def type_to_heading(type_name):
     Turn a type name like "sampleSchema" from the metadata schema into a human-readable heading.
     """
 
+    print(type_name,file=sys.stderr)
     # Remove camel case
     decamel = re.sub('([A-Z])', r' \1', type_name)
     # Split
@@ -227,8 +228,13 @@ def generate_form(schema, options):
 
 
 # At startup, we need to load the metadata schema from the uploader module, so we can make a form for it
-METADATA_SCHEMA = yaml.safe_load(pkg_resources.resource_stream("bh20sequploader", "bh20seq-schema.yml"))
-METADATA_OPTION_DEFINITIONS = yaml.safe_load(pkg_resources.resource_stream("bh20sequploader", "bh20seq-options.yml"))
+if os.path.isfile("bh20sequploader/bh20seq-schema.yml"):
+    METADATA_SCHEMA = yaml.safe_load(open("bh20sequploader/bh20seq-schema.yml","r").read())
+    METADATA_OPTION_DEFINITIONS = yaml.safe_load(open("bh20sequploader/bh20seq-options.yml","r").read())
+else:
+    METADATA_SCHEMA = yaml.safe_load(pkg_resources.resource_stream("bh20sequploader", "bh20seq-schema.yml"))
+    METADATA_OPTION_DEFINITIONS = yaml.safe_load(pkg_resources.resource_stream("bh20sequploader", "bh20seq-options.yml"))
+# print(METADATA_SCHEMA,file=sys.stderr)
 FORM_ITEMS = generate_form(METADATA_SCHEMA, METADATA_OPTION_DEFINITIONS)
 
 @app.route('/')
@@ -505,7 +511,7 @@ def status_page():
     Processing status
     """
 
-    api = arvados.api(host=ARVADOS_API, token=ANONYMOUS_TOKEN)
+    api = arvados.api(host=ARVADOS_API, token=ANONYMOUS_TOKEN, insecure=True)
     pending = arvados.util.list_all(api.collections().list, filters=[["owner_uuid", "=", UPLOADER_PROJECT]])
     out = []
     status = {}
@@ -567,11 +573,34 @@ baseURL='http://sparql.genenetwork.org/sparql/'
 
 @app.route('/api/getCount', methods=['GET'])
 def getCount():
-    api = arvados.api(host=ARVADOS_API, token=ANONYMOUS_TOKEN)
+    """
+    Get sequence counts from Arvados record
+    """
+    api = arvados.api(host=ARVADOS_API, token=ANONYMOUS_TOKEN, insecure=True)
     c = api.collections().list(filters=[["owner_uuid", "=", VALIDATED_PROJECT]], limit=1).execute()
 
     return jsonify({'sequences': c["items_available"]})
 
+@app.route('/api/getCountDB', methods=['GET'])
+def getCountDB():
+    """
+    Get sequence counts from Virtuoso DB
+    """
+    query="""
+    PREFIX pubseq: <http://biohackathon.org/bh20-seq-schema#MainSchema/>
+    select (COUNT(distinct ?dataset) as ?num)
+    {
+    ?dataset pubseq:submitter ?id .
+    ?id ?p ?submitter
+    }
+    """
+    payload = {'query': query, 'format': 'json'}
+    r = requests.get(baseURL, params=payload)
+    result = r.json()['results']['bindings']
+    # [{'num': {'type': 'typed-literal', 'datatype': 'http://www.w3.org/2001/XMLSchema#integer', 'value': '1352'}}]
+    # print(result, file=sys.stderr)
+    return jsonify({'sequences': int(result[0]["num"]["value"])})
+
 @app.route('/api/getAllaccessions', methods=['GET'])
 def getAllaccessions():
     query="""SELECT DISTINCT ?fasta ?value WHERE {?fasta ?x[ <http://edamontology.org/data_2091> ?value ]}"""
diff --git a/bh20simplewebuploader/static/main.js b/bh20simplewebuploader/static/main.js
index a9dfc10..4703047 100644
--- a/bh20simplewebuploader/static/main.js
+++ b/bh20simplewebuploader/static/main.js
@@ -89,6 +89,10 @@ let fetchCount = () => {
   fetchAPI("/api/getCount");
 }
 
+let fetchCountDB = () => {
+  fetchAPI("/api/getCountDB");
+}
+
 let fetchSEQCountBySpecimen = () => {
   fetchAPIV2("/api/getSEQCountbySpecimenSource");
 }
@@ -191,7 +195,7 @@ function addField(e) {
   // Increment the number and use the keypath and number to set IDs and cross
   // references.
   // TODO: Heavily dependent on the form field HTML. Maybe we want custom
-  // elements for the labeled controlsd that know how to be list items?
+  // elements for the labeled controls that know how to be list items?
   fieldNumber++
   newField.dataset.number = fieldNumber
   let newID = keypath + '[' + fieldNumber + ']'
diff --git a/bh20simplewebuploader/templates/about.html b/bh20simplewebuploader/templates/about.html
index 07b6951..4bd238e 100644
--- a/bh20simplewebuploader/templates/about.html
+++ b/bh20simplewebuploader/templates/about.html
@@ -11,19 +11,6 @@
 
       <script type="text/javascript">
         let scriptRoot = {{ request.script_root|tojson|safe }}; // examples
-
-        document.addEventListener("DOMContentLoaded", function(){
-            var count = fetch("/api/getCount")
-                .then((resp) => resp.json())
-                .then(function (data) {
-                    count = data["sequences"];
-              console.log(count);
-                    span = document.getElementById("Counter");
-                    txt = document.createTextNode(count);
-                    span.appendChild(txt);
-                });
-        });
-
       </script>
     </body>
 
diff --git a/bh20simplewebuploader/templates/blog.html b/bh20simplewebuploader/templates/blog.html
index 8f8ab66..dbc0b99 100644
--- a/bh20simplewebuploader/templates/blog.html
+++ b/bh20simplewebuploader/templates/blog.html
@@ -73,19 +73,6 @@
 
     <script type="text/javascript">
       let scriptRoot = {{ request.script_root|tojson|safe }}; // examples
-
-      document.addEventListener("DOMContentLoaded", function(){
-          var count = fetch("/api/getCount")
-              .then((resp) => resp.json())
-              .then(function (data) {
-                  count = data["sequences"];
-                  console.log(count);
-                  span = document.getElementById("Counter");
-                  txt = document.createTextNode(count);
-                  span.appendChild(txt);
-              });
-      });
-
     </script>
   </body>
 
diff --git a/bh20simplewebuploader/templates/demo.html b/bh20simplewebuploader/templates/demo.html
index 76c19c4..44aded0 100644
--- a/bh20simplewebuploader/templates/demo.html
+++ b/bh20simplewebuploader/templates/demo.html
@@ -5,6 +5,7 @@
     {% include 'banner.html' %}
     {% include 'menu.html' %}
     {% include 'search.html' %}
+      <p>The Virtuoso database contains <span id="CounterDB"></span> public sequences!</p>
     {% include 'demo-run.html' %}
     {% include 'footer.html' %}
 
@@ -12,16 +13,17 @@
       let scriptRoot = {{ request.script_root|tojson|safe }}; // examples
 
       document.addEventListener("DOMContentLoaded", function(){
-          var count = fetch("/api/getCount")
+          var count = fetch("/api/getCountDB")
               .then((resp) => resp.json())
               .then(function (data) {
                   count = data["sequences"];
                   console.log(count);
-                  span = document.getElementById("Counter");
+                  span = document.getElementById("CounterDB");
                   txt = document.createTextNode(count);
                   span.appendChild(txt);
               });
       });
+
     </script>
   </body>
 
diff --git a/bh20simplewebuploader/templates/download.html b/bh20simplewebuploader/templates/download.html
index 07b6951..4bd238e 100644
--- a/bh20simplewebuploader/templates/download.html
+++ b/bh20simplewebuploader/templates/download.html
@@ -11,19 +11,6 @@
 
       <script type="text/javascript">
         let scriptRoot = {{ request.script_root|tojson|safe }}; // examples
-
-        document.addEventListener("DOMContentLoaded", function(){
-            var count = fetch("/api/getCount")
-                .then((resp) => resp.json())
-                .then(function (data) {
-                    count = data["sequences"];
-              console.log(count);
-                    span = document.getElementById("Counter");
-                    txt = document.createTextNode(count);
-                    span.appendChild(txt);
-                });
-        });
-
       </script>
     </body>
 
diff --git a/bh20simplewebuploader/templates/map.html b/bh20simplewebuploader/templates/map.html
index 6d63c3c..595af0c 100644
--- a/bh20simplewebuploader/templates/map.html
+++ b/bh20simplewebuploader/templates/map.html
@@ -16,19 +16,6 @@
 
        <script type="text/javascript">
         let scriptRoot = {{ request.script_root|tojson|safe }}; // examples
-
-        document.addEventListener("DOMContentLoaded", function(){
-            var count = fetch("/api/getCount")
-                .then((resp) => resp.json())
-                .then(function (data) {
-                    count = data["sequences"];
-              console.log(count);
-                    span = document.getElementById("Counter");
-                    txt = document.createTextNode(count);
-                    span.appendChild(txt);
-                });
-        });
-
       </script>
 
 <!-- Make sure you put this AFTER Leaflet's CSS -->
diff --git a/doc/INSTALL.md b/doc/INSTALL.md
index 6dcd72b..3b270dd 100644
--- a/doc/INSTALL.md
+++ b/doc/INSTALL.md
@@ -42,7 +42,7 @@ repository.
 
 ### Using the Web Uploader
 
-To run the web uploader in a GNU Guix environment/container
+To run the web uploader in a GNU Guix environment/container, use something like
 
 ```
 guix environment guix --ad-hoc git python python-flask python-pyyaml python-pycurl python-magic  nss-certs --network openssl -- env FLASK_ENV=development PYTHONPATH=$PYTHONPATH:./bh20sequploader FLASK_APP=bh20simplewebuploader/main.py flask run
@@ -59,7 +59,7 @@ WIP: add gunicorn container
 Currently the full webserver container deploy command looks like
 
 ```
-penguin2:~/iwrk/opensource/code/vg/bh20-seq-resource$ env GUIX_PACKAGE_PATH=~/iwrk/opensource/guix/guix-bioinformatics/ ~/iwrk/opensource/guix/guix/pre-inst-env guix environment -C guix --ad-hoc git python python-flask python-pyyaml python-pycurl python-magic  nss-certs python-pyshex python-pyyaml --network openssl python-pyshex python-pyshexc clustalw python-schema-salad python-arvados-python-client --share=/export/tmp -- env TMPDIR=/export/tmp FLASK_ENV=development FLASK_APP=bh20simplewebuploader/main.py flask run
-``
+penguin2:~/iwrk/opensource/code/vg/bh20-seq-resource$ env GUIX_PACKAGE_PATH=~/iwrk/opensource/guix/guix-bioinformatics/ ~/iwrk/opensource/guix/guix/pre-inst-env guix environment -C guix --ad-hoc git python python-flask python-pyyaml python-pycurl python-magic  nss-certs python-pyshex python-pyyaml --network openssl python-pyshex python-pyshexc clustalw python-schema-salad python-arvados-python-client --share=/export/tmp -- env TMPDIR=/export/tmp FLASK_ENV=development FLASK_APP=bh20simplewebuploader/main.py flask run
+```
 
 Note: see above on GUIX_PACKAGE_PATH.
diff --git a/doc/blog/using-covid-19-pubseq-part4.html b/doc/blog/using-covid-19-pubseq-part4.html
index 67d299e..b5a05ca 100644
--- a/doc/blog/using-covid-19-pubseq-part4.html
+++ b/doc/blog/using-covid-19-pubseq-part4.html
@@ -3,10 +3,10 @@
 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
 <html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en">
 <head>
-<!-- 2020-05-30 Sat 11:52 -->
+<!-- 2020-07-12 Sun 06:24 -->
 <meta http-equiv="Content-Type" content="text/html;charset=utf-8" />
 <meta name="viewport" content="width=device-width, initial-scale=1" />
-<title>&lrm;</title>
+<title>COVID-19 PubSeq (part 4)</title>
 <meta name="generator" content="Org mode" />
 <meta name="author" content="Pjotr Prins" />
 <style type="text/css">
@@ -161,19 +161,6 @@
   .footdef  { margin-bottom: 1em; }
   .figure { padding: 1em; }
   .figure p { text-align: center; }
-  .equation-container {
-    display: table;
-    text-align: center;
-    width: 100%;
-  }
-  .equation {
-    vertical-align: middle;
-  }
-  .equation-label {
-    display: table-cell;
-    text-align: right;
-    vertical-align: middle;
-  }
   .inlinetask {
     padding: 10px;
     border: 2px solid gray;
@@ -193,12 +180,13 @@
   .org-svg { width: 90%; }
   /*]]>*/-->
 </style>
+<link rel="Blog stylesheet" type="text/css" href="blog.css" />
 <script type="text/javascript">
 /*
 @licstart  The following is the entire license notice for the
 JavaScript code in this tag.
 
-Copyright (C) 2012-2020 Free Software Foundation, Inc.
+Copyright (C) 2012-2018 Free Software Foundation, Inc.
 
 The JavaScript code in this tag is free software: you can
 redistribute it and/or modify it under the terms of the GNU
@@ -242,25 +230,41 @@ for the JavaScript code in this tag.
 </head>
 <body>
 <div id="content">
+<h1 class="title">COVID-19 PubSeq (part 4)</h1>
 <div id="table-of-contents">
 <h2>Table of Contents</h2>
 <div id="text-table-of-contents">
 <ul>
-<li><a href="#orgda6f48c">1. Modify Workflow</a></li>
+<li><a href="#org8f8b64a">1. What does this mean?</a></li>
+<li><a href="#orgcc7a403">2. Modify Workflow</a></li>
 </ul>
 </div>
 </div>
-<div id="outline-container-orgda6f48c" class="outline-2">
-<h2 id="orgda6f48c"><span class="section-number-2">1</span> Modify Workflow</h2>
+
+
+<div id="outline-container-org8f8b64a" class="outline-2">
+<h2 id="org8f8b64a"><span class="section-number-2">1</span> What does this mean?</h2>
 <div class="outline-text-2" id="text-1">
 <p>
+This means that when someone uploads a SARS-CoV-2 sequence using one
+of our tools (CLI or web-based) they add a sequence and some metadata
+which triggers a rerun of our workflows.
+</p>
+</div>
+</div>
+
+
+<div id="outline-container-orgcc7a403" class="outline-2">
+<h2 id="orgcc7a403"><span class="section-number-2">2</span> Modify Workflow</h2>
+<div class="outline-text-2" id="text-2">
+<p>
 <i>Work in progress!</i>
 </p>
 </div>
 </div>
 </div>
 <div id="postamble" class="status">
-<hr><small>Created by <a href="http://thebird.nl/">Pjotr Prins</a> (pjotr.public768 at thebird 'dot' nl) using Emacs org-mode and a healthy dose of Lisp!<br />Modified 2020-05-30 Sat 11:52</small>.
+<hr><small>Created by <a href="http://thebird.nl/">Pjotr Prins</a> (pjotr.public768 at thebird 'dot' nl) using Emacs org-mode and a healthy dose of Lisp!<br />Modified 2020-07-12 Sun 06:24</small>.
 </div>
 </body>
 </html>
diff --git a/doc/blog/using-covid-19-pubseq-part4.org b/doc/blog/using-covid-19-pubseq-part4.org
index 58a1f56..5fe71d1 100644
--- a/doc/blog/using-covid-19-pubseq-part4.org
+++ b/doc/blog/using-covid-19-pubseq-part4.org
@@ -1,3 +1,24 @@
+#+TITLE: COVID-19 PubSeq (part 4)
+#+AUTHOR: Pjotr Prins
+# C-c C-e h h   publish
+# C-c !         insert date (use . for active agenda, C-u C-c ! for date, C-u C-c . for time)
+# C-c C-t       task rotate
+# RSS_IMAGE_URL: http://xxxx.xxxx.free.fr/rss_icon.png
+
+#+HTML_HEAD: <link rel="Blog stylesheet" type="text/css" href="blog.css" />
+
+
+* Table of Contents                                                     :TOC:noexport:
+ - [[#what-does-this-mean][What does this mean?]]
+ - [[#modify-workflow][Modify Workflow]]
+
+* What does this mean?
+
+This means that when someone uploads a SARS-CoV-2 sequence using one
+of our tools (CLI or web-based) they add a sequence and some metadata
+which triggers a rerun of our workflows.
+
+
 * Modify Workflow
 
 /Work in progress!/
diff --git a/doc/blog/using-covid-19-pubseq-part5.html b/doc/blog/using-covid-19-pubseq-part5.html
index 30a3f83..80bf559 100644
--- a/doc/blog/using-covid-19-pubseq-part5.html
+++ b/doc/blog/using-covid-19-pubseq-part5.html
@@ -3,10 +3,10 @@
 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
 <html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en">
 <head>
-<!-- 2020-05-30 Sat 11:59 -->
+<!-- 2020-07-12 Sun 06:24 -->
 <meta http-equiv="Content-Type" content="text/html;charset=utf-8" />
 <meta name="viewport" content="width=device-width, initial-scale=1" />
-<title>&lrm;</title>
+<title>COVID-19 PubSeq (part 5)</title>
 <meta name="generator" content="Org mode" />
 <meta name="author" content="Pjotr Prins" />
 <style type="text/css">
@@ -161,19 +161,6 @@
   .footdef  { margin-bottom: 1em; }
   .figure { padding: 1em; }
   .figure p { text-align: center; }
-  .equation-container {
-    display: table;
-    text-align: center;
-    width: 100%;
-  }
-  .equation {
-    vertical-align: middle;
-  }
-  .equation-label {
-    display: table-cell;
-    text-align: right;
-    vertical-align: middle;
-  }
   .inlinetask {
     padding: 10px;
     border: 2px solid gray;
@@ -193,12 +180,13 @@
   .org-svg { width: 90%; }
   /*]]>*/-->
 </style>
+<link rel="Blog stylesheet" type="text/css" href="blog.css" />
 <script type="text/javascript">
 /*
 @licstart  The following is the entire license notice for the
 JavaScript code in this tag.
 
-Copyright (C) 2012-2020 Free Software Foundation, Inc.
+Copyright (C) 2012-2018 Free Software Foundation, Inc.
 
 The JavaScript code in this tag is free software: you can
 redistribute it and/or modify it under the terms of the GNU
@@ -242,16 +230,22 @@ for the JavaScript code in this tag.
 </head>
 <body>
 <div id="content">
+<h1 class="title">COVID-19 PubSeq (part 5)</h1>
 <div id="table-of-contents">
 <h2>Table of Contents</h2>
 <div id="text-table-of-contents">
 <ul>
-<li><a href="#org31c224e">1. Modify Metadata</a></li>
+<li><a href="#org871ad58">1. Modify Metadata</a></li>
+<li><a href="#org07e8755">2. What is the schema?</a></li>
+<li><a href="#org4857280">3. How is the website generated?</a></li>
+<li><a href="#orge709ae2">4. Modifying the schema</a></li>
 </ul>
 </div>
 </div>
-<div id="outline-container-org31c224e" class="outline-2">
-<h2 id="org31c224e"><span class="section-number-2">1</span> Modify Metadata</h2>
+
+
+<div id="outline-container-org871ad58" class="outline-2">
+<h2 id="org871ad58"><span class="section-number-2">1</span> Modify Metadata</h2>
 <div class="outline-text-2" id="text-1">
 <p>
 The public sequence resource uses multiple data formats listed on the
@@ -265,13 +259,56 @@ data are listed <a href="./blog?id=using-covid-19-pubseq-part1">here</a>.
 
 <p>
 In this BLOG we are going to look at the metadata entered on the
-<a href="./">COVID-19 PubSeq</a> website (or command line client).
+<a href="./">COVID-19 PubSeq</a> website (or command line client). It is important to
+understand that anyone, including you, can change that information!
+</p>
+</div>
+</div>
+
+<div id="outline-container-org07e8755" class="outline-2">
+<h2 id="org07e8755"><span class="section-number-2">2</span> What is the schema?</h2>
+<div class="outline-text-2" id="text-2">
+<p>
+The default metadata schema is listed <a href="https://github.com/arvados/bh20-seq-resource/blob/master/bh20sequploader/bh20seq-schema.yml">here</a>.
+</p>
+</div>
+</div>
+
+<div id="outline-container-org4857280" class="outline-2">
+<h2 id="org4857280"><span class="section-number-2">3</span> How is the website generated?</h2>
+<div class="outline-text-2" id="text-3">
+<p>
+Using the schema we use <a href="https://pypi.org/project/PyShEx/">pyshex</a> shex expressions and <a href="https://github.com/common-workflow-language/schema_salad">schema salad</a> to
+generate the <a href="https://github.com/arvados/bh20-seq-resource/blob/edb17e7f7caebfa1e76b21006b1772a33f4f7887/bh20simplewebuploader/templates/form.html#L47">input form</a>, <a href="https://github.com/arvados/bh20-seq-resource/blob/edb17e7f7caebfa1e76b21006b1772a33f4f7887/bh20sequploader/qc_metadata.py#L13">validate</a> the user input and to build <a href="https://github.com/arvados/bh20-seq-resource/blob/edb17e7f7caebfa1e76b21006b1772a33f4f7887/workflows/pangenome-generate/merge-metadata.py#L24">RDF</a>!
+All from that one metadata schema.
+</p>
+</div>
+</div>
+
+<div id="outline-container-orge709ae2" class="outline-2">
+<h2 id="orge709ae2"><span class="section-number-2">4</span> Modifying the schema</h2>
+<div class="outline-text-2" id="text-4">
+<p>
+One of the first things we wanted to do is to add a field for the data
+license. Initially we only supported CC-BY-4.0 as the default license, but
+now we want to give uploaders the option to make it an even more
+liberal CC0 license. The first step is to find a good ontology term
+for the field. Searching for `creative commons cc0 rdf' rendered this
+useful <a href="https://creativecommons.org/ns">page</a>.  We also find an <a href="https://wiki.creativecommons.org/wiki/CC_License_Rdf_Overview">overview</a> where CC0 is represented as URI
+<a href="https://creativecommons.org/publicdomain/zero/1.0/">https://creativecommons.org/publicdomain/zero/1.0/</a>.  Meanwhile the
+attribution license is represented as <a href="https://creativecommons.org/licenses/by/4.0/">https://creativecommons.org/licenses/by/4.0/</a>.
+According to this <a href="https://wiki.creativecommons.org/images/d/d6/Ccrel-1.0.pdf">document</a> we should really also add fields for
+attributionName and attributionURL.
+</p>
+
+<p>
+<i>Note: work in progress</i>
 </p>
 </div>
 </div>
 </div>
 <div id="postamble" class="status">
-<hr><small>Created by <a href="http://thebird.nl/">Pjotr Prins</a> (pjotr.public768 at thebird 'dot' nl) using Emacs org-mode and a healthy dose of Lisp!<br />Modified 2020-05-30 Sat 11:59</small>.
+<hr><small>Created by <a href="http://thebird.nl/">Pjotr Prins</a> (pjotr.public768 at thebird 'dot' nl) using Emacs org-mode and a healthy dose of Lisp!<br />Modified 2020-07-12 Sun 06:24</small>.
 </div>
 </body>
 </html>
diff --git a/doc/blog/using-covid-19-pubseq-part5.org b/doc/blog/using-covid-19-pubseq-part5.org
index 8d7504e..aa06d5e 100644
--- a/doc/blog/using-covid-19-pubseq-part5.org
+++ b/doc/blog/using-covid-19-pubseq-part5.org
@@ -1,3 +1,20 @@
+#+TITLE: COVID-19 PubSeq (part 5)
+#+AUTHOR: Pjotr Prins
+# C-c C-e h h   publish
+# C-c !         insert date (use . for active agenda, C-u C-c ! for date, C-u C-c . for time)
+# C-c C-t       task rotate
+# RSS_IMAGE_URL: http://xxxx.xxxx.free.fr/rss_icon.png
+
+#+HTML_HEAD: <link rel="Blog stylesheet" type="text/css" href="blog.css" />
+
+
+* Table of Contents                                                     :TOC:noexport:
+ - [[#modify-metadata][Modify Metadata]]
+ - [[#what-is-the-schema][What is the schema?]]
+ - [[#how-is-the-website-generated][How is the website generated?]]
+ - [[#modifying-the-schema][Modifying the schema]]
+ - [[#adding-fields-to-the-form][Adding fields to the form]]
+
 * Modify Metadata
 
 The public sequence resource uses multiple data formats listed on the
@@ -10,8 +27,102 @@ data are listed [[./blog?id=using-covid-19-pubseq-part1][here]].
 
 In this BLOG we are going to look at the metadata entered on the
 [[./][COVID-19 PubSeq]] website (or command line client). It is important to
-understand that you and us can change that information.
+understand that anyone, including you, can change that information!
 
 * What is the schema?
 
+The default metadata schema is listed [[https://github.com/arvados/bh20-seq-resource/blob/master/bh20sequploader/bh20seq-schema.yml][here]].
+
 * How is the website generated?
+
+Using the schema we use [[https://pypi.org/project/PyShEx/][pyshex]] shex expressions and [[https://github.com/common-workflow-language/schema_salad][schema salad]] to
+generate the [[https://github.com/arvados/bh20-seq-resource/blob/edb17e7f7caebfa1e76b21006b1772a33f4f7887/bh20simplewebuploader/templates/form.html#L47][input form]], [[https://github.com/arvados/bh20-seq-resource/blob/edb17e7f7caebfa1e76b21006b1772a33f4f7887/bh20sequploader/qc_metadata.py#L13][validate]] the user input and to build [[https://github.com/arvados/bh20-seq-resource/blob/edb17e7f7caebfa1e76b21006b1772a33f4f7887/workflows/pangenome-generate/merge-metadata.py#L24][RDF]]!
+All from that one metadata schema.
+
+* Modifying the schema
+
+One of the first things we want to do is to add a field for the data
+license. Initially we only supported CC-BY-4.0 as the default license, but
+now we want to give uploaders the option to make it an even more
+liberal CC0 license. The first step is to find a good ontology term
+for the field. Searching for `creative commons cc0 rdf' rendered this
+useful [[https://creativecommons.org/ns][page]].  We also find an [[https://wiki.creativecommons.org/wiki/CC_License_Rdf_Overview][overview]] where CC0 is represented as URI
+https://creativecommons.org/publicdomain/zero/1.0/.  Meanwhile the
+attribution license is represented as https://creativecommons.org/licenses/by/4.0/.
+According to this [[https://wiki.creativecommons.org/images/d/d6/Ccrel-1.0.pdf][document]] we should really also add fields for
+attributionName and attributionURL.
+
+A minimal triple should be
+
+: id  xhtml:license  <http://creativecommons.org/licenses/by/4.0/> .
+
+Other suggestions are
+
+: id  dc:title "Description" .
+: id  cc:attributionName "Your Name" .
+: id  cc:attributionURL <http://resource.org/id> .
+
+and 'dc:source' which indicates the original source of any modified
+work, specified as a URI.
+The prefix 'cc:' is an abbreviation for http://creativecommons.org/ns#.
+
+Going back to the schema, where does it fit? Under host, sample,
+virus, technology or submitter block? It could fit under sample, but
+actually the license concerns the whole metadata block and sequence,
+so I think we can fit it under its own license tag. For example
+
+
+id: placeholder
+
+: license:
+:     license_type: http://creativecommons.org/licenses/by/4.0/
+:     attribution_title: "Sample ID"
+:     attribution_name: "John doe, Joe Boe, Jonny Oe"
+:     attribution_url: http://covid19.genenetwork.org/id
+:     attribution_source: https://www.ncbi.nlm.nih.gov/pubmed/323088888
+
+So, let's update the example. Notice the license info is optional - if it is missing
+we just assume the default CC-BY-4.0.
+
+One thing that is interesting is that in the name space https://creativecommons.org/ns there
+is no mention of a title. I think it is useful, however, because we have no such field.
+So, we'll add it simply as a title field. Now the draft schema is
+
+#+BEGIN_SRC yaml
+- name: licenseSchema
+  type: record
+  fields:
+    license_type:
+      doc: License types as defined in https://wiki.creativecommons.org/images/d/d6/Ccrel-1.0.pdf
+      type: string?
+      jsonldPredicate:
+          _id: https://creativecommons.org/ns#License
+    title:
+      doc: Attribution title related to license
+      type: string?
+      jsonldPredicate:
+          _id: http://semanticscience.org/resource/SIO_001167
+    attribution_url:
+      doc: Attribution URL related to license
+      type: string?
+      jsonldPredicate:
+          _id: https://creativecommons.org/ns#Work
+    attribution_source:
+      doc: Attribution source URL
+      type: string?
+      jsonldPredicate:
+          _id: https://creativecommons.org/ns#Work
+#+END_SRC
+
+Now, we are no ontology experts, right? So, next we submit a patch to
+our source tree and ask for feedback before wiring it up in the data
+entry form. The pull request was submitted [[https://github.com/arvados/bh20-seq-resource/pull/97][here]] and reviewed on the
+gitter channel and I merged it.
+
+* Adding fields to the form
+
+To add the new fields to the form we have to modify it a little. If we
+go to the upload form we need to add the license box. The schema is
+loaded in [[https://github.com/arvados/bh20-seq-resource/blob/a0c8ebd57b875f265e8b0efec4abfaf892eb6c45/bh20simplewebuploader/main.py#L229][main.py]] in the 'generate_form' function.
+
+/Note: work in progress/
diff --git a/doc/web/about.html b/doc/web/about.html
index c907e6c..9b16c92 100644
--- a/doc/web/about.html
+++ b/doc/web/about.html
@@ -3,7 +3,7 @@
 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
 <html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en">
 <head>
-<!-- 2020-05-29 Fri 08:27 -->
+<!-- 2020-07-12 Sun 06:29 -->
 <meta http-equiv="Content-Type" content="text/html;charset=utf-8" />
 <meta name="viewport" content="width=device-width, initial-scale=1" />
 <title>About/FAQ</title>
@@ -161,19 +161,6 @@
   .footdef  { margin-bottom: 1em; }
   .figure { padding: 1em; }
   .figure p { text-align: center; }
-  .equation-container {
-    display: table;
-    text-align: center;
-    width: 100%;
-  }
-  .equation {
-    vertical-align: middle;
-  }
-  .equation-label {
-    display: table-cell;
-    text-align: right;
-    vertical-align: middle;
-  }
   .inlinetask {
     padding: 10px;
     border: 2px solid gray;
@@ -198,7 +185,7 @@
 @licstart  The following is the entire license notice for the
 JavaScript code in this tag.
 
-Copyright (C) 2012-2020 Free Software Foundation, Inc.
+Copyright (C) 2012-2018 Free Software Foundation, Inc.
 
 The JavaScript code in this tag is free software: you can
 redistribute it and/or modify it under the terms of the GNU
@@ -247,29 +234,29 @@ for the JavaScript code in this tag.
 <h2>Table of Contents</h2>
 <div id="text-table-of-contents">
 <ul>
-<li><a href="#org783b5e9">1. What is the 'public sequence resource' about?</a></li>
-<li><a href="#org2c0bcfd">2. Who created the public sequence resource?</a></li>
-<li><a href="#org34070d3">3. How does the public sequence resource compare to other data resources?</a></li>
-<li><a href="#org64a9493">4. Why should I upload my data here?</a></li>
-<li><a href="#orgf898e7f">5. Why should I not upload by data here?</a></li>
-<li><a href="#org828e164">6. How does the public sequence resource work?</a></li>
-<li><a href="#org7b0d03f">7. Who uses the public sequence resource?</a></li>
-<li><a href="#org31aaf23">8. Is this about open data?</a></li>
-<li><a href="#orgb376b6c">9. Is this about free software?</a></li>
-<li><a href="#orgf19cd96">10. How do I upload raw data?</a></li>
-<li><a href="#orgebfed00">11. How do I change metadata?</a></li>
-<li><a href="#orge2aecf8">12. How do I change the work flows?</a></li>
-<li><a href="#orgd45b3bc">13. How do I change the source code?</a></li>
-<li><a href="#org2bb9455">14. Should I choose CC-BY or CC0?</a></li>
-<li><a href="#org62bf23f">15. How do I deal with private data and privacy?</a></li>
-<li><a href="#org40c6da0">16. How do I communicate with you?</a></li>
-<li><a href="#org1f27c44">17. Who are the sponsors?</a></li>
+<li><a href="#orgac6ad8b">1. What is the 'public sequence resource' about?</a></li>
+<li><a href="#org0c21c2e">2. Who created the public sequence resource?</a></li>
+<li><a href="#org3fb8cb3">3. How does the public sequence resource compare to other data resources?</a></li>
+<li><a href="#org6cd9ea2">4. Why should I upload my data here?</a></li>
+<li><a href="#org0b6e3fb">5. Why should I not upload my data here?</a></li>
+<li><a href="#org3eb3a4e">6. How does the public sequence resource work?</a></li>
+<li><a href="#org7a397f5">7. Who uses the public sequence resource?</a></li>
+<li><a href="#org92cb008">8. Is this about open data?</a></li>
+<li><a href="#org232d6fa">9. Is this about free software?</a></li>
+<li><a href="#orgd93869f">10. How do I upload raw data?</a></li>
+<li><a href="#org88e8b0a">11. How do I change metadata?</a></li>
+<li><a href="#orgd04b8f8">12. How do I change the work flows?</a></li>
+<li><a href="#org5d1ee05">13. How do I change the source code?</a></li>
+<li><a href="#orgae6461b">14. Should I choose CC-BY or CC0?</a></li>
+<li><a href="#org3ea90a9">15. How do I deal with private data and privacy?</a></li>
+<li><a href="#org7ff7106">16. How do I communicate with you?</a></li>
+<li><a href="#org9566fa7">17. Who are the sponsors?</a></li>
 </ul>
 </div>
 </div>
 
-<div id="outline-container-org783b5e9" class="outline-2">
-<h2 id="org783b5e9"><span class="section-number-2">1</span> What is the 'public sequence resource' about?</h2>
+<div id="outline-container-orgac6ad8b" class="outline-2">
+<h2 id="orgac6ad8b"><span class="section-number-2">1</span> What is the 'public sequence resource' about?</h2>
 <div class="outline-text-2" id="text-1">
 <p>
 The <b>public sequence resource</b> aims to provide a generic and useful
@@ -280,17 +267,18 @@ sequence comparison and protein prediction.
 </div>
 </div>
 
-<div id="outline-container-org2c0bcfd" class="outline-2">
-<h2 id="org2c0bcfd"><span class="section-number-2">2</span> Who created the public sequence resource?</h2>
+<div id="outline-container-org0c21c2e" class="outline-2">
+<h2 id="org0c21c2e"><span class="section-number-2">2</span> Who created the public sequence resource?</h2>
 <div class="outline-text-2" id="text-2">
 <p>
 The <b>public sequence resource</b> is an initiative by <a href="https://github.com/arvados/bh20-seq-resource/graphs/contributors">bioinformatics</a> and
 ontology experts who want to create something agile and useful for the
 wider research community. The initiative started at the COVID-19
 biohackathon in April 2020 and is ongoing. The main project drivers
-are Pjotr Prins (UTHSC), Peter Amstutz (Curii), Michael Crusoe (Common
-Workflow Language), Thomas Liener (consultant, formerly EBI) and
-Jerven Bolleman (Swiss Institute of Bioinformatics).
+are Pjotr Prins (UTHSC), Peter Amstutz (Curii), Andrea Guarracino
+(University of Rome Tor Vergata), Michael Crusoe (Common Workflow
+Language), Thomas Liener (consultant, formerly EBI), Erik Garrison
+(UCSC) and Jerven Bolleman (Swiss Institute of Bioinformatics).
 </p>
 
 <p>
@@ -301,8 +289,8 @@ wrangling experts. Thank you everyone!
 </div>
 </div>
 
-<div id="outline-container-org34070d3" class="outline-2">
-<h2 id="org34070d3"><span class="section-number-2">3</span> How does the public sequence resource compare to other data resources?</h2>
+<div id="outline-container-org3fb8cb3" class="outline-2">
+<h2 id="org3fb8cb3"><span class="section-number-2">3</span> How does the public sequence resource compare to other data resources?</h2>
 <div class="outline-text-2" id="text-3">
 <p>
 The short version is that we use state-of-the-art practices in
@@ -312,17 +300,18 @@ to building out this resource!
 </p>
 
 <p>
-Importantly: all data is published under the <a href="https://creativecommons.org/licenses/by/4.0/">Creative Commons 4.0
-attribution license</a> which means it data can be published and workflows
-can run in public environments allowing for improved access for
-research and reproducible results. This contrasts with some other
-public resources, including GISAID.
+Importantly: all data is published under either the <a href="https://creativecommons.org/licenses/by/4.0/">Creative Commons
+4.0 attribution license</a> or the <a href="https://creativecommons.org/share-your-work/public-domain/cc0/">CC0 “No Rights Reserved” license</a> which
+means the data can be published and workflows can run in public
+environments allowing for improved access for research and
+reproducible results. This contrasts with some other public resources,
+including GISAID.
 </p>
 </div>
 </div>
 
-<div id="outline-container-org64a9493" class="outline-2">
-<h2 id="org64a9493"><span class="section-number-2">4</span> Why should I upload my data here?</h2>
+<div id="outline-container-org6cd9ea2" class="outline-2">
+<h2 id="org6cd9ea2"><span class="section-number-2">4</span> Why should I upload my data here?</h2>
 <div class="outline-text-2" id="text-4">
 <ol class="org-ol">
 <li>We champion truly shareable data without licensing restrictions - with proper
@@ -353,8 +342,8 @@ multiple resources.
 </div>
 </div>
 
-<div id="outline-container-orgf898e7f" class="outline-2">
-<h2 id="orgf898e7f"><span class="section-number-2">5</span> Why should I not upload by data here?</h2>
+<div id="outline-container-org0b6e3fb" class="outline-2">
+<h2 id="org0b6e3fb"><span class="section-number-2">5</span> Why should I not upload my data here?</h2>
 <div class="outline-text-2" id="text-5">
 <p>
 Funny question.  There are only good reasons to upload your data here
@@ -376,8 +365,8 @@ for bulk uploads!
 </div>
 </div>
 
-<div id="outline-container-org828e164" class="outline-2">
-<h2 id="org828e164"><span class="section-number-2">6</span> How does the public sequence resource work?</h2>
+<div id="outline-container-org3eb3a4e" class="outline-2">
+<h2 id="org3eb3a4e"><span class="section-number-2">6</span> How does the public sequence resource work?</h2>
 <div class="outline-text-2" id="text-6">
 <p>
 On uploading a sequence with metadata it will automatically be
@@ -388,8 +377,8 @@ using workflows from the High Performance Open Biology Lab defined
 </div>
 </div>
 
-<div id="outline-container-org7b0d03f" class="outline-2">
-<h2 id="org7b0d03f"><span class="section-number-2">7</span> Who uses the public sequence resource?</h2>
+<div id="outline-container-org7a397f5" class="outline-2">
+<h2 id="org7a397f5"><span class="section-number-2">7</span> Who uses the public sequence resource?</h2>
 <div class="outline-text-2" id="text-7">
 <p>
 The Swiss Institute of Bioinformatics has included this data in
@@ -397,14 +386,18 @@ The Swiss Institute of Bioinformatics has included this data in
 </p>
 
 <p>
+The Pantograph <a href="https://graph-genome.github.io/">viewer</a> uses PubSeq data for their visualisations.
+</p>
+
+<p>
 <a href="https://uthsc.edu">UTHSC</a> and <a href="https://www.ornl.gov/news/ornl-fight-against-covid-19">ORNL</a> use COVID-19 PubSeq data for protein prediction and
 drug development.
 </p>
 </div>
 </div>
 
-<div id="outline-container-org31aaf23" class="outline-2">
-<h2 id="org31aaf23"><span class="section-number-2">8</span> Is this about open data?</h2>
+<div id="outline-container-org92cb008" class="outline-2">
+<h2 id="org92cb008"><span class="section-number-2">8</span> Is this about open data?</h2>
 <div class="outline-text-2" id="text-8">
 <p>
 All data is published under a <a href="https://creativecommons.org/licenses/by/4.0/">Creative Commons 4.0 attribution license</a>
@@ -414,8 +407,8 @@ data and store it for further processing.
 </div>
 </div>
 
-<div id="outline-container-orgb376b6c" class="outline-2">
-<h2 id="orgb376b6c"><span class="section-number-2">9</span> Is this about free software?</h2>
+<div id="outline-container-org232d6fa" class="outline-2">
+<h2 id="org232d6fa"><span class="section-number-2">9</span> Is this about free software?</h2>
 <div class="outline-text-2" id="text-9">
 <p>
 Absolutely. Free software allows for fully reproducible pipelines. You
@@ -424,8 +417,8 @@ can take our workflows and data and run it elsewhere!
 </div>
 </div>
 
-<div id="outline-container-orgf19cd96" class="outline-2">
-<h2 id="orgf19cd96"><span class="section-number-2">10</span> How do I upload raw data?</h2>
+<div id="outline-container-orgd93869f" class="outline-2">
+<h2 id="orgd93869f"><span class="section-number-2">10</span> How do I upload raw data?</h2>
 <div class="outline-text-2" id="text-10">
 <p>
 We are preparing raw sequence data pipelines (fastq and BAM). The
@@ -440,8 +433,8 @@ assembly variations into consideration. This is all work in progress.
 </div>
 </div>
 
-<div id="outline-container-orgebfed00" class="outline-2">
-<h2 id="orgebfed00"><span class="section-number-2">11</span> How do I change metadata?</h2>
+<div id="outline-container-org88e8b0a" class="outline-2">
+<h2 id="org88e8b0a"><span class="section-number-2">11</span> How do I change metadata?</h2>
 <div class="outline-text-2" id="text-11">
 <p>
 See the <a href="http://covid19.genenetwork.org/blog">http://covid19.genenetwork.org/blog</a>!
@@ -449,8 +442,8 @@ See the <a href="http://covid19.genenetwork.org/blog">http://covid19.genenetwork
 </div>
 </div>
 
-<div id="outline-container-orge2aecf8" class="outline-2">
-<h2 id="orge2aecf8"><span class="section-number-2">12</span> How do I change the work flows?</h2>
+<div id="outline-container-orgd04b8f8" class="outline-2">
+<h2 id="orgd04b8f8"><span class="section-number-2">12</span> How do I change the work flows?</h2>
 <div class="outline-text-2" id="text-12">
 <p>
 See the <a href="http://covid19.genenetwork.org/blog">http://covid19.genenetwork.org/blog</a>!
@@ -458,8 +451,8 @@ See the <a href="http://covid19.genenetwork.org/blog">http://covid19.genenetwork
 </div>
 </div>
 
-<div id="outline-container-orgd45b3bc" class="outline-2">
-<h2 id="orgd45b3bc"><span class="section-number-2">13</span> How do I change the source code?</h2>
+<div id="outline-container-org5d1ee05" class="outline-2">
+<h2 id="org5d1ee05"><span class="section-number-2">13</span> How do I change the source code?</h2>
 <div class="outline-text-2" id="text-13">
 <p>
 Go to our <a href="https://github.com/arvados/bh20-seq-resource">source code repositories</a>, fork/clone the repository, change
@@ -469,8 +462,8 @@ many PRs we already merged.
 </div>
 </div>
 
-<div id="outline-container-org2bb9455" class="outline-2">
-<h2 id="org2bb9455"><span class="section-number-2">14</span> Should I choose CC-BY or CC0?</h2>
+<div id="outline-container-orgae6461b" class="outline-2">
+<h2 id="orgae6461b"><span class="section-number-2">14</span> Should I choose CC-BY or CC0?</h2>
 <div class="outline-text-2" id="text-14">
 <p>
 Restrictive data licenses are hampering data sharing and reproducible
@@ -486,8 +479,8 @@ In all honesty: we prefer both data and software to be free.
 </div>
 </div>
 
-<div id="outline-container-org62bf23f" class="outline-2">
-<h2 id="org62bf23f"><span class="section-number-2">15</span> How do I deal with private data and privacy?</h2>
+<div id="outline-container-org3ea90a9" class="outline-2">
+<h2 id="org3ea90a9"><span class="section-number-2">15</span> How do I deal with private data and privacy?</h2>
 <div class="outline-text-2" id="text-15">
 <p>
 A public sequence resource is about public data. Metadata can refer to
@@ -498,8 +491,8 @@ plan to combine identifiers with clinical data stored securely at
 </div>
 </div>
 
-<div id="outline-container-org40c6da0" class="outline-2">
-<h2 id="org40c6da0"><span class="section-number-2">16</span> How do I communicate with you?</h2>
+<div id="outline-container-org7ff7106" class="outline-2">
+<h2 id="org7ff7106"><span class="section-number-2">16</span> How do I communicate with you?</h2>
 <div class="outline-text-2" id="text-16">
 <p>
 We use a <a href="https://gitter.im/arvados/pubseq?utm_source=share-link&amp;utm_medium=link&amp;utm_campaign=share-link">gitter channel</a> you can join.
@@ -507,8 +500,8 @@ We use a <a href="https://gitter.im/arvados/pubseq?utm_source=share-link&amp;utm
 </div>
 </div>
 
-<div id="outline-container-org1f27c44" class="outline-2">
-<h2 id="org1f27c44"><span class="section-number-2">17</span> Who are the sponsors?</h2>
+<div id="outline-container-org9566fa7" class="outline-2">
+<h2 id="org9566fa7"><span class="section-number-2">17</span> Who are the sponsors?</h2>
 <div class="outline-text-2" id="text-17">
 <p>
 The main sponsors are listed in the footer. In addition to the time
@@ -519,7 +512,7 @@ for donating COVID-19 related compute time.
 </div>
 </div>
 <div id="postamble" class="status">
-<hr><small>Created by <a href="http://thebird.nl/">Pjotr Prins</a> (pjotr.public768 at thebird 'dot' nl) using Emacs org-mode and a healthy dose of Lisp!<br />Modified 2020-05-29 Fri 08:26</small>.
+<hr><small>Created by <a href="http://thebird.nl/">Pjotr Prins</a> (pjotr.public768 at thebird 'dot' nl) using Emacs org-mode and a healthy dose of Lisp!<br />Modified 2020-07-12 Sun 04:54</small>.
 </div>
 </body>
 </html>
diff --git a/doc/web/about.org b/doc/web/about.org
index b6387e7..ad13bc3 100644
--- a/doc/web/about.org
+++ b/doc/web/about.org
@@ -33,9 +33,10 @@ The *public sequence resource* is an initiative by [[https://github.com/arvados/
 ontology experts who want to create something agile and useful for the
 wider research community. The initiative started at the COVID-19
 biohackathon in April 2020 and is ongoing. The main project drivers
-are Pjotr Prins (UTHSC), Peter Amstutz (Curii), Michael Crusoe (Common
-Workflow Language), Thomas Liener (consultant, formerly EBI) and
-Jerven Bolleman (Swiss Institute of Bioinformatics).
+are Pjotr Prins (UTHSC), Peter Amstutz (Curii), Andrea Guarracino
+(University of Rome Tor Vergata), Michael Crusoe (Common Workflow
+Language), Thomas Liener (consultant, formerly EBI), Erik Garrison
+(UCSC) and Jerven Bolleman (Swiss Institute of Bioinformatics).
 
 Notably, as this is a free software initiative, the project represents
 major work by hundreds of software developers and ontology and data
@@ -48,11 +49,12 @@ bioinformatics using agile methods. Unlike the resources from large
 institutes we can improve things on a dime and anyone can contribute
 to building out this resource!
 
-Importantly: all data is published under the [[https://creativecommons.org/licenses/by/4.0/][Creative Commons 4.0
-attribution license]] which means it data can be published and workflows
-can run in public environments allowing for improved access for
-research and reproducible results. This contrasts with some other
-public resources, including GISAID.
+Importantly: all data is published under either the [[https://creativecommons.org/licenses/by/4.0/][Creative Commons
+4.0 attribution license]] or the [[https://creativecommons.org/share-your-work/public-domain/cc0/][CC0 “No Rights Reserved” license]] which
+means the data can be published and workflows can run in public
+environments allowing for improved access for research and
+reproducible results. This contrasts with some other public resources,
+including GISAID.
 
 * Why should I upload my data here?
 
@@ -105,6 +107,8 @@ using workflows from the High Performance Open Biology Lab defined
 The Swiss Institute of Bioinformatics has included this data in
 https://covid-19-sparql.expasy.org/ and made it part of [[https://www.uniprot.org/][Uniprot]].
 
+The Pantograph [[https://graph-genome.github.io/][viewer]] uses PubSeq data for their visualisations.
+
 [[https://uthsc.edu][UTHSC]] and [[https://www.ornl.gov/news/ornl-fight-against-covid-19][ORNL]] use COVID-19 PubSeq data for protein prediction and
 drug development.
 
diff --git a/example/maximum_metadata_example.yaml b/example/maximum_metadata_example.yaml
index 432877c..54736f8 100644
--- a/example/maximum_metadata_example.yaml
+++ b/example/maximum_metadata_example.yaml
@@ -1,5 +1,11 @@
 id: placeholder
 
+license:
+    license_type: http://creativecommons.org/licenses/by/4.0/
+    title: "Sample"
+    attribution_name: "John doe, Joe Boe, Jonny Oe"
+    attribution_url: http://covid19.genenetwork.org/id
+
 host:
     host_id: XX1
     host_species: http://purl.obolibrary.org/obo/NCBITaxon_9606
@@ -13,9 +19,9 @@ host:
     additional_host_information: Optional free text field for additional information
 
 sample:
-    sample_id: Id of the sample as defined by the submitter 
+    sample_id: Id of the sample as defined by the submitter
     collector_name: Name of the person that took the sample
-    collecting_institution: Institute that was responsible of sampling  
+    collecting_institution: Institute that was responsible for sampling
     specimen_source: [http://purl.obolibrary.org/obo/NCIT_C155831,http://purl.obolibrary.org/obo/NCIT_C155835]
     collection_date: "2020-01-01"
     collection_location: http://www.wikidata.org/entity/Q148
diff --git a/example/minimal_metadata_example.yaml b/example/minimal_metadata_example.yaml
index 51f8a87..1b46cc7 100644
--- a/example/minimal_metadata_example.yaml
+++ b/example/minimal_metadata_example.yaml
@@ -1,5 +1,9 @@
 id: placeholder
 
+
+license:
+    license_type: http://creativecommons.org/licenses/by/4.0/
+
 host:
     host_species: http://purl.obolibrary.org/obo/NCBITaxon_9606
 
@@ -15,4 +19,4 @@ technology:
     sample_sequencing_technology: [http://www.ebi.ac.uk/efo/EFO_0008632]
 
 submitter:
-    authors: [John Doe]
\ No newline at end of file
+    authors: [John Doe]