From 9c9512a7e040f8247d259bdc6f9cf55d5d276baf Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Wed, 15 Jul 2020 12:48:12 +0100
Subject: Load metadata locally without pkg_resources

---
 doc/blog/using-covid-19-pubseq-part5.org | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

(limited to 'doc/blog')

diff --git a/doc/blog/using-covid-19-pubseq-part5.org b/doc/blog/using-covid-19-pubseq-part5.org
index 4b0ea64..aa06d5e 100644
--- a/doc/blog/using-covid-19-pubseq-part5.org
+++ b/doc/blog/using-covid-19-pubseq-part5.org
@@ -13,6 +13,7 @@
  - [[#what-is-the-schema][What is the schema?]]
  - [[#how-is-the-website-generated][How is the website generated?]]
  - [[#modifying-the-schema][Modifying the schema]]
+ - [[#adding-fields-to-the-form][Adding fields to the form]]
 
 * Modify Metadata
 
@@ -113,8 +114,15 @@ So, we'll add it simply as a title field. Now the draft schema is
           _id: https://creativecommons.org/ns#Work
 #+END_SRC
 
-Now, we are no ontology experts, right? So, next we submit a patch to our source tree and
-ask for feedback before wiring it up in the data entry form. The pull request was
-submitted here FIXME.
+Now, we are no ontology experts, right? So, next we submit a patch to
+our source tree and ask for feedback before wiring it up in the data
+entry form. The pull request was submitted [[https://github.com/arvados/bh20-seq-resource/pull/97][here]] and reviewed on the
+gitter channel and I merged it.
+
+* Adding fields to the form
+
+To add the new fields to the form we have to modify it a little. If we
+go to the upload form we need to add the license box. The schema is
+loaded in [[https://github.com/arvados/bh20-seq-resource/blob/a0c8ebd57b875f265e8b0efec4abfaf892eb6c45/bh20simplewebuploader/main.py#L229][main.py]] in the 'generate_form' function.
 
 /Note: work in progress/
-- 
cgit 1.4.1


From f4ed46dae20abe5147871495ede2d6ac2b0854bc Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Wed, 15 Jul 2020 14:30:56 +0100
Subject: Add RDF output

---
 bh20sequploader/bh20seq-schema.yml       |  9 +++++++--
 bh20sequploader/bh20seq-shex.rdf         | 24 +++++++++++++++++-------
 doc/blog/using-covid-19-pubseq-part5.org |  2 ++
 3 files changed, 26 insertions(+), 9 deletions(-)

(limited to 'doc/blog')

diff --git a/bh20sequploader/bh20seq-schema.yml b/bh20sequploader/bh20seq-schema.yml
index 29ac22c..c690e8a 100644
--- a/bh20sequploader/bh20seq-schema.yml
+++ b/bh20sequploader/bh20seq-schema.yml
@@ -23,16 +23,21 @@ $graph:
       type: string?
       jsonldPredicate:
           _id: http://semanticscience.org/resource/SIO_001167
+    attribution_name:
+      doc: Attribution NAME related to data license
+      type: string?
+      jsonldPredicate:
+          _id: https://creativecommons.org/ns#attributionName
     attribution_url:
       doc: Attribution URL related to data license
       type: string?
       jsonldPredicate:
-          _id: https://creativecommons.org/ns#Work
+          _id: https://creativecommons.org/ns#attributionURL
     attribution_source:
       doc: Attribution source URL related to data license
       type: string?
       jsonldPredicate:
-          _id: https://creativecommons.org/ns#Work
+          _id: https://creativecommons.org/ns#attributionSource
 
 - name: hostSchema
   type: record
diff --git a/bh20sequploader/bh20seq-shex.rdf b/bh20sequploader/bh20seq-shex.rdf
index 965229c..c48267d 100644
--- a/bh20sequploader/bh20seq-shex.rdf
+++ b/bh20sequploader/bh20seq-shex.rdf
@@ -1,6 +1,7 @@
 PREFIX : <https://raw.githubusercontent.com/arvados/bh20-seq-resource/master/bh20sequploader/bh20seq-shex.rdf#>
 PREFIX MainSchema: <http://biohackathon.org/bh20-seq-schema#MainSchema/>
 PREFIX hostSchema: <http://biohackathon.org/bh20-seq-schema#hostSchema/>
+PREFIX cc:  <http://creativecommons.org/ns#>
 PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
 PREFIX obo: <http://purl.obolibrary.org/obo/>
 PREFIX sio: <http://semanticscience.org/resource/>
@@ -15,10 +16,11 @@ PREFIX wikidata: <http://www.wikidata.org/entity/>
   MainSchema:submitter @:submitterShape ;
   MainSchema:technology @:technologyShape ;
   MainSchema:virus @:virusShape;
+  MainSchema:license @:licenseShape;
 }
 
 :hostShape  {
-  	efo:EFO_0000532 [ obo:NCBITaxon_~ ] ;
+    efo:EFO_0000532 [ obo:NCBITaxon_~ ] ;
     sio:SIO_000115 xsd:string ?;
     obo:PATO_0000047 [ obo:PATO_0000384 obo:PATO_0000383 obo:PATO_0001340] ?;
     obo:PATO_0000011 xsd:integer ?;
@@ -32,14 +34,14 @@ PREFIX wikidata: <http://www.wikidata.org/entity/>
 
 :sampleShape  {
     sio:SIO_000115 xsd:string;
-	  evs:C25164 xsd:string;
-	  obo:GAZ_00000448 [wikidata:~] ;
+    evs:C25164 xsd:string;
+    obo:GAZ_00000448 [wikidata:~] ;
     obo:OBI_0001895 xsd:string ?;
     obo:NCIT_C41206 xsd:string ?;
     obo:OBI_0001479 IRI {0,2};
     obo:OBI_0001472 xsd:string ?;
     sio:SIO_001167 xsd:string ?;
-	edam:data_2091 IRI {0,3};
+    edam:data_2091 IRI {0,3};
 }
 
 :submitterShape {
@@ -47,7 +49,7 @@ PREFIX wikidata: <http://www.wikidata.org/entity/>
     sio:SIO_000116 xsd:string *;
     sio:SIO_000172 xsd:string ?;
     obo:NCIT_C37984 xsd:string ?;
-  	obo:NCIT_C37900 xsd:string ?;
+    obo:NCIT_C37900 xsd:string ?;
     efo:EFO_0001741 xsd:string ?;
     obo:NCIT_C42781 xsd:string ?;
     obo:NCIT_C19026 xsd:string ?;
@@ -63,6 +65,14 @@ PREFIX wikidata: <http://www.wikidata.org/entity/>
 }
 
 :virusShape{
-	edam:data_1875 [ obo:NCBITaxon_~ ] ;
-  	sio:SIO_010055 xsd:string ?;
+    edam:data_1875 [ obo:NCBITaxon_~ ] ;
+    sio:SIO_010055 xsd:string ?;
 }
+
+:licenseShape{
+    cc:License xsd:string;
+    sio:SIO_001167 xsd:string ?;
+    cc:attributionName xsd:string ?;
+    cc:attributionURL xsd:string ?;
+    cc:attributionSource xsd:string ?;
+}
\ No newline at end of file
diff --git a/doc/blog/using-covid-19-pubseq-part5.org b/doc/blog/using-covid-19-pubseq-part5.org
index aa06d5e..cb11f43 100644
--- a/doc/blog/using-covid-19-pubseq-part5.org
+++ b/doc/blog/using-covid-19-pubseq-part5.org
@@ -125,4 +125,6 @@ To add the new fields to the form we have to modify it a little. If we
 go to the upload form we need to add the license box. The schema is
 loaded in [[https://github.com/arvados/bh20-seq-resource/blob/a0c8ebd57b875f265e8b0efec4abfaf892eb6c45/bh20simplewebuploader/main.py#L229][main.py]] in the 'generate_form' function.
 
+With this [[https://github.com/arvados/bh20-seq-resource/commit/b9691c7deae30bd6422fb7b0681572b7b6f78ae3][patch]] the website adds the license input fields on the form.
+
 /Note: work in progress/
-- 
cgit 1.4.1


From 712614e5627e54df7ec6ab975dc86a1055051455 Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Wed, 15 Jul 2020 14:54:59 +0100
Subject: License RDF

---
 bh20sequploader/bh20seq-schema.yml       |  3 ++-
 bh20sequploader/bh20seq-shex.rdf         |  3 ++-
 doc/blog/using-covid-19-pubseq-part5.org | 29 +++++++++++++++++++++++------
 3 files changed, 27 insertions(+), 8 deletions(-)

(limited to 'doc/blog')

diff --git a/bh20sequploader/bh20seq-schema.yml b/bh20sequploader/bh20seq-schema.yml
index c690e8a..ef55c55 100644
--- a/bh20sequploader/bh20seq-schema.yml
+++ b/bh20sequploader/bh20seq-schema.yml
@@ -1,6 +1,7 @@
 $base: http://biohackathon.org/bh20-seq-schema
 $namespaces:
   cc:  http://creativecommons.org/ns#
+  dc:  http://purl.org/metadata/dublin_core_elements#
   sch: https://schema.org/
   efo: http://www.ebi.ac.uk/efo/
   obo: http://purl.obolibrary.org/obo/
@@ -22,7 +23,7 @@ $graph:
       doc: Attribution title related to data license
       type: string?
       jsonldPredicate:
-          _id: http://semanticscience.org/resource/SIO_001167
+          _id: http://purl.org/metadata/dublin_core_elements#Title
     attribution_name:
       doc: Attribution NAME related to data license
       type: string?
diff --git a/bh20sequploader/bh20seq-shex.rdf b/bh20sequploader/bh20seq-shex.rdf
index c48267d..9fab334 100644
--- a/bh20sequploader/bh20seq-shex.rdf
+++ b/bh20sequploader/bh20seq-shex.rdf
@@ -2,6 +2,7 @@ PREFIX : <https://raw.githubusercontent.com/arvados/bh20-seq-resource/master/bh2
 PREFIX MainSchema: <http://biohackathon.org/bh20-seq-schema#MainSchema/>
 PREFIX hostSchema: <http://biohackathon.org/bh20-seq-schema#hostSchema/>
 PREFIX cc:  <http://creativecommons.org/ns#>
+PREFIX dc:  <http://purl.org/metadata/dublin_core_elements#>
 PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
 PREFIX obo: <http://purl.obolibrary.org/obo/>
 PREFIX sio: <http://semanticscience.org/resource/>
@@ -71,7 +72,7 @@ PREFIX wikidata: <http://www.wikidata.org/entity/>
 
 :licenseShape{
     cc:License xsd:string;
-    sio:SIO_001167 xsd:string ?;
+    dc:Title xsd:string ?;
     cc:attributionName xsd:string ?;
     cc:attributionURL xsd:string ?;
     cc:attributionSource xsd:string ?;
diff --git a/doc/blog/using-covid-19-pubseq-part5.org b/doc/blog/using-covid-19-pubseq-part5.org
index cb11f43..98c2c31 100644
--- a/doc/blog/using-covid-19-pubseq-part5.org
+++ b/doc/blog/using-covid-19-pubseq-part5.org
@@ -14,19 +14,20 @@
  - [[#how-is-the-website-generated][How is the website generated?]]
  - [[#modifying-the-schema][Modifying the schema]]
  - [[#adding-fields-to-the-form][Adding fields to the form]]
+ - [[#testing-the-license-fields][Testing the license fields]]
 
 * Modify Metadata
 
 The public sequence resource uses multiple data formats listed on the
-[[./download][DOWNLOAD]] page. One of the most exciting features is the full support
+[[http://covid19.genenetwork.org/download][download]] page. One of the most exciting features is the full support
 for RDF and semantic web/linked data ontologies. This technology
 allows for querying data in unprescribed ways - that is, you can
 formulate your own queries without dealing with a preset model of that
 data (so typical of CSV files and SQL tables). Examples of exploring
-data are listed [[./blog?id=using-covid-19-pubseq-part1][here]].
+data are listed [[http://covid19.genenetwork.org/blog?id=using-covid-19-pubseq-part1][here]].
 
 In this BLOG we are going to look at the metadata entered on the
-[[./][COVID-19 PubSeq]] website (or command line client). It is important to
+COVID-19 PubSeq website (or command line client). It is important to
 understand that anyone, including you, can change that information!
 
 * What is the schema?
@@ -42,8 +43,8 @@ All from that one metadata schema.
 * Modifying the schema
 
 One of the first things we want to do is to add a field for the data
-license. Initially we only support CC-4.0 as a license by default, but
-now we want to give uploaders the option to make it an even more
+license. Initially we only supported CC-4.0 as a license, but
+we wanted to give uploaders the option to use an even more
 liberal CC0 license. The first step is to find a good ontology term
 for the field. Searching for `creative commons cc0 rdf' rendered this
 useful [[https://creativecommons.org/ns][page]].  We also find an [[https://wiki.creativecommons.org/wiki/CC_License_Rdf_Overview][overview]] where CC0 is represented as URI
@@ -127,4 +128,20 @@ loaded in [[https://github.com/arvados/bh20-seq-resource/blob/a0c8ebd57b875f265e
 
 With this [[https://github.com/arvados/bh20-seq-resource/commit/b9691c7deae30bd6422fb7b0681572b7b6f78ae3][patch]] the website adds the license input fields on the form.
 
-/Note: work in progress/
+Finally, to make RDF output work we need to add expressions to bh20seq-shex.rdf. This
+was done with this [[https://github.com/arvados/bh20-seq-resource/commit/f4ed46dae20abe5147871495ede2d6ac2b0854bc][patch]]. In the end we decided to use the Dublin core title,
+http://purl.org/metadata/dublin_core_elements#Title:
+
+#+BEGIN_SRC js
+:licenseShape{
+    cc:License xsd:string;
+    dc:Title xsd:string ?;
+    cc:attributionName xsd:string ?;
+    cc:attributionURL xsd:string ?;
+    cc:attributionSource xsd:string ?;
+}
+#+END_SRC
+
+Note that cc:AttributionSource is not really defined in the cc standard.
+
+* TODO Testing the license fields
-- 
cgit 1.4.1


From 73be46fd1db58f132fa60ff30d33d67927a341a7 Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Thu, 16 Jul 2020 09:48:31 +0100
Subject: Adds ESR logo and cropped CWL logo

---
 bh20simplewebuploader/static/image/CWL.png  | Bin 0 -> 11066 bytes
 bh20simplewebuploader/static/image/ESR.png  | Bin 0 -> 67869 bytes
 bh20simplewebuploader/templates/footer.html |   6 +++++-
 doc/blog/using-covid-19-pubseq-part5.org    |   4 ++++
 4 files changed, 9 insertions(+), 1 deletion(-)
 create mode 100644 bh20simplewebuploader/static/image/CWL.png
 create mode 100644 bh20simplewebuploader/static/image/ESR.png

(limited to 'doc/blog')

diff --git a/bh20simplewebuploader/static/image/CWL.png b/bh20simplewebuploader/static/image/CWL.png
new file mode 100644
index 0000000..81d1807
Binary files /dev/null and b/bh20simplewebuploader/static/image/CWL.png differ
diff --git a/bh20simplewebuploader/static/image/ESR.png b/bh20simplewebuploader/static/image/ESR.png
new file mode 100644
index 0000000..557c798
Binary files /dev/null and b/bh20simplewebuploader/static/image/ESR.png differ
diff --git a/bh20simplewebuploader/templates/footer.html b/bh20simplewebuploader/templates/footer.html
index a1dd4fd..37a6b64 100644
--- a/bh20simplewebuploader/templates/footer.html
+++ b/bh20simplewebuploader/templates/footer.html
@@ -21,7 +21,7 @@
           <img src="static/image/covid19biohackathon.png"></a>
       </div>
       <div class="sponsorimg">
-        <a href="https://www.commonwl.org/"><img src="static/image/CWL-Logo-Header.png"></a>
+        <a href="https://www.commonwl.org/"><img src="static/image/CWL.png"></a>
       </div>
       <div class="sponsorimg">
         <a href="https://arvados.org/"><img src="static/image/arvados-logo.png"></a>
@@ -29,6 +29,10 @@
       <div class="sponsorimg">
         <a href="https://uthsc.edu/"><img src="static/image/UTHSC-primary-stacked-logo-4c.png"></a>
       </div>
+      <div class="sponsorimg">
+        <a href="https://www.esr.cri.nz/"><img src="static/image/ESR.png"></a>
+      </div>
+
     </div>
   </div>
   <div class="footer">
diff --git a/doc/blog/using-covid-19-pubseq-part5.org b/doc/blog/using-covid-19-pubseq-part5.org
index 98c2c31..78eea66 100644
--- a/doc/blog/using-covid-19-pubseq-part5.org
+++ b/doc/blog/using-covid-19-pubseq-part5.org
@@ -144,4 +144,8 @@ http://purl.org/metadata/dublin_core_elements#Title:
 
 Note that cc:AttributionSource is not really defined in the cc standard.
 
+When pushing the license info we discovered the workflow broke because
+the existing data had no licensing info. So we changed the license
+field to be optional - a missing license is assumed to be CC-BY-4.0.
+
 * TODO Testing the license fields
-- 
cgit 1.4.1


From 0a94b15d1bb84042f5e136fd59974de41192b68d Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Thu, 16 Jul 2020 13:00:24 +0100
Subject: Blog: workflows

---
 doc/blog/using-covid-19-pubseq-part4.org | 6 ++++++
 doc/web/about.org                        | 3 ++-
 2 files changed, 8 insertions(+), 1 deletion(-)

(limited to 'doc/blog')

diff --git a/doc/blog/using-covid-19-pubseq-part4.org b/doc/blog/using-covid-19-pubseq-part4.org
index 5fe71d1..8ad5e2d 100644
--- a/doc/blog/using-covid-19-pubseq-part4.org
+++ b/doc/blog/using-covid-19-pubseq-part4.org
@@ -10,6 +10,7 @@
 
 * Table of Contents                                                     :TOC:noexport:
  - [[#what-does-this-mean][What does this mean?]]
+ - [[#where-can-i-find-the-workflows][Where can I find the workflows?]]
  - [[#modify-workflow][Modify Workflow]]
 
 * What does this mean?
@@ -18,6 +19,11 @@ This means that when someone uploads a SARS-CoV-2 sequence using one
 of our tools (CLI or web-based) they add a sequence and some metadata
 which triggers a rerun of our workflows.
 
+* Where can I find the workflows?
+
+Workflows are written in the Common Workflow Language (CWL) and listed
+on [[https://github.com/arvados/bh20-seq-resource/tree/master/workflows][github]]. PubSeq being an open project, these workflows can be studied
+and modified!
 
 * Modify Workflow
 
diff --git a/doc/web/about.org b/doc/web/about.org
index ad13bc3..1949e2d 100644
--- a/doc/web/about.org
+++ b/doc/web/about.org
@@ -140,7 +140,8 @@ See the [[http://covid19.genenetwork.org/blog]]!
 
 * How do I change the work flows?
 
-See the [[http://covid19.genenetwork.org/blog]]!
+Workflows are on [[https://github.com/arvados/bh20-seq-resource/tree/master/workflows][github]] and can be modified. See also the
+[[http://covid19.genenetwork.org/blog][workflow blog]].
 
 * How do I change the source code?
 
-- 
cgit 1.4.1


From c69046ee9a5e24eadcd8cb885633328b0fd88011 Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Fri, 17 Jul 2020 11:06:33 +0100
Subject: Update generated docs

---
 doc/blog/using-covid-19-pubseq-part1.html | 192 +++++++++++++++--------------
 doc/blog/using-covid-19-pubseq-part4.html |  44 +++++--
 doc/blog/using-covid-19-pubseq-part5.html | 194 ++++++++++++++++++++++++++----
 3 files changed, 305 insertions(+), 125 deletions(-)

(limited to 'doc/blog')

diff --git a/doc/blog/using-covid-19-pubseq-part1.html b/doc/blog/using-covid-19-pubseq-part1.html
index 1959fac..0e6136c 100644
--- a/doc/blog/using-covid-19-pubseq-part1.html
+++ b/doc/blog/using-covid-19-pubseq-part1.html
@@ -3,7 +3,7 @@
 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
 <html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en">
 <head>
-<!-- 2020-05-29 Fri 12:06 -->
+<!-- 2020-07-17 Fri 05:05 -->
 <meta http-equiv="Content-Type" content="text/html;charset=utf-8" />
 <meta name="viewport" content="width=device-width, initial-scale=1" />
 <title>COVID-19 PubSeq (part 1)</title>
@@ -248,20 +248,20 @@ for the JavaScript code in this tag.
 <h2>Table of Contents</h2>
 <div id="text-table-of-contents">
 <ul>
-<li><a href="#org9afe6ab">1. What does this mean?</a></li>
-<li><a href="#orgf4bc3d4">2. Fetch sequence data</a></li>
-<li><a href="#org9d7d482">3. Predicates</a></li>
-<li><a href="#orgc6046bb">4. Fetch submitter info and other metadata</a></li>
-<li><a href="#orgdcb216b">5. Fetch all sequences from Washington state</a></li>
-<li><a href="#org7060f51">6. Discussion</a></li>
-<li><a href="#orgdc51ccc">7. Acknowledgements</a></li>
+<li><a href="#org0db5db0">1. What does this mean?</a></li>
+<li><a href="#orge5267fd">2. Fetch sequence data</a></li>
+<li><a href="#orgfbd3adc">3. Predicates</a></li>
+<li><a href="#org08e70e1">4. Fetch submitter info and other metadata</a></li>
+<li><a href="#org9194557">5. Fetch all sequences from Washington state</a></li>
+<li><a href="#org76317ad">6. Discussion</a></li>
+<li><a href="#orgeb871a1">7. Acknowledgements</a></li>
 </ul>
 </div>
 </div>
 
 
-<div id="outline-container-org9afe6ab" class="outline-2">
-<h2 id="org9afe6ab"><span class="section-number-2">1</span> What does this mean?</h2>
+<div id="outline-container-org0db5db0" class="outline-2">
+<h2 id="org0db5db0"><span class="section-number-2">1</span> What does this mean?</h2>
 <div class="outline-text-2" id="text-1">
 <p>
 This means that when someone uploads a SARS-CoV-2 sequence using one
@@ -274,24 +274,24 @@ expressed in a <a href="https://github.com/arvados/bh20-seq-resource/blob/master
   type: record
   fields:
     host_species:
-        doc: Host species as defined in NCBITaxon, e.g. http://purl.obolibrary.org/obo/NCBITaxon_<span style="color: #8bc34a;">9606</span> for Homo sapiens
+        doc: Host species as defined in NCBITaxon, e.g. http://purl.obolibrary.org/obo/NCBITaxon_9606 for Homo sapiens
         type: string
         jsonldPredicate:
-          _id: http://www.ebi.ac.uk/efo/EFO_<span style="color: #8bc34a;">0000532</span>
-          _type: <span style="color: #9ccc65;">"@id"</span>
-          noLinkCheck: <span style="color: #8bc34a;">true</span>
+          _id: http://www.ebi.ac.uk/efo/EFO_0000532
+          _type: "@id"
+          noLinkCheck: true
     host_sex:
-        doc: Sex of the host as defined in PATO, expect male <span style="color: #e91e63;">()</span> or female <span style="color: #e91e63;">()</span>
+        doc: Sex of the host as defined in PATO, expect male () or female ()
         type: string?
         jsonldPredicate:
-          _id: http://purl.obolibrary.org/obo/PATO_<span style="color: #8bc34a;">0000047</span>
-          _type: <span style="color: #9ccc65;">"@id"</span>
-          noLinkCheck: <span style="color: #8bc34a;">true</span>
+          _id: http://purl.obolibrary.org/obo/PATO_0000047
+          _type: "@id"
+          noLinkCheck: true
     host_age:
-        doc: Age of the host as number <span style="color: #e91e63;">(</span>e.g. <span style="color: #8bc34a;">50</span><span style="color: #e91e63;">)</span>
+        doc: Age of the host as number (e.g. 50)
         type: int?
         jsonldPredicate:
-          _id: http://purl.obolibrary.org/obo/PATO_<span style="color: #8bc34a;">0000011</span>
+          _id: http://purl.obolibrary.org/obo/PATO_0000011
 </pre>
 </div>
 
@@ -314,8 +314,8 @@ initiative!
 </div>
 
 
-<div id="outline-container-orgf4bc3d4" class="outline-2">
-<h2 id="orgf4bc3d4"><span class="section-number-2">2</span> Fetch sequence data</h2>
+<div id="outline-container-orge5267fd" class="outline-2">
+<h2 id="orge5267fd"><span class="section-number-2">2</span> Fetch sequence data</h2>
 <div class="outline-text-2" id="text-2">
 <p>
 The latest run of the pipeline can be viewed <a href="https://workbench.lugli.arvadosapi.com/collections/lugli-4zz18-z513nlpqm03hpca">here</a>. Each of these
@@ -339,8 +339,8 @@ these identifiers throughout.
 </div>
 </div>
 
-<div id="outline-container-org9d7d482" class="outline-2">
-<h2 id="org9d7d482"><span class="section-number-2">3</span> Predicates</h2>
+<div id="outline-container-orgfbd3adc" class="outline-2">
+<h2 id="orgfbd3adc"><span class="section-number-2">3</span> Predicates</h2>
 <div class="outline-text-2" id="text-3">
 <p>
 To explore an RDF dataset, the first query we can do is open and gets
@@ -350,10 +350,10 @@ the following in a SPARQL end point
 </p>
 
 <div class="org-src-container">
-<pre class="src src-sql"><span style="color: #fff59d;">select</span> <span style="color: #fff59d;">distinct</span> ?p
-<span style="color: #e91e63;">{</span>
+<pre class="src src-sql">select distinct ?p
+{
    ?o ?p ?s
-<span style="color: #e91e63;">}</span>
+}
 </pre>
 </div>
 
@@ -364,10 +364,10 @@ To get a <a href="http://sparql.genenetwork.org/sparql/?default-graph-uri=&amp;q
 </p>
 
 <div class="org-src-container">
-<pre class="src src-sql"><span style="color: #fff59d;">select</span> <span style="color: #fff59d;">distinct</span> ?g
-<span style="color: #e91e63;">{</span>
-    GRAPH ?g <span style="color: #2196F3;">{</span>?s ?p ?o<span style="color: #2196F3;">}</span>
-<span style="color: #e91e63;">}</span>
+<pre class="src src-sql">select distinct ?g
+{
+    GRAPH ?g {?s ?p ?o}
+}
 </pre>
 </div>
 
@@ -383,10 +383,10 @@ To list all submitters, try
 </p>
 
 <div class="org-src-container">
-<pre class="src src-sql"><span style="color: #fff59d;">select</span> <span style="color: #fff59d;">distinct</span> ?s
-<span style="color: #e91e63;">{</span>
-   ?o <a href="http://biohackathon.org/bh20-seq-schema#MainSchema/submitter">&lt;http://biohackathon.org/bh20-seq-</a><span style="color: #fff59d;"><a href="http://biohackathon.org/bh20-seq-schema#MainSchema/submitter">schema</a></span><a href="http://biohackathon.org/bh20-seq-schema#MainSchema/submitter">#MainSchema/submitter&gt;</a> ?s
-<span style="color: #e91e63;">}</span>
+<pre class="src src-sql">select distinct ?s
+{
+   ?o &lt;http://biohackathon.org/bh20-seq-schema#MainSchema/submitter&gt; ?s
+}
 </pre>
 </div>
 
@@ -397,11 +397,11 @@ and by
 </p>
 
 <div class="org-src-container">
-<pre class="src src-sql"><span style="color: #fff59d;">select</span> <span style="color: #fff59d;">distinct</span> ?s
-<span style="color: #e91e63;">{</span>
-   ?o <a href="http://biohackathon.org/bh20-seq-schema#MainSchema/submitter">&lt;http://biohackathon.org/bh20-seq-</a><span style="color: #fff59d;"><a href="http://biohackathon.org/bh20-seq-schema#MainSchema/submitter">schema</a></span><a href="http://biohackathon.org/bh20-seq-schema#MainSchema/submitter">#MainSchema/submitter&gt;</a> ?id .
+<pre class="src src-sql">select distinct ?s
+{
+   ?o &lt;http://biohackathon.org/bh20-seq-schema#MainSchema/submitter&gt; ?id .
    ?id ?p ?s
-<span style="color: #e91e63;">}</span>
+}
 </pre>
 </div>
 
@@ -415,12 +415,12 @@ To lift the full URL out of the query you can use a header like
 </p>
 
 <div class="org-src-container">
-<pre class="src src-sql"><span style="color: #fff59d;">PREFIX</span> pubseq: <a href="http://biohackathon.org/bh20-seq-schema#MainSchema/">&lt;http://biohackathon.org/bh20-seq-</a><span style="color: #fff59d;"><a href="http://biohackathon.org/bh20-seq-schema#MainSchema/">schema</a></span><a href="http://biohackathon.org/bh20-seq-schema#MainSchema/">#MainSchema/&gt;</a>
-<span style="color: #fff59d;">select</span> <span style="color: #fff59d;">distinct</span> ?dataset ?submitter
-<span style="color: #e91e63;">{</span>
+<pre class="src src-sql">PREFIX pubseq: &lt;http://biohackathon.org/bh20-seq-schema#MainSchema/&gt;
+select distinct ?dataset ?submitter
+{
    ?dataset pubseq:submitter ?id .
    ?id ?p ?submitter
-<span style="color: #e91e63;">}</span>
+}
 </pre>
 </div>
 
@@ -438,32 +438,32 @@ Now we got this far, lets <a href="http://sparql.genenetwork.org/sparql/?default
 </p>
 
 <div class="org-src-container">
-<pre class="src src-sql"><span style="color: #fff59d;">PREFIX</span> pubseq: <a href="http://biohackathon.org/bh20-seq-schema#MainSchema/">&lt;http://biohackathon.org/bh20-seq-</a><span style="color: #fff59d;"><a href="http://biohackathon.org/bh20-seq-schema#MainSchema/">schema</a></span><a href="http://biohackathon.org/bh20-seq-schema#MainSchema/">#MainSchema/&gt;</a>
-<span style="color: #fff59d;">select</span> <span style="color: #e91e63;">(</span><span style="color: #ff8A65;">COUNT</span><span style="color: #2196F3;">(</span><span style="color: #fff59d;">distinct</span> ?dataset<span style="color: #2196F3;">)</span> <span style="color: #fff59d;">as</span> ?num<span style="color: #e91e63;">)</span>
-<span style="color: #e91e63;">{</span>
+<pre class="src src-sql">PREFIX pubseq: &lt;http://biohackathon.org/bh20-seq-schema#MainSchema/&gt;
+select (COUNT(distinct ?dataset) as ?num)
+{
    ?dataset pubseq:submitter ?id .
    ?id ?p ?submitter
-<span style="color: #e91e63;">}</span>
+}
 </pre>
 </div>
 </div>
 </div>
 
 
-<div id="outline-container-orgc6046bb" class="outline-2">
-<h2 id="orgc6046bb"><span class="section-number-2">4</span> Fetch submitter info and other metadata</h2>
+<div id="outline-container-org08e70e1" class="outline-2">
+<h2 id="org08e70e1"><span class="section-number-2">4</span> Fetch submitter info and other metadata</h2>
 <div class="outline-text-2" id="text-4">
 <p>
 To get dataests with submitters we can do the above
 </p>
 
 <div class="org-src-container">
-<pre class="src src-sql"><span style="color: #fff59d;">PREFIX</span> pubseq: <a href="http://biohackathon.org/bh20-seq-schema#MainSchema/">&lt;http://biohackathon.org/bh20-seq-</a><span style="color: #fff59d;"><a href="http://biohackathon.org/bh20-seq-schema#MainSchema/">schema</a></span><a href="http://biohackathon.org/bh20-seq-schema#MainSchema/">#MainSchema/&gt;</a>
-<span style="color: #fff59d;">select</span> <span style="color: #fff59d;">distinct</span> ?dataset ?p ?submitter
-<span style="color: #e91e63;">{</span>
+<pre class="src src-sql">PREFIX pubseq: &lt;http://biohackathon.org/bh20-seq-schema#MainSchema/&gt;
+select distinct ?dataset ?p ?submitter
+{
    ?dataset pubseq:submitter ?id .
    ?id ?p ?submitter
-<span style="color: #e91e63;">}</span>
+}
 </pre>
 </div>
 
@@ -480,13 +480,13 @@ Let's focus on one sample with
 </p>
 
 <div class="org-src-container">
-<pre class="src src-sql"><span style="color: #fff59d;">PREFIX</span> pubseq: <a href="http://biohackathon.org/bh20-seq-schema#MainSchema/">&lt;http://biohackathon.org/bh20-seq-</a><span style="color: #fff59d;"><a href="http://biohackathon.org/bh20-seq-schema#MainSchema/">schema</a></span><a href="http://biohackathon.org/bh20-seq-schema#MainSchema/">#MainSchema/&gt;</a>
-<span style="color: #fff59d;">select</span> <span style="color: #fff59d;">distinct</span> ?dataset ?submitter
-<span style="color: #e91e63;">{</span>
+<pre class="src src-sql">PREFIX pubseq: &lt;http://biohackathon.org/bh20-seq-schema#MainSchema/&gt;
+select distinct ?dataset ?submitter
+{
    ?dataset pubseq:submitter ?id .
    ?id ?p ?submitter .
-   FILTER<span style="color: #2196F3;">(</span><span style="color: #fff59d;">CONTAINS</span><span style="color: #EF6C00;">(</span>?submitter,"Roychoudhury"<span style="color: #EF6C00;">)</span><span style="color: #2196F3;">)</span> .
-<span style="color: #e91e63;">}</span>
+   FILTER(CONTAINS(?submitter,"Roychoudhury")) .
+}
 </pre>
 </div>
 
@@ -496,12 +496,12 @@ see if we can get a sample ID by listing sample predicates
 </p>
 
 <div class="org-src-container">
-<pre class="src src-sql"><span style="color: #fff59d;">PREFIX</span> pubseq: <a href="http://biohackathon.org/bh20-seq-schema#MainSchema/">&lt;http://biohackathon.org/bh20-seq-</a><span style="color: #fff59d;"><a href="http://biohackathon.org/bh20-seq-schema#MainSchema/">schema</a></span><a href="http://biohackathon.org/bh20-seq-schema#MainSchema/">#MainSchema/&gt;</a>
-<span style="color: #fff59d;">select</span> <span style="color: #fff59d;">distinct</span> ?p
-<span style="color: #e91e63;">{</span>
+<pre class="src src-sql">PREFIX pubseq: &lt;http://biohackathon.org/bh20-seq-schema#MainSchema/&gt;
+select distinct ?p
+{
    ?dataset ?p ?o .
    ?dataset pubseq:submitter ?id .
-<span style="color: #e91e63;">}</span>
+}
 </pre>
 </div>
 
@@ -513,15 +513,15 @@ Let's zoom in on those of Roychoudhury with
 
 
 <div class="org-src-container">
-<pre class="src src-sql"><span style="color: #fff59d;">PREFIX</span> pubseq: <a href="http://biohackathon.org/bh20-seq-schema#MainSchema/">&lt;http://biohackathon.org/bh20-seq-</a><span style="color: #fff59d;"><a href="http://biohackathon.org/bh20-seq-schema#MainSchema/">schema</a></span><a href="http://biohackathon.org/bh20-seq-schema#MainSchema/">#MainSchema/&gt;</a>
-<span style="color: #fff59d;">select</span> <span style="color: #fff59d;">distinct</span> ?sid ?sample ?p1 ?dataset ?submitter
-<span style="color: #e91e63;">{</span>
+<pre class="src src-sql">PREFIX pubseq: &lt;http://biohackathon.org/bh20-seq-schema#MainSchema/&gt;
+select distinct ?sid ?sample ?p1 ?dataset ?submitter
+{
    ?dataset pubseq:submitter ?id .
    ?id ?p ?submitter .
-   FILTER<span style="color: #2196F3;">(</span><span style="color: #fff59d;">CONTAINS</span><span style="color: #EF6C00;">(</span>?submitter,"Roychoudhury"<span style="color: #EF6C00;">)</span><span style="color: #2196F3;">)</span> .
+   FILTER(CONTAINS(?submitter,"Roychoudhury")) .
    ?dataset pubseq:sample ?sid .
    ?sid ?p1 ?sample
-<span style="color: #e91e63;">}</span>
+}
 </pre>
 </div>
 
@@ -532,18 +532,13 @@ this database. Let's focus on one sample "MT326090.1" with predicate
 </p>
 
 <div class="org-src-container">
-<pre class="src src-sql"><span style="color: #fff59d;">PREFIX</span> pubseq: <a href="http://biohackathon.org/bh20-seq-schema#MainSchema/&gt;
-PREFIX sio: &lt;http://semanticscience.org/resource/">&lt;http://biohackathon.org/bh20-seq-</a><span style="color: #fff59d;"><a href="http://biohackathon.org/bh20-seq-schema#MainSchema/&gt;
-PREFIX sio: &lt;http://semanticscience.org/resource/">schema</a></span><a href="http://biohackathon.org/bh20-seq-schema#MainSchema/&gt;
-PREFIX sio: &lt;http://semanticscience.org/resource/">#MainSchema/&gt;
-</a><span style="color: #fff59d;"><a href="http://biohackathon.org/bh20-seq-schema#MainSchema/&gt;
-PREFIX sio: &lt;http://semanticscience.org/resource/">PREFIX</a></span><a href="http://biohackathon.org/bh20-seq-schema#MainSchema/&gt;
-PREFIX sio: &lt;http://semanticscience.org/resource/"> sio: &lt;http://semanticscience.org/resource/&gt;</a>
-<span style="color: #fff59d;">select</span> <span style="color: #fff59d;">distinct</span> ?sample ?p ?o
-<span style="color: #e91e63;">{</span>
+<pre class="src src-sql">PREFIX pubseq: &lt;http://biohackathon.org/bh20-seq-schema#MainSchema/&gt;
+PREFIX sio: &lt;http://semanticscience.org/resource/&gt;
+select distinct ?sample ?p ?o
+{
    ?sample sio:SIO_000115 "MT326090.1" .
    ?sample ?p ?o .
-<span style="color: #e91e63;">}</span>
+}
 </pre>
 </div>
 
@@ -561,8 +556,8 @@ to view/query the database.
 </div>
 </div>
 
-<div id="outline-container-orgdcb216b" class="outline-2">
-<h2 id="orgdcb216b"><span class="section-number-2">5</span> Fetch all sequences from Washington state</h2>
+<div id="outline-container-org9194557" class="outline-2">
+<h2 id="org9194557"><span class="section-number-2">5</span> Fetch all sequences from Washington state</h2>
 <div class="outline-text-2" id="text-5">
 <p>
 Now we know how to get at the origin we can do it the other way round
@@ -570,15 +565,11 @@ and fetch all sequences referring to Washington state
 </p>
 
 <div class="org-src-container">
-<pre class="src src-sql">
-<span style="color: #fff59d;">select</span> ?seq ?sample
-<span style="color: #e91e63;">{</span>
-    ?seq <a href="http://biohackathon.org/bh20-seq-schema#MainSchema/sample&gt; ?sample .
-    ?sample &lt;http://purl.obolibrary.org/obo/GAZ_00000448&gt; &lt;http://www.wikidata.org/entity/Q1223">&lt;http://biohackathon.org/bh20-seq-</a><span style="color: #fff59d;"><a href="http://biohackathon.org/bh20-seq-schema#MainSchema/sample&gt; ?sample .
-    ?sample &lt;http://purl.obolibrary.org/obo/GAZ_00000448&gt; &lt;http://www.wikidata.org/entity/Q1223">schema</a></span><a href="http://biohackathon.org/bh20-seq-schema#MainSchema/sample&gt; ?sample .
-    ?sample &lt;http://purl.obolibrary.org/obo/GAZ_00000448&gt; &lt;http://www.wikidata.org/entity/Q1223">#MainSchema/sample&gt; ?sample .
-    ?sample &lt;http://purl.obolibrary.org/obo/GAZ_00000448&gt; &lt;http://www.wikidata.org/entity/Q1223&gt;</a>
-<span style="color: #e91e63;">}</span>
+<pre class="src src-sql">select ?seq ?sample
+{
+    ?seq &lt;http://biohackathon.org/bh20-seq-schema#MainSchema/sample&gt; ?sample .
+    ?sample &lt;http://purl.obolibrary.org/obo/GAZ_00000448&gt; &lt;http://www.wikidata.org/entity/Q1223&gt;
+}
 </pre>
 </div>
 
@@ -586,11 +577,26 @@ and fetch all sequences referring to Washington state
 which lists 300 sequences originating from Washington state! Which is almost
 half of the set coming out of GenBank.
 </p>
+
+<p>
+Likewise, to list all sequences from Turkey we look up its wikidata
+entity, which is <a href="https://www.wikidata.org/wiki/Q43">Q43</a>:
+</p>
+
+<div class="org-src-container">
+<pre class="src src-sql">select ?seq ?sample
+{
+    ?seq &lt;http://biohackathon.org/bh20-seq-schema#MainSchema/sample&gt; ?sample .
+    ?sample &lt;http://purl.obolibrary.org/obo/GAZ_00000448&gt; &lt;http://www.wikidata.org/entity/Q43&gt;
+}
+</pre>
 </div>
 </div>
+</div>
+
 
-<div id="outline-container-org7060f51" class="outline-2">
-<h2 id="org7060f51"><span class="section-number-2">6</span> Discussion</h2>
+<div id="outline-container-org76317ad" class="outline-2">
+<h2 id="org76317ad"><span class="section-number-2">6</span> Discussion</h2>
 <div class="outline-text-2" id="text-6">
 <p>
 The public sequence uploader collects sequences, raw data and
@@ -601,8 +607,8 @@ referenced in publications and origins are citeable.
 </div>
 </div>
 
-<div id="outline-container-orgdc51ccc" class="outline-2">
-<h2 id="orgdc51ccc"><span class="section-number-2">7</span> Acknowledgements</h2>
+<div id="outline-container-orgeb871a1" class="outline-2">
+<h2 id="orgeb871a1"><span class="section-number-2">7</span> Acknowledgements</h2>
 <div class="outline-text-2" id="text-7">
 <p>
 The overall effort was due to magnificent freely donated input by a
@@ -617,7 +623,7 @@ Garrison this initiative would not have existed!
 </div>
 </div>
 <div id="postamble" class="status">
-<hr><small>Created by <a href="http://thebird.nl/">Pjotr Prins</a> (pjotr.public768 at thebird 'dot' nl) using Emacs org-mode and a healthy dose of Lisp!<br />Modified 2020-05-29 Fri 12:06</small>.
+<hr><small>Created by <a href="http://thebird.nl/">Pjotr Prins</a> (pjotr.public768 at thebird 'dot' nl) using Emacs org-mode and a healthy dose of Lisp!<br />Modified 2020-07-17 Fri 05:02</small>.
 </div>
 </body>
 </html>
diff --git a/doc/blog/using-covid-19-pubseq-part4.html b/doc/blog/using-covid-19-pubseq-part4.html
index b5a05ca..c975c21 100644
--- a/doc/blog/using-covid-19-pubseq-part4.html
+++ b/doc/blog/using-covid-19-pubseq-part4.html
@@ -3,7 +3,7 @@
 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
 <html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en">
 <head>
-<!-- 2020-07-12 Sun 06:24 -->
+<!-- 2020-07-17 Fri 05:04 -->
 <meta http-equiv="Content-Type" content="text/html;charset=utf-8" />
 <meta name="viewport" content="width=device-width, initial-scale=1" />
 <title>COVID-19 PubSeq (part 4)</title>
@@ -161,6 +161,19 @@
   .footdef  { margin-bottom: 1em; }
   .figure { padding: 1em; }
   .figure p { text-align: center; }
+  .equation-container {
+    display: table;
+    text-align: center;
+    width: 100%;
+  }
+  .equation {
+    vertical-align: middle;
+  }
+  .equation-label {
+    display: table-cell;
+    text-align: right;
+    vertical-align: middle;
+  }
   .inlinetask {
     padding: 10px;
     border: 2px solid gray;
@@ -186,7 +199,7 @@
 @licstart  The following is the entire license notice for the
 JavaScript code in this tag.
 
-Copyright (C) 2012-2018 Free Software Foundation, Inc.
+Copyright (C) 2012-2020 Free Software Foundation, Inc.
 
 The JavaScript code in this tag is free software: you can
 redistribute it and/or modify it under the terms of the GNU
@@ -235,15 +248,16 @@ for the JavaScript code in this tag.
 <h2>Table of Contents</h2>
 <div id="text-table-of-contents">
 <ul>
-<li><a href="#org8f8b64a">1. What does this mean?</a></li>
-<li><a href="#orgcc7a403">2. Modify Workflow</a></li>
+<li><a href="#orgc2ee09f">1. What does this mean?</a></li>
+<li><a href="#org0d37881">2. Where can I find the workflows?</a></li>
+<li><a href="#orgddb0531">3. Modify Workflow</a></li>
 </ul>
 </div>
 </div>
 
 
-<div id="outline-container-org8f8b64a" class="outline-2">
-<h2 id="org8f8b64a"><span class="section-number-2">1</span> What does this mean?</h2>
+<div id="outline-container-orgc2ee09f" class="outline-2">
+<h2 id="orgc2ee09f"><span class="section-number-2">1</span> What does this mean?</h2>
 <div class="outline-text-2" id="text-1">
 <p>
 This means that when someone uploads a SARS-CoV-2 sequence using one
@@ -253,18 +267,28 @@ which triggers a rerun of our workflows.
 </div>
 </div>
 
-
-<div id="outline-container-orgcc7a403" class="outline-2">
-<h2 id="orgcc7a403"><span class="section-number-2">2</span> Modify Workflow</h2>
+<div id="outline-container-org0d37881" class="outline-2">
+<h2 id="org0d37881"><span class="section-number-2">2</span> Where can I find the workflows?</h2>
 <div class="outline-text-2" id="text-2">
 <p>
+Workflows are written in the Common Workflow Language (CWL) and listed
+on <a href="https://github.com/arvados/bh20-seq-resource/tree/master/workflows">github</a>. PubSeq being an open project, these workflows can be studied
+and modified!
+</p>
+</div>
+</div>
+
+<div id="outline-container-orgddb0531" class="outline-2">
+<h2 id="orgddb0531"><span class="section-number-2">3</span> Modify Workflow</h2>
+<div class="outline-text-2" id="text-3">
+<p>
 <i>Work in progress!</i>
 </p>
 </div>
 </div>
 </div>
 <div id="postamble" class="status">
-<hr><small>Created by <a href="http://thebird.nl/">Pjotr Prins</a> (pjotr.public768 at thebird 'dot' nl) using Emacs org-mode and a healthy dose of Lisp!<br />Modified 2020-07-12 Sun 06:24</small>.
+<hr><small>Created by <a href="http://thebird.nl/">Pjotr Prins</a> (pjotr.public768 at thebird 'dot' nl) using Emacs org-mode and a healthy dose of Lisp!<br />Modified 2020-07-17 Fri 01:47</small>.
 </div>
 </body>
 </html>
diff --git a/doc/blog/using-covid-19-pubseq-part5.html b/doc/blog/using-covid-19-pubseq-part5.html
index 80bf559..4caa5ac 100644
--- a/doc/blog/using-covid-19-pubseq-part5.html
+++ b/doc/blog/using-covid-19-pubseq-part5.html
@@ -3,7 +3,7 @@
 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
 <html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en">
 <head>
-<!-- 2020-07-12 Sun 06:24 -->
+<!-- 2020-07-17 Fri 05:03 -->
 <meta http-equiv="Content-Type" content="text/html;charset=utf-8" />
 <meta name="viewport" content="width=device-width, initial-scale=1" />
 <title>COVID-19 PubSeq (part 4)</title>
@@ -161,6 +161,19 @@
   .footdef  { margin-bottom: 1em; }
   .figure { padding: 1em; }
   .figure p { text-align: center; }
+  .equation-container {
+    display: table;
+    text-align: center;
+    width: 100%;
+  }
+  .equation {
+    vertical-align: middle;
+  }
+  .equation-label {
+    display: table-cell;
+    text-align: right;
+    vertical-align: middle;
+  }
   .inlinetask {
     padding: 10px;
     border: 2px solid gray;
@@ -186,7 +199,7 @@
 @licstart  The following is the entire license notice for the
 JavaScript code in this tag.
 
-Copyright (C) 2012-2018 Free Software Foundation, Inc.
+Copyright (C) 2012-2020 Free Software Foundation, Inc.
 
 The JavaScript code in this tag is free software: you can
 redistribute it and/or modify it under the terms of the GNU
@@ -235,38 +248,40 @@ for the JavaScript code in this tag.
 <h2>Table of Contents</h2>
 <div id="text-table-of-contents">
 <ul>
-<li><a href="#org871ad58">1. Modify Metadata</a></li>
-<li><a href="#org07e8755">2. What is the schema?</a></li>
-<li><a href="#org4857280">3. How is the website generated?</a></li>
-<li><a href="#orge709ae2">4. Modifying the schema</a></li>
+<li><a href="#org758b923">1. Modify Metadata</a></li>
+<li><a href="#orgec32c13">2. What is the schema?</a></li>
+<li><a href="#org2e487b2">3. How is the website generated?</a></li>
+<li><a href="#orge4dfe84">4. Modifying the schema</a></li>
+<li><a href="#org564a7a8">5. Adding fields to the form</a></li>
+<li><a href="#org633781a">6. <span class="todo TODO">TODO</span> Testing the license fields</a></li>
 </ul>
 </div>
 </div>
 
 
-<div id="outline-container-org871ad58" class="outline-2">
-<h2 id="org871ad58"><span class="section-number-2">1</span> Modify Metadata</h2>
+<div id="outline-container-org758b923" class="outline-2">
+<h2 id="org758b923"><span class="section-number-2">1</span> Modify Metadata</h2>
 <div class="outline-text-2" id="text-1">
 <p>
 The public sequence resource uses multiple data formats listed on the
-<a href="./download">DOWNLOAD</a> page. One of the most exciting features is the full support
+<a href="http://covid19.genenetwork.org/download">download</a> page. One of the most exciting features is the full support
 for RDF and semantic web/linked data ontologies. This technology
 allows for querying data in unprescribed ways - that is, you can
 formulate your own queries without dealing with a preset model of that
 data (so typical of CSV files and SQL tables). Examples of exploring
-data are listed <a href="./blog?id=using-covid-19-pubseq-part1">here</a>.
+data are listed <a href="http://covid19.genenetwork.org/blog?id=using-covid-19-pubseq-part1">here</a>.
 </p>
 
 <p>
 In this BLOG we are going to look at the metadata entered on the
-<a href="./">COVID-19 PubSeq</a> website (or command line client). It is important to
+COVID-19 PubSeq website (or command line client). It is important to
 understand that anyone, including you, can change that information!
 </p>
 </div>
 </div>
 
-<div id="outline-container-org07e8755" class="outline-2">
-<h2 id="org07e8755"><span class="section-number-2">2</span> What is the schema?</h2>
+<div id="outline-container-orgec32c13" class="outline-2">
+<h2 id="orgec32c13"><span class="section-number-2">2</span> What is the schema?</h2>
 <div class="outline-text-2" id="text-2">
 <p>
 The default metadata schema is listed <a href="https://github.com/arvados/bh20-seq-resource/blob/master/bh20sequploader/bh20seq-schema.yml">here</a>.
@@ -274,8 +289,8 @@ The default metadata schema is listed <a href="https://github.com/arvados/bh20-s
 </div>
 </div>
 
-<div id="outline-container-org4857280" class="outline-2">
-<h2 id="org4857280"><span class="section-number-2">3</span> How is the website generated?</h2>
+<div id="outline-container-org2e487b2" class="outline-2">
+<h2 id="org2e487b2"><span class="section-number-2">3</span> How is the website generated?</h2>
 <div class="outline-text-2" id="text-3">
 <p>
 Using the schema we use <a href="https://pypi.org/project/PyShEx/">pyshex</a> shex expressions and <a href="https://github.com/common-workflow-language/schema_salad">schema salad</a> to
@@ -285,13 +300,13 @@ All from that one metadata schema.
 </div>
 </div>
 
-<div id="outline-container-orge709ae2" class="outline-2">
-<h2 id="orge709ae2"><span class="section-number-2">4</span> Modifying the schema</h2>
+<div id="outline-container-orge4dfe84" class="outline-2">
+<h2 id="orge4dfe84"><span class="section-number-2">4</span> Modifying the schema</h2>
 <div class="outline-text-2" id="text-4">
 <p>
-One of the first things we wanted to do is to add a field for the data
-license. Initially we only support CC-4.0 as a license by default, but
-now we want to give uploaders the option to make it an even more
+One of the first things we want to do is to add a field for the data
+license. Initially we only supported CC-4.0 as a license, but
+we wanted to give uploaders the option to use an even more
 liberal CC0 license. The first step is to find a good ontology term
 for the field. Searching for `creative commons cc0 rdf' rendered this
 useful <a href="https://creativecommons.org/ns">page</a>.  We also find an <a href="https://wiki.creativecommons.org/wiki/CC_License_Rdf_Overview">overview</a> where CC0 is represented as URI
@@ -302,13 +317,148 @@ attributionName and attributionURL.
 </p>
 
 <p>
-<i>Note: work in progress</i>
+A minimal triple should be
+</p>
+
+<pre class="example">
+id  xhtml:license  &lt;http://creativecommons.org/licenses/by/4.0/&gt; .
+</pre>
+
+
+<p>
+Other suggestions are
+</p>
+
+<pre class="example">
+id  dc:title "Description" .
+id  cc:attributionName "Your Name" .
+id  cc:attributionURL &lt;http://resource.org/id&gt;
+</pre>
+
+
+<p>
+and 'dc:source' which indicates the original source of any modified
+work, specified as a URI.
+The prefix 'cc:' is an abbreviation for <a href="http://creativecommons.org/ns#">http://creativecommons.org/ns#</a>.
+</p>
+
+<p>
+Going back to the schema, where does it fit? Under host, sample,
+virus, technology or submitter block? It could fit under sample, but
+actually the license concerns the whole metadata block and sequence,
+so I think we can fit under its own license tag. For example
+</p>
+
+
+<p>
+id: placeholder
+</p>
+
+<pre class="example">
+license:
+    license_type: http://creativecommons.org/licenses/by/4.0/
+    attribution_title: "Sample ID"
+    attribution_name: "John doe, Joe Boe, Jonny Oe"
+    attribution_url: http://covid19.genenetwork.org/id
+    attribution_source: https://www.ncbi.nlm.nih.gov/pubmed/323088888
+</pre>
+
+
+<p>
+So, let's update the example. Notice the license info is optional - if it is missing
+we just assume the default CC-BY-4.0.
+</p>
+
+<p>
+One thing that is interesting is that in the name space <a href="https://creativecommons.org/ns">https://creativecommons.org/ns</a> there
+is no mention of a title. I think it is useful, however, because we have no such field.
+So, we'll add it simply as a title field. Now the draft schema is
 </p>
+
+<div class="org-src-container">
+<pre class="src src-js">- name: licenseSchema
+  type: record
+  fields:
+    license_type:
+      doc: License types as refined in https://wiki.creativecommons.org/images/d/d6/Ccrel-1.0.pdf
+      type: string?
+      jsonldPredicate:
+          _id: https://creativecommons.org/ns#License
+    title:
+      doc: Attribution title related to license
+      type: string?
+      jsonldPredicate:
+          _id: http://semanticscience.org/resource/SIO_001167
+    attribution_url:
+      doc: Attribution URL related to license
+      type: string?
+      jsonldPredicate:
+          _id: https://creativecommons.org/ns#Work
+    attribution_source:
+      doc: Attribution source URL
+      type: string?
+      jsonldPredicate:
+          _id: https://creativecommons.org/ns#Work
+</pre>
+</div>
+
+<p>
+Now, we are no ontology experts, right? So, next we submit a patch to
+our source tree and ask for feedback before wiring it up in the data
+entry form. The pull request was submitted <a href="https://github.com/arvados/bh20-seq-resource/pull/97">here</a>, reviewed on the
+gitter channel, and then I merged it.
+</p>
+</div>
 </div>
+
+<div id="outline-container-org564a7a8" class="outline-2">
+<h2 id="org564a7a8"><span class="section-number-2">5</span> Adding fields to the form</h2>
+<div class="outline-text-2" id="text-5">
+<p>
+To add the new fields to the form we have to modify it a little. If we
+go to the upload form we need to add the license box. The schema is
+loaded in <a href="https://github.com/arvados/bh20-seq-resource/blob/a0c8ebd57b875f265e8b0efec4abfaf892eb6c45/bh20simplewebuploader/main.py#L229">main.py</a> in the 'generate_form' function.
+</p>
+
+<p>
+With this <a href="https://github.com/arvados/bh20-seq-resource/commit/b9691c7deae30bd6422fb7b0681572b7b6f78ae3">patch</a> the website adds the license input fields on the form.
+</p>
+
+<p>
+Finally, to make RDF output work we need to add expressions to bh20seq-shex.rdf. This
+was done with this <a href="https://github.com/arvados/bh20-seq-resource/commit/f4ed46dae20abe5147871495ede2d6ac2b0854bc">patch</a>. In the end we decided to use the Dublin core title,
+<a href="http://purl.org/metadata/dublin_core_elements#Title">http://purl.org/metadata/dublin_core_elements#Title</a>:
+</p>
+
+<div class="org-src-container">
+<pre class="src src-js">:licenseShape{
+    cc:License xsd:string;
+    dc:Title xsd:string ?;
+    cc:attributionName xsd:string ?;
+    cc:attributionURL xsd:string ?;
+    cc:attributionSource xsd:string ?;
+}
+</pre>
+</div>
+
+<p>
+Note that cc:attributionSource is not really defined in the cc standard.
+</p>
+
+<p>
+When pushing the license info we discovered the workflow broke because
+the existing data had no licensing info. So we changed the license
+field to be optional - when it is missing we assume CC-BY-4.0.
+</p>
+</div>
+</div>
+
+<div id="outline-container-org633781a" class="outline-2">
+<h2 id="org633781a"><span class="section-number-2">6</span> <span class="todo TODO">TODO</span> Testing the license fields</h2>
 </div>
 </div>
 <div id="postamble" class="status">
-<hr><small>Created by <a href="http://thebird.nl/">Pjotr Prins</a> (pjotr.public768 at thebird 'dot' nl) using Emacs org-mode and a healthy dose of Lisp!<br />Modified 2020-07-12 Sun 06:24</small>.
+<hr><small>Created by <a href="http://thebird.nl/">Pjotr Prins</a> (pjotr.public768 at thebird 'dot' nl) using Emacs org-mode and a healthy dose of Lisp!<br />Modified 2020-07-16 Thu 03:27</small>.
 </div>
 </body>
 </html>
-- 
cgit 1.4.1