aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--bh20sequploader/bh20seq-options.yml4
-rw-r--r--bh20sequploader/bh20seq-schema.yml26
-rw-r--r--bh20simplewebuploader/main.py37
-rw-r--r--bh20simplewebuploader/static/main.js6
-rw-r--r--bh20simplewebuploader/templates/about.html13
-rw-r--r--bh20simplewebuploader/templates/blog.html13
-rw-r--r--bh20simplewebuploader/templates/demo.html6
-rw-r--r--bh20simplewebuploader/templates/download.html13
-rw-r--r--bh20simplewebuploader/templates/map.html13
-rw-r--r--doc/INSTALL.md6
-rw-r--r--doc/blog/using-covid-19-pubseq-part4.html44
-rw-r--r--doc/blog/using-covid-19-pubseq-part4.org21
-rw-r--r--doc/blog/using-covid-19-pubseq-part5.html79
-rw-r--r--doc/blog/using-covid-19-pubseq-part5.org113
-rw-r--r--doc/web/about.html143
-rw-r--r--doc/web/about.org20
-rw-r--r--example/maximum_metadata_example.yaml10
-rw-r--r--example/minimal_metadata_example.yaml6
18 files changed, 383 insertions, 190 deletions
diff --git a/bh20sequploader/bh20seq-options.yml b/bh20sequploader/bh20seq-options.yml
index c553f41..d1ea398 100644
--- a/bh20sequploader/bh20seq-options.yml
+++ b/bh20sequploader/bh20seq-options.yml
@@ -3,6 +3,10 @@
# being unique or at least using the same options in different containing
# types.
+license_type:
+ CC0 Public Domain Dedication: http://creativecommons.org/publicdomain/zero/1.0/
+ CC-BY-4.0 Attribution 4.0 International: http://creativecommons.org/licenses/by/4.0/
+
host_age_unit:
Years: http://purl.obolibrary.org/obo/UO_0000036
Months: http://purl.obolibrary.org/obo/UO_0000035
diff --git a/bh20sequploader/bh20seq-schema.yml b/bh20sequploader/bh20seq-schema.yml
index a8ab920..29ac22c 100644
--- a/bh20sequploader/bh20seq-schema.yml
+++ b/bh20sequploader/bh20seq-schema.yml
@@ -1,5 +1,6 @@
$base: http://biohackathon.org/bh20-seq-schema
$namespaces:
+ cc: http://creativecommons.org/ns#
sch: https://schema.org/
efo: http://www.ebi.ac.uk/efo/
obo: http://purl.obolibrary.org/obo/
@@ -9,6 +10,30 @@ $namespaces:
$graph:
+- name: licenseSchema
+ type: record
+ fields:
+ license_type:
+ doc: License types as defined in https://wiki.creativecommons.org/images/d/d6/Ccrel-1.0.pdf
+ type: string
+ jsonldPredicate:
+ _id: https://creativecommons.org/ns#License
+ title:
+ doc: Attribution title related to data license
+ type: string?
+ jsonldPredicate:
+ _id: http://semanticscience.org/resource/SIO_001167
+ attribution_url:
+ doc: Attribution URL related to data license
+ type: string?
+ jsonldPredicate:
+ _id: https://creativecommons.org/ns#Work
+ attribution_source:
+ doc: Attribution source URL related to data license
+ type: string?
+ jsonldPredicate:
+ _id: https://creativecommons.org/ns#Work
+
- name: hostSchema
type: record
fields:
@@ -233,6 +258,7 @@ $graph:
virus: virusSchema
technology: technologySchema
submitter: submitterSchema
+ license: licenseSchema
id:
doc: The subject (eg the fasta/fastq file) that the metadata describes
type: string
diff --git a/bh20simplewebuploader/main.py b/bh20simplewebuploader/main.py
index 9132453..8a6794e 100644
--- a/bh20simplewebuploader/main.py
+++ b/bh20simplewebuploader/main.py
@@ -47,6 +47,7 @@ def type_to_heading(type_name):
Turn a type name like "sampleSchema" from the metadata schema into a human-readable heading.
"""
+ print(type_name,file=sys.stderr)
# Remove camel case
decamel = re.sub('([A-Z])', r' \1', type_name)
# Split
@@ -227,8 +228,13 @@ def generate_form(schema, options):
# At startup, we need to load the metadata schema from the uploader module, so we can make a form for it
-METADATA_SCHEMA = yaml.safe_load(pkg_resources.resource_stream("bh20sequploader", "bh20seq-schema.yml"))
-METADATA_OPTION_DEFINITIONS = yaml.safe_load(pkg_resources.resource_stream("bh20sequploader", "bh20seq-options.yml"))
+if os.path.isfile("bh20sequploader/bh20seq-schema.yml"):
+ METADATA_SCHEMA = yaml.safe_load(open("bh20sequploader/bh20seq-schema.yml","r").read())
+ METADATA_OPTION_DEFINITIONS = yaml.safe_load(open("bh20sequploader/bh20seq-options.yml","r").read())
+else:
+ METADATA_SCHEMA = yaml.safe_load(pkg_resources.resource_stream("bh20sequploader", "bh20seq-schema.yml"))
+ METADATA_OPTION_DEFINITIONS = yaml.safe_load(pkg_resources.resource_stream("bh20sequploader", "bh20seq-options.yml"))
+# print(METADATA_SCHEMA,file=sys.stderr)
FORM_ITEMS = generate_form(METADATA_SCHEMA, METADATA_OPTION_DEFINITIONS)
@app.route('/')
@@ -505,7 +511,7 @@ def status_page():
Processing status
"""
- api = arvados.api(host=ARVADOS_API, token=ANONYMOUS_TOKEN)
+ api = arvados.api(host=ARVADOS_API, token=ANONYMOUS_TOKEN, insecure=True)
pending = arvados.util.list_all(api.collections().list, filters=[["owner_uuid", "=", UPLOADER_PROJECT]])
out = []
status = {}
@@ -567,11 +573,34 @@ baseURL='http://sparql.genenetwork.org/sparql/'
@app.route('/api/getCount', methods=['GET'])
def getCount():
- api = arvados.api(host=ARVADOS_API, token=ANONYMOUS_TOKEN)
+ """
+ Get sequence counts from Arvados record
+ """
+ api = arvados.api(host=ARVADOS_API, token=ANONYMOUS_TOKEN, insecure=True)
c = api.collections().list(filters=[["owner_uuid", "=", VALIDATED_PROJECT]], limit=1).execute()
return jsonify({'sequences': c["items_available"]})
+@app.route('/api/getCountDB', methods=['GET'])
+def getCountDB():
+ """
+ Get sequence counts from Virtuoso DB
+ """
+ query="""
+ PREFIX pubseq: <http://biohackathon.org/bh20-seq-schema#MainSchema/>
+ select (COUNT(distinct ?dataset) as ?num)
+ {
+ ?dataset pubseq:submitter ?id .
+ ?id ?p ?submitter
+ }
+ """
+ payload = {'query': query, 'format': 'json'}
+ r = requests.get(baseURL, params=payload)
+ result = r.json()['results']['bindings']
+ # [{'num': {'type': 'typed-literal', 'datatype': 'http://www.w3.org/2001/XMLSchema#integer', 'value': '1352'}}]
+ # print(result, file=sys.stderr)
+ return jsonify({'sequences': int(result[0]["num"]["value"])})
+
@app.route('/api/getAllaccessions', methods=['GET'])
def getAllaccessions():
query="""SELECT DISTINCT ?fasta ?value WHERE {?fasta ?x[ <http://edamontology.org/data_2091> ?value ]}"""
diff --git a/bh20simplewebuploader/static/main.js b/bh20simplewebuploader/static/main.js
index a9dfc10..4703047 100644
--- a/bh20simplewebuploader/static/main.js
+++ b/bh20simplewebuploader/static/main.js
@@ -89,6 +89,10 @@ let fetchCount = () => {
fetchAPI("/api/getCount");
}
+let fetchCountDB = () => {
+ fetchAPI("/api/getCountDB");
+}
+
let fetchSEQCountBySpecimen = () => {
fetchAPIV2("/api/getSEQCountbySpecimenSource");
}
@@ -191,7 +195,7 @@ function addField(e) {
// Increment the number and use the keypath and number to set IDs and cross
// references.
// TODO: Heavily dependent on the form field HTML. Maybe we want custom
- // elements for the labeled controlsd that know how to be list items?
+ // elements for the labeled controls that know how to be list items?
fieldNumber++
newField.dataset.number = fieldNumber
let newID = keypath + '[' + fieldNumber + ']'
diff --git a/bh20simplewebuploader/templates/about.html b/bh20simplewebuploader/templates/about.html
index 07b6951..4bd238e 100644
--- a/bh20simplewebuploader/templates/about.html
+++ b/bh20simplewebuploader/templates/about.html
@@ -11,19 +11,6 @@
<script type="text/javascript">
let scriptRoot = {{ request.script_root|tojson|safe }}; // examples
-
- document.addEventListener("DOMContentLoaded", function(){
- var count = fetch("/api/getCount")
- .then((resp) => resp.json())
- .then(function (data) {
- count = data["sequences"];
- console.log(count);
- span = document.getElementById("Counter");
- txt = document.createTextNode(count);
- span.appendChild(txt);
- });
- });
-
</script>
</body>
diff --git a/bh20simplewebuploader/templates/blog.html b/bh20simplewebuploader/templates/blog.html
index 8f8ab66..dbc0b99 100644
--- a/bh20simplewebuploader/templates/blog.html
+++ b/bh20simplewebuploader/templates/blog.html
@@ -73,19 +73,6 @@
<script type="text/javascript">
let scriptRoot = {{ request.script_root|tojson|safe }}; // examples
-
- document.addEventListener("DOMContentLoaded", function(){
- var count = fetch("/api/getCount")
- .then((resp) => resp.json())
- .then(function (data) {
- count = data["sequences"];
- console.log(count);
- span = document.getElementById("Counter");
- txt = document.createTextNode(count);
- span.appendChild(txt);
- });
- });
-
</script>
</body>
diff --git a/bh20simplewebuploader/templates/demo.html b/bh20simplewebuploader/templates/demo.html
index 76c19c4..44aded0 100644
--- a/bh20simplewebuploader/templates/demo.html
+++ b/bh20simplewebuploader/templates/demo.html
@@ -5,6 +5,7 @@
{% include 'banner.html' %}
{% include 'menu.html' %}
{% include 'search.html' %}
+ <p>The Virtuoso database contains <span id="CounterDB"></span> public sequences!</p>
{% include 'demo-run.html' %}
{% include 'footer.html' %}
@@ -12,16 +13,17 @@
let scriptRoot = {{ request.script_root|tojson|safe }}; // examples
document.addEventListener("DOMContentLoaded", function(){
- var count = fetch("/api/getCount")
+ var count = fetch("/api/getCountDB")
.then((resp) => resp.json())
.then(function (data) {
count = data["sequences"];
console.log(count);
- span = document.getElementById("Counter");
+ span = document.getElementById("CounterDB");
txt = document.createTextNode(count);
span.appendChild(txt);
});
});
+
</script>
</body>
diff --git a/bh20simplewebuploader/templates/download.html b/bh20simplewebuploader/templates/download.html
index 07b6951..4bd238e 100644
--- a/bh20simplewebuploader/templates/download.html
+++ b/bh20simplewebuploader/templates/download.html
@@ -11,19 +11,6 @@
<script type="text/javascript">
let scriptRoot = {{ request.script_root|tojson|safe }}; // examples
-
- document.addEventListener("DOMContentLoaded", function(){
- var count = fetch("/api/getCount")
- .then((resp) => resp.json())
- .then(function (data) {
- count = data["sequences"];
- console.log(count);
- span = document.getElementById("Counter");
- txt = document.createTextNode(count);
- span.appendChild(txt);
- });
- });
-
</script>
</body>
diff --git a/bh20simplewebuploader/templates/map.html b/bh20simplewebuploader/templates/map.html
index 6d63c3c..595af0c 100644
--- a/bh20simplewebuploader/templates/map.html
+++ b/bh20simplewebuploader/templates/map.html
@@ -16,19 +16,6 @@
<script type="text/javascript">
let scriptRoot = {{ request.script_root|tojson|safe }}; // examples
-
- document.addEventListener("DOMContentLoaded", function(){
- var count = fetch("/api/getCount")
- .then((resp) => resp.json())
- .then(function (data) {
- count = data["sequences"];
- console.log(count);
- span = document.getElementById("Counter");
- txt = document.createTextNode(count);
- span.appendChild(txt);
- });
- });
-
</script>
<!-- Make sure you put this AFTER Leaflet's CSS -->
diff --git a/doc/INSTALL.md b/doc/INSTALL.md
index 6dcd72b..3b270dd 100644
--- a/doc/INSTALL.md
+++ b/doc/INSTALL.md
@@ -42,7 +42,7 @@ repository.
### Using the Web Uploader
-To run the web uploader in a GNU Guix environment/container
+To run the web uploader in a GNU Guix environment/container, run it with something like
```
guix environment guix --ad-hoc git python python-flask python-pyyaml python-pycurl python-magic nss-certs --network openssl -- env FLASK_ENV=development PYTHONPATH=$PYTHONPATH:./bh20sequploader FLASK_APP=bh20simplewebuploader/main.py flask run
@@ -59,7 +59,7 @@ WIP: add gunicorn container
Currently the full webserver container deploy command looks like
```
-penguin2:~/iwrk/opensource/code/vg/bh20-seq-resource$ env GUIX_PACKAGE_PATH=~/iwrk/opensource/guix/guix-bioinformatics/ ~/iwrk/opensource/guix/guix/pre-inst-env guix environment -C guix --ad-hoc git python python-flask python-pyyaml python-pycurl python-magic nss-certs python-pyshex python-pyyaml --network openssl python-pyshex python-pyshexc clustalw python-schema-salad python-arvados-python-client --share=/export/tmp -- env TMPDIR=/export/tmp FLASK_ENV=development FLASK_APP=bh20simplewebuploader/main.py flask run
-``
+penguin2:~/iwrk/opensource/code/vg/bh20-seq-resource$ env GUIX_PACKAGE_PATH=~/iwrk/opensource/guix/guix-bioinformatics/ ~/iwrk/opensource/guix/guix/pre-inst-env guix environment -C guix --ad-hoc git python python-flask python-pyyaml python-pycurl python-magic nss-certs python-pyshex python-pyyaml --network openssl python-pyshex python-pyshexc clustalw python-schema-salad python-arvados-python-client --share=/export/tmp -- env TMPDIR=/export/tmp FLASK_ENV=development FLASK_APP=bh20simplewebuploader/main.py flask run
+```
Note: see above on GUIX_PACKAGE_PATH.
diff --git a/doc/blog/using-covid-19-pubseq-part4.html b/doc/blog/using-covid-19-pubseq-part4.html
index 67d299e..b5a05ca 100644
--- a/doc/blog/using-covid-19-pubseq-part4.html
+++ b/doc/blog/using-covid-19-pubseq-part4.html
@@ -3,10 +3,10 @@
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en">
<head>
-<!-- 2020-05-30 Sat 11:52 -->
+<!-- 2020-07-12 Sun 06:24 -->
<meta http-equiv="Content-Type" content="text/html;charset=utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1" />
-<title>&lrm;</title>
+<title>COVID-19 PubSeq (part 4)</title>
<meta name="generator" content="Org mode" />
<meta name="author" content="Pjotr Prins" />
<style type="text/css">
@@ -161,19 +161,6 @@
.footdef { margin-bottom: 1em; }
.figure { padding: 1em; }
.figure p { text-align: center; }
- .equation-container {
- display: table;
- text-align: center;
- width: 100%;
- }
- .equation {
- vertical-align: middle;
- }
- .equation-label {
- display: table-cell;
- text-align: right;
- vertical-align: middle;
- }
.inlinetask {
padding: 10px;
border: 2px solid gray;
@@ -193,12 +180,13 @@
.org-svg { width: 90%; }
/*]]>*/-->
</style>
+<link rel="Blog stylesheet" type="text/css" href="blog.css" />
<script type="text/javascript">
/*
@licstart The following is the entire license notice for the
JavaScript code in this tag.
-Copyright (C) 2012-2020 Free Software Foundation, Inc.
+Copyright (C) 2012-2018 Free Software Foundation, Inc.
The JavaScript code in this tag is free software: you can
redistribute it and/or modify it under the terms of the GNU
@@ -242,25 +230,41 @@ for the JavaScript code in this tag.
</head>
<body>
<div id="content">
+<h1 class="title">COVID-19 PubSeq (part 4)</h1>
<div id="table-of-contents">
<h2>Table of Contents</h2>
<div id="text-table-of-contents">
<ul>
-<li><a href="#orgda6f48c">1. Modify Workflow</a></li>
+<li><a href="#org8f8b64a">1. What does this mean?</a></li>
+<li><a href="#orgcc7a403">2. Modify Workflow</a></li>
</ul>
</div>
</div>
-<div id="outline-container-orgda6f48c" class="outline-2">
-<h2 id="orgda6f48c"><span class="section-number-2">1</span> Modify Workflow</h2>
+
+
+<div id="outline-container-org8f8b64a" class="outline-2">
+<h2 id="org8f8b64a"><span class="section-number-2">1</span> What does this mean?</h2>
<div class="outline-text-2" id="text-1">
<p>
+This means that when someone uploads a SARS-CoV-2 sequence using one
+of our tools (CLI or web-based) they add a sequence and some metadata
+which triggers a rerun of our workflows.
+</p>
+</div>
+</div>
+
+
+<div id="outline-container-orgcc7a403" class="outline-2">
+<h2 id="orgcc7a403"><span class="section-number-2">2</span> Modify Workflow</h2>
+<div class="outline-text-2" id="text-2">
+<p>
<i>Work in progress!</i>
</p>
</div>
</div>
</div>
<div id="postamble" class="status">
-<hr><small>Created by <a href="http://thebird.nl/">Pjotr Prins</a> (pjotr.public768 at thebird 'dot' nl) using Emacs org-mode and a healthy dose of Lisp!<br />Modified 2020-05-30 Sat 11:52</small>.
+<hr><small>Created by <a href="http://thebird.nl/">Pjotr Prins</a> (pjotr.public768 at thebird 'dot' nl) using Emacs org-mode and a healthy dose of Lisp!<br />Modified 2020-07-12 Sun 06:24</small>.
</div>
</body>
</html>
diff --git a/doc/blog/using-covid-19-pubseq-part4.org b/doc/blog/using-covid-19-pubseq-part4.org
index 58a1f56..5fe71d1 100644
--- a/doc/blog/using-covid-19-pubseq-part4.org
+++ b/doc/blog/using-covid-19-pubseq-part4.org
@@ -1,3 +1,24 @@
+#+TITLE: COVID-19 PubSeq (part 4)
+#+AUTHOR: Pjotr Prins
+# C-c C-e h h publish
+# C-c ! insert date (use . for active agenda, C-u C-c ! for date, C-u C-c . for time)
+# C-c C-t task rotate
+# RSS_IMAGE_URL: http://xxxx.xxxx.free.fr/rss_icon.png
+
+#+HTML_HEAD: <link rel="Blog stylesheet" type="text/css" href="blog.css" />
+
+
+* Table of Contents :TOC:noexport:
+ - [[#what-does-this-mean][What does this mean?]]
+ - [[#modify-workflow][Modify Workflow]]
+
+* What does this mean?
+
+This means that when someone uploads a SARS-CoV-2 sequence using one
+of our tools (CLI or web-based) they add a sequence and some metadata
+which triggers a rerun of our workflows.
+
+
* Modify Workflow
/Work in progress!/
diff --git a/doc/blog/using-covid-19-pubseq-part5.html b/doc/blog/using-covid-19-pubseq-part5.html
index 30a3f83..80bf559 100644
--- a/doc/blog/using-covid-19-pubseq-part5.html
+++ b/doc/blog/using-covid-19-pubseq-part5.html
@@ -3,10 +3,10 @@
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en">
<head>
-<!-- 2020-05-30 Sat 11:59 -->
+<!-- 2020-07-12 Sun 06:24 -->
<meta http-equiv="Content-Type" content="text/html;charset=utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1" />
-<title>&lrm;</title>
+<title>COVID-19 PubSeq (part 5)</title>
<meta name="generator" content="Org mode" />
<meta name="author" content="Pjotr Prins" />
<style type="text/css">
@@ -161,19 +161,6 @@
.footdef { margin-bottom: 1em; }
.figure { padding: 1em; }
.figure p { text-align: center; }
- .equation-container {
- display: table;
- text-align: center;
- width: 100%;
- }
- .equation {
- vertical-align: middle;
- }
- .equation-label {
- display: table-cell;
- text-align: right;
- vertical-align: middle;
- }
.inlinetask {
padding: 10px;
border: 2px solid gray;
@@ -193,12 +180,13 @@
.org-svg { width: 90%; }
/*]]>*/-->
</style>
+<link rel="Blog stylesheet" type="text/css" href="blog.css" />
<script type="text/javascript">
/*
@licstart The following is the entire license notice for the
JavaScript code in this tag.
-Copyright (C) 2012-2020 Free Software Foundation, Inc.
+Copyright (C) 2012-2018 Free Software Foundation, Inc.
The JavaScript code in this tag is free software: you can
redistribute it and/or modify it under the terms of the GNU
@@ -242,16 +230,22 @@ for the JavaScript code in this tag.
</head>
<body>
<div id="content">
+<h1 class="title">COVID-19 PubSeq (part 5)</h1>
<div id="table-of-contents">
<h2>Table of Contents</h2>
<div id="text-table-of-contents">
<ul>
-<li><a href="#org31c224e">1. Modify Metadata</a></li>
+<li><a href="#org871ad58">1. Modify Metadata</a></li>
+<li><a href="#org07e8755">2. What is the schema?</a></li>
+<li><a href="#org4857280">3. How is the website generated?</a></li>
+<li><a href="#orge709ae2">4. Modifying the schema</a></li>
</ul>
</div>
</div>
-<div id="outline-container-org31c224e" class="outline-2">
-<h2 id="org31c224e"><span class="section-number-2">1</span> Modify Metadata</h2>
+
+
+<div id="outline-container-org871ad58" class="outline-2">
+<h2 id="org871ad58"><span class="section-number-2">1</span> Modify Metadata</h2>
<div class="outline-text-2" id="text-1">
<p>
The public sequence resource uses multiple data formats listed on the
@@ -265,13 +259,56 @@ data are listed <a href="./blog?id=using-covid-19-pubseq-part1">here</a>.
<p>
In this BLOG we are going to look at the metadata entered on the
-<a href="./">COVID-19 PubSeq</a> website (or command line client).
+<a href="./">COVID-19 PubSeq</a> website (or command line client). It is important to
+understand that anyone, including you, can change that information!
+</p>
+</div>
+</div>
+
+<div id="outline-container-org07e8755" class="outline-2">
+<h2 id="org07e8755"><span class="section-number-2">2</span> What is the schema?</h2>
+<div class="outline-text-2" id="text-2">
+<p>
+The default metadata schema is listed <a href="https://github.com/arvados/bh20-seq-resource/blob/master/bh20sequploader/bh20seq-schema.yml">here</a>.
+</p>
+</div>
+</div>
+
+<div id="outline-container-org4857280" class="outline-2">
+<h2 id="org4857280"><span class="section-number-2">3</span> How is the website generated?</h2>
+<div class="outline-text-2" id="text-3">
+<p>
+Using the schema we use <a href="https://pypi.org/project/PyShEx/">pyshex</a> shex expressions and <a href="https://github.com/common-workflow-language/schema_salad">schema salad</a> to
+generate the <a href="https://github.com/arvados/bh20-seq-resource/blob/edb17e7f7caebfa1e76b21006b1772a33f4f7887/bh20simplewebuploader/templates/form.html#L47">input form</a>, <a href="https://github.com/arvados/bh20-seq-resource/blob/edb17e7f7caebfa1e76b21006b1772a33f4f7887/bh20sequploader/qc_metadata.py#L13">validate</a> the user input and to build <a href="https://github.com/arvados/bh20-seq-resource/blob/edb17e7f7caebfa1e76b21006b1772a33f4f7887/workflows/pangenome-generate/merge-metadata.py#L24">RDF</a>!
+All from that one metadata schema.
+</p>
+</div>
+</div>
+
+<div id="outline-container-orge709ae2" class="outline-2">
+<h2 id="orge709ae2"><span class="section-number-2">4</span> Modifying the schema</h2>
+<div class="outline-text-2" id="text-4">
+<p>
+One of the first things we wanted to do is to add a field for the data
+license. Initially we only support CC-4.0 as a license by default, but
+now we want to give uploaders the option to make it an even more
+liberal CC0 license. The first step is to find a good ontology term
+for the field. Searching for `creative commons cc0 rdf' rendered this
+useful <a href="https://creativecommons.org/ns">page</a>. We also find an <a href="https://wiki.creativecommons.org/wiki/CC_License_Rdf_Overview">overview</a> where CC0 is represented as URI
+<a href="https://creativecommons.org/publicdomain/zero/1.0/">https://creativecommons.org/publicdomain/zero/1.0/</a>. Meanwhile the
+attribution license is represented as <a href="https://creativecommons.org/licenses/by/4.0/">https://creativecommons.org/licenses/by/4.0/</a>.
+According to this <a href="https://wiki.creativecommons.org/images/d/d6/Ccrel-1.0.pdf">document</a> we should really also add fields for
+attributionName and attributionURL.
+</p>
+
+<p>
+<i>Note: work in progress</i>
</p>
</div>
</div>
</div>
<div id="postamble" class="status">
-<hr><small>Created by <a href="http://thebird.nl/">Pjotr Prins</a> (pjotr.public768 at thebird 'dot' nl) using Emacs org-mode and a healthy dose of Lisp!<br />Modified 2020-05-30 Sat 11:59</small>.
+<hr><small>Created by <a href="http://thebird.nl/">Pjotr Prins</a> (pjotr.public768 at thebird 'dot' nl) using Emacs org-mode and a healthy dose of Lisp!<br />Modified 2020-07-12 Sun 06:24</small>.
</div>
</body>
</html>
diff --git a/doc/blog/using-covid-19-pubseq-part5.org b/doc/blog/using-covid-19-pubseq-part5.org
index 8d7504e..aa06d5e 100644
--- a/doc/blog/using-covid-19-pubseq-part5.org
+++ b/doc/blog/using-covid-19-pubseq-part5.org
@@ -1,3 +1,20 @@
+#+TITLE: COVID-19 PubSeq (part 5)
+#+AUTHOR: Pjotr Prins
+# C-c C-e h h publish
+# C-c ! insert date (use . for active agenda, C-u C-c ! for date, C-u C-c . for time)
+# C-c C-t task rotate
+# RSS_IMAGE_URL: http://xxxx.xxxx.free.fr/rss_icon.png
+
+#+HTML_HEAD: <link rel="Blog stylesheet" type="text/css" href="blog.css" />
+
+
+* Table of Contents :TOC:noexport:
+ - [[#modify-metadata][Modify Metadata]]
+ - [[#what-is-the-schema][What is the schema?]]
+ - [[#how-is-the-website-generated][How is the website generated?]]
+ - [[#modifying-the-schema][Modifying the schema]]
+ - [[#adding-fields-to-the-form][Adding fields to the form]]
+
* Modify Metadata
The public sequence resource uses multiple data formats listed on the
@@ -10,8 +27,102 @@ data are listed [[./blog?id=using-covid-19-pubseq-part1][here]].
In this BLOG we are going to look at the metadata entered on the
[[./][COVID-19 PubSeq]] website (or command line client). It is important to
-understand that you and us can change that information.
+understand that anyone, including you, can change that information!
* What is the schema?
+The default metadata schema is listed [[https://github.com/arvados/bh20-seq-resource/blob/master/bh20sequploader/bh20seq-schema.yml][here]].
+
* How is the website generated?
+
+Using the schema we use [[https://pypi.org/project/PyShEx/][pyshex]] shex expressions and [[https://github.com/common-workflow-language/schema_salad][schema salad]] to
+generate the [[https://github.com/arvados/bh20-seq-resource/blob/edb17e7f7caebfa1e76b21006b1772a33f4f7887/bh20simplewebuploader/templates/form.html#L47][input form]], [[https://github.com/arvados/bh20-seq-resource/blob/edb17e7f7caebfa1e76b21006b1772a33f4f7887/bh20sequploader/qc_metadata.py#L13][validate]] the user input and to build [[https://github.com/arvados/bh20-seq-resource/blob/edb17e7f7caebfa1e76b21006b1772a33f4f7887/workflows/pangenome-generate/merge-metadata.py#L24][RDF]]!
+All from that one metadata schema.
+
+* Modifying the schema
+
+One of the first things we want to do is to add a field for the data
+license. Initially we only support CC-4.0 as a license by default, but
+now we want to give uploaders the option to make it an even more
+liberal CC0 license. The first step is to find a good ontology term
+for the field. Searching for `creative commons cc0 rdf' rendered this
+useful [[https://creativecommons.org/ns][page]]. We also find an [[https://wiki.creativecommons.org/wiki/CC_License_Rdf_Overview][overview]] where CC0 is represented as URI
+https://creativecommons.org/publicdomain/zero/1.0/. Meanwhile the
+attribution license is represented as https://creativecommons.org/licenses/by/4.0/.
+According to this [[https://wiki.creativecommons.org/images/d/d6/Ccrel-1.0.pdf][document]] we should really also add fields for
+attributionName and attributionURL.
+
+A minimal triple should be
+
+: id xhtml:license <http://creativecommons.org/licenses/by/4.0/> .
+
+Other suggestions are
+
+: id dc:title "Description" .
+: id cc:attributionName "Your Name" .
+: id cc:attributionURL <http://resource.org/id>
+
+and 'dc:source' which indicates the original source of any modified
+work, specified as a URI.
+The prefix 'cc:' is an abbreviation for http://creativecommons.org/ns#.
+
+Going back to the schema, where does it fit? Under host, sample,
+virus, technology or submitter block? It could fit under sample, but
+actually the license concerns the whole metadata block and sequence,
+so I think we can fit it under its own license tag. For example
+
+
+id: placeholder
+
+: license:
+: license_type: http://creativecommons.org/licenses/by/4.0/
+: attribution_title: "Sample ID"
+: attribution_name: "John doe, Joe Boe, Jonny Oe"
+: attribution_url: http://covid19.genenetwork.org/id
+: attribution_source: https://www.ncbi.nlm.nih.gov/pubmed/323088888
+
+So, let's update the example. Notice the license info is optional - if it is missing
+we just assume the default CC-4.0.
+
+One thing that is interesting is that in the name space https://creativecommons.org/ns there
+is no mention of a title. I think it is useful, however, because we have no such field.
+So, we'll add it simply as a title field. Now the draft schema is
+
+#+BEGIN_SRC js
+- name: licenseSchema
+ type: record
+ fields:
+ license_type:
+ doc: License types as defined in https://wiki.creativecommons.org/images/d/d6/Ccrel-1.0.pdf
+ type: string?
+ jsonldPredicate:
+ _id: https://creativecommons.org/ns#License
+ title:
+ doc: Attribution title related to license
+ type: string?
+ jsonldPredicate:
+ _id: http://semanticscience.org/resource/SIO_001167
+ attribution_url:
+ doc: Attribution URL related to license
+ type: string?
+ jsonldPredicate:
+ _id: https://creativecommons.org/ns#Work
+ attribution_source:
+ doc: Attribution source URL
+ type: string?
+ jsonldPredicate:
+ _id: https://creativecommons.org/ns#Work
+#+END_SRC
+
+Now, we are no ontology experts, right? So, next we submit a patch to
+our source tree and ask for feedback before wiring it up in the data
+entry form. The pull request was submitted [[https://github.com/arvados/bh20-seq-resource/pull/97][here]] and reviewed on the
+gitter channel and I merged it.
+
+* Adding fields to the form
+
+To add the new fields to the form we have to modify it a little. If we
+go to the upload form we need to add the license box. The schema is
+loaded in [[https://github.com/arvados/bh20-seq-resource/blob/a0c8ebd57b875f265e8b0efec4abfaf892eb6c45/bh20simplewebuploader/main.py#L229][main.py]] in the 'generate_form' function.
+
+/Note: work in progress/
diff --git a/doc/web/about.html b/doc/web/about.html
index c907e6c..9b16c92 100644
--- a/doc/web/about.html
+++ b/doc/web/about.html
@@ -3,7 +3,7 @@
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en">
<head>
-<!-- 2020-05-29 Fri 08:27 -->
+<!-- 2020-07-12 Sun 06:29 -->
<meta http-equiv="Content-Type" content="text/html;charset=utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1" />
<title>About/FAQ</title>
@@ -161,19 +161,6 @@
.footdef { margin-bottom: 1em; }
.figure { padding: 1em; }
.figure p { text-align: center; }
- .equation-container {
- display: table;
- text-align: center;
- width: 100%;
- }
- .equation {
- vertical-align: middle;
- }
- .equation-label {
- display: table-cell;
- text-align: right;
- vertical-align: middle;
- }
.inlinetask {
padding: 10px;
border: 2px solid gray;
@@ -198,7 +185,7 @@
@licstart The following is the entire license notice for the
JavaScript code in this tag.
-Copyright (C) 2012-2020 Free Software Foundation, Inc.
+Copyright (C) 2012-2018 Free Software Foundation, Inc.
The JavaScript code in this tag is free software: you can
redistribute it and/or modify it under the terms of the GNU
@@ -247,29 +234,29 @@ for the JavaScript code in this tag.
<h2>Table of Contents</h2>
<div id="text-table-of-contents">
<ul>
-<li><a href="#org783b5e9">1. What is the 'public sequence resource' about?</a></li>
-<li><a href="#org2c0bcfd">2. Who created the public sequence resource?</a></li>
-<li><a href="#org34070d3">3. How does the public sequence resource compare to other data resources?</a></li>
-<li><a href="#org64a9493">4. Why should I upload my data here?</a></li>
-<li><a href="#orgf898e7f">5. Why should I not upload by data here?</a></li>
-<li><a href="#org828e164">6. How does the public sequence resource work?</a></li>
-<li><a href="#org7b0d03f">7. Who uses the public sequence resource?</a></li>
-<li><a href="#org31aaf23">8. Is this about open data?</a></li>
-<li><a href="#orgb376b6c">9. Is this about free software?</a></li>
-<li><a href="#orgf19cd96">10. How do I upload raw data?</a></li>
-<li><a href="#orgebfed00">11. How do I change metadata?</a></li>
-<li><a href="#orge2aecf8">12. How do I change the work flows?</a></li>
-<li><a href="#orgd45b3bc">13. How do I change the source code?</a></li>
-<li><a href="#org2bb9455">14. Should I choose CC-BY or CC0?</a></li>
-<li><a href="#org62bf23f">15. How do I deal with private data and privacy?</a></li>
-<li><a href="#org40c6da0">16. How do I communicate with you?</a></li>
-<li><a href="#org1f27c44">17. Who are the sponsors?</a></li>
+<li><a href="#orgac6ad8b">1. What is the 'public sequence resource' about?</a></li>
+<li><a href="#org0c21c2e">2. Who created the public sequence resource?</a></li>
+<li><a href="#org3fb8cb3">3. How does the public sequence resource compare to other data resources?</a></li>
+<li><a href="#org6cd9ea2">4. Why should I upload my data here?</a></li>
+<li><a href="#org0b6e3fb">5. Why should I not upload my data here?</a></li>
+<li><a href="#org3eb3a4e">6. How does the public sequence resource work?</a></li>
+<li><a href="#org7a397f5">7. Who uses the public sequence resource?</a></li>
+<li><a href="#org92cb008">8. Is this about open data?</a></li>
+<li><a href="#org232d6fa">9. Is this about free software?</a></li>
+<li><a href="#orgd93869f">10. How do I upload raw data?</a></li>
+<li><a href="#org88e8b0a">11. How do I change metadata?</a></li>
+<li><a href="#orgd04b8f8">12. How do I change the work flows?</a></li>
+<li><a href="#org5d1ee05">13. How do I change the source code?</a></li>
+<li><a href="#orgae6461b">14. Should I choose CC-BY or CC0?</a></li>
+<li><a href="#org3ea90a9">15. How do I deal with private data and privacy?</a></li>
+<li><a href="#org7ff7106">16. How do I communicate with you?</a></li>
+<li><a href="#org9566fa7">17. Who are the sponsors?</a></li>
</ul>
</div>
</div>
-<div id="outline-container-org783b5e9" class="outline-2">
-<h2 id="org783b5e9"><span class="section-number-2">1</span> What is the 'public sequence resource' about?</h2>
+<div id="outline-container-orgac6ad8b" class="outline-2">
+<h2 id="orgac6ad8b"><span class="section-number-2">1</span> What is the 'public sequence resource' about?</h2>
<div class="outline-text-2" id="text-1">
<p>
The <b>public sequence resource</b> aims to provide a generic and useful
@@ -280,17 +267,18 @@ sequence comparison and protein prediction.
</div>
</div>
-<div id="outline-container-org2c0bcfd" class="outline-2">
-<h2 id="org2c0bcfd"><span class="section-number-2">2</span> Who created the public sequence resource?</h2>
+<div id="outline-container-org0c21c2e" class="outline-2">
+<h2 id="org0c21c2e"><span class="section-number-2">2</span> Who created the public sequence resource?</h2>
<div class="outline-text-2" id="text-2">
<p>
The <b>public sequence resource</b> is an initiative by <a href="https://github.com/arvados/bh20-seq-resource/graphs/contributors">bioinformatics</a> and
ontology experts who want to create something agile and useful for the
wider research community. The initiative started at the COVID-19
biohackathon in April 2020 and is ongoing. The main project drivers
-are Pjotr Prins (UTHSC), Peter Amstutz (Curii), Michael Crusoe (Common
-Workflow Language), Thomas Liener (consultant, formerly EBI) and
-Jerven Bolleman (Swiss Institute of Bioinformatics).
+are Pjotr Prins (UTHSC), Peter Amstutz (Curii), Andrea Guarracino
+(University of Rome Tor Vergata), Michael Crusoe (Common Workflow
+Language), Thomas Liener (consultant, formerly EBI), Erik Garrison
+(UCSC) and Jerven Bolleman (Swiss Institute of Bioinformatics).
</p>
<p>
@@ -301,8 +289,8 @@ wrangling experts. Thank you everyone!
</div>
</div>
-<div id="outline-container-org34070d3" class="outline-2">
-<h2 id="org34070d3"><span class="section-number-2">3</span> How does the public sequence resource compare to other data resources?</h2>
+<div id="outline-container-org3fb8cb3" class="outline-2">
+<h2 id="org3fb8cb3"><span class="section-number-2">3</span> How does the public sequence resource compare to other data resources?</h2>
<div class="outline-text-2" id="text-3">
<p>
The short version is that we use state-of-the-art practices in
@@ -312,17 +300,18 @@ to building out this resource!
</p>
<p>
-Importantly: all data is published under the <a href="https://creativecommons.org/licenses/by/4.0/">Creative Commons 4.0
-attribution license</a> which means it data can be published and workflows
-can run in public environments allowing for improved access for
-research and reproducible results. This contrasts with some other
-public resources, including GISAID.
+Importantly: all data is published under either the <a href="https://creativecommons.org/licenses/by/4.0/">Creative Commons
+4.0 attribution license</a> or the <a href="https://creativecommons.org/share-your-work/public-domain/cc0/">CC0 “No Rights Reserved” license</a> which
+means the data can be published and workflows can run in public
+environments allowing for improved access for research and
+reproducible results. This contrasts with some other public resources,
+including GISAID.
</p>
</div>
</div>
-<div id="outline-container-org64a9493" class="outline-2">
-<h2 id="org64a9493"><span class="section-number-2">4</span> Why should I upload my data here?</h2>
+<div id="outline-container-org6cd9ea2" class="outline-2">
+<h2 id="org6cd9ea2"><span class="section-number-2">4</span> Why should I upload my data here?</h2>
<div class="outline-text-2" id="text-4">
<ol class="org-ol">
<li>We champion truly shareable data without licensing restrictions - with proper
@@ -353,8 +342,8 @@ multiple resources.
</div>
</div>
-<div id="outline-container-orgf898e7f" class="outline-2">
-<h2 id="orgf898e7f"><span class="section-number-2">5</span> Why should I not upload by data here?</h2>
+<div id="outline-container-org0b6e3fb" class="outline-2">
+<h2 id="org0b6e3fb"><span class="section-number-2">5</span> Why should I not upload my data here?</h2>
<div class="outline-text-2" id="text-5">
<p>
Funny question. There are only good reasons to upload your data here
@@ -376,8 +365,8 @@ for bulk uploads!
</div>
</div>
-<div id="outline-container-org828e164" class="outline-2">
-<h2 id="org828e164"><span class="section-number-2">6</span> How does the public sequence resource work?</h2>
+<div id="outline-container-org3eb3a4e" class="outline-2">
+<h2 id="org3eb3a4e"><span class="section-number-2">6</span> How does the public sequence resource work?</h2>
<div class="outline-text-2" id="text-6">
<p>
On uploading a sequence with metadata it will automatically be
@@ -388,8 +377,8 @@ using workflows from the High Performance Open Biology Lab defined
</div>
</div>
-<div id="outline-container-org7b0d03f" class="outline-2">
-<h2 id="org7b0d03f"><span class="section-number-2">7</span> Who uses the public sequence resource?</h2>
+<div id="outline-container-org7a397f5" class="outline-2">
+<h2 id="org7a397f5"><span class="section-number-2">7</span> Who uses the public sequence resource?</h2>
<div class="outline-text-2" id="text-7">
<p>
The Swiss Institute of Bioinformatics has included this data in
@@ -397,14 +386,18 @@ The Swiss Institute of Bioinformatics has included this data in
</p>
<p>
+The Pantograph <a href="https://graph-genome.github.io/">viewer</a> uses PubSeq data for their visualisations.
+</p>
+
+<p>
<a href="https://uthsc.edu">UTHSC</a> and <a href="https://www.ornl.gov/news/ornl-fight-against-covid-19">ORNL</a> use COVID-19 PubSeq data for protein prediction and
drug development.
</p>
</div>
</div>
-<div id="outline-container-org31aaf23" class="outline-2">
-<h2 id="org31aaf23"><span class="section-number-2">8</span> Is this about open data?</h2>
+<div id="outline-container-org92cb008" class="outline-2">
+<h2 id="org92cb008"><span class="section-number-2">8</span> Is this about open data?</h2>
<div class="outline-text-2" id="text-8">
<p>
All data is published under a <a href="https://creativecommons.org/licenses/by/4.0/">Creative Commons 4.0 attribution license</a>
@@ -414,8 +407,8 @@ data and store it for further processing.
</div>
</div>
-<div id="outline-container-orgb376b6c" class="outline-2">
-<h2 id="orgb376b6c"><span class="section-number-2">9</span> Is this about free software?</h2>
+<div id="outline-container-org232d6fa" class="outline-2">
+<h2 id="org232d6fa"><span class="section-number-2">9</span> Is this about free software?</h2>
<div class="outline-text-2" id="text-9">
<p>
Absolutely. Free software allows for fully reproducible pipelines. You
@@ -424,8 +417,8 @@ can take our workflows and data and run it elsewhere!
</div>
</div>
-<div id="outline-container-orgf19cd96" class="outline-2">
-<h2 id="orgf19cd96"><span class="section-number-2">10</span> How do I upload raw data?</h2>
+<div id="outline-container-orgd93869f" class="outline-2">
+<h2 id="orgd93869f"><span class="section-number-2">10</span> How do I upload raw data?</h2>
<div class="outline-text-2" id="text-10">
<p>
We are preparing raw sequence data pipelines (fastq and BAM). The
@@ -440,8 +433,8 @@ assembly variations into consideration. This is all work in progress.
</div>
</div>
-<div id="outline-container-orgebfed00" class="outline-2">
-<h2 id="orgebfed00"><span class="section-number-2">11</span> How do I change metadata?</h2>
+<div id="outline-container-org88e8b0a" class="outline-2">
+<h2 id="org88e8b0a"><span class="section-number-2">11</span> How do I change metadata?</h2>
<div class="outline-text-2" id="text-11">
<p>
See the <a href="http://covid19.genenetwork.org/blog">http://covid19.genenetwork.org/blog</a>!
@@ -449,8 +442,8 @@ See the <a href="http://covid19.genenetwork.org/blog">http://covid19.genenetwork
</div>
</div>
-<div id="outline-container-orge2aecf8" class="outline-2">
-<h2 id="orge2aecf8"><span class="section-number-2">12</span> How do I change the work flows?</h2>
+<div id="outline-container-orgd04b8f8" class="outline-2">
+<h2 id="orgd04b8f8"><span class="section-number-2">12</span> How do I change the work flows?</h2>
<div class="outline-text-2" id="text-12">
<p>
See the <a href="http://covid19.genenetwork.org/blog">http://covid19.genenetwork.org/blog</a>!
@@ -458,8 +451,8 @@ See the <a href="http://covid19.genenetwork.org/blog">http://covid19.genenetwork
</div>
</div>
-<div id="outline-container-orgd45b3bc" class="outline-2">
-<h2 id="orgd45b3bc"><span class="section-number-2">13</span> How do I change the source code?</h2>
+<div id="outline-container-org5d1ee05" class="outline-2">
+<h2 id="org5d1ee05"><span class="section-number-2">13</span> How do I change the source code?</h2>
<div class="outline-text-2" id="text-13">
<p>
Go to our <a href="https://github.com/arvados/bh20-seq-resource">source code repositories</a>, fork/clone the repository, change
@@ -469,8 +462,8 @@ many PRs we already merged.
</div>
</div>
-<div id="outline-container-org2bb9455" class="outline-2">
-<h2 id="org2bb9455"><span class="section-number-2">14</span> Should I choose CC-BY or CC0?</h2>
+<div id="outline-container-orgae6461b" class="outline-2">
+<h2 id="orgae6461b"><span class="section-number-2">14</span> Should I choose CC-BY or CC0?</h2>
<div class="outline-text-2" id="text-14">
<p>
Restrictive data licenses are hampering data sharing and reproducible
@@ -486,8 +479,8 @@ In all honesty: we prefer both data and software to be free.
</div>
</div>
-<div id="outline-container-org62bf23f" class="outline-2">
-<h2 id="org62bf23f"><span class="section-number-2">15</span> How do I deal with private data and privacy?</h2>
+<div id="outline-container-org3ea90a9" class="outline-2">
+<h2 id="org3ea90a9"><span class="section-number-2">15</span> How do I deal with private data and privacy?</h2>
<div class="outline-text-2" id="text-15">
<p>
A public sequence resource is about public data. Metadata can refer to
@@ -498,8 +491,8 @@ plan to combine identifiers with clinical data stored securely at
</div>
</div>
-<div id="outline-container-org40c6da0" class="outline-2">
-<h2 id="org40c6da0"><span class="section-number-2">16</span> How do I communicate with you?</h2>
+<div id="outline-container-org7ff7106" class="outline-2">
+<h2 id="org7ff7106"><span class="section-number-2">16</span> How do I communicate with you?</h2>
<div class="outline-text-2" id="text-16">
<p>
We use a <a href="https://gitter.im/arvados/pubseq?utm_source=share-link&amp;utm_medium=link&amp;utm_campaign=share-link">gitter channel</a> you can join.
@@ -507,8 +500,8 @@ We use a <a href="https://gitter.im/arvados/pubseq?utm_source=share-link&amp;utm
</div>
</div>
-<div id="outline-container-org1f27c44" class="outline-2">
-<h2 id="org1f27c44"><span class="section-number-2">17</span> Who are the sponsors?</h2>
+<div id="outline-container-org9566fa7" class="outline-2">
+<h2 id="org9566fa7"><span class="section-number-2">17</span> Who are the sponsors?</h2>
<div class="outline-text-2" id="text-17">
<p>
The main sponsors are listed in the footer. In addition to the time
@@ -519,7 +512,7 @@ for donating COVID-19 related compute time.
</div>
</div>
<div id="postamble" class="status">
-<hr><small>Created by <a href="http://thebird.nl/">Pjotr Prins</a> (pjotr.public768 at thebird 'dot' nl) using Emacs org-mode and a healthy dose of Lisp!<br />Modified 2020-05-29 Fri 08:26</small>.
+<hr><small>Created by <a href="http://thebird.nl/">Pjotr Prins</a> (pjotr.public768 at thebird 'dot' nl) using Emacs org-mode and a healthy dose of Lisp!<br />Modified 2020-07-12 Sun 04:54</small>.
</div>
</body>
</html>
diff --git a/doc/web/about.org b/doc/web/about.org
index b6387e7..ad13bc3 100644
--- a/doc/web/about.org
+++ b/doc/web/about.org
@@ -33,9 +33,10 @@ The *public sequence resource* is an initiative by [[https://github.com/arvados/
ontology experts who want to create something agile and useful for the
wider research community. The initiative started at the COVID-19
biohackathon in April 2020 and is ongoing. The main project drivers
-are Pjotr Prins (UTHSC), Peter Amstutz (Curii), Michael Crusoe (Common
-Workflow Language), Thomas Liener (consultant, formerly EBI) and
-Jerven Bolleman (Swiss Institute of Bioinformatics).
+are Pjotr Prins (UTHSC), Peter Amstutz (Curii), Andrea Guarracino
+(University of Rome Tor Vergata), Michael Crusoe (Common Workflow
+Language), Thomas Liener (consultant, formerly EBI), Erik Garrison
+(UCSC) and Jerven Bolleman (Swiss Institute of Bioinformatics).
Notably, as this is a free software initiative, the project represents
major work by hundreds of software developers and ontology and data
@@ -48,11 +49,12 @@ bioinformatics using agile methods. Unlike the resources from large
institutes we can improve things on a dime and anyone can contribute
to building out this resource!
-Importantly: all data is published under the [[https://creativecommons.org/licenses/by/4.0/][Creative Commons 4.0
-attribution license]] which means it data can be published and workflows
-can run in public environments allowing for improved access for
-research and reproducible results. This contrasts with some other
-public resources, including GISAID.
+Importantly: all data is published under either the [[https://creativecommons.org/licenses/by/4.0/][Creative Commons
+4.0 attribution license]] or the [[https://creativecommons.org/share-your-work/public-domain/cc0/][CC0 “No Rights Reserved” license]] which
+means the data can be published and workflows can run in public
+environments allowing for improved access for research and
+reproducible results. This contrasts with some other public resources,
+including GISAID.
* Why should I upload my data here?
@@ -105,6 +107,8 @@ using workflows from the High Performance Open Biology Lab defined
The Swiss Institute of Bioinformatics has included this data in
https://covid-19-sparql.expasy.org/ and made it part of [[https://www.uniprot.org/][Uniprot]].
+The Pantograph [[https://graph-genome.github.io/][viewer]] uses PubSeq data for their visualisations.
+
[[https://uthsc.edu][UTHSC]] and [[https://www.ornl.gov/news/ornl-fight-against-covid-19][ORNL]] use COVID-19 PubSeq data for protein prediction and
drug development.
diff --git a/example/maximum_metadata_example.yaml b/example/maximum_metadata_example.yaml
index 432877c..54736f8 100644
--- a/example/maximum_metadata_example.yaml
+++ b/example/maximum_metadata_example.yaml
@@ -1,5 +1,11 @@
id: placeholder
+license:
+ license_type: http://creativecommons.org/licenses/by/4.0/
+ title: "Sample"
+ attribution_name: "John Doe, Joe Boe, Jonny Oe"
+ attribution_url: http://covid19.genenetwork.org/id
+
host:
host_id: XX1
host_species: http://purl.obolibrary.org/obo/NCBITaxon_9606
@@ -13,9 +19,9 @@ host:
additional_host_information: Optional free text field for additional information
sample:
- sample_id: Id of the sample as defined by the submitter
+ sample_id: Id of the sample as defined by the submitter
collector_name: Name of the person that took the sample
- collecting_institution: Institute that was responsible of sampling
+ collecting_institution: Institute that was responsible for sampling
specimen_source: [http://purl.obolibrary.org/obo/NCIT_C155831,http://purl.obolibrary.org/obo/NCIT_C155835]
collection_date: "2020-01-01"
collection_location: http://www.wikidata.org/entity/Q148
diff --git a/example/minimal_metadata_example.yaml b/example/minimal_metadata_example.yaml
index 51f8a87..1b46cc7 100644
--- a/example/minimal_metadata_example.yaml
+++ b/example/minimal_metadata_example.yaml
@@ -1,5 +1,9 @@
id: placeholder
+
+license:
+ license_type: http://creativecommons.org/licenses/by/4.0/
+
host:
host_species: http://purl.obolibrary.org/obo/NCBITaxon_9606
@@ -15,4 +19,4 @@ technology:
sample_sequencing_technology: [http://www.ebi.ac.uk/efo/EFO_0008632]
submitter:
- authors: [John Doe] \ No newline at end of file
+ authors: [John Doe]