diff options
-rw-r--r-- | bh20sequploader/bh20seq-options.yml | 4 | ||||
-rw-r--r-- | bh20sequploader/bh20seq-schema.yml | 26 | ||||
-rw-r--r-- | bh20simplewebuploader/main.py | 37 | ||||
-rw-r--r-- | bh20simplewebuploader/static/main.js | 6 | ||||
-rw-r--r-- | bh20simplewebuploader/templates/about.html | 13 | ||||
-rw-r--r-- | bh20simplewebuploader/templates/blog.html | 13 | ||||
-rw-r--r-- | bh20simplewebuploader/templates/demo.html | 6 | ||||
-rw-r--r-- | bh20simplewebuploader/templates/download.html | 13 | ||||
-rw-r--r-- | bh20simplewebuploader/templates/map.html | 13 | ||||
-rw-r--r-- | doc/INSTALL.md | 6 | ||||
-rw-r--r-- | doc/blog/using-covid-19-pubseq-part4.html | 44 | ||||
-rw-r--r-- | doc/blog/using-covid-19-pubseq-part4.org | 21 | ||||
-rw-r--r-- | doc/blog/using-covid-19-pubseq-part5.html | 79 | ||||
-rw-r--r-- | doc/blog/using-covid-19-pubseq-part5.org | 113 | ||||
-rw-r--r-- | doc/web/about.html | 143 | ||||
-rw-r--r-- | doc/web/about.org | 20 | ||||
-rw-r--r-- | example/maximum_metadata_example.yaml | 10 | ||||
-rw-r--r-- | example/minimal_metadata_example.yaml | 6 |
18 files changed, 383 insertions, 190 deletions
diff --git a/bh20sequploader/bh20seq-options.yml b/bh20sequploader/bh20seq-options.yml index c553f41..d1ea398 100644 --- a/bh20sequploader/bh20seq-options.yml +++ b/bh20sequploader/bh20seq-options.yml @@ -3,6 +3,10 @@ # being unique or at least using the same options in different containing # types. +license_type: + CC0 Public Domain Dedication: http://creativecommons.org/publicdomain/zero/1.0/ + CC-BY-4.0 Attribution 4.0 International: http://creativecommons.org/licenses/by/4.0/ + host_age_unit: Years: http://purl.obolibrary.org/obo/UO_0000036 Months: http://purl.obolibrary.org/obo/UO_0000035 diff --git a/bh20sequploader/bh20seq-schema.yml b/bh20sequploader/bh20seq-schema.yml index a8ab920..29ac22c 100644 --- a/bh20sequploader/bh20seq-schema.yml +++ b/bh20sequploader/bh20seq-schema.yml @@ -1,5 +1,6 @@ $base: http://biohackathon.org/bh20-seq-schema $namespaces: + cc: http://creativecommons.org/ns# sch: https://schema.org/ efo: http://www.ebi.ac.uk/efo/ obo: http://purl.obolibrary.org/obo/ @@ -9,6 +10,30 @@ $namespaces: $graph: +- name: licenseSchema + type: record + fields: + license_type: + doc: License types as defined in https://wiki.creativecommons.org/images/d/d6/Ccrel-1.0.pdf + type: string + jsonldPredicate: + _id: https://creativecommons.org/ns#License + title: + doc: Attribution title related to data license + type: string? + jsonldPredicate: + _id: http://semanticscience.org/resource/SIO_001167 + attribution_url: + doc: Attribution URL related to data license + type: string? + jsonldPredicate: + _id: https://creativecommons.org/ns#Work + attribution_source: + doc: Attribution source URL related to data license + type: string? 
+ jsonldPredicate: + _id: https://creativecommons.org/ns#Work + - name: hostSchema type: record fields: @@ -233,6 +258,7 @@ $graph: virus: virusSchema technology: technologySchema submitter: submitterSchema + license: licenseSchema id: doc: The subject (eg the fasta/fastq file) that the metadata describes type: string diff --git a/bh20simplewebuploader/main.py b/bh20simplewebuploader/main.py index 9132453..8a6794e 100644 --- a/bh20simplewebuploader/main.py +++ b/bh20simplewebuploader/main.py @@ -47,6 +47,7 @@ def type_to_heading(type_name): Turn a type name like "sampleSchema" from the metadata schema into a human-readable heading. """ + print(type_name,file=sys.stderr) # Remove camel case decamel = re.sub('([A-Z])', r' \1', type_name) # Split @@ -227,8 +228,13 @@ def generate_form(schema, options): # At startup, we need to load the metadata schema from the uploader module, so we can make a form for it -METADATA_SCHEMA = yaml.safe_load(pkg_resources.resource_stream("bh20sequploader", "bh20seq-schema.yml")) -METADATA_OPTION_DEFINITIONS = yaml.safe_load(pkg_resources.resource_stream("bh20sequploader", "bh20seq-options.yml")) +if os.path.isfile("bh20sequploader/bh20seq-schema.yml"): + METADATA_SCHEMA = yaml.safe_load(open("bh20sequploader/bh20seq-schema.yml","r").read()) + METADATA_OPTION_DEFINITIONS = yaml.safe_load(open("bh20sequploader/bh20seq-options.yml","r").read()) +else: + METADATA_SCHEMA = yaml.safe_load(pkg_resources.resource_stream("bh20sequploader", "bh20seq-schema.yml")) + METADATA_OPTION_DEFINITIONS = yaml.safe_load(pkg_resources.resource_stream("bh20sequploader", "bh20seq-options.yml")) +# print(METADATA_SCHEMA,file=sys.stderr) FORM_ITEMS = generate_form(METADATA_SCHEMA, METADATA_OPTION_DEFINITIONS) @app.route('/') @@ -505,7 +511,7 @@ def status_page(): Processing status """ - api = arvados.api(host=ARVADOS_API, token=ANONYMOUS_TOKEN) + api = arvados.api(host=ARVADOS_API, token=ANONYMOUS_TOKEN, insecure=True) pending = 
arvados.util.list_all(api.collections().list, filters=[["owner_uuid", "=", UPLOADER_PROJECT]]) out = [] status = {} @@ -567,11 +573,34 @@ baseURL='http://sparql.genenetwork.org/sparql/' @app.route('/api/getCount', methods=['GET']) def getCount(): - api = arvados.api(host=ARVADOS_API, token=ANONYMOUS_TOKEN) + """ + Get sequence counts from Arvados record + """ + api = arvados.api(host=ARVADOS_API, token=ANONYMOUS_TOKEN, insecure=True) c = api.collections().list(filters=[["owner_uuid", "=", VALIDATED_PROJECT]], limit=1).execute() return jsonify({'sequences': c["items_available"]}) +@app.route('/api/getCountDB', methods=['GET']) +def getCountDB(): + """ + Get sequence counts from Virtuoso DB + """ + query=""" + PREFIX pubseq: <http://biohackathon.org/bh20-seq-schema#MainSchema/> + select (COUNT(distinct ?dataset) as ?num) + { + ?dataset pubseq:submitter ?id . + ?id ?p ?submitter + } + """ + payload = {'query': query, 'format': 'json'} + r = requests.get(baseURL, params=payload) + result = r.json()['results']['bindings'] + # [{'num': {'type': 'typed-literal', 'datatype': 'http://www.w3.org/2001/XMLSchema#integer', 'value': '1352'}}] + # print(result, file=sys.stderr) + return jsonify({'sequences': int(result[0]["num"]["value"])}) + @app.route('/api/getAllaccessions', methods=['GET']) def getAllaccessions(): query="""SELECT DISTINCT ?fasta ?value WHERE {?fasta ?x[ <http://edamontology.org/data_2091> ?value ]}""" diff --git a/bh20simplewebuploader/static/main.js b/bh20simplewebuploader/static/main.js index a9dfc10..4703047 100644 --- a/bh20simplewebuploader/static/main.js +++ b/bh20simplewebuploader/static/main.js @@ -89,6 +89,10 @@ let fetchCount = () => { fetchAPI("/api/getCount"); } +let fetchCountDB = () => { + fetchAPI("/api/getCountDB"); +} + let fetchSEQCountBySpecimen = () => { fetchAPIV2("/api/getSEQCountbySpecimenSource"); } @@ -191,7 +195,7 @@ function addField(e) { // Increment the number and use the keypath and number to set IDs and cross // references. 
// TODO: Heavily dependent on the form field HTML. Maybe we want custom - // elements for the labeled controlsd that know how to be list items? + // elements for the labeled controls that know how to be list items? fieldNumber++ newField.dataset.number = fieldNumber let newID = keypath + '[' + fieldNumber + ']' diff --git a/bh20simplewebuploader/templates/about.html b/bh20simplewebuploader/templates/about.html index 07b6951..4bd238e 100644 --- a/bh20simplewebuploader/templates/about.html +++ b/bh20simplewebuploader/templates/about.html @@ -11,19 +11,6 @@ <script type="text/javascript"> let scriptRoot = {{ request.script_root|tojson|safe }}; // examples - - document.addEventListener("DOMContentLoaded", function(){ - var count = fetch("/api/getCount") - .then((resp) => resp.json()) - .then(function (data) { - count = data["sequences"]; - console.log(count); - span = document.getElementById("Counter"); - txt = document.createTextNode(count); - span.appendChild(txt); - }); - }); - </script> </body> diff --git a/bh20simplewebuploader/templates/blog.html b/bh20simplewebuploader/templates/blog.html index 8f8ab66..dbc0b99 100644 --- a/bh20simplewebuploader/templates/blog.html +++ b/bh20simplewebuploader/templates/blog.html @@ -73,19 +73,6 @@ <script type="text/javascript"> let scriptRoot = {{ request.script_root|tojson|safe }}; // examples - - document.addEventListener("DOMContentLoaded", function(){ - var count = fetch("/api/getCount") - .then((resp) => resp.json()) - .then(function (data) { - count = data["sequences"]; - console.log(count); - span = document.getElementById("Counter"); - txt = document.createTextNode(count); - span.appendChild(txt); - }); - }); - </script> </body> diff --git a/bh20simplewebuploader/templates/demo.html b/bh20simplewebuploader/templates/demo.html index 76c19c4..44aded0 100644 --- a/bh20simplewebuploader/templates/demo.html +++ b/bh20simplewebuploader/templates/demo.html @@ -5,6 +5,7 @@ {% include 'banner.html' %} {% include 'menu.html' %} 
{% include 'search.html' %} + <p>The Virtuoso database contains <span id="CounterDB"></span> public sequences!</p> {% include 'demo-run.html' %} {% include 'footer.html' %} @@ -12,16 +13,17 @@ let scriptRoot = {{ request.script_root|tojson|safe }}; // examples document.addEventListener("DOMContentLoaded", function(){ - var count = fetch("/api/getCount") + var count = fetch("/api/getCountDB") .then((resp) => resp.json()) .then(function (data) { count = data["sequences"]; console.log(count); - span = document.getElementById("Counter"); + span = document.getElementById("CounterDB"); txt = document.createTextNode(count); span.appendChild(txt); }); }); + </script> </body> diff --git a/bh20simplewebuploader/templates/download.html b/bh20simplewebuploader/templates/download.html index 07b6951..4bd238e 100644 --- a/bh20simplewebuploader/templates/download.html +++ b/bh20simplewebuploader/templates/download.html @@ -11,19 +11,6 @@ <script type="text/javascript"> let scriptRoot = {{ request.script_root|tojson|safe }}; // examples - - document.addEventListener("DOMContentLoaded", function(){ - var count = fetch("/api/getCount") - .then((resp) => resp.json()) - .then(function (data) { - count = data["sequences"]; - console.log(count); - span = document.getElementById("Counter"); - txt = document.createTextNode(count); - span.appendChild(txt); - }); - }); - </script> </body> diff --git a/bh20simplewebuploader/templates/map.html b/bh20simplewebuploader/templates/map.html index 6d63c3c..595af0c 100644 --- a/bh20simplewebuploader/templates/map.html +++ b/bh20simplewebuploader/templates/map.html @@ -16,19 +16,6 @@ <script type="text/javascript"> let scriptRoot = {{ request.script_root|tojson|safe }}; // examples - - document.addEventListener("DOMContentLoaded", function(){ - var count = fetch("/api/getCount") - .then((resp) => resp.json()) - .then(function (data) { - count = data["sequences"]; - console.log(count); - span = document.getElementById("Counter"); - txt = 
document.createTextNode(count); - span.appendChild(txt); - }); - }); - </script> <!-- Make sure you put this AFTER Leaflet's CSS --> diff --git a/doc/INSTALL.md b/doc/INSTALL.md index 6dcd72b..3b270dd 100644 --- a/doc/INSTALL.md +++ b/doc/INSTALL.md @@ -42,7 +42,7 @@ repository. ### Using the Web Uploader -To run the web uploader in a GNU Guix environment/container +To run the web uploader in a GNU Guix environment/container run it with something like ``` guix environment guix --ad-hoc git python python-flask python-pyyaml python-pycurl python-magic nss-certs --network openssl -- env FLASK_ENV=development PYTHONPATH=$PYTHONPATH:./bh20sequploader FLASK_APP=bh20simplewebuploader/main.py flask run @@ -59,7 +59,7 @@ WIP: add gunicorn container Currently the full webserver container deploy command looks like ``` -penguin2:~/iwrk/opensource/code/vg/bh20-seq-resource$ env GUIX_PACKAGE_PATH=~/iwrk/opensource/guix/guix-bioinformatics/ ~/iwrk/opensource/guix/guix/pre-inst-env guix environment -C guix --ad-hoc git python python-flask python-pyyaml python-pycurl python-magic nss-certs python-pyshex python-pyyaml --network openssl python-pyshex python-pyshexc clustalw python-schema-salad python-arvados-python-client --share=/export/tmp -- env TMPDIR=/export/tmp FLASK_ENV=development FLASK_APP=bh20simplewebuploader/main.py flask run -`` +penguin2:~/iwrk/opensource/code/vg/bh20-seq-resource$ env GUIX_PACKAGE_PATH=~/iwrk/opensource/guix/guix-bioinformatics/ ~/iwrk/opensource/guix/guix/pre-inst-env guix environment -C guix --ad-hoc git python python-flask python-pyyaml python-pycurl python-magic nss-certs python-pyshex python-pyyaml --network openssl python-pyshex python-pyshexc clustalw python-schema-salad python-arvados-python-client --share=/export/tmp -- env TMPDIR=/export/tmp FLASK_ENV=development FLASK_APP=bh20simplewebuploader/main.py flask run +``` Note: see above on GUIX_PACKAGE_PATH. 
diff --git a/doc/blog/using-covid-19-pubseq-part4.html b/doc/blog/using-covid-19-pubseq-part4.html index 67d299e..b5a05ca 100644 --- a/doc/blog/using-covid-19-pubseq-part4.html +++ b/doc/blog/using-covid-19-pubseq-part4.html @@ -3,10 +3,10 @@ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> <html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en"> <head> -<!-- 2020-05-30 Sat 11:52 --> +<!-- 2020-07-12 Sun 06:24 --> <meta http-equiv="Content-Type" content="text/html;charset=utf-8" /> <meta name="viewport" content="width=device-width, initial-scale=1" /> -<title>‎</title> +<title>COVID-19 PubSeq (part 4)</title> <meta name="generator" content="Org mode" /> <meta name="author" content="Pjotr Prins" /> <style type="text/css"> @@ -161,19 +161,6 @@ .footdef { margin-bottom: 1em; } .figure { padding: 1em; } .figure p { text-align: center; } - .equation-container { - display: table; - text-align: center; - width: 100%; - } - .equation { - vertical-align: middle; - } - .equation-label { - display: table-cell; - text-align: right; - vertical-align: middle; - } .inlinetask { padding: 10px; border: 2px solid gray; @@ -193,12 +180,13 @@ .org-svg { width: 90%; } /*]]>*/--> </style> +<link rel="Blog stylesheet" type="text/css" href="blog.css" /> <script type="text/javascript"> /* @licstart The following is the entire license notice for the JavaScript code in this tag. -Copyright (C) 2012-2020 Free Software Foundation, Inc. +Copyright (C) 2012-2018 Free Software Foundation, Inc. The JavaScript code in this tag is free software: you can redistribute it and/or modify it under the terms of the GNU @@ -242,25 +230,41 @@ for the JavaScript code in this tag. </head> <body> <div id="content"> +<h1 class="title">COVID-19 PubSeq (part 4)</h1> <div id="table-of-contents"> <h2>Table of Contents</h2> <div id="text-table-of-contents"> <ul> -<li><a href="#orgda6f48c">1. Modify Workflow</a></li> +<li><a href="#org8f8b64a">1. 
What does this mean?</a></li> +<li><a href="#orgcc7a403">2. Modify Workflow</a></li> </ul> </div> </div> -<div id="outline-container-orgda6f48c" class="outline-2"> -<h2 id="orgda6f48c"><span class="section-number-2">1</span> Modify Workflow</h2> + + +<div id="outline-container-org8f8b64a" class="outline-2"> +<h2 id="org8f8b64a"><span class="section-number-2">1</span> What does this mean?</h2> <div class="outline-text-2" id="text-1"> <p> +This means that when someone uploads a SARS-CoV-2 sequence using one +of our tools (CLI or web-based) they add a sequence and some metadata +which triggers a rerun of our workflows. +</p> +</div> +</div> + + +<div id="outline-container-orgcc7a403" class="outline-2"> +<h2 id="orgcc7a403"><span class="section-number-2">2</span> Modify Workflow</h2> +<div class="outline-text-2" id="text-2"> +<p> <i>Work in progress!</i> </p> </div> </div> </div> <div id="postamble" class="status"> -<hr><small>Created by <a href="http://thebird.nl/">Pjotr Prins</a> (pjotr.public768 at thebird 'dot' nl) using Emacs org-mode and a healthy dose of Lisp!<br />Modified 2020-05-30 Sat 11:52</small>. +<hr><small>Created by <a href="http://thebird.nl/">Pjotr Prins</a> (pjotr.public768 at thebird 'dot' nl) using Emacs org-mode and a healthy dose of Lisp!<br />Modified 2020-07-12 Sun 06:24</small>. </div> </body> </html> diff --git a/doc/blog/using-covid-19-pubseq-part4.org b/doc/blog/using-covid-19-pubseq-part4.org index 58a1f56..5fe71d1 100644 --- a/doc/blog/using-covid-19-pubseq-part4.org +++ b/doc/blog/using-covid-19-pubseq-part4.org @@ -1,3 +1,24 @@ +#+TITLE: COVID-19 PubSeq (part 4) +#+AUTHOR: Pjotr Prins +# C-c C-e h h publish +# C-c ! insert date (use . for active agenda, C-u C-c ! for date, C-u C-c . 
for time) +# C-c C-t task rotate +# RSS_IMAGE_URL: http://xxxx.xxxx.free.fr/rss_icon.png + +#+HTML_HEAD: <link rel="Blog stylesheet" type="text/css" href="blog.css" /> + + +* Table of Contents :TOC:noexport: + - [[#what-does-this-mean][What does this mean?]] + - [[#modify-workflow][Modify Workflow]] + +* What does this mean? + +This means that when someone uploads a SARS-CoV-2 sequence using one +of our tools (CLI or web-based) they add a sequence and some metadata +which triggers a rerun of our workflows. + + * Modify Workflow /Work in progress!/ diff --git a/doc/blog/using-covid-19-pubseq-part5.html b/doc/blog/using-covid-19-pubseq-part5.html index 30a3f83..80bf559 100644 --- a/doc/blog/using-covid-19-pubseq-part5.html +++ b/doc/blog/using-covid-19-pubseq-part5.html @@ -3,10 +3,10 @@ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> <html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en"> <head> -<!-- 2020-05-30 Sat 11:59 --> +<!-- 2020-07-12 Sun 06:24 --> <meta http-equiv="Content-Type" content="text/html;charset=utf-8" /> <meta name="viewport" content="width=device-width, initial-scale=1" /> -<title>‎</title> +<title>COVID-19 PubSeq (part 5)</title> <meta name="generator" content="Org mode" /> <meta name="author" content="Pjotr Prins" /> <style type="text/css"> @@ -161,19 +161,6 @@ .footdef { margin-bottom: 1em; } .figure { padding: 1em; } .figure p { text-align: center; } - .equation-container { - display: table; - text-align: center; - width: 100%; - } - .equation { - vertical-align: middle; - } - .equation-label { - display: table-cell; - text-align: right; - vertical-align: middle; - } .inlinetask { padding: 10px; border: 2px solid gray; @@ -193,12 +180,13 @@ .org-svg { width: 90%; } /*]]>*/--> </style> +<link rel="Blog stylesheet" type="text/css" href="blog.css" /> <script type="text/javascript"> /* @licstart The following is the entire license notice for the JavaScript code in this tag. -Copyright (C) 2012-2020 Free Software Foundation, Inc. 
+Copyright (C) 2012-2018 Free Software Foundation, Inc. The JavaScript code in this tag is free software: you can redistribute it and/or modify it under the terms of the GNU @@ -242,16 +230,22 @@ for the JavaScript code in this tag. </head> <body> <div id="content"> +<h1 class="title">COVID-19 PubSeq (part 5)</h1> <div id="table-of-contents"> <h2>Table of Contents</h2> <div id="text-table-of-contents"> <ul> -<li><a href="#org31c224e">1. Modify Metadata</a></li> +<li><a href="#org871ad58">1. Modify Metadata</a></li> +<li><a href="#org07e8755">2. What is the schema?</a></li> +<li><a href="#org4857280">3. How is the website generated?</a></li> +<li><a href="#orge709ae2">4. Modifying the schema</a></li> </ul> </div> </div> -<div id="outline-container-org31c224e" class="outline-2"> -<h2 id="org31c224e"><span class="section-number-2">1</span> Modify Metadata</h2> + + +<div id="outline-container-org871ad58" class="outline-2"> +<h2 id="org871ad58"><span class="section-number-2">1</span> Modify Metadata</h2> <div class="outline-text-2" id="text-1"> <p> The public sequence resource uses multiple data formats listed on the @@ -265,13 +259,56 @@ data are listed <a href="./blog?id=using-covid-19-pubseq-part1">here</a>. <p> In this BLOG we are going to look at the metadata entered on the -<a href="./">COVID-19 PubSeq</a> website (or command line client). +<a href="./">COVID-19 PubSeq</a> website (or command line client). It is important to +understand that anyone, including you, can change that information! +</p> +</div> +</div> + +<div id="outline-container-org07e8755" class="outline-2"> +<h2 id="org07e8755"><span class="section-number-2">2</span> What is the schema?</h2> +<div class="outline-text-2" id="text-2"> +<p> +The default metadata schema is listed <a href="https://github.com/arvados/bh20-seq-resource/blob/master/bh20sequploader/bh20seq-schema.yml">here</a>. 
+</p> +</div> +</div> + +<div id="outline-container-org4857280" class="outline-2"> +<h2 id="org4857280"><span class="section-number-2">3</span> How is the website generated?</h2> +<div class="outline-text-2" id="text-3"> +<p> +Using the schema we use <a href="https://pypi.org/project/PyShEx/">pyshex</a> shex expressions and <a href="https://github.com/common-workflow-language/schema_salad">schema salad</a> to +generate the <a href="https://github.com/arvados/bh20-seq-resource/blob/edb17e7f7caebfa1e76b21006b1772a33f4f7887/bh20simplewebuploader/templates/form.html#L47">input form</a>, <a href="https://github.com/arvados/bh20-seq-resource/blob/edb17e7f7caebfa1e76b21006b1772a33f4f7887/bh20sequploader/qc_metadata.py#L13">validate</a> the user input and to build <a href="https://github.com/arvados/bh20-seq-resource/blob/edb17e7f7caebfa1e76b21006b1772a33f4f7887/workflows/pangenome-generate/merge-metadata.py#L24">RDF</a>! +All from that one metadata schema. +</p> +</div> +</div> + +<div id="outline-container-orge709ae2" class="outline-2"> +<h2 id="orge709ae2"><span class="section-number-2">4</span> Modifying the schema</h2> +<div class="outline-text-2" id="text-4"> +<p> +One of the first things we wanted to do is to add a field for the data +license. Initially we only support CC-4.0 as a license by default, but +now we want to give uploaders the option to make it an even more +liberal CC0 license. The first step is to find a good ontology term +for the field. Searching for `creative commons cc0 rdf' rendered this +useful <a href="https://creativecommons.org/ns">page</a>. We also find an <a href="https://wiki.creativecommons.org/wiki/CC_License_Rdf_Overview">overview</a> where CC0 is represented as URI +<a href="https://creativecommons.org/publicdomain/zero/1.0/">https://creativecommons.org/publicdomain/zero/1.0/</a>. Meanwhile the +attribution license <a href="https://creativecommons.org/licenses/by/4.0/">https://creativecommons.org/licenses/by/4.0/</a>. 
+According to this <a href="https://wiki.creativecommons.org/images/d/d6/Ccrel-1.0.pdf">document</a> we should really also add fields for +attributionName and attributionURL. +</p> + +<p> +<i>Note: work in progress</i> </p> </div> </div> </div> <div id="postamble" class="status"> -<hr><small>Created by <a href="http://thebird.nl/">Pjotr Prins</a> (pjotr.public768 at thebird 'dot' nl) using Emacs org-mode and a healthy dose of Lisp!<br />Modified 2020-05-30 Sat 11:59</small>. +<hr><small>Created by <a href="http://thebird.nl/">Pjotr Prins</a> (pjotr.public768 at thebird 'dot' nl) using Emacs org-mode and a healthy dose of Lisp!<br />Modified 2020-07-12 Sun 06:24</small>. </div> </body> </html> diff --git a/doc/blog/using-covid-19-pubseq-part5.org b/doc/blog/using-covid-19-pubseq-part5.org index 8d7504e..aa06d5e 100644 --- a/doc/blog/using-covid-19-pubseq-part5.org +++ b/doc/blog/using-covid-19-pubseq-part5.org @@ -1,3 +1,20 @@ +#+TITLE: COVID-19 PubSeq (part 5) +#+AUTHOR: Pjotr Prins +# C-c C-e h h publish +# C-c ! insert date (use . for active agenda, C-u C-c ! for date, C-u C-c . for time) +# C-c C-t task rotate +# RSS_IMAGE_URL: http://xxxx.xxxx.free.fr/rss_icon.png + +#+HTML_HEAD: <link rel="Blog stylesheet" type="text/css" href="blog.css" /> + + +* Table of Contents :TOC:noexport: + - [[#modify-metadata][Modify Metadata]] + - [[#what-is-the-schema][What is the schema?]] + - [[#how-is-the-website-generated][How is the website generated?]] + - [[#modifying-the-schema][Modifying the schema]] + - [[#adding-fields-to-the-form][Adding fields to the form]] + * Modify Metadata The public sequence resource uses multiple data formats listed on the @@ -10,8 +27,102 @@ data are listed [[./blog?id=using-covid-19-pubseq-part1][here]]. In this BLOG we are going to look at the metadata entered on the [[./][COVID-19 PubSeq]] website (or command line client). It is important to -understand that you and us can change that information. 
+understand that anyone, including you, can change that information! * What is the schema? +The default metadata schema is listed [[https://github.com/arvados/bh20-seq-resource/blob/master/bh20sequploader/bh20seq-schema.yml][here]]. + * How is the website generated? + +Using the schema we use [[https://pypi.org/project/PyShEx/][pyshex]] shex expressions and [[https://github.com/common-workflow-language/schema_salad][schema salad]] to +generate the [[https://github.com/arvados/bh20-seq-resource/blob/edb17e7f7caebfa1e76b21006b1772a33f4f7887/bh20simplewebuploader/templates/form.html#L47][input form]], [[https://github.com/arvados/bh20-seq-resource/blob/edb17e7f7caebfa1e76b21006b1772a33f4f7887/bh20sequploader/qc_metadata.py#L13][validate]] the user input and to build [[https://github.com/arvados/bh20-seq-resource/blob/edb17e7f7caebfa1e76b21006b1772a33f4f7887/workflows/pangenome-generate/merge-metadata.py#L24][RDF]]! +All from that one metadata schema. + +* Modifying the schema + +One of the first things we want to do is to add a field for the data +license. Initially we only support CC-4.0 as a license by default, but +now we want to give uploaders the option to make it an even more +liberal CC0 license. The first step is to find a good ontology term +for the field. Searching for `creative commons cc0 rdf' rendered this +useful [[https://creativecommons.org/ns][page]]. We also find an [[https://wiki.creativecommons.org/wiki/CC_License_Rdf_Overview][overview]] where CC0 is represented as URI +https://creativecommons.org/publicdomain/zero/1.0/. Meanwhile the +attribution license https://creativecommons.org/licenses/by/4.0/. +According to this [[https://wiki.creativecommons.org/images/d/d6/Ccrel-1.0.pdf][document]] we should really also add fields for +attributionName and attributionURL. + +A minimal triple should be + +: id xhtml:license <http://creativecommons.org/licenses/by/4.0/> . + +Other suggestions are + +: id dc:title "Description" . 
+: id cc:attributionName "Your Name" . +: id cc:attributionURL <http://resource.org/id> + +and 'dc:source' which indicates the original source of any modified +work, specified as a URI. +The prefix 'cc:' is an abbreviation for http://creativecommons.org/ns#. + +Going back to the schema, where does it fit? Under host, sample, +virus, technology or submitter block? It could fit under sample, but +actually the license concerns the whole metadata block and sequence, +so I think we can fit under its own license tag. For example + + +id: placeholder + +: license: +: license_type: http://creativecommons.org/licenses/by/4.0/ +: attribution_title: "Sample ID" +: attribution_name: "John doe, Joe Boe, Jonny Oe" +: attribution_url: http://covid19.genenetwork.org/id +: attribution_source: https://www.ncbi.nlm.nih.gov/pubmed/323088888 + +So, let's update the example. Notice the license info is optional - if it is missing +we just assume the default CC-4.0. + +One thing that is interesting is that in the name space https://creativecommons.org/ns there +is no mention of a title. I think it is useful, however, because we have no such field. +So, we'll add it simply as a title field. Now the draft schema is + +#+BEGIN_SRC js +- name: licenseSchema + type: record + fields: + license_type: + doc: License types as defined in https://wiki.creativecommons.org/images/d/d6/Ccrel-1.0.pdf + type: string? + jsonldPredicate: + _id: https://creativecommons.org/ns#License + title: + doc: Attribution title related to license + type: string? + jsonldPredicate: + _id: http://semanticscience.org/resource/SIO_001167 + attribution_url: + doc: Attribution URL related to license + type: string? + jsonldPredicate: + _id: https://creativecommons.org/ns#Work + attribution_source: + doc: Attribution source URL + type: string? + jsonldPredicate: + _id: https://creativecommons.org/ns#Work +#+END_SRC + +Now, we are no ontology experts, right? 
So, next we submit a patch to +our source tree and ask for feedback before wiring it up in the data +entry form. The pull request was submitted [[https://github.com/arvados/bh20-seq-resource/pull/97][here]] and reviewed on the +gitter channel and I merged it. + +* Adding fields to the form + +To add the new fields to the form we have to modify it a little. If we +go to the upload form we need to add the license box. The schema is +loaded in [[https://github.com/arvados/bh20-seq-resource/blob/a0c8ebd57b875f265e8b0efec4abfaf892eb6c45/bh20simplewebuploader/main.py#L229][main.py]] in the 'generate_form' function. + +/Note: work in progress/ diff --git a/doc/web/about.html b/doc/web/about.html index c907e6c..9b16c92 100644 --- a/doc/web/about.html +++ b/doc/web/about.html @@ -3,7 +3,7 @@ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> <html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en"> <head> -<!-- 2020-05-29 Fri 08:27 --> +<!-- 2020-07-12 Sun 06:29 --> <meta http-equiv="Content-Type" content="text/html;charset=utf-8" /> <meta name="viewport" content="width=device-width, initial-scale=1" /> <title>About/FAQ</title> @@ -161,19 +161,6 @@ .footdef { margin-bottom: 1em; } .figure { padding: 1em; } .figure p { text-align: center; } - .equation-container { - display: table; - text-align: center; - width: 100%; - } - .equation { - vertical-align: middle; - } - .equation-label { - display: table-cell; - text-align: right; - vertical-align: middle; - } .inlinetask { padding: 10px; border: 2px solid gray; @@ -198,7 +185,7 @@ @licstart The following is the entire license notice for the JavaScript code in this tag. -Copyright (C) 2012-2020 Free Software Foundation, Inc. +Copyright (C) 2012-2018 Free Software Foundation, Inc. The JavaScript code in this tag is free software: you can redistribute it and/or modify it under the terms of the GNU @@ -247,29 +234,29 @@ for the JavaScript code in this tag. 
<h2>Table of Contents</h2> <div id="text-table-of-contents"> <ul> -<li><a href="#org783b5e9">1. What is the 'public sequence resource' about?</a></li> -<li><a href="#org2c0bcfd">2. Who created the public sequence resource?</a></li> -<li><a href="#org34070d3">3. How does the public sequence resource compare to other data resources?</a></li> -<li><a href="#org64a9493">4. Why should I upload my data here?</a></li> -<li><a href="#orgf898e7f">5. Why should I not upload by data here?</a></li> -<li><a href="#org828e164">6. How does the public sequence resource work?</a></li> -<li><a href="#org7b0d03f">7. Who uses the public sequence resource?</a></li> -<li><a href="#org31aaf23">8. Is this about open data?</a></li> -<li><a href="#orgb376b6c">9. Is this about free software?</a></li> -<li><a href="#orgf19cd96">10. How do I upload raw data?</a></li> -<li><a href="#orgebfed00">11. How do I change metadata?</a></li> -<li><a href="#orge2aecf8">12. How do I change the work flows?</a></li> -<li><a href="#orgd45b3bc">13. How do I change the source code?</a></li> -<li><a href="#org2bb9455">14. Should I choose CC-BY or CC0?</a></li> -<li><a href="#org62bf23f">15. How do I deal with private data and privacy?</a></li> -<li><a href="#org40c6da0">16. How do I communicate with you?</a></li> -<li><a href="#org1f27c44">17. Who are the sponsors?</a></li> +<li><a href="#orgac6ad8b">1. What is the 'public sequence resource' about?</a></li> +<li><a href="#org0c21c2e">2. Who created the public sequence resource?</a></li> +<li><a href="#org3fb8cb3">3. How does the public sequence resource compare to other data resources?</a></li> +<li><a href="#org6cd9ea2">4. Why should I upload my data here?</a></li> +<li><a href="#org0b6e3fb">5. Why should I not upload by data here?</a></li> +<li><a href="#org3eb3a4e">6. How does the public sequence resource work?</a></li> +<li><a href="#org7a397f5">7. Who uses the public sequence resource?</a></li> +<li><a href="#org92cb008">8. 
Is this about open data?</a></li> +<li><a href="#org232d6fa">9. Is this about free software?</a></li> +<li><a href="#orgd93869f">10. How do I upload raw data?</a></li> +<li><a href="#org88e8b0a">11. How do I change metadata?</a></li> +<li><a href="#orgd04b8f8">12. How do I change the work flows?</a></li> +<li><a href="#org5d1ee05">13. How do I change the source code?</a></li> +<li><a href="#orgae6461b">14. Should I choose CC-BY or CC0?</a></li> +<li><a href="#org3ea90a9">15. How do I deal with private data and privacy?</a></li> +<li><a href="#org7ff7106">16. How do I communicate with you?</a></li> +<li><a href="#org9566fa7">17. Who are the sponsors?</a></li> </ul> </div> </div> -<div id="outline-container-org783b5e9" class="outline-2"> -<h2 id="org783b5e9"><span class="section-number-2">1</span> What is the 'public sequence resource' about?</h2> +<div id="outline-container-orgac6ad8b" class="outline-2"> +<h2 id="orgac6ad8b"><span class="section-number-2">1</span> What is the 'public sequence resource' about?</h2> <div class="outline-text-2" id="text-1"> <p> The <b>public sequence resource</b> aims to provide a generic and useful @@ -280,17 +267,18 @@ sequence comparison and protein prediction. </div> </div> -<div id="outline-container-org2c0bcfd" class="outline-2"> -<h2 id="org2c0bcfd"><span class="section-number-2">2</span> Who created the public sequence resource?</h2> +<div id="outline-container-org0c21c2e" class="outline-2"> +<h2 id="org0c21c2e"><span class="section-number-2">2</span> Who created the public sequence resource?</h2> <div class="outline-text-2" id="text-2"> <p> The <b>public sequence resource</b> is an initiative by <a href="https://github.com/arvados/bh20-seq-resource/graphs/contributors">bioinformatics</a> and ontology experts who want to create something agile and useful for the wider research community. The initiative started at the COVID-19 biohackathon in April 2020 and is ongoing. 
The main project drivers -are Pjotr Prins (UTHSC), Peter Amstutz (Curii), Michael Crusoe (Common -Workflow Language), Thomas Liener (consultant, formerly EBI) and -Jerven Bolleman (Swiss Institute of Bioinformatics). +are Pjotr Prins (UTHSC), Peter Amstutz (Curii), Andrea Guarracino +(University of Rome Tor Vergata), Michael Crusoe (Common Workflow +Language), Thomas Liener (consultant, formerly EBI), Erik Garrison +(UCSC) and Jerven Bolleman (Swiss Institute of Bioinformatics). </p> <p> @@ -301,8 +289,8 @@ wrangling experts. Thank you everyone! </div> </div> -<div id="outline-container-org34070d3" class="outline-2"> -<h2 id="org34070d3"><span class="section-number-2">3</span> How does the public sequence resource compare to other data resources?</h2> +<div id="outline-container-org3fb8cb3" class="outline-2"> +<h2 id="org3fb8cb3"><span class="section-number-2">3</span> How does the public sequence resource compare to other data resources?</h2> <div class="outline-text-2" id="text-3"> <p> The short version is that we use state-of-the-art practices in @@ -312,17 +300,18 @@ to building out this resource! </p> <p> -Importantly: all data is published under the <a href="https://creativecommons.org/licenses/by/4.0/">Creative Commons 4.0 -attribution license</a> which means it data can be published and workflows -can run in public environments allowing for improved access for -research and reproducible results. This contrasts with some other -public resources, including GISAID. +Importantly: all data is published under either the <a href="https://creativecommons.org/licenses/by/4.0/">Creative Commons +4.0 attribution license</a> or the <a href="https://creativecommons.org/share-your-work/public-domain/cc0/">CC0 “No Rights Reserved” license</a> which +means the data can be published and workflows can run in public +environments allowing for improved access for research and +reproducible results. This contrasts with some other public resources, +including GISAID. 
</p> </div> </div> -<div id="outline-container-org64a9493" class="outline-2"> -<h2 id="org64a9493"><span class="section-number-2">4</span> Why should I upload my data here?</h2> +<div id="outline-container-org6cd9ea2" class="outline-2"> +<h2 id="org6cd9ea2"><span class="section-number-2">4</span> Why should I upload my data here?</h2> <div class="outline-text-2" id="text-4"> <ol class="org-ol"> <li>We champion truly shareable data without licensing restrictions - with proper @@ -353,8 +342,8 @@ multiple resources. </div> </div> -<div id="outline-container-orgf898e7f" class="outline-2"> -<h2 id="orgf898e7f"><span class="section-number-2">5</span> Why should I not upload by data here?</h2> +<div id="outline-container-org0b6e3fb" class="outline-2"> +<h2 id="org0b6e3fb"><span class="section-number-2">5</span> Why should I not upload by data here?</h2> <div class="outline-text-2" id="text-5"> <p> Funny question. There are only good reasons to upload your data here @@ -376,8 +365,8 @@ for bulk uploads! 
</div> </div> -<div id="outline-container-org828e164" class="outline-2"> -<h2 id="org828e164"><span class="section-number-2">6</span> How does the public sequence resource work?</h2> +<div id="outline-container-org3eb3a4e" class="outline-2"> +<h2 id="org3eb3a4e"><span class="section-number-2">6</span> How does the public sequence resource work?</h2> <div class="outline-text-2" id="text-6"> <p> On uploading a sequence with metadata it will automatically be @@ -388,8 +377,8 @@ using workflows from the High Performance Open Biology Lab defined </div> </div> -<div id="outline-container-org7b0d03f" class="outline-2"> -<h2 id="org7b0d03f"><span class="section-number-2">7</span> Who uses the public sequence resource?</h2> +<div id="outline-container-org7a397f5" class="outline-2"> +<h2 id="org7a397f5"><span class="section-number-2">7</span> Who uses the public sequence resource?</h2> <div class="outline-text-2" id="text-7"> <p> The Swiss Institute of Bioinformatics has included this data in @@ -397,14 +386,18 @@ The Swiss Institute of Bioinformatics has included this data in </p> <p> +The Pantograph <a href="https://graph-genome.github.io/">viewer</a> uses PubSeq data for their visualisations. +</p> + +<p> <a href="https://uthsc.edu">UTHSC</a> and <a href="https://www.ornl.gov/news/ornl-fight-against-covid-19">ORNL</a> use COVID-19 PubSeq data for protein prediction and drug development. </p> </div> </div> -<div id="outline-container-org31aaf23" class="outline-2"> -<h2 id="org31aaf23"><span class="section-number-2">8</span> Is this about open data?</h2> +<div id="outline-container-org92cb008" class="outline-2"> +<h2 id="org92cb008"><span class="section-number-2">8</span> Is this about open data?</h2> <div class="outline-text-2" id="text-8"> <p> All data is published under a <a href="https://creativecommons.org/licenses/by/4.0/">Creative Commons 4.0 attribution license</a> @@ -414,8 +407,8 @@ data and store it for further processing. 
</div> </div> -<div id="outline-container-orgb376b6c" class="outline-2"> -<h2 id="orgb376b6c"><span class="section-number-2">9</span> Is this about free software?</h2> +<div id="outline-container-org232d6fa" class="outline-2"> +<h2 id="org232d6fa"><span class="section-number-2">9</span> Is this about free software?</h2> <div class="outline-text-2" id="text-9"> <p> Absolutely. Free software allows for fully reproducible pipelines. You @@ -424,8 +417,8 @@ can take our workflows and data and run it elsewhere! </div> </div> -<div id="outline-container-orgf19cd96" class="outline-2"> -<h2 id="orgf19cd96"><span class="section-number-2">10</span> How do I upload raw data?</h2> +<div id="outline-container-orgd93869f" class="outline-2"> +<h2 id="orgd93869f"><span class="section-number-2">10</span> How do I upload raw data?</h2> <div class="outline-text-2" id="text-10"> <p> We are preparing raw sequence data pipelines (fastq and BAM). The @@ -440,8 +433,8 @@ assembly variations into consideration. This is all work in progress. </div> </div> -<div id="outline-container-orgebfed00" class="outline-2"> -<h2 id="orgebfed00"><span class="section-number-2">11</span> How do I change metadata?</h2> +<div id="outline-container-org88e8b0a" class="outline-2"> +<h2 id="org88e8b0a"><span class="section-number-2">11</span> How do I change metadata?</h2> <div class="outline-text-2" id="text-11"> <p> See the <a href="http://covid19.genenetwork.org/blog">http://covid19.genenetwork.org/blog</a>! 
@@ -449,8 +442,8 @@ See the <a href="http://covid19.genenetwork.org/blog">http://covid19.genenetwork </div> </div> -<div id="outline-container-orge2aecf8" class="outline-2"> -<h2 id="orge2aecf8"><span class="section-number-2">12</span> How do I change the work flows?</h2> +<div id="outline-container-orgd04b8f8" class="outline-2"> +<h2 id="orgd04b8f8"><span class="section-number-2">12</span> How do I change the work flows?</h2> <div class="outline-text-2" id="text-12"> <p> See the <a href="http://covid19.genenetwork.org/blog">http://covid19.genenetwork.org/blog</a>! @@ -458,8 +451,8 @@ See the <a href="http://covid19.genenetwork.org/blog">http://covid19.genenetwork </div> </div> -<div id="outline-container-orgd45b3bc" class="outline-2"> -<h2 id="orgd45b3bc"><span class="section-number-2">13</span> How do I change the source code?</h2> +<div id="outline-container-org5d1ee05" class="outline-2"> +<h2 id="org5d1ee05"><span class="section-number-2">13</span> How do I change the source code?</h2> <div class="outline-text-2" id="text-13"> <p> Go to our <a href="https://github.com/arvados/bh20-seq-resource">source code repositories</a>, fork/clone the repository, change @@ -469,8 +462,8 @@ many PRs we already merged. </div> </div> -<div id="outline-container-org2bb9455" class="outline-2"> -<h2 id="org2bb9455"><span class="section-number-2">14</span> Should I choose CC-BY or CC0?</h2> +<div id="outline-container-orgae6461b" class="outline-2"> +<h2 id="orgae6461b"><span class="section-number-2">14</span> Should I choose CC-BY or CC0?</h2> <div class="outline-text-2" id="text-14"> <p> Restrictive data licenses are hampering data sharing and reproducible @@ -486,8 +479,8 @@ In all honesty: we prefer both data and software to be free. 
</div> </div> -<div id="outline-container-org62bf23f" class="outline-2"> -<h2 id="org62bf23f"><span class="section-number-2">15</span> How do I deal with private data and privacy?</h2> +<div id="outline-container-org3ea90a9" class="outline-2"> +<h2 id="org3ea90a9"><span class="section-number-2">15</span> How do I deal with private data and privacy?</h2> <div class="outline-text-2" id="text-15"> <p> A public sequence resource is about public data. Metadata can refer to @@ -498,8 +491,8 @@ plan to combine identifiers with clinical data stored securely at </div> </div> -<div id="outline-container-org40c6da0" class="outline-2"> -<h2 id="org40c6da0"><span class="section-number-2">16</span> How do I communicate with you?</h2> +<div id="outline-container-org7ff7106" class="outline-2"> +<h2 id="org7ff7106"><span class="section-number-2">16</span> How do I communicate with you?</h2> <div class="outline-text-2" id="text-16"> <p> We use a <a href="https://gitter.im/arvados/pubseq?utm_source=share-link&utm_medium=link&utm_campaign=share-link">gitter channel</a> you can join. @@ -507,8 +500,8 @@ We use a <a href="https://gitter.im/arvados/pubseq?utm_source=share-link&utm </div> </div> -<div id="outline-container-org1f27c44" class="outline-2"> -<h2 id="org1f27c44"><span class="section-number-2">17</span> Who are the sponsors?</h2> +<div id="outline-container-org9566fa7" class="outline-2"> +<h2 id="org9566fa7"><span class="section-number-2">17</span> Who are the sponsors?</h2> <div class="outline-text-2" id="text-17"> <p> The main sponsors are listed in the footer. In addition to the time @@ -519,7 +512,7 @@ for donating COVID-19 related compute time. </div> </div> <div id="postamble" class="status"> -<hr><small>Created by <a href="http://thebird.nl/">Pjotr Prins</a> (pjotr.public768 at thebird 'dot' nl) using Emacs org-mode and a healthy dose of Lisp!<br />Modified 2020-05-29 Fri 08:26</small>. 
+<hr><small>Created by <a href="http://thebird.nl/">Pjotr Prins</a> (pjotr.public768 at thebird 'dot' nl) using Emacs org-mode and a healthy dose of Lisp!<br />Modified 2020-07-12 Sun 04:54</small>. </div> </body> </html> diff --git a/doc/web/about.org b/doc/web/about.org index b6387e7..ad13bc3 100644 --- a/doc/web/about.org +++ b/doc/web/about.org @@ -33,9 +33,10 @@ The *public sequence resource* is an initiative by [[https://github.com/arvados/ ontology experts who want to create something agile and useful for the wider research community. The initiative started at the COVID-19 biohackathon in April 2020 and is ongoing. The main project drivers -are Pjotr Prins (UTHSC), Peter Amstutz (Curii), Michael Crusoe (Common -Workflow Language), Thomas Liener (consultant, formerly EBI) and -Jerven Bolleman (Swiss Institute of Bioinformatics). +are Pjotr Prins (UTHSC), Peter Amstutz (Curii), Andrea Guarracino +(University of Rome Tor Vergata), Michael Crusoe (Common Workflow +Language), Thomas Liener (consultant, formerly EBI), Erik Garrison +(UCSC) and Jerven Bolleman (Swiss Institute of Bioinformatics). Notably, as this is a free software initiative, the project represents major work by hundreds of software developers and ontology and data @@ -48,11 +49,12 @@ bioinformatics using agile methods. Unlike the resources from large institutes we can improve things on a dime and anyone can contribute to building out this resource! -Importantly: all data is published under the [[https://creativecommons.org/licenses/by/4.0/][Creative Commons 4.0 -attribution license]] which means it data can be published and workflows -can run in public environments allowing for improved access for -research and reproducible results. This contrasts with some other -public resources, including GISAID. 
+Importantly: all data is published under either the [[https://creativecommons.org/licenses/by/4.0/][Creative Commons +4.0 attribution license]] or the [[https://creativecommons.org/share-your-work/public-domain/cc0/][CC0 “No Rights Reserved” license]] which +means the data can be published and workflows can run in public +environments allowing for improved access for research and +reproducible results. This contrasts with some other public resources, +including GISAID. * Why should I upload my data here? @@ -105,6 +107,8 @@ using workflows from the High Performance Open Biology Lab defined The Swiss Institute of Bioinformatics has included this data in https://covid-19-sparql.expasy.org/ and made it part of [[https://www.uniprot.org/][Uniprot]]. +The Pantograph [[https://graph-genome.github.io/][viewer]] uses PubSeq data for their visualisations. + [[https://uthsc.edu][UTHSC]] and [[https://www.ornl.gov/news/ornl-fight-against-covid-19][ORNL]] use COVID-19 PubSeq data for protein prediction and drug development. 
diff --git a/example/maximum_metadata_example.yaml b/example/maximum_metadata_example.yaml index 432877c..54736f8 100644 --- a/example/maximum_metadata_example.yaml +++ b/example/maximum_metadata_example.yaml @@ -1,5 +1,11 @@ id: placeholder +license: + license_type: http://creativecommons.org/licenses/by/4.0/ + title: "Sample" + attribution_name: "John doe, Joe Boe, Jonny Oe" + attribution_url: http://covid19.genenetwork.org/id + host: host_id: XX1 host_species: http://purl.obolibrary.org/obo/NCBITaxon_9606 @@ -13,9 +19,9 @@ host: additional_host_information: Optional free text field for additional information sample: - sample_id: Id of the sample as defined by the submitter + sample_id: Id of the sample as defined by the submitter collector_name: Name of the person that took the sample - collecting_institution: Institute that was responsible of sampling + collecting_institution: Institute that was responsible of sampling specimen_source: [http://purl.obolibrary.org/obo/NCIT_C155831,http://purl.obolibrary.org/obo/NCIT_C155835] collection_date: "2020-01-01" collection_location: http://www.wikidata.org/entity/Q148 diff --git a/example/minimal_metadata_example.yaml b/example/minimal_metadata_example.yaml index 51f8a87..1b46cc7 100644 --- a/example/minimal_metadata_example.yaml +++ b/example/minimal_metadata_example.yaml @@ -1,5 +1,9 @@ id: placeholder + +license: + license_type: http://creativecommons.org/licenses/by/4.0/ + host: host_species: http://purl.obolibrary.org/obo/NCBITaxon_9606 @@ -15,4 +19,4 @@ technology: sample_sequencing_technology: [http://www.ebi.ac.uk/efo/EFO_0008632] submitter: - authors: [John Doe]
\ No newline at end of file + authors: [John Doe] |