From b9691c7deae30bd6422fb7b0681572b7b6f78ae3 Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Wed, 15 Jul 2020 14:16:11 +0100
Subject: Web: add license to input form
---
bh20sequploader/bh20seq-schema.yml | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
(limited to 'bh20sequploader')
diff --git a/bh20sequploader/bh20seq-schema.yml b/bh20sequploader/bh20seq-schema.yml
index b3d4d12..29ac22c 100644
--- a/bh20sequploader/bh20seq-schema.yml
+++ b/bh20sequploader/bh20seq-schema.yml
@@ -15,7 +15,7 @@ $graph:
fields:
license_type:
doc: License types as defined in https://wiki.creativecommons.org/images/d/d6/Ccrel-1.0.pdf
- type: string?
+ type: string
jsonldPredicate:
_id: https://creativecommons.org/ns#License
title:
@@ -258,6 +258,7 @@ $graph:
virus: virusSchema
technology: technologySchema
submitter: submitterSchema
+ license: licenseSchema
id:
doc: The subject (eg the fasta/fastq file) that the metadata describes
type: string
--
cgit v1.2.3
From f4ed46dae20abe5147871495ede2d6ac2b0854bc Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Wed, 15 Jul 2020 14:30:56 +0100
Subject: Add RDF output
---
bh20sequploader/bh20seq-schema.yml | 9 +++++++--
bh20sequploader/bh20seq-shex.rdf | 24 +++++++++++++++++-------
doc/blog/using-covid-19-pubseq-part5.org | 2 ++
3 files changed, 26 insertions(+), 9 deletions(-)
(limited to 'bh20sequploader')
diff --git a/bh20sequploader/bh20seq-schema.yml b/bh20sequploader/bh20seq-schema.yml
index 29ac22c..c690e8a 100644
--- a/bh20sequploader/bh20seq-schema.yml
+++ b/bh20sequploader/bh20seq-schema.yml
@@ -23,16 +23,21 @@ $graph:
type: string?
jsonldPredicate:
_id: http://semanticscience.org/resource/SIO_001167
+ attribution_name:
+ doc: Attribution NAME related to data license
+ type: string?
+ jsonldPredicate:
+ _id: https://creativecommons.org/ns#attributionName
attribution_url:
doc: Attribution URL related to data license
type: string?
jsonldPredicate:
- _id: https://creativecommons.org/ns#Work
+ _id: https://creativecommons.org/ns#attributionURL
attribution_source:
doc: Attribution source URL related to data license
type: string?
jsonldPredicate:
- _id: https://creativecommons.org/ns#Work
+ _id: https://creativecommons.org/ns#attributionSource
- name: hostSchema
type: record
diff --git a/bh20sequploader/bh20seq-shex.rdf b/bh20sequploader/bh20seq-shex.rdf
index 965229c..c48267d 100644
--- a/bh20sequploader/bh20seq-shex.rdf
+++ b/bh20sequploader/bh20seq-shex.rdf
@@ -1,6 +1,7 @@
PREFIX :
PREFIX MainSchema:
PREFIX hostSchema:
+PREFIX cc:
PREFIX xsd:
PREFIX obo:
PREFIX sio:
@@ -15,10 +16,11 @@ PREFIX wikidata:
MainSchema:submitter @:submitterShape ;
MainSchema:technology @:technologyShape ;
MainSchema:virus @:virusShape;
+ MainSchema:license @:licenseShape;
}
:hostShape {
- efo:EFO_0000532 [ obo:NCBITaxon_~ ] ;
+ efo:EFO_0000532 [ obo:NCBITaxon_~ ] ;
sio:SIO_000115 xsd:string ?;
obo:PATO_0000047 [ obo:PATO_0000384 obo:PATO_0000383 obo:PATO_0001340] ?;
obo:PATO_0000011 xsd:integer ?;
@@ -32,14 +34,14 @@ PREFIX wikidata:
:sampleShape {
sio:SIO_000115 xsd:string;
- evs:C25164 xsd:string;
- obo:GAZ_00000448 [wikidata:~] ;
+ evs:C25164 xsd:string;
+ obo:GAZ_00000448 [wikidata:~] ;
obo:OBI_0001895 xsd:string ?;
obo:NCIT_C41206 xsd:string ?;
obo:OBI_0001479 IRI {0,2};
obo:OBI_0001472 xsd:string ?;
sio:SIO_001167 xsd:string ?;
- edam:data_2091 IRI {0,3};
+ edam:data_2091 IRI {0,3};
}
:submitterShape {
@@ -47,7 +49,7 @@ PREFIX wikidata:
sio:SIO_000116 xsd:string *;
sio:SIO_000172 xsd:string ?;
obo:NCIT_C37984 xsd:string ?;
- obo:NCIT_C37900 xsd:string ?;
+ obo:NCIT_C37900 xsd:string ?;
efo:EFO_0001741 xsd:string ?;
obo:NCIT_C42781 xsd:string ?;
obo:NCIT_C19026 xsd:string ?;
@@ -63,6 +65,14 @@ PREFIX wikidata:
}
:virusShape{
- edam:data_1875 [ obo:NCBITaxon_~ ] ;
- sio:SIO_010055 xsd:string ?;
+ edam:data_1875 [ obo:NCBITaxon_~ ] ;
+ sio:SIO_010055 xsd:string ?;
}
+
+:licenseShape{
+ cc:License xsd:string;
+ sio:SIO_001167 xsd:string ?;
+ cc:attributionName xsd:string ?;
+ cc:attributionURL xsd:string ?;
+ cc:attributionSource xsd:string ?;
+}
\ No newline at end of file
diff --git a/doc/blog/using-covid-19-pubseq-part5.org b/doc/blog/using-covid-19-pubseq-part5.org
index aa06d5e..cb11f43 100644
--- a/doc/blog/using-covid-19-pubseq-part5.org
+++ b/doc/blog/using-covid-19-pubseq-part5.org
@@ -125,4 +125,6 @@ To add the new fields to the form we have to modify it a little. If we
go to the upload form we need to add the license box. The schema is
loaded in [[https://github.com/arvados/bh20-seq-resource/blob/a0c8ebd57b875f265e8b0efec4abfaf892eb6c45/bh20simplewebuploader/main.py#L229][main.py]] in the 'generate_form' function.
+With this [[https://github.com/arvados/bh20-seq-resource/commit/b9691c7deae30bd6422fb7b0681572b7b6f78ae3][patch]] the website adds the license input fields on the form.
+
/Note: work in progress/
--
cgit v1.2.3
From 712614e5627e54df7ec6ab975dc86a1055051455 Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Wed, 15 Jul 2020 14:54:59 +0100
Subject: License RDF
---
bh20sequploader/bh20seq-schema.yml | 3 ++-
bh20sequploader/bh20seq-shex.rdf | 3 ++-
doc/blog/using-covid-19-pubseq-part5.org | 29 +++++++++++++++++++++++------
3 files changed, 27 insertions(+), 8 deletions(-)
(limited to 'bh20sequploader')
diff --git a/bh20sequploader/bh20seq-schema.yml b/bh20sequploader/bh20seq-schema.yml
index c690e8a..ef55c55 100644
--- a/bh20sequploader/bh20seq-schema.yml
+++ b/bh20sequploader/bh20seq-schema.yml
@@ -1,6 +1,7 @@
$base: http://biohackathon.org/bh20-seq-schema
$namespaces:
cc: http://creativecommons.org/ns#
+ dc: http://purl.org/metadata/dublin_core_elements#
sch: https://schema.org/
efo: http://www.ebi.ac.uk/efo/
obo: http://purl.obolibrary.org/obo/
@@ -22,7 +23,7 @@ $graph:
doc: Attribution title related to data license
type: string?
jsonldPredicate:
- _id: http://semanticscience.org/resource/SIO_001167
+ _id: http://purl.org/metadata/dublin_core_elements#Title
attribution_name:
doc: Attribution NAME related to data license
type: string?
diff --git a/bh20sequploader/bh20seq-shex.rdf b/bh20sequploader/bh20seq-shex.rdf
index c48267d..9fab334 100644
--- a/bh20sequploader/bh20seq-shex.rdf
+++ b/bh20sequploader/bh20seq-shex.rdf
@@ -2,6 +2,7 @@ PREFIX :
PREFIX hostSchema:
PREFIX cc:
+PREFIX dc:
PREFIX xsd:
PREFIX obo:
PREFIX sio:
@@ -71,7 +72,7 @@ PREFIX wikidata:
:licenseShape{
cc:License xsd:string;
- sio:SIO_001167 xsd:string ?;
+ dc:Title xsd:string ?;
cc:attributionName xsd:string ?;
cc:attributionURL xsd:string ?;
cc:attributionSource xsd:string ?;
diff --git a/doc/blog/using-covid-19-pubseq-part5.org b/doc/blog/using-covid-19-pubseq-part5.org
index cb11f43..98c2c31 100644
--- a/doc/blog/using-covid-19-pubseq-part5.org
+++ b/doc/blog/using-covid-19-pubseq-part5.org
@@ -14,19 +14,20 @@
- [[#how-is-the-website-generated][How is the website generated?]]
- [[#modifying-the-schema][Modifying the schema]]
- [[#adding-fields-to-the-form][Adding fields to the form]]
+ - [[#testing-the-license-fields][Testing the license fields]]
* Modify Metadata
The public sequence resource uses multiple data formats listed on the
-[[./download][DOWNLOAD]] page. One of the most exciting features is the full support
+[[http://covid19.genenetwork.org/download][download]] page. One of the most exciting features is the full support
for RDF and semantic web/linked data ontologies. This technology
allows for querying data in unprescribed ways - that is, you can
formulate your own queries without dealing with a preset model of that
data (so typical of CSV files and SQL tables). Examples of exploring
-data are listed [[./blog?id=using-covid-19-pubseq-part1][here]].
+data are listed [[http://covid19.genenetwork.org/blog?id=using-covid-19-pubseq-part1][here]].
In this BLOG we are going to look at the metadata entered on the
-[[./][COVID-19 PubSeq]] website (or command line client). It is important to
+COVID-19 PubSeq website (or command line client). It is important to
understand that anyone, including you, can change that information!
* What is the schema?
@@ -42,8 +43,8 @@ All from that one metadata schema.
* Modifying the schema
One of the first things we want to do is to add a field for the data
-license. Initially we only support CC-4.0 as a license by default, but
-now we want to give uploaders the option to make it an even more
+license. Initially we only supported CC-4.0 as a license, but
+we wanted to give uploaders the option to use an even more
liberal CC0 license. The first step is to find a good ontology term
for the field. Searching for `creative commons cc0 rdf' rendered this
useful [[https://creativecommons.org/ns][page]]. We also find an [[https://wiki.creativecommons.org/wiki/CC_License_Rdf_Overview][overview]] where CC0 is represented as URI
@@ -127,4 +128,20 @@ loaded in [[https://github.com/arvados/bh20-seq-resource/blob/a0c8ebd57b875f265e
With this [[https://github.com/arvados/bh20-seq-resource/commit/b9691c7deae30bd6422fb7b0681572b7b6f78ae3][patch]] the website adds the license input fields on the form.
-/Note: work in progress/
+Finally, to make RDF output work we need to add expressions to bh20seq-shex.rdf. This
+was done with this [[https://github.com/arvados/bh20-seq-resource/commit/f4ed46dae20abe5147871495ede2d6ac2b0854bc][patch]]. In the end we decided to use the Dublin Core title,
+http://purl.org/metadata/dublin_core_elements#Title:
+
+#+BEGIN_SRC js
+:licenseShape{
+ cc:License xsd:string;
+ dc:Title xsd:string ?;
+ cc:attributionName xsd:string ?;
+ cc:attributionURL xsd:string ?;
+ cc:attributionSource xsd:string ?;
+}
+#+END_SRC
+
+Note that cc:attributionSource is not actually defined in the CC standard.
+
+* TODO Testing the license fields
--
cgit v1.2.3
From 01b192417f858d4389226b5130a430bd3b6d4416 Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Thu, 16 Jul 2020 09:25:58 +0100
Subject: Make license optional for now
---
bh20sequploader/bh20seq-schema.yml | 2 +-
bh20sequploader/bh20seq-shex.rdf | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
(limited to 'bh20sequploader')
diff --git a/bh20sequploader/bh20seq-schema.yml b/bh20sequploader/bh20seq-schema.yml
index ef55c55..ee852fa 100644
--- a/bh20sequploader/bh20seq-schema.yml
+++ b/bh20sequploader/bh20seq-schema.yml
@@ -16,7 +16,7 @@ $graph:
fields:
license_type:
doc: License types as defined in https://wiki.creativecommons.org/images/d/d6/Ccrel-1.0.pdf
- type: string
+ type: string?
jsonldPredicate:
_id: https://creativecommons.org/ns#License
title:
diff --git a/bh20sequploader/bh20seq-shex.rdf b/bh20sequploader/bh20seq-shex.rdf
index 9fab334..7331e86 100644
--- a/bh20sequploader/bh20seq-shex.rdf
+++ b/bh20sequploader/bh20seq-shex.rdf
@@ -71,7 +71,7 @@ PREFIX wikidata:
}
:licenseShape{
- cc:License xsd:string;
+ cc:License xsd:string ?;
dc:Title xsd:string ?;
cc:attributionName xsd:string ?;
cc:attributionURL xsd:string ?;
--
cgit v1.2.3
From a10569f51072569604b2384e6e4d583b36de73c4 Mon Sep 17 00:00:00 2001
From: Peter Amstutz
Date: Thu, 16 Jul 2020 12:01:38 -0400
Subject: Make license optional for now
Arvados-DCO-1.1-Signed-off-by: Peter Amstutz
---
bh20sequploader/bh20seq-schema.yml | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
(limited to 'bh20sequploader')
diff --git a/bh20sequploader/bh20seq-schema.yml b/bh20sequploader/bh20seq-schema.yml
index ee852fa..0aead3b 100644
--- a/bh20sequploader/bh20seq-schema.yml
+++ b/bh20sequploader/bh20seq-schema.yml
@@ -16,7 +16,7 @@ $graph:
fields:
license_type:
doc: License types as defined in https://wiki.creativecommons.org/images/d/d6/Ccrel-1.0.pdf
- type: string?
+ type: string
jsonldPredicate:
_id: https://creativecommons.org/ns#License
title:
@@ -264,7 +264,7 @@ $graph:
virus: virusSchema
technology: technologySchema
submitter: submitterSchema
- license: licenseSchema
+ license: ["null", licenseSchema]
id:
doc: The subject (eg the fasta/fastq file) that the metadata describes
type: string
--
cgit v1.2.3
From 0e84b18cb134855d572d1f94d5d3c43571afe7e9 Mon Sep 17 00:00:00 2001
From: Peter Amstutz
Date: Thu, 16 Jul 2020 12:04:26 -0400
Subject: Make license optional
Arvados-DCO-1.1-Signed-off-by: Peter Amstutz
---
bh20sequploader/bh20seq-shex.rdf | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
(limited to 'bh20sequploader')
diff --git a/bh20sequploader/bh20seq-shex.rdf b/bh20sequploader/bh20seq-shex.rdf
index 7331e86..bbc7309 100644
--- a/bh20sequploader/bh20seq-shex.rdf
+++ b/bh20sequploader/bh20seq-shex.rdf
@@ -17,7 +17,7 @@ PREFIX wikidata:
MainSchema:submitter @:submitterShape ;
MainSchema:technology @:technologyShape ;
MainSchema:virus @:virusShape;
- MainSchema:license @:licenseShape;
+ MainSchema:license @:licenseShape ?;
}
:hostShape {
@@ -71,7 +71,7 @@ PREFIX wikidata:
}
:licenseShape{
- cc:License xsd:string ?;
+ cc:License xsd:string ;
dc:Title xsd:string ?;
cc:attributionName xsd:string ?;
cc:attributionURL xsd:string ?;
--
cgit v1.2.3
From d34374f0e822edd1539ea5de6f8522f2b761de3f Mon Sep 17 00:00:00 2001
From: Peter Amstutz
Date: Thu, 16 Jul 2020 14:48:22 -0400
Subject: Improve uploader reporting.
Arvados-DCO-1.1-Signed-off-by: Peter Amstutz
---
bh20sequploader/main.py | 22 ++++++++++------------
bh20simplewebuploader/main.py | 4 ++--
bh20simplewebuploader/templates/error.html | 2 +-
bh20simplewebuploader/templates/success.html | 2 +-
4 files changed, 14 insertions(+), 16 deletions(-)
(limited to 'bh20sequploader')
diff --git a/bh20sequploader/main.py b/bh20sequploader/main.py
index f744a8c..6049bf9 100644
--- a/bh20sequploader/main.py
+++ b/bh20sequploader/main.py
@@ -29,11 +29,10 @@ def qc_stuff(metadata, sequence_p1, sequence_p2, do_qc=True):
try:
log.debug("Checking metadata" if do_qc else "Skipping metadata check")
if do_qc and not qc_metadata(metadata.name):
- log.warning("Failed metadata qc")
+ log.warning("Failed metadata QC")
failed = True
except Exception as e:
- log.debug(e)
- print(e)
+ log.exception("Failed metadata QC")
failed = True
target = []
@@ -45,8 +44,7 @@ def qc_stuff(metadata, sequence_p1, sequence_p2, do_qc=True):
target[0] = ("reads_1."+target[0][0][6:], target[0][1])
target[1] = ("reads_2."+target[1][0][6:], target[0][1])
except Exception as e:
- log.debug(e)
- print(e)
+ log.exception("Failed sequence QC")
failed = True
if failed:
@@ -82,7 +80,7 @@ def main():
seqlabel = target[0][1]
if args.validate:
- print("Valid")
+ log.info("Valid")
exit(0)
col = arvados.collection.Collection(api_client=api)
@@ -91,10 +89,10 @@ def main():
if args.sequence_p2:
upload_sequence(col, target[1], args.sequence_p2)
- print("Reading metadata")
+ log.info("Reading metadata")
with col.open("metadata.yaml", "w") as f:
r = args.metadata.read(65536)
- print(r[0:20])
+ log.info(r[0:20])
while r:
f.write(r)
r = args.metadata.read(65536)
@@ -118,7 +116,7 @@ def main():
["portable_data_hash", "=", col.portable_data_hash()]]).execute()
if dup["items"]:
# This exact collection has been uploaded before.
- print("Duplicate of %s" % ([d["uuid"] for d in dup["items"]]))
+ log.error("Duplicate of %s" % ([d["uuid"] for d in dup["items"]]))
exit(1)
if args.trusted:
@@ -131,9 +129,9 @@ def main():
(seqlabel, properties['upload_user'], properties['upload_ip']),
properties=properties, ensure_unique_name=True)
- print("Saved to %s" % col.manifest_locator())
-
- print("Done")
+ log.info("Saved to %s" % col.manifest_locator())
+ log.info("Done")
+ exit(0)
if __name__ == "__main__":
main()
diff --git a/bh20simplewebuploader/main.py b/bh20simplewebuploader/main.py
index 62b68d9..c814f30 100644
--- a/bh20simplewebuploader/main.py
+++ b/bh20simplewebuploader/main.py
@@ -445,12 +445,12 @@ def receive_files():
if result.returncode != 0:
# It didn't work. Complain.
- error_message="Uploader returned value {} and said:".format(result.returncode) + str(result.stderr.decode('utf-8'))
+ error_message="Uploader returned value {} and said:\n".format(result.returncode) + str(result.stderr.decode('utf-8'))
print(error_message, file=sys.stderr)
return (render_template('error.html', error_message=error_message), 403)
else:
# It worked. Say so.
- return render_template('success.html', log=result.stdout.decode('utf-8', errors='replace'))
+ return render_template('success.html', log=result.stderr.decode('utf-8', errors='replace'))
finally:
shutil.rmtree(dest_dir)
diff --git a/bh20simplewebuploader/templates/error.html b/bh20simplewebuploader/templates/error.html
index b1d9402..fc08aed 100644
--- a/bh20simplewebuploader/templates/error.html
+++ b/bh20simplewebuploader/templates/error.html
@@ -15,7 +15,7 @@
- Click here to try again.
+ Click here to try again.