From b9691c7deae30bd6422fb7b0681572b7b6f78ae3 Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Wed, 15 Jul 2020 14:16:11 +0100
Subject: Web: add license to input form
---
 bh20sequploader/bh20seq-schema.yml | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)
(limited to 'bh20sequploader')
diff --git a/bh20sequploader/bh20seq-schema.yml b/bh20sequploader/bh20seq-schema.yml
index b3d4d12..29ac22c 100644
--- a/bh20sequploader/bh20seq-schema.yml
+++ b/bh20sequploader/bh20seq-schema.yml
@@ -15,7 +15,7 @@ $graph:
   fields:
     license_type:
       doc: License types as defined in https://wiki.creativecommons.org/images/d/d6/Ccrel-1.0.pdf
-      type: string?
+      type: string
       jsonldPredicate:
           _id: https://creativecommons.org/ns#License
     title:
@@ -258,6 +258,7 @@ $graph:
     virus: virusSchema
     technology: technologySchema
     submitter: submitterSchema
+    license: licenseSchema
     id:
       doc: The subject (eg the fasta/fastq file) that the metadata describes
       type: string
-- 
cgit 1.4.1
From f4ed46dae20abe5147871495ede2d6ac2b0854bc Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Wed, 15 Jul 2020 14:30:56 +0100
Subject: Add RDF output
---
 bh20sequploader/bh20seq-schema.yml       |  9 +++++++--
 bh20sequploader/bh20seq-shex.rdf         | 24 +++++++++++++++++-------
 doc/blog/using-covid-19-pubseq-part5.org |  2 ++
 3 files changed, 26 insertions(+), 9 deletions(-)
(limited to 'bh20sequploader')
diff --git a/bh20sequploader/bh20seq-schema.yml b/bh20sequploader/bh20seq-schema.yml
index 29ac22c..c690e8a 100644
--- a/bh20sequploader/bh20seq-schema.yml
+++ b/bh20sequploader/bh20seq-schema.yml
@@ -23,16 +23,21 @@ $graph:
       type: string?
       jsonldPredicate:
           _id: http://semanticscience.org/resource/SIO_001167
+    attribution_name:
+      doc: Attribution NAME related to data license
+      type: string?
+      jsonldPredicate:
+          _id: https://creativecommons.org/ns#attributionName
     attribution_url:
       doc: Attribution URL related to data license
       type: string?
       jsonldPredicate:
-          _id: https://creativecommons.org/ns#Work
+          _id: https://creativecommons.org/ns#attributionURL
     attribution_source:
       doc: Attribution source URL related to data license
       type: string?
       jsonldPredicate:
-          _id: https://creativecommons.org/ns#Work
+          _id: https://creativecommons.org/ns#attributionSource
 
 - name: hostSchema
   type: record
diff --git a/bh20sequploader/bh20seq-shex.rdf b/bh20sequploader/bh20seq-shex.rdf
index 965229c..c48267d 100644
--- a/bh20sequploader/bh20seq-shex.rdf
+++ b/bh20sequploader/bh20seq-shex.rdf
@@ -1,6 +1,7 @@
 PREFIX : 
 PREFIX MainSchema: 
 PREFIX hostSchema: 
+PREFIX cc:  
 PREFIX xsd: 
 PREFIX obo: 
 PREFIX sio: 
@@ -15,10 +16,11 @@ PREFIX wikidata: 
   MainSchema:submitter @:submitterShape ;
   MainSchema:technology @:technologyShape ;
   MainSchema:virus @:virusShape;
+  MainSchema:license @:licenseShape;
 }
 
 :hostShape  {
-  	efo:EFO_0000532 [ obo:NCBITaxon_~ ] ;
+    efo:EFO_0000532 [ obo:NCBITaxon_~ ] ;
     sio:SIO_000115 xsd:string ?;
     obo:PATO_0000047 [ obo:PATO_0000384 obo:PATO_0000383 obo:PATO_0001340] ?;
     obo:PATO_0000011 xsd:integer ?;
@@ -32,14 +34,14 @@ PREFIX wikidata: 
 
 :sampleShape  {
     sio:SIO_000115 xsd:string;
-	  evs:C25164 xsd:string;
-	  obo:GAZ_00000448 [wikidata:~] ;
+    evs:C25164 xsd:string;
+    obo:GAZ_00000448 [wikidata:~] ;
     obo:OBI_0001895 xsd:string ?;
     obo:NCIT_C41206 xsd:string ?;
     obo:OBI_0001479 IRI {0,2};
     obo:OBI_0001472 xsd:string ?;
     sio:SIO_001167 xsd:string ?;
-	edam:data_2091 IRI {0,3};
+    edam:data_2091 IRI {0,3};
 }
 
 :submitterShape {
@@ -47,7 +49,7 @@ PREFIX wikidata: 
     sio:SIO_000116 xsd:string *;
     sio:SIO_000172 xsd:string ?;
     obo:NCIT_C37984 xsd:string ?;
-  	obo:NCIT_C37900 xsd:string ?;
+    obo:NCIT_C37900 xsd:string ?;
     efo:EFO_0001741 xsd:string ?;
     obo:NCIT_C42781 xsd:string ?;
     obo:NCIT_C19026 xsd:string ?;
@@ -63,6 +65,14 @@ PREFIX wikidata: 
 }
 
 :virusShape{
-	edam:data_1875 [ obo:NCBITaxon_~ ] ;
-  	sio:SIO_010055 xsd:string ?;
+    edam:data_1875 [ obo:NCBITaxon_~ ] ;
+    sio:SIO_010055 xsd:string ?;
 }
+
+:licenseShape{
+    cc:License xsd:string;
+    sio:SIO_001167 xsd:string ?;
+    cc:attributionName xsd:string ?;
+    cc:attributionURL xsd:string ?;
+    cc:attributionSource xsd:string ?;
+}
\ No newline at end of file
diff --git a/doc/blog/using-covid-19-pubseq-part5.org b/doc/blog/using-covid-19-pubseq-part5.org
index aa06d5e..cb11f43 100644
--- a/doc/blog/using-covid-19-pubseq-part5.org
+++ b/doc/blog/using-covid-19-pubseq-part5.org
@@ -125,4 +125,6 @@ To add the new fields to the form we have to modify it a little. If we
 go to the upload form we need to add the license box. The schema is
 loaded in [[https://github.com/arvados/bh20-seq-resource/blob/a0c8ebd57b875f265e8b0efec4abfaf892eb6c45/bh20simplewebuploader/main.py#L229][main.py]] in the 'generate_form' function.
 
+With this [[https://github.com/arvados/bh20-seq-resource/commit/b9691c7deae30bd6422fb7b0681572b7b6f78ae3][patch]] the website adds the license input fields on the form.
+
 /Note: work in progress/
-- 
cgit 1.4.1
From 712614e5627e54df7ec6ab975dc86a1055051455 Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Wed, 15 Jul 2020 14:54:59 +0100
Subject: License RDF
---
 bh20sequploader/bh20seq-schema.yml       |  3 ++-
 bh20sequploader/bh20seq-shex.rdf         |  3 ++-
 doc/blog/using-covid-19-pubseq-part5.org | 29 +++++++++++++++++++++++------
 3 files changed, 27 insertions(+), 8 deletions(-)
(limited to 'bh20sequploader')
diff --git a/bh20sequploader/bh20seq-schema.yml b/bh20sequploader/bh20seq-schema.yml
index c690e8a..ef55c55 100644
--- a/bh20sequploader/bh20seq-schema.yml
+++ b/bh20sequploader/bh20seq-schema.yml
@@ -1,6 +1,7 @@
 $base: http://biohackathon.org/bh20-seq-schema
 $namespaces:
   cc:  http://creativecommons.org/ns#
+  dc:  http://purl.org/metadata/dublin_core_elements#
   sch: https://schema.org/
   efo: http://www.ebi.ac.uk/efo/
   obo: http://purl.obolibrary.org/obo/
@@ -22,7 +23,7 @@ $graph:
       doc: Attribution title related to data license
       type: string?
       jsonldPredicate:
-          _id: http://semanticscience.org/resource/SIO_001167
+          _id: http://purl.org/metadata/dublin_core_elements#Title
     attribution_name:
       doc: Attribution NAME related to data license
       type: string?
diff --git a/bh20sequploader/bh20seq-shex.rdf b/bh20sequploader/bh20seq-shex.rdf
index c48267d..9fab334 100644
--- a/bh20sequploader/bh20seq-shex.rdf
+++ b/bh20sequploader/bh20seq-shex.rdf
@@ -2,6 +2,7 @@ PREFIX : 
 PREFIX hostSchema: 
 PREFIX cc:  
+PREFIX dc:  
 PREFIX xsd: 
 PREFIX obo: 
 PREFIX sio: 
@@ -71,7 +72,7 @@ PREFIX wikidata: 
 
 :licenseShape{
     cc:License xsd:string;
-    sio:SIO_001167 xsd:string ?;
+    dc:Title xsd:string ?;
     cc:attributionName xsd:string ?;
     cc:attributionURL xsd:string ?;
     cc:attributionSource xsd:string ?;
diff --git a/doc/blog/using-covid-19-pubseq-part5.org b/doc/blog/using-covid-19-pubseq-part5.org
index cb11f43..98c2c31 100644
--- a/doc/blog/using-covid-19-pubseq-part5.org
+++ b/doc/blog/using-covid-19-pubseq-part5.org
@@ -14,19 +14,20 @@
  - [[#how-is-the-website-generated][How is the website generated?]]
  - [[#modifying-the-schema][Modifying the schema]]
  - [[#adding-fields-to-the-form][Adding fields to the form]]
+ - [[#testing-the-license-fields][Testing the license fields]]
 
 * Modify Metadata
 
 The public sequence resource uses multiple data formats listed on the
-[[./download][DOWNLOAD]] page. One of the most exciting features is the full support
+[[http://covid19.genenetwork.org/download][download]] page. One of the most exciting features is the full support
 for RDF and semantic web/linked data ontologies. This technology
 allows for querying data in unprescribed ways - that is, you can
 formulate your own queries without dealing with a preset model of that
 data (so typical of CSV files and SQL tables). Examples of exploring
-data are listed [[./blog?id=using-covid-19-pubseq-part1][here]].
+data are listed [[http://covid19.genenetwork.org/blog?id=using-covid-19-pubseq-part1][here]].
 
 In this BLOG we are going to look at the metadata entered on the
-[[./][COVID-19 PubSeq]] website (or command line client). It is important to
+COVID-19 PubSeq website (or command line client). It is important to
 understand that anyone, including you, can change that information!
 
 * What is the schema?
@@ -42,8 +43,8 @@ All from that one metadata schema.
 * Modifying the schema
 
 One of the first things we want to do is to add a field for the data
-license. Initially we only support CC-4.0 as a license by default, but
-now we want to give uploaders the option to make it an even more
+license. Initially we only supported CC-4.0 as a license, but
+we wanted to give uploaders the option to use an even more
 liberal CC0 license. The first step is to find a good ontology term
 for the field. Searching for `creative commons cc0 rdf' rendered this
 useful [[https://creativecommons.org/ns][page]].  We also find an [[https://wiki.creativecommons.org/wiki/CC_License_Rdf_Overview][overview]] where CC0 is represented as URI
@@ -127,4 +128,20 @@ loaded in [[https://github.com/arvados/bh20-seq-resource/blob/a0c8ebd57b875f265e
 
 With this [[https://github.com/arvados/bh20-seq-resource/commit/b9691c7deae30bd6422fb7b0681572b7b6f78ae3][patch]] the website adds the license input fields on the form.
 
-/Note: work in progress/
+Finally, to make the RDF output work we need to add shape expressions to bh20seq-shex.rdf. This
+was done with this [[https://github.com/arvados/bh20-seq-resource/commit/f4ed46dae20abe5147871495ede2d6ac2b0854bc][patch]]. In the end we decided to use the Dublin Core title,
+http://purl.org/metadata/dublin_core_elements#Title:
+
+#+BEGIN_SRC js
+:licenseShape{
+    cc:License xsd:string;
+    dc:Title xsd:string ?;
+    cc:attributionName xsd:string ?;
+    cc:attributionURL xsd:string ?;
+    cc:attributionSource xsd:string ?;
+}
+#+END_SRC
+
+Note that cc:attributionSource is not really defined in the CC standard.
+
+* TODO Testing the license fields
-- 
cgit 1.4.1
From 01b192417f858d4389226b5130a430bd3b6d4416 Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Thu, 16 Jul 2020 09:25:58 +0100
Subject: Make license optional for now
---
 bh20sequploader/bh20seq-schema.yml | 2 +-
 bh20sequploader/bh20seq-shex.rdf   | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)
(limited to 'bh20sequploader')
diff --git a/bh20sequploader/bh20seq-schema.yml b/bh20sequploader/bh20seq-schema.yml
index ef55c55..ee852fa 100644
--- a/bh20sequploader/bh20seq-schema.yml
+++ b/bh20sequploader/bh20seq-schema.yml
@@ -16,7 +16,7 @@ $graph:
   fields:
     license_type:
       doc: License types as defined in https://wiki.creativecommons.org/images/d/d6/Ccrel-1.0.pdf
-      type: string
+      type: string?
       jsonldPredicate:
           _id: https://creativecommons.org/ns#License
     title:
diff --git a/bh20sequploader/bh20seq-shex.rdf b/bh20sequploader/bh20seq-shex.rdf
index 9fab334..7331e86 100644
--- a/bh20sequploader/bh20seq-shex.rdf
+++ b/bh20sequploader/bh20seq-shex.rdf
@@ -71,7 +71,7 @@ PREFIX wikidata: 
 }
 
 :licenseShape{
-    cc:License xsd:string;
+    cc:License xsd:string ?;
     dc:Title xsd:string ?;
     cc:attributionName xsd:string ?;
     cc:attributionURL xsd:string ?;
-- 
cgit 1.4.1
From a10569f51072569604b2384e6e4d583b36de73c4 Mon Sep 17 00:00:00 2001
From: Peter Amstutz
Date: Thu, 16 Jul 2020 12:01:38 -0400
Subject: Make license optional for now
Arvados-DCO-1.1-Signed-off-by: Peter Amstutz 
---
 bh20sequploader/bh20seq-schema.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)
(limited to 'bh20sequploader')
diff --git a/bh20sequploader/bh20seq-schema.yml b/bh20sequploader/bh20seq-schema.yml
index ee852fa..0aead3b 100644
--- a/bh20sequploader/bh20seq-schema.yml
+++ b/bh20sequploader/bh20seq-schema.yml
@@ -16,7 +16,7 @@ $graph:
   fields:
     license_type:
       doc: License types as defined in https://wiki.creativecommons.org/images/d/d6/Ccrel-1.0.pdf
-      type: string?
+      type: string
       jsonldPredicate:
           _id: https://creativecommons.org/ns#License
     title:
@@ -264,7 +264,7 @@ $graph:
     virus: virusSchema
     technology: technologySchema
     submitter: submitterSchema
-    license: licenseSchema
+    license: ["null", licenseSchema]
     id:
       doc: The subject (eg the fasta/fastq file) that the metadata describes
       type: string
-- 
cgit 1.4.1
From 0e84b18cb134855d572d1f94d5d3c43571afe7e9 Mon Sep 17 00:00:00 2001
From: Peter Amstutz
Date: Thu, 16 Jul 2020 12:04:26 -0400
Subject: Make license optional
Arvados-DCO-1.1-Signed-off-by: Peter Amstutz 
---
 bh20sequploader/bh20seq-shex.rdf | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)
(limited to 'bh20sequploader')
diff --git a/bh20sequploader/bh20seq-shex.rdf b/bh20sequploader/bh20seq-shex.rdf
index 7331e86..bbc7309 100644
--- a/bh20sequploader/bh20seq-shex.rdf
+++ b/bh20sequploader/bh20seq-shex.rdf
@@ -17,7 +17,7 @@ PREFIX wikidata: 
   MainSchema:submitter @:submitterShape ;
   MainSchema:technology @:technologyShape ;
   MainSchema:virus @:virusShape;
-  MainSchema:license @:licenseShape;
+  MainSchema:license @:licenseShape ?;
 }
 
 :hostShape  {
@@ -71,7 +71,7 @@ PREFIX wikidata: 
 }
 
 :licenseShape{
-    cc:License xsd:string ?;
+    cc:License xsd:string ;
     dc:Title xsd:string ?;
     cc:attributionName xsd:string ?;
     cc:attributionURL xsd:string ?;
-- 
cgit 1.4.1
From d34374f0e822edd1539ea5de6f8522f2b761de3f Mon Sep 17 00:00:00 2001
From: Peter Amstutz
Date: Thu, 16 Jul 2020 14:48:22 -0400
Subject: Improve uploader reporting.
Arvados-DCO-1.1-Signed-off-by: Peter Amstutz 
---
 bh20sequploader/main.py                      | 22 ++++++++++------------
 bh20simplewebuploader/main.py                |  4 ++--
 bh20simplewebuploader/templates/error.html   |  2 +-
 bh20simplewebuploader/templates/success.html |  2 +-
 4 files changed, 14 insertions(+), 16 deletions(-)
(limited to 'bh20sequploader')
diff --git a/bh20sequploader/main.py b/bh20sequploader/main.py
index f744a8c..6049bf9 100644
--- a/bh20sequploader/main.py
+++ b/bh20sequploader/main.py
@@ -29,11 +29,10 @@ def qc_stuff(metadata, sequence_p1, sequence_p2, do_qc=True):
     try:
         log.debug("Checking metadata" if do_qc else "Skipping metadata check")
         if do_qc and not qc_metadata(metadata.name):
-            log.warning("Failed metadata qc")
+            log.warning("Failed metadata QC")
             failed = True
     except Exception as e:
-        log.debug(e)
-        print(e)
+        log.exception("Failed metadata QC")
         failed = True
 
     target = []
@@ -45,8 +44,7 @@ def qc_stuff(metadata, sequence_p1, sequence_p2, do_qc=True):
             target[0] = ("reads_1."+target[0][0][6:], target[0][1])
             target[1] = ("reads_2."+target[1][0][6:], target[0][1])
     except Exception as e:
-        log.debug(e)
-        print(e)
+        log.exception("Failed sequence QC")
         failed = True
 
     if failed:
@@ -82,7 +80,7 @@ def main():
     seqlabel = target[0][1]
 
     if args.validate:
-        print("Valid")
+        log.info("Valid")
         exit(0)
 
     col = arvados.collection.Collection(api_client=api)
@@ -91,10 +89,10 @@ def main():
     if args.sequence_p2:
         upload_sequence(col, target[1], args.sequence_p2)
 
-    print("Reading metadata")
+    log.info("Reading metadata")
     with col.open("metadata.yaml", "w") as f:
         r = args.metadata.read(65536)
-        print(r[0:20])
+        log.info(r[0:20])
         while r:
             f.write(r)
             r = args.metadata.read(65536)
@@ -118,7 +116,7 @@ def main():
                                            ["portable_data_hash", "=", col.portable_data_hash()]]).execute()
     if dup["items"]:
         # This exact collection has been uploaded before.
-        print("Duplicate of %s" % ([d["uuid"] for d in dup["items"]]))
+        log.error("Duplicate of %s" % ([d["uuid"] for d in dup["items"]]))
         exit(1)
 
     if args.trusted:
@@ -131,9 +129,9 @@ def main():
                  (seqlabel, properties['upload_user'], properties['upload_ip']),
                  properties=properties, ensure_unique_name=True)
 
-    print("Saved to %s" % col.manifest_locator())
-
-    print("Done")
+    log.info("Saved to %s" % col.manifest_locator())
+    log.info("Done")
+    exit(0)
 
 if __name__ == "__main__":
     main()
diff --git a/bh20simplewebuploader/main.py b/bh20simplewebuploader/main.py
index 62b68d9..c814f30 100644
--- a/bh20simplewebuploader/main.py
+++ b/bh20simplewebuploader/main.py
@@ -445,12 +445,12 @@ def receive_files():
 
         if result.returncode != 0:
             # It didn't work. Complain.
-            error_message="Uploader returned value {} and said:".format(result.returncode) + str(result.stderr.decode('utf-8'))
+            error_message="Uploader returned value {} and said:\n".format(result.returncode) + str(result.stderr.decode('utf-8'))
             print(error_message, file=sys.stderr)
             return (render_template('error.html', error_message=error_message), 403)
         else:
             # It worked. Say so.
-            return render_template('success.html', log=result.stdout.decode('utf-8', errors='replace'))
+            return render_template('success.html', log=result.stderr.decode('utf-8', errors='replace'))
     finally:
         shutil.rmtree(dest_dir)
 
diff --git a/bh20simplewebuploader/templates/error.html b/bh20simplewebuploader/templates/error.html
index b1d9402..fc08aed 100644
--- a/bh20simplewebuploader/templates/error.html
+++ b/bh20simplewebuploader/templates/error.html
@@ -15,7 +15,7 @@
           
         
         
-            Click here to try again.
+            Click here to try again.