From 2bcf4c45eaba42b36cff93c9cd5e461baaf3eae1 Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Thu, 28 May 2020 10:44:42 -0500
Subject: Web: debugging metadata submission

---
 bh20sequploader/bh20seq-schema.yml |  2 +-
 bh20sequploader/bh20seq-shex.rdf   |  6 +++---
 bh20sequploader/main.py            | 10 +++++++++-
 bh20sequploader/qc_metadata.py     |  5 ++++-
 4 files changed, 17 insertions(+), 6 deletions(-)

(limited to 'bh20sequploader')

diff --git a/bh20sequploader/bh20seq-schema.yml b/bh20sequploader/bh20seq-schema.yml
index 8a16bd3..5b3812b 100644
--- a/bh20sequploader/bh20seq-schema.yml
+++ b/bh20sequploader/bh20seq-schema.yml
@@ -149,7 +149,7 @@ $graph:
   fields:
     sample_sequencing_technology:
       doc: Technology that was used to sequence this sample (e.g Sanger, Nanopor MiniION)
-      type: string[]?
+      type: string[]
       jsonldPredicate:
         _id: http://purl.obolibrary.org/obo/OBI_0600047
         _type: "@id"
diff --git a/bh20sequploader/bh20seq-shex.rdf b/bh20sequploader/bh20seq-shex.rdf
index a017805..6b196bd 100644
--- a/bh20sequploader/bh20seq-shex.rdf
+++ b/bh20sequploader/bh20seq-shex.rdf
@@ -32,8 +32,8 @@ PREFIX wikidata: <http://www.wikidata.org/entity/>
 
 :sampleShape  {
     sio:SIO_000115 xsd:string;
-	evs:C25164 xsd:string;
-	obo:GAZ_00000448 [wikidata:~] ;
+	  evs:C25164 xsd:string;
+	  obo:GAZ_00000448 [wikidata:~] ;
     obo:OBI_0001895 xsd:string ?;
     obo:NCIT_C41206 xsd:string ?;
     obo:OBI_0001479 IRI {0,2};
@@ -65,4 +65,4 @@ PREFIX wikidata: <http://www.wikidata.org/entity/>
 :virusShape{
 	edam:data_1875 [ obo:NCBITaxon_~ ] ;
   	sio:SIO_010055 xsd:string ?;
-}
\ No newline at end of file
+}
diff --git a/bh20sequploader/main.py b/bh20sequploader/main.py
index 0d463f7..2cd15c2 100644
--- a/bh20sequploader/main.py
+++ b/bh20sequploader/main.py
@@ -3,6 +3,7 @@ import time
 import arvados
 import arvados.collection
 import json
+import logging
 import magic
 from pathlib import Path
 import urllib.request
@@ -13,6 +14,10 @@ sys.path.insert(0,'.')
 from bh20sequploader.qc_metadata import qc_metadata
 from bh20sequploader.qc_fasta import qc_fasta
 
+logging.basicConfig(level=logging.DEBUG)
+log = logging.getLogger(__name__)
+log.debug("Entering sequence uploader")
+
 ARVADOS_API_HOST='lugli.arvadosapi.com'
 ARVADOS_API_TOKEN='2fbebpmbo3rw3x05ueu2i6nx70zhrsb1p22ycu3ry34m4x4462'
 UPLOAD_PROJECT='lugli-j7d0g-n5clictpuvwk8aa'
@@ -26,13 +31,16 @@ def main():
 
     api = arvados.api(host=ARVADOS_API_HOST, token=ARVADOS_API_TOKEN, insecure=True)
 
+    log.debug("Checking metadata")
     if not qc_metadata(args.metadata.name):
-        print("Failed metadata qc")
+        log.warning("Failed metadata qc")
         exit(1)
 
     try:
+        log.debug("Checking FASTA QC")
         target = qc_fasta(args.sequence)
     except ValueError as e:
+        log.warning("Failed FASTA qc")
         print(e)
         exit(1)
 
diff --git a/bh20sequploader/qc_metadata.py b/bh20sequploader/qc_metadata.py
index fbfd286..9122ace 100644
--- a/bh20sequploader/qc_metadata.py
+++ b/bh20sequploader/qc_metadata.py
@@ -8,7 +8,9 @@ import traceback
 from rdflib import Graph, Namespace
 from pyshex.evaluate import evaluate
 
+
 def qc_metadata(metadatafile):
+    log = logging.getLogger(__name__)
     schema_resource = pkg_resources.resource_stream(__name__, "bh20seq-schema.yml")
     cache = {"https://raw.githubusercontent.com/arvados/bh20-seq-resource/master/bh20sequploader/bh20seq-schema.yml": schema_resource.read().decode("utf-8")}
     (document_loader,
@@ -28,10 +30,11 @@ def qc_metadata(metadatafile):
         rslt, reason = evaluate(g, shex, doc["id"], "https://raw.githubusercontent.com/arvados/bh20-seq-resource/master/bh20sequploader/bh20seq-shex.rdf#submissionShape")
 
         if not rslt:
+            log.debug(reason)
             print(reason)
 
         return rslt
     except Exception as e:
         traceback.print_exc()
-        logging.warn(e)
+        log.warning(e)
     return False
-- 
cgit v1.2.3