From 2bcf4c45eaba42b36cff93c9cd5e461baaf3eae1 Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Thu, 28 May 2020 10:44:42 -0500 Subject: Web: debugging metadata submission --- bh20sequploader/bh20seq-schema.yml | 2 +- bh20sequploader/bh20seq-shex.rdf | 6 +++--- bh20sequploader/main.py | 10 +++++++++- bh20sequploader/qc_metadata.py | 5 ++++- 4 files changed, 17 insertions(+), 6 deletions(-) (limited to 'bh20sequploader') diff --git a/bh20sequploader/bh20seq-schema.yml b/bh20sequploader/bh20seq-schema.yml index 8a16bd3..5b3812b 100644 --- a/bh20sequploader/bh20seq-schema.yml +++ b/bh20sequploader/bh20seq-schema.yml @@ -149,7 +149,7 @@ $graph: fields: sample_sequencing_technology: doc: Technology that was used to sequence this sample (e.g Sanger, Nanopor MiniION) - type: string[]? + type: string[] jsonldPredicate: _id: http://purl.obolibrary.org/obo/OBI_0600047 _type: "@id" diff --git a/bh20sequploader/bh20seq-shex.rdf b/bh20sequploader/bh20seq-shex.rdf index a017805..6b196bd 100644 --- a/bh20sequploader/bh20seq-shex.rdf +++ b/bh20sequploader/bh20seq-shex.rdf @@ -32,8 +32,8 @@ PREFIX wikidata: :sampleShape { sio:SIO_000115 xsd:string; - evs:C25164 xsd:string; - obo:GAZ_00000448 [wikidata:~] ; + evs:C25164 xsd:string; + obo:GAZ_00000448 [wikidata:~] ; obo:OBI_0001895 xsd:string ?; obo:NCIT_C41206 xsd:string ?; obo:OBI_0001479 IRI {0,2}; @@ -65,4 +65,4 @@ PREFIX wikidata: :virusShape{ edam:data_1875 [ obo:NCBITaxon_~ ] ; sio:SIO_010055 xsd:string ?; -} \ No newline at end of file +} diff --git a/bh20sequploader/main.py b/bh20sequploader/main.py index 0d463f7..2cd15c2 100644 --- a/bh20sequploader/main.py +++ b/bh20sequploader/main.py @@ -3,6 +3,7 @@ import time import arvados import arvados.collection import json +import logging import magic from pathlib import Path import urllib.request @@ -13,6 +14,10 @@ sys.path.insert(0,'.') from bh20sequploader.qc_metadata import qc_metadata from bh20sequploader.qc_fasta import qc_fasta +logging.basicConfig(level=logging.DEBUG) +log = logging.getLogger(__name__ ) +log.debug("Entering sequence uploader") + ARVADOS_API_HOST='lugli.arvadosapi.com' ARVADOS_API_TOKEN='2fbebpmbo3rw3x05ueu2i6nx70zhrsb1p22ycu3ry34m4x4462' UPLOAD_PROJECT='lugli-j7d0g-n5clictpuvwk8aa' @@ -26,13 +31,16 @@ def main(): api = arvados.api(host=ARVADOS_API_HOST, token=ARVADOS_API_TOKEN, insecure=True) + log.debug("Checking metadata") if not qc_metadata(args.metadata.name): - print("Failed metadata qc") + log.warning("Failed metadata qc") exit(1) try: + log.debug("Checking FASTA QC") target = qc_fasta(args.sequence) except ValueError as e: + log.warning("Failed FASTA qc") print(e) exit(1) diff --git a/bh20sequploader/qc_metadata.py b/bh20sequploader/qc_metadata.py index fbfd286..9122ace 100644 --- a/bh20sequploader/qc_metadata.py +++ b/bh20sequploader/qc_metadata.py @@ -8,7 +8,9 @@ import traceback from rdflib import Graph, Namespace from pyshex.evaluate import evaluate + def qc_metadata(metadatafile): + log = logging.getLogger(__name__ ) schema_resource = pkg_resources.resource_stream(__name__, "bh20seq-schema.yml") cache = {"https://raw.githubusercontent.com/arvados/bh20-seq-resource/master/bh20sequploader/bh20seq-schema.yml": schema_resource.read().decode("utf-8")} (document_loader, @@ -28,10 +30,11 @@ def qc_metadata(metadatafile): rslt, reason = evaluate(g, shex, doc["id"], "https://raw.githubusercontent.com/arvados/bh20-seq-resource/master/bh20sequploader/bh20seq-shex.rdf#submissionShape") if not rslt: + log.debug(reason) print(reason) return rslt except Exception as e: traceback.print_exc() - logging.warn(e) + log.warn(e) return False -- cgit v1.2.3