about | summary | refs | log | tree | commit | diff
path: root/bh20sequploader
diff options
context:
space:
mode:
Diffstat (limited to 'bh20sequploader')
-rw-r--r-- bh20sequploader/bh20seq-schema.yml 2
-rw-r--r-- bh20sequploader/bh20seq-shex.rdf 6
-rw-r--r-- bh20sequploader/main.py 10
-rw-r--r-- bh20sequploader/qc_metadata.py 5
4 files changed, 17 insertions, 6 deletions
diff --git a/bh20sequploader/bh20seq-schema.yml b/bh20sequploader/bh20seq-schema.yml
index 8a16bd3..5b3812b 100644
--- a/bh20sequploader/bh20seq-schema.yml
+++ b/bh20sequploader/bh20seq-schema.yml
@@ -149,7 +149,7 @@ $graph:
fields:
sample_sequencing_technology:
doc: Technology that was used to sequence this sample (e.g Sanger, Nanopor MiniION)
- type: string[]?
+ type: string[]
jsonldPredicate:
_id: http://purl.obolibrary.org/obo/OBI_0600047
_type: "@id"
diff --git a/bh20sequploader/bh20seq-shex.rdf b/bh20sequploader/bh20seq-shex.rdf
index a017805..6b196bd 100644
--- a/bh20sequploader/bh20seq-shex.rdf
+++ b/bh20sequploader/bh20seq-shex.rdf
@@ -32,8 +32,8 @@ PREFIX wikidata: <http://www.wikidata.org/entity/>
:sampleShape {
sio:SIO_000115 xsd:string;
- evs:C25164 xsd:string;
- obo:GAZ_00000448 [wikidata:~] ;
+ evs:C25164 xsd:string;
+ obo:GAZ_00000448 [wikidata:~] ;
obo:OBI_0001895 xsd:string ?;
obo:NCIT_C41206 xsd:string ?;
obo:OBI_0001479 IRI {0,2};
@@ -65,4 +65,4 @@ PREFIX wikidata: <http://www.wikidata.org/entity/>
:virusShape{
edam:data_1875 [ obo:NCBITaxon_~ ] ;
sio:SIO_010055 xsd:string ?;
-} \ No newline at end of file
+}
diff --git a/bh20sequploader/main.py b/bh20sequploader/main.py
index 0d463f7..2cd15c2 100644
--- a/bh20sequploader/main.py
+++ b/bh20sequploader/main.py
@@ -3,6 +3,7 @@ import time
import arvados
import arvados.collection
import json
+import logging
import magic
from pathlib import Path
import urllib.request
@@ -13,6 +14,10 @@ sys.path.insert(0,'.')
from bh20sequploader.qc_metadata import qc_metadata
from bh20sequploader.qc_fasta import qc_fasta
+logging.basicConfig(level=logging.DEBUG)
+log = logging.getLogger(__name__ )
+log.debug("Entering sequence uploader")
+
ARVADOS_API_HOST='lugli.arvadosapi.com'
ARVADOS_API_TOKEN='2fbebpmbo3rw3x05ueu2i6nx70zhrsb1p22ycu3ry34m4x4462'
UPLOAD_PROJECT='lugli-j7d0g-n5clictpuvwk8aa'
@@ -26,13 +31,16 @@ def main():
api = arvados.api(host=ARVADOS_API_HOST, token=ARVADOS_API_TOKEN, insecure=True)
+ log.debug("Checking metadata")
if not qc_metadata(args.metadata.name):
- print("Failed metadata qc")
+ log.warning("Failed metadata qc")
exit(1)
try:
+ log.debug("Checking FASTA QC")
target = qc_fasta(args.sequence)
except ValueError as e:
+ log.warning("Failed FASTA qc")
print(e)
exit(1)
diff --git a/bh20sequploader/qc_metadata.py b/bh20sequploader/qc_metadata.py
index fbfd286..9122ace 100644
--- a/bh20sequploader/qc_metadata.py
+++ b/bh20sequploader/qc_metadata.py
@@ -8,7 +8,9 @@ import traceback
from rdflib import Graph, Namespace
from pyshex.evaluate import evaluate
+
def qc_metadata(metadatafile):
+ log = logging.getLogger(__name__ )
schema_resource = pkg_resources.resource_stream(__name__, "bh20seq-schema.yml")
cache = {"https://raw.githubusercontent.com/arvados/bh20-seq-resource/master/bh20sequploader/bh20seq-schema.yml": schema_resource.read().decode("utf-8")}
(document_loader,
@@ -28,10 +30,11 @@ def qc_metadata(metadatafile):
rslt, reason = evaluate(g, shex, doc["id"], "https://raw.githubusercontent.com/arvados/bh20-seq-resource/master/bh20sequploader/bh20seq-shex.rdf#submissionShape")
if not rslt:
+ log.debug(reason)
print(reason)
return rslt
except Exception as e:
traceback.print_exc()
- logging.warn(e)
+ log.warn(e)
return False