about summary refs log tree commit diff
path: root/bh20sequploader
diff options
context:
space:
mode:
Diffstat (limited to 'bh20sequploader')
-rw-r--r-- bh20sequploader/bh20seq-schema.yml | 6
-rw-r--r-- bh20sequploader/main.py | 2
-rw-r--r-- bh20sequploader/qc_metadata.py | 29
3 files changed, 32 insertions, 5 deletions
diff --git a/bh20sequploader/bh20seq-schema.yml b/bh20sequploader/bh20seq-schema.yml
index cf9b015..a072bd7 100644
--- a/bh20sequploader/bh20seq-schema.yml
+++ b/bh20sequploader/bh20seq-schema.yml
@@ -174,9 +174,9 @@ $graph:
       jsonldPredicate:
         _id: "@id"
         #_type: "@id"
-    sequencefile:
-      doc: The subject (eg the fasta/fastq file) that this metadata describes
+    id:
+      doc: The subject (eg the fasta/fastq file) that the metadata describes
       type: string?
       jsonldPredicate:
         _id: "@id"
-        _type: "@id"
\ No newline at end of file
+        _type: "@id"
diff --git a/bh20sequploader/main.py b/bh20sequploader/main.py
index bfb8c51..2032508 100644
--- a/bh20sequploader/main.py
+++ b/bh20sequploader/main.py
@@ -22,7 +22,7 @@ def main():
 
     api = arvados.api(host=ARVADOS_API_HOST, token=ARVADOS_API_TOKEN, insecure=True)
 
-    if not qc_metadata(args.metadata.name):
+    if not bh20sequploader.qc_metadata.qc_metadata(args.metadata.name):
         print("Failed metadata qc")
         exit(1)
 
diff --git a/bh20sequploader/qc_metadata.py b/bh20sequploader/qc_metadata.py
index ebe4dfc..38edcaa 100644
--- a/bh20sequploader/qc_metadata.py
+++ b/bh20sequploader/qc_metadata.py
@@ -1,12 +1,25 @@
 import schema_salad.schema
+import schema_salad.ref_resolver
 import logging
 import pkg_resources
 import logging
+import traceback
+
+class CustomFetcher(schema_salad.ref_resolver.DefaultFetcher):
+    def check_exists(sup, url):
+        if url.startswith("keep:"):
+            return True
+        else:
+            return super().check_exists(url)
+
+    def supported_schemes(self):  # type: () -> List[str]
+        return ["file", "http", "https", "mailto", "keep"]
+
 
 def qc_metadata(metadatafile):
     schema_resource = pkg_resources.resource_stream(__name__, "bh20seq-schema.yml")
     cache = {"https://raw.githubusercontent.com/arvados/bh20-seq-resource/master/bh20sequploader/bh20seq-schema.yml": schema_resource.read().decode("utf-8")}
-    (document_loader,
+    (loader,
      avsc_names,
      schema_metadata,
      metaschema_loader) = schema_salad.schema.load_schema("https://raw.githubusercontent.com/arvados/bh20-seq-resource/master/bh20sequploader/bh20seq-schema.yml", cache=cache)
@@ -15,9 +28,23 @@ def qc_metadata(metadatafile):
         print(avsc_names)
         return False
 
+    document_loader = schema_salad.ref_resolver.Loader(
+        loader.ctx,
+        schemagraph=loader.graph,
+        foreign_properties=loader.foreign_properties,
+        idx=loader.idx,
+        cache=loader.cache,
+        fetcher_constructor=CustomFetcher,
+        skip_schemas=loader.skip_schemas,
+        url_fields=loader.url_fields,
+        allow_attachments=loader.allow_attachments,
+        session=loader.session,
+        )
+
     try:
         doc, metadata = schema_salad.schema.load_and_validate(document_loader, avsc_names, metadatafile, True)
         return True
     except Exception as e:
+        traceback.print_exc()
         logging.warn(e)
     return False