From ffad8c21f08aca1cf65809c398f3613846e7c8ba Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Fri, 10 Apr 2020 16:47:31 -0400 Subject: Propagating metadata to output works now. Arvados-DCO-1.1-Signed-off-by: Peter Amstutz --- bh20seqanalyzer/main.py | 4 ++-- bh20sequploader/bh20seq-schema.yml | 5 ----- bh20sequploader/qc_metadata.py | 26 +------------------------- example/metadata.yaml | 2 -- 4 files changed, 3 insertions(+), 34 deletions(-) diff --git a/bh20seqanalyzer/main.py b/bh20seqanalyzer/main.py index 63ff067..193a268 100644 --- a/bh20seqanalyzer/main.py +++ b/bh20seqanalyzer/main.py @@ -29,7 +29,7 @@ def validate_upload(api, collection, validated_project, else: try: metadata_content = ruamel.yaml.round_trip_load(col.open("metadata.yaml")) - metadata_content["id"] = "keep:%s/metadata.yaml" % collection["portable_data_hash"] + metadata_content["id"] = "http://arvados.org/keep:%s/metadata.yaml" % collection["portable_data_hash"] add_lc_filename(metadata_content, metadata_content["id"]) valid = qc_metadata(metadata_content) and valid except Exception as e: @@ -146,7 +146,7 @@ def start_pangenome_analysis(api, "class": "File", "location": "keep:%s/metadata.yaml" % v["portable_data_hash"] }) - inputobj["subjects"].append("keep:%s/sequence.fasta" % v["portable_data_hash"]) + inputobj["subjects"].append("http://arvados.org/keep:%s/sequence.fasta" % v["portable_data_hash"]) run_workflow(api, analysis_project, pangenome_workflow_uuid, "Pangenome analysis", inputobj) diff --git a/bh20sequploader/bh20seq-schema.yml b/bh20sequploader/bh20seq-schema.yml index a072bd7..8a22db1 100644 --- a/bh20sequploader/bh20seq-schema.yml +++ b/bh20sequploader/bh20seq-schema.yml @@ -169,11 +169,6 @@ $graph: virus: virusSchema? technology: technologySchema submitter: submitterSchema - submission: - type: string - jsonldPredicate: - _id: "@id" - #_type: "@id" id: doc: The subject (eg the fasta/fastq file) that the metadata describes type: string? diff --git a/bh20sequploader/qc_metadata.py b/bh20sequploader/qc_metadata.py index 38edcaa..e477f21 100644 --- a/bh20sequploader/qc_metadata.py +++ b/bh20sequploader/qc_metadata.py @@ -5,21 +5,10 @@ import pkg_resources import logging import traceback -class CustomFetcher(schema_salad.ref_resolver.DefaultFetcher): - def check_exists(sup, url): - if url.startswith("keep:"): - return True - else: - return super().check_exists(url) - - def supported_schemes(self): # type: () -> List[str] - return ["file", "http", "https", "mailto", "keep"] - - def qc_metadata(metadatafile): schema_resource = pkg_resources.resource_stream(__name__, "bh20seq-schema.yml") cache = {"https://raw.githubusercontent.com/arvados/bh20-seq-resource/master/bh20sequploader/bh20seq-schema.yml": schema_resource.read().decode("utf-8")} - (loader, + (document_loader, avsc_names, schema_metadata, metaschema_loader) = schema_salad.schema.load_schema("https://raw.githubusercontent.com/arvados/bh20-seq-resource/master/bh20sequploader/bh20seq-schema.yml", cache=cache) @@ -28,19 +17,6 @@ def qc_metadata(metadatafile): print(avsc_names) return False - document_loader = schema_salad.ref_resolver.Loader( - loader.ctx, - schemagraph=loader.graph, - foreign_properties=loader.foreign_properties, - idx=loader.idx, - cache=loader.cache, - fetcher_constructor=CustomFetcher, - skip_schemas=loader.skip_schemas, - url_fields=loader.url_fields, - allow_attachments=loader.allow_attachments, - session=loader.session, - ) - try: doc, metadata = schema_salad.schema.load_and_validate(document_loader, avsc_names, metadatafile, True) return True diff --git a/example/metadata.yaml b/example/metadata.yaml index a2f6e57..c780921 100644 --- a/example/metadata.yaml +++ b/example/metadata.yaml @@ -1,5 +1,3 @@ -submission: publicSequenceResource - host: host_id: XX1 host_species: string -- cgit v1.2.3