diff options
author | Peter Amstutz | 2020-04-07 15:28:42 -0400 |
---|---|---|
committer | Peter Amstutz | 2020-04-07 15:28:42 -0400 |
commit | 07bc4c65535437b8e9e0744f08da8cea541d0116 (patch) | |
tree | 201cd544a87aebf0ba6978c42d61df1e90d7d836 /bh20seqanalyzer | |
parent | 4215a82af730ff05b8fe98e226b759413cdf95f7 (diff) | |
download | bh20-seq-resource-07bc4c65535437b8e9e0744f08da8cea541d0116.tar.gz bh20-seq-resource-07bc4c65535437b8e9e0744f08da8cea541d0116.tar.lz bh20-seq-resource-07bc4c65535437b8e9e0744f08da8cea541d0116.zip |
Add metadata validation with schema-salad
Diffstat (limited to 'bh20seqanalyzer')
-rw-r--r-- | bh20seqanalyzer/main.py | 11 |
1 file changed, 8 insertions, 3 deletions
```diff
diff --git a/bh20seqanalyzer/main.py b/bh20seqanalyzer/main.py
index 2513ea3..78e32c9 100644
--- a/bh20seqanalyzer/main.py
+++ b/bh20seqanalyzer/main.py
@@ -6,12 +6,14 @@ import subprocess
 import tempfile
 import json
 import logging
+import ruamel.yaml
+from bh20sequploader.qc_metadata import qc_metadata
 
 logging.basicConfig(format="[%(asctime)s] %(levelname)s %(message)s", datefmt="%Y-%m-%d %H:%M:%S",
                     level=logging.INFO)
 logging.getLogger("googleapiclient.discovery").setLevel(logging.WARN)
 
-def validate_upload(api, collection, validated_project, latest_result_uuid):
+def validate_upload(api, collection, validated_project):
     col = arvados.collection.Collection(collection["uuid"])
 
     # validate the collection here. Check metadata, etc.
@@ -20,9 +22,12 @@ def validate_upload(api, collection, validated_project, latest_result_uuid):
     if "sequence.fasta" not in col:
         valid = False
         logging.warn("Upload '%s' missing sequence.fasta", collection["name"])
-    if "metadata.jsonld" not in col:
-        logging.warn("Upload '%s' missing metadata.jsonld", collection["name"])
+    if "metadata.yaml" not in col:
+        logging.warn("Upload '%s' missing metadata.yaml", collection["name"])
         valid = False
+    else:
+        metadata_content = ruamel.yaml.round_trip_load(col.open("metadata.yaml"))
+        valid = qc_metadata(metadata_content) and valid
 
     dup = api.collections().list(filters=[["owner_uuid", "=", validated_project],
                                           ["portable_data_hash", "=", col.portable_data_hash()]]).execute()
```