about | summary | refs | log | tree | commit | diff
path: root/bh20seqanalyzer/main.py
diff options
context:
space:
mode:
author Peter Amstutz 2020-04-07 15:28:42 -0400
committer Peter Amstutz 2020-04-07 15:28:42 -0400
commit 07bc4c65535437b8e9e0744f08da8cea541d0116 (patch)
tree 201cd544a87aebf0ba6978c42d61df1e90d7d836 /bh20seqanalyzer/main.py
parent 4215a82af730ff05b8fe98e226b759413cdf95f7 (diff)
download bh20-seq-resource-07bc4c65535437b8e9e0744f08da8cea541d0116.tar.gz
bh20-seq-resource-07bc4c65535437b8e9e0744f08da8cea541d0116.tar.lz
bh20-seq-resource-07bc4c65535437b8e9e0744f08da8cea541d0116.zip
Add metadata validation with schema-salad
Diffstat (limited to 'bh20seqanalyzer/main.py')
-rw-r--r-- bh20seqanalyzer/main.py 11
1 files changed, 8 insertions, 3 deletions
diff --git a/bh20seqanalyzer/main.py b/bh20seqanalyzer/main.py
index 2513ea3..78e32c9 100644
--- a/bh20seqanalyzer/main.py
+++ b/bh20seqanalyzer/main.py
@@ -6,12 +6,14 @@ import subprocess
import tempfile
import json
import logging
+import ruamel.yaml
+from bh20sequploader.qc_metadata import qc_metadata
logging.basicConfig(format="[%(asctime)s] %(levelname)s %(message)s", datefmt="%Y-%m-%d %H:%M:%S",
level=logging.INFO)
logging.getLogger("googleapiclient.discovery").setLevel(logging.WARN)
-def validate_upload(api, collection, validated_project, latest_result_uuid):
+def validate_upload(api, collection, validated_project):
col = arvados.collection.Collection(collection["uuid"])
# validate the collection here. Check metadata, etc.
@@ -20,9 +22,12 @@ def validate_upload(api, collection, validated_project, latest_result_uuid):
if "sequence.fasta" not in col:
valid = False
logging.warn("Upload '%s' missing sequence.fasta", collection["name"])
- if "metadata.jsonld" not in col:
- logging.warn("Upload '%s' missing metadata.jsonld", collection["name"])
+ if "metadata.yaml" not in col:
+ logging.warn("Upload '%s' missing metadata.yaml", collection["name"])
valid = False
+ else:
+ metadata_content = ruamel.yaml.round_trip_load(col.open("metadata.yaml"))
+ valid = qc_metadata(metadata_content) and valid
dup = api.collections().list(filters=[["owner_uuid", "=", validated_project],
["portable_data_hash", "=", col.portable_data_hash()]]).execute()