diff options
author | Peter Amstutz | 2020-07-08 17:14:46 -0400 |
---|---|---|
committer | GitHub | 2020-07-08 17:14:46 -0400 |
commit | 6e0f9f18167377bac073d7715b89e7ddbf1fe72d (patch) | |
tree | 1b72a737b50e60346aefaf009ac2488d45c8abe0 /bh20seqanalyzer | |
parent | 6fa25708b46a590be82a6b84266c0a3f25a0d890 (diff) | |
parent | e821857e7a9403739f321feb7418d33d6bd8b2c7 (diff) | |
download | bh20-seq-resource-6e0f9f18167377bac073d7715b89e7ddbf1fe72d.tar.gz bh20-seq-resource-6e0f9f18167377bac073d7715b89e7ddbf1fe72d.tar.lz bh20-seq-resource-6e0f9f18167377bac073d7715b89e7ddbf1fe72d.zip |
Merge pull request #92 from arvados/upload-download-status
Split upload tab. Add upload status tab. Also a bunch of QC and uploader improvements.
Diffstat (limited to 'bh20seqanalyzer')
-rw-r--r-- | bh20seqanalyzer/main.py | 41 |
1 files changed, 26 insertions, 15 deletions
diff --git a/bh20seqanalyzer/main.py b/bh20seqanalyzer/main.py index 1746587..0b52e6b 100644 --- a/bh20seqanalyzer/main.py +++ b/bh20seqanalyzer/main.py @@ -17,10 +17,11 @@ logging.basicConfig(format="[%(asctime)s] %(levelname)s %(message)s", datefmt="% logging.getLogger("googleapiclient.discovery").setLevel(logging.WARN) def validate_upload(api, collection, validated_project, - fastq_project, fastq_workflow_uuid): + fastq_project, fastq_workflow_uuid, + revalidate): col = arvados.collection.Collection(collection["uuid"]) - if collection.get("status") in ("validated", "rejected"): + if not revalidate and collection["properties"].get("status") in ("validated", "rejected"): return False # validate the collection here. Check metadata, etc. @@ -28,11 +29,12 @@ def validate_upload(api, collection, validated_project, errors = [] - dup = api.collections().list(filters=[["owner_uuid", "=", validated_project], - ["portable_data_hash", "=", col.portable_data_hash()]]).execute() - if dup["items"]: - # This exact collection has been uploaded before. - errors.append("Duplicate of %s" % ([d["uuid"] for d in dup["items"]])) + if collection["owner_uuid"] != validated_project: + dup = api.collections().list(filters=[["owner_uuid", "=", validated_project], + ["portable_data_hash", "=", col.portable_data_hash()]]).execute() + if dup["items"]: + # This exact collection has been uploaded before. + errors.append("Duplicate of %s" % ([d["uuid"] for d in dup["items"]])) if not errors: if "metadata.yaml" not in col: @@ -70,12 +72,15 @@ def validate_upload(api, collection, validated_project, if not errors: - logging.info("Added '%s' to validated sequences" % collection["name"]) # Move it to the "validated" project to be included in the next analysis + if "errors" in collection["properties"]: + del collection["properties"]["errors"] collection["properties"]["status"] = "validated" api.collections().update(uuid=collection["uuid"], body={ "owner_uuid": validated_project, - "name": "%s (%s)" % (collection["name"], time.asctime(time.gmtime()))}).execute() + "name": "%s (%s)" % (collection["name"], time.asctime(time.gmtime())), + "properties": collection["properties"]}).execute() + logging.info("Added '%s' to validated sequences" % collection["name"]) return True else: # It is invalid @@ -155,7 +160,9 @@ def start_pangenome_analysis(api, validated_project, schema_ref, exclude_list): - validated = arvados.util.list_all(api.collections().list, filters=[["owner_uuid", "=", validated_project]]) + validated = arvados.util.list_all(api.collections().list, filters=[ + ["owner_uuid", "=", validated_project], + ["properties.status", "=", "validated"]]) inputobj = { "inputReads": [], "metadata": [], @@ -187,14 +194,15 @@ def get_workflow_output_from_project(api, uuid): cr = api.container_requests().list(filters=[['owner_uuid', '=', uuid], ["requesting_container_uuid", "=", None]]).execute() if cr["items"] and cr["items"][0]["output_uuid"]: - return cr["items"][0] - else: - return None + container = api.containers().get(uuid=cr["items"][0]["container_uuid"]).execute() + if container["state"] == "Complete" and container["exit_code"] == 0: + return cr["items"][0] + return None def copy_most_recent_result(api, analysis_project, latest_result_uuid): most_recent_analysis = api.groups().list(filters=[['owner_uuid', '=', analysis_project]], - order="created_at desc", limit=1).execute() + order="created_at desc").execute() for m in most_recent_analysis["items"]: wf = get_workflow_output_from_project(api, m["uuid"]) if wf: @@ -220,6 +228,7 @@ def move_fastq_to_fasta_results(api, analysis_project, uploader_project): body={"owner_uuid": uploader_project}).execute() p["properties"]["moved_output"] = True api.groups().update(uuid=p["uuid"], body={"properties": p["properties"]}).execute() + break def upload_schema(api, workflow_def_project): @@ -297,6 +306,7 @@ def main(): parser.add_argument('--no-start-analysis', action="store_true") parser.add_argument('--once', action="store_true") parser.add_argument('--print-status', type=str, default=None) + parser.add_argument('--revalidate', action="store_true", default=None) args = parser.parse_args() api = arvados.api() @@ -330,7 +340,8 @@ def main(): at_least_one_new_valid_seq = validate_upload(api, c, args.validated_project, args.fastq_project, - args.fastq_workflow_uuid) or at_least_one_new_valid_seq + args.fastq_workflow_uuid, + args.revalidate) or at_least_one_new_valid_seq if at_least_one_new_valid_seq and not args.no_start_analysis: start_pangenome_analysis(api, |