From 6bfefe984a84fb215d61e045c49a4ab123bb7339 Mon Sep 17 00:00:00 2001
From: Peter Amstutz
Date: Thu, 16 Jul 2020 12:32:43 -0400
Subject: Catch exceptions

Add script to cleanup bad uploads.

Arvados-DCO-1.1-Signed-off-by: Peter Amstutz
---
Review note: the new except block originally called the misspelled
`logging.exeception`, which is not an attribute of the logging module, so
the handler itself raised AttributeError and the main loop still died on
the first error. It now calls `logging.exception`, and the unused `as e`
binding was dropped. The post-image blob id on the main.py index line
predates this correction.

 bh20seqanalyzer/main.py | 19 +++++++++++--------
 scripts/cleanup.py      | 20 ++++++++++++++++++++
 2 files changed, 31 insertions(+), 8 deletions(-)
 create mode 100644 scripts/cleanup.py

diff --git a/bh20seqanalyzer/main.py b/bh20seqanalyzer/main.py
index f2bb234..f18a93a 100644
--- a/bh20seqanalyzer/main.py
+++ b/bh20seqanalyzer/main.py
@@ -364,17 +364,20 @@ def main():
     logging.info("Starting up, monitoring %s for uploads" % (args.uploader_project))
 
     while True:
-        seqanalyzer.move_fastq_to_fasta_results()
+        try:
+            seqanalyzer.move_fastq_to_fasta_results()
 
-        new_collections = arvados.util.list_all(api.collections().list, filters=[["owner_uuid", "=", args.uploader_project]])
-        at_least_one_new_valid_seq = False
-        for c in new_collections:
-            at_least_one_new_valid_seq = seqanalyzer.validate_upload(c, args.revalidate) or at_least_one_new_valid_seq
+            new_collections = arvados.util.list_all(api.collections().list, filters=[["owner_uuid", "=", args.uploader_project]])
+            at_least_one_new_valid_seq = False
+            for c in new_collections:
+                at_least_one_new_valid_seq = seqanalyzer.validate_upload(c, args.revalidate) or at_least_one_new_valid_seq
 
-        if at_least_one_new_valid_seq and not args.no_start_analysis:
-            seqanalyzer.start_pangenome_analysis()
+            if at_least_one_new_valid_seq and not args.no_start_analysis:
+                seqanalyzer.start_pangenome_analysis()
 
-        seqanalyzer.copy_most_recent_result()
+            seqanalyzer.copy_most_recent_result()
+        except Exception:
+            logging.exception("Error in main loop")
 
         if args.once:
             break
diff --git a/scripts/cleanup.py b/scripts/cleanup.py
new file mode 100644
index 0000000..f4bd0b4
--- /dev/null
+++ b/scripts/cleanup.py
@@ -0,0 +1,20 @@
+import arvados
+import arvados.util
+
+api = arvados.api()
+
+patterns = [
+    "%missing%`collection_location`%",
+    "%missing%`technology`%",
+    "%missing%`host_species`%",
+    "%QC fail: alignment%",
+    "%does not look like a valid URI%",
+    ]
+
+for p in patterns:
+    c = arvados.util.list_all(api.collections().list, filters=[
+        ["owner_uuid", "=", "lugli-j7d0g-n5clictpuvwk8aa"],
+        ["properties.errors", "like", p]])
+    for i in c:
+        print("trashing %s %s" % (i["uuid"], i["properties"].get("sequence_label")))
+        api.collections().delete(uuid=i["uuid"]).execute()
-- 
cgit v1.2.3