diff options
-rw-r--r-- | bh20seqanalyzer/main.py | 19 | ||||
-rw-r--r-- | scripts/cleanup.py | 20 |
2 files changed, 31 insertions, 8 deletions
diff --git a/bh20seqanalyzer/main.py b/bh20seqanalyzer/main.py index f2bb234..f18a93a 100644 --- a/bh20seqanalyzer/main.py +++ b/bh20seqanalyzer/main.py @@ -364,17 +364,20 @@ def main(): logging.info("Starting up, monitoring %s for uploads" % (args.uploader_project)) while True: - seqanalyzer.move_fastq_to_fasta_results() + try: + seqanalyzer.move_fastq_to_fasta_results() - new_collections = arvados.util.list_all(api.collections().list, filters=[["owner_uuid", "=", args.uploader_project]]) - at_least_one_new_valid_seq = False - for c in new_collections: - at_least_one_new_valid_seq = seqanalyzer.validate_upload(c, args.revalidate) or at_least_one_new_valid_seq + new_collections = arvados.util.list_all(api.collections().list, filters=[["owner_uuid", "=", args.uploader_project]]) + at_least_one_new_valid_seq = False + for c in new_collections: + at_least_one_new_valid_seq = seqanalyzer.validate_upload(c, args.revalidate) or at_least_one_new_valid_seq - if at_least_one_new_valid_seq and not args.no_start_analysis: - seqanalyzer.start_pangenome_analysis() + if at_least_one_new_valid_seq and not args.no_start_analysis: + seqanalyzer.start_pangenome_analysis() - seqanalyzer.copy_most_recent_result() + seqanalyzer.copy_most_recent_result() + except Exception as e: + logging.exception("Error in main loop") if args.once: break diff --git a/scripts/cleanup.py b/scripts/cleanup.py new file mode 100644 index 0000000..f4bd0b4 --- /dev/null +++ b/scripts/cleanup.py @@ -0,0 +1,20 @@ +import arvados +import arvados.util + +api = arvados.api() + +patterns = [ + "%missing%`collection_location`%", + "%missing%`technology`%", + "%missing%`host_species`%", + "%QC fail: alignment%", + "%does not look like a valid URI%", + ] + +for p in patterns: + c = arvados.util.list_all(api.collections().list, filters=[ + ["owner_uuid", "=", "lugli-j7d0g-n5clictpuvwk8aa"], + ["properties.errors", "like", p]]) + for i in c: + print("trashing %s %s" % (i["uuid"], 
i["properties"].get("sequence_label"))) + api.collections().delete(uuid=i["uuid"]).execute() |