From 6bfefe984a84fb215d61e045c49a4ab123bb7339 Mon Sep 17 00:00:00 2001
From: Peter Amstutz
Date: Thu, 16 Jul 2020 12:32:43 -0400
Subject: Catch exceptions

Add script to clean up bad uploads.

Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz@curii.com>
---
 bh20seqanalyzer/main.py | 19 +++++++++++--------
 scripts/cleanup.py      | 20 ++++++++++++++++++++
 2 files changed, 31 insertions(+), 8 deletions(-)
 create mode 100644 scripts/cleanup.py

diff --git a/bh20seqanalyzer/main.py b/bh20seqanalyzer/main.py
index f2bb234..f18a93a 100644
--- a/bh20seqanalyzer/main.py
+++ b/bh20seqanalyzer/main.py
@@ -364,17 +364,20 @@ def main():
     logging.info("Starting up, monitoring %s for uploads" % (args.uploader_project))
 
     while True:
-        seqanalyzer.move_fastq_to_fasta_results()
+        try:
+            seqanalyzer.move_fastq_to_fasta_results()
 
-        new_collections = arvados.util.list_all(api.collections().list, filters=[["owner_uuid", "=", args.uploader_project]])
-        at_least_one_new_valid_seq = False
-        for c in new_collections:
-            at_least_one_new_valid_seq = seqanalyzer.validate_upload(c, args.revalidate) or at_least_one_new_valid_seq
+            new_collections = arvados.util.list_all(api.collections().list, filters=[["owner_uuid", "=", args.uploader_project]])
+            at_least_one_new_valid_seq = False
+            for c in new_collections:
+                at_least_one_new_valid_seq = seqanalyzer.validate_upload(c, args.revalidate) or at_least_one_new_valid_seq
 
-        if at_least_one_new_valid_seq and not args.no_start_analysis:
-            seqanalyzer.start_pangenome_analysis()
+            if at_least_one_new_valid_seq and not args.no_start_analysis:
+                seqanalyzer.start_pangenome_analysis()
 
-        seqanalyzer.copy_most_recent_result()
+            seqanalyzer.copy_most_recent_result()
+        except Exception as e:
+            logging.exception("Error in main loop")
 
         if args.once:
             break
diff --git a/scripts/cleanup.py b/scripts/cleanup.py
new file mode 100644
index 0000000..f4bd0b4
--- /dev/null
+++ b/scripts/cleanup.py
@@ -0,0 +1,20 @@
+import arvados
+import arvados.util
+
+api = arvados.api()
+
+patterns = [
+    "%missing%`collection_location`%",
+    "%missing%`technology`%",
+    "%missing%`host_species`%",
+    "%QC fail: alignment%",
+    "%does not look like a valid URI%",
+    ]
+
+for p in patterns:
+    c = arvados.util.list_all(api.collections().list, filters=[
+        ["owner_uuid", "=", "lugli-j7d0g-n5clictpuvwk8aa"],
+        ["properties.errors", "like", p]])
+    for i in c:
+        print("trashing %s %s" % (i["uuid"], i["properties"].get("sequence_label")))
+        api.collections().delete(uuid=i["uuid"]).execute()
-- 
cgit v1.2.3