about summary refs log tree commit diff
path: root/scripts
diff options
context:
space:
mode:
Diffstat (limited to 'scripts')
-rw-r--r-- scripts/cleanup.py 38
1 files changed, 38 insertions, 0 deletions
diff --git a/scripts/cleanup.py b/scripts/cleanup.py
new file mode 100644
index 0000000..6a82659
--- /dev/null
+++ b/scripts/cleanup.py
@@ -0,0 +1,38 @@
+import arvados
+import arvados.util
+
+# Arvados API client; every list/delete/update call below goes through it.
+# NOTE(review): arvados.api() is called with no arguments, so the host and
+# token presumably come from the SDK's default configuration -- confirm.
+api = arvados.api()
+
+# Patterns used as the operand of the "like" filter on "properties.errors" in
+# the first loop below; collections matching any of them are passed to
+# collections().delete().
+# NOTE(review): "%" is presumably the wildcard of the API's `like` operator,
+# making each entry a substring-style match -- confirm against the Arvados
+# filter documentation.
+delete_patterns = [
+    "%missing%`collection_location`%",
+    "%missing%`technology`%",
+    "%missing%`host_species`%",
+    "%QC fail: alignment%",
+    "%does not look like a valid URI%",
+    "%Duplicate of%"
+    ]
+
+# Patterns used as the operand of the "like" filter on "properties.errors" in
+# the second loop below; matching collections are kept, but their "status"
+# and "errors" properties are removed.
+revalidate_patterns = [
+    "%missing%`license`%"
+]
+
+# Pass 1: for each delete pattern, list the collections owned by the
+# hard-coded owner UUID whose "properties.errors" matches the pattern, then
+# issue a delete request for each one.
+for p in delete_patterns:
+    # NOTE(review): list_all presumably pages through and returns every match
+    # before any delete is issued -- confirm against the SDK documentation.
+    c = arvados.util.list_all(api.collections().list, filters=[
+        ["owner_uuid", "=", "lugli-j7d0g-n5clictpuvwk8aa"],
+        ["properties.errors", "like", p]])
+    for i in c:
+        # .get() yields None when the collection has no "sequence_label".
+        print("trashing %s %s" % (i["uuid"], i["properties"].get("sequence_label")))
+        # NOTE(review): the message says "trashing"; whether delete() moves the
+        # collection to the trash or removes it outright is decided by the
+        # API server -- confirm.
+        api.collections().delete(uuid=i["uuid"]).execute()
+
+# Pass 2: for each revalidate pattern, list the matching collections owned by
+# the same hard-coded owner UUID as the delete loop above and strip their
+# "status" and "errors" properties.
+# NOTE(review): going by the name revalidate_patterns, this presumably lets a
+# later validation run re-check them; nothing in this script triggers one.
+for p in revalidate_patterns:
+    c = arvados.util.list_all(api.collections().list, filters=[
+        ["owner_uuid", "=", "lugli-j7d0g-n5clictpuvwk8aa"],
+        ["properties.errors", "like", p]])
+    for i in c:
+        print("clearing status %s %s" % (i["uuid"], i["properties"].get("sequence_label")))
+        pr = i["properties"]  # the listed record's own dict, edited in place
+        if "status" in pr:
+            del pr["status"]
+        if "errors" in pr:
+            del pr["errors"]
+        # Send back the whole remaining properties dict, now without the two keys.
+        api.collections().update(uuid=i["uuid"], body={"properties": pr}).execute()