about summary refs log tree commit diff
path: root/scripts/cleanup.py
diff options
context:
space:
mode:
Diffstat (limited to 'scripts/cleanup.py')
-rw-r--r-- scripts/cleanup.py 38
1 files changed, 38 insertions, 0 deletions
diff --git a/scripts/cleanup.py b/scripts/cleanup.py
new file mode 100644
index 0000000..6a82659
--- /dev/null
+++ b/scripts/cleanup.py
@@ -0,0 +1,38 @@
+import arvados
+import arvados.util
+
+api = arvados.api()  # API client used by both loops below; created with no arguments here
+# Patterns compared against `properties.errors` with the `like` operator; collections that match are deleted by the first loop below.
+delete_patterns = [
+ "%missing%`collection_location`%",  # `%` is presumably the LIKE wildcard (any run of characters) - confirm against the API docs
+ "%missing%`technology`%",
+ "%missing%`host_species`%",
+ "%QC fail: alignment%",
+ "%does not look like a valid URI%",
+ "%Duplicate of%"
+ ]
+# Patterns compared against `properties.errors` with `like`; matching collections are kept, but the second loop below removes their `status` and `errors` properties.
+revalidate_patterns = [
+ "%missing%`license`%"
+]
+# Delete pass: for each delete pattern, list the matching collections, log each one, and call delete on it.
+for p in delete_patterns:
+ c = arvados.util.list_all(api.collections().list, filters=[  # presumably returns every matching record rather than a single page - confirm against the SDK
+ ["owner_uuid", "=", "lugli-j7d0g-n5clictpuvwk8aa"],  # only collections with this hard-coded owner UUID are considered
+ ["properties.errors", "like", p]])
+ for i in c:  # NOTE(review): indentation looks collapsed in this view; the next two lines use `i`, so presumably they form this loop's body - confirm
+ print("trashing %s %s" % (i["uuid"], i["properties"].get("sequence_label")))  # .get() gives None (printed as "None") when there is no sequence_label
+ api.collections().delete(uuid=i["uuid"]).execute()
+# Revalidate pass: for each revalidate pattern, remove `status` and `errors` from the matching collections' properties and save the result.
+for p in revalidate_patterns:
+ c = arvados.util.list_all(api.collections().list, filters=[
+ ["owner_uuid", "=", "lugli-j7d0g-n5clictpuvwk8aa"],  # same hard-coded owner UUID as the delete pass
+ ["properties.errors", "like", p]])
+ for i in c:  # NOTE(review): indentation looks collapsed in this view; the lines below use `i`/`pr`, so presumably they all sit inside this loop - confirm
+ print("clearing status %s %s" % (i["uuid"], i["properties"].get("sequence_label")))
+ pr = i["properties"]  # the dict from the listed record is edited in place and then sent back
+ if "status" in pr:
+ del pr["status"]
+ if "errors" in pr:
+ del pr["errors"]
+ api.collections().update(uuid=i["uuid"], body={"properties": pr}).execute()  # sends the whole remaining properties dict; presumably lets a later validation run pick the record up again - TODO confirm