about summary refs log tree commit diff
path: root/scripts/cleanup.py
diff options
context:
space:
mode:
Diffstat (limited to 'scripts/cleanup.py')
-rw-r--r-- scripts/cleanup.py 23
1 files changed, 21 insertions, 2 deletions
diff --git a/scripts/cleanup.py b/scripts/cleanup.py
index 78f34c8..6919305 100644
--- a/scripts/cleanup.py
+++ b/scripts/cleanup.py
@@ -1,7 +1,13 @@
import arvados
import arvados.util
+import arvados.keep
+import ruamel.yaml
api = arvados.api()
+keepclient = arvados.keep.KeepClient(api_client=api)
+
+UPLOADER_PROJECT = 'lugli-j7d0g-n5clictpuvwk8aa'
+VALIDATED_PROJECT = 'lugli-j7d0g-5ct8p1i1wrgyjvp'
delete_patterns = [
"%missing%`collection_location`%",
@@ -21,7 +27,7 @@ revalidate_patterns = [
for p in delete_patterns:
c = arvados.util.list_all(api.collections().list, filters=[
- ["owner_uuid", "=", "lugli-j7d0g-n5clictpuvwk8aa"],
+ ["owner_uuid", "=", UPLOADER_PROJECT],
["properties.errors", "like", p]])
for i in c:
print("trashing %s %s" % (i["uuid"], i["properties"].get("sequence_label")))
@@ -29,7 +35,7 @@ for p in delete_patterns:
for p in revalidate_patterns:
c = arvados.util.list_all(api.collections().list, filters=[
- ["owner_uuid", "=", "lugli-j7d0g-n5clictpuvwk8aa"],
+ ["owner_uuid", "=", UPLOADER_PROJECT],
["properties.errors", "like", p]])
for i in c:
print("clearing status %s %s" % (i["uuid"], i["properties"].get("sequence_label")))
@@ -39,3 +45,16 @@ for p in revalidate_patterns:
if "errors" in pr:
del pr["errors"]
api.collections().update(uuid=i["uuid"], body={"properties": pr}).execute()
+
+c = arvados.util.list_all(api.collections().list, filters=[
+ ["owner_uuid", "=", VALIDATED_PROJECT],
+ ["properties.sequence_label", "exists", False]])
+for i in c:
+ col = arvados.collection.Collection(i["uuid"], api_client=api, keep_client=keepclient)
+ with col.open("metadata.yaml") as md:
+ metadata_content = ruamel.yaml.round_trip_load(md)
+ colprop = col.get_properties()
+ colprop["sequence_label"] = metadata_content["sample"]["sample_id"]
+
+ print("fixing sequence label %s %s" % (i["uuid"], colprop.get("sequence_label")))
+ api.collections().update(uuid=i["uuid"], body={"properties": colprop}).execute()