aboutsummaryrefslogtreecommitdiff
path: root/workflows/pangenome-generate
diff options
context:
space:
mode:
author AndreaGuarracino 2020-08-28 17:36:30 +0200
committer AndreaGuarracino 2020-08-28 17:36:30 +0200
commit ef4e55ce81448088b1e730a12d6772ebc01ce8af (patch)
tree ebb66030bf502da1009914367a43a072f1821724 /workflows/pangenome-generate
parent 8f1819dc9b7ace9727d396d6b89f79b3b41165f8 (diff)
download bh20-seq-resource-ef4e55ce81448088b1e730a12d6772ebc01ce8af.tar.gz
bh20-seq-resource-ef4e55ce81448088b1e730a12d6772ebc01ce8af.tar.lz
bh20-seq-resource-ef4e55ce81448088b1e730a12d6772ebc01ce8af.zip
added script to remove entries on Arvados
Diffstat (limited to 'workflows/pangenome-generate')
-rw-r--r-- workflows/pangenome-generate/delete_entries_on_arvados.py 34
1 files changed, 34 insertions, 0 deletions
diff --git a/workflows/pangenome-generate/delete_entries_on_arvados.py b/workflows/pangenome-generate/delete_entries_on_arvados.py
new file mode 100644
index 0000000..d281456
--- /dev/null
+++ b/workflows/pangenome-generate/delete_entries_on_arvados.py
@@ -0,0 +1,34 @@
+import sys
+import arvados
+import arvados.collection
+
+from datetime import datetime
+
+date_time_str = '2020-08-20'
+date_time_obj = datetime.strptime(date_time_str, '%Y-%m-%d')
+
+api = arvados.api()
+keepclient = arvados.keep.KeepClient(api_client=api)
+
+validated = arvados.util.list_all(api.collections().list, filters=[
+ ["owner_uuid", "=", sys.argv[1]],
+# ["properties.status", "=", "validated"]
+])
+
+# validated.sort(key=lambda v: v["portable_data_hash"])
+
+num_sample_deleted = 0
+for item in validated:
+ sequence_label = item['properties']["sequence_label"]
+
+ # The SRA samples start with SRR or ERR
+ if not sequence_label.startswith('SRR') and not sequence_label.startswith('ERR'):
+ created_at_obj = datetime.strptime(item["created_at"], '%Y-%m-%dT%H:%M:%S.%fZ')
+ # print(item, created_at_obj)
+
+ if created_at_obj < date_time_obj:
+ api.collections().delete(uuid=item['current_version_uuid']).execute()
+ num_sample_deleted += 1
+ print(sequence_label)
+
+print('num_sample_deleted: {}'.format(num_sample_deleted))