diff options
author | AndreaGuarracino | 2020-08-28 17:36:30 +0200 |
---|---|---|
committer | AndreaGuarracino | 2020-08-28 17:36:30 +0200 |
commit | ef4e55ce81448088b1e730a12d6772ebc01ce8af (patch) | |
tree | ebb66030bf502da1009914367a43a072f1821724 | |
parent | 8f1819dc9b7ace9727d396d6b89f79b3b41165f8 (diff) | |
download | bh20-seq-resource-ef4e55ce81448088b1e730a12d6772ebc01ce8af.tar.gz bh20-seq-resource-ef4e55ce81448088b1e730a12d6772ebc01ce8af.tar.lz bh20-seq-resource-ef4e55ce81448088b1e730a12d6772ebc01ce8af.zip |
added script to remove entries on Arvados
-rw-r--r-- | workflows/pangenome-generate/delete_entries_on_arvados.py | 34 |
1 files changed, 34 insertions, 0 deletions
# workflows/pangenome-generate/delete_entries_on_arvados.py
"""Delete old, non-SRA collections from an Arvados project.

Usage:
    delete_entries_on_arvados.py OWNER_UUID [CUTOFF_DATE]

Every collection owned by OWNER_UUID is listed.  A collection is deleted when
its ``sequence_label`` property does not start with ``SRR`` or ``ERR`` and it
was created before CUTOFF_DATE (``YYYY-MM-DD``, default ``2020-08-20``).
"""
import sys

from datetime import datetime

# Cutoff used when no CUTOFF_DATE argument is given.
DEFAULT_CUTOFF_DATE = '2020-08-20'

# The SRA samples start with SRR or ERR; they are never deleted.
SRA_PREFIXES = ('SRR', 'ERR')


def parse_created_at(timestamp):
    """Parse a ``YYYY-MM-DDTHH:MM:SS[.fraction][Z]`` string.

    ``strptime``'s ``%f`` accepts at most six fractional digits, so the
    fraction is handled by hand: extra digits are truncated to microseconds
    and a missing fraction is treated as zero instead of raising.

    Args:
        timestamp: The timestamp string, e.g. ``2020-08-19T10:11:12.123456Z``.

    Returns:
        A naive ``datetime`` (no tzinfo attached).

    Raises:
        ValueError: If the string does not match the expected layout.
    """
    if timestamp.endswith('Z'):
        timestamp = timestamp[:-1]
    whole, _, fraction = timestamp.partition('.')
    parsed = datetime.strptime(whole, '%Y-%m-%dT%H:%M:%S')
    if fraction:
        if not fraction.isdigit():
            raise ValueError(
                'invalid fractional seconds: {!r}'.format(fraction))
        # Right-pad like %f does ("12" -> 120000), drop sub-microsecond digits.
        parsed = parsed.replace(microsecond=int(fraction[:6].ljust(6, '0')))
    return parsed


def delete_old_entries(collections, cutoff, delete):
    """Delete every non-SRA collection created before ``cutoff``.

    Collections without a ``sequence_label`` property are left untouched:
    they cannot be classified, and skipping them avoids aborting half-way
    through a destructive run.

    Args:
        collections: Iterable of collection records (dicts) providing
            ``properties``, ``created_at`` and ``current_version_uuid``.
        cutoff: Naive ``datetime``; only strictly older collections go.
        delete: Callable invoked with the ``current_version_uuid`` of each
            collection to remove.

    Returns:
        The list of sequence labels of the deleted collections, in order.
    """
    deleted_labels = []
    for item in collections:
        sequence_label = (item.get('properties') or {}).get('sequence_label')
        if not sequence_label:
            continue
        if sequence_label.startswith(SRA_PREFIXES):
            continue
        if parse_created_at(item['created_at']) >= cutoff:
            continue

        delete(item['current_version_uuid'])
        deleted_labels.append(sequence_label)
        print(sequence_label)
    return deleted_labels


def main(argv):
    """Run the clean-up for the project named on the command line.

    Args:
        argv: Full argument vector (``sys.argv``): program name, owner UUID
            and, optionally, the cutoff date as ``YYYY-MM-DD``.

    Raises:
        ValueError: If the cutoff date is not in ``YYYY-MM-DD`` form.
    """
    if len(argv) < 2:
        sys.exit('usage: delete_entries_on_arvados.py OWNER_UUID [YYYY-MM-DD]')

    owner_uuid = argv[1]
    cutoff_str = argv[2] if len(argv) > 2 else DEFAULT_CUTOFF_DATE
    cutoff = datetime.strptime(cutoff_str, '%Y-%m-%d')

    # Imported here so the helpers above stay importable (and testable)
    # on machines without the Arvados SDK installed.
    import arvados
    import arvados.collection

    api = arvados.api()

    # No "properties.status" filter is applied: collections of every status
    # owned by the project are considered.
    collections = arvados.util.list_all(api.collections().list, filters=[
        ["owner_uuid", "=", owner_uuid],
    ])

    def delete(uuid):
        api.collections().delete(uuid=uuid).execute()

    deleted_labels = delete_old_entries(collections, cutoff, delete)
    print('num_sample_deleted: {}'.format(len(deleted_labels)))


if __name__ == '__main__':
    main(sys.argv)