import arvados
import arvados.collection
import arvados.keep
import arvados.util
import ruamel.yaml
# Owner UUIDs used to scope the collection queries in this script:
# UPLOADER_PROJECT is searched for collections with recorded errors,
# VALIDATED_PROJECT for collections that lack a "sequence_label" property.
UPLOADER_PROJECT = "lugli-j7d0g-n5clictpuvwk8aa"
VALIDATED_PROJECT = "lugli-j7d0g-5ct8p1i1wrgyjvp"

# One API client shared by every request below, plus a Keep client bound to
# it for reading collection file contents.
api = arvados.api()
keepclient = arvados.keep.KeepClient(api_client=api)
# Patterns matched against each collection's "properties.errors" value with
# the API's "like" filter operator.  Collections in the uploader project whose
# errors match any of these are deleted by the loop further down.
# Each pattern triggers its own list query, so entries must be unique.
delete_patterns = [
    "%missing%`collection_location`%",
    "%missing%`technology`%",
    "%missing%`host_species`%",
    "%QC fail: alignment%",
    "%does not look like a valid URI%",
    "%Duplicate of%",
    "%No matching triples found for predicate obo:NCIT_C42781%",
]
# Patterns matched against "properties.errors" with the "like" filter
# operator.  Matching collections in the uploader project are kept, but their
# "status" and "errors" properties are cleared.
# NOTE: "%QC fail%" also matches the more specific "%QC fail: alignment%"
# delete pattern; in this script the delete pass runs before this list is used.
revalidate_patterns = [
    "%missing%`license`%",
    "%QC fail%",
]
# Delete pass: for each delete pattern, list every collection owned by the
# uploader project whose recorded errors match it, log it, and issue a
# collections().delete() request for it.
for pattern in delete_patterns:
    error_filters = [
        ["owner_uuid", "=", UPLOADER_PROJECT],
        ["properties.errors", "like", pattern],
    ]
    matches = arvados.util.list_all(api.collections().list, filters=error_filters)
    for record in matches:
        col_uuid = record["uuid"]
        # sequence_label may be absent; .get() then logs "None".
        label = record["properties"].get("sequence_label")
        print("trashing %s %s" % (col_uuid, label))
        api.collections().delete(uuid=col_uuid).execute()
# Revalidate pass: for each revalidate pattern, find the matching collections
# in the uploader project and strip their "status" and "errors" properties,
# writing the remaining properties back to the collection record.
for pattern in revalidate_patterns:
    error_filters = [
        ["owner_uuid", "=", UPLOADER_PROJECT],
        ["properties.errors", "like", pattern],
    ]
    for record in arvados.util.list_all(api.collections().list, filters=error_filters):
        props = record["properties"]
        print("clearing status %s %s" % (record["uuid"], props.get("sequence_label")))
        # Drop each key only if present; all other properties are preserved.
        for stale_key in ("status", "errors"):
            props.pop(stale_key, None)
        api.collections().update(uuid=record["uuid"], body={"properties": props}).execute()
# Backfill pass: every collection in the validated project that has no
# "sequence_label" property gets one, taken from sample.sample_id in the
# collection's own metadata.yaml file.

# Round-trip YAML loader, created once and reused for every collection.
# The module-level ruamel.yaml.round_trip_load() helper is deprecated and no
# longer works in recent ruamel.yaml releases; YAML(typ="rt") is the supported
# equivalent and uses the same round-trip loader.
yaml_loader = ruamel.yaml.YAML(typ="rt")

c = arvados.util.list_all(api.collections().list, filters=[
    ["owner_uuid", "=", VALIDATED_PROJECT],
    ["properties.sequence_label", "exists", False]])
for i in c:
    col = arvados.collection.Collection(i["uuid"], api_client=api, keep_client=keepclient)
    # Only the parse needs the file handle; close it before touching the API.
    with col.open("metadata.yaml") as md:
        metadata_content = yaml_loader.load(md)
    colprop = col.get_properties()
    # Raises KeyError if metadata.yaml has no sample.sample_id entry.
    colprop["sequence_label"] = metadata_content["sample"]["sample_id"]
    print("fixing sequence label %s %s" % (i["uuid"], colprop.get("sequence_label")))
    api.collections().update(uuid=i["uuid"], body={"properties": colprop}).execute()