From 6bfefe984a84fb215d61e045c49a4ab123bb7339 Mon Sep 17 00:00:00 2001
From: Peter Amstutz
Date: Thu, 16 Jul 2020 12:32:43 -0400
Subject: Catch exceptions

Add script to cleanup bad uploads.

Arvados-DCO-1.1-Signed-off-by: Peter Amstutz
---
 scripts/cleanup.py | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)
 create mode 100644 scripts/cleanup.py

(limited to 'scripts/cleanup.py')

diff --git a/scripts/cleanup.py b/scripts/cleanup.py
new file mode 100644
index 0000000..f4bd0b4
--- /dev/null
+++ b/scripts/cleanup.py
@@ -0,0 +1,20 @@
+import arvados
+import arvados.util
+
+api = arvados.api()
+
+patterns = [
+    "%missing%`collection_location`%",
+    "%missing%`technology`%",
+    "%missing%`host_species`%",
+    "%QC fail: alignment%",
+    "%does not look like a valid URI%",
+    ]
+
+for p in patterns:
+    c = arvados.util.list_all(api.collections().list, filters=[
+        ["owner_uuid", "=", "lugli-j7d0g-n5clictpuvwk8aa"],
+        ["properties.errors", "like", p]])
+    for i in c:
+        print("trashing %s %s" % (i["uuid"], i["properties"].get("sequence_label")))
+        api.collections().delete(uuid=i["uuid"]).execute()
-- 
cgit v1.2.3


From 474d15e17be63046a091615e89ba63adecdb109b Mon Sep 17 00:00:00 2001
From: Peter Amstutz
Date: Thu, 16 Jul 2020 14:28:02 -0400
Subject: Cleanup script also clears errors for revalidate

Arvados-DCO-1.1-Signed-off-by: Peter Amstutz
---
 scripts/cleanup.py | 22 ++++++++++++++++++++--
 1 file changed, 20 insertions(+), 2 deletions(-)

(limited to 'scripts/cleanup.py')

diff --git a/scripts/cleanup.py b/scripts/cleanup.py
index f4bd0b4..6a82659 100644
--- a/scripts/cleanup.py
+++ b/scripts/cleanup.py
@@ -3,18 +3,36 @@ import arvados.util
 
 api = arvados.api()
 
-patterns = [
+delete_patterns = [
     "%missing%`collection_location`%",
     "%missing%`technology`%",
     "%missing%`host_species`%",
     "%QC fail: alignment%",
     "%does not look like a valid URI%",
+    "%Duplicate of%"
     ]
 
-for p in patterns:
+revalidate_patterns = [
+    "%missing%`license`%"
+]
+
+for p in delete_patterns:
     c = arvados.util.list_all(api.collections().list, filters=[
         ["owner_uuid", "=", "lugli-j7d0g-n5clictpuvwk8aa"],
         ["properties.errors", "like", p]])
     for i in c:
         print("trashing %s %s" % (i["uuid"], i["properties"].get("sequence_label")))
         api.collections().delete(uuid=i["uuid"]).execute()
+
+for p in revalidate_patterns:
+    c = arvados.util.list_all(api.collections().list, filters=[
+        ["owner_uuid", "=", "lugli-j7d0g-n5clictpuvwk8aa"],
+        ["properties.errors", "like", p]])
+    for i in c:
+        print("clearing status %s %s" % (i["uuid"], i["properties"].get("sequence_label")))
+        pr = i["properties"]
+        if "status" in pr:
+            del pr["status"]
+        if "errors" in pr:
+            del pr["errors"]
+        api.collections().update(uuid=i["uuid"], body={"properties": pr}).execute()
-- 
cgit v1.2.3


From b1750731b654be3322a6793f47d52fafcaaea9ac Mon Sep 17 00:00:00 2001
From: Peter Amstutz
Date: Thu, 16 Jul 2020 21:24:05 -0400
Subject: Report similarity == 0

Arvados-DCO-1.1-Signed-off-by: Peter Amstutz
---
 bh20sequploader/qc_fasta.py | 4 +---
 scripts/cleanup.py          | 7 +++++--
 2 files changed, 6 insertions(+), 5 deletions(-)

(limited to 'scripts/cleanup.py')

diff --git a/bh20sequploader/qc_fasta.py b/bh20sequploader/qc_fasta.py
index 37eb4e8..0c7e16d 100644
--- a/bh20sequploader/qc_fasta.py
+++ b/bh20sequploader/qc_fasta.py
@@ -84,10 +84,8 @@ def qc_fasta(arg_sequence, check_with_clustalw=True):
             except Exception as e:
                 logging.warn("QC against reference sequence using 'minimap2': %s", e, exc_info=e)
 
-            if similarity and similarity < 70.0:
+            if similarity < 70.0:
                 raise ValueError("QC fail: alignment to reference was less than 70%% (was %2.2f%%)" % (similarity))
-            if similarity == 0:
-                raise ValueError("QC fail")
 
             return ("sequence.fasta"+gz, seqlabel)
         elif seq_type == "text/fastq":
diff --git a/scripts/cleanup.py b/scripts/cleanup.py
index 6a82659..78f34c8 100644
--- a/scripts/cleanup.py
+++ b/scripts/cleanup.py
@@ -9,11 +9,14 @@ delete_patterns = [
     "%missing%`host_species`%",
     "%QC fail: alignment%",
     "%does not look like a valid URI%",
-    "%Duplicate of%"
+    "%Duplicate of%",
+    "%No matching triples found for predicate obo:NCIT_C42781%",
+    "%does not look like a valid URI%"
     ]
 
 revalidate_patterns = [
-    "%missing%`license`%"
+    "%missing%`license`%",
+    "%QC fail%"
 ]
 
 for p in delete_patterns:
-- 
cgit v1.2.3