about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Peter Amstutz 2020-07-16 21:24:05 -0400
committer Peter Amstutz 2020-07-16 21:24:05 -0400
commit b1750731b654be3322a6793f47d52fafcaaea9ac (patch)
tree 4f49aa6539b45c9c72db8e015c2413afa61a8ae8
parent eb3bdab1109959deb5b11fd74310832bdaa50899 (diff)
download bh20-seq-resource-b1750731b654be3322a6793f47d52fafcaaea9ac.tar.gz
bh20-seq-resource-b1750731b654be3322a6793f47d52fafcaaea9ac.tar.lz
bh20-seq-resource-b1750731b654be3322a6793f47d52fafcaaea9ac.zip
Report similarity == 0
Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz@curii.com>
-rw-r--r-- bh20sequploader/qc_fasta.py 4
-rw-r--r-- scripts/cleanup.py 7
2 files changed, 6 insertions, 5 deletions
diff --git a/bh20sequploader/qc_fasta.py b/bh20sequploader/qc_fasta.py
index 37eb4e8..0c7e16d 100644
--- a/bh20sequploader/qc_fasta.py
+++ b/bh20sequploader/qc_fasta.py
@@ -84,10 +84,8 @@ def qc_fasta(arg_sequence, check_with_clustalw=True):
except Exception as e:
logging.warn("QC against reference sequence using 'minimap2': %s", e, exc_info=e)
- if similarity and similarity < 70.0:
+ if similarity < 70.0:
raise ValueError("QC fail: alignment to reference was less than 70%% (was %2.2f%%)" % (similarity))
- if similarity == 0:
- raise ValueError("QC fail")
return ("sequence.fasta"+gz, seqlabel)
elif seq_type == "text/fastq":
diff --git a/scripts/cleanup.py b/scripts/cleanup.py
index 6a82659..78f34c8 100644
--- a/scripts/cleanup.py
+++ b/scripts/cleanup.py
@@ -9,11 +9,14 @@ delete_patterns = [
"%missing%`host_species`%",
"%QC fail: alignment%",
"%does not look like a valid URI%",
- "%Duplicate of%"
+ "%Duplicate of%",
+ "%No matching triples found for predicate obo:NCIT_C42781%",
+ "%does not look like a valid URI%"
]
revalidate_patterns = [
- "%missing%`license`%"
+ "%missing%`license`%",
+ "%QC fail%"
]
for p in delete_patterns: