aboutsummaryrefslogtreecommitdiff
path: root/bh20sequploader
diff options
context:
space:
mode:
author Peter Amstutz 2020-06-22 16:32:54 +0000
committer Peter Amstutz 2020-06-22 16:32:54 +0000
commit 4001368ab29c988e94dddd29767c4b64a5bd2a5b (patch)
tree 5642bf6a5cd02bc76b0aa98ae086b010f5131cf0 /bh20sequploader
parent 7daa9ff2cdba742a811db00c924ccde25fa2c9b6 (diff)
download bh20-seq-resource-4001368ab29c988e94dddd29767c4b64a5bd2a5b.tar.gz
bh20-seq-resource-4001368ab29c988e94dddd29767c4b64a5bd2a5b.tar.lz
bh20-seq-resource-4001368ab29c988e94dddd29767c4b64a5bd2a5b.zip
Better invalid sequence QC handling
Diffstat (limited to 'bh20sequploader')
-rw-r--r-- bh20sequploader/qc_fasta.py 11
1 files changed, 8 insertions, 3 deletions
diff --git a/bh20sequploader/qc_fasta.py b/bh20sequploader/qc_fasta.py
index 607c8c0..e198430 100644
--- a/bh20sequploader/qc_fasta.py
+++ b/bh20sequploader/qc_fasta.py
@@ -58,6 +58,9 @@ def qc_fasta(arg_sequence):
tmp1.write(submitlabel.encode("utf8"))
tmp1.write(("".join(submitseq)).encode("utf8"))
tmp1.flush()
+ subbp = 0
+ refbp = 0
+ similarity = 0
try:
cmd = ["clustalw", "-infile="+tmp1.name,
"-quicktree", "-iteration=none", "-type=DNA"]
@@ -78,12 +81,14 @@ def qc_fasta(arg_sequence):
except Exception as e:
logging.warn("Error trying to QC against reference sequence using 'clustalw': %s", e)
- if (subbp/refbp) < .7:
+ if refbp and (subbp/refbp) < .7:
raise ValueError("QC fail: submit sequence length is shorter than 70% reference")
- if (subbp/refbp) > 1.3:
+ if refbp and (subbp/refbp) > 1.3:
raise ValueError("QC fail: submit sequence length is greater than 130% reference")
- if similarity < 70.0:
+ if similarity and similarity < 70.0:
raise ValueError("QC fail: submit similarity is less than 70%")
+ if refbp == 0 or similarity == 0:
+ raise ValueError("QC fail")
return ("sequence.fasta"+gz, seqlabel)
elif seq_type == "text/fastq":