about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- bh20seqanalyzer/main.py 36
-rw-r--r-- bh20sequploader/qc_fasta.py 11
2 files changed, 27 insertions, 20 deletions
diff --git a/bh20seqanalyzer/main.py b/bh20seqanalyzer/main.py
index 794ce27..9164190 100644
--- a/bh20seqanalyzer/main.py
+++ b/bh20seqanalyzer/main.py
@@ -39,22 +39,25 @@ def validate_upload(api, collection, validated_project,
             logging.warn("Failed metadata qc")
 
     if valid:
-        tgt = None
-        paired = {"reads_1.fastq": "reads.fastq", "reads_1.fastq.gz": "reads.fastq.gz"}
-        for n in ("sequence.fasta", "reads.fastq", "reads.fastq.gz", "reads_1.fastq", "reads_1.fastq.gz"):
-            if n not in col:
-                continue
-            with col.open(n, 'rb') as qf:
-                tgt = qc_fasta(qf)[0]
-                if tgt != n and tgt != paired.get(n):
-                    logging.info("Expected %s but magic says it should be %s", n, tgt)
-                    valid = False
-                elif tgt in ("reads.fastq", "reads.fastq.gz", "reads_1.fastq", "reads_1.fastq.gz"):
-                    start_fastq_to_fasta(api, collection, fastq_project, fastq_workflow_uuid, n)
-                    return False
-        if tgt is None:
+        try:
+            tgt = None
+            paired = {"reads_1.fastq": "reads.fastq", "reads_1.fastq.gz": "reads.fastq.gz"}
+            for n in ("sequence.fasta", "reads.fastq", "reads.fastq.gz", "reads_1.fastq", "reads_1.fastq.gz"):
+                if n not in col:
+                    continue
+                with col.open(n, 'rb') as qf:
+                    tgt = qc_fasta(qf)[0]
+                    if tgt != n and tgt != paired.get(n):
+                        logging.info("Expected %s but magic says it should be %s", n, tgt)
+                        valid = False
+                    elif tgt in ("reads.fastq", "reads.fastq.gz", "reads_1.fastq", "reads_1.fastq.gz"):
+                        start_fastq_to_fasta(api, collection, fastq_project, fastq_workflow_uuid, n)
+                        return False
+            if tgt is None:
+                valid = False
+                logging.warn("Upload '%s' does not contain sequence.fasta, reads.fastq or reads_1.fastq", collection["name"])
+        except ValueError as v:
             valid = False
-            logging.warn("Upload '%s' does not contain sequence.fasta, reads.fastq or reads_1.fastq", collection["name"])
 
     dup = api.collections().list(filters=[["owner_uuid", "=", validated_project],
                                           ["portable_data_hash", "=", col.portable_data_hash()]]).execute()
@@ -70,9 +73,8 @@ def validate_upload(api, collection, validated_project,
             "owner_uuid": validated_project,
             "name": "%s (%s)" % (collection["name"], time.asctime(time.gmtime()))}).execute()
     else:
-        pass
         # It is invalid, delete it.
-        #logging.warn("Deleting '%s'" % collection["name"])
+        logging.warn("Suggest deleting '%s'" % collection["name"])
         #api.collections().delete(uuid=collection["uuid"]).execute()
 
     return valid
diff --git a/bh20sequploader/qc_fasta.py b/bh20sequploader/qc_fasta.py
index 607c8c0..e198430 100644
--- a/bh20sequploader/qc_fasta.py
+++ b/bh20sequploader/qc_fasta.py
@@ -58,6 +58,9 @@ def qc_fasta(arg_sequence):
             tmp1.write(submitlabel.encode("utf8"))
             tmp1.write(("".join(submitseq)).encode("utf8"))
             tmp1.flush()
+            subbp = 0
+            refbp = 0
+            similarity = 0
             try:
                 cmd = ["clustalw", "-infile="+tmp1.name,
                        "-quicktree", "-iteration=none", "-type=DNA"]
@@ -78,12 +81,14 @@ def qc_fasta(arg_sequence):
             except Exception as e:
                 logging.warn("Error trying to QC against reference sequence using 'clustalw': %s", e)
 
-            if (subbp/refbp) < .7:
+            if refbp and (subbp/refbp) < .7:
                 raise ValueError("QC fail: submit sequence length is shorter than 70% reference")
-            if (subbp/refbp) > 1.3:
+            if refbp and (subbp/refbp) > 1.3:
                 raise ValueError("QC fail: submit sequence length is greater than 130% reference")
-            if similarity < 70.0:
+            if similarity and similarity < 70.0:
                 raise ValueError("QC fail: submit similarity is less than 70%")
+            if refbp == 0 or similarity == 0:
+                raise ValueError("QC fail")
 
         return ("sequence.fasta"+gz, seqlabel)
     elif seq_type == "text/fastq":