about | summary | refs | log | tree | commit | diff
path: root/bh20sequploader/qc_fasta.py
diff options
context:
space:
mode:
author: lltommy 2020-11-11 09:56:12 +0100
committer: lltommy 2020-11-11 09:56:12 +0100
commit: d6aa323b6fc7a82e45cc1df51fc72c2d547146eb (patch)
tree: 6e8b77bde4dc34fab3fa8804906f3cb821f61dae /bh20sequploader/qc_fasta.py
parent: c5fe5de7e4c77bfb48b1ae2f662c2d9cc120c06e (diff)
parent: c872248e43c1c66e5fed8ef341f7b4ac21d63e6f (diff)
download: bh20-seq-resource-d6aa323b6fc7a82e45cc1df51fc72c2d547146eb.tar.gz
bh20-seq-resource-d6aa323b6fc7a82e45cc1df51fc72c2d547146eb.tar.lz
bh20-seq-resource-d6aa323b6fc7a82e45cc1df51fc72c2d547146eb.zip
Merge branch 'master' of https://github.com/arvados/bh20-seq-resource
Diffstat (limited to 'bh20sequploader/qc_fasta.py')
-rw-r--r-- bh20sequploader/qc_fasta.py 9
1 files changed, 5 insertions, 4 deletions
diff --git a/bh20sequploader/qc_fasta.py b/bh20sequploader/qc_fasta.py
index f567f0a..814fb3e 100644
--- a/bh20sequploader/qc_fasta.py
+++ b/bh20sequploader/qc_fasta.py
@@ -66,7 +66,8 @@ def qc_fasta(arg_sequence, check_with_mimimap2=True):
similarity = 0
try:
- cmd = ["minimap2", "-c -x asm20", tmp1.name, tmp2.name]
+ log.debug("Trying to run minimap2")
+ cmd = ["minimap2", "-c", "-x", "asm20", tmp1.name, tmp2.name]
logging.info("QC checking similarity to reference")
logging.info(" ".join(cmd))
result = subprocess.run(cmd, stdout=subprocess.PIPE)
@@ -83,9 +84,7 @@ def qc_fasta(arg_sequence, check_with_mimimap2=True):
if similarity < 70.0:
raise ValueError(
- "QC fail for {}: alignment to reference was less than 70%% (was %2.2f%%)".format(
- seqlabel, similarity
- ))
+ f"QC fail for {seqlabel}: alignment to reference was less than 70% (was {similarity})")
return "sequence.fasta" + gz, seqlabel, seq_type
elif seq_type == "text/fastq":
@@ -93,4 +92,6 @@ def qc_fasta(arg_sequence, check_with_mimimap2=True):
sequence.detach()
return "reads.fastq" + gz, seqlabel, seq_type
else:
+ log.debug(seqlabel)
+ log.debug(seq_type)
raise ValueError("Sequence file ({}) does not look like a DNA FASTA or FASTQ".format(arg_sequence))