diff options
author | Pjotr Prins | 2020-11-06 11:19:28 +0000 |
---|---|---|
committer | Pjotr Prins | 2020-11-06 11:19:28 +0000 |
commit | 5fdfece97fb2d50a10eab5004a6467ec0097ece8 (patch) | |
tree | 022eff03421416c082cd09e7c4d391b9527501e9 /bh20sequploader | |
parent | 951ebe949d88cdbfed028e0a2a420ce7921c3919 (diff) | |
download | bh20-seq-resource-5fdfece97fb2d50a10eab5004a6467ec0097ece8.tar.gz bh20-seq-resource-5fdfece97fb2d50a10eab5004a6467ec0097ece8.tar.lz bh20-seq-resource-5fdfece97fb2d50a10eab5004a6467ec0097ece8.zip |
Uploader script improvements
Diffstat (limited to 'bh20sequploader')
-rw-r--r-- | bh20sequploader/main.py | 5 | ||||
-rw-r--r-- | bh20sequploader/qc_fasta.py | 9 |
2 files changed, 8 insertions, 6 deletions
diff --git a/bh20sequploader/main.py b/bh20sequploader/main.py index f89b458..ea0fa70 100644 --- a/bh20sequploader/main.py +++ b/bh20sequploader/main.py @@ -49,7 +49,7 @@ sequence for enough overlap with the reference genome failed = True except Exception as e: log.exception("Failed metadata QC") - failed = True + failed = True # continue with the FASTA checker target = [] try: @@ -64,13 +64,14 @@ sequence for enough overlap with the reference genome target[1] = ("reads_2."+target[1][0][6:], target[1][1], target[1][2]) if do_qc and target[0][2] == 'text/fasta' and sample_id != target[0][1]: - raise ValueError("The sample_id field in the metadata must be the same as the FASTA header") + raise ValueError(f"The sample_id field in the metadata ({sample_id}) must be the same as the FASTA header ({target[0][1]})") except Exception as e: log.exception("Failed sequence QC") failed = True if failed: + log.debug("Bailing out!") exit(1) return target diff --git a/bh20sequploader/qc_fasta.py b/bh20sequploader/qc_fasta.py index f567f0a..814fb3e 100644 --- a/bh20sequploader/qc_fasta.py +++ b/bh20sequploader/qc_fasta.py @@ -66,7 +66,8 @@ def qc_fasta(arg_sequence, check_with_mimimap2=True): similarity = 0 try: - cmd = ["minimap2", "-c -x asm20", tmp1.name, tmp2.name] + log.debug("Trying to run minimap2") + cmd = ["minimap2", "-c", "-x", "asm20", tmp1.name, tmp2.name] logging.info("QC checking similarity to reference") logging.info(" ".join(cmd)) result = subprocess.run(cmd, stdout=subprocess.PIPE) @@ -83,9 +84,7 @@ def qc_fasta(arg_sequence, check_with_mimimap2=True): if similarity < 70.0: raise ValueError( - "QC fail for {}: alignment to reference was less than 70%% (was %2.2f%%)".format( - seqlabel, similarity - )) + f"QC fail for {seqlabel}: alignment to reference was less than 70% (was {similarity})") return "sequence.fasta" + gz, seqlabel, seq_type elif seq_type == "text/fastq": @@ -93,4 +92,6 @@ def qc_fasta(arg_sequence, check_with_mimimap2=True): sequence.detach() return "reads.fastq" + gz, seqlabel, seq_type else: + log.debug(seqlabel) + log.debug(seq_type) raise ValueError("Sequence file ({}) does not look like a DNA FASTA or FASTQ".format(arg_sequence)) |