From 5fdfece97fb2d50a10eab5004a6467ec0097ece8 Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Fri, 6 Nov 2020 11:19:28 +0000 Subject: Uploader script improvements --- bh20sequploader/main.py | 5 +++-- bh20sequploader/qc_fasta.py | 9 +++++---- doc/INSTALL.md | 8 +++++++- 3 files changed, 15 insertions(+), 7 deletions(-) diff --git a/bh20sequploader/main.py b/bh20sequploader/main.py index f89b458..ea0fa70 100644 --- a/bh20sequploader/main.py +++ b/bh20sequploader/main.py @@ -49,7 +49,7 @@ sequence for enough overlap with the reference genome failed = True except Exception as e: log.exception("Failed metadata QC") - failed = True + failed = True # continue with the FASTA checker target = [] try: @@ -64,13 +64,14 @@ sequence for enough overlap with the reference genome target[1] = ("reads_2."+target[1][0][6:], target[1][1], target[1][2]) if do_qc and target[0][2] == 'text/fasta' and sample_id != target[0][1]: - raise ValueError("The sample_id field in the metadata must be the same as the FASTA header") + raise ValueError(f"The sample_id field in the metadata ({sample_id}) must be the same as the FASTA header ({target[0][1]})") except Exception as e: log.exception("Failed sequence QC") failed = True if failed: + log.debug("Bailing out!") exit(1) return target diff --git a/bh20sequploader/qc_fasta.py b/bh20sequploader/qc_fasta.py index f567f0a..814fb3e 100644 --- a/bh20sequploader/qc_fasta.py +++ b/bh20sequploader/qc_fasta.py @@ -66,7 +66,8 @@ def qc_fasta(arg_sequence, check_with_mimimap2=True): similarity = 0 try: - cmd = ["minimap2", "-c -x asm20", tmp1.name, tmp2.name] + log.debug("Trying to run minimap2") + cmd = ["minimap2", "-c", "-x", "asm20", tmp1.name, tmp2.name] logging.info("QC checking similarity to reference") logging.info(" ".join(cmd)) result = subprocess.run(cmd, stdout=subprocess.PIPE) @@ -83,9 +84,7 @@ def qc_fasta(arg_sequence, check_with_mimimap2=True): if similarity < 70.0: raise ValueError( - "QC fail for {}: alignment to reference was less than 70%% (was %2.2f%%)".format( - seqlabel, similarity - )) + f"QC fail for {seqlabel}: alignment to reference was less than 70% (was {similarity})") return "sequence.fasta" + gz, seqlabel, seq_type elif seq_type == "text/fastq": @@ -93,4 +92,6 @@ def qc_fasta(arg_sequence, check_with_mimimap2=True): sequence.detach() return "reads.fastq" + gz, seqlabel, seq_type else: + log.debug(seqlabel) + log.debug(seq_type) raise ValueError("Sequence file ({}) does not look like a DNA FASTA or FASTQ".format(arg_sequence)) diff --git a/doc/INSTALL.md b/doc/INSTALL.md index f54c8f2..45aca0f 100644 --- a/doc/INSTALL.md +++ b/doc/INSTALL.md @@ -31,7 +31,7 @@ arvados-python-client-2.0.1 ciso8601-2.1.3 future-0.18.2 google-api-python-clien 3. Run the tool directly with ```sh -guix environment guix --ad-hoc git python openssl python-pycurl python-magic nss-certs python-pyshex -- python3 bh20sequploader/main.py example/sequence.fasta example/maximum_metadata_example.yaml +guix environment guix --ad-hoc git python openssl python-pycurl python-magic nss-certs python-pyshex -- python3 bh20sequploader/main.py example/maximum_metadata_example.yaml example/sequence.fasta ``` Note that python-pyshex is packaged in @@ -44,6 +44,12 @@ repository. E.g. env GUIX_PACKAGE_PATH=~/iwrk/opensource/guix/guix-bioinformatics/ ~/opt/guix/bin/guix environment -C guix --ad-hoc git python python-flask python-pyyaml python-pycurl python-magic nss-certs python-pyshex python-pyyaml --network openssl python-pyshex python-pyshexc minimap2 python-schema-salad python-arvados-python-client --share=/export/tmp -- env TMPDIR=/export/tmp python3 bh20sequploader/main.py --help ``` +Latest successful Guix run + +```sh +env GUIX_PACKAGE_PATH=~/iwrk/opensource/guix/guix-bioinformatics/ ~/opt/guix/bin/guix environment guix --ad-hoc git python openssl python-pycurl python-magic nss-certs python-pyshex python-arvados-python-client python-schema-salad minimap2 -- python3 bh20sequploader/main.py scripts/uthsc_samples/yaml/AL_UT14.yaml scripts/uthsc_samples/yaml/AL_UT14.fa +``` + ### Using the Web Uploader To run the web uploader in a GNU Guix environment/container run it with something like -- cgit v1.2.3