aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Pjotr Prins 2020-11-06 11:19:28 +0000
committer Pjotr Prins 2020-11-06 11:19:28 +0000
commit 5fdfece97fb2d50a10eab5004a6467ec0097ece8 (patch)
tree 022eff03421416c082cd09e7c4d391b9527501e9
parent 951ebe949d88cdbfed028e0a2a420ce7921c3919 (diff)
download bh20-seq-resource-5fdfece97fb2d50a10eab5004a6467ec0097ece8.tar.gz
bh20-seq-resource-5fdfece97fb2d50a10eab5004a6467ec0097ece8.tar.lz
bh20-seq-resource-5fdfece97fb2d50a10eab5004a6467ec0097ece8.zip
Uploader script improvements
-rw-r--r-- bh20sequploader/main.py 5
-rw-r--r-- bh20sequploader/qc_fasta.py 9
-rw-r--r-- doc/INSTALL.md 8
3 files changed, 15 insertions, 7 deletions
diff --git a/bh20sequploader/main.py b/bh20sequploader/main.py
index f89b458..ea0fa70 100644
--- a/bh20sequploader/main.py
+++ b/bh20sequploader/main.py
@@ -49,7 +49,7 @@ sequence for enough overlap with the reference genome
failed = True
except Exception as e:
log.exception("Failed metadata QC")
- failed = True
+ failed = True # continue with the FASTA checker
target = []
try:
@@ -64,13 +64,14 @@ sequence for enough overlap with the reference genome
target[1] = ("reads_2."+target[1][0][6:], target[1][1], target[1][2])
if do_qc and target[0][2] == 'text/fasta' and sample_id != target[0][1]:
- raise ValueError("The sample_id field in the metadata must be the same as the FASTA header")
+ raise ValueError(f"The sample_id field in the metadata ({sample_id}) must be the same as the FASTA header ({target[0][1]})")
except Exception as e:
log.exception("Failed sequence QC")
failed = True
if failed:
+ log.debug("Bailing out!")
exit(1)
return target
diff --git a/bh20sequploader/qc_fasta.py b/bh20sequploader/qc_fasta.py
index f567f0a..814fb3e 100644
--- a/bh20sequploader/qc_fasta.py
+++ b/bh20sequploader/qc_fasta.py
@@ -66,7 +66,8 @@ def qc_fasta(arg_sequence, check_with_mimimap2=True):
similarity = 0
try:
- cmd = ["minimap2", "-c -x asm20", tmp1.name, tmp2.name]
+ log.debug("Trying to run minimap2")
+ cmd = ["minimap2", "-c", "-x", "asm20", tmp1.name, tmp2.name]
logging.info("QC checking similarity to reference")
logging.info(" ".join(cmd))
result = subprocess.run(cmd, stdout=subprocess.PIPE)
@@ -83,9 +84,7 @@ def qc_fasta(arg_sequence, check_with_mimimap2=True):
if similarity < 70.0:
raise ValueError(
- "QC fail for {}: alignment to reference was less than 70%% (was %2.2f%%)".format(
- seqlabel, similarity
- ))
+ f"QC fail for {seqlabel}: alignment to reference was less than 70% (was {similarity})")
return "sequence.fasta" + gz, seqlabel, seq_type
elif seq_type == "text/fastq":
@@ -93,4 +92,6 @@ def qc_fasta(arg_sequence, check_with_mimimap2=True):
sequence.detach()
return "reads.fastq" + gz, seqlabel, seq_type
else:
+ log.debug(seqlabel)
+ log.debug(seq_type)
raise ValueError("Sequence file ({}) does not look like a DNA FASTA or FASTQ".format(arg_sequence))
diff --git a/doc/INSTALL.md b/doc/INSTALL.md
index f54c8f2..45aca0f 100644
--- a/doc/INSTALL.md
+++ b/doc/INSTALL.md
@@ -31,7 +31,7 @@ arvados-python-client-2.0.1 ciso8601-2.1.3 future-0.18.2 google-api-python-clien
3. Run the tool directly with
```sh
-guix environment guix --ad-hoc git python openssl python-pycurl python-magic nss-certs python-pyshex -- python3 bh20sequploader/main.py example/sequence.fasta example/maximum_metadata_example.yaml
+guix environment guix --ad-hoc git python openssl python-pycurl python-magic nss-certs python-pyshex -- python3 bh20sequploader/main.py example/maximum_metadata_example.yaml example/sequence.fasta
```
Note that python-pyshex is packaged in
@@ -44,6 +44,12 @@ repository. E.g.
env GUIX_PACKAGE_PATH=~/iwrk/opensource/guix/guix-bioinformatics/ ~/opt/guix/bin/guix environment -C guix --ad-hoc git python python-flask python-pyyaml python-pycurl python-magic nss-certs python-pyshex python-pyyaml --network openssl python-pyshex python-pyshexc minimap2 python-schema-salad python-arvados-python-client --share=/export/tmp -- env TMPDIR=/export/tmp python3 bh20sequploader/main.py --help
```
+Latest successful Guix run
+
+```sh
+env GUIX_PACKAGE_PATH=~/iwrk/opensource/guix/guix-bioinformatics/ ~/opt/guix/bin/guix environment guix --ad-hoc git python openssl python-pycurl python-magic nss-certs python-pyshex python-arvados-python-client python-schema-salad minimap2 -- python3 bh20sequploader/main.py scripts/uthsc_samples/yaml/AL_UT14.yaml scripts/uthsc_samples/yaml/AL_UT14.fa
+```
+
### Using the Web Uploader
To run the web uploader in a GNU Guix environment/container run it with something like