about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Peter Amstutz 2020-07-03 21:15:48 +0000
committer Peter Amstutz 2020-07-03 21:15:48 +0000
commit 38340e0cedb465cd592ac40b11c9d22c75973fed (patch)
tree 92e9834c68edcb93a6597d091b337318675af33d
parent 04df498f5cd85015afce79e1e87a3979e596dcc6 (diff)
download bh20-seq-resource-38340e0cedb465cd592ac40b11c9d22c75973fed.tar.gz
bh20-seq-resource-38340e0cedb465cd592ac40b11c9d22c75973fed.tar.lz
bh20-seq-resource-38340e0cedb465cd592ac40b11c9d22c75973fed.zip
Add --skip-qc for faster batch import
-rw-r--r-- bh20sequploader/main.py 4
-rw-r--r-- bh20sequploader/qc_fasta.py 6
2 files changed, 5 insertions, 5 deletions
diff --git a/bh20sequploader/main.py b/bh20sequploader/main.py
index dc63bfc..cdc4c3f 100644
--- a/bh20sequploader/main.py
+++ b/bh20sequploader/main.py
@@ -24,7 +24,7 @@ UPLOAD_PROJECT='lugli-j7d0g-n5clictpuvwk8aa'
def qc_stuff(metadata, sequence_p1, sequence_p2, do_qc=True):
try:
- log.debug("Checking metadata")
+ log.debug("Checking metadata" if do_qc else "Skipping metadata check")
if do_qc and not qc_metadata(metadata.name):
log.warning("Failed metadata qc")
exit(1)
@@ -36,7 +36,7 @@ def qc_stuff(metadata, sequence_p1, sequence_p2, do_qc=True):
target = []
try:
- log.debug("Checking FASTA/FASTQ QC")
+ log.debug("FASTA/FASTQ QC" if do_qc else "Limited FASTA/FASTQ QC")
target.append(qc_fasta(sequence_p1, check_with_clustalw=do_qc))
if sequence_p2:
target.append(qc_fasta(sequence_p2))
diff --git a/bh20sequploader/qc_fasta.py b/bh20sequploader/qc_fasta.py
index 944b52c..8c6ebd3 100644
--- a/bh20sequploader/qc_fasta.py
+++ b/bh20sequploader/qc_fasta.py
@@ -54,6 +54,9 @@ def qc_fasta(arg_sequence, check_with_clustalw=True):
sequence.seek(0)
sequence.detach()
+ if not check_with_clustalw:
+ return ("sequence.fasta"+gz, seqlabel)
+
with tempfile.NamedTemporaryFile() as tmp1:
refstring = pkg_resources.resource_string(__name__, "SARS-CoV-2-reference.fasta")
tmp1.write(refstring)
@@ -64,9 +67,6 @@ def qc_fasta(arg_sequence, check_with_clustalw=True):
refbp = 0
similarity = 0
try:
- if not check_with_clustalw:
- raise Exception("skipping QC")
-
cmd = ["clustalw", "-infile="+tmp1.name,
"-quicktree", "-iteration=none", "-type=DNA"]
print("QC checking similarity to reference")