aboutsummaryrefslogtreecommitdiff
path: root/bh20sequploader
diff options
context:
space:
mode:
author: Andrea Guarracino, 2020-08-28 11:16:24 +0200
committer: Andrea Guarracino, 2020-08-28 11:16:24 +0200
commit: 3165a31e321cbf4641f9afdcbea511ee66f673bb (patch)
tree: 7140a760be5903c6f5e63a38da7e0f76493530a8 /bh20sequploader
parent: cc8f99d50236b7d0c365990398785ecc319323ea (diff)
download: bh20-seq-resource-3165a31e321cbf4641f9afdcbea511ee66f673bb.tar.gz
bh20-seq-resource-3165a31e321cbf4641f9afdcbea511ee66f673bb.tar.lz
bh20-seq-resource-3165a31e321cbf4641f9afdcbea511ee66f673bb.zip
Added a check (both locally and in the validation) that the sample_id in the metadata matches the FASTA header (#103)
Diffstat (limited to 'bh20sequploader')
-rw-r--r-- bh20sequploader/main.py 13
-rw-r--r-- bh20sequploader/qc_metadata.py 2
2 files changed, 11 insertions, 4 deletions
diff --git a/bh20sequploader/main.py b/bh20sequploader/main.py
index 80c33c8..d2a6c5f 100644
--- a/bh20sequploader/main.py
+++ b/bh20sequploader/main.py
@@ -26,11 +26,14 @@ VALIDATED_PROJECT='lugli-j7d0g-5ct8p1i1wrgyjvp'
def qc_stuff(metadata, sequence_p1, sequence_p2, do_qc=True):
failed = False
+ sample_id = ''
try:
log.debug("Checking metadata" if do_qc else "Skipping metadata check")
- if do_qc and not qc_metadata(metadata.name):
- log.warning("Failed metadata QC")
- failed = True
+ if do_qc:
+ sample_id = qc_metadata(metadata.name)
+ if not sample_id:
+ log.warning("Failed metadata QC")
+ failed = True
except Exception as e:
log.exception("Failed metadata QC")
failed = True
@@ -46,6 +49,10 @@ def qc_stuff(metadata, sequence_p1, sequence_p2, do_qc=True):
target[0] = ("reads_1."+target[0][0][6:], target[0][1])
target[1] = ("reads_2."+target[1][0][6:], target[0][1])
+
+ if target[0][2] == 'text/fasta' and sample_id != target[0][1]:
+ raise ValueError("The sample_id field in the metadata must be the same as the FASTA header")
+
except Exception as e:
log.exception("Failed sequence QC")
failed = True
diff --git a/bh20sequploader/qc_metadata.py b/bh20sequploader/qc_metadata.py
index 27657b1..87fa0b3 100644
--- a/bh20sequploader/qc_metadata.py
+++ b/bh20sequploader/qc_metadata.py
@@ -39,4 +39,4 @@ def qc_metadata(metadatafile):
if not rslt:
raise Exception(reason)
- return True
+ return metadata['sample']['sample_id']