diff options
author | Peter Amstutz | 2020-04-10 16:20:13 -0400 |
---|---|---|
committer | Peter Amstutz | 2020-04-10 16:20:13 -0400 |
commit | f2a3aeb6d7d8ba210d060b83ea46475eab3626ac (patch) | |
tree | ad3bfe4a38739b744618618dccfe2f91ee1de769 /bh20seqanalyzer/main.py | |
parent | 1b1283131f3c684bfff2c1b165565957ac01b4be (diff) | |
download | bh20-seq-resource-f2a3aeb6d7d8ba210d060b83ea46475eab3626ac.tar.gz bh20-seq-resource-f2a3aeb6d7d8ba210d060b83ea46475eab3626ac.tar.lz bh20-seq-resource-f2a3aeb6d7d8ba210d060b83ea46475eab3626ac.zip |
Improve fasta/fastq QC
Diffstat (limited to 'bh20seqanalyzer/main.py')
-rw-r--r-- | bh20seqanalyzer/main.py | 29 |
1 files changed, 15 insertions, 14 deletions
diff --git a/bh20seqanalyzer/main.py b/bh20seqanalyzer/main.py
index 7626662..63ff067 100644
--- a/bh20seqanalyzer/main.py
+++ b/bh20seqanalyzer/main.py
@@ -39,20 +39,21 @@ def validate_upload(api, collection, validated_project,
             logging.warn("Failed metadata qc")
 
     if valid:
-        if "sequence.fasta" in col:
-            try:
-                qc_fasta(col.open("sequence.fasta"))
-            except Exception as e:
-                logging.warn(e)
-                valid = False
-        else:
-            if "reads.fastq" in col:
-                logging.info("Upload '%s' running fastq2fasta", collection["name"])
-                start_fastq_to_fasta(api, collection, fastq_project, fastq_workflow_uuid)
-                return False
-            else:
-                valid = False
-                logging.warn("Upload '%s' missing sequence.fasta", collection["name"])
+        tgt = None
+        for n in ("sequence.fasta", "reads.fastq"):
+            if n not in col:
+                continue
+            with col.open(n) as qf:
+                tgt = qc_fasta(qf)
+                if tgt != n:
+                    logging.info("Expected %s but magic says it should be %s", n, tgt)
+                    valid = False
+                elif tgt == "reads.fastq":
+                    start_fastq_to_fasta(api, collection, fastq_project, fastq_workflow_uuid)
+                    return False
+        if tgt is None:
+            valid = False
+            logging.warn("Upload '%s' does not contain sequence.fasta or reads.fastq", collection["name"])
 
     dup = api.collections().list(filters=[["owner_uuid", "=", validated_project],
                                           ["portable_data_hash", "=", col.portable_data_hash()]]).execute()