diff options
Diffstat (limited to 'bh20seqanalyzer/main.py')
-rw-r--r-- | bh20seqanalyzer/main.py | 32 |
1 files changed, 17 insertions, 15 deletions
diff --git a/bh20seqanalyzer/main.py b/bh20seqanalyzer/main.py index c05b402..193a268 100644 --- a/bh20seqanalyzer/main.py +++ b/bh20seqanalyzer/main.py @@ -29,7 +29,7 @@ def validate_upload(api, collection, validated_project, else: try: metadata_content = ruamel.yaml.round_trip_load(col.open("metadata.yaml")) - metadata_content["id"] = "keep:%s/metadata.yaml" % collection["portable_data_hash"] + metadata_content["id"] = "http://arvados.org/keep:%s/metadata.yaml" % collection["portable_data_hash"] add_lc_filename(metadata_content, metadata_content["id"]) valid = qc_metadata(metadata_content) and valid except Exception as e: @@ -39,19 +39,21 @@ def validate_upload(api, collection, validated_project, logging.warn("Failed metadata qc") if valid: - if "sequence.fasta" in col: - try: - qc_fasta(col.open("sequence.fasta")) - except Exception as e: - logging.warn(e) - valid = False - else: - if "reads.fastq" in col: - start_fastq_to_fasta(api, collection, fastq_project, fastq_workflow_uuid) - return False - else: - valid = False - logging.warn("Upload '%s' missing sequence.fasta", collection["name"]) + tgt = None + for n in ("sequence.fasta", "reads.fastq"): + if n not in col: + continue + with col.open(n) as qf: + tgt = qc_fasta(qf) + if tgt != n: + logging.info("Expected %s but magic says it should be %s", n, tgt) + valid = False + elif tgt == "reads.fastq": + start_fastq_to_fasta(api, collection, fastq_project, fastq_workflow_uuid) + return False + if tgt is None: + valid = False + logging.warn("Upload '%s' does not contain sequence.fasta or reads.fastq", collection["name"]) dup = api.collections().list(filters=[["owner_uuid", "=", validated_project], ["portable_data_hash", "=", col.portable_data_hash()]]).execute() @@ -144,7 +146,7 @@ def start_pangenome_analysis(api, "class": "File", "location": "keep:%s/metadata.yaml" % v["portable_data_hash"] }) - inputobj["subjects"].append("keep:%s/sequence.fasta" % v["portable_data_hash"]) + inputobj["subjects"].append("http://arvados.org/keep:%s/sequence.fasta" % v["portable_data_hash"]) run_workflow(api, analysis_project, pangenome_workflow_uuid, "Pangenome analysis", inputobj) |