about summary refs log tree commit diff
path: root/bh20seqanalyzer
diff options
context:
space:
mode:
Diffstat (limited to 'bh20seqanalyzer')
-rw-r--r-- bh20seqanalyzer/__init__.py 0
-rw-r--r-- bh20seqanalyzer/main.py 32
2 files changed, 17 insertions, 15 deletions
diff --git a/bh20seqanalyzer/__init__.py b/bh20seqanalyzer/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/bh20seqanalyzer/__init__.py
diff --git a/bh20seqanalyzer/main.py b/bh20seqanalyzer/main.py
index c05b402..193a268 100644
--- a/bh20seqanalyzer/main.py
+++ b/bh20seqanalyzer/main.py
@@ -29,7 +29,7 @@ def validate_upload(api, collection, validated_project,
     else:
         try:
             metadata_content = ruamel.yaml.round_trip_load(col.open("metadata.yaml"))
-            metadata_content["id"] = "keep:%s/metadata.yaml" % collection["portable_data_hash"]
+            metadata_content["id"] = "http://arvados.org/keep:%s/metadata.yaml" % collection["portable_data_hash"]
             add_lc_filename(metadata_content, metadata_content["id"])
             valid = qc_metadata(metadata_content) and valid
         except Exception as e:
@@ -39,19 +39,21 @@ def validate_upload(api, collection, validated_project,
             logging.warn("Failed metadata qc")
 
     if valid:
-        if "sequence.fasta" in col:
-            try:
-                qc_fasta(col.open("sequence.fasta"))
-            except Exception as e:
-                logging.warn(e)
-                valid = False
-        else:
-            if "reads.fastq" in col:
-                start_fastq_to_fasta(api, collection, fastq_project, fastq_workflow_uuid)
-                return False
-            else:
-                valid = False
-                logging.warn("Upload '%s' missing sequence.fasta", collection["name"])
+        tgt = None
+        for n in ("sequence.fasta", "reads.fastq"):
+            if n not in col:
+                continue
+            with col.open(n) as qf:
+                tgt = qc_fasta(qf)
+                if tgt != n:
+                    logging.info("Expected %s but magic says it should be %s", n, tgt)
+                    valid = False
+                elif tgt == "reads.fastq":
+                    start_fastq_to_fasta(api, collection, fastq_project, fastq_workflow_uuid)
+                    return False
+        if tgt is None:
+            valid = False
+            logging.warn("Upload '%s' does not contain sequence.fasta or reads.fastq", collection["name"])
 
     dup = api.collections().list(filters=[["owner_uuid", "=", validated_project],
                                           ["portable_data_hash", "=", col.portable_data_hash()]]).execute()
@@ -144,7 +146,7 @@ def start_pangenome_analysis(api,
             "class": "File",
             "location": "keep:%s/metadata.yaml" % v["portable_data_hash"]
         })
-        inputobj["subjects"].append("keep:%s/sequence.fasta" % v["portable_data_hash"])
+        inputobj["subjects"].append("http://arvados.org/keep:%s/sequence.fasta" % v["portable_data_hash"])
     run_workflow(api, analysis_project, pangenome_workflow_uuid, "Pangenome analysis", inputobj)