about summary refs log tree commit diff
path: root/bh20seqanalyzer
diff options
context:
space:
mode:
authorPeter Amstutz2020-06-22 15:06:10 +0000
committerPeter Amstutz2020-06-22 15:06:10 +0000
commit7daa9ff2cdba742a811db00c924ccde25fa2c9b6 (patch)
treef6f001d224c82825c08b091f221f85897f60d40d /bh20seqanalyzer
parent1554fb6b4daf263f034d46a5f5b26ebcc3e69d22 (diff)
downloadbh20-seq-resource-7daa9ff2cdba742a811db00c924ccde25fa2c9b6.tar.gz
bh20-seq-resource-7daa9ff2cdba742a811db00c924ccde25fa2c9b6.tar.lz
bh20-seq-resource-7daa9ff2cdba742a811db00c924ccde25fa2c9b6.zip
Handle upload & assembly of gzipped, paired-end fastq
Diffstat (limited to 'bh20seqanalyzer')
-rw-r--r--bh20seqanalyzer/main.py48
1 files changed, 34 insertions, 14 deletions
diff --git a/bh20seqanalyzer/main.py b/bh20seqanalyzer/main.py
index 31ad4c4..794ce27 100644
--- a/bh20seqanalyzer/main.py
+++ b/bh20seqanalyzer/main.py
@@ -40,20 +40,21 @@ def validate_upload(api, collection, validated_project,
 
     if valid:
         tgt = None
-        for n in ("sequence.fasta", "reads.fastq"):
+        paired = {"reads_1.fastq": "reads.fastq", "reads_1.fastq.gz": "reads.fastq.gz"}
+        for n in ("sequence.fasta", "reads.fastq", "reads.fastq.gz", "reads_1.fastq", "reads_1.fastq.gz"):
             if n not in col:
                 continue
-            with col.open(n) as qf:
-                tgt = qc_fasta(qf)
-                if tgt != n:
+            with col.open(n, 'rb') as qf:
+                tgt = qc_fasta(qf)[0]
+                if tgt != n and tgt != paired.get(n):
                     logging.info("Expected %s but magic says it should be %s", n, tgt)
                     valid = False
-                elif tgt == "reads.fastq":
-                    start_fastq_to_fasta(api, collection, fastq_project, fastq_workflow_uuid)
+                elif tgt in ("reads.fastq", "reads.fastq.gz", "reads_1.fastq", "reads_1.fastq.gz"):
+                    start_fastq_to_fasta(api, collection, fastq_project, fastq_workflow_uuid, n)
                     return False
         if tgt is None:
             valid = False
-            logging.warn("Upload '%s' does not contain sequence.fasta or reads.fastq", collection["name"])
+            logging.warn("Upload '%s' does not contain sequence.fasta, reads.fastq or reads_1.fastq", collection["name"])
 
     dup = api.collections().list(filters=[["owner_uuid", "=", validated_project],
                                           ["portable_data_hash", "=", col.portable_data_hash()]]).execute()
@@ -95,6 +96,7 @@ def run_workflow(api, parent_project, workflow_uuid, name, inputobj):
                tmp.name]
         logging.info("Running %s" % ' '.join(cmd))
         comp = subprocess.run(cmd, capture_output=True)
+    logging.info("Submitted %s", comp.stdout)
     if comp.returncode != 0:
         logging.error(comp.stderr.decode('utf-8'))
 
@@ -103,12 +105,10 @@ def run_workflow(api, parent_project, workflow_uuid, name, inputobj):
 
 def start_fastq_to_fasta(api, collection,
                          analysis_project,
-                         fastq_workflow_uuid):
-    newproject = run_workflow(api, analysis_project, fastq_workflow_uuid, "FASTQ to FASTA", {
-        "fastq_forward": {
-            "class": "File",
-            "location": "keep:%s/reads.fastq" % collection["portable_data_hash"]
-        },
+                         fastq_workflow_uuid,
+                         tgt):
+
+    params = {
         "metadata": {
             "class": "File",
             "location": "keep:%s/metadata.yaml" % collection["portable_data_hash"]
@@ -117,7 +117,24 @@ def start_fastq_to_fasta(api, collection,
             "class": "File",
             "location": "keep:ffef6a3b77e5e04f8f62a7b6f67264d1+556/SARS-CoV2-NC_045512.2.fasta"
         }
-    })
+    }
+
+    if tgt.startswith("reads.fastq"):
+        params["fastq_forward"] = {
+            "class": "File",
+            "location": "keep:%s/%s" % (collection["portable_data_hash"], tgt)
+        }
+    elif tgt.startswith("reads_1.fastq"):
+        params["fastq_forward"] = {
+            "class": "File",
+            "location": "keep:%s/reads_1.%s" % (collection["portable_data_hash"], tgt[8:])
+        }
+        params["fastq_reverse"] = {
+            "class": "File",
+            "location": "keep:%s/reads_2.%s" % (collection["portable_data_hash"], tgt[8:])
+        }
+
+    newproject = run_workflow(api, analysis_project, fastq_workflow_uuid, "FASTQ to FASTA", params)
     api.collections().update(uuid=collection["uuid"],
                              body={"owner_uuid": newproject["uuid"]}).execute()
 
@@ -222,6 +239,7 @@ def main():
 
     parser.add_argument('--latest-result-collection', type=str, default='lugli-4zz18-z513nlpqm03hpca', help='')
     parser.add_argument('--kickoff', action="store_true")
+    parser.add_argument('--once', action="store_true")
     args = parser.parse_args()
 
     api = arvados.api()
@@ -265,4 +283,6 @@ def main():
                                 args.pangenome_analysis_project,
                                 args.latest_result_collection)
 
+        if args.once:
+            break
         time.sleep(15)