about summary refs log tree commit diff
path: root/bh20seqanalyzer
diff options
context:
space:
mode:
author    Peter Amstutz    2020-07-08 17:14:46 -0400
committer GitHub           2020-07-08 17:14:46 -0400
commit   6e0f9f18167377bac073d7715b89e7ddbf1fe72d (patch)
tree     1b72a737b50e60346aefaf009ac2488d45c8abe0 /bh20seqanalyzer
parent   6fa25708b46a590be82a6b84266c0a3f25a0d890 (diff)
parent   e821857e7a9403739f321feb7418d33d6bd8b2c7 (diff)
download bh20-seq-resource-6e0f9f18167377bac073d7715b89e7ddbf1fe72d.tar.gz
         bh20-seq-resource-6e0f9f18167377bac073d7715b89e7ddbf1fe72d.tar.lz
         bh20-seq-resource-6e0f9f18167377bac073d7715b89e7ddbf1fe72d.zip
Merge pull request #92 from arvados/upload-download-status
Split upload tab. Add upload status tab. Also a bunch of QC and uploader improvements.
Diffstat (limited to 'bh20seqanalyzer')
-rw-r--r-- bh20seqanalyzer/main.py | 41
1 file changed, 26 insertions, 15 deletions
diff --git a/bh20seqanalyzer/main.py b/bh20seqanalyzer/main.py
index 1746587..0b52e6b 100644
--- a/bh20seqanalyzer/main.py
+++ b/bh20seqanalyzer/main.py
@@ -17,10 +17,11 @@ logging.basicConfig(format="[%(asctime)s] %(levelname)s %(message)s", datefmt="%
 logging.getLogger("googleapiclient.discovery").setLevel(logging.WARN)
 
 def validate_upload(api, collection, validated_project,
-                    fastq_project, fastq_workflow_uuid):
+                    fastq_project, fastq_workflow_uuid,
+                    revalidate):
     col = arvados.collection.Collection(collection["uuid"])
 
-    if collection.get("status") in ("validated", "rejected"):
+    if not revalidate and collection["properties"].get("status") in ("validated", "rejected"):
         return False
 
     # validate the collection here.  Check metadata, etc.
@@ -28,11 +29,12 @@ def validate_upload(api, collection, validated_project,
 
     errors = []
 
-    dup = api.collections().list(filters=[["owner_uuid", "=", validated_project],
-                                          ["portable_data_hash", "=", col.portable_data_hash()]]).execute()
-    if dup["items"]:
-        # This exact collection has been uploaded before.
-        errors.append("Duplicate of %s" % ([d["uuid"] for d in dup["items"]]))
+    if collection["owner_uuid"] != validated_project:
+        dup = api.collections().list(filters=[["owner_uuid", "=", validated_project],
+                                              ["portable_data_hash", "=", col.portable_data_hash()]]).execute()
+        if dup["items"]:
+            # This exact collection has been uploaded before.
+            errors.append("Duplicate of %s" % ([d["uuid"] for d in dup["items"]]))
 
     if not errors:
         if "metadata.yaml" not in col:
@@ -70,12 +72,15 @@ def validate_upload(api, collection, validated_project,
 
 
     if not errors:
-        logging.info("Added '%s' to validated sequences" % collection["name"])
         # Move it to the "validated" project to be included in the next analysis
+        if "errors" in collection["properties"]:
+            del collection["properties"]["errors"]
         collection["properties"]["status"] = "validated"
         api.collections().update(uuid=collection["uuid"], body={
             "owner_uuid": validated_project,
-            "name": "%s (%s)" % (collection["name"], time.asctime(time.gmtime()))}).execute()
+            "name": "%s (%s)" % (collection["name"], time.asctime(time.gmtime())),
+            "properties": collection["properties"]}).execute()
+        logging.info("Added '%s' to validated sequences" % collection["name"])
         return True
     else:
         # It is invalid
@@ -155,7 +160,9 @@ def start_pangenome_analysis(api,
                              validated_project,
                              schema_ref,
                              exclude_list):
-    validated = arvados.util.list_all(api.collections().list, filters=[["owner_uuid", "=", validated_project]])
+    validated = arvados.util.list_all(api.collections().list, filters=[
+        ["owner_uuid", "=", validated_project],
+        ["properties.status", "=", "validated"]])
     inputobj = {
         "inputReads": [],
         "metadata": [],
@@ -187,14 +194,15 @@ def get_workflow_output_from_project(api, uuid):
     cr = api.container_requests().list(filters=[['owner_uuid', '=', uuid],
                                                 ["requesting_container_uuid", "=", None]]).execute()
     if cr["items"] and cr["items"][0]["output_uuid"]:
-        return cr["items"][0]
-    else:
-        return None
+        container = api.containers().get(uuid=cr["items"][0]["container_uuid"]).execute()
+        if container["state"] == "Complete" and container["exit_code"] == 0:
+            return cr["items"][0]
+    return None
 
 
 def copy_most_recent_result(api, analysis_project, latest_result_uuid):
     most_recent_analysis = api.groups().list(filters=[['owner_uuid', '=', analysis_project]],
-                                                  order="created_at desc", limit=1).execute()
+                                                  order="created_at desc").execute()
     for m in most_recent_analysis["items"]:
         wf = get_workflow_output_from_project(api, m["uuid"])
         if wf:
@@ -220,6 +228,7 @@ def move_fastq_to_fasta_results(api, analysis_project, uploader_project):
                                      body={"owner_uuid": uploader_project}).execute()
             p["properties"]["moved_output"] = True
             api.groups().update(uuid=p["uuid"], body={"properties": p["properties"]}).execute()
+            break
 
 
 def upload_schema(api, workflow_def_project):
@@ -297,6 +306,7 @@ def main():
     parser.add_argument('--no-start-analysis', action="store_true")
     parser.add_argument('--once', action="store_true")
     parser.add_argument('--print-status', type=str, default=None)
+    parser.add_argument('--revalidate', action="store_true", default=None)
     args = parser.parse_args()
 
     api = arvados.api()
@@ -330,7 +340,8 @@ def main():
             at_least_one_new_valid_seq = validate_upload(api, c,
                                                          args.validated_project,
                                                          args.fastq_project,
-                                                         args.fastq_workflow_uuid) or at_least_one_new_valid_seq
+                                                         args.fastq_workflow_uuid,
+                                                         args.revalidate) or at_least_one_new_valid_seq
 
         if at_least_one_new_valid_seq and not args.no_start_analysis:
             start_pangenome_analysis(api,