author    Peter Amstutz  2020-07-08 17:14:46 -0400
committer GitHub         2020-07-08 17:14:46 -0400
commit    6e0f9f18167377bac073d7715b89e7ddbf1fe72d (patch)
tree      1b72a737b50e60346aefaf009ac2488d45c8abe0 /bh20seqanalyzer/main.py
parent    6fa25708b46a590be82a6b84266c0a3f25a0d890 (diff)
parent    e821857e7a9403739f321feb7418d33d6bd8b2c7 (diff)
Merge pull request #92 from arvados/upload-download-status
Split upload tab. Add upload status tab. Also a bunch of QC and uploader improvements.
Diffstat (limited to 'bh20seqanalyzer/main.py')
-rw-r--r--  bh20seqanalyzer/main.py  41
1 file changed, 26 insertions(+), 15 deletions(-)
diff --git a/bh20seqanalyzer/main.py b/bh20seqanalyzer/main.py
index 1746587..0b52e6b 100644
--- a/bh20seqanalyzer/main.py
+++ b/bh20seqanalyzer/main.py
@@ -17,10 +17,11 @@ logging.basicConfig(format="[%(asctime)s] %(levelname)s %(message)s", datefmt="%
 logging.getLogger("googleapiclient.discovery").setLevel(logging.WARN)
 
 def validate_upload(api, collection, validated_project,
-                    fastq_project, fastq_workflow_uuid):
+                    fastq_project, fastq_workflow_uuid,
+                    revalidate):
     col = arvados.collection.Collection(collection["uuid"])
 
-    if collection.get("status") in ("validated", "rejected"):
+    if not revalidate and collection["properties"].get("status") in ("validated", "rejected"):
         return False
 
     # validate the collection here.  Check metadata, etc.
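A minimal, runnable sketch of the new gate; the record shape (a dict carrying a "properties" dict) follows the Arvados collection records handled in this diff:

# Sketch of the revalidation gate, assuming an Arvados-style record.
def should_skip(collection, revalidate):
    # Status lives under collection["properties"]; the old code read
    # collection.get("status"), i.e. a top-level key this record shape
    # does not carry.
    status = collection["properties"].get("status")
    return not revalidate and status in ("validated", "rejected")

record = {"properties": {"status": "validated"}}
assert should_skip(record, revalidate=False)     # already handled: skip
assert not should_skip(record, revalidate=True)  # --revalidate forces a re-check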
@@ -28,11 +29,12 @@ def validate_upload(api, collection, validated_project,
     errors = []
 
-    dup = api.collections().list(filters=[["owner_uuid", "=", validated_project],
-                                          ["portable_data_hash", "=", col.portable_data_hash()]]).execute()
-    if dup["items"]:
-        # This exact collection has been uploaded before.
-        errors.append("Duplicate of %s" % ([d["uuid"] for d in dup["items"]]))
+    if collection["owner_uuid"] != validated_project:
+        dup = api.collections().list(filters=[["owner_uuid", "=", validated_project],
+                                              ["portable_data_hash", "=", col.portable_data_hash()]]).execute()
+        if dup["items"]:
+            # This exact collection has been uploaded before.
+            errors.append("Duplicate of %s" % ([d["uuid"] for d in dup["items"]]))
 
     if not errors:
         if "metadata.yaml" not in col:
@@ -70,12 +72,15 @@ def validate_upload(api, collection, validated_project,
 
     if not errors:
-        logging.info("Added '%s' to validated sequences" % collection["name"])
         # Move it to the "validated" project to be included in the next analysis
+        if "errors" in collection["properties"]:
+            del collection["properties"]["errors"]
         collection["properties"]["status"] = "validated"
         api.collections().update(uuid=collection["uuid"], body={
             "owner_uuid": validated_project,
-            "name": "%s (%s)" % (collection["name"], time.asctime(time.gmtime()))}).execute()
+            "name": "%s (%s)" % (collection["name"], time.asctime(time.gmtime())),
+            "properties": collection["properties"]}).execute()
+        logging.info("Added '%s' to validated sequences" % collection["name"])
         return True
     else:
         # It is invalid
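Consolidating the added lines of this hunk into one runnable helper (names are from the hunk; the function wrapper is added here for illustration): the properties dict must travel in the update body for the status change to persist, a stale "errors" entry from an earlier failed validation is cleared, and the success message now logs only after the update call returns:

import time
import logging

def promote(api, collection, validated_project):
    # Local mutation of collection["properties"] changes nothing on the
    # server until the dict is sent back in the update body.
    collection["properties"].pop("errors", None)
    collection["properties"]["status"] = "validated"
    api.collections().update(uuid=collection["uuid"], body={
        "owner_uuid": validated_project,
        "name": "%s (%s)" % (collection["name"], time.asctime(time.gmtime())),
        "properties": collection["properties"]}).execute()
    # Log only once the update has succeeded, as the diff now does.
    logging.info("Added '%s' to validated sequences", collection["name"])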
@@ -155,7 +160,9 @@ def start_pangenome_analysis(api,
                              validated_project,
                              schema_ref,
                              exclude_list):
-    validated = arvados.util.list_all(api.collections().list, filters=[["owner_uuid", "=", validated_project]])
+    validated = arvados.util.list_all(api.collections().list, filters=[
+        ["owner_uuid", "=", validated_project],
+        ["properties.status", "=", "validated"]])
     inputobj = {
         "inputReads": [],
         "metadata": [],
@@ -187,14 +194,15 @@ def get_workflow_output_from_project(api, uuid):
     cr = api.container_requests().list(filters=[['owner_uuid', '=', uuid],
                                                 ["requesting_container_uuid", "=", None]]).execute()
     if cr["items"] and cr["items"][0]["output_uuid"]:
-        return cr["items"][0]
-    else:
-        return None
+        container = api.containers().get(uuid=cr["items"][0]["container_uuid"]).execute()
+        if container["state"] == "Complete" and container["exit_code"] == 0:
+            return cr["items"][0]
+    return None
 
 def copy_most_recent_result(api, analysis_project, latest_result_uuid):
     most_recent_analysis = api.groups().list(filters=[['owner_uuid', '=', analysis_project]],
-                                             order="created_at desc", limit=1).execute()
+                                             order="created_at desc").execute()
     for m in most_recent_analysis["items"]:
         wf = get_workflow_output_from_project(api, m["uuid"])
         if wf:
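Both changes in this hunk pull in the same direction. A self-contained sketch (the helper name is illustrative, not from the source): a container request can carry an output_uuid even when its container failed, so the output is trusted only for a Complete container with exit code 0, and dropping limit=1 lets copy_most_recent_result walk back to an older analysis run whose output passes that test:

def get_good_output(api, project_uuid):
    cr = api.container_requests().list(filters=[
        ["owner_uuid", "=", project_uuid],
        ["requesting_container_uuid", "=", None]]).execute()
    if cr["items"] and cr["items"][0]["output_uuid"]:
        # output_uuid alone is not proof of success; check the container.
        container = api.containers().get(
            uuid=cr["items"][0]["container_uuid"]).execute()
        if container["state"] == "Complete" and container["exit_code"] == 0:
            return cr["items"][0]
    return None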
@@ -220,6 +228,7 @@ def move_fastq_to_fasta_results(api, analysis_project, uploader_project):
                                  body={"owner_uuid": uploader_project}).execute()
         p["properties"]["moved_output"] = True
         api.groups().update(uuid=p["uuid"], body={"properties": p["properties"]}).execute()
+        break
 
 def upload_schema(api, workflow_def_project):
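A toy sketch of the pattern, assuming the surrounding loop (not shown in this hunk) visits analysis projects newest-first: the new break stops the scan after one project's output has been moved, instead of re-processing older runs in the same pass:

projects = [{"uuid": "newest", "properties": {}},
            {"uuid": "older", "properties": {}}]
for p in projects:
    if not p["properties"].get("moved_output"):
        p["properties"]["moved_output"] = True
        break  # handle one project per pass; assumed newest-first order

assert "moved_output" not in projects[1]["properties"]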
@@ -297,6 +306,7 @@ def main():
     parser.add_argument('--no-start-analysis', action="store_true")
     parser.add_argument('--once', action="store_true")
     parser.add_argument('--print-status', type=str, default=None)
+    parser.add_argument('--revalidate', action="store_true", default=None)
     args = parser.parse_args()
 
     api = arvados.api()
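The flag itself is plain argparse; a small sketch of its observable behavior (the help text below is an assumption, not in the diff):

import argparse

parser = argparse.ArgumentParser()
# default=None (rather than False) means the attribute is None when the
# flag is absent and True when passed; validate_upload only tests truthiness.
parser.add_argument('--revalidate', action="store_true", default=None,
                    help="re-validate collections already marked "
                         "validated or rejected (assumed help text)")

assert parser.parse_args([]).revalidate is None
assert parser.parse_args(['--revalidate']).revalidate is True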
@@ -330,7 +340,8 @@ def main():
                 at_least_one_new_valid_seq = validate_upload(api, c,
                                                              args.validated_project,
                                                              args.fastq_project,
-                                                             args.fastq_workflow_uuid) or at_least_one_new_valid_seq
+                                                             args.fastq_workflow_uuid,
+                                                             args.revalidate) or at_least_one_new_valid_seq
 
             if at_least_one_new_valid_seq and not args.no_start_analysis:
                 start_pangenome_analysis(api,
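One last detail worth noting, sketched below: the operand order in the or-accumulation is deliberate, because Python's `or` short-circuits. Keeping the validate_upload(...) call on the left means every collection is still validated even after one has already succeeded; the reversed order would stop calling it as soon as the flag turned True:

def noisy(result, log):
    log.append(result)
    return result

log = []
flag = False
for r in (True, False):
    flag = noisy(r, log) or flag  # left operand always evaluated

assert flag and log == [True, False]  # both items were processed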