From 99af299197dfc527155cd3257388b784810518c4 Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Wed, 8 Jul 2020 00:16:23 -0400 Subject: Split upload tab. Add upload status tab. Add more direct links on download page. Arvados-DCO-1.1-Signed-off-by: Peter Amstutz --- bh20simplewebuploader/main.py | 63 ++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 60 insertions(+), 3 deletions(-) (limited to 'bh20simplewebuploader/main.py') diff --git a/bh20simplewebuploader/main.py b/bh20simplewebuploader/main.py index 3100dfd..1c96d36 100644 --- a/bh20simplewebuploader/main.py +++ b/bh20simplewebuploader/main.py @@ -13,12 +13,15 @@ import pkg_resources from flask import Flask, request, redirect, send_file, send_from_directory, render_template, jsonify import os.path import requests +import io +import arvados +from markupsafe import Markup logging.basicConfig(level=logging.DEBUG) log = logging.getLogger(__name__ ) log.debug("Entering web uploader") -if not os.path.isfile('bh20sequploader/mainx.py'): +if not os.path.isfile('bh20sequploader/main.py'): print("WARNING: run FLASK from the root of the source repository!", file=sys.stderr) app = Flask(__name__, static_url_path='/static', static_folder='static') @@ -224,12 +227,21 @@ METADATA_OPTION_DEFINITIONS = yaml.safe_load(pkg_resources.resource_stream("bh20 FORM_ITEMS = generate_form(METADATA_SCHEMA, METADATA_OPTION_DEFINITIONS) @app.route('/') +def send_home(): + """ + Send the front page. + """ + + return render_template('home.html', menu='HOME') + + +@app.route('/upload') def send_form(): """ Send the file upload form/front page. 
""" - return render_template('form.html', fields=FORM_ITEMS, menu='HOME') + return render_template('form.html', fields=FORM_ITEMS, menu='UPLOAD') class FileTooBigError(RuntimeError): """ @@ -439,7 +451,52 @@ def get_html_body(fn): @app.route('/download') def download_page(): buf = get_html_body('doc/web/download.html') - return render_template('about.html',menu='DOWNLOAD',embed=buf) + return render_template('resource.html',menu='DOWNLOAD',embed=buf) + +@app.route('/status') +def status_page(): + """ + Processing status + """ + + api = arvados.api() + uploader_project = 'lugli-j7d0g-n5clictpuvwk8aa' + pending = arvados.util.list_all(api.collections().list, filters=[["owner_uuid", "=", uploader_project]]) + out = [] + status = {} + for p in pending: + prop = p["properties"] + out.append(prop) + if "status" not in prop: + prop["status"] = "pending" + prop["created_at"] = p["created_at"] + prop["uuid"] = p["uuid"] + status[prop["status"]] = status.get(prop["status"], 0) + 1 + + output = io.StringIO() + for s in status: + output.write("

%s sequences %s QC

" % (status[s], s)) + output.write( +""" + + + + + +""") + for r in out: + output.write("") + output.write("" % (r["uuid"], r["uuid"])) + output.write("" % Markup.escape(r["sequence_label"])) + output.write("" % r["status"]) + output.write("" % Markup.escape("\n".join(r.get("errors", [])))) + output.write("") + output.write( +""" +
CollectionSequence labelStatusErrors
%s%s%s
%s
+""") + + return render_template('status.html', table=Markup(output.getvalue()), menu='STATUS') @app.route('/demo') def demo_page(): -- cgit v1.2.3 From 44e3c21ba89aa19c066148fdd18745f45e168d50 Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Wed, 8 Jul 2020 10:28:28 -0400 Subject: Fetch status as anonymous user Arvados-DCO-1.1-Signed-off-by: Peter Amstutz --- bh20simplewebuploader/main.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'bh20simplewebuploader/main.py') diff --git a/bh20simplewebuploader/main.py b/bh20simplewebuploader/main.py index 1c96d36..d602288 100644 --- a/bh20simplewebuploader/main.py +++ b/bh20simplewebuploader/main.py @@ -17,6 +17,10 @@ import io import arvados from markupsafe import Markup +ARVADOS_API = 'lugli.arvadosapi.com' +ANONYMOUS_TOKEN = '5o42qdxpxp5cj15jqjf7vnxx5xduhm4ret703suuoa3ivfglfh' +UPLOADER_PROJECT = 'lugli-j7d0g-n5clictpuvwk8aa' + logging.basicConfig(level=logging.DEBUG) log = logging.getLogger(__name__ ) log.debug("Entering web uploader") @@ -459,9 +463,8 @@ def status_page(): Processing status """ - api = arvados.api() - uploader_project = 'lugli-j7d0g-n5clictpuvwk8aa' - pending = arvados.util.list_all(api.collections().list, filters=[["owner_uuid", "=", uploader_project]]) + api = arvados.api(host=ARVADOS_API, token=ANONYMOUS_TOKEN) + pending = arvados.util.list_all(api.collections().list, filters=[["owner_uuid", "=", UPLOADER_PROJECT]]) out = [] status = {} for p in pending: -- cgit v1.2.3 From cbb191cb23f4e40b1a1d3024401960939fda9bc3 Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Wed, 8 Jul 2020 14:02:11 -0400 Subject: Get count from arvados Separate pending/rejected tables "Latest results" is latest successful run. 
Arvados-DCO-1.1-Signed-off-by: Peter Amstutz --- bh20seqanalyzer/main.py | 10 +-- bh20simplewebuploader/main.py | 93 +++++++++++++++++---------- bh20simplewebuploader/templates/resource.html | 1 + 3 files changed, 65 insertions(+), 39 deletions(-) (limited to 'bh20simplewebuploader/main.py') diff --git a/bh20seqanalyzer/main.py b/bh20seqanalyzer/main.py index 1746587..ce9a723 100644 --- a/bh20seqanalyzer/main.py +++ b/bh20seqanalyzer/main.py @@ -187,14 +187,15 @@ def get_workflow_output_from_project(api, uuid): cr = api.container_requests().list(filters=[['owner_uuid', '=', uuid], ["requesting_container_uuid", "=", None]]).execute() if cr["items"] and cr["items"][0]["output_uuid"]: - return cr["items"][0] - else: - return None + container = api.containers().get(uuid=cr["items"][0]["container_uuid"]).execute() + if container["state"] == "Complete" and container["exit_code"] == 0: + return cr["items"][0] + return None def copy_most_recent_result(api, analysis_project, latest_result_uuid): most_recent_analysis = api.groups().list(filters=[['owner_uuid', '=', analysis_project]], - order="created_at desc", limit=1).execute() + order="created_at desc").execute() for m in most_recent_analysis["items"]: wf = get_workflow_output_from_project(api, m["uuid"]) if wf: @@ -220,6 +221,7 @@ def move_fastq_to_fasta_results(api, analysis_project, uploader_project): body={"owner_uuid": uploader_project}).execute() p["properties"]["moved_output"] = True api.groups().update(uuid=p["uuid"], body={"properties": p["properties"]}).execute() + break def upload_schema(api, workflow_def_project): diff --git a/bh20simplewebuploader/main.py b/bh20simplewebuploader/main.py index d602288..d4adbda 100644 --- a/bh20simplewebuploader/main.py +++ b/bh20simplewebuploader/main.py @@ -20,6 +20,7 @@ from markupsafe import Markup ARVADOS_API = 'lugli.arvadosapi.com' ANONYMOUS_TOKEN = '5o42qdxpxp5cj15jqjf7vnxx5xduhm4ret703suuoa3ivfglfh' UPLOADER_PROJECT = 'lugli-j7d0g-n5clictpuvwk8aa' +VALIDATED_PROJECT = 
'lugli-j7d0g-5ct8p1i1wrgyjvp' logging.basicConfig(level=logging.DEBUG) log = logging.getLogger(__name__ ) @@ -457,6 +458,47 @@ def download_page(): buf = get_html_body('doc/web/download.html') return render_template('resource.html',menu='DOWNLOAD',embed=buf) +def pending_table(output, items): + output.write( +""" + + + +""") + for r in items: + if r["status"] != "pending": + continue + output.write("") + output.write("" % (r["uuid"], r["uuid"])) + output.write("" % Markup.escape(r["sequence_label"])) + output.write("") + output.write( +""" +
CollectionSequence label
%s%s
+""") + +def rejected_table(output, items): + output.write( +""" + + + + +""") + for r in items: + if r["status"] != "rejected": + continue + output.write("") + output.write("" % (r["uuid"], r["uuid"])) + output.write("" % Markup.escape(r["sequence_label"])) + output.write("" % Markup.escape("\n".join(r.get("errors", [])))) + output.write("") + output.write( +""" +
CollectionSequence labelErrors
%s%s
%s
+""") + + @app.route('/status') def status_page(): """ @@ -477,27 +519,18 @@ def status_page(): status[prop["status"]] = status.get(prop["status"], 0) + 1 output = io.StringIO() - for s in status: - output.write("

%s sequences %s QC

" % (status[s], s)) - output.write( -""" - - - - - -""") - for r in out: - output.write("") - output.write("" % (r["uuid"], r["uuid"])) - output.write("" % Markup.escape(r["sequence_label"])) - output.write("" % r["status"]) - output.write("" % Markup.escape("\n".join(r.get("errors", [])))) - output.write("") - output.write( -""" -
CollectionSequence labelStatusErrors
%s%s%s
%s
-""") + + validated = api.collections().list(filters=[["owner_uuid", "=", VALIDATED_PROJECT]], limit=1).execute() + status["passed"] = validated["items_available"] + + for s in (("passed", "/download"), ("pending", "#pending"), ("rejected", "#rejected")): + output.write("

%s sequences QC %s

" % (s[1], status.get(s[0], 0), s[0])) + + output.write("

Pending

") + pending_table(output, out) + + output.write("

Rejected

") + rejected_table(output, out) return render_template('status.html', table=Markup(output.getvalue()), menu='STATUS') @@ -534,20 +567,10 @@ baseURL='http://sparql.genenetwork.org/sparql/' @app.route('/api/getCount', methods=['GET']) def getCount(): - query=""" -PREFIX pubseq: -select (COUNT(distinct ?dataset) as ?num) -{ - ?dataset pubseq:submitter ?id . - ?id ?p ?submitter -} -""" - payload = {'query': query, 'format': 'json'} - r = requests.get(baseURL, params=payload) - result = r.json()['results']['bindings'] - # [{'num': {'type': 'typed-literal', 'datatype': 'http://www.w3.org/2001/XMLSchema#integer', 'value': '1352'}}] - # print(result, file=sys.stderr) - return jsonify({'sequences': int(result[0]["num"]["value"])}) + api = arvados.api(host=ARVADOS_API, token=ANONYMOUS_TOKEN) + c = api.collections().list(filters=[["owner_uuid", "=", VALIDATED_PROJECT]], limit=1).execute() + + return jsonify({'sequences': c["items_available"]}) @app.route('/api/getAllaccessions', methods=['GET']) def getAllaccessions(): diff --git a/bh20simplewebuploader/templates/resource.html b/bh20simplewebuploader/templates/resource.html index e0d344f..91b6c20 100644 --- a/bh20simplewebuploader/templates/resource.html +++ b/bh20simplewebuploader/templates/resource.html @@ -7,6 +7,7 @@

+

All sequences project

All sequences (FASTA) relabeled and deduplicated

Metadata (RDF) for all sequences

All sequences in Graphical Fragment Assembly (GFA) - More about GFA

-- cgit v1.2.3 From cd25f56f4d9aea1d30cc17ee923a6d8dd6c63e5a Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Wed, 8 Jul 2020 20:16:42 +0000 Subject: Add --revalidate and fix checking/updating properties. --- bh20seqanalyzer/main.py | 31 ++++++++++++++++++++----------- bh20simplewebuploader/main.py | 2 +- 2 files changed, 21 insertions(+), 12 deletions(-) (limited to 'bh20simplewebuploader/main.py') diff --git a/bh20seqanalyzer/main.py b/bh20seqanalyzer/main.py index ce9a723..0b52e6b 100644 --- a/bh20seqanalyzer/main.py +++ b/bh20seqanalyzer/main.py @@ -17,10 +17,11 @@ logging.basicConfig(format="[%(asctime)s] %(levelname)s %(message)s", datefmt="% logging.getLogger("googleapiclient.discovery").setLevel(logging.WARN) def validate_upload(api, collection, validated_project, - fastq_project, fastq_workflow_uuid): + fastq_project, fastq_workflow_uuid, + revalidate): col = arvados.collection.Collection(collection["uuid"]) - if collection.get("status") in ("validated", "rejected"): + if not revalidate and collection["properties"].get("status") in ("validated", "rejected"): return False # validate the collection here. Check metadata, etc. @@ -28,11 +29,12 @@ def validate_upload(api, collection, validated_project, errors = [] - dup = api.collections().list(filters=[["owner_uuid", "=", validated_project], - ["portable_data_hash", "=", col.portable_data_hash()]]).execute() - if dup["items"]: - # This exact collection has been uploaded before. - errors.append("Duplicate of %s" % ([d["uuid"] for d in dup["items"]])) + if collection["owner_uuid"] != validated_project: + dup = api.collections().list(filters=[["owner_uuid", "=", validated_project], + ["portable_data_hash", "=", col.portable_data_hash()]]).execute() + if dup["items"]: + # This exact collection has been uploaded before. 
+ errors.append("Duplicate of %s" % ([d["uuid"] for d in dup["items"]])) if not errors: if "metadata.yaml" not in col: @@ -70,12 +72,15 @@ def validate_upload(api, collection, validated_project, if not errors: - logging.info("Added '%s' to validated sequences" % collection["name"]) # Move it to the "validated" project to be included in the next analysis + if "errors" in collection["properties"]: + del collection["properties"]["errors"] collection["properties"]["status"] = "validated" api.collections().update(uuid=collection["uuid"], body={ "owner_uuid": validated_project, - "name": "%s (%s)" % (collection["name"], time.asctime(time.gmtime()))}).execute() + "name": "%s (%s)" % (collection["name"], time.asctime(time.gmtime())), + "properties": collection["properties"]}).execute() + logging.info("Added '%s' to validated sequences" % collection["name"]) return True else: # It is invalid @@ -155,7 +160,9 @@ def start_pangenome_analysis(api, validated_project, schema_ref, exclude_list): - validated = arvados.util.list_all(api.collections().list, filters=[["owner_uuid", "=", validated_project]]) + validated = arvados.util.list_all(api.collections().list, filters=[ + ["owner_uuid", "=", validated_project], + ["properties.status", "=", "validated"]]) inputobj = { "inputReads": [], "metadata": [], @@ -299,6 +306,7 @@ def main(): parser.add_argument('--no-start-analysis', action="store_true") parser.add_argument('--once', action="store_true") parser.add_argument('--print-status', type=str, default=None) + parser.add_argument('--revalidate', action="store_true", default=None) args = parser.parse_args() api = arvados.api() @@ -332,7 +340,8 @@ def main(): at_least_one_new_valid_seq = validate_upload(api, c, args.validated_project, args.fastq_project, - args.fastq_workflow_uuid) or at_least_one_new_valid_seq + args.fastq_workflow_uuid, + args.revalidate) or at_least_one_new_valid_seq if at_least_one_new_valid_seq and not args.no_start_analysis: start_pangenome_analysis(api, diff 
--git a/bh20simplewebuploader/main.py b/bh20simplewebuploader/main.py index d4adbda..9132453 100644 --- a/bh20simplewebuploader/main.py +++ b/bh20simplewebuploader/main.py @@ -422,7 +422,7 @@ def receive_files(): # Try and upload files to Arvados using the sequence uploader CLI - cmd = ['python3','bh20sequploader/main.py', fasta_dest, metadata_dest] + cmd = ['python3','bh20sequploader/main.py', metadata_dest, fasta_dest] print(" ".join(cmd),file=sys.stderr) result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) -- cgit v1.2.3