From 2c69ca48ce1a9ab431afcc891bdad8c495c9768e Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Mon, 6 Apr 2020 14:11:41 -0400 Subject: Add service to automatically run a workflow on collections uploaded to a project. --- bh20seqanalyzer/main.py | 55 +++++++++++++++++++++++++++++++++++++++++++++++++ bh20sequploader/main.py | 1 - setup.py | 5 +++-- 3 files changed, 58 insertions(+), 3 deletions(-) create mode 100644 bh20seqanalyzer/main.py diff --git a/bh20seqanalyzer/main.py b/bh20seqanalyzer/main.py new file mode 100644 index 0000000..23e58e9 --- /dev/null +++ b/bh20seqanalyzer/main.py @@ -0,0 +1,55 @@ +import argparse +import arvados +import time +import subprocess +import tempfile +import json + +def start_analysis(api, collection, analysis_project, workflow_uuid): + project = api.groups().create(body={ + "group_class": "project", + "name": "Analysis of %s" % collection["name"], + "owner_uuid": analysis_project, + }, ensure_unique_name=True).execute() + + with tempfile.NamedTemporaryFile() as tmp: + inputobj = json.dumps({ + "sequence": { + "class": "File", + "location": "keep:%s/sequence.fasta" % collection["portable_data_hash"] + }, + "metadata": { + "class": "File", + "location": "keep:%s/metadata.jsonld" % collection["portable_data_hash"] + } + }, indent=2) + tmp.write(inputobj.encode('utf-8')) + tmp.flush() + cmd = ["arvados-cwl-runner", + "--submit", + "--no-wait", + "--debug", + "--project-uuid=%s" % project["uuid"], + "arvwf:%s" % workflow_uuid, + tmp.name] + print("Running %s" % ' '.join(cmd)) + comp = subprocess.run(cmd, capture_output=True) + if comp.returncode != 0: + print(comp.stderr.decode('utf-8')) + else: + api.collections().update(uuid=collection["uuid"], body={"owner_uuid": project['uuid']}).execute() + +def main(): + parser = argparse.ArgumentParser(description='Analyze collections uploaded to a project') + parser.add_argument('--uploader-project', type=str, default='lugli-j7d0g-n5clictpuvwk8aa', help='') + parser.add_argument('--analysis-project', type=str, default='lugli-j7d0g-y4k4uswcqi3ku56', help='') + parser.add_argument('--workflow-uuid', type=str, default='lugli-7fd4e-mqfu9y3ofnpnho1', help='') + args = parser.parse_args() + + api = arvados.api() + + while True: + new_collections = api.collections().list(filters=[['owner_uuid', '=', args.uploader_project]]).execute() + for c in new_collections["items"]: + start_analysis(api, c, args.analysis_project, args.workflow_uuid) + time.sleep(10) diff --git a/bh20sequploader/main.py b/bh20sequploader/main.py index 049a21c..9de51f4 100644 --- a/bh20sequploader/main.py +++ b/bh20sequploader/main.py @@ -10,7 +10,6 @@ import getpass ARVADOS_API_HOST='lugli.arvadosapi.com' ARVADOS_API_TOKEN='2fbebpmbo3rw3x05ueu2i6nx70zhrsb1p22ycu3ry34m4x4462' UPLOAD_PROJECT='lugli-j7d0g-n5clictpuvwk8aa' -WORKFLOW='' def main(): parser = argparse.ArgumentParser(description='Upload SARS-CoV-19 sequences for analysis') diff --git a/setup.py b/setup.py index d5c71b2..9e73ff0 100644 --- a/setup.py +++ b/setup.py @@ -29,13 +29,14 @@ setup( author="Peter Amstutz", author_email="peter.amstutz@curii.com", license="Apache 2.0", - packages=["bh20sequploader"], + packages=["bh20sequploader", "bh20seqanalyzer"], install_requires=install_requires, setup_requires=[] + pytest_runner, tests_require=["pytest<5"], entry_points={ "console_scripts": [ - "bh20-seq-uploader=bh20sequploader.main:main" + "bh20-seq-uploader=bh20sequploader.main:main", + "bh20-seq-analyzer=bh20seqanalyzer.main:main" ] }, zip_safe=True, -- cgit v1.2.3