diff options
91 files changed, 3152 insertions, 1310 deletions
diff --git a/.dir-locals.el b/.dir-locals.el index 6b07349..153cb4a 100644 --- a/.dir-locals.el +++ b/.dir-locals.el @@ -14,4 +14,5 @@ (eval . (put 'call-with-inferior 'scheme-indent-function 1)) (eval . (put 'maybe-let* 'scheme-indent-function 1)) (eval . (put 'maybe-assoc-set 'scheme-indent-function 1)) + (eval . (put 'run-with-state 'scheme-indent-function 0)) (eval . (put 'state-let* 'scheme-indent-function 1)))) diff --git a/.guix/cwl-conformance.scm b/.guix/cwl-conformance.scm new file mode 100644 index 0000000..b71821e --- /dev/null +++ b/.guix/cwl-conformance.scm @@ -0,0 +1,117 @@ +;;; ravanan --- High-reproducibility CWL runner powered by Guix +;;; Copyright © 2025 Arun Isaac <arunisaac@systemreboot.net> +;;; +;;; This file is part of ravanan. +;;; +;;; ravanan is free software: you can redistribute it and/or modify it +;;; under the terms of the GNU General Public License as published by +;;; the Free Software Foundation, either version 3 of the License, or +;;; (at your option) any later version. +;;; +;;; ravanan is distributed in the hope that it will be useful, but +;;; WITHOUT ANY WARRANTY; without even the implied warranty of +;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;;; General Public License for more details. +;;; +;;; You should have received a copy of the GNU General Public License +;;; along with ravanan. If not, see <https://www.gnu.org/licenses/>. + +(define-module (cwl-conformance) + #:use-module ((cwltest-package) #:select (cwltest)) + #:use-module ((ravanan-package) #:select (ravanan)) + #:use-module ((gnu packages python) #:select (python)) + #:use-module ((gnu packages python-web) #:select (python-pybadges)) + #:use-module (guix gexp) + #:use-module (guix git-download) + #:use-module (guix packages) + #:use-module (guix profiles) + #:use-module (guix utils) + #:use-module (ice-9 match) + #:export (cwltest-suite-gexp)) + +(define* (cwltest-suite-gexp cwltest-suite manifest-file #:key (skip-tests '())) + (with-imported-modules '((guix build utils)) + #~(begin + (use-modules (guix build utils) + (ice-9 match)) + + (match (command-line) + ((_ cwltest-args ...) + ;; Guix peeks into HOME. + (setenv "HOME" (getcwd)) + ;; cwltest writes out output directories to TMPDIR, but does not + ;; clean up after. So, we set TMPDIR to our own temporary directory + ;; that we can manage easily. See pending issue on cleaning up + ;; temporary output directories: + ;; https://github.com/common-workflow-language/cwltest/issues/249 + (mkdir "tmpdir") + (setenv "TMPDIR" "tmpdir") + (apply invoke + #$(file-append cwltest "/bin/cwltest") + "--test" #$cwltest-suite + "--tool" #$(file-append ravanan "/bin/ravanan") + "--badgedir" "badges" + (append '#$(match skip-tests + (() '()) + (_ (list "-S" (string-join skip-tests ",")))) + cwltest-args + (list "--" + "--store=store" + (string-append "--guix-manifest=" #$manifest-file))))))))) + +(define cwl-v1.2-conformance-suite + (let ((version "1.2.1")) + (origin + (method git-fetch) + (uri (git-reference + (url "https://github.com/common-workflow-language/cwl-v1.2") + (commit (string-append "v" version)))) + (file-name (git-file-name "cwl-v1.2" version)) + (sha256 + (base32 + "03q8pd0niaaff52n6sn07l3rjnvwi4da649lnc8mn928sh0vywf3"))))) + +(define-public cwl-v1.2-conformance + (program-file "cwl-v1.2-conformance" + (cwltest-suite-gexp + (file-append cwl-v1.2-conformance-suite + "/conformance_tests.yaml") + (local-file "../cwl-conformance/manifest.scm") + ;; With these tests, evil things happen and too much memory is + ;; consumed. So, disable for now. + #:skip-tests (list "env_home_tmpdir" + "env_home_tmpdir_docker" + "env_home_tmpdir_docker_no_return_code")))) + +(define generate-badges-gexp + (with-imported-modules '((guix build utils)) + #~(begin + (use-modules (guix build utils) + (ice-9 match)) + + (match (command-line) + ((_ cwltest-badgedir output-directory) + (set-path-environment-variable + "GUIX_PYTHONPATH" + '(#$(string-append "lib/python" + (version-major+minor (package-version python)) + "/site-packages")) + (list #$(profile + (content (packages->manifest + (list python python-pybadges)))))) + (invoke #$(file-append python "/bin/python3") + #$(local-file "../cwl-conformance/badgegen.py") + cwltest-badgedir + #$(local-file "../cwl-conformance/commonwl.svg") + output-directory)) + ((program _ ...) + (format (current-error-port) + "Usage: ~a CWLTEST_BADGEDIR OUTPUT-DIRECTORY~%" + program) + (exit #f)))))) + +(define-public generate-badges + (program-file "generate-badges" + generate-badges-gexp)) + +cwl-v1.2-conformance diff --git a/.guix/cwltest-package.scm b/.guix/cwltest-package.scm new file mode 100644 index 0000000..f3eeb9d --- /dev/null +++ b/.guix/cwltest-package.scm @@ -0,0 +1,117 @@ +;;; ravanan --- High-reproducibility CWL runner powered by Guix +;;; Copyright © 2025 Arun Isaac <arunisaac@systemreboot.net> +;;; +;;; This file is part of ravanan. +;;; +;;; ravanan is free software: you can redistribute it and/or modify it +;;; under the terms of the GNU General Public License as published by +;;; the Free Software Foundation, either version 3 of the License, or +;;; (at your option) any later version. +;;; +;;; ravanan is distributed in the hope that it will be useful, but +;;; WITHOUT ANY WARRANTY; without even the implied warranty of +;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;;; General Public License for more details. +;;; +;;; You should have received a copy of the GNU General Public License +;;; along with ravanan. If not, see <https://www.gnu.org/licenses/>. + +(define-module (cwltest-package) + #:use-module ((gnu packages bioinformatics) + #:select (cwltool python-schema-salad)) + #:use-module ((gnu packages check) + #:select (python-pytest python-pytest-xdist)) + #:use-module ((gnu packages node) #:select (node)) + #:use-module ((gnu packages python-build) + #:select (python-setuptools python-setuptools-scm python-wheel)) + #:use-module ((gnu packages python-check) #:select (python-junit-xml)) + #:use-module ((gnu packages xml) #:select (python-defusedxml)) + #:use-module (guix build-system pyproject) + #:use-module (guix build-system python) + #:use-module (guix download) + #:use-module (guix gexp) + #:use-module ((guix licenses) #:prefix license:) + #:use-module (guix packages)) + +;; cwltest uses cwltool as a library. so, create a library version of cwltool +;; where inputs become propagated inputs. +(define python-cwltool + (package + (inherit cwltool) + (name "python-cwltool") + (inputs + (list node)) + (propagated-inputs + (modify-inputs (package-inputs cwltool) + (delete "node"))))) + +;; cwltest requires cwl-runner, the implementation-agnostic entry point to +;; cwltool, for its tests. +(define cwl-runner + (file-union "cwl-runner" + `(("bin/cwl-runner" ,(file-append cwltool "/bin/cwltool"))))) + +(define-public cwltest + (package + (name "cwltest") + (version "2.6.20250314152537") + (source + (origin + (method url-fetch) + (uri (pypi-uri "cwltest" version)) + (sha256 + (base32 "0h2w9bllb6cz8d5pja5lbbd1kj08yga40jdi3300anwllczcnfq6")))) + (build-system pyproject-build-system) + (arguments + (list #:modules `((rnrs io ports) + (srfi srfi-1) + (srfi srfi-26) + (srfi srfi-171) + (guix build pyproject-build-system) + (guix build utils)) + #:phases + #~(modify-phases %standard-phases + (add-after 'unpack 'disable-docker-in-tests + (lambda _ + ;; Remove DockerRequirement (lines 7–10). + (let* ((file "tests/test-data/v1.0/cat1-testcli.cwl") + (lines (call-with-input-file file + (cut port-transduce + identity + rcons + get-line + <>)))) + (call-with-output-file file + (lambda (port) + (for-each (lambda (line) + (display line port) + (newline port)) + (append (take lines 6) + (drop lines 10)))))))) + (add-after 'install 'fix-permissions + (lambda* (#:key inputs outputs #:allow-other-keys) + ;; Make test scripts executable. + (for-each (lambda (file) + (chmod (string-append (site-packages inputs outputs) + "/cwltest/tests/test-data/" + file) + #o755)) + (list "dummy-executor.sh" + "mock_cwl_runner.py"))))))) + (inputs (list python-defusedxml + python-junit-xml + python-pytest + python-schema-salad)) + (native-inputs (list cwl-runner + python-cwltool + python-pytest + python-pytest-xdist + python-setuptools + python-setuptools-scm + python-wheel)) + (home-page "https://github.com/common-workflow-language/cwltest") + (synopsis "Common Workflow Language testing framework") + (description "Common Workflow Language testing framework.") + (license license:asl2.0))) + +cwltest diff --git a/.guix/e2e-tests.scm b/.guix/e2e-tests.scm new file mode 100644 index 0000000..442198d --- /dev/null +++ b/.guix/e2e-tests.scm @@ -0,0 +1,100 @@ +;;; ravanan --- High-reproducibility CWL runner powered by Guix +;;; Copyright © 2025 Arun Isaac <arunisaac@systemreboot.net> +;;; +;;; This file is part of ravanan. +;;; +;;; ravanan is free software: you can redistribute it and/or modify it +;;; under the terms of the GNU General Public License as published by +;;; the Free Software Foundation, either version 3 of the License, or +;;; (at your option) any later version. +;;; +;;; ravanan is distributed in the hope that it will be useful, but +;;; WITHOUT ANY WARRANTY; without even the implied warranty of +;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;;; General Public License for more details. +;;; +;;; You should have received a copy of the GNU General Public License +;;; along with ravanan. If not, see <https://www.gnu.org/licenses/>. + +(define-module (e2e-tests) + #:use-module ((cwl-conformance) #:select (cwltest-suite-gexp)) + #:use-module ((gnu packages bioinformatics) #:select (ccwl) #:prefix guix:) + #:use-module ((gnu packages guile-xyz) #:select (guile-run64)) + #:use-module (guix gexp) + #:use-module (guix git-download) + #:use-module (guix packages) + #:use-module (ice-9 match)) + +(define ccwl + (let ((commit "5f6e0d93bb08446d2c6f99484523697a2bda7b4d") + (revision "1")) + (package + (inherit guix:ccwl) + (name "ccwl") + (version (git-version "0.4.0" revision commit)) + (source (origin + (method git-fetch) + (uri (git-reference + (url "https://git.systemreboot.net/ccwl") + (commit commit))) + (file-name (git-file-name name version)) + (sha256 + (base32 + "19hwkgj8gsfw24fs682b4wxs5chnwj6hv0rvs23vmq7309mgf242")))) + (native-inputs + (modify-inputs (package-native-inputs guix:ccwl) + (prepend guile-run64)))))) + +(define (e2e-tools-gexp sources-directory doc-hello-world) + (with-imported-modules '((guix build utils)) + #~(begin + (use-modules (rnrs io ports) + (srfi srfi-26) + (ice-9 match) + (guix build utils)) + + (mkdir #$output) + ;; Compile ccwl sources. + (for-each (lambda (source-file) + (invoke #$(file-append ccwl "/bin/ccwl") + "compile" + (string-append "--output=" + #$output "/" + (basename source-file ".scm") ".cwl") + source-file)) + (find-files #$sources-directory "\\.scm$")) + ;; Copy CWL files. + (for-each (lambda (cwl-file) + (copy-file cwl-file + (string-append #$output "/" (basename cwl-file)))) + (find-files #$sources-directory "\\.cwl$")) + ;; Copy Hello World workflow from documentation. + (copy-file #$doc-hello-world + (string-append #$output "/doc-hello-world.cwl"))))) + +(define e2e-tools + (computed-file "e2e-tools" + (e2e-tools-gexp (local-file "../e2e-tests/tools" + #:recursive? #t) + (local-file "../doc/hello-world.cwl")))) + +(define e2e-jobs + (directory-union "e2e-jobs" + (list (local-file "../e2e-tests/jobs" + #:recursive? #t) + (file-union "doc-hello-world-inputs" + `(("doc-hello-world.yaml" + ,(local-file "../doc/hello-world-inputs.yaml"))))))) + +(define e2e-test-suite + (file-union "e2e-test-suite" + `(("tests.yaml" ,(local-file "../e2e-tests/tests.yaml")) + ("tools" ,e2e-tools) + ("jobs" ,e2e-jobs)))) + +(define-public e2e-tests + (program-file "e2e-tests" + (cwltest-suite-gexp (file-append e2e-test-suite "/tests.yaml") + (local-file "../e2e-tests/manifest.scm")))) + +e2e-tests diff --git a/.guix/ravanan-package.scm b/.guix/ravanan-package.scm index a59ec68..db95372 100644 --- a/.guix/ravanan-package.scm +++ b/.guix/ravanan-package.scm @@ -18,8 +18,11 @@ (define-module (ravanan-package) #:use-module ((gnu packages bioinformatics) #:select (ravanan) #:prefix guix:) + #:use-module ((gnu packages guile-xyz) #:select (guile-run64)) + #:use-module (guix build-system gnu) #:use-module (guix gexp) #:use-module (guix git-download) + #:use-module ((guix licenses) #:prefix license:) #:use-module (guix packages) #:use-module (guix utils)) @@ -29,7 +32,20 @@ (source (local-file ".." "ravanan-checkout" #:recursive? #t - #:select? (or (git-predicate (dirname (current-source-directory))) - (const #t)))))) + #:select? (lambda (file stat) + ;; If .guix is included, changes to other + ;; files under .guix—such as the CWL + ;; conformance tests—unnecessarily trigger a + ;; rebuild of ravanan. This could be a + ;; nuisance when hacking on the CWL + ;; conformance test scripts. + (and (not (string-contains file "/.guix/")) + (not (string-contains file "/e2e-tests/")) + ((or (git-predicate (dirname (current-source-directory))) + (const #t)) + file stat))))) + (native-inputs + (modify-inputs (package-native-inputs guix:ravanan) + (prepend guile-run64))))) ravanan diff --git a/.guix/ravanan-release.scm b/.guix/ravanan-release.scm new file mode 100644 index 0000000..01fa328 --- /dev/null +++ b/.guix/ravanan-release.scm @@ -0,0 +1,68 @@ +;;; ravanan --- High-reproducibility CWL runner powered by Guix +;;; Copyright © 2025 Arun Isaac <arunisaac@systemreboot.net> +;;; +;;; This file is part of ravanan. +;;; +;;; ravanan is free software: you can redistribute it and/or modify it +;;; under the terms of the GNU General Public License as published by +;;; the Free Software Foundation, either version 3 of the License, or +;;; (at your option) any later version. +;;; +;;; ravanan is distributed in the hope that it will be useful, but +;;; WITHOUT ANY WARRANTY; without even the implied warranty of +;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;;; General Public License for more details. +;;; +;;; You should have received a copy of the GNU General Public License +;;; along with ravanan. If not, see <https://www.gnu.org/licenses/>. + +(define-module (ravanan-release) + #:use-module ((gnu packages base) #:select (gnu-make)) + #:use-module ((gnu packages compression) #:select (lzip)) + #:use-module ((gnu packages version-control) #:select (git-minimal)) + #:use-module (guix gexp)) + +(define ravanan-git-repo + (local-file "../.git" + "ravanan-git-repo" + #:recursive? #t)) + +(define ravanan-release-gexp + (with-imported-modules '((guix build utils)) + #~(begin + (use-modules (guix build utils) + (ice-9 match) + (ice-9 popen) + (srfi srfi-26) + (rnrs io ports)) + + (define (call-with-input-pipe command proc) + "Call @var{proc} with input pipe to @var{command}. @var{command} is a list of +program arguments." + (match command + ((prog args ...) + (let ((port #f)) + (dynamic-wind + (lambda () + (set! port (apply open-pipe* OPEN_READ prog args))) + (cut proc port) + (cut close-pipe port)))))) + + (define (git-version) + (call-with-input-pipe (list "git" "tag" "--sort=-taggerdate" "--list" "v*") + (compose (cut substring <> (string-length "v")) + get-line))) + + (set-path-environment-variable + "PATH" '("bin") '(#$git-minimal #$gnu-make #$lzip)) + (invoke "git" "clone" (string-append "file://" #$ravanan-git-repo) (getcwd)) + (let ((version (git-version))) + (invoke "make" "dist" (string-append "version=" version)) + (copy-file (string-append "ravanan-" version ".tar.lz") + #$output))))) + +(define ravanan-release + (computed-file "ravanan.tar.lz" + ravanan-release-gexp)) + +ravanan-release diff --git a/.guix/ravanan-website.scm b/.guix/ravanan-website.scm new file mode 100644 index 0000000..010ac0d --- /dev/null +++ b/.guix/ravanan-website.scm @@ -0,0 +1,104 @@ +;;; ravanan --- High-reproducibility CWL runner powered by Guix +;;; Copyright © 2025 Arun Isaac <arunisaac@systemreboot.net> +;;; +;;; This file is part of ravanan. +;;; +;;; ravanan is free software: you can redistribute it and/or modify it +;;; under the terms of the GNU General Public License as published by +;;; the Free Software Foundation, either version 3 of the License, or +;;; (at your option) any later version. +;;; +;;; ravanan is distributed in the hope that it will be useful, but +;;; WITHOUT ANY WARRANTY; without even the implied warranty of +;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;;; General Public License for more details. +;;; +;;; You should have received a copy of the GNU General Public License +;;; along with ravanan. If not, see <https://www.gnu.org/licenses/>. + +(define-module (ravanan-website) + #:use-module ((gnu packages fonts) #:select (font-charter font-fira-code)) + #:use-module ((gnu packages haskell-xyz) #:select (pandoc)) + #:use-module (guix gexp) + #:use-module (guix git-download) + #:use-module (guix packages) + #:use-module (guix utils) + #:use-module ((ravanan-package) #:select (ravanan))) + +(define ravanan-website-home-page-gexp + (with-imported-modules '((guix build utils)) + #~(begin + (use-modules (guix build utils)) + + (copy-file #$(file-append (package-source ravanan) + "/README.md") + "README.md") + (substitute* "README.md" + ;; Add Download section. + (("^- \\[Building from source\\]\\(#building-from-source\\)" all) + (string-append "- [Download](#download)\n" + all)) + (("^# Building from source" all) + (string-append "# Download + +Download release tarballs. + +- 2025-11-24 [ravanan-0.2.0.tar.lz](releases/ravanan-0.2.0.tar.lz) [GPG Signature](releases/ravanan-0.2.0.tar.lz.asc) +- 2025-01-28 [ravanan-0.1.0.tar.lz](releases/ravanan-0.1.0.tar.lz) [GPG Signature](releases/ravanan-0.1.0.tar.lz.asc) + +Download [public signing key](https://systemreboot.net/about/arunisaac.pub). + +Browse the development version on [cgit](https://git.systemreboot.net/ravanan) or on [GitHub](https://github.com/arunisaac/ravanan/). +" + all)) + ;; Link to LICENSE web page. + (("\\[license details\\]\\(images/LICENSE.md\\)") + "[license details](images/LICENSE)")) + (invoke #$(file-append pandoc "/bin/pandoc") + "--standalone" + "--metadata" "title=ravanan" + "--metadata" "document-css=false" + "--css=style.css" + "--from=gfm" + (string-append "--output=" #$output) + "README.md")))) + +(define images-license-gexp + (with-imported-modules '((guix build utils)) + #~(begin + (use-modules (guix build utils)) + + (invoke #$(file-append pandoc "/bin/pandoc") + "--standalone" + "--metadata" "title=License" + "--metadata" "document-css=false" + "--css=style.css" + "--from=gfm" + (string-append "--output=" #$output) + #$(file-append (package-source ravanan) + "/images/LICENSE.md"))))) + +(define-public ravanan-website + (file-union "ravanan-website" + `(("index.html" + ,(computed-file "ravanan-website-home-page.html" + ravanan-website-home-page-gexp)) + ("images/ravanan-king-of-lanka.jpg" + ,(local-file "../images/ravanan-king-of-lanka.jpg")) + ("images/LICENSE.html" + ,(computed-file "images-license.html" + images-license-gexp)) + ("style.css" ,(local-file "../website/style.css")) + ("releases" ,(local-file "../releases" + #:recursive? #t)) + ("fonts/charter_regular.woff2" + ,(file-append font-charter + "/share/fonts/web/charter_regular.woff2")) + ("fonts/FiraCode-Regular.woff2" + ,(file-append font-fira-code + "/share/fonts/web/FiraCode-Regular.woff2")) + ("fonts/FiraCode-SemiBold.woff2" + ,(file-append font-fira-code + "/share/fonts/web/FiraCode-SemiBold.woff2"))))) + +ravanan-website diff --git a/HACKING.md b/HACKING.md new file mode 100644 index 0000000..4d8b6fe --- /dev/null +++ b/HACKING.md @@ -0,0 +1,60 @@ +# Set up development environment + +Drop into a development environment using `guix shell`. This shell includes additional packages required for development, and not simply the dependencies required to build ravanan. +``` +guix shell -L .guix -m manifest.scm +``` + +# Run end-to-end tests + +ravanan comes with a suite of end-to-end tests under `e2e-tests`. End-to-end tests require a running Guix daemon. To run them, create and change into a new empty directory. +``` +mkdir rundir +cd rundir +``` +Then, build and run the tests. +``` +$(guix build -L ../.guix -f ../.guix/e2e-tests.scm) +``` +Since ravanan depends on guix, and that guix may be too old, you may need to run this command outside the usual development environment. + +## Run specific end-to-end test + +When hacking on ravanan, you may be trying to get a specific test to pass, and may want to repeatedly run that specific test alone. You can do this by passing additional cwltest arguments. For example, to only run the `hello-world` test: +``` +$(guix build -L ../.guix -f ../.guix/e2e-tests.scm) -s hello-world +``` + +# Run the CWL v1.2 conformance test suite + +The CWL v1.2 conformance test suite is run similar to the end-to-end tests. Create and change into a new empty directory. +``` +mkdir rundir +cd rundir +``` +Then, build and run the tests. +``` +$(guix build -L ../.guix -f ../.guix/cwl-conformance.scm) +``` +To run a specific test alone (say, the `wf_simple` test): +``` +$(guix build -L ../.guix -f ../.guix/cwl-conformance.scm) -s wf_simple +``` +Since ravanan depends on guix, and that guix may be too old, you may need to run these commands outside the usual development environment. + +# Make a release +## Bump version +Bump the `version` variable in `Makefile`. +## Tag a release +Tag a release `vx.x.x` putting news into the tag message. +## Create a release tarball, test it, and sign it +``` +cp $(guix build -f .guix/ravanan-release.scm) ravanan-x.x.x.tar.lz +guix build --with-source=ravanan=ravanan-x.x.x.tar.lz -f .guix/ravanan-package.scm +make distsign +``` +## Publish release tarball +Add release tarball and signature to website and GitHub. +## Update Guix package +## Publicize +Publicize on the ravanan@systemreboot.net and guix-science@gnu.org mailing lists, and on the [CWL Discourse forum](https://cwl.discourse.group/). diff --git a/Makefile b/Makefile index aba1a35..c70af5c 100644 --- a/Makefile +++ b/Makefile @@ -17,15 +17,16 @@ # along with ravanan. If not, see <https://www.gnu.org/licenses/>. project = ravanan -version = 0.1.0 +version = 0.2.0 -GIT ?= git -GPG ?= gpg -GUILD ?= guild -GUILE ?= guile -LZIP ?= lzip -NODE ?= node -SED ?= sed +GIT = git +GPG = gpg +GUILD = guild +GUILE = guile +GUILE_RUN64 = guile-run64 +LZIP = lzip +NODE = node +SED = sed prefix ?= /usr/local bindir ?= $(prefix)/bin @@ -43,8 +44,9 @@ sources = $(filter-out $(config_file), \ objects = $(sources:.scm=.go) $(config_file:.scm=.go) scripts = $(wildcard bin/*) tests = $(wildcard tests/*.scm) $(wildcard tests/work/*.scm) +test_data = $(wildcard test-data/*) distribute_files = $(sources) $(config_file_template) $(scripts) \ - $(tests) pre-inst-env guix.scm \ + $(tests) $(test_data) pre-inst-env guix.scm \ .guix/ravanan-package.scm Makefile \ COPYING README.md @@ -62,9 +64,7 @@ all: $(objects) $(config_file) GUILE_AUTO_COMPILE=0 $(GUILD) compile -L . -o $@ $< check: - for test in $(tests); do \ - $(GUILE) --no-auto-compile -L . $$test; \ - done + ./pre-inst-env $(GUILE_RUN64) $(tests) install: $(sources) $(config_file) $(objects) $(scripts) install -D $(scripts) --target-directory $(bindir) diff --git a/README.md b/README.md index f511d61..b65c8a3 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -[](https://ci.systemreboot.net/jobs/ravanan) +[](https://ci.systemreboot.net/jobs/ravanan) [](https://ci.systemreboot.net/jobs/ravanan-end-to-end-tests) [](#cwl-v1.2-conformance) ravanan (pronounced rah-vun-un, IPA: rɑːvʌnʌn, Shavian: 𐑮𐑭𐑝𐑳𐑯𐑳𐑯) is a [Common Workflow Language (CWL)](https://www.commonwl.org/) implementation that is powered by [GNU Guix](https://guix.gnu.org/) and provides strong reproducibility guarantees. ravanan provides strong bullet-proof caching ([work reuse](https://www.commonwl.org/v1.2/CommandLineTool.html#WorkReuse)) so you never run the same step of your workflow twice, nor do you have to keep track of which steps were run and with what parameters: ravanan remembers everything for you. ravanan captures logs from every step of your workflow so you can always trace back in case of job failures. @@ -19,6 +19,7 @@ ravanan currently runs on single machines and on slurm via its [API](https://slu - [On HPC using slurm](#on-hpc-using-slurm) - [Using a specific version of Guix](#using-a-specific-version-of-guix) - [Referencing local scripts](#referencing-local-scripts) +- [CWL conformance](#cwl-conformance) - [License](#license) - [The Name](#the-name) @@ -53,11 +54,16 @@ make check ravanan requires a running Guix daemon. ravanan maintains its own *store* (this is distinct from Guix's store) to cache job outputs. -You can run a workflow `hello-world.cwl` with inputs in `hello-world-inputs.json` using +You can run a workflow `hello-world.cwl` with inputs in `hello-world-inputs.yaml` using ``` -ravanan hello-world.cwl hello-world-inputs.json --store=store --guix-manifest=manifest.scm +ravanan --store=store --guix-manifest=manifest.scm hello-world.cwl hello-world-inputs.yaml +``` +`--store` specifies a directory to use as ravanan's store. `--guix-manifest` specifies a Guix manifest file that lists the dependencies required for the workflow. + +An example `hello-world.cwl` and `hello-world-inputs.json` are provided in the `doc` directory of this repository. You can run it like so: +``` +ravanan --store=store --guix-manifest=doc/hello-world-manifest.scm doc/hello-world.cwl doc/hello-world-inputs.yaml ``` -`--store` specifies a directory to use as ravanan's store. `--guix-manifest` specifies a Guix manifest file that lists the dependencies required for the workflow. An example `hello-world.cwl` and `hello-world-inputs.json` are provided in the `doc` directory of this repository. ## On HPC using slurm @@ -70,7 +76,7 @@ Jobs run by ravanan do not directly write to the store. Instead, they operate on Putting it all together, a typical ravanan invocation on a slurm HPC might look like ``` -ravanan hello-world.cwl hello-world-inputs.json --store=store --guix-manifest=manifest.scm --batch-system=slurm-api --scratch=/scratch --slurm-jwt=jwt +ravanan --store=store --guix-manifest=manifest.scm --batch-system=slurm-api --scratch=/scratch --slurm-jwt=jwt hello-world.cwl hello-world-inputs.json ``` ## Using a specific version of Guix @@ -151,6 +157,16 @@ Thanks to Guix, a better way is possible. Guix, in its magnificent elegance, doe Please send questions, feedback, bug reports and patches to [ravanan@systemreboot.net](mailto:ravanan@systemreboot.net). You may also browse the [archives](https://lists.systemreboot.net/ravanan) of previous conversations. We do not accept issues or pull requests on GitHub. Thank you! +# CWL conformance + +CWL conformance is a work in progress. We're getting there! + +## [CWL v1.2 conformance](https://ci.systemreboot.net/jobs/ravanan-cwl-v1.2-conformance) + + + +                           + # License ravanan is free software released under the terms of the [GNU General Public License](https://www.gnu.org/licenses/gpl.html), either version 3 of the License, or (at your option) any later version. diff --git a/bin/ravanan b/bin/ravanan index 6ad5167..2cb05e2 100755 --- a/bin/ravanan +++ b/bin/ravanan @@ -24,6 +24,7 @@ exec guile --no-auto-compile -e main -s "$0" "$@" (rnrs io ports) (srfi srfi-26) (srfi srfi-37) + (srfi srfi-71) (ice-9 filesystem) (ice-9 match) (web uri) @@ -36,7 +37,9 @@ exec guile --no-auto-compile -e main -s "$0" "$@" (ravanan utils) (ravanan verbosity) (ravanan workflow) - (ravanan work utils)) + (ravanan work ui) + (ravanan work utils) + (ravanan work vectors)) (define %options (list (option (list "batch-system" "batchSystem") #t #f @@ -44,7 +47,7 @@ exec guile --no-auto-compile -e main -s "$0" "$@" (if (member arg (list "single-machine" "slurm-api")) (acons 'batch-system (string->symbol arg) result) - (error "Unknown batch system" arg)))) + (user-error "Unknown batch system ~a" arg)))) (option (list "guix-channels") #t #f (lambda (opt name arg result) (acons 'guix-channels-file arg result))) @@ -79,6 +82,14 @@ exec guile --no-auto-compile -e main -s "$0" "$@" (lambda (opt name arg result) (acons 'outdir arg result))) + (option (list "log-level") #t #f + (lambda (opt name arg result) + (let ((accepted-values (list "debug" "info" "warning" + "error" "critical"))) + (if (member (string-downcase arg) accepted-values) + (acons 'log-level (string->symbol (string-downcase arg)) + result) + (user-error "Unknown log level ~a" arg))))) (option (list "trace") #t #f (lambda (opt name arg result) (let ((accepted-values (list "slurm-api"))) @@ -87,7 +98,11 @@ exec guile --no-auto-compile -e main -s "$0" "$@" (cons 'traces (cons (string->symbol arg) (assq-ref result 'traces)))) - (error "Unknown trace subsystem" arg))))) + (user-error "Unknown trace subsystem ~a" arg))))) + (option (list "quiet") #f #f + (lambda (opt name arg result) + (acons 'quiet arg + result))) (option (list "help") #f #t (lambda (opt name arg result) (acons 'help #t result))) @@ -96,7 +111,13 @@ exec guile --no-auto-compile -e main -s "$0" "$@" (acons 'version #t result))))) (define (invalid-option opt name arg result) - (error "Invalid option" name)) + (user-error "Invalid option ~a" name)) + +(define (print-short-usage program) + (format (current-error-port) + "Usage: ~a [OPTIONS] CWL-WORKFLOW INPUTS +Run CWL-WORKFLOW with INPUTS.~%" + program)) (define (print-usage program) (format (current-error-port) @@ -129,9 +150,14 @@ Slurm API batch system options: Debugging options: + --log-level=LEVEL log messages with higher severity than LEVEL + (accepted values: debug, info, warning, + error, critical) --trace=SUBSYSTEM enable tracing on subsystem; - repeat to trace multiple subsystems + repeat to trace more than one subsystem (accepted values: slurm-api) + --quiet disable all logging and tracing; + overrides --log-level and --trace " program)) @@ -200,53 +226,61 @@ files that have the token in the @verbatim{SLURM_JWT=token} format." (slurm-api-endpoint . ,(build-uri 'http #:host "localhost" #:port 6820)) + (log-level . info) (traces . ()))))) - (when (assq-ref args 'help) - (print-usage program) - (exit #t)) - (when (assq-ref args 'version) - (format (current-output-port) - "~a ~a~%" - %project %version) - (exit #t)) - ;; Check for required arguments. - (unless (assq-ref args 'store) - (error "--store not specified")) - (case (assq-ref args 'batch-system) - ((slurm-api) - (unless (assq-ref args 'scratch) - (error "--scratch not specified")) - (unless (assq-ref args 'slurm-jwt) - (error "--slurm-jwt not specified")))) - (match (reverse (assq-ref args 'args)) - ((workflow-file inputs-file) - ;; We must not try to compile guix manifest files. - (set! %load-should-auto-compile #f) - (make-store (assq-ref args 'store)) - (let* ( ;; FIXME: This is a bit of a hack to avoid canonizing remote - ;; paths. - (store (if (file-name-absolute? (assq-ref args 'store)) - (assq-ref args 'store) - (canonicalize-path (assq-ref args 'store)))) - (outputs (guard (c ((manifest-file-error? c) - ;; Steps may provide their own - ;; SoftwareRequirement. So, at this point, we do - ;; not know if a manifest file is required and - ;; can't check for these manifest file errors - ;; right away. Instead, we depend on exceptions - ;; bubbled up from lower down the stack. - (let ((file (manifest-file-error-file c))) - (cond - ((not file) - (error "--guix-manifest not specified")) - ((not (file-exists? file)) - (error "Manifest file ~a does not exist" - file)) - (else - (error "Error loading manifest file" - file) - (raise-exception c)))))) - (parameterize ((%traces (assq-ref args 'traces))) + (parameterize ((%log-level (and (not (assq-ref args 'quiet)) + (assq-ref args 'log-level))) + (%traces (if (assq-ref args 'quiet) + '() + (assq-ref args 'traces)))) + (when (assq-ref args 'help) + (print-usage program) + (exit #t)) + (when (assq-ref args 'version) + (format (current-output-port) + "~a ~a~%" + %project %version) + (exit #t)) + ;; Check for required arguments. + (unless (assq-ref args 'store) + (user-error "--store not specified")) + (case (assq-ref args 'batch-system) + ((slurm-api) + (unless (assq-ref args 'scratch) + (user-error "--scratch not specified")) + (unless (assq-ref args 'slurm-jwt) + (user-error "--slurm-jwt not specified")))) + (match (reverse (assq-ref args 'args)) + ((workflow-file inputs-file) + ;; We must not try to compile guix manifest files. + (set! %load-should-auto-compile #f) + (make-store (assq-ref args 'store)) + (let* ( ;; FIXME: This is a bit of a hack to avoid canonizing remote + ;; paths. + (store (if (file-name-absolute? (assq-ref args 'store)) + (assq-ref args 'store) + (canonicalize-path (assq-ref args 'store)))) + (workflow inputs (read-workflow+inputs workflow-file + inputs-file)) + (outputs (guard (c ((manifest-file-error? c) + ;; Steps may provide their own + ;; SoftwareRequirement. So, at this + ;; point, we do not know if a manifest + ;; file is required and can't check for + ;; these manifest file errors right away. + ;; Instead, we depend on exceptions + ;; bubbled up from lower down the stack. + (let ((file (manifest-file-error-file c))) + (cond + ((not file) + (user-error "--guix-manifest not specified")) + ((not (file-exists? file)) + (user-error "Manifest file ~a does not exist" + file)) + (else + (user-error "Error loading manifest file ~a" + file) + (raise-exception c)))))) (run-workflow (file-name-stem workflow-file) (and (assq 'guix-manifest-file args) (canonicalize-path @@ -257,8 +291,8 @@ files that have the token in the @verbatim{SLURM_JWT=token} format." (assq-ref args 'guix-channels-file)) #:modules '((guile) (guix channels)))) - (read-workflow workflow-file) - (read-inputs inputs-file) + workflow + inputs (case (assq-ref args 'batch-system) ((single-machine) (or (assq-ref args 'scratch) @@ -275,11 +309,14 @@ files that have the token in the @verbatim{SLURM_JWT=token} format." (read-jwt (assq-ref args 'slurm-jwt))) (assq-ref args 'slurm-partition) (assq-ref args 'slurm-nice)))) - #:guix-daemon-socket (assq-ref args 'guix-daemon-socket)))))) - (scm->json (if (assq-ref args 'outdir) - (symlink-to-output-directory store - (assq-ref args 'outdir) - outputs) - outputs) - #:pretty #t)) - (newline))))))) + #:guix-daemon-socket (assq-ref args 'guix-daemon-socket))))) + (scm->json (if (assq-ref args 'outdir) + (symlink-to-output-directory store + (assq-ref args 'outdir) + outputs) + outputs) + #:pretty #t)) + (newline)) + (_ + (print-short-usage program) + (exit #f)))))))) diff --git a/cwl-conformance/LICENSE b/cwl-conformance/LICENSE new file mode 100644 index 0000000..b1e57b6 --- /dev/null +++ b/cwl-conformance/LICENSE @@ -0,0 +1,2 @@ +commonwl.svg is under the ISC license and is sourced from +https://github.com/badgen/badgen-icons/blob/master/icons/commonwl.svg \ No newline at end of file diff --git a/cwl-conformance/badgegen.py b/cwl-conformance/badgegen.py new file mode 100644 index 0000000..1e8f5f0 --- /dev/null +++ b/cwl-conformance/badgegen.py @@ -0,0 +1,37 @@ +### ravanan --- High-reproducibility CWL runner powered by Guix +### Copyright © 2025 Arun Isaac <arunisaac@systemreboot.net> +### +### This file is part of ravanan. +### +### ravanan is free software: you can redistribute it and/or modify it +### under the terms of the GNU General Public License as published by +### the Free Software Foundation, either version 3 of the License, or +### (at your option) any later version. +### +### ravanan is distributed in the hope that it will be useful, but +### WITHOUT ANY WARRANTY; without even the implied warranty of +### MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +### General Public License for more details. +### +### You should have received a copy of the GNU General Public License +### along with ravanan. If not, see <https://www.gnu.org/licenses/>. + +import json +from pathlib import Path +from pybadges import badge +import sys + +match sys.argv: + case [_, cwltest_badgedir, cwl_icon_file, output_directory]: + for summary_file in Path(cwltest_badgedir).glob("*.json"): + with open(summary_file) as file: + summary = json.load(file) + with (Path(output_directory) / (summary_file.stem + ".svg")).open("w") as file: + file.write(badge(left_text=summary["subject"], + right_text=summary["status"], + right_color=summary["color"], + logo=cwl_icon_file, + embed_logo=True)) + case [program, *_]: + print(f"Usage: {program} CWLTEST_BADGEDIR CWL_ICON_FILE OUTPUT_DIRECTORY") + sys.exit(1) diff --git a/cwl-conformance/commonwl.svg b/cwl-conformance/commonwl.svg new file mode 100644 index 0000000..0eeb627 --- /dev/null +++ b/cwl-conformance/commonwl.svg @@ -0,0 +1 @@ +<svg fill="#fff" viewBox="0 0 24 24" xmlns="http://www.w3.org/2000/svg"><path d="M12.063 9.205l-3.565 3.568 3.518 3.512 1.545-1.545-1.975-1.967 2.023-2.023-1.546-1.545zm1.543 13.218L11.6 20.375l3.786-3.815-1.559-1.573-5.33 5.403.115.114-.015.008 3.456 3.484zm1.896-13.565l-3.685-3.541 3.685-3.722L13.936 0 8.598 5.352l.108.101v.007l5.252 4.943z"/></svg> diff --git a/cwl-conformance/manifest.scm b/cwl-conformance/manifest.scm new file mode 100644 index 0000000..9cf33d5 --- /dev/null +++ b/cwl-conformance/manifest.scm @@ -0,0 +1,2 @@ +(specifications->manifest + (list "coreutils" "python-wrapper")) diff --git a/doc/hello-world-inputs.json b/doc/hello-world-inputs.json deleted file mode 100644 index eb32ec3..0000000 --- a/doc/hello-world-inputs.json +++ /dev/null @@ -1,3 +0,0 @@ -{ - "name": "Foo" -} diff --git a/doc/hello-world-inputs.yaml b/doc/hello-world-inputs.yaml new file mode 100644 index 0000000..a604a97 --- /dev/null +++ b/doc/hello-world-inputs.yaml @@ -0,0 +1 @@ +name: Foo diff --git a/e2e-tests/jobs/boolean-false-input-value.yaml b/e2e-tests/jobs/boolean-false-input-value.yaml new file mode 100644 index 0000000..0729f40 --- /dev/null +++ b/e2e-tests/jobs/boolean-false-input-value.yaml @@ -0,0 +1,2 @@ +message: Hello world! +no-newline?: false diff --git a/e2e-tests/jobs/capture-output-file-with-parameter-reference.yaml b/e2e-tests/jobs/capture-output-file-with-parameter-reference.yaml new file mode 100644 index 0000000..892dbe7 --- /dev/null +++ b/e2e-tests/jobs/capture-output-file-with-parameter-reference.yaml @@ -0,0 +1,4 @@ +archive: + class: File + location: hello.tar +extractfile: hello.txt diff --git a/e2e-tests/jobs/capture-output-file.yaml b/e2e-tests/jobs/capture-output-file.yaml new file mode 100644 index 0000000..faf24d2 --- /dev/null +++ b/e2e-tests/jobs/capture-output-file.yaml @@ -0,0 +1,3 @@ +archive: + class: File + location: hello.tar diff --git a/e2e-tests/jobs/checksum.yaml b/e2e-tests/jobs/checksum.yaml new file mode 100644 index 0000000..dce679c --- /dev/null +++ b/e2e-tests/jobs/checksum.yaml @@ -0,0 +1,3 @@ +file: + class: File + location: hello.tar diff --git a/e2e-tests/jobs/decompress-compile-run.yaml b/e2e-tests/jobs/decompress-compile-run.yaml new file mode 100644 index 0000000..e2a0b74 --- /dev/null +++ b/e2e-tests/jobs/decompress-compile-run.yaml @@ -0,0 +1,3 @@ +compressed_source: + class: File + location: hello.c.gz diff --git a/e2e-tests/jobs/empty.yaml b/e2e-tests/jobs/empty.yaml new file mode 100644 index 0000000..0967ef4 --- /dev/null +++ b/e2e-tests/jobs/empty.yaml @@ -0,0 +1 @@ +{} diff --git a/e2e-tests/jobs/hello-world.yaml b/e2e-tests/jobs/hello-world.yaml new file mode 100644 index 0000000..5329e73 --- /dev/null +++ b/e2e-tests/jobs/hello-world.yaml @@ -0,0 +1 @@ +message: Hello world! diff --git a/e2e-tests/jobs/hello.c b/e2e-tests/jobs/hello.c new file mode 100644 index 0000000..07f4fc2 --- /dev/null +++ b/e2e-tests/jobs/hello.c @@ -0,0 +1,7 @@ +#include <stdio.h> + +int main () +{ + printf("Hello World!\n"); + return 0; +} diff --git a/e2e-tests/jobs/hello.c.gz b/e2e-tests/jobs/hello.c.gz new file mode 100644 index 0000000..81c4d84 --- /dev/null +++ b/e2e-tests/jobs/hello.c.gz Binary files differdiff --git a/e2e-tests/jobs/hello.tar b/e2e-tests/jobs/hello.tar new file mode 100644 index 0000000..edcc4a8 --- /dev/null +++ b/e2e-tests/jobs/hello.tar Binary files differdiff --git a/e2e-tests/jobs/inline-javascript-requirement.yaml b/e2e-tests/jobs/inline-javascript-requirement.yaml new file mode 100644 index 0000000..73b1beb --- /dev/null +++ b/e2e-tests/jobs/inline-javascript-requirement.yaml @@ -0,0 +1 @@ +number: 13 diff --git a/e2e-tests/jobs/int-for-float-input.yaml b/e2e-tests/jobs/int-for-float-input.yaml new file mode 100644 index 0000000..0d12299 --- /dev/null +++ b/e2e-tests/jobs/int-for-float-input.yaml @@ -0,0 +1,2 @@ +message1: 1 +message2: 1.2 diff --git a/e2e-tests/jobs/prefix-arguments.yaml b/e2e-tests/jobs/prefix-arguments.yaml new file mode 100644 index 0000000..6e888d9 --- /dev/null +++ b/e2e-tests/jobs/prefix-arguments.yaml @@ -0,0 +1,2 @@ +separator: "," +last: 10 diff --git a/e2e-tests/jobs/scatter.yaml b/e2e-tests/jobs/scatter.yaml new file mode 100644 index 0000000..702d6f3 --- /dev/null +++ b/e2e-tests/jobs/scatter.yaml @@ -0,0 +1,5 @@ +message: foo +other_messages: + - bar + - foobar + - barbar diff --git a/e2e-tests/jobs/spell-check-dictionary b/e2e-tests/jobs/spell-check-dictionary new file mode 100644 index 0000000..71df044 --- /dev/null +++ b/e2e-tests/jobs/spell-check-dictionary @@ -0,0 +1,8 @@ +brown +dog +fox +jumps +lazy +over +quick +the \ No newline at end of file diff --git a/e2e-tests/jobs/spell-check-text.txt b/e2e-tests/jobs/spell-check-text.txt new file mode 100644 index 0000000..a116b9a --- /dev/null +++ b/e2e-tests/jobs/spell-check-text.txt @@ -0,0 +1 @@ +The quick brown fox jumps over the laazy dog. diff --git a/e2e-tests/jobs/spell-check.yaml b/e2e-tests/jobs/spell-check.yaml new file mode 100644 index 0000000..b01a8e1 --- /dev/null +++ b/e2e-tests/jobs/spell-check.yaml @@ -0,0 +1,6 @@ +text-file: + class: File + location: spell-check-text.txt +dictionary: + class: File + location: spell-check-dictionary diff --git a/e2e-tests/jobs/staging-input-files.yaml b/e2e-tests/jobs/staging-input-files.yaml new file mode 100644 index 0000000..94b5224 --- /dev/null +++ b/e2e-tests/jobs/staging-input-files.yaml @@ -0,0 +1,3 @@ +file: + class: File + location: hello.c diff --git a/e2e-tests/manifest.scm b/e2e-tests/manifest.scm new file mode 100644 index 0000000..dfa4780 --- /dev/null +++ b/e2e-tests/manifest.scm @@ -0,0 +1,2 @@ +(specifications->manifest + (list "coreutils" "gcc-toolchain" "gzip" "tar")) diff --git a/e2e-tests/tests.yaml b/e2e-tests/tests.yaml new file mode 100644 index 0000000..d14b93a --- /dev/null +++ b/e2e-tests/tests.yaml @@ -0,0 +1,243 @@ +- id: hello-world + doc: Hello world! + tool: tools/hello-world.cwl + job: jobs/hello-world.yaml + output: + output_message: + class: File + size: 13 + checksum: sha1$47a013e660d408619d894b20806b1d5086aab03b +- id: doc-hello-world + doc: Hello world workflow from the documentation + tool: tools/doc-hello-world.cwl + job: jobs/doc-hello-world.yaml + output: + message: + class: File + size: 10 + checksum: sha1$d4322d1d18549a84abb5e19558784d819785a884 +- id: capture-output-file + doc: Capture output file using a glob + tool: tools/capture-output-file.cwl + job: jobs/capture-output-file.yaml + output: + extracted_file: + class: File + basename: hello.txt + nameroot: hello + nameext: .txt + size: 13 + checksum: sha1$a0b65939670bc2c010f4d5d6a0b3e4e4590fb92b +- id: capture-output-file-with-parameter-reference + doc: Capture output file using a glob and a parameter reference + tool: tools/capture-output-file-with-parameter-reference.cwl + job: jobs/capture-output-file-with-parameter-reference.yaml + output: + extracted_file: + class: File + basename: hello.txt + nameroot: hello + nameext: .txt + size: 13 + checksum: sha1$a0b65939670bc2c010f4d5d6a0b3e4e4590fb92b +- id: capture-stdout + doc: Capture standard output as a file + tool: tools/capture-stdout.cwl + job: jobs/hello-world.yaml + output: + printed_message: + class: File + basename: printed-message-output.txt + nameroot: printed-message-output + nameext: .txt + size: 13 + checksum: sha1$47a013e660d408619d894b20806b1d5086aab03b +- id: checksum + doc: Compute several hashes + tool: tools/checksum.cwl + job: jobs/checksum.yaml + output: + md5: + class: File + basename: md5 + nameroot: md5 + nameext: "" + size: 44 + checksum: sha1$9fa7df2256ce2e726bd9e89485d596b9985971a7 + sha1: + class: File + basename: sha1 + nameroot: sha1 + nameext: "" + size: 52 + checksum: sha1$5beabf5a3d41d2c4a494d4342419894d9d7800e0 + sha256: + class: File + basename: sha256 + nameroot: sha256 + nameext: "" + size: 76 + checksum: sha1$1d4d44c8563d8e5279dd8d0eb1e3e599c7c3ef3f +- id: decompress-compile-run + doc: Decompress a compressed C source file, compile and run it + tool: tools/decompress-compile-run.cwl + job: jobs/decompress-compile-run.yaml + output: + stdout: + class: File + basename: run-output.txt + nameroot: run-output + nameext: .txt + size: 13 + checksum: sha1$a0b65939670bc2c010f4d5d6a0b3e4e4590fb92b +- id: inline-javascript-requirement + doc: Run javascript for InlineJavascriptRequirement + tool: tools/inline-javascript-requirement.cwl + job: jobs/inline-javascript-requirement.yaml + output: + sum: + class: File + size: 3 + checksum: sha1$030514d80869744a4e2f60d2fd37d6081f5ed01a +- id: pass-stdin + doc: Pass standard input into command + tool: tools/pass-stdin.cwl + job: jobs/checksum.yaml + output: + bytes: + class: File + size: 6 + checksum: sha1$0929a71b2f03c579afed345f79635816595c2041 +- id: prefix-arguments + doc: Command with prefix arguments + tool: tools/prefix-arguments.cwl + job: jobs/prefix-arguments.yaml + output: + sequence: + class: File + size: 21 + checksum: sha1$474f936f3e4a4db3f01ee46f9fa8fc928a07c2ab +- id: scatter + doc: Workflow with a scattering step + tool: tools/scatter.cwl + job: jobs/scatter.yaml + output: + printed_output: + - class: File + size: 8 + checksum: sha1$d53a205a336e07cf9eac45471b3870f9489288ec + - class: File + size: 11 + checksum: sha1$f784299d4fcaf81aba0b0647264a959fdfe8180d + - class: File + size: 11 + checksum: sha1$b7ee58e14913c07256e9255e69310ddcde4bdb51 +- id: spell-check + doc: Spell-check workflow using Unix utilities + tool: tools/spell-check.cwl + job: jobs/spell-check.yaml + output: + misspellings: + class: File + basename: misspelt-words + size: 6 + checksum: sha1$e701a33e7681ea185a709168c44e13217497d220 +- id: staging-input-files + doc: Stage input files to working directory + tool: tools/staging-input-files.cwl + job: jobs/staging-input-files.yaml + output: + output_file: + class: File + size: 76 + checksum: sha1$a43e1394643339f6a203147ccbf82cc3be1777e0 +- id: command-line-tool-with-default-input + doc: CommandLineTool with default input + tool: tools/command-line-tool-with-default-input.cwl + job: jobs/empty.yaml + output: + output_message: + class: File + size: 13 + checksum: sha1$47a013e660d408619d894b20806b1d5086aab03b +- id: command-line-tool-step-with-default-input + doc: CommandLineTool step with default input + tool: tools/command-line-tool-step-with-default-input.cwl + job: jobs/empty.yaml + output: + output_message: + class: File + size: 13 + checksum: sha1$47a013e660d408619d894b20806b1d5086aab03b +- id: command-line-tool-step-with-optional-and-required-inputs + doc: CommandLineTool step with optional and required inputs + tool: tools/command-line-tool-step-with-optional-and-required-inputs.cwl + job: jobs/hello-world.yaml + output: + output_message: + class: File + size: 26 + checksum: sha1$2544f363c82bf304dc363829620ecf213e1a2c1e +- id: workflow-with-default-input + doc: Workflow with default input + tool: tools/workflow-with-default-input.cwl + job: jobs/empty.yaml + output: + output_message: + class: File + size: 13 + checksum: sha1$47a013e660d408619d894b20806b1d5086aab03b +- id: workflow-step-with-default-input + doc: Workflow step with default input + tool: tools/workflow-step-with-default-input.cwl + job: jobs/empty.yaml + output: + output_message: + class: File + size: 13 + checksum: sha1$47a013e660d408619d894b20806b1d5086aab03b +- id: workflow-step-with-optional-and-required-inputs + doc: Workflow step with optional and required inputs + tool: tools/workflow-step-with-optional-and-required-inputs.cwl + job: jobs/hello-world.yaml + output: + output_message: + class: File + size: 26 + checksum: sha1$2544f363c82bf304dc363829620ecf213e1a2c1e +- id: boolean-false-default-value + doc: Workflow with a boolean false default value + tool: tools/boolean-false-default-value.cwl + job: jobs/hello-world.yaml + output: + output_message: + class: File + size: 13 + checksum: sha1$47a013e660d408619d894b20806b1d5086aab03b +- id: boolean-false-input-value + doc: Boolean false input value + tool: tools/boolean-false-input-value.cwl + job: jobs/boolean-false-input-value.yaml + output: + output_message: + class: File + size: 13 + checksum: sha1$47a013e660d408619d894b20806b1d5086aab03b +- id: int-for-float-input + doc: int supplied for float input must be coerced into a float + tool: tools/int-for-float-input.cwl + job: jobs/int-for-float-input.yaml + output: + output_message: + class: File + size: 6 + checksum: sha1$d493abaf2d0ffe0bec8d302fdee38a43e80d2a91 +- id: command-line-tool-without-arguments + doc: CommandLineTool without the arguments field + tool: tools/command-line-tool-without-arguments.cwl + job: jobs/hello-world.yaml + output: + output_message: + class: File + size: 13 + checksum: sha1$47a013e660d408619d894b20806b1d5086aab03b diff --git a/e2e-tests/tools/boolean-false-default-value.scm b/e2e-tests/tools/boolean-false-default-value.scm new file mode 100644 index 0000000..58b460a --- /dev/null +++ b/e2e-tests/tools/boolean-false-default-value.scm @@ -0,0 +1,3 @@ +(command #:inputs (message #:type string) (no-newline? #:type boolean #:default #f) + #:run "echo" ("-n" no-newline?) message + #:outputs (output_message #:type stdout)) diff --git a/e2e-tests/tools/boolean-false-input-value.scm b/e2e-tests/tools/boolean-false-input-value.scm new file mode 100644 index 0000000..08db60d --- /dev/null +++ b/e2e-tests/tools/boolean-false-input-value.scm @@ -0,0 +1,3 @@ +(command #:inputs (message #:type string) (no-newline? #:type boolean) + #:run "echo" ("-n" no-newline?) message + #:outputs (output_message #:type stdout)) diff --git a/e2e-tests/tools/capture-output-file-with-parameter-reference.scm b/e2e-tests/tools/capture-output-file-with-parameter-reference.scm new file mode 100644 index 0000000..1f0d619 --- /dev/null +++ b/e2e-tests/tools/capture-output-file-with-parameter-reference.scm @@ -0,0 +1,9 @@ +(define extract-specific-file + (command #:inputs (archive #:type File) (extractfile #:type string) + #:run "tar" "--no-same-owner" "--extract" "--file" archive extractfile + #:outputs (extracted_file + #:type File + #:binding ((glob . "$(inputs.extractfile)"))))) + +(workflow ((archive #:type File) (extractfile #:type string)) + (extract-specific-file #:archive archive #:extractfile extractfile)) diff --git a/e2e-tests/tools/capture-output-file.scm b/e2e-tests/tools/capture-output-file.scm new file mode 100644 index 0000000..aa1dca0 --- /dev/null +++ b/e2e-tests/tools/capture-output-file.scm @@ -0,0 +1,9 @@ +(define extract + (command #:inputs (archive #:type File) + #:run "tar" "--no-same-owner" "--extract" "--file" archive + #:outputs (extracted_file + #:type File + #:binding ((glob . "hello.txt"))))) + +(workflow ((archive #:type File)) + (extract #:archive archive)) diff --git a/e2e-tests/tools/capture-stdout.scm b/e2e-tests/tools/capture-stdout.scm new file mode 100644 index 0000000..0901f4d --- /dev/null +++ b/e2e-tests/tools/capture-stdout.scm @@ -0,0 +1,8 @@ +(define print + (command #:inputs (message #:type string) + #:run "echo" message + #:outputs (printed_message #:type stdout) + #:stdout "printed-message-output.txt")) + +(workflow ((message #:type string)) + (print #:message message)) diff --git a/e2e-tests/tools/checksum.scm b/e2e-tests/tools/checksum.scm new file mode 100644 index 0000000..27dec78 --- /dev/null +++ b/e2e-tests/tools/checksum.scm @@ -0,0 +1,22 @@ +(define md5sum + (command #:inputs (file #:type File #:stage? #t) + #:run "md5sum" "$(inputs.file.basename)" + #:outputs (md5 #:type stdout) + #:stdout "md5")) + +(define sha1sum + (command #:inputs (file #:type File #:stage? #t) + #:run "sha1sum" "$(inputs.file.basename)" + #:outputs (sha1 #:type stdout) + #:stdout "sha1")) + +(define sha256sum + (command #:inputs (file #:type File #:stage? #t) + #:run "sha256sum" "$(inputs.file.basename)" + #:outputs (sha256 #:type stdout) + #:stdout "sha256")) + +(workflow ((file #:type File)) + (tee (md5sum #:file file) + (sha1sum #:file file) + (sha256sum #:file file))) diff --git a/e2e-tests/tools/command-line-tool-step-with-default-input.scm b/e2e-tests/tools/command-line-tool-step-with-default-input.scm new file mode 100644 index 0000000..cb9ab44 --- /dev/null +++ b/e2e-tests/tools/command-line-tool-step-with-default-input.scm @@ -0,0 +1,8 @@ +(define echo + (command #:inputs (message #:type string + #:default "Hello world!") + #:run "echo" message + #:outputs (output_message #:type stdout))) + +(workflow () + (echo)) diff --git a/e2e-tests/tools/command-line-tool-step-with-optional-and-required-inputs.scm b/e2e-tests/tools/command-line-tool-step-with-optional-and-required-inputs.scm new file mode 100644 index 0000000..e6d0478 --- /dev/null +++ b/e2e-tests/tools/command-line-tool-step-with-optional-and-required-inputs.scm @@ -0,0 +1,8 @@ +(define echo + (command #:inputs (message1 #:type string) (message2 #:type string + #:default "Hello world!") + #:run "echo" message1 message2 + #:outputs (output_message #:type stdout))) + +(workflow ((message #:type string)) + (echo #:message1 message)) diff --git a/e2e-tests/tools/command-line-tool-with-default-input.scm b/e2e-tests/tools/command-line-tool-with-default-input.scm new file mode 100644 index 0000000..82801a4 --- /dev/null +++ b/e2e-tests/tools/command-line-tool-with-default-input.scm @@ -0,0 +1,4 @@ +(command #:inputs (message #:type string + #:default "Hello world!") + #:run "echo" message + #:outputs (output_message #:type stdout)) diff --git a/e2e-tests/tools/command-line-tool-without-arguments.cwl b/e2e-tests/tools/command-line-tool-without-arguments.cwl new file mode 100644 index 0000000..f0440b0 --- /dev/null +++ b/e2e-tests/tools/command-line-tool-without-arguments.cwl @@ -0,0 +1,12 @@ +cwlVersion: v1.2 +class: CommandLineTool +baseCommand: + - echo +inputs: + message: + type: string + inputBinding: + position: 1 +outputs: + output_message: + type: stdout diff --git a/e2e-tests/tools/decompress-compile-run.scm b/e2e-tests/tools/decompress-compile-run.scm new file mode 100644 index 0000000..884f6a4 --- /dev/null +++ b/e2e-tests/tools/decompress-compile-run.scm @@ -0,0 +1,22 @@ +(define decompress + (command #:inputs (compressed #:type File) + #:run "gzip" "--stdout" "--decompress" compressed + #:outputs (decompressed #:type stdout))) + +(define compile + (command #:inputs (source #:type File) + #:run "gcc" "-x" "c" source + #:outputs (executable + #:type File + #:binding ((glob . "a.out"))))) + +(define run + (command #:inputs executable + #:run executable + #:outputs (stdout #:type stdout) + #:stdout "run-output.txt")) + +(workflow ((compressed_source #:type File)) + (pipe (decompress #:compressed compressed_source) + (compile #:source decompressed) + (run #:executable executable))) diff --git a/e2e-tests/tools/hello-world.scm b/e2e-tests/tools/hello-world.scm new file mode 100644 index 0000000..a037a36 --- /dev/null +++ b/e2e-tests/tools/hello-world.scm @@ -0,0 +1,3 @@ +(command #:inputs (message #:type string) + #:run "echo" message + #:outputs (output_message #:type stdout)) diff --git a/e2e-tests/tools/inline-javascript-requirement.scm b/e2e-tests/tools/inline-javascript-requirement.scm new file mode 100644 index 0000000..cd6a4e5 --- /dev/null +++ b/e2e-tests/tools/inline-javascript-requirement.scm @@ -0,0 +1,4 @@ +(command #:inputs (number #:type int) + #:run "echo" "$(1 + inputs.number)" + #:outputs (sum #:type stdout) + #:requirements ((InlineJavascriptRequirement))) diff --git a/e2e-tests/tools/int-for-float-input.scm b/e2e-tests/tools/int-for-float-input.scm new file mode 100644 index 0000000..a2f351e --- /dev/null +++ b/e2e-tests/tools/int-for-float-input.scm @@ -0,0 +1,3 @@ +(command #:inputs (message1 #:type float) (message2 #:type float) + #:run "echo" message1 message2 + #:outputs (output_message #:type stdout)) diff --git a/e2e-tests/tools/pass-stdin.scm b/e2e-tests/tools/pass-stdin.scm new file mode 100644 index 0000000..4ba251c --- /dev/null +++ b/e2e-tests/tools/pass-stdin.scm @@ -0,0 +1,8 @@ +(define count-bytes + (command #:inputs (file #:type File) + #:run "wc" "-c" + #:outputs (bytes #:type stdout) + #:stdin file)) + +(workflow ((file #:type File)) + (count-bytes #:file file)) diff --git a/e2e-tests/tools/prefix-arguments.scm b/e2e-tests/tools/prefix-arguments.scm new file mode 100644 index 0000000..d680ace --- /dev/null +++ b/e2e-tests/tools/prefix-arguments.scm @@ -0,0 +1,3 @@ +(command #:inputs (last #:type int) (separator #:type string) + #:run "seq" ("-s" separator) last + #:outputs (sequence #:type stdout)) diff --git a/e2e-tests/tools/scatter.scm b/e2e-tests/tools/scatter.scm new file mode 100644 index 0000000..805f4df --- /dev/null +++ b/e2e-tests/tools/scatter.scm @@ -0,0 +1,8 @@ +(define print + (command #:inputs (message #:type string) (other-message #:type string) + #:run "echo" message other-message + #:outputs (printed_output #:type stdout))) + +(workflow ((message #:type string) (other_messages #:type (array string))) + (scatter (print #:message message) + #:other-message other_messages)) diff --git a/e2e-tests/tools/spell-check.scm b/e2e-tests/tools/spell-check.scm new file mode 100644 index 0000000..d5ccebb --- /dev/null +++ b/e2e-tests/tools/spell-check.scm @@ -0,0 +1,33 @@ +(define split-words + (command #:inputs text + #:run "tr" "--complement" "--squeeze-repeats" "A-Za-z" "\\n" + #:stdin text + #:outputs (words #:type stdout))) + +(define downcase + (command #:inputs words + #:run "tr" "A-Z" "a-z" + #:stdin words + #:outputs (downcased-words #:type stdout))) + +(define sort + (command #:inputs words + #:run "sort" "--unique" + #:stdin words + #:outputs (sorted #:type stdout))) + +(define find-misspellings + (command #:inputs words dictionary + #:run "comm" "-23" words dictionary + #:outputs (misspellings #:type stdout) + #:stdout "misspelt-words")) + +(workflow (text-file dictionary) + (pipe (tee (pipe (split-words #:text text-file) + (downcase #:words words) + (sort (sort-words) #:words downcased-words) + (rename #:sorted-words sorted)) + (pipe (sort (sort-dictionary) #:words dictionary) + (rename #:sorted-dictionary sorted))) + (find-misspellings #:words sorted-words + #:dictionary sorted-dictionary))) diff --git a/e2e-tests/tools/staging-input-files.scm b/e2e-tests/tools/staging-input-files.scm new file mode 100644 index 0000000..f81ee47 --- /dev/null +++ b/e2e-tests/tools/staging-input-files.scm @@ -0,0 +1,4 @@ +(command #:inputs (file #:type File + #:stage? #t) + #:run "cat" "./$(inputs.file.basename)" + #:outputs (output_file #:type stdout)) diff --git a/e2e-tests/tools/workflow-step-with-default-input.scm b/e2e-tests/tools/workflow-step-with-default-input.scm new file mode 100644 index 0000000..9a3f981 --- /dev/null +++ b/e2e-tests/tools/workflow-step-with-default-input.scm @@ -0,0 +1,12 @@ +(define echo + (command #:inputs (message #:type string) + #:run "echo" message + #:outputs (output_message #:type stdout))) + +(define echo-workflow + (workflow ((message #:type string + #:default "Hello world!")) + (echo #:message message))) + +(workflow () + (echo-workflow)) diff --git a/e2e-tests/tools/workflow-step-with-optional-and-required-inputs.scm b/e2e-tests/tools/workflow-step-with-optional-and-required-inputs.scm new file mode 100644 index 0000000..aa71940 --- /dev/null +++ b/e2e-tests/tools/workflow-step-with-optional-and-required-inputs.scm @@ -0,0 +1,14 @@ +(define echo + (command #:inputs (message1 #:type string) (message2 #:type string) + #:run "echo" message1 message2 + #:outputs (output_message #:type stdout))) + +(define echo-workflow + (workflow ((message1 #:type string) + (message2 #:type string + #:default "Hello world!")) + (echo #:message1 message1 + #:message2 message2))) + +(workflow ((message #:type string)) + (echo-workflow #:message1 message)) diff --git a/e2e-tests/tools/workflow-with-default-input.scm b/e2e-tests/tools/workflow-with-default-input.scm new file mode 100644 index 0000000..7f1a671 --- /dev/null +++ b/e2e-tests/tools/workflow-with-default-input.scm @@ -0,0 +1,8 @@ +(define echo + (command #:inputs (message #:type string) + #:run "echo" message + #:outputs (output_message #:type stdout))) + +(workflow ((message #:type string + #:default "Hello world!")) + (echo #:message message)) diff --git a/images/LICENSE.md b/images/LICENSE.md index 836364d..4615864 100644 --- a/images/LICENSE.md +++ b/images/LICENSE.md @@ -3,4 +3,4 @@ - Author: [Gane Kumaraswamy](https://www.flickr.com/photos/19396720@N00/4336566764) - Source: https://commons.wikimedia.org/wiki/File:Ravanan_-_King_of_Lanka.jpg https://www.flickr.com/photos/19396720@N00/4336566764 -- License: [Creative Commons Attribution 2.0 Generic license](https://creativecommons.org/licenses/by/2.0/deed.en). +- License: [Creative Commons Attribution 2.0 Generic license](https://creativecommons.org/licenses/by/2.0/deed.en) diff --git a/manifest.scm b/manifest.scm new file mode 100644 index 0000000..12ba0e0 --- /dev/null +++ b/manifest.scm @@ -0,0 +1,16 @@ +(use-modules ((gnu packages bioinformatics) #:select (ccwl)) + ((cwltest-package) #:select (cwltest)) + ((ravanan-package) #:select (ravanan)) + (srfi srfi-1)) + +(define (manifest-cons* . args) + "ARGS is of the form (PACKAGES ... ONTO-MANIFEST). Return a manifest +with PACKAGES and all packages in ONTO-MANIFEST." + (let ((packages (drop-right args 1)) + (onto-manifest (last args))) + (manifest (append (map package->manifest-entry packages) + (manifest-entries onto-manifest))))) + +(manifest-cons* ccwl + cwltest + (package->development-manifest ravanan)) diff --git a/ravanan/command-line-tool.scm b/ravanan/command-line-tool.scm index c47bb3c..fd988f7 100644 --- a/ravanan/command-line-tool.scm +++ b/ravanan/command-line-tool.scm @@ -18,7 +18,7 @@ (define-module (ravanan command-line-tool) #:use-module ((rnrs base) #:select (assertion-violation error)) - #:use-module (rnrs conditions) + #:use-module ((rnrs conditions) #:select (condition define-condition-type)) #:use-module (rnrs exceptions) #:use-module (srfi srfi-1) #:use-module (srfi srfi-9 gnu) @@ -53,9 +53,11 @@ #:use-module (ravanan work ui) #:use-module (ravanan work utils) #:use-module (ravanan work vectors) - #:export (run-command-line-tool + #:export (build-command-line-tool-script + run-command-line-tool check-requirements inherit-requirements + find-requirement %command-line-tool-supported-requirements command-line-tool-supported-requirements capture-command-line-tool-output @@ -95,15 +97,6 @@ (secondary-files formal-output-secondary-files) (binding formal-output-binding)) -(define-immutable-record-type <command-line-binding> - (command-line-binding position prefix type value item-separator) - command-line-binding? - (position command-line-binding-position) - (prefix command-line-binding-prefix) - (type command-line-binding-type) - (value command-line-binding-value) - (item-separator command-line-binding-item-separator)) - (define-immutable-record-type <output-binding> (output-binding glob load-contents? load-listing? output-eval) output-binding? @@ -118,7 +111,7 @@ supported-requirements #:optional hint?) "Error out if any of @var{requirements} are not supported by @var{batch-system}. -If @var{hint?} is @code{#t}, only print a warning. +If @var{hint?} is @code{#t}, only log a warning. @var{supported-requirements-for-batch-system} is a function that when passed a batch system returns the requirements supported by it. @var{supported-requirements} is the list of requirements supported by at least @@ -184,212 +177,45 @@ keys @code{input}, @code{self} and @code{runtime}." ;; Not a javascript expression, but some other JSON tree. Return it as is. expression)) -(define (build-command cwl inputs) - "Return a list of @code{<command-line-binding>} objects for the -@code{CommandLineTool} class workflow @var{cwl} and @var{inputs}. The -@code{value} field of the returned @code{<command-line-binding>} objects may be -strings or G-expressions. The G-expressions may reference @var{inputs} and -@var{runtime} variables that must be defined in the context in which the -G-expressions are inserted." - (define (argument->command-line-binding i argument) - (command-line-binding (cond - ((assoc-ref argument "position") - => string->number) - (else i)) - (maybe-assoc-ref (just argument) "prefix") - 'string - #~(value->string - #$(coerce-expression - (assoc-ref* argument "valueFrom"))) - %nothing)) - - (define (collect-bindings ids+inputs+types+bindings) - (append-map id+input+type-tree+binding->command-line-binding - ids+inputs+types+bindings)) - - (define id+input+type-tree+binding->command-line-binding - (match-lambda - ;; We stretch the idea of an input id, by making it an address that - ;; identifies the exact location of a value in a tree that possibly - ;; contains array types. For example, '("foo") identifies the input "foo"; - ;; '("foo" 1) identifies the 1th element of the array input "foo"; '("foo" - ;; 37 1) identifies the 1th element of the 37th element of the array input - ;; "foo"; etc. - ((id input type-tree binding) - ;; Check type. - (let* ((type (formal-parameter-type type-tree)) - (matched-type (match-type input type))) - (unless matched-type - (error input "Type mismatch" input type)) - (let ((position - (from-maybe - (maybe-let* ((position (maybe-assoc-ref binding "position"))) - (just (string->number position))) - ;; FIXME: Why a default value of 0? - 0)) - (prefix (maybe-assoc-ref binding "prefix"))) - (cond - ;; Recurse over array types. - ;; TODO: Implement record and enum types. - ((array-type? matched-type) - (list (command-line-binding - position - prefix - matched-type - (append-map (lambda (i input) - (id+input+type-tree+binding->command-line-binding - (list (append id (list i)) - input - (assoc-ref type-tree "items") - (maybe-assoc-ref (just type-tree) - "inputBinding")))) - (iota (vector-length input)) - (vector->list input)) - (maybe-assoc-ref binding "itemSeparator")))) - (else - (list (command-line-binding position - prefix - matched-type - ;; We defer access of input values to - ;; runtime when inputs have been fully - ;; resolved, staging is complete, etc. - #~(apply json-ref inputs '#$id) - %nothing))))))))) - - ;; For details of this algorithm, see §4.1 Input binding of the CWL - ;; 1.2 CommandLineTool specification: - ;; https://www.commonwl.org/v1.2/CommandLineTool.html#Input_binding - (append - ;; Insert elements from baseCommand. - (vector->list (or (assoc-ref cwl "baseCommand") - (vector))) - (sort - (append - ;; Collect CommandLineBinding objects from arguments; assign a sorting key. - (vector->list - (vector-map-indexed argument->command-line-binding - (or (assoc-ref cwl "arguments") - #()))) - ;; Collect CommandLineBinding objects from the inputs schema; assign a - ;; sorting key. - (collect-bindings - (filter-map (lambda (formal-input) - ;; Exclude formal inputs without an inputBinding. - (and (assoc "inputBinding" formal-input) - (let ((id (assoc-ref formal-input "id"))) - (list (list id) - (or (assoc-ref inputs id) - (assoc-ref formal-input "default") - 'null) - (or (assoc-ref formal-input "type") - (user-error "Type of input ~a not specified" - id)) - (maybe-assoc-ref (just formal-input) - "inputBinding"))))) - (vector->list (assoc-ref cwl "inputs"))))) - ;; Sort elements using the assigned sorting keys. - (lambda (binding1 binding2) - (< (command-line-binding-position binding1) - (command-line-binding-position binding2)))))) - -(define (command-line-binding->args binding) - "Return a list of arguments for @var{binding}. The returned list may -contain strings or G-expressions. The G-expressions may reference an -@code{inputs-directory} variable that must be defined in the context -in which the G-expressions are inserted." - (let ((prefix (command-line-binding-prefix binding)) - (type (command-line-binding-type binding)) - (value (command-line-binding-value binding))) - (cond - ((eq? type 'boolean) - (if value - ;; TODO: Error out if boolean input has no prefix? - (maybe->list prefix) - (list))) - ((eq? type 'null) (list)) - ((array-type? type) - (match value - ;; Empty arrays should be noops. - (() (list)) - (_ - (let ((args (append-map command-line-binding->args - value))) - (append (maybe->list prefix) - (from-maybe - (maybe-let* ((item-separator (command-line-binding-item-separator binding))) - (just (list #~(string-join (list #$@args) - #$item-separator)))) - args)))))) - (else - (append (maybe->list prefix) - (list (case type - ((string) - value) - ((int float) - #~(number->string #$value)) - ((File) - #~(assoc-ref* #$value "path")) - (else - (user-error "Invalid formal input type ~a" - type))))))))) - -(define* (build-gexp-script name exp #:optional guix-daemon-socket) - "Build script named @var{name} using G-expression @var{exp}. - -When @var{guix-daemon-socket} is provided, connect to that Guix daemon." - (if guix-daemon-socket - (parameterize ((%daemon-socket-uri guix-daemon-socket)) - (build-gexp-script name exp)) - (with-store store - (run-with-store store - (mlet %store-monad ((drv (gexp->script name exp))) - (mbegin %store-monad - (built-derivations (list drv)) - (return (derivation->output-path drv)))))))) - -(define* (run-command-line-tool name manifest-file channels cwl inputs - scratch store batch-system - #:key guix-daemon-socket) - "Run @code{CommandLineTool} class workflow @var{cwl} named @var{name} with -@var{inputs} using tools from Guix manifest in @var{manifest-file}. Return a -state-monadic job state object. - -@var{channels}, @var{scratch}, @var{store}, @var{batch-system} and -@var{guix-daemon-socket} are the same as in @code{run-workflow} from +(define* (build-gexp-script name exp) + "Build script named @var{name} using G-expression @var{exp}. Return the path to +the built script as a monadic value." + (mlet %store-monad ((drv (gexp->script name exp))) + (mbegin %store-monad + (built-derivations (list drv)) + (return (derivation->output-path drv))))) + +(define* (run-command-line-tool name script inputs resource-requirement + store batch-system) + "Run @code{CommandLineTool} class workflow @var{script} named @var{name} with +@var{inputs}. Return a state-monadic job state object. + +@var{resource-requirement} is the @code{ResourceRequirement} of the workflow. +@var{store} and @var{batch-system} are the same as in @code{run-workflow} from @code{(ravanan workflow)}." - (let* ((script - (build-command-line-tool-script name manifest-file channels cwl inputs - scratch store batch-system - guix-daemon-socket)) - (requirements (inherit-requirements (or (assoc-ref cwl "requirements") - #()) - (or (assoc-ref cwl "hints") - #()))) - (cpus (from-maybe - (maybe-bind (maybe-assoc-ref (find-requirement requirements - "ResourceRequirement") - "coresMin") - (compose just - inexact->exact - ceiling - (cut coerce-type <> 'number) - (cut coerce-expression - <> - `(("inputs" . ,inputs))))) - 1)) - (store-files-directory (script->store-files-directory script store)) - (store-data-file (script->store-data-file script store)) - (stdout-file (script->store-stdout-file script store)) - (stderr-file (script->store-stderr-file script store))) + (let ((cpus (from-maybe + (maybe-bind (maybe-assoc-ref resource-requirement + "coresMin") + (compose just + inexact->exact + ceiling + (cut coerce-type <> 'int) + (cut coerce-expression + <> + `(("inputs" . ,inputs))))) + 1)) + (store-files-directory (step-store-files-directory script inputs store)) + (store-data-file (step-store-data-file script inputs store)) + (stdout-file (step-store-stdout-file script inputs store)) + (stderr-file (step-store-stderr-file script inputs store))) (if (file-exists? store-data-file) ;; Return a dummy success state object if script has already ;; been run successfully. (state-return (begin - (format (current-error-port) - "~a previously run; retrieving result from store~%" - script) - (single-machine-job-state script #t))) + (log-debug "~a previously run; retrieving result from store" + script) + (single-machine-job-state script inputs #t))) ;; Run script if it has not already been run. (begin ;; Delete output files directory if an incomplete one exists @@ -401,44 +227,40 @@ state-monadic job state object. (when (file-exists? store-files-directory) (delete-file-recursively store-files-directory)) (mkdir store-files-directory) - (cond - ((eq? batch-system 'single-machine) - (state-let* ((success? (single-machine:submit-job - `(("WORKFLOW_OUTPUT_DIRECTORY" . - ,store-files-directory) - ("WORKFLOW_OUTPUT_DATA_FILE" . - ,store-data-file)) - stdout-file - stderr-file - script))) - (state-return (single-machine-job-state script success?)))) - ((slurm-api-batch-system? batch-system) - (state-let* ((job-id - (slurm:submit-job `(("WORKFLOW_OUTPUT_DIRECTORY" . - ,store-files-directory) - ("WORKFLOW_OUTPUT_DATA_FILE" . - ,store-data-file)) - stdout-file - stderr-file - cpus - name - script - #:api-endpoint (slurm-api-batch-system-endpoint batch-system) - #:jwt (slurm-api-batch-system-jwt batch-system) - #:partition (slurm-api-batch-system-partition batch-system) - #:nice (slurm-api-batch-system-nice batch-system)))) - (format (current-error-port) - "~a submitted as job ID ~a~%" - script - job-id) - (state-return (slurm-job-state script job-id)))) - (else - (assertion-violation batch-system "Invalid batch system"))))))) - -(define (capture-command-line-tool-output script store) + (let ((environment + `(("WORKFLOW_INPUTS" . ,(scm->json-string inputs)) + ("WORKFLOW_OUTPUT_DIRECTORY" . ,store-files-directory) + ("WORKFLOW_OUTPUT_DATA_FILE" . ,store-data-file)))) + (cond + ((eq? batch-system 'single-machine) + (state-let* ((success? (single-machine:submit-job environment + stdout-file + stderr-file + script))) + (state-return (single-machine-job-state script inputs success?)))) + ((slurm-api-batch-system? batch-system) + (state-let* ((job-id + (slurm:submit-job environment + stdout-file + stderr-file + cpus + name + script + #:api-endpoint (slurm-api-batch-system-endpoint batch-system) + #:jwt (slurm-api-batch-system-jwt batch-system) + #:partition (slurm-api-batch-system-partition batch-system) + #:nice (slurm-api-batch-system-nice batch-system)))) + (log-info "~a submitted as job ID ~a~%" + script + job-id) + (state-return (slurm-job-state script inputs job-id)))) + (else + (assertion-violation batch-system "Invalid batch system")))))))) + +(define (capture-command-line-tool-output script inputs store) "Capture and return output of @code{CommandLineTool} class workflow that ran -@var{script}. @var{store} is the path to the ravanan store." - (let* ((store-data-file (script->store-data-file script store)) +@var{script} with @var{inputs}. @var{store} is the path to the ravanan store." + (let* ((store-data-file (step-store-data-file script inputs store)) (output-json (call-with-input-file store-data-file json->scm))) ;; Recursively rewrite file paths in output JSON. @@ -455,7 +277,7 @@ state-monadic job state object. (string=? (assoc-ref tree "class") "File")) (let* ((store-files-directory - (script->store-files-directory script store)) + (step-store-files-directory script inputs store)) (path (expand-file-name (relative-file-name (assoc-ref tree "path") store-files-directory) @@ -478,52 +300,6 @@ state-monadic job state object. (call-with-input-file store-data-file json->scm))) -(define (copy-input-files-gexp inputs) - "Return a G-expression that copies @code{File} type inputs (along with secondary -files) from @var{inputs} into @code{inputs-directory} and return a new -association list with updated @code{location} and @code{path} fields. - -The returned G-expression will reference an @code{inputs-directory} variable." - (define (copy-input-files input) - (cond - ((vector? input) - #~,(list->vector - `#$(map copy-input-files - (vector->list input)))) - ((eq? (object-type input) - 'File) - #~,(let ((path-in-inputs-directory - (expand-file-name #$(store-item-name (assoc-ref input "path")) - inputs-directory))) - (copy-file #$(assoc-ref input "path") - path-in-inputs-directory) - (maybe-assoc-set '#$input - (cons "location" - (just path-in-inputs-directory)) - (cons "path" - (just path-in-inputs-directory)) - (cons "basename" - (just (basename path-in-inputs-directory))) - (cons "nameroot" - (just (file-name-stem path-in-inputs-directory))) - (cons "nameext" - (just (file-name-extension path-in-inputs-directory))) - (cons "secondaryFiles" - #$(from-maybe - (maybe-let* ((secondary-files - (maybe-assoc-ref (just input) "secondaryFiles"))) - (just #~(just (list->vector - `#$(vector-map->list copy-input-files - secondary-files))))) - #~%nothing))))) - (else input))) - - #~(list->dotted-list - `#$(map (match-lambda - ((id . input) - (list id (copy-input-files input)))) - inputs))) - (define (find-requirement requirements class) "Find requirement of @var{class} among @var{requirements} and return a maybe-monadic value." @@ -548,64 +324,60 @@ maybe-monadic value." (guix profiles)))) (raise-exception (manifest-file-error manifest-file)))) -(define (call-with-inferior inferior proc) - "Call @var{proc} with @var{inferior} and return the return value of @var{proc}. -Close @var{inferior} when done, even if @var{proc} exits non-locally." - (dynamic-wind (const #t) - (cut proc inferior) - (cut close-inferior inferior))) - -(define (manifest-file->environment manifest-file channels guix-daemon-socket) - "Build @var{manifest-file} and return an association list of environment -variables to set to use the built profile. Connect to the Guix daemon specified -by @var{guix-daemon-socket}. If @var{channels} is not @code{#f}, build manifest -in a Guix inferior with @var{channels}." - (if channels - (call-with-inferior (inferior-for-channels channels) - (cut inferior-eval - `(begin - (use-modules (ice-9 match) - (guix search-paths) - (gnu packages) - (guile) - (guix gexp) - (guix profiles)) - - (define (build-derivation drv guix-daemon-socket) - (if guix-daemon-socket - (parameterize ((%daemon-socket-uri guix-daemon-socket)) - (build-derivation drv)) - (with-store store - (run-with-store store - (mlet %store-monad ((drv drv)) - (mbegin %store-monad - (built-derivations (list drv)) - (return (derivation->output-path drv)))))))) - - ;; Do not auto-compile manifest files. - (set! %load-should-auto-compile #f) - (let ((manifest (load ,(canonicalize-path manifest-file)))) - (map (match-lambda - ((specification . value) - (cons (search-path-specification-variable specification) - value))) - (evaluate-search-paths - (manifest-search-paths manifest) - (list (build-derivation - (profile-derivation manifest - #:allow-collisions? #t) - ,guix-daemon-socket)))))) - <>)) - (manifest->environment (load-manifest manifest-file) - guix-daemon-socket))) - -(define (software-packages->environment packages channels guix-daemon-socket) - "Build a profile with @var{packages} and return an association list -of environment variables to set to use the built profile. @var{packages} is a -vector of @code{SoftwarePackage} assocation lists as defined in the CWL -standard. Connect to the Guix daemon specified by @var{guix-daemon-socket}. If -@var{channels} is not @code{#f}, look up packages in a Guix inferior with -@var{channels}." +;; Monadic version of inferior-eval-with-store; Argument order is rearranged +;; slightly to suit store-lift. +(define inferior-meval-with-store + (store-lift (lambda (store inferior proc) + (inferior-eval-with-store inferior store proc)))) + +(define (manifest-file->search-path-sexps manifest-file inferior) + "Return a list of search path S-expressions for a profile described by +@var{manifest-file}. Load manifest in @var{inferior} unless it is @code{#f}. +Return value is monadic." + (define proc + `(lambda (store) + ;; Do not auto-compile manifest files. + (set! %load-should-auto-compile #f) + (map search-path-specification->sexp + (manifest-search-paths (load ,manifest-file))))) + + (if inferior + (begin + (inferior-eval '(use-modules (guix profiles) + (guix search-paths)) + inferior) + (mbegin %store-monad + (inferior-meval-with-store inferior proc))) + (mbegin %store-monad + (return (map search-path-specification->sexp + (manifest-search-paths (load-manifest manifest-file))))))) + +(define (manifest-file->profile-derivation manifest-file inferior) + "Return a derivation to build @var{manifest-file}. Build manifest in +@var{inferior} unless it is @code{#f}. Return value is monadic." + (define proc + `(lambda (store) + ;; Do not auto-compile manifest files. + (set! %load-should-auto-compile #f) + (derivation-file-name + (run-with-store store + (profile-derivation (load ,manifest-file) + #:allow-collisions? #t))))) + + (if inferior + (begin + (inferior-eval '(use-modules (guix profiles)) + inferior) + (mlet %store-monad ((drv-file (inferior-meval-with-store inferior proc))) + (return (read-derivation-from-file drv-file)))) + (let ((manifest (load-manifest manifest-file))) + (profile-derivation manifest + #:allow-collisions? #t)))) + +(define (software-packages->manifest packages inferior) + "Return a manifest with @var{packages}. @var{packages} is a vector of +@code{SoftwarePackage} assocation lists as defined in the CWL standard. Look up +packages in @var{inferior} unless it is @code{#f}." (define (software-package->package-specification package) (string-append (assoc-ref* package "package") (from-maybe @@ -614,63 +386,48 @@ standard. Connect to the Guix daemon specified by @var{guix-daemon-socket}. If (cut string-append "@" <>))) ""))) - (define packages->environment - (compose (cut manifest->environment <> guix-daemon-socket) - packages->manifest)) - - (if channels - (call-with-inferior (inferior-for-channels channels) - (lambda (inferior) - (packages->environment - (vector-map->list (lambda (package) - (let ((name (assoc-ref package "package")) - (version (assoc-ref package "version"))) - (match (lookup-inferior-packages inferior - name - version) - ((inferior-package _ ...) - inferior-package)))) - packages)))) - (packages->environment + (packages->manifest + (if inferior + (vector-map->list (lambda (package) + (let ((name (assoc-ref package "package")) + (version (assoc-ref package "version"))) + (match (lookup-inferior-packages inferior + name + version) + ((inferior-package _ ...) + inferior-package)))) + packages) (vector-map->list (compose specification->package software-package->package-specification) packages)))) -(define (manifest->environment manifest guix-daemon-socket) - "Build @var{manifest} and return an association list of environment -variables to set to use the built profile. Connect to the Guix daemon specified -by @var{guix-daemon-socket}." - (define (build-derivation drv guix-daemon-socket) - (if guix-daemon-socket - (parameterize ((%daemon-socket-uri guix-daemon-socket)) - (build-derivation drv #f)) - (with-store store - (run-with-store store - (mlet %store-monad ((drv drv)) - (mbegin %store-monad - (built-derivations (list drv)) - (return (derivation->output-path drv)))))))) - - (map (match-lambda - ((specification . value) - (cons (search-path-specification-variable specification) - value))) - (evaluate-search-paths - (manifest-search-paths manifest) - (list (build-derivation - (profile-derivation manifest - #:allow-collisions? #t) - guix-daemon-socket))))) +(define (software-packages->search-path-sexps packages inferior) + "Return a list of search path S-expressions for a profile with @var{packages}. +@var{packages} is a vector of @code{SoftwarePackage} assocation lists as defined +in the CWL standard. Look up packages in @var{inferior} unless it is @code{#f}. +Return value is monadic." + (mbegin %store-monad + (return (map search-path-specification->sexp + (manifest-search-paths + (software-packages->manifest packages inferior)))))) + +(define (software-packages->profile-derivation packages inferior) + "Return a derivation to build a profile with @var{packages}. @var{packages} is a +vector of @code{SoftwarePackage} assocation lists as defined in the CWL +standard. Look up packages in @var{inferior} unless it is @code{#f}. Return +value is monadic." + (profile-derivation (software-packages->manifest packages inferior) + #:allow-collisions? #t)) -(define (build-command-line-tool-script name manifest-file channels cwl inputs - scratch store batch-system - guix-daemon-socket) +(define (build-command-line-tool-script name manifest-file inferior cwl + scratch store batch-system) "Build and return script to run @code{CommandLineTool} class workflow @var{cwl} -named @var{name} with @var{inputs} using tools from Guix manifest in -@var{manifest-file} and on @var{batch-system}. +named @var{name} using tools from Guix manifest in @var{manifest-file} and on +@var{batch-system}. Use @var{inferior} to build manifests, unless it is +@code{#f}. Return value is monadic. -@var{channels}, @var{scratch}, @var{store} and @var{guix-daemon-socket} are the -same as in @code{run-workflow} from @code{(ravanan workflow)}." +@var{scratch} and @var{store} are the same as in @code{run-workflow} from +@code{(ravanan workflow)}." (define (environment-variables env-var-requirement) (just (vector-map->list (lambda (environment-definition) #~(list #$(assoc-ref* environment-definition @@ -697,12 +454,12 @@ same as in @code{run-workflow} from @code{(ravanan workflow)}." (define (cores batch-system) (cond - ((slurm-api-batch-system? batch-system) - #~(string->number (getenv "SLURM_CPUS_ON_NODE"))) - ((eq? batch-system 'single-machine) - #~(total-processor-count)) - (else - (assertion-violation batch-system "Unknown batch system")))) + ((slurm-api-batch-system? batch-system) + #~(string->number (getenv "SLURM_CPUS_ON_NODE"))) + ((eq? batch-system 'single-machine) + #~(total-processor-count)) + (else + (assertion-violation batch-system "Unknown batch system")))) (define stdout-filename (cond @@ -725,12 +482,36 @@ same as in @code{run-workflow} from @code{(ravanan workflow)}." (list 'File 'Directory)) (error #f "glob output binding not specified")))) + (define (coerce-argument argument) + (assoc-set argument + (cons "valueFrom" + (coerce-expression (assoc-ref* argument "valueFrom"))))) + + (define (vector->gexp vec) + ;; FIXME: G-expressions in vectors are not properly substituted. Fix this in + ;; Guix. + #~(vector #$@(vector->list vec))) + + (define (alist->gexp alist) + ;; FIXME: G-expressions as values in dotted alists are not properly + ;; substituted. Fix this in Guix. + #~(list #$@(map (match-lambda + ((key . value) + #~(cons #$key #$value))) + alist))) + (define run-command-gexp - #~(run-command (list #$@(append-map (lambda (arg) - (if (command-line-binding? arg) - (command-line-binding->args arg) - (list arg))) - (build-command cwl inputs))) + #~(run-command (append-map (lambda (arg) + (if (command-line-binding? arg) + (command-line-binding->args arg) + (list arg))) + (build-command #$(assoc-ref cwl "baseCommand") + #$(vector->gexp + (vector-map (compose alist->gexp coerce-argument) + (or (assoc-ref cwl "arguments") + #()))) + #$(assoc-ref cwl "inputs") + inputs)) #$(coerce-expression (assoc-ref cwl "stdin")) #$stdout-filename '#$(from-maybe @@ -812,7 +593,7 @@ same as in @code{run-workflow} from @code{(ravanan workflow)}." (maybe-let* ((work-reuse (find-requirement requirements "WorkReuse"))) (and (not (coerce-type (assoc-ref* work-reuse "enableReuse") 'boolean)) - (user-error "Disabling WorkReuse is not supported. ravanan's strong caching using Guix makes it unnecessary.")))) + (user-error "Disabling WorkReuse is not supported. With ravanan's strong caching using Guix, there is no need to disable WorkReuse.")))) (maybe-let* ((hints (maybe-assoc-ref (just cwl) "hints"))) (check-requirements hints batch-system @@ -823,276 +604,330 @@ same as in @code{run-workflow} from @code{(ravanan workflow)}." (maybe-let* ((work-reuse (find-requirement hints "WorkReuse"))) (and (not (coerce-type (assoc-ref* work-reuse "enableReuse") 'boolean)) - (warning "Ignoring disable of WorkReuse. ravanan's strong caching using Guix makes it unnecessary.")))) - ;; Copy input files and update corresponding input objects. - (build-gexp-script name - (let* ((requirements (inherit-requirements (or (assoc-ref cwl "requirements") - #()) - (or (assoc-ref cwl "hints") - #()))) - (initial-work-dir-requirement (find-requirement requirements - "InitialWorkDirRequirement")) - (manifest-file - (from-maybe (maybe-assoc-ref - (find-requirement requirements "SoftwareRequirement") - "manifest") - manifest-file)) - (packages - (from-maybe (maybe-assoc-ref - (find-requirement requirements "SoftwareRequirement") - "packages") - #()))) - (with-imported-modules (source-module-closure '((ravanan work command-line-tool) - (ravanan work monads) - (ravanan work ui) - (ravanan work vectors) - (ravanan glob) - (guix search-paths)) - #:select? (match-lambda - (('ravanan work . _) #t) - (('guix . _) #t) - (('json . _) #t) - (_ #f))) - (with-extensions (list guile-filesystem guile-gcrypt) - #~(begin - (use-modules (ravanan work command-line-tool) - (ravanan work monads) - (ravanan work types) - (ravanan work ui) - (ravanan work utils) - (ravanan work vectors) - (ravanan glob) - (rnrs io ports) - (srfi srfi-1) - (srfi srfi-26) - (ice-9 filesystem) - (ice-9 match) - (ice-9 threads) - (guix search-paths) - (json)) + (warning "Ignoring disable of WorkReuse. With ravanan's strong caching using Guix, there is no need to disable WorkReuse.")))) - (define (copy-file-value value directory) - ;; Copy file represented by value to directory and return the - ;; new File value. - (let* ((path (assoc-ref* value "path")) - (destination-path (expand-file-name (basename path) - directory))) - (copy-file path destination-path) - (assoc-set value - (cons "location" (string-append "file://" destination-path)) - (cons "path" destination-path)))) - - (define (capture-secondary-file path secondary-file - workflow-output-directory) - "Capture @var{secondary-file} for primary @var{path} and return its + (let* ((requirements (inherit-requirements (or (assoc-ref cwl "requirements") + #()) + (or (assoc-ref cwl "hints") + #()))) + (initial-work-dir-requirement (find-requirement requirements + "InitialWorkDirRequirement")) + (manifest-file + (from-maybe (maybe-assoc-ref + (find-requirement requirements "SoftwareRequirement") + "manifest") + manifest-file)) + (packages + (from-maybe (maybe-assoc-ref + (find-requirement requirements "SoftwareRequirement") + "packages") + #()))) + (mlet %store-monad ((search-path-sexps + (match packages + ;; No package specifications; try the manifest file. + (#() + (manifest-file->search-path-sexps manifest-file + inferior)) + ;; Use package specifications to build an + ;; environment. + (_ + (software-packages->search-path-sexps packages + inferior)))) + (profile-derivation + (match packages + ;; No package specifications; try the manifest file. + (#() + (manifest-file->profile-derivation manifest-file + inferior)) + ;; Use package specifications to build an + ;; environment. + (_ + (software-packages->profile-derivation packages + inferior))))) + (build-gexp-script name + (with-imported-modules (source-module-closure '((ravanan work command-line-tool) + (ravanan work monads) + (ravanan work ui) + (ravanan work vectors) + (ravanan glob) + (guix search-paths)) + #:select? (match-lambda + (('ravanan work . _) #t) + (('guix . _) #t) + (('json . _) #t) + (_ #f))) + (with-extensions (list guile-filesystem guile-gcrypt) + #~(begin + (use-modules (ravanan work command-line-tool) + (ravanan work monads) + (ravanan work types) + (ravanan work ui) + (ravanan work utils) + (ravanan work vectors) + (ravanan glob) + (rnrs io ports) + (srfi srfi-1) + (srfi srfi-26) + (ice-9 filesystem) + (ice-9 match) + (ice-9 threads) + (guix search-paths) + (json)) + + (define (copy-input-files input inputs-directory) + ;; Copy input files and update corresponding input objects. + (cond + ((vector? input) + (vector-map (cut copy-input-files <> inputs-directory) + input)) + ((eq? (object-type input) + 'File) + (let ((path-in-inputs-directory + ;; Input files may have the same filename. So, we take + ;; the additional precaution of copying input files + ;; into their own hash-prefixed subdirectories, just + ;; like they are in the ravanan store. + (expand-file-name (file-name-join + (take-right (file-name-split + (assoc-ref input "path")) + 2)) + inputs-directory))) + (make-directories (file-dirname path-in-inputs-directory)) + (copy-file (assoc-ref input "path") + path-in-inputs-directory) + (maybe-assoc-set input + (cons "location" + (just path-in-inputs-directory)) + (cons "path" + (just path-in-inputs-directory)) + (cons "basename" + (just (basename path-in-inputs-directory))) + (cons "nameroot" + (just (file-name-stem path-in-inputs-directory))) + (cons "nameext" + (just (file-name-extension path-in-inputs-directory))) + (cons "secondaryFiles" + (maybe-let* ((secondary-files + (maybe-assoc-ref (just input) "secondaryFiles"))) + (just (vector-map (cut copy-input-files <> inputs-directory) + secondary-files))))))) + (else input))) + + (define (copy-file-value value directory) + ;; Copy file represented by value to directory and return the + ;; new File value. + (let* ((path (assoc-ref* value "path")) + (destination-path (expand-file-name (basename path) + directory))) + (copy-file path destination-path) + (assoc-set value + (cons "location" (string-append "file://" destination-path)) + (cons "path" destination-path)))) + + (define (capture-secondary-file path secondary-file + workflow-output-directory) + "Capture @var{secondary-file} for primary @var{path} and return its canonicalized value. If @var{required} is @code{#t} and no such secondary file is found, error out. @var{workflow-output-directory} is path to the output directory of the workflow." - (let* ((secondary-file-path (secondary-path path secondary-file)) - (secondary-file-value - (and (file-exists? secondary-file-path) - (copy-file-value (canonicalize-file-value - `(("class" . "File") - ("path" . ,secondary-file-path))) - workflow-output-directory)))) - (if (and (assoc-ref* secondary-file "required") - (not secondary-file-value)) - (user-error "Secondary file ~a missing for output path ~a" - pattern - path) - secondary-file-value))) - - (define (path+sha1->value path sha1 workflow-output-directory maybe-secondary-files) - (maybe-assoc-set (copy-file-value (canonicalize-file-value - `(("class" . "File") - ("path" . ,path) - ("checksum" . ,(string-append "sha1$" sha1)))) - workflow-output-directory) - (cons "secondaryFiles" - (maybe-let* ((secondary-files maybe-secondary-files)) - (just (vector-filter-map (cut capture-secondary-file - path - <> - workflow-output-directory) - secondary-files)))))) - - (define (path->value path workflow-output-directory maybe-secondary-files) - (path+sha1->value path - (sha1-hash path) - workflow-output-directory - maybe-secondary-files)) - - (define (stdout-output->value workflow-output-directory - stdout-directory - stdout-filename - output) - (cons (assoc-ref output "id") - (let ((sha1 (sha1-hash stdout-filename))) - ;; Use path+sha1->value instead of path->value to avoid - ;; recomputing the SHA1 hash. - (path+sha1->value - (if (string=? stdout-filename - (file-name-join* stdout-directory "stdout")) - ;; If stdout filename is unspecified, rename it to a - ;; hash of its contents. - (let ((hashed-filename - (file-name-join* stdout-directory sha1))) - (rename-file stdout-filename - hashed-filename) - hashed-filename) - ;; Else, return the stdout filename as it is. - stdout-filename) - sha1 - workflow-output-directory - %nothing)))) - - (define (other-output->value workflow-output-directory - output-id output-type-tree - maybe-secondary-files glob-pattern) - (cons output-id - ;; TODO: Support all types. - (let* ((output-type (formal-parameter-type output-type-tree)) - (paths (glob glob-pattern)) - (matched-type (glob-match-type paths output-type))) - (unless matched-type - (user-error "Type ~a mismatch for globbed paths ~a" - output-type - paths)) - ;; Coerce output value into matched type. - (let ((output-values (map (cut path->value - <> - workflow-output-directory - maybe-secondary-files) - paths))) - (cond - ((memq matched-type (list 'File 'Directory)) - (match output-values - ((output-file) - output-file))) - ;; TODO: Recurse. - ((and (array-type? matched-type) - (memq (array-type-subtype matched-type) - (list 'File 'Directory))) - (list->vector output-values)) - ((eq? matched-type 'null) - 'null)))))) - - (define (stage-file file entry-name) - ;; Stage file as entry-name and return the staged File value. - (rename-file (assoc-ref* file "path") - entry-name) - (canonicalize-file-value - (maybe-assoc-set `(("class" . "File") - ("path" . ,entry-name)) - (cons "secondaryFiles" - (maybe-let* ((secondary-files - (maybe-assoc-ref (just file) "secondaryFiles"))) - (just (vector-map (lambda (file) - (stage-file file (assoc-ref* file "basename"))) - secondary-files))))))) - - ;; Stage files. - ;; We currently support File and Dirent only. TODO: Support others. - (define (stage-files entries outputs-directory) - ;; Stage entries and return an association list mapping files - ;; (presumably input files) that were staged. - (filter-map (match-lambda - ((entry-name entry) - (cond - ;; Stuff string literal into a file. - ((string? entry) - (call-with-input-file entry-name - (cut put-string <> entry)) - #f) - ;; Symlink to the file. - ((eq? (object-type entry) - 'File) - (cons entry - (stage-file entry entry-name)))))) - entries)) - - (define (set-staged-path input staging-mapping) - ;; If input is a File type input that was staged, return new - ;; staged value. Else, return as is. - (cond - ;; Recurse on vector inputs. - ((vector? input) - (list->vector - (map (cut set-staged-path <> staging-mapping) - (vector->list input)))) - ;; Try to replace File input value with staged value. - ((eq? (object-type input) - 'File) - (or (any (match-lambda - ((old-value . new-value) - (and (alist=? input old-value) - new-value))) - staging-mapping) - input)) - ;; Else, return as is. - (else input))) - - ;; Set search paths for manifest. - (for-each (match-lambda - ((name . value) - (setenv name value))) - '#$(match packages - ;; No package specifications; try the manifest - ;; file. - (#() - (manifest-file->environment manifest-file - channels - guix-daemon-socket)) - ;; Use package specifications to build an - ;; environment. - (_ - (software-packages->environment packages - channels - guix-daemon-socket)))) - - (call-with-temporary-directory - (lambda (inputs-directory) - ;; We need to canonicalize JSON trees before inserting them - ;; into G-expressions. If we don't, we would have degenerate - ;; G-expressions that produce exactly the same result. - (let ((inputs #$(copy-input-files-gexp (canonicalize-json inputs))) - (runtime `(("cores" . ,#$(cores batch-system))))) - - ;; Set environment defined by workflow. - (for-each (match-lambda - ((name value) - (setenv name value))) - (list #$@(from-maybe - (maybe-bind - (find-requirement requirements - "EnvVarRequirement") - environment-variables) - (list)))) - - (call-with-temporary-directory - (lambda (stdout-directory) - (call-with-temporary-directory - (lambda (outputs-directory) - (call-with-current-directory outputs-directory - (lambda () - (let* ((staging-mapping - (stage-files (list #$@(from-maybe - (maybe-bind initial-work-dir-requirement - (compose just files-to-stage)) - (list))) - outputs-directory)) - (inputs - (map (match-lambda - ((id . input) - (cons id - (set-staged-path input - staging-mapping)))) - inputs))) - ;; Actually run the command. - #$run-command-gexp - ;; Capture outputs. - #$capture-outputs-gexp)))) - #$scratch)) - #$scratch))) - #$scratch))))) - guix-daemon-socket)) + (let* ((secondary-file-path (secondary-path path secondary-file)) + (secondary-file-value + (and (file-exists? secondary-file-path) + (copy-file-value (canonicalize-file-value + `(("class" . "File") + ("path" . ,secondary-file-path))) + workflow-output-directory)))) + (if (and (assoc-ref* secondary-file "required") + (not secondary-file-value)) + (user-error "Secondary file ~a missing for output path ~a" + pattern + path) + secondary-file-value))) + + (define (path+sha1->value path sha1 workflow-output-directory maybe-secondary-files) + (maybe-assoc-set (copy-file-value (canonicalize-file-value + `(("class" . "File") + ("path" . ,path) + ("checksum" . ,(string-append "sha1$" sha1)))) + workflow-output-directory) + (cons "secondaryFiles" + (maybe-let* ((secondary-files maybe-secondary-files)) + (just (vector-filter-map (cut capture-secondary-file + path + <> + workflow-output-directory) + secondary-files)))))) + + (define (path->value path workflow-output-directory maybe-secondary-files) + (path+sha1->value path + (sha1-hash path) + workflow-output-directory + maybe-secondary-files)) + + (define (stdout-output->value workflow-output-directory + stdout-directory + stdout-filename + output) + (cons (assoc-ref output "id") + (let ((sha1 (sha1-hash stdout-filename))) + ;; Use path+sha1->value instead of path->value to avoid + ;; recomputing the SHA1 hash. + (path+sha1->value + (if (string=? stdout-filename + (file-name-join* stdout-directory "stdout")) + ;; If stdout filename is unspecified, rename it to a + ;; hash of its contents. + (let ((hashed-filename + (file-name-join* stdout-directory sha1))) + (rename-file stdout-filename + hashed-filename) + hashed-filename) + ;; Else, return the stdout filename as it is. + stdout-filename) + sha1 + workflow-output-directory + %nothing)))) + + (define (other-output->value workflow-output-directory + output-id output-type-tree + maybe-secondary-files glob-pattern) + (cons output-id + ;; TODO: Support all types. + (let* ((output-type (formal-parameter-type output-type-tree)) + (paths (glob glob-pattern)) + (matched-type (glob-match-type paths output-type))) + (unless matched-type + (user-error "Type ~a mismatch for globbed paths ~a" + output-type + paths)) + ;; Coerce output value into matched type. + (let ((output-values (map (cut path->value + <> + workflow-output-directory + maybe-secondary-files) + paths))) + (cond + ((memq matched-type (list 'File 'Directory)) + (match output-values + ((output-file) + output-file))) + ;; TODO: Recurse. + ((and (cwl-array-type? matched-type) + (memq (cwl-array-type-subtype matched-type) + (list 'File 'Directory))) + (list->vector output-values)) + ((eq? matched-type 'null) + 'null)))))) + + (define (stage-file file entry-name) + ;; Stage file as entry-name and return the staged File value. + (rename-file (assoc-ref* file "path") + entry-name) + (canonicalize-file-value + (maybe-assoc-set `(("class" . "File") + ("path" . ,entry-name)) + (cons "secondaryFiles" + (maybe-let* ((secondary-files + (maybe-assoc-ref (just file) "secondaryFiles"))) + (just (vector-map (lambda (file) + (stage-file file (assoc-ref* file "basename"))) + secondary-files))))))) + + ;; Stage files. + ;; We currently support File and Dirent only. TODO: Support others. + (define (stage-files entries outputs-directory) + ;; Stage entries and return an association list mapping files + ;; (presumably input files) that were staged. + (filter-map (match-lambda + ((entry-name entry) + (cond + ;; Stuff string literal into a file. + ((string? entry) + (call-with-input-file entry-name + (cut put-string <> entry)) + #f) + ;; Symlink to the file. + ((eq? (object-type entry) + 'File) + (cons entry + (stage-file entry entry-name)))))) + entries)) + + (define (set-staged-path input staging-mapping) + ;; If input is a File type input that was staged, return new + ;; staged value. Else, return as is. + (cond + ;; Recurse on vector inputs. + ((vector? input) + (list->vector + (map (cut set-staged-path <> staging-mapping) + (vector->list input)))) + ;; Try to replace File input value with staged value. + ((eq? (object-type input) + 'File) + (or (any (match-lambda + ((old-value . new-value) + (and (alist=? input old-value) + new-value))) + staging-mapping) + input)) + ;; Else, return as is. + (else input))) + + ;; Set search paths for manifest. + (for-each (match-lambda + ((specification . value) + (setenv (search-path-specification-variable specification) + value))) + (evaluate-search-paths + (map sexp->search-path-specification + '#$search-path-sexps) + '(#$profile-derivation))) + (call-with-temporary-directory + (lambda (inputs-directory) + (let ((inputs (map (match-lambda + ((id . input) + (cons id + (copy-input-files input inputs-directory)))) + (json-string->scm + (getenv "WORKFLOW_INPUTS")))) + (self 'null) + (runtime `(("cores" . ,#$(cores batch-system))))) + + ;; Set environment defined by workflow. + (for-each (match-lambda + ((name value) + (setenv name value))) + (list #$@(from-maybe + (maybe-bind + (find-requirement requirements + "EnvVarRequirement") + environment-variables) + (list)))) + + (call-with-temporary-directory + (lambda (stdout-directory) + (call-with-temporary-directory + (lambda (outputs-directory) + (call-with-current-directory outputs-directory + (lambda () + (let* ((staging-mapping + (stage-files (list #$@(from-maybe + (maybe-bind initial-work-dir-requirement + (compose just files-to-stage)) + (list))) + outputs-directory)) + (inputs + (map (match-lambda + ((id . input) + (cons id + (set-staged-path input + staging-mapping)))) + inputs))) + ;; Actually run the command. + #$run-command-gexp + ;; Capture outputs. + #$capture-outputs-gexp)))) + #$scratch)) + #$scratch))) + #$scratch)))))))) diff --git a/ravanan/javascript.scm b/ravanan/javascript.scm index d57a02d..fa63fff 100644 --- a/ravanan/javascript.scm +++ b/ravanan/javascript.scm @@ -1,5 +1,5 @@ ;;; ravanan --- High-reproducibility CWL runner powered by Guix -;;; Copyright © 2024 Arun Isaac <arunisaac@systemreboot.net> +;;; Copyright © 2024–2025 Arun Isaac <arunisaac@systemreboot.net> ;;; ;;; This file is part of ravanan. ;;; @@ -122,25 +122,27 @@ keys @code{\"inputs\"}, @code{\"self\"} and @code{\"runtime\"}. (define (evaluate-using-node expression context expression-lib) "This function is the same as @code{evaluate-parameter-reference-1} but uses the node javascript engine." - (define (set-variable name value) - (string-append name " = " (scm->json-string value) ";")) - - (define preamble - (string-join (append expression-lib - (filter-map (match-lambda - (((and (or "inputs" "self" "runtime") - name) - . value) - (set-variable name value)) - (_ #f)) - (or context - (list)))))) + (define (context-value name) + (scm->json-string (assoc-ref context name))) (if context ;; Evaluate immediately. - (evaluate-javascript %node expression preamble) + (evaluate-javascript %node + expression + (string-append (string-join expression-lib) + "inputs = " (context-value "inputs") ";" + "self = " (context-value "self") ";" + "runtime = " (context-value "runtime") ";")) ;; Compile to a G-expression that evaluates expression. - #~(evaluate-javascript #$%worker-node #$expression #$preamble))) + #~(evaluate-javascript #$%worker-node + #$expression + ;; Context variables are only fully available at + ;; runtime. So, defer their reference to the + ;; G-expression. + (string-append #$(string-join expression-lib) + "inputs = " (scm->json-string inputs) ";" + "self = " (scm->json-string self) ";" + "runtime = " (scm->json-string runtime) ";")))) (define (tokenize-parameter-references str) "Split @var{str} into tokens of parameter reference and literal strings." diff --git a/ravanan/job-state.scm b/ravanan/job-state.scm index a769698..2894618 100644 --- a/ravanan/job-state.scm +++ b/ravanan/job-state.scm @@ -34,18 +34,21 @@ slurm-job-state job-state-script + job-state-inputs job-state-status)) (define-immutable-record-type <single-machine-job-state> - (single-machine-job-state script success?) + (single-machine-job-state script inputs success?) single-machine-job-state? (script single-machine-job-state-script) + (inputs single-machine-job-state-inputs) (success? single-machine-job-state-success?)) (define-immutable-record-type <slurm-job-state> - (slurm-job-state script job-id) + (slurm-job-state script inputs job-id) slurm-job-state? (script slurm-job-state-script) + (inputs slurm-job-state-inputs) (job-id slurm-job-state-job-id)) (define (job-state-script state) @@ -56,6 +59,14 @@ slurm-job-state-script)) state)) +(define (job-state-inputs state) + ((cond + ((single-machine-job-state? state) + single-machine-job-state-inputs) + ((slurm-job-state? state) + slurm-job-state-inputs)) + state)) + (define* (job-state-status state batch-system) "Return current status of job with @var{state} on @var{batch-system}. The status is one of the symbols @code{completed}, @code{failed} or @code{pending} diff --git a/ravanan/propnet.scm b/ravanan/propnet.scm index 34624b5..610991d 100644 --- a/ravanan/propnet.scm +++ b/ravanan/propnet.scm @@ -26,22 +26,26 @@ #:use-module (ravanan work monads) #:use-module (ravanan work utils) #:export (propnet + propnet? propnet-propagators propnet-value=? propnet-merge-values propnet-scheduler propagator + propagator? propagator-name propagator-proc propagator-inputs propagator-optional-inputs propagator-outputs scheduler + scheduler? scheduler-schedule scheduler-poll scheduler-capture-output schedule-propnet state+status + state+status? state+status-state state+status-status poll-propnet @@ -64,6 +68,12 @@ (optional-inputs propagator-optional-inputs) (outputs propagator-outputs)) +(set-record-type-printer! <propagator> + (lambda (record port) + (display "#<<propagator> " port) + (write (propagator-name record) port) + (display ">" port))) + (define-immutable-record-type <scheduler> (scheduler schedule poll capture-output) scheduler? @@ -86,6 +96,18 @@ (propagators-in-flight propnet-state-propagators-in-flight) (propagators-inbox propnet-state-propagators-inbox)) +(set-record-type-printer! <propnet-state> + (lambda (record port) + (display "#<<propnet-state> cells: " port) + (write (run-with-state (propnet-state-cells record)) port) + (display " cells-inbox: " port) + (write (run-with-state (propnet-state-cells-inbox record)) port) + (display " propagators-in-flight: " port) + (write (run-with-state (propnet-state-propagators-in-flight record)) port) + (display " propagators-inbox: " port) + (write (run-with-state (propnet-state-propagators-inbox record)) port) + (display ">" port))) + (define (partition-map pred proc lst) "Partition @var{lst} into two lists using @var{pred} like @code{partition}. Then, map @var{proc} over both the lists and return the resulting lists." diff --git a/ravanan/reader.scm b/ravanan/reader.scm index a4e57c7..bb825ed 100644 --- a/ravanan/reader.scm +++ b/ravanan/reader.scm @@ -17,21 +17,28 @@ ;;; along with ravanan. If not, see <https://www.gnu.org/licenses/>. (define-module (ravanan reader) + #:use-module (rnrs conditions) + #:use-module (rnrs exceptions) #:use-module (rnrs io ports) #:use-module (srfi srfi-26) #:use-module (ice-9 filesystem) #:use-module (ice-9 match) #:use-module (json) - #:use-module (yaml) + #:use-module ((yaml) #:prefix yaml:) #:use-module (ravanan work command-line-tool) #:use-module (ravanan work monads) #:use-module (ravanan work types) + #:use-module (ravanan work ui) #:use-module (ravanan work utils) #:use-module (ravanan work vectors) - #:export (read-workflow - read-inputs + #:export (read-workflow+inputs coerce-type)) +(define-condition-type &type-coercion-violation &violation + type-coercion-violation type-coercion-violation? + (value type-coercion-violation-value) + (type type-coercion-violation-type)) + (define (preprocess-include tree) (cond ;; Arrays @@ -101,18 +108,31 @@ each association list of the returned vector of association lists. If (define (normalize-env-var-requirement env-var-requirement) (assoc-set env-var-requirement - (cons "envDef" - (coerce-alist->vector - (assoc-ref env-var-requirement "envDef") - "envName" "envValue")))) + (cons "envDef" + (coerce-alist->vector + (assoc-ref env-var-requirement "envDef") + "envName" "envValue")))) + +(define (normalize-software-requirement software-requirement) + (maybe-assoc-set software-requirement + ;; Canonicalize manifest file path so that we look it up with respect to the + ;; path of the workflow file. + (cons "manifest" + (maybe-bind (maybe-assoc-ref (just software-requirement) + "manifest") + (compose just canonicalize-path))))) (define (normalize-requirements maybe-requirements) (maybe-let* ((requirements maybe-requirements)) (just (vector-map (lambda (requirement) - (if (string=? (assoc-ref requirement "class") - "EnvVarRequirement") - (normalize-env-var-requirement requirement) - requirement)) + (let ((class (assoc-ref requirement "class"))) + (cond + ((string=? class "EnvVarRequirement") + (normalize-env-var-requirement requirement)) + ((string=? class "SoftwareRequirement") + (normalize-software-requirement requirement)) + (else + requirement)))) (coerce-alist->vector requirements "class"))))) (define (normalize-secondary-files secondary-files default-required) @@ -136,35 +156,26 @@ the @code{required} field when it is not specified." (vector `(("pattern" . ,secondary-files) ("required" . ,default-required)))))) -(define (some-file-type? type) - "Return @code{#t} if @var{type} is a @code{File}, an array of @code{File}s, an -array of array of @code{File}s, etc. Else, return @code{#f}" - (or (eq? type 'File) - (and (array-type? type) - (some-file-type? (array-type-subtype type))))) - (define (normalize-formal-input input) "Normalize formal @var{input}." - (if (some-file-type? (formal-parameter-type (assoc-ref input "type"))) - (maybe-assoc-set input - (cons "default" - (maybe-bind (maybe-assoc-ref (just input) "default") - normalize-input)) - (cons "secondaryFiles" - (maybe-bind (maybe-assoc-ref (just input) "secondaryFiles") - (compose just - (cut normalize-secondary-files <> #t))))) - input)) + (maybe-assoc-set input + (cons "default" + (maybe-let* ((default (maybe-assoc-ref (just input) "default"))) + (just (normalize-input + (coerce-type default + (formal-parameter-type (assoc-ref input "type"))))))) + (cons "secondaryFiles" + (maybe-bind (maybe-assoc-ref (just input) "secondaryFiles") + (compose just + (cut normalize-secondary-files <> #t)))))) (define (normalize-formal-output output) "Normalize formal @var{output}." - (if (some-file-type? (formal-parameter-type (assoc-ref output "type"))) - (maybe-assoc-set output - (cons "secondaryFiles" - (maybe-bind (maybe-assoc-ref (just output) "secondaryFiles") - (compose just - (cut normalize-secondary-files <> #f))))) - output)) + (maybe-assoc-set output + (cons "secondaryFiles" + (maybe-bind (maybe-assoc-ref (just output) "secondaryFiles") + (compose just + (cut normalize-secondary-files <> #f)))))) (define (normalize-base-command maybe-base-command) "Normalize @var{base-command} of @code{CommandLineTool} class workflow." @@ -250,10 +261,23 @@ array of array of @code{File}s, etc. Else, return @code{#f}" (define (read-workflow workflow-file) "Read CWL workflow (of any class) from @var{workflow-file}." - (call-with-current-directory (dirname workflow-file) - ;; TODO: Implement $import directive. - (cut normalize-workflow - (preprocess-include (read-yaml-file (basename workflow-file)))))) + (guard (c ((and (who-condition? c) + (eq? (condition-who c) + 'read-yaml-file) + (irritants-condition? c)) + (user-error "Unable to read workflow file ~a: ~a" + workflow-file + (string-join (condition-irritants c) + ": ")))) + (let ((workflow-path (guard (c ((unsupported-uri-scheme? c) + (user-error "Unsupported URI scheme ~a in ~a" + (unsupported-uri-scheme-scheme c) + workflow-file))) + (location->path workflow-file)))) + (call-with-current-directory (dirname workflow-path) + ;; TODO: Implement $import directive. + (cut normalize-workflow + (preprocess-include (read-yaml-file (basename workflow-path)))))))) (define (normalize-input input) "Normalize actual @var{input}." @@ -266,34 +290,112 @@ array of array of @code{File}s, etc. Else, return @code{#f}" (canonicalize-file-value input)) (else input))) -(define (read-inputs inputs-file) - "Read @var{inputs-file} resolving file paths if any." - (call-with-current-directory (dirname inputs-file) - (lambda () - (map (match-lambda - ((input-id . input) - (cons input-id - (normalize-input input)))) - ;; Even though YAML is a superset of JSON, use the JSON - ;; reader if possible; it is simpler and less prone to type - ;; ambiguities. - (if (string=? (file-name-extension inputs-file) - ".json") - (call-with-input-file (basename inputs-file) - json->scm) - (read-yaml-file (basename inputs-file))))))) +(define (read-inputs inputs-file types) + "Read @var{inputs-file} resolving file paths if any. If @var{inputs-file} is an +YAML file, coerce inputs to @var{types}. @var{types} is an association list +mapping input identifiers to CWL types." + (guard (c ((and (who-condition? c) + (memq (condition-who c) + '(read-json-file read-yaml-file)) + (irritants-condition? c)) + (user-error "Unable to read inputs file ~a: ~a" + inputs-file + (string-join (condition-irritants c) + ": ")))) + (let ((inputs-path (guard (c ((unsupported-uri-scheme? c) + (user-error "Unsupported URI scheme ~a in ~a" + (unsupported-uri-scheme-scheme c) + inputs-file))) + (location->path inputs-file)))) + (call-with-current-directory (dirname inputs-path) + (lambda () + (map (match-lambda + ((input-id . input) + (cons input-id + (normalize-input input)))) + ;; Even though YAML is a superset of JSON, use the JSON + ;; reader if possible; it is simpler and less prone to type + ;; ambiguities. + (if (string=? (file-name-extension inputs-path) + ".json") + (read-json-file (basename inputs-path)) + (map (match-lambda + ((input-id . input) + (cons input-id + (cond + ((assoc-ref types input-id) + => (cut coerce-type input <>)) + (else input))))) + (read-yaml-file (basename inputs-path)))))))))) + +(define (read-workflow+inputs workflow-file inputs-file) + (let ((workflow-cwl (read-workflow workflow-file))) + (values workflow-cwl + (read-inputs inputs-file + (vector-map->list (lambda (formal-input) + (cons (assoc-ref formal-input "id") + (formal-parameter-type + (assoc-ref formal-input "type")))) + (assoc-ref workflow-cwl "inputs")))))) (define (coerce-type val type) - "Coerce @var{val} to @var{type}." + "Coerce @var{val} to CWL @var{type}." ;; This function exists to handle YAML's type ambiguities. (case type ((boolean) (cond ((member val (list "true" "yes")) #t) ((member val (list "false" "no")) #f) - (else (error "Unable to coerce value to type" val type)))) - ((number) - (if (number? val) + (else (raise-exception (type-coercion-violation val type))))) + ((int float double) + (cond + ((number? val) val) + ((string? val) (string->number val)) + (else (raise-exception (type-coercion-violation val type))))) + ((string) + (if (string? val) val - (string->number val))) - (else val))) + (raise-exception (type-coercion-violation val type)))) + ((File) + (if (and (list? val) + (string=? (assoc-ref val "class") + "File")) + val + (raise-exception (type-coercion-violation val type)))) + (else + (cond + ((cwl-array-type? type) + (vector-map (cut coerce-type <> (cwl-array-type-subtype type)) + val)) + ((cwl-union-type? type) + (match (cwl-union-type-subtypes type) + (() + (raise-exception (type-coercion-violation val type))) + ((head-subtype tail-subtypes ...) + (guard (c ((type-coercion-violation? c) + (coerce-type val (cwl-union-type tail-subtypes)))) + (coerce-type val head-subtype))))) + (else + (error "Invalid type to coerce to" type)))))) + +(define (read-json-file file) + "Read JSON @var{file} and return scheme tree." + (guard (c (else + (raise-exception + (condition (make-who-condition 'read-json-file) + c)))) + (call-with-input-file file + json->scm))) + +(define (read-yaml-file file) + "Read YAML @var{file} and return scheme tree." + (guard (c ((and (message-condition? c) + (string-prefix? "read-yaml-file:" (condition-message c))) + (raise-exception + (condition (make-who-condition 'read-yaml-file) + (make-irritants-condition + (match (string-split (condition-message c) #\:) + ((_ message file) + (list (string-trim message) + (string-trim file))))))))) + (yaml:read-yaml-file file))) diff --git a/ravanan/single-machine.scm b/ravanan/single-machine.scm index 854dd2c..b03137f 100644 --- a/ravanan/single-machine.scm +++ b/ravanan/single-machine.scm @@ -20,6 +20,7 @@ #:use-module (srfi srfi-26) #:use-module (ice-9 match) #:use-module (ravanan work monads) + #:use-module (ravanan work ui) #:export (submit-job)) (define (submit-job environment stdout-file stderr-file script) @@ -34,9 +35,8 @@ else @code{#f}. The return value is state-monadic." ((name . value) (setenv name value))) environment) - (format (current-error-port) - "Running ~a~%" - script) + (log-info "Running ~a~%" + script) (zero? (with-output-to-file stdout-file (lambda () (with-error-to-file stderr-file diff --git a/ravanan/store.scm b/ravanan/store.scm index 5cb4a64..a52bb50 100644 --- a/ravanan/store.scm +++ b/ravanan/store.scm @@ -17,22 +17,29 @@ ;;; along with ravanan. If not, see <https://www.gnu.org/licenses/>. (define-module (ravanan store) + #:use-module (srfi srfi-1) #:use-module (srfi srfi-26) + #:use-module (srfi srfi-71) #:use-module (ice-9 filesystem) + #:use-module (gcrypt hash) + #:use-module (guix base16) + #:use-module (guix base32) + #:use-module (guix build utils) #:use-module (ravanan work command-line-tool) #:use-module (ravanan work monads) + #:use-module (ravanan work ui) + #:use-module (ravanan work utils) #:use-module (ravanan work vectors) #:export (%store-files-directory %store-data-directory %store-logs-directory make-store - script->store-files-directory - script->store-data-file - script->store-stdout-file - script->store-stderr-file - intern-file - store-item-name)) + step-store-files-directory + step-store-data-file + step-store-stdout-file + step-store-stderr-file + intern-file)) (define %store-files-directory "files") @@ -46,40 +53,68 @@ (define (make-store store) "Make @var{store} directory and initialize with subdirectories. If @var{store} already exists, do nothing." - (unless (file-exists? store) - (format (current-error-port) - "store ~a does not exist; creating it~%" - store) - (make-directories store) - (for-each (lambda (directory) - (mkdir (expand-file-name directory store))) - (list %store-files-directory - %store-data-directory - %store-logs-directory)))) - -(define (script->store-files-directory script store) - "Return the store files directory in @var{store} corresponding to @var{script} -path." + (let ((store-subdirectories (map (cut expand-file-name <> store) + (list %store-files-directory + %store-data-directory + %store-logs-directory)))) + (unless (and (file-exists? store) + (every file-exists? store-subdirectories)) + (log-warning "store ~a does not exist or is not initialized; setting it up" + store)) + ;; The store directory may exist but the store subdirectories may not. We + ;; need to be careful to create those as well. + (for-each make-directories store-subdirectories))) + +(define (sha1-hash-sexp tree) + (bytevector->base32-string + (let ((port get-hash (open-hash-port (hash-algorithm sha1)))) + ;; tree should probably be canonicalized using canonical S-expressions or + ;; similar. But, it doesn't matter much for our purposes. write already + ;; canonicalizes in a way. In the unlikely case of a problem, the worst + ;; that can happen is that we recompute all steps of the workflow. + (write tree port) + (close port) + (get-hash)))) + +(define (step-store-basename script inputs) + "Return the basename in the store for files of CWL step with @var{script} and +@var{inputs}." + (string-append (sha1-hash-sexp (cons script (canonicalize-json inputs))) + "-" + (strip-store-file-name script))) + +(define (step-store-files-directory script inputs store) + "Return the @var{store} files directory for CWL step with @var{script} and +@var{inputs}." (expand-file-name (file-name-join* %store-files-directory - (basename script)) + (step-store-basename script inputs)) store)) -(define (script->store-data-file script store) - "Return the store data file in @var{store} corresponding to @var{script} path." +(define (step-store-data-file script inputs store) + "Return the @var{store} data file for CWL step with @var{script} and +@var{inputs}." (expand-file-name (file-name-join* %store-data-directory - (string-append (basename script) ".json")) + (string-append + (step-store-basename script inputs) + ".json")) store)) -(define (script->store-stdout-file script store) - "Return the store stdout file in @var{store} corresponding to @var{script} path." +(define (step-store-stdout-file script inputs store) + "Return the @var{store} stdout file for CWL step with @var{script} and +@var{inputs}." (expand-file-name (file-name-join* %store-logs-directory - (string-append (basename script) ".stdout")) + (string-append + (step-store-basename script inputs) + ".stdout")) store)) -(define (script->store-stderr-file script store) - "Return the store stderr file in @var{store} corresponding to @var{script} path." +(define (step-store-stderr-file script inputs store) + "Return the @var{store} stderr file for CWL step with @var{script} and +@var{inputs}." (expand-file-name (file-name-join* %store-logs-directory - (string-append (basename script) ".stderr")) + (string-append + (step-store-basename script inputs) + ".stderr")) store)) (define (same-filesystem? path1 path2) @@ -88,6 +123,15 @@ Else, return @code{#f}." (= (stat:dev (stat path1)) (stat:dev (stat path2)))) +(define (link-or-copy source destination) + "Hard link @var{source} to @var{destination} if possible. Else, copy it." + ;; Hard link if the source file is on the same filesystem as the destination + ;; directory. Else, copy. + ((if (same-filesystem? source (dirname destination)) + link + copy-file) + source destination)) + (define (intern-file file store) "Intern @code{File} type object @var{file} into the ravanan @var{store} unless it is already a store path. Return an updated @code{File} type object with the @@ -96,8 +140,9 @@ interned path and location." (checksum (assoc-ref file "checksum")) (sha1 (if (and checksum (string-prefix? "sha1$" checksum)) - (string-drop checksum (string-length "sha1$")) - (sha1-hash path))) + (base16-string->bytevector + (string-drop checksum (string-length "sha1$"))) + (sha1-hash-bytes path))) (interned-path (if (string-prefix? store path) ;; If file is already a store path, return it as is. @@ -108,25 +153,19 @@ interned path and location." (let ((interned-path (expand-file-name (file-name-join* %store-files-directory - (string-append sha1 + (string-append (bytevector->base32-string sha1) "-" - (basename path))) + (basename path)) + (basename path)) store))) (if (file-exists? interned-path) - (format (current-error-port) - "~a previously interned into store as ~a~%" - path interned-path) + (log-debug "~a previously interned into store as ~a~%" + path interned-path) (begin - (format (current-error-port) - "Interning ~a into store as ~a~%" - path interned-path) - ;; Hard link if on the same filesystem. Else, copy. - ((if (same-filesystem? path - (expand-file-name %store-files-directory - store)) - link - copy-file) - path interned-path))) + (log-info "Interning ~a into store as ~a~%" + path interned-path) + (mkdir (dirname interned-path)) + (link-or-copy path interned-path))) interned-path)))) (maybe-assoc-set file (cons "location" (just (string-append "file://" interned-path))) @@ -140,11 +179,4 @@ interned path and location." (just (vector-map (cut intern-file <> store) secondary-files))))))) -;; Length of a base-16 encoded SHA1 hash -(define %store-hash-length 40) -(define (store-item-name path) - "Return the basename of store item @var{path} with the store hash stripped out." - (string-drop (basename path) - ;; the hash and the dash after the hash - (1+ %store-hash-length))) diff --git a/ravanan/utils.scm b/ravanan/utils.scm index a76a14c..fbf71ed 100644 --- a/ravanan/utils.scm +++ b/ravanan/utils.scm @@ -1,5 +1,5 @@ ;;; ravanan --- High-reproducibility CWL runner powered by Guix -;;; Copyright © 2024 Arun Isaac <arunisaac@systemreboot.net> +;;; Copyright © 2024, 2025 Arun Isaac <arunisaac@systemreboot.net> ;;; ;;; This file is part of ravanan. ;;; @@ -22,7 +22,8 @@ #:use-module (ice-9 filesystem) #:use-module (ice-9 match) #:export (string-trim-prefix - load-script)) + load-script) + #:declarative? #f) (define (string-trim-prefix prefix str) "Remove @var{prefix} from @var{str} if it exists. Else, return @var{str} as is." @@ -54,4 +55,4 @@ before loading script." ;; define-module invocation" warning during compilation. But, it is ;; probably safe to ignore this warning since we use load only within a ;; dummy module. - (load (canonicalize-path script-file)))))) + (load script-file))))) diff --git a/ravanan/work/command-line-tool.scm b/ravanan/work/command-line-tool.scm index d33c80c..0157b2c 100644 --- a/ravanan/work/command-line-tool.scm +++ b/ravanan/work/command-line-tool.scm @@ -17,8 +17,10 @@ ;;; along with ravanan. If not, see <https://www.gnu.org/licenses/>. (define-module (ravanan work command-line-tool) + #:use-module (rnrs conditions) #:use-module (rnrs exceptions) #:use-module (srfi srfi-1) + #:use-module (srfi srfi-9 gnu) #:use-module (srfi srfi-26) #:use-module (ice-9 filesystem) #:use-module (ice-9 format) @@ -29,9 +31,14 @@ #:use-module (json) #:use-module (ravanan work monads) #:use-module (ravanan work types) + #:use-module (ravanan work ui) #:use-module (ravanan work utils) #:use-module (ravanan work vectors) - #:export (value->string + #:export (unsupported-uri-scheme + unsupported-uri-scheme? + unsupported-uri-scheme-scheme + + value->string call-with-current-directory object-type match-type @@ -39,11 +46,26 @@ formal-parameter-type run-command sha1-hash + sha1-hash-bytes checksum location->path canonicalize-file-value secondary-path - evaluate-javascript)) + evaluate-javascript + + command-line-binding + command-line-binding? + command-line-binding-position + command-line-binding-prefix + command-line-binding-type + command-line-binding-value + command-line-binding-item-separator + command-line-binding->args + build-command)) + +(define-condition-type &unsupported-uri-scheme &serious + unsupported-uri-scheme unsupported-uri-scheme? + (scheme unsupported-uri-scheme-scheme)) (define (value->string x) "Convert value @var{x} to a string." @@ -76,9 +98,9 @@ directory after THUNK returns." ((vector? obj) (match obj (#(head _ ...) - (array-type (object-type head))) + (cwl-array-type (object-type head))) (#() - (array-type 'Any)))) + (cwl-array-type 'Any)))) ;; File and Directory objects ((assoc-ref obj "class") => string->symbol) (else @@ -101,16 +123,21 @@ example, when @var{type} is a union type." ;; CWL specification needs separate types for these. ((eq? type 'double) (match-type obj 'float)) + ;; Accept ints as floats too. + ((eq? type 'float) + (and (memq (object-type obj) + (list 'int 'float)) + 'float)) ;; Recursively match type of every element of array. - ((array-type? type) + ((cwl-array-type? type) (and (vector? obj) - (every (cut match-type <> (array-type-subtype type)) + (every (cut match-type <> (cwl-array-type-subtype type)) (vector->list obj)) type)) ;; Match any one of the subtypes of the union type. - ((union-type? type) + ((cwl-union-type? type) (any (cut match-type obj <>) - (union-type-subtypes type))) + (cwl-union-type-subtypes type))) ;; Else, match the exact type of the object. (else (and (eq? (object-type obj) @@ -144,15 +171,15 @@ example, when @var{type} is a union type." 'directory) type)) (_ #f))) - ((array-type? type) + ((cwl-array-type? type) (and (every (lambda (m) (glob-match-type (list m) - (array-type-subtype type))) + (cwl-array-type-subtype type))) matches) type)) - ((union-type? type) + ((cwl-union-type? type) (any (cut glob-match-type matches <>) - (union-type-subtypes type))))) + (cwl-union-type-subtypes type))))) ;; TODO: Support all types. (define (formal-parameter-type type) @@ -160,7 +187,7 @@ example, when @var{type} is a union type." (cond ;; Union types ((vector? type) - (apply union-type + (apply cwl-union-type (map formal-parameter-type (vector->list type)))) ;; Other types ((string? type) @@ -168,7 +195,7 @@ example, when @var{type} is a union type." ;; Array types ((string=? (assoc-ref type "type") "array") - (array-type (formal-parameter-type (assoc-ref type "items")))))) + (cwl-array-type (formal-parameter-type (assoc-ref type "items")))))) (define (run-command command stdin-file stdout-file success-codes) "Run @var{command} passing in @var{stdin-file} as the standard input @@ -202,31 +229,42 @@ status in @var{success-codes} as success. Error out otherwise." (with-output-to-port (current-error-port) (cut invoke command)))))) +(define (sha1-hash-bytes file) + "Return the SHA1 hash of @var{file} as a bytevector." + (file-hash (lookup-hash-algorithm 'sha1) + file)) + (define (sha1-hash file) "Return the SHA1 hash of @var{file} as a hexadecimal string." - (bytevector->base16-string - (file-hash (lookup-hash-algorithm 'sha1) - file))) + (bytevector->base16-string (sha1-hash-bytes file))) (define (checksum file) "Return the checksum of @var{file} as defined in the CWL specification." (string-append "sha1$" (sha1-hash file))) (define (location->path location) - "Convert file @var{location} URI to path. Tolerate invalid locations that are -actually paths." + "Convert file @var{location} @code{file://} URI to path. Tolerate invalid +locations that are actually paths. Raise an @code{&unsupported-uri-scheme} +condition on unsupported URI schemes." (cond - ;; If location is an URI, parse the URI and return the path part. - ((string->uri location) => uri-path) + ;; If location is a file:// URI, parse the URI and return the path part. + ((string->uri location) + => (lambda (uri) + (if (eq? (uri-scheme uri) 'file) + (uri-path uri) + (raise-exception (unsupported-uri-scheme (uri-scheme uri)))))) ;; location is actually a path; return as is. (else location))) (define (canonicalize-file-value value) "Canonicalize @code{File} type @var{value} adding missing fields." - (let ((path (or (assoc-ref value "path") - (location->path (assoc-ref value "location")))) - (location (or (assoc-ref value "location") - (string-append "file://" (assoc-ref value "path"))))) + (let* ((path (canonicalize-path + (or (assoc-ref value "path") + (location->path (assoc-ref value "location"))))) + ;; The location field may actually be a path instead of an URI; that's + ;; invalid. So, unconditionally reconstruct the location URI from path. + ;; This assumes they are always file:// URIs, but that works for now. + (location (string-append "file://" path))) ;; Populate all fields of the File type value. (maybe-assoc-set `(("class" . "File") ("location" . ,location) @@ -262,3 +300,157 @@ actually paths." (format #f "--eval=~a console.log(\"%j\", ~a)" preamble expression)) json->scm))) + +(define-immutable-record-type <command-line-binding> + (command-line-binding position prefix type value item-separator) + command-line-binding? + (position command-line-binding-position) + (prefix command-line-binding-prefix) + (type command-line-binding-type) + (value command-line-binding-value) + (item-separator command-line-binding-item-separator)) + +(define (command-line-binding->args binding) + "Return a list of arguments for @var{binding}. The returned list may +contain strings or G-expressions. The G-expressions may reference an +@code{inputs-directory} variable that must be defined in the context in which +the G-expressions are inserted." + (let ((prefix (command-line-binding-prefix binding)) + (type (command-line-binding-type binding)) + (value (command-line-binding-value binding))) + (cond + ((eq? type 'boolean) + (if value + ;; TODO: Error out if boolean input has no prefix? + (maybe->list prefix) + (list))) + ((eq? type 'null) (list)) + ((cwl-array-type? type) + (match value + ;; Empty arrays should be noops. + (() (list)) + (_ + (let ((args (append-map command-line-binding->args + value))) + (append (maybe->list prefix) + (from-maybe + (maybe-let* ((item-separator (command-line-binding-item-separator binding))) + (just (list (string-join args item-separator)))) + args)))))) + (else + (append (maybe->list prefix) + (list (case type + ((string) + value) + ((int float) + (number->string value)) + ((File) + (assoc-ref* value "path")) + (else + (user-error "Invalid formal input type ~a" + type))))))))) + +(define (build-command base-command arguments formal-inputs inputs) + "Return a list of @code{<command-line-binding>} objects for a +@code{CommandLineTool} class workflow with @var{base-command}, @var{arguments}, +@var{formal-inputs} and @var{inputs}. The @code{value} field of the returned +@code{<command-line-binding>} objects may be strings or G-expressions. The +G-expressions may reference @var{inputs} and @var{runtime} variables that must +be defined in the context in which the G-expressions are inserted." + (define (argument->command-line-binding i argument) + (command-line-binding (cond + ((assoc-ref argument "position") + => string->number) + (else i)) + (maybe-assoc-ref (just argument) "prefix") + 'string + (value->string (assoc-ref* argument "valueFrom")) + %nothing)) + + (define (collect-bindings ids+inputs+types+bindings) + (append-map id+input+type-tree+binding->command-line-binding + ids+inputs+types+bindings)) + + (define id+input+type-tree+binding->command-line-binding + (match-lambda + ;; We stretch the idea of an input id, by making it an address that + ;; identifies the exact location of a value in a tree that possibly + ;; contains array types. For example, '("foo") identifies the input "foo"; + ;; '("foo" 1) identifies the 1th element of the array input "foo"; '("foo" + ;; 37 1) identifies the 1th element of the 37th element of the array input + ;; "foo"; etc. + ((id input type-tree binding) + ;; Check type. + (let* ((type (formal-parameter-type type-tree)) + (matched-type (match-type input type))) + (unless matched-type + (error input "Type mismatch" input type)) + (let ((position + (from-maybe + (maybe-let* ((position (maybe-assoc-ref binding "position"))) + (just (string->number position))) + ;; FIXME: Why a default value of 0? + 0)) + (prefix (maybe-assoc-ref binding "prefix"))) + (cond + ;; Recurse over array types. + ;; TODO: Implement record and enum types. + ((cwl-array-type? matched-type) + (list (command-line-binding + position + prefix + matched-type + (append-map (lambda (i input) + (id+input+type-tree+binding->command-line-binding + (list (append id (list i)) + input + (assoc-ref type-tree "items") + (maybe-assoc-ref (just type-tree) + "inputBinding")))) + (iota (vector-length input)) + (vector->list input)) + (maybe-assoc-ref binding "itemSeparator")))) + (else + (list (command-line-binding position + prefix + matched-type + (apply json-ref inputs id) + %nothing))))))))) + + ;; For details of this algorithm, see §4.1 Input binding of the CWL + ;; 1.2 CommandLineTool specification: + ;; https://www.commonwl.org/v1.2/CommandLineTool.html#Input_binding + (append + ;; Insert elements from baseCommand. + (vector->list (or base-command + (vector))) + (sort + (append + ;; Collect CommandLineBinding objects from arguments; assign a sorting key. + (vector->list + (vector-map-indexed argument->command-line-binding + (or arguments + #()))) + ;; Collect CommandLineBinding objects from the inputs schema; assign a + ;; sorting key. + (collect-bindings + (filter-map (lambda (formal-input) + ;; Exclude formal inputs without an inputBinding. + (and (assoc "inputBinding" formal-input) + (let ((id (assoc-ref formal-input "id"))) + (list (list id) + ;; The inputs alist has been fully resolved + ;; with default and null values. It is + ;; guaranteed to have a mapping for every input + ;; id. + (assoc-ref inputs id) + (or (assoc-ref formal-input "type") + (user-error "Type of input ~a not specified" + id)) + (maybe-assoc-ref (just formal-input) + "inputBinding"))))) + (vector->list formal-inputs)))) + ;; Sort elements using the assigned sorting keys. + (lambda (binding1 binding2) + (< (command-line-binding-position binding1) + (command-line-binding-position binding2)))))) diff --git a/ravanan/work/types.scm b/ravanan/work/types.scm index 8b17872..ca49c33 100644 --- a/ravanan/work/types.scm +++ b/ravanan/work/types.scm @@ -1,5 +1,5 @@ ;;; ravanan --- High-reproducibility CWL runner powered by Guix -;;; Copyright © 2024 Arun Isaac <arunisaac@systemreboot.net> +;;; Copyright © 2024, 2025 Arun Isaac <arunisaac@systemreboot.net> ;;; ;;; This file is part of ravanan. ;;; @@ -18,22 +18,22 @@ (define-module (ravanan work types) #:use-module (srfi srfi-9 gnu) - #:export (array-type - array-type? - array-type-subtype - union-type - union-type? - union-type-subtypes)) + #:export (cwl-array-type + cwl-array-type? + cwl-array-type-subtype + cwl-union-type + cwl-union-type? + cwl-union-type-subtypes)) -(define-immutable-record-type <array-type> - (array-type subtype) - array-type? - (subtype array-type-subtype)) +(define-immutable-record-type <cwl-array-type> + (cwl-array-type subtype) + cwl-array-type? + (subtype cwl-array-type-subtype)) (define-immutable-record-type <union-type> - (-union-type subtypes) - union-type? - (subtypes union-type-subtypes)) + (-cwl-union-type subtypes) + cwl-union-type? + (subtypes cwl-union-type-subtypes)) -(define (union-type . subtypes) - (-union-type subtypes)) +(define (cwl-union-type . subtypes) + (-cwl-union-type subtypes)) diff --git a/ravanan/work/ui.scm b/ravanan/work/ui.scm index 758f2cc..5a9ceeb 100644 --- a/ravanan/work/ui.scm +++ b/ravanan/work/ui.scm @@ -1,5 +1,5 @@ ;;; ravanan --- High-reproducibility CWL runner powered by Guix -;;; Copyright © 2024 Arun Isaac <arunisaac@systemreboot.net> +;;; Copyright © 2024, 2025 Arun Isaac <arunisaac@systemreboot.net> ;;; ;;; This file is part of ravanan. ;;; @@ -17,16 +17,58 @@ ;;; along with ravanan. If not, see <https://www.gnu.org/licenses/>. (define-module (ravanan work ui) - #:export (warning + #:use-module (srfi srfi-1) + #:use-module (srfi srfi-26) + #:export (%log-level + log + log-debug + log-info + log-warning + log-error + log-critical + warning user-error)) +(define %log-level + (make-parameter #f)) + +(define (log-level>=? level1 level2) + "Return @code{#t} if log @var{level1} has greater than or equal severity to log +@var{level2}. Else, return @code{#f}." + (let ((levels '(debug info warning error critical))) + (>= (list-index (cut eq? level1 <>) levels) + (list-index (cut eq? level2 <>) levels)))) + +(define (log level fmt . args) + "Log message when current log level is not @code{#f} and @var{level} has greater +than or equal severity to the current log level. @var{fmt} and @var{args} are +arguments to @code{format}." + (when (and (%log-level) + (log-level>=? level (%log-level))) + (apply format (current-error-port) fmt args) + (newline))) + +(define log-debug + (cut log 'debug <> <...>)) + +(define log-info + (cut log 'info <> <...>)) + +(define log-warning + (cut log 'warning <> <...>)) + +(define log-error + (cut log 'error <> <...>)) + +(define log-critical + (cut log 'critical <> <...>)) + (define (warning fmt . args) "Print warning. @var{fmt} and @var{args} are arguments to format." - (apply format (current-error-port) fmt args) - (newline)) + (apply log-warning fmt args)) (define (user-error fmt . args) "Print error message and exit with failure. @var{fmt} and @var{args} are arguments to format." - (apply warning fmt args) + (apply log-error fmt args) (exit #f)) diff --git a/ravanan/workflow.scm b/ravanan/workflow.scm index 2770f31..2fd00d6 100644 --- a/ravanan/workflow.scm +++ b/ravanan/workflow.scm @@ -27,6 +27,8 @@ #:use-module (ice-9 filesystem) #:use-module (ice-9 match) #:use-module (web uri) + #:use-module (guix inferior) + #:use-module (guix store) #:use-module (ravanan batch-system) #:use-module (ravanan command-line-tool) #:use-module (ravanan job-state) @@ -59,16 +61,28 @@ (define-condition-type &job-failure &error job-failure job-failure? - (script job-failure-script)) + (script job-failure-script) + (inputs job-failure-inputs)) (define-immutable-record-type <scheduler-proc> - (scheduler-proc name cwl scatter scatter-method) + (-scheduler-proc name script-or-propnet formal-inputs formal-outputs + resource-requirement scatter scatter-method) scheduler-proc? (name scheduler-proc-name) - (cwl scheduler-proc-cwl) + (script-or-propnet scheduler-proc-script-or-propnet) + (formal-inputs scheduler-proc-formal-inputs) + (formal-outputs scheduler-proc-formal-outputs) + (resource-requirement scheduler-proc-resource-requirement) (scatter scheduler-proc-scatter) (scatter-method scheduler-proc-scatter-method)) +(define* (scheduler-proc name script-or-propnet formal-inputs formal-outputs + #:optional + (resource-requirement %nothing) + (scatter %nothing) (scatter-method %nothing)) + (-scheduler-proc name script-or-propnet formal-inputs formal-outputs + resource-requirement scatter scatter-method)) + (define-immutable-record-type <command-line-tool-state> (command-line-tool-state job-state formal-outputs) command-line-tool-state? @@ -159,75 +173,130 @@ requirements and hints of the step." (subset-requirements step-hints))))))) (define (optional-input? input) - "Return @code{#t} if @var{input} is optional. Else, return @code{#f}." + "Return truthy value if @var{input} is optional. Else, return @code{#f}." ;; Inputs that either have a default or accept null values are optional. - (and (or (assoc-ref input "default") - (match-type 'null - (formal-parameter-type - (assoc-ref* input "type")))) - (assoc-ref input "id"))) - -(define* (command-line-tool->propagator name cwl) - "Convert @code{CommandLineTool} workflow @var{cwl} of @var{name} to a -propagator." - (propagator name - (scheduler-proc name cwl %nothing %nothing) - (vector-map->list (lambda (input) - (cons (assoc-ref input "id") - (assoc-ref input "id"))) - (assoc-ref cwl "inputs")) - ;; Inputs that either have a default or accept null values are - ;; optional. - (vector-filter-map->list optional-input? - (assoc-ref cwl "inputs")) - (vector-map->list (lambda (output) - (cons (assoc-ref output "id") - (assoc-ref output "id"))) - (assoc-ref cwl "outputs")))) - -(define* (workflow-class->propnet name cwl scheduler batch-system) + (or (assoc "default" input) + (match-type 'null + (formal-parameter-type + (assoc-ref* input "type"))))) + +(define* (workflow->scheduler-proc name cwl scheduler + manifest-file inferior scratch store + batch-system guix-store + #:optional + (scatter %nothing) + (scatter-method %nothing)) + "Return a @code{<scheduler-proc>} object for @var{cwl} workflow named @var{name} +scheduled using @var{scheduler}. @var{scatter} and @var{scatter-method} are the +CWL scattering properties of this step. Build @code{CommandLineTool} workflow +scripts using @var{guix-store}. + +@var{manifest-file}, @var{scratch}, @var{store} and @var{batch-system} are the +same as in @code{run-workflow}. @var{inferior} is the same as in +@code{build-command-line-tool-script} from @code{(ravanan command-line-tool)}." + ;; TODO: Statically validate workflow before/while building it. + (scheduler-proc name + (let ((class (assoc-ref* cwl "class"))) + (cond + ((string=? class "CommandLineTool") + (run-with-store guix-store + (build-command-line-tool-script name + manifest-file + inferior + cwl + scratch + store + batch-system))) + ((string=? class "ExpressionTool") + (error "Workflow class not implemented yet" class)) + ((string=? class "Workflow") + (workflow-class->propnet cwl + scheduler + manifest-file + inferior + scratch + store + batch-system + guix-store)) + (else + (assertion-violation class "Unexpected workflow class")))) + (assoc-ref* cwl "inputs") + (assoc-ref* cwl "outputs") + (find-requirement (inherit-requirements + (or (assoc-ref cwl "requirements") + #()) + (or (assoc-ref cwl "hints") + #())) + "ResourceRequirement") + scatter + scatter-method)) + +(define* (workflow-class->propnet cwl scheduler + manifest-file inferior scratch store + batch-system guix-store) "Return a propagator network scheduled using @var{scheduler} on -@var{batch-system} for @var{cwl}, a @code{Workflow} class workflow with -@var{name}." +@var{batch-system} for @var{cwl}, a @code{Workflow} class workflow. Build +@code{CommandLineTool} workflow scripts using @var{guix-store}. + +@var{manifest-file}, @var{scratch}, @var{store}, @var{batch-system} and +@var{guix-daemon-socket} are the same as in @code{run-workflow}. @var{inferior} +is the same as in @code{build-command-line-tool-script} from @code{(ravanan +command-line-tool)}." (define (normalize-scatter-method scatter-method) (assoc-ref* '(("dotproduct" . dot-product) ("nested_crossproduct" . nested-cross-product) ("flat_crossproduct" . flat-cross-product)) scatter-method)) + (define (step->scheduler-proc step parent-requirements parent-hints) + (let ((run (assoc-ref* step "run"))) + (workflow->scheduler-proc (assoc-ref* step "id") + (inherit-requirements-and-hints + run + parent-requirements + parent-hints + (or (assoc-ref step "requirements") + #()) + (or (assoc-ref step "hints") + #())) + scheduler + manifest-file + inferior + scratch + store + batch-system + guix-store + (maybe-assoc-ref (just step) "scatter") + (maybe-bind (maybe-assoc-ref (just step) "scatterMethod") + (compose just normalize-scatter-method))))) + (define (step->propagator step) - (let* ((step-id (assoc-ref* step "id")) - (step-propagator - (command-line-tool->propagator step-id (assoc-ref* step "run")))) - (propagator (propagator-name step-propagator) - (let ((proc (propagator-proc step-propagator))) - (scheduler-proc (scheduler-proc-name proc) - (inherit-requirements-and-hints - (scheduler-proc-cwl proc) - (or (assoc-ref cwl "requirements") - #()) - (or (assoc-ref cwl "hints") - #()) - (or (assoc-ref step "requirements") - #()) - (or (assoc-ref step "hints") - #())) - (maybe-assoc-ref (just step) "scatter") - (maybe-bind (maybe-assoc-ref (just step) "scatterMethod") - (compose just normalize-scatter-method)))) - (map (match-lambda - ((input-id . _) - (cons input-id - (json-ref step "in" input-id)))) - (propagator-inputs step-propagator)) - (propagator-optional-inputs step-propagator) - (filter-map (match-lambda - ((output . cell) - (and (vector-member output - (assoc-ref* step "out")) - (cons output - (string-append step-id "/" cell))))) - (propagator-outputs step-propagator))))) + (let ((step-id (assoc-ref* step "id")) + (run (assoc-ref* step "run"))) + (propagator step-id + (step->scheduler-proc step + (or (assoc-ref cwl "requirements") + #()) + (or (assoc-ref cwl "hints") + #())) + (vector-map->list (lambda (input) + (let ((input-id (assoc-ref input "id"))) + (cons input-id + (json-ref step "in" input-id)))) + (assoc-ref run "inputs")) + ;; Inputs that either have a default or accept null values are + ;; optional. + (vector-filter-map->list (lambda (input) + (and (optional-input? input) + (assoc-ref input "id"))) + (assoc-ref run "inputs")) + (vector-map->list (lambda (output) + (let ((output-id (assoc-ref output "id"))) + (and (vector-member output-id + (assoc-ref* step "out")) + (cons output-id + (string-append step-id "/" output-id))))) + (assoc-ref run "outputs"))))) (maybe-let* ((requirements (maybe-assoc-ref (just cwl) "requirements"))) (check-requirements requirements @@ -246,26 +315,28 @@ propagator." merge-values scheduler)) -(define* (workflow-scheduler manifest-file channels scratch store batch-system - #:key guix-daemon-socket) +(define* (workflow-scheduler store batch-system) (define (schedule proc inputs scheduler) "Schedule @var{proc} with inputs from the @var{inputs} association list. Return a -state-monadic job state object. @var{proc} may either be a @code{<propnet>} -object or a @code{<scheduler-proc>} object." +state-monadic job state object. @var{proc} must be a @code{<scheduler-proc>} +object." (let* ((name (scheduler-proc-name proc)) - (cwl (scheduler-proc-cwl proc)) + (script-or-propnet (scheduler-proc-script-or-propnet proc)) (scatter (from-maybe (scheduler-proc-scatter proc) #f)) (scatter-method (from-maybe (scheduler-proc-scatter-method proc) - #f)) - (class (assoc-ref* cwl "class"))) + #f))) (if scatter (case scatter-method ((dot-product) (apply state-map (lambda input-elements ;; Recurse with scattered inputs spliced in. - (schedule (scheduler-proc name cwl %nothing %nothing) + (schedule (scheduler-proc name + script-or-propnet + (scheduler-proc-formal-inputs proc) + (scheduler-proc-formal-outputs proc) + (scheduler-proc-resource-requirement proc)) ;; Replace scattered inputs with single ;; elements. (apply assoc-set @@ -281,38 +352,26 @@ object or a @code{<scheduler-proc>} object." ((nested-cross-product flat-cross-product) (error scatter-method "Scatter method not implemented yet"))) - (let* ((formal-inputs (assoc-ref* cwl "inputs")) - ;; We need to resolve inputs after adding defaults since the - ;; default values may contain uninterned File objects. - (inputs (resolve-inputs (add-defaults inputs formal-inputs) - formal-inputs - store))) - (cond - ((string=? class "CommandLineTool") - (state-let* ((job-state - (run-command-line-tool name - manifest-file - channels - cwl - inputs - scratch - store - batch-system - #:guix-daemon-socket guix-daemon-socket))) - (state-return (command-line-tool-state job-state - (assoc-ref* cwl "outputs"))))) - ((string=? class "ExpressionTool") - (error "Workflow class not implemented yet" class)) - ((string=? class "Workflow") - (state-let* ((propnet-state - (schedule-propnet (workflow-class->propnet name - cwl - scheduler - batch-system) - inputs))) - (state-return - (workflow-state propnet-state - (assoc-ref* cwl "outputs")))))))))) + (let ((inputs (resolve-inputs inputs + (scheduler-proc-formal-inputs proc) + store))) + (if (propnet? script-or-propnet) + (state-let* ((propnet-state (schedule-propnet script-or-propnet inputs))) + (state-return + (workflow-state propnet-state + (scheduler-proc-formal-outputs proc)))) + (let ((resource-requirement + (scheduler-proc-resource-requirement proc))) + (state-let* ((job-state + (run-command-line-tool name + script-or-propnet + inputs + resource-requirement + store + batch-system))) + (state-return + (command-line-tool-state job-state + (scheduler-proc-formal-outputs proc)))))))))) (define (poll state) "Return updated state and current status of job @var{state} object as a @@ -341,7 +400,8 @@ state-monadic @code{<state+status>} object. The status is one of the symbols (case status ((failed) (raise-exception (job-failure - (job-state-script job-state)))) + (job-state-script job-state) + (job-state-inputs job-state)))) (else => identity))))))) ;; Poll sub-workflow state. We do not need to check the status here since ;; job failures only occur at the level of a CommandLineTool. @@ -386,7 +446,7 @@ is the class of the workflow." formal-outputs)) (define (capture-output state) - "Return output of completed job @var{state}." + "Return output of completed job @var{state} as a state-monadic value." (cond ((workflow-state? state) (state-let* ((outputs (capture-propnet-output @@ -414,49 +474,30 @@ is the class of the workflow." head-output)))))) (else ;; Log progress and return captured output. - (let ((script (job-state-script (command-line-tool-state-job-state state)))) + (let ((script (job-state-script (command-line-tool-state-job-state state))) + (inputs (job-state-inputs (command-line-tool-state-job-state state)))) (state-return (begin - (format (current-error-port) - "~a completed; logs at ~a and ~a~%" - script - (script->store-stdout-file script store) - (script->store-stderr-file script store)) + (log-info "~a completed; logs at ~a and ~a~%" + script + (step-store-stdout-file script inputs store) + (step-store-stderr-file script inputs store)) (filter-outputs "CommandLineTool" - (capture-command-line-tool-output script store) + (capture-command-line-tool-output script inputs store) (command-line-tool-state-formal-outputs state)))))))) (scheduler schedule poll capture-output)) -(define (add-defaults inputs formal-inputs) - "Add default values from @var{formal-inputs} to @var{inputs}." - (vector-filter-map->list (lambda (formal-input) - (let* ((id (assoc-ref* formal-input "id")) - ;; Try - ;; - the input value - ;; - the default value - ;; - the null value (for optional inputs) - (value (or (assoc-ref inputs id) - (assoc-ref formal-input "default") - 'null)) - (expected-type (formal-parameter-type - (assoc-ref* formal-input "type")))) - (unless (match-type value expected-type) - (user-error "Type mismatch for input `~a'; expected `~a' but got `~a'" - id expected-type (object-type value))) - (and (not (eq? value 'null)) - (cons id value)))) - formal-inputs)) - (define (resolve-inputs inputs formal-inputs store) - "Traverse @var{inputs} and @var{formal-inputs} recursively, intern any -files found into the @var{store} and return a tree of the fully resolved inputs." + "Traverse @var{inputs} and @var{formal-inputs} recursively, add defaults from +@var{formal-inputs}, check types, intern any files found into the @var{store} +and return a tree of the fully resolved inputs." (define (match-secondary-file-pattern input secondary-file) "Return @code{#t} if @var{secondary-file} matches at least one secondary file in @var{input}." (vector-any (lambda (candidate) - (string=? (store-item-name (assoc-ref* candidate "path")) - (secondary-path (store-item-name (assoc-ref* input "path")) + (string=? (basename (assoc-ref* candidate "path")) + (secondary-path (basename (assoc-ref* input "path")) secondary-file))) (or (assoc-ref input "secondaryFiles") (user-error "Missing secondaryFiles in input ~a" @@ -475,15 +516,13 @@ error out." (define (resolve inputs types maybe-secondary-files) (vector-map (lambda (input type-tree maybe-secondary-files) - ;; Check type. - (let* ((type (formal-parameter-type type-tree)) - (matched-type (match-type input type))) - (unless matched-type - (error input "Type mismatch" input type)) + (let ((matched-type + (match-type input + (formal-parameter-type type-tree)))) ;; TODO: Implement record and enum types. (cond ;; Recurse over array types. - ((array-type? matched-type) + ((cwl-array-type? matched-type) (resolve input (make-vector (vector-length input) (assoc-ref type-tree "items")) @@ -503,15 +542,31 @@ error out." types maybe-secondary-files)) + (define (add-defaults-and-check-types inputs formal-inputs) + (vector-map (lambda (formal-input) + (let* ((id (assoc-ref* formal-input "id")) + ;; Try + ;; - the input value + ;; - the default value + ;; - the null value (for optional inputs) + (value (cond + ;; We can't use assoc-ref because #f is also a + ;; valid value. + ((assoc id inputs) => cdr) + ((assoc "default" formal-input) => cdr) + (else 'null))) + (expected-type (formal-parameter-type + (assoc-ref* formal-input "type")))) + (unless (match-type value expected-type) + (user-error "Type mismatch for input `~a'; expected `~a' but got `~a'" + id expected-type (object-type value))) + value)) + formal-inputs)) + (vector-map->list (lambda (input formal-input) (cons (assoc-ref formal-input "id") input)) - (resolve (vector-map (lambda (formal-input) - (let ((id (assoc-ref* formal-input "id"))) - (or (assoc-ref inputs id) - (assoc-ref formal-input "default") - 'null))) - formal-inputs) + (resolve (add-defaults-and-check-types inputs formal-inputs) (vector-map (lambda (formal-input) (let ((id (assoc-ref* formal-input "id"))) (or (assoc-ref formal-input "type") @@ -524,6 +579,39 @@ error out." formal-inputs)) formal-inputs)) +(define (call-with-inferior inferior proc) + "Call @var{proc} with @var{inferior} and return the return value of @var{proc}. +Close @var{inferior} when done, even if @var{proc} exits non-locally." + (dynamic-wind (const #t) + (cut proc inferior) + (cut close-inferior inferior))) + +(define* (build-workflow name cwl scheduler + manifest-file channels scratch store + batch-system + #:optional guix-daemon-socket) + "Build @var{cwl} workflow named @var{name} into a @code{<scheduler-proc>} object +scheduled using @var{scheduler}. When @var{guix-daemon-socket} is specified, +connect to the Guix daemon at that specific socket. Else, connect to the default +socket. + +@var{manifest-file}, @var{channels}, @var{scratch}, @var{store} and +@var{batch-system} are the same as in @code{run-workflow}." + (define builder + (cut workflow->scheduler-proc name cwl scheduler + manifest-file <> scratch store + batch-system <>)) + + (if guix-daemon-socket + (parameterize ((%daemon-socket-uri guix-daemon-socket)) + (build-workflow name cwl scheduler manifest-file channels + scratch store batch-system)) + (with-store guix-store + (if channels + (call-with-inferior (inferior-for-channels channels) + (cut builder <> guix-store)) + (builder #f guix-store))))) + (define* (run-workflow name manifest-file channels cwl inputs scratch store batch-system #:key guix-daemon-socket) @@ -536,37 +624,46 @@ area need not be shared. @var{store} is the path to the shared ravanan store. @var{batch-system} is an object representing one of the supported batch systems. @var{guix-daemon-socket} is the Guix daemon socket to connect to." - (guard (c ((job-failure? c) - (let ((script (job-failure-script c))) - (user-error - "~a failed; logs at ~a and ~a~%" - script - (script->store-stdout-file script store) - (script->store-stderr-file script store))))) - (let ((scheduler (workflow-scheduler - manifest-file channels scratch store batch-system - #:guix-daemon-socket guix-daemon-socket))) + (let* ((scheduler (workflow-scheduler store batch-system)) + (scheduler-proc (build-workflow name + cwl + scheduler + manifest-file + channels + scratch + store + batch-system + guix-daemon-socket))) + ;; All validation of the workflow must happen in build-workflow, and all + ;; validation of the inputs must happen in the schedule function of the + ;; scheduler. No validation must happen here. + (guard (c ((job-failure? c) + (let ((script (job-failure-script c)) + (inputs (job-failure-inputs c))) + (user-error + "~a failed; logs at ~a and ~a~%" + script + (step-store-stdout-file script inputs store) + (step-store-stderr-file script inputs store))))) (run-with-state - (let loop ((mstate ((scheduler-schedule scheduler) - (scheduler-proc name cwl %nothing %nothing) - inputs - scheduler))) - ;; Poll. - (state-let* ((state mstate) - (state+status ((scheduler-poll scheduler) state))) - (if (eq? (state+status-status state+status) - 'pending) - (begin - ;; Pause before looping and polling again so we don't bother the - ;; job server too often. - (sleep (cond - ;; Single machine jobs are run synchronously. So, there - ;; is no need to wait to poll them. - ((eq? batch-system 'single-machine) - 0) - ((slurm-api-batch-system? batch-system) - %job-poll-interval))) - (loop (state-return (state+status-state state+status)))) - ;; Capture outputs. - ((scheduler-capture-output scheduler) - (state+status-state state+status))))))))) + (let loop ((mstate ((scheduler-schedule scheduler) + scheduler-proc inputs scheduler))) + ;; Poll. + (state-let* ((state mstate) + (state+status ((scheduler-poll scheduler) state))) + (if (eq? (state+status-status state+status) + 'pending) + (begin + ;; Pause before looping and polling again so we don't bother the + ;; job server too often. + (sleep (cond + ;; Single machine jobs are run synchronously. So, there + ;; is no need to wait to poll them. + ((eq? batch-system 'single-machine) + 0) + ((slurm-api-batch-system? batch-system) + %job-poll-interval))) + (loop (state-return (state+status-state state+status)))) + ;; Capture outputs. + ((scheduler-capture-output scheduler) + (state+status-state state+status))))))))) diff --git a/releases/ravanan-0.2.0.tar.lz b/releases/ravanan-0.2.0.tar.lz new file mode 100644 index 0000000..1283cae --- /dev/null +++ b/releases/ravanan-0.2.0.tar.lz Binary files differdiff --git a/releases/ravanan-0.2.0.tar.lz.asc b/releases/ravanan-0.2.0.tar.lz.asc new file mode 100644 index 0000000..92cfc20 --- /dev/null +++ b/releases/ravanan-0.2.0.tar.lz.asc @@ -0,0 +1,11 @@ +-----BEGIN PGP SIGNATURE----- + +iQEzBAABCAAdFiEEf3MDQ/Lwnzx3v3nTLiXui2GAK7MFAmkkpyoACgkQLiXui2GA +K7P5eQgArrEdNpI/Qtlb/U4276RYm+RWLVR1fZLCxoD3vvT7VaR+o80DOSsESVKb +qUH7H/GM9teHnH+yGHK3OMjUYj+E9XUV9TMrTk2olxDgr3cj1Atn22zWb3QB2za2 +M1y4VD7Fi7mGiJE/+TzFBGLEY0AHd7S3sNXfi+NF9JScFcOP2vI81NyhAxwxsiC2 +7nJgbIbI332vD2uiOYbB64Vgw0Tz/6bbYsZicOBrU03uBgCJeWf/rny3MzKM4JqR +iToe96igy04+xyvNnM4Eve84RzJQcbPVYxncQNaP8itijxDleOOmqeuOuX0D9q4C +VRUNTcLYCUyQ2UakUlS5E3ARjwZSIw== +=kBK8 +-----END PGP SIGNATURE----- diff --git a/test-data/empty.yaml b/test-data/empty.yaml new file mode 100644 index 0000000..0967ef4 --- /dev/null +++ b/test-data/empty.yaml @@ -0,0 +1 @@ +{} diff --git a/test-data/foo b/test-data/foo new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/test-data/foo diff --git a/test-data/input-file-with-location-only.yaml b/test-data/input-file-with-location-only.yaml new file mode 100644 index 0000000..dccb850 --- /dev/null +++ b/test-data/input-file-with-location-only.yaml @@ -0,0 +1,3 @@ +foo: + class: File + location: foo diff --git a/test-data/input-file-with-path-only.yaml b/test-data/input-file-with-path-only.yaml new file mode 100644 index 0000000..04fdfcd --- /dev/null +++ b/test-data/input-file-with-path-only.yaml @@ -0,0 +1,3 @@ +foo: + class: File + path: foo diff --git a/test-data/inputs-with-type-ambiguities.yaml b/test-data/inputs-with-type-ambiguities.yaml new file mode 100644 index 0000000..119b72f --- /dev/null +++ b/test-data/inputs-with-type-ambiguities.yaml @@ -0,0 +1,5 @@ +number: 13 +flag: true +reverseflag: false +foo: bar +arr: [1, 2, 3] diff --git a/test-data/workflow-for-inputs-with-type-ambiguities.cwl b/test-data/workflow-for-inputs-with-type-ambiguities.cwl new file mode 100644 index 0000000..4e33148 --- /dev/null +++ b/test-data/workflow-for-inputs-with-type-ambiguities.cwl @@ -0,0 +1,11 @@ +class: Workflow +inputs: + number: int + flag: boolean + reverseflag: boolean + foo: string + arr: + type: + type: array + items: int +outputs: [] diff --git a/test-data/workflow-with-a-file-input.cwl b/test-data/workflow-with-a-file-input.cwl new file mode 100644 index 0000000..ba3ab95 --- /dev/null +++ b/test-data/workflow-with-a-file-input.cwl @@ -0,0 +1,4 @@ +class: Workflow +inputs: + foo: File +outputs: [] diff --git a/test-data/workflow-with-default-inputs.cwl b/test-data/workflow-with-default-inputs.cwl new file mode 100644 index 0000000..abc8817 --- /dev/null +++ b/test-data/workflow-with-default-inputs.cwl @@ -0,0 +1,23 @@ +class: Workflow +inputs: + number: + type: int + default: 13 + flag: + type: boolean + default: true + reverseflag: + type: boolean + default: false + foo: + type: string + default: bar + arr: + type: + type: array + items: int + default: + - 1 + - 2 + - 3 +outputs: [] diff --git a/test-data/workflow-with-various-file-type-formals.cwl b/test-data/workflow-with-various-file-type-formals.cwl new file mode 100644 index 0000000..762bf9f --- /dev/null +++ b/test-data/workflow-with-various-file-type-formals.cwl @@ -0,0 +1,39 @@ +class: Workflow +inputs: + infoo: + type: File + secondaryFiles: + - .bai + inbar: + type: + type: array + items: File + secondaryFiles: + - .bai + infoobar: + type: + type: array + items: + type: array + items: File + secondaryFiles: + - .bai +outputs: + outfoo: + type: File + secondaryFiles: + - .bai + outbar: + type: + type: array + items: File + secondaryFiles: + - .bai + outfoobar: + type: + type: array + items: + type: array + items: File + secondaryFiles: + - .bai diff --git a/tests/javascript.scm b/tests/javascript.scm index 75936c0..58d2639 100644 --- a/tests/javascript.scm +++ b/tests/javascript.scm @@ -1,5 +1,5 @@ ;;; ravanan --- High-reproducibility CWL runner powered by Guix -;;; Copyright © 2024 Arun Isaac <arunisaac@systemreboot.net> +;;; Copyright © 2024–2025 Arun Isaac <arunisaac@systemreboot.net> ;;; ;;; This file is part of ravanan. ;;; @@ -119,7 +119,12 @@ (evaluate-parameter-reference "foo$(inputs.vector)$(inputs.object)"))) (test-equal "evaluate parameter reference with node (without context)" - '(evaluate-javascript (*approximate*) "(inputs.n + 1)" "") + '(evaluate-javascript (*approximate*) + "(inputs.n + 1)" + (string-append "" + "inputs = " (scm->json-string inputs) ";" + "self = " (scm->json-string self) ";" + "runtime = " (scm->json-string runtime) ";")) (gexp->sexp-rec (evaluate-parameter-reference "$(inputs.n + 1)"))) @@ -129,7 +134,12 @@ (if (string? token) token (scm->json-string (canonicalize-json token)))) (list (json-ref runtime "cores") "foo" - (evaluate-javascript (*approximate*) "(inputs.threads*2)" "") + (evaluate-javascript (*approximate*) + "(inputs.threads*2)" + (string-append "" + "inputs = " (scm->json-string inputs) ";" + "self = " (scm->json-string self) ";" + "runtime = " (scm->json-string runtime) ";")) (json-ref inputs "output_filename"))) "") (gexp->sexp-rec @@ -142,7 +152,12 @@ (list "foo" (json-ref inputs "vector") (json-ref inputs "object") - (evaluate-javascript (*approximate*) "(inputs.object.foo*20)" ""))) + (evaluate-javascript (*approximate*) + "(inputs.object.foo*20)" + (string-append "" + "inputs = " (scm->json-string inputs) ";" + "self = " (scm->json-string self) ";" + "runtime = " (scm->json-string runtime) ";")))) "") (gexp->sexp-rec (evaluate-parameter-reference "foo$(inputs.vector)$(inputs.object)$(inputs.object.foo*20)"))) diff --git a/tests/propnet.scm b/tests/propnet.scm index 182eb30..1f48d61 100644 --- a/tests/propnet.scm +++ b/tests/propnet.scm @@ -1,5 +1,5 @@ ;;; ravanan --- High-reproducibility CWL runner powered by Guix -;;; Copyright © 2024 Arun Isaac <arunisaac@systemreboot.net> +;;; Copyright © 2024–2025 Arun Isaac <arunisaac@systemreboot.net> ;;; ;;; This file is part of ravanan. ;;; @@ -23,19 +23,30 @@ (test-equal "Trigger propagator with no inputs" '((out . #t)) - (run-propnet (propnet (list (propagator "const" - (const '((out . #t))) - '() - '() - '((out . out)))) - (@@ (ravanan workflow) value=?) - (@@ (ravanan workflow) merge-values) - (scheduler (lambda (proc _) - proc) - (const 'completed) - 0 - (lambda (proc) - (proc)))) - '())) + (run-with-state + (let loop ((mstate (schedule-propnet + (propnet (list (propagator "const" + (const '((out . #t))) + '() + '() + '((out . out)))) + (@@ (ravanan workflow) value=?) + (@@ (ravanan workflow) merge-values) + (scheduler (lambda (proc inputs scheduler) + (state-return proc)) + (lambda (state) + (state-return (state+status state + 'completed))) + (lambda (proc) + (state-return (proc))))) + '()))) + ;; Poll. + (state-let* ((state mstate) + (state+status (poll-propnet state))) + (if (eq? (state+status-status state+status) + 'pending) + (loop (state-return (state+status-state state+status))) + ;; Capture outputs. + (capture-propnet-output (state+status-state state+status))))))) (test-end "propnet") diff --git a/tests/reader.scm b/tests/reader.scm index f3bcdd2..d55e396 100644 --- a/tests/reader.scm +++ b/tests/reader.scm @@ -16,169 +16,185 @@ ;;; You should have received a copy of the GNU General Public License ;;; along with ravanan. If not, see <https://www.gnu.org/licenses/>. -(use-modules (srfi srfi-1) +(use-modules (srfi srfi-26) (srfi srfi-64) (ice-9 filesystem) (ice-9 match) (web uri) (ravanan reader) (ravanan work command-line-tool) - (ravanan work utils)) - -(define normalize-formal-input - (@@ (ravanan reader) normalize-formal-input)) - -(define normalize-formal-output - (@@ (ravanan reader) normalize-formal-output)) - -(define normalize-input - (@@ (ravanan reader) normalize-input)) - -(define (json=? tree1 tree2) - (cond - ;; Arrays - ((vector? tree1) - (lset= json=? - (vector->list tree1) - (vector->list tree2))) - ;; Dictionaries - ((list? tree1) - (lset= (match-lambda* - (((key1 . value1) (key2 . value2)) - (and (string=? key1 key2) - (json=? value1 value2)))) - tree1 - tree2)) - ;; Atoms - (else - (equal? tree1 tree2)))) + (ravanan work types) + (ravanan work utils) + (ravanan work vectors)) (test-begin "reader") -(test-equal "Coerce number to number" +(test-equal "Coerce to boolean (true)" + #t + (coerce-type "true" 'boolean)) + +(test-equal "Coerce to boolean (false)" + #f + (coerce-type "false" 'boolean)) + +(test-equal "Coerce to int" + 37 + (coerce-type "37" 'int)) + +(test-equal "Coerce to float" + 37.1 + (coerce-type "37.1" 'float)) + +(test-equal "Coerce to double" + 37.1 + (coerce-type "37.1" 'double)) + +(test-equal "Coerce to string" + "37" + (coerce-type "37" 'string)) + +(test-equal "Coerce to File" + '(("class" . "File") + ("location" . "foo")) + (coerce-type '(("class" . "File") + ("location" . "foo")) + 'File)) + +(test-equal "Coerce to array" + #(1 2 3) + (coerce-type #("1" "2" "3") + (cwl-array-type 'int))) + +(test-equal "Coerce to union type (int first)" + 37 + (coerce-type "37" + (cwl-union-type 'int 'string))) + +(test-equal "Coerce to union type (string first)" + "37" + (coerce-type "37" + (cwl-union-type 'string 'int))) + +(test-equal "Coerce int to int" 37 - (coerce-type 37 'number)) - -(test-assert "Normalize File type formal input" - (json=? '(("type" . "File") - ("id" . "foo") - ("secondaryFiles" . #((("pattern" . ".bai") - ("required" . #t))))) - (normalize-formal-input - '(("type" . "File") - ("id" . "foo") - ("secondaryFiles" . #(".bai")))))) - -(test-assert "Normalize File array type formal input" - (json=? '(("type" - ("type" . "array") - ("items" . "File")) - ("id" . "foo") - ("secondaryFiles" . #((("pattern" . ".bai") - ("required" . #t))))) - (normalize-formal-input - '(("type" - ("type" . "array") - ("items" . "File")) - ("id" . "foo") - ("secondaryFiles" . #(".bai")))))) - -(test-assert "Normalize array of File arrays type formal input" - (json=? '(("type" - ("type" . "array") - ("items" . (("type" . "array") - ("items" . "File")))) - ("id" . "foo") - ("secondaryFiles" . #((("pattern" . ".bai") - ("required" . #t))))) - (normalize-formal-input - '(("type" - ("type" . "array") - ("items" . (("type" . "array") - ("items" . "File")))) - ("id" . "foo") - ("secondaryFiles" . #(".bai")))))) - -(test-assert "Normalize File type formal output" - (json=? '(("type" . "File") - ("id" . "foo") - ("secondaryFiles" . #((("pattern" . ".bai") - ("required" . #f))))) - (normalize-formal-output - '(("type" . "File") - ("id" . "foo") - ("secondaryFiles" . #(".bai")))))) - -(test-assert "Normalize File array type formal output" - (json=? '(("type" - ("type" . "array") - ("items" . "File")) - ("id" . "foo") - ("secondaryFiles" . #((("pattern" . ".bai") - ("required" . #f))))) - (normalize-formal-output - '(("type" - ("type" . "array") - ("items" . "File")) - ("id" . "foo") - ("secondaryFiles" . #(".bai")))))) - -(test-assert "Normalize array of File arrays type formal output" - (json=? '(("type" - ("type" . "array") - ("items" . (("type" . "array") - ("items" . "File")))) - ("id" . "foo") - ("secondaryFiles" . #((("pattern" . ".bai") - ("required" . #f))))) - (normalize-formal-output - '(("type" - ("type" . "array") - ("items" . (("type" . "array") - ("items" . "File")))) - ("id" . "foo") - ("secondaryFiles" . #(".bai")))))) - -(test-assert "Normalize inputs with only location" - (call-with-temporary-directory - (lambda (dir) - (json=? (let ((path (expand-file-name "foo" dir))) - `(("class" . "File") - ("location" . ,(uri->string (build-uri 'file #:path path))) - ("path" . ,path) - ("basename" . "foo") - ("nameroot" . "foo") - ("nameext" . "") - ("size" . 0) - ("checksum" . "sha1$da39a3ee5e6b4b0d3255bfef95601890afd80709"))) - (call-with-current-directory dir - (lambda () - ;; Create an actual file called "foo" so that canonicalize-path - ;; works. - (call-with-output-file "foo" - (const #t)) - (normalize-input '(("class" . "File") - ("location" . "foo"))))))))) - -(test-assert "Normalize inputs with only path" - (call-with-temporary-directory - (lambda (dir) - (json=? (let ((path (expand-file-name "foo" dir))) - `(("class" . "File") - ("location" . ,(uri->string (build-uri 'file #:path path))) - ("path" . ,path) - ("basename" . "foo") - ("nameroot" . "foo") - ("nameext" . "") - ("size" . 0) - ("checksum" . "sha1$da39a3ee5e6b4b0d3255bfef95601890afd80709"))) - (call-with-current-directory dir - (lambda () - ;; Create an actual file called "foo" so that canonicalize-path - ;; works. - (call-with-output-file "foo" - (const #t)) - (normalize-input '(("class" . "File") - ("path" . "foo"))))))))) + (coerce-type 37 'int)) + +(test-equal "Normalize inputs with only location" + (canonicalize-json + (let ((path (canonicalize-path "test-data/foo"))) + `(("class" . "File") + ("location" . ,(uri->string (build-uri 'file + #:host "" + #:path path + #:validate? #f))) + ("path" . ,path) + ("basename" . "foo") + ("nameroot" . "foo") + ("nameext" . "") + ("size" . 0) + ("checksum" . "sha1$da39a3ee5e6b4b0d3255bfef95601890afd80709")))) + (call-with-values + (cut read-workflow+inputs + "test-data/workflow-with-a-file-input.cwl" + "test-data/input-file-with-location-only.yaml") + (lambda (workflow inputs) + (canonicalize-json (assoc-ref inputs "foo"))))) + +(test-equal "Normalize inputs with only path" + (canonicalize-json + (let ((path (canonicalize-path "test-data/foo"))) + `(("class" . "File") + ("location" . ,(uri->string (build-uri 'file + #:host "" + #:path path + #:validate? #f))) + ("path" . ,path) + ("basename" . "foo") + ("nameroot" . "foo") + ("nameext" . "") + ("size" . 0) + ("checksum" . "sha1$da39a3ee5e6b4b0d3255bfef95601890afd80709")))) + (call-with-values + (cut read-workflow+inputs + "test-data/workflow-with-a-file-input.cwl" + "test-data/input-file-with-path-only.yaml") + (lambda (workflow inputs) + (canonicalize-json (assoc-ref inputs "foo"))))) + +(test-equal "Read YAML inputs file with type ambiguities" + '(("number" . 13) + ("flag" . #t) + ("reverseflag" . #f) + ("foo" . "bar") + ("arr" . #(1 2 3))) + (call-with-values + (cut read-workflow+inputs + "test-data/workflow-for-inputs-with-type-ambiguities.cwl" + "test-data/inputs-with-type-ambiguities.yaml") + (lambda (workflow inputs) + inputs))) + +(test-equal "Resolve type ambiguities in workflow default inputs" + '(("number" . 13) + ("flag" . #t) + ("reverseflag" . #f) + ("foo" . "bar") + ("arr" . #(1 2 3))) + (call-with-values + (cut read-workflow+inputs + "test-data/workflow-with-default-inputs.cwl" + "test-data/empty.yaml") + (lambda (workflow inputs) + (vector-map->list (lambda (input) + (cons (assoc-ref input "id") + (assoc-ref input "default"))) + (assoc-ref workflow "inputs"))))) + +(test-equal "Normalize File type formals" + (list (vector-map canonicalize-json + #((("id" . "infoo") + ("type" . "File") + ("secondaryFiles" . #((("pattern" . ".bai") + ("required" . #t))))) + (("id" . "inbar") + ("type" + ("type" . "array") + ("items" . "File")) + ("secondaryFiles" . #((("pattern" . ".bai") + ("required" . #t))))) + (("id" . "infoobar") + ("type" + ("type" . "array") + ("items" . (("type" . "array") + ("items" . "File")))) + ("secondaryFiles" . #((("pattern" . ".bai") + ("required" . #t))))))) + (vector-map canonicalize-json + #((("id" . "outfoo") + ("type" . "File") + ("secondaryFiles" . #((("pattern" . ".bai") + ("required" . #f))))) + (("id" . "outbar") + ("type" + ("type" . "array") + ("items" . "File")) + ("secondaryFiles" . #((("pattern" . ".bai") + ("required" . #f))))) + (("id" . "outfoobar") + ("type" + ("type" . "array") + ("items" . (("type" . "array") + ("items" . "File")))) + ("secondaryFiles" . #((("pattern" . ".bai") + ("required" . #f)))))))) + (call-with-values + (cut read-workflow+inputs + "test-data/workflow-with-various-file-type-formals.cwl" + "test-data/empty.yaml") + (lambda (workflow inputs) + (list (vector-map canonicalize-json + (assoc-ref workflow "inputs")) + (vector-map canonicalize-json + (assoc-ref workflow "outputs")))))) (test-end "reader") diff --git a/tests/store.scm b/tests/store.scm new file mode 100644 index 0000000..f209583 --- /dev/null +++ b/tests/store.scm @@ -0,0 +1,80 @@ +;;; ravanan --- High-reproducibility CWL runner powered by Guix +;;; Copyright © 2025 Arun Isaac <arunisaac@systemreboot.net> +;;; +;;; This file is part of ravanan. +;;; +;;; ravanan is free software: you can redistribute it and/or modify it +;;; under the terms of the GNU General Public License as published by +;;; the Free Software Foundation, either version 3 of the License, or +;;; (at your option) any later version. +;;; +;;; ravanan is distributed in the hope that it will be useful, but +;;; WITHOUT ANY WARRANTY; without even the implied warranty of +;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;;; General Public License for more details. +;;; +;;; You should have received a copy of the GNU General Public License +;;; along with ravanan. If not, see <https://www.gnu.org/licenses/>. + +(use-modules (srfi srfi-64) + (ravanan store)) + +(test-begin "store") + +(test-equal "step-store-files-directory must be insensitive to order of inputs" + (step-store-files-directory "/gnu/store/hl96c0xd19ngvl8rf4cyw452rpgqsi1b-foo" + '(("foo" . 1) + ("foobar" . 3) + ("bar" . (("aal" . 1) + ("vel" . 2)))) + "store") + (step-store-files-directory "/gnu/store/hl96c0xd19ngvl8rf4cyw452rpgqsi1b-foo" + '(("foo" . 1) + ("bar" . (("vel" . 2) + ("aal" . 1))) + ("foobar" . 3)) + "store")) + +(test-equal "step-store-data-file must be insensitive to order of inputs" + (step-store-data-file "/gnu/store/hl96c0xd19ngvl8rf4cyw452rpgqsi1b-foo" + '(("foo" . 1) + ("foobar" . 3) + ("bar" . (("aal" . 1) + ("vel" . 2)))) + "store") + (step-store-data-file "/gnu/store/hl96c0xd19ngvl8rf4cyw452rpgqsi1b-foo" + '(("foo" . 1) + ("bar" . (("vel" . 2) + ("aal" . 1))) + ("foobar" . 3)) + "store")) + +(test-equal "step-store-stdout-file must be insensitive to order of inputs" + (step-store-stdout-file "/gnu/store/hl96c0xd19ngvl8rf4cyw452rpgqsi1b-foo" + '(("foo" . 1) + ("foobar" . 3) + ("bar" . (("aal" . 1) + ("vel" . 2)))) + "store") + (step-store-stdout-file "/gnu/store/hl96c0xd19ngvl8rf4cyw452rpgqsi1b-foo" + '(("foo" . 1) + ("bar" . (("vel" . 2) + ("aal" . 1))) + ("foobar" . 3)) + "store")) + +(test-equal "step-store-stderr-file must be insensitive to order of inputs" + (step-store-stderr-file "/gnu/store/hl96c0xd19ngvl8rf4cyw452rpgqsi1b-foo" + '(("foo" . 1) + ("foobar" . 3) + ("bar" . (("aal" . 1) + ("vel" . 2)))) + "store") + (step-store-stderr-file "/gnu/store/hl96c0xd19ngvl8rf4cyw452rpgqsi1b-foo" + '(("foo" . 1) + ("bar" . (("vel" . 2) + ("aal" . 1))) + ("foobar" . 3)) + "store")) + +(test-end "store") diff --git a/tests/work/command-line-tool.scm b/tests/work/command-line-tool.scm index 3d11b53..93b97d4 100644 --- a/tests/work/command-line-tool.scm +++ b/tests/work/command-line-tool.scm @@ -1,5 +1,5 @@ ;;; ravanan --- High-reproducibility CWL runner powered by Guix -;;; Copyright © 2024 Arun Isaac <arunisaac@systemreboot.net> +;;; Copyright © 2024, 2025 Arun Isaac <arunisaac@systemreboot.net> ;;; ;;; This file is part of ravanan. ;;; @@ -24,6 +24,14 @@ (test-equal "match null object to array type" #f - (match-type 'null (array-type 'File))) + (match-type 'null (cwl-array-type 'File))) + +(test-equal "match int to float" + 'float + (match-type 1 'float)) + +(test-equal "match float to float" + 'float + (match-type 1.3 'float)) (test-end "work.command-line-tool") diff --git a/tests/workflow.scm b/tests/workflow.scm new file mode 100644 index 0000000..06b2609 --- /dev/null +++ b/tests/workflow.scm @@ -0,0 +1,31 @@ +;;; ravanan --- High-reproducibility CWL runner powered by Guix +;;; Copyright © 2025 Arun Isaac <arunisaac@systemreboot.net> +;;; +;;; This file is part of ravanan. +;;; +;;; ravanan is free software: you can redistribute it and/or modify it +;;; under the terms of the GNU General Public License as published by +;;; the Free Software Foundation, either version 3 of the License, or +;;; (at your option) any later version. +;;; +;;; ravanan is distributed in the hope that it will be useful, but +;;; WITHOUT ANY WARRANTY; without even the implied warranty of +;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;;; General Public License for more details. +;;; +;;; You should have received a copy of the GNU General Public License +;;; along with ravanan. If not, see <https://www.gnu.org/licenses/>. + +(use-modules (srfi srfi-64)) + +(define optional-input? + (@@ (ravanan workflow) optional-input?)) + +(test-begin "workflow") + +(test-assert "Inputs that have a boolean #f default are also optional" + (optional-input? `(("id" . "foo") + ("type" . "boolean") + ("default" . #f)))) + +(test-end "workflow") diff --git a/website/style.css b/website/style.css new file mode 100644 index 0000000..ae17e5f --- /dev/null +++ b/website/style.css @@ -0,0 +1,48 @@ +@font-face { + font-family: 'Charter'; + src: url('/fonts/charter_regular.woff2') format('woff2'); + font-weight: normal; + font-style: normal; +} + +@font-face { + font-family: 'Fira Code'; + src: url('/fonts/FiraCode-Regular.woff2') format('woff2'); + font-weight: normal; + font-style: normal; +} + +@font-face { + font-family: 'Fira Code'; + src: url('/fonts/FiraCode-SemiBold.woff2') format('woff2'); + font-weight: 600; + font-style: normal; +} + +body { + margin: 40px auto; + max-width: 900px; + line-height: 1.6; + font-family: 'Charter'; + font-size: 20px; + padding: 0 10px; +} + +pre, code, samp { + font-family: 'Fira Code'; +} + +pre { + background-color: #f0f0f0; + padding: 1em; + font-size: 0.9em; +} + +code, samp { + font-size: 0.8em; + font-weight: 600; +} + +img { + max-width: 100%; +} |
