diff options
-rw-r--r-- | .dir-locals.el | 1 | ||||
-rw-r--r-- | .guix/cwl-conformance.scm | 114 | ||||
-rw-r--r-- | .guix/cwltest-package.scm | 117 | ||||
-rw-r--r-- | .guix/e2e-tests.scm | 78 | ||||
-rw-r--r-- | .guix/ravanan-package.scm | 13 | ||||
-rw-r--r-- | HACKING.md | 18 | ||||
-rw-r--r-- | README.md | 13 | ||||
-rwxr-xr-x | bin/ravanan | 63 | ||||
-rw-r--r-- | cwl-conformance/LICENSE | 2 | ||||
-rw-r--r-- | cwl-conformance/badgegen.py | 37 | ||||
-rw-r--r-- | cwl-conformance/commonwl.svg | 1 | ||||
-rw-r--r-- | cwl-conformance/manifest.scm | 2 | ||||
-rw-r--r-- | e2e-tests/jobs/hello-world.yaml | 1 | ||||
-rw-r--r-- | e2e-tests/manifest.scm | 2 | ||||
-rw-r--r-- | e2e-tests/tests.yaml | 9 | ||||
-rw-r--r-- | e2e-tests/tools/hello-world.scm | 3 | ||||
-rw-r--r-- | manifest.scm | 16 | ||||
-rw-r--r-- | ravanan/command-line-tool.scm | 1055 | ||||
-rw-r--r-- | ravanan/job-state.scm | 15 | ||||
-rw-r--r-- | ravanan/propnet.scm | 22 | ||||
-rw-r--r-- | ravanan/reader.scm | 119 | ||||
-rw-r--r-- | ravanan/single-machine.scm | 6 | ||||
-rw-r--r-- | ravanan/store.scm | 104 | ||||
-rw-r--r-- | ravanan/utils.scm | 4 | ||||
-rw-r--r-- | ravanan/work/command-line-tool.scm | 211 | ||||
-rw-r--r-- | ravanan/work/ui.scm | 52 | ||||
-rw-r--r-- | ravanan/workflow.scm | 323 | ||||
-rw-r--r-- | tests/store.scm | 80 |
28 files changed, 1703 insertions, 778 deletions
diff --git a/.dir-locals.el b/.dir-locals.el index 6b07349..153cb4a 100644 --- a/.dir-locals.el +++ b/.dir-locals.el @@ -14,4 +14,5 @@ (eval . (put 'call-with-inferior 'scheme-indent-function 1)) (eval . (put 'maybe-let* 'scheme-indent-function 1)) (eval . (put 'maybe-assoc-set 'scheme-indent-function 1)) + (eval . (put 'run-with-state 'scheme-indent-function 0)) (eval . (put 'state-let* 'scheme-indent-function 1)))) diff --git a/.guix/cwl-conformance.scm b/.guix/cwl-conformance.scm new file mode 100644 index 0000000..3e25c71 --- /dev/null +++ b/.guix/cwl-conformance.scm @@ -0,0 +1,114 @@ +;;; ravanan --- High-reproducibility CWL runner powered by Guix +;;; Copyright © 2025 Arun Isaac <arunisaac@systemreboot.net> +;;; +;;; This file is part of ravanan. +;;; +;;; ravanan is free software: you can redistribute it and/or modify it +;;; under the terms of the GNU General Public License as published by +;;; the Free Software Foundation, either version 3 of the License, or +;;; (at your option) any later version. +;;; +;;; ravanan is distributed in the hope that it will be useful, but +;;; WITHOUT ANY WARRANTY; without even the implied warranty of +;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;;; General Public License for more details. +;;; +;;; You should have received a copy of the GNU General Public License +;;; along with ravanan. If not, see <https://www.gnu.org/licenses/>. + +(define-module (cwl-conformance) + #:use-module ((cwltest-package) #:select (cwltest)) + #:use-module ((ravanan-package) #:select (ravanan)) + #:use-module ((gnu packages python) #:select (python)) + #:use-module ((gnu packages python-web) #:select (python-pybadges)) + #:use-module (guix gexp) + #:use-module (guix git-download) + #:use-module (guix packages) + #:use-module (guix profiles) + #:use-module (guix utils) + #:use-module (ice-9 match) + #:export (cwltest-suite-gexp)) + +(define* (cwltest-suite-gexp cwltest-suite manifest-file #:key (skip-tests '())) + (with-imported-modules '((guix build utils)) + #~(begin + (use-modules (guix build utils) + (ice-9 match)) + + ;; Guix peeks into HOME. + (setenv "HOME" (getcwd)) + ;; cwltest writes out output directories to TMPDIR, but does not clean + ;; up after. So, we set TMPDIR to our own temporary directory that we + ;; can manage easily. See pending issue on cleaning up temporary output + ;; directories: + ;; https://github.com/common-workflow-language/cwltest/issues/249 + (mkdir "tmpdir") + (setenv "TMPDIR" "tmpdir") + (apply invoke + #$(file-append cwltest "/bin/cwltest") + "--test" #$cwltest-suite + "--tool" #$(file-append ravanan "/bin/ravanan") + "--badgedir" "badges" + (append '#$(match skip-tests + (() '()) + (_ (list "-S" (string-join skip-tests ",")))) + (list "--" + "--store=store" + (string-append "--guix-manifest=" #$manifest-file))))))) + +(define cwl-v1.2-conformance-suite + (let ((version "1.2.1")) + (origin + (method git-fetch) + (uri (git-reference + (url "https://github.com/common-workflow-language/cwl-v1.2") + (commit (string-append "v" version)))) + (file-name (git-file-name "cwl-v1.2" version)) + (sha256 + (base32 + "03q8pd0niaaff52n6sn07l3rjnvwi4da649lnc8mn928sh0vywf3"))))) + +(define-public cwl-v1.2-conformance + (program-file "cwl-v1.2-conformance" + (cwltest-suite-gexp + (file-append cwl-v1.2-conformance-suite + "/conformance_tests.yaml") + (local-file "../cwl-conformance/manifest.scm") + ;; With these tests, evil things happen and too much memory is + ;; consumed. So, disable for now. + #:skip-tests (list "env_home_tmpdir" + "env_home_tmpdir_docker" + "env_home_tmpdir_docker_no_return_code")))) + +(define generate-badges-gexp + (with-imported-modules '((guix build utils)) + #~(begin + (use-modules (guix build utils) + (ice-9 match)) + + (match (command-line) + ((_ cwltest-badgedir output-directory) + (set-path-environment-variable + "GUIX_PYTHONPATH" + '(#$(string-append "lib/python" + (version-major+minor (package-version python)) + "/site-packages")) + (list #$(profile + (content (packages->manifest + (list python python-pybadges)))))) + (invoke #$(file-append python "/bin/python3") + #$(local-file "../cwl-conformance/badgegen.py") + cwltest-badgedir + #$(local-file "../cwl-conformance/commonwl.svg") + output-directory)) + ((program _ ...) + (format (current-error-port) + "Usage: ~a CWLTEST_BADGEDIR OUTPUT-DIRECTORY~%" + program) + (exit #f)))))) + +(define-public generate-badges + (program-file "generate-badges" + generate-badges-gexp)) + +cwl-v1.2-conformance diff --git a/.guix/cwltest-package.scm b/.guix/cwltest-package.scm new file mode 100644 index 0000000..f3eeb9d --- /dev/null +++ b/.guix/cwltest-package.scm @@ -0,0 +1,117 @@ +;;; ravanan --- High-reproducibility CWL runner powered by Guix +;;; Copyright © 2025 Arun Isaac <arunisaac@systemreboot.net> +;;; +;;; This file is part of ravanan. +;;; +;;; ravanan is free software: you can redistribute it and/or modify it +;;; under the terms of the GNU General Public License as published by +;;; the Free Software Foundation, either version 3 of the License, or +;;; (at your option) any later version. +;;; +;;; ravanan is distributed in the hope that it will be useful, but +;;; WITHOUT ANY WARRANTY; without even the implied warranty of +;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;;; General Public License for more details. +;;; +;;; You should have received a copy of the GNU General Public License +;;; along with ravanan. If not, see <https://www.gnu.org/licenses/>. + +(define-module (cwltest-package) + #:use-module ((gnu packages bioinformatics) + #:select (cwltool python-schema-salad)) + #:use-module ((gnu packages check) + #:select (python-pytest python-pytest-xdist)) + #:use-module ((gnu packages node) #:select (node)) + #:use-module ((gnu packages python-build) + #:select (python-setuptools python-setuptools-scm python-wheel)) + #:use-module ((gnu packages python-check) #:select (python-junit-xml)) + #:use-module ((gnu packages xml) #:select (python-defusedxml)) + #:use-module (guix build-system pyproject) + #:use-module (guix build-system python) + #:use-module (guix download) + #:use-module (guix gexp) + #:use-module ((guix licenses) #:prefix license:) + #:use-module (guix packages)) + +;; cwltest uses cwltool as a library. so, create a library version of cwltool +;; where inputs become propagated inputs. +(define python-cwltool + (package + (inherit cwltool) + (name "python-cwltool") + (inputs + (list node)) + (propagated-inputs + (modify-inputs (package-inputs cwltool) + (delete "node"))))) + +;; cwltest requires cwl-runner, the implementation-agnostic entry point to +;; cwltool, for its tests. +(define cwl-runner + (file-union "cwl-runner" + `(("bin/cwl-runner" ,(file-append cwltool "/bin/cwltool"))))) + +(define-public cwltest + (package + (name "cwltest") + (version "2.6.20250314152537") + (source + (origin + (method url-fetch) + (uri (pypi-uri "cwltest" version)) + (sha256 + (base32 "0h2w9bllb6cz8d5pja5lbbd1kj08yga40jdi3300anwllczcnfq6")))) + (build-system pyproject-build-system) + (arguments + (list #:modules `((rnrs io ports) + (srfi srfi-1) + (srfi srfi-26) + (srfi srfi-171) + (guix build pyproject-build-system) + (guix build utils)) + #:phases + #~(modify-phases %standard-phases + (add-after 'unpack 'disable-docker-in-tests + (lambda _ + ;; Remove DockerRequirement (lines 7–10). + (let* ((file "tests/test-data/v1.0/cat1-testcli.cwl") + (lines (call-with-input-file file + (cut port-transduce + identity + rcons + get-line + <>)))) + (call-with-output-file file + (lambda (port) + (for-each (lambda (line) + (display line port) + (newline port)) + (append (take lines 6) + (drop lines 10)))))))) + (add-after 'install 'fix-permissions + (lambda* (#:key inputs outputs #:allow-other-keys) + ;; Make test scripts executable. + (for-each (lambda (file) + (chmod (string-append (site-packages inputs outputs) + "/cwltest/tests/test-data/" + file) + #o755)) + (list "dummy-executor.sh" + "mock_cwl_runner.py"))))))) + (inputs (list python-defusedxml + python-junit-xml + python-pytest + python-schema-salad)) + (native-inputs (list cwl-runner + python-cwltool + python-pytest + python-pytest-xdist + python-setuptools + python-setuptools-scm + python-wheel)) + (home-page "https://github.com/common-workflow-language/cwltest") + (synopsis "Common Workflow Language testing framework") + (description "Common Workflow Language testing framework.") + (license license:asl2.0))) + +cwltest diff --git a/.guix/e2e-tests.scm b/.guix/e2e-tests.scm new file mode 100644 index 0000000..58ba480 --- /dev/null +++ b/.guix/e2e-tests.scm @@ -0,0 +1,78 @@ +;;; ravanan --- High-reproducibility CWL runner powered by Guix +;;; Copyright © 2025 Arun Isaac <arunisaac@systemreboot.net> +;;; +;;; This file is part of ravanan. +;;; +;;; ravanan is free software: you can redistribute it and/or modify it +;;; under the terms of the GNU General Public License as published by +;;; the Free Software Foundation, either version 3 of the License, or +;;; (at your option) any later version. +;;; +;;; ravanan is distributed in the hope that it will be useful, but +;;; WITHOUT ANY WARRANTY; without even the implied warranty of +;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;;; General Public License for more details. +;;; +;;; You should have received a copy of the GNU General Public License +;;; along with ravanan. If not, see <https://www.gnu.org/licenses/>. + +(define-module (e2e-tests) + #:use-module ((cwl-conformance) #:select (cwltest-suite-gexp)) + #:use-module ((gnu packages bioinformatics) #:select (ccwl)) + #:use-module (guix gexp) + #:use-module (ice-9 match)) + +(define (ccwl-compile source-file) + #~(begin + (use-modules (rnrs io ports) + (srfi srfi-26) + (ice-9 match) + (ice-9 popen)) + + (define (call-with-input-pipe command proc) + (match command + ((prog args ...) + (let ((port #f)) + (dynamic-wind + (lambda () + (set! port (apply open-pipe* OPEN_READ prog args))) + (cut proc port) + (lambda () + (unless (zero? (close-pipe port)) + (error "Command invocation failed" command)))))))) + + (call-with-output-file #$output + (cut display + (call-with-input-pipe '(#$(file-append ccwl "/bin/ccwl") + "compile" + #$source-file) + get-string-all) + <>)))) + +(define e2e-tools-ccwl-sources + `(("hello-world.scm" . ,(local-file "../e2e-tests/tools/hello-world.scm")))) + +(define e2e-tools + (file-union "e2e-tools" + (map (match-lambda + ((ccwl-source-filename . ccwl-source-file) + (let ((cwl-filename (string-append (basename ccwl-source-filename ".scm") + ".cwl"))) + (list cwl-filename + (computed-file cwl-filename + (ccwl-compile ccwl-source-file)))))) + e2e-tools-ccwl-sources))) + +(define e2e-test-suite + (file-union "e2e-test-suite" + `(("tests.yaml" ,(local-file "../e2e-tests/tests.yaml")) + ("tools" ,e2e-tools) + ("jobs" ,(local-file "../e2e-tests/jobs" + #:recursive? #t))))) + +(define-public e2e-tests + (program-file "e2e-tests" + (cwltest-suite-gexp (file-append e2e-test-suite "/tests.yaml") + (local-file "../e2e-tests/manifest.scm")))) + +e2e-tests diff --git a/.guix/ravanan-package.scm b/.guix/ravanan-package.scm index a59ec68..508e8ce 100644 --- a/.guix/ravanan-package.scm +++ b/.guix/ravanan-package.scm @@ -29,7 +29,16 @@ (source (local-file ".." "ravanan-checkout" #:recursive? #t - #:select? (or (git-predicate (dirname (current-source-directory))) - (const #t)))))) + #:select? (lambda (file stat) + ;; If .guix is included, changes to other + ;; files under .guix—such as the CWL + ;; conformance tests—unnecessarily trigger a + ;; rebuild of ravanan. This could be a + ;; nuisance when hacking on the CWL + ;; conformance test scripts. + (and (not (string-contains file "/.guix/")) + ((or (git-predicate (dirname (current-source-directory))) + (const #t)) + file stat))))))) ravanan diff --git a/HACKING.md b/HACKING.md new file mode 100644 index 0000000..85cb4d4 --- /dev/null +++ b/HACKING.md @@ -0,0 +1,18 @@ +# Set up development environment + +Drop into a development environment using `guix shell`. This shell includes additional packages required for development, and not simply the dependencies required to build ravanan. +``` +guix shell -L .guix -Df manifest.scm +``` + +# Run end-to-end tests + +ravanan comes with a suite of end-to-end tests under `e2e-tests`. End-to-end tests require a running Guix daemon. To run them, create and change into a new empty directory. +``` +mkdir rundir +cd rundir +``` +Then, build and run the tests. +``` +$(guix build -L ../.guix -f ../.guix/e2e-tests.scm) +``` diff --git a/README.md b/README.md index f511d61..612d96e 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -[](https://ci.systemreboot.net/jobs/ravanan) +[](https://ci.systemreboot.net/jobs/ravanan) [](#cwl-v1.2-conformance) ravanan (pronounced rah-vun-un, IPA: rɑːvʌnʌn, Shavian: 𐑮𐑭𐑝𐑳𐑯𐑳𐑯) is a [Common Workflow Language (CWL)](https://www.commonwl.org/) implementation that is powered by [GNU Guix](https://guix.gnu.org/) and provides strong reproducibility guarantees. ravanan provides strong bullet-proof caching ([work reuse](https://www.commonwl.org/v1.2/CommandLineTool.html#WorkReuse)) so you never run the same step of your workflow twice, nor do you have to keep track of which steps were run and with what parameters: ravanan remembers everything for you. ravanan captures logs from every step of your workflow so you can always trace back in case of job failures. @@ -19,6 +19,7 @@ ravanan currently runs on single machines and on slurm via its [API](https://slu - [On HPC using slurm](#on-hpc-using-slurm) - [Using a specific version of Guix](#using-a-specific-version-of-guix) - [Referencing local scripts](#referencing-local-scripts) +- [CWL conformance](#cwl-conformance) - [License](#license) - [The Name](#the-name) @@ -151,6 +152,16 @@ Thanks to Guix, a better way is possible. Guix, in its magnificent elegance, doe Please send questions, feedback, bug reports and patches to [ravanan@systemreboot.net](mailto:ravanan@systemreboot.net). You may also browse the [archives](https://lists.systemreboot.net/ravanan) of previous conversations. We do not accept issues or pull requests on GitHub. Thank you! +# CWL conformance + +CWL conformance is a work in progress. We're getting there! + +## [CWL v1.2 conformance](https://ci.systemreboot.net/jobs/ravanan-cwl-v1.2-conformance) + + + +                           + # License ravanan is free software released under the terms of the [GNU General Public License](https://www.gnu.org/licenses/gpl.html), either version 3 of the License, or (at your option) any later version. diff --git a/bin/ravanan b/bin/ravanan index 6ad5167..fbf3b05 100755 --- a/bin/ravanan +++ b/bin/ravanan @@ -36,7 +36,9 @@ exec guile --no-auto-compile -e main -s "$0" "$@" (ravanan utils) (ravanan verbosity) (ravanan workflow) - (ravanan work utils)) + (ravanan work ui) + (ravanan work utils) + (ravanan work vectors)) (define %options (list (option (list "batch-system" "batchSystem") #t #f @@ -44,7 +46,7 @@ exec guile --no-auto-compile -e main -s "$0" "$@" (if (member arg (list "single-machine" "slurm-api")) (acons 'batch-system (string->symbol arg) result) - (error "Unknown batch system" arg)))) + (user-error "Unknown batch system ~a" arg)))) (option (list "guix-channels") #t #f (lambda (opt name arg result) (acons 'guix-channels-file arg result))) @@ -79,6 +81,14 @@ exec guile --no-auto-compile -e main -s "$0" "$@" (lambda (opt name arg result) (acons 'outdir arg result))) + (option (list "log-level") #t #f + (lambda (opt name arg result) + (let ((accepted-values (list "debug" "info" "warning" + "error" "critical"))) + (if (member (string-downcase arg) accepted-values) + (acons 'log-level (string->symbol (string-downcase arg)) + result) + (user-error "Unknown log level ~a" arg))))) (option (list "trace") #t #f (lambda (opt name arg result) (let ((accepted-values (list "slurm-api"))) @@ -87,7 +97,11 @@ exec guile --no-auto-compile -e main -s "$0" "$@" (cons 'traces (cons (string->symbol arg) (assq-ref result 'traces)))) - (error "Unknown trace subsystem" arg))))) + (user-error "Unknown trace subsystem ~a" arg))))) + (option (list "quiet") #f #f + (lambda (opt name arg result) + (acons 'quiet arg + result))) (option (list "help") #f #t (lambda (opt name arg result) (acons 'help #t result))) @@ -96,7 +110,13 @@ exec guile --no-auto-compile -e main -s "$0" "$@" (acons 'version #t result))))) (define (invalid-option opt name arg result) - (error "Invalid option" name)) + (user-error "Invalid option ~a" name)) + +(define (print-short-usage program) + (format (current-error-port) + "Usage: ~a [OPTIONS] CWL-WORKFLOW INPUTS +Run CWL-WORKFLOW with INPUTS.~%" + program)) (define (print-usage program) (format (current-error-port) @@ -129,9 +149,14 @@ Slurm API batch system options: Debugging options: + --log-level=LEVEL log messages with higher severity than LEVEL + (accepted values: debug, info, warning, + error, critical) --trace=SUBSYSTEM enable tracing on subsystem; - repeat to trace multiple subsystems + repeat to trace more than one subsystem (accepted values: slurm-api) + --quiet disable all logging and tracing; + overrides --log-level and --trace " program)) @@ -200,6 +225,7 @@ files that have the token in the @verbatim{SLURM_JWT=token} format." (slurm-api-endpoint . ,(build-uri 'http #:host "localhost" #:port 6820)) + (log-level . warning) (traces . ()))))) (when (assq-ref args 'help) (print-usage program) @@ -211,13 +237,13 @@ files that have the token in the @verbatim{SLURM_JWT=token} format." (exit #t)) ;; Check for required arguments. (unless (assq-ref args 'store) - (error "--store not specified")) + (user-error "--store not specified")) (case (assq-ref args 'batch-system) ((slurm-api) (unless (assq-ref args 'scratch) - (error "--scratch not specified")) + (user-error "--scratch not specified")) (unless (assq-ref args 'slurm-jwt) - (error "--slurm-jwt not specified")))) + (user-error "--slurm-jwt not specified")))) (match (reverse (assq-ref args 'args)) ((workflow-file inputs-file) ;; We must not try to compile guix manifest files. @@ -238,15 +264,19 @@ files that have the token in the @verbatim{SLURM_JWT=token} format." (let ((file (manifest-file-error-file c))) (cond ((not file) - (error "--guix-manifest not specified")) + (user-error "--guix-manifest not specified")) ((not (file-exists? file)) - (error "Manifest file ~a does not exist" - file)) + (user-error "Manifest file ~a does not exist" + file)) (else - (error "Error loading manifest file" - file) + (user-error "Error loading manifest file ~a" + file) (raise-exception c)))))) - (parameterize ((%traces (assq-ref args 'traces))) + (parameterize ((%log-level (and (not (assq-ref args 'quiet)) + (assq-ref args 'log-level))) + (%traces (if (assq-ref args 'quiet) + '() + (assq-ref args 'traces)))) (run-workflow (file-name-stem workflow-file) (and (assq 'guix-manifest-file args) (canonicalize-path @@ -282,4 +312,7 @@ files that have the token in the @verbatim{SLURM_JWT=token} format." outputs) outputs) #:pretty #t)) - (newline))))))) + (newline)) + (_ + (print-short-usage program) + (exit #f))))))) diff --git a/cwl-conformance/LICENSE b/cwl-conformance/LICENSE new file mode 100644 index 0000000..b1e57b6 --- /dev/null +++ b/cwl-conformance/LICENSE @@ -0,0 +1,2 @@ +commonwl.svg is under the ISC license and is sourced from +https://github.com/badgen/badgen-icons/blob/master/icons/commonwl.svg \ No newline at end of file diff --git a/cwl-conformance/badgegen.py b/cwl-conformance/badgegen.py new file mode 100644 index 0000000..1e8f5f0 --- /dev/null +++ b/cwl-conformance/badgegen.py @@ -0,0 +1,37 @@ +### ravanan --- High-reproducibility CWL runner powered by Guix +### Copyright © 2025 Arun Isaac <arunisaac@systemreboot.net> +### +### This file is part of ravanan. +### +### ravanan is free software: you can redistribute it and/or modify it +### under the terms of the GNU General Public License as published by +### the Free Software Foundation, either version 3 of the License, or +### (at your option) any later version. +### +### ravanan is distributed in the hope that it will be useful, but +### WITHOUT ANY WARRANTY; without even the implied warranty of +### MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +### General Public License for more details. +### +### You should have received a copy of the GNU General Public License +### along with ravanan. If not, see <https://www.gnu.org/licenses/>. + +import json +from pathlib import Path +from pybadges import badge +import sys + +match sys.argv: + case [_, cwltest_badgedir, cwl_icon_file, output_directory]: + for summary_file in Path(cwltest_badgedir).glob("*.json"): + with open(summary_file) as file: + summary = json.load(file) + with (Path(output_directory) / (summary_file.stem + ".svg")).open("w") as file: + file.write(badge(left_text=summary["subject"], + right_text=summary["status"], + right_color=summary["color"], + logo=cwl_icon_file, + embed_logo=True)) + case [program, *_]: + print(f"Usage: {program} CWLTEST_BADGEDIR CWL_ICON_FILE OUTPUT_DIRECTORY") + sys.exit(1) diff --git a/cwl-conformance/commonwl.svg b/cwl-conformance/commonwl.svg new file mode 100644 index 0000000..0eeb627 --- /dev/null +++ b/cwl-conformance/commonwl.svg @@ -0,0 +1 @@ +<svg fill="#fff" viewBox="0 0 24 24" xmlns="http://www.w3.org/2000/svg"><path d="M12.063 9.205l-3.565 3.568 3.518 3.512 1.545-1.545-1.975-1.967 2.023-2.023-1.546-1.545zm1.543 13.218L11.6 20.375l3.786-3.815-1.559-1.573-5.33 5.403.115.114-.015.008 3.456 3.484zm1.896-13.565l-3.685-3.541 3.685-3.722L13.936 0 8.598 5.352l.108.101v.007l5.252 4.943z"/></svg> diff --git a/cwl-conformance/manifest.scm b/cwl-conformance/manifest.scm new file mode 100644 index 0000000..9cf33d5 --- /dev/null +++ b/cwl-conformance/manifest.scm @@ -0,0 +1,2 @@ +(specifications->manifest + (list "coreutils" "python-wrapper")) diff --git a/e2e-tests/jobs/hello-world.yaml b/e2e-tests/jobs/hello-world.yaml new file mode 100644 index 0000000..5329e73 --- /dev/null +++ b/e2e-tests/jobs/hello-world.yaml @@ -0,0 +1 @@ +message: Hello world! diff --git a/e2e-tests/manifest.scm b/e2e-tests/manifest.scm new file mode 100644 index 0000000..da7eb55 --- /dev/null +++ b/e2e-tests/manifest.scm @@ -0,0 +1,2 @@ +(specifications->manifest + (list "coreutils")) diff --git a/e2e-tests/tests.yaml b/e2e-tests/tests.yaml new file mode 100644 index 0000000..5c0ab22 --- /dev/null +++ b/e2e-tests/tests.yaml @@ -0,0 +1,9 @@ +- id: hello-world + doc: Hello world! + tool: tools/hello-world.cwl + job: jobs/hello-world.yaml + output: + output_message: + class: File + size: 13 + checksum: sha1$47a013e660d408619d894b20806b1d5086aab03b diff --git a/e2e-tests/tools/hello-world.scm b/e2e-tests/tools/hello-world.scm new file mode 100644 index 0000000..a037a36 --- /dev/null +++ b/e2e-tests/tools/hello-world.scm @@ -0,0 +1,3 @@ +(command #:inputs (message #:type string) + #:run "echo" message + #:outputs (output_message #:type stdout)) diff --git a/manifest.scm b/manifest.scm new file mode 100644 index 0000000..12ba0e0 --- /dev/null +++ b/manifest.scm @@ -0,0 +1,16 @@ +(use-modules ((gnu packages bioinformatics) #:select (ccwl)) + ((cwltest-package) #:select (cwltest)) + ((ravanan-package) #:select (ravanan)) + (srfi srfi-1)) + +(define (manifest-cons* . args) + "ARGS is of the form (PACKAGES ... ONTO-MANIFEST). Return a manifest +with PACKAGES and all packages in ONTO-MANIFEST." + (let ((packages (drop-right args 1)) + (onto-manifest (last args))) + (manifest (append (map package->manifest-entry packages) + (manifest-entries onto-manifest))))) + +(manifest-cons* ccwl + cwltest + (package->development-manifest ravanan)) diff --git a/ravanan/command-line-tool.scm b/ravanan/command-line-tool.scm index c47bb3c..356fb37 100644 --- a/ravanan/command-line-tool.scm +++ b/ravanan/command-line-tool.scm @@ -53,9 +53,11 @@ #:use-module (ravanan work ui) #:use-module (ravanan work utils) #:use-module (ravanan work vectors) - #:export (run-command-line-tool + #:export (build-command-line-tool-script + run-command-line-tool check-requirements inherit-requirements + find-requirement %command-line-tool-supported-requirements command-line-tool-supported-requirements capture-command-line-tool-output @@ -95,15 +97,6 @@ (secondary-files formal-output-secondary-files) (binding formal-output-binding)) -(define-immutable-record-type <command-line-binding> - (command-line-binding position prefix type value item-separator) - command-line-binding? - (position command-line-binding-position) - (prefix command-line-binding-prefix) - (type command-line-binding-type) - (value command-line-binding-value) - (item-separator command-line-binding-item-separator)) - (define-immutable-record-type <output-binding> (output-binding glob load-contents? load-listing? output-eval) output-binding? @@ -118,7 +111,7 @@ supported-requirements #:optional hint?) "Error out if any of @var{requirements} are not supported by @var{batch-system}. -If @var{hint?} is @code{#t}, only print a warning. +If @var{hint?} is @code{#t}, only log a warning. @var{supported-requirements-for-batch-system} is a function that when passed a batch system returns the requirements supported by it. @var{supported-requirements} is the list of requirements supported by at least @@ -292,104 +285,45 @@ G-expressions are inserted." (< (command-line-binding-position binding1) (command-line-binding-position binding2)))))) -(define (command-line-binding->args binding) - "Return a list of arguments for @var{binding}. The returned list may -contain strings or G-expressions. The G-expressions may reference an -@code{inputs-directory} variable that must be defined in the context -in which the G-expressions are inserted." - (let ((prefix (command-line-binding-prefix binding)) - (type (command-line-binding-type binding)) - (value (command-line-binding-value binding))) - (cond - ((eq? type 'boolean) - (if value - ;; TODO: Error out if boolean input has no prefix? - (maybe->list prefix) - (list))) - ((eq? type 'null) (list)) - ((array-type? type) - (match value - ;; Empty arrays should be noops. - (() (list)) - (_ - (let ((args (append-map command-line-binding->args - value))) - (append (maybe->list prefix) - (from-maybe - (maybe-let* ((item-separator (command-line-binding-item-separator binding))) - (just (list #~(string-join (list #$@args) - #$item-separator)))) - args)))))) - (else - (append (maybe->list prefix) - (list (case type - ((string) - value) - ((int float) - #~(number->string #$value)) - ((File) - #~(assoc-ref* #$value "path")) - (else - (user-error "Invalid formal input type ~a" - type))))))))) - -(define* (build-gexp-script name exp #:optional guix-daemon-socket) - "Build script named @var{name} using G-expression @var{exp}. - -When @var{guix-daemon-socket} is provided, connect to that Guix daemon." - (if guix-daemon-socket - (parameterize ((%daemon-socket-uri guix-daemon-socket)) - (build-gexp-script name exp)) - (with-store store - (run-with-store store - (mlet %store-monad ((drv (gexp->script name exp))) - (mbegin %store-monad - (built-derivations (list drv)) - (return (derivation->output-path drv)))))))) +(define* (build-gexp-script name exp) + "Build script named @var{name} using G-expression @var{exp}. Return the path to +the built script as a monadic value." + (mlet %store-monad ((drv (gexp->script name exp))) + (mbegin %store-monad + (built-derivations (list drv)) + (return (derivation->output-path drv))))) -(define* (run-command-line-tool name manifest-file channels cwl inputs - scratch store batch-system - #:key guix-daemon-socket) - "Run @code{CommandLineTool} class workflow @var{cwl} named @var{name} with -@var{inputs} using tools from Guix manifest in @var{manifest-file}. Return a -state-monadic job state object. +(define* (run-command-line-tool name script inputs resource-requirement + store batch-system) + "Run @code{CommandLineTool} class workflow @var{script} named @var{name} with +@var{inputs}. Return a state-monadic job state object. -@var{channels}, @var{scratch}, @var{store}, @var{batch-system} and -@var{guix-daemon-socket} are the same as in @code{run-workflow} from +@var{resource-requirement} is the @code{ResourceRequirement} of the workflow. +@var{store} and @var{batch-system} are the same as in @code{run-workflow} from @code{(ravanan workflow)}." - (let* ((script - (build-command-line-tool-script name manifest-file channels cwl inputs - scratch store batch-system - guix-daemon-socket)) - (requirements (inherit-requirements (or (assoc-ref cwl "requirements") - #()) - (or (assoc-ref cwl "hints") - #()))) - (cpus (from-maybe - (maybe-bind (maybe-assoc-ref (find-requirement requirements - "ResourceRequirement") - "coresMin") - (compose just - inexact->exact - ceiling - (cut coerce-type <> 'number) - (cut coerce-expression - <> - `(("inputs" . ,inputs))))) - 1)) - (store-files-directory (script->store-files-directory script store)) - (store-data-file (script->store-data-file script store)) - (stdout-file (script->store-stdout-file script store)) - (stderr-file (script->store-stderr-file script store))) + (let ((cpus (from-maybe + (maybe-bind (maybe-assoc-ref resource-requirement + "coresMin") + (compose just + inexact->exact + ceiling + (cut coerce-type <> 'number) + (cut coerce-expression + <> + `(("inputs" . ,inputs))))) + 1)) + (store-files-directory (step-store-files-directory script inputs store)) + (store-data-file (step-store-data-file script inputs store)) + (stdout-file (step-store-stdout-file script inputs store)) + (stderr-file (step-store-stderr-file script inputs store))) (if (file-exists? store-data-file) ;; Return a dummy success state object if script has already ;; been run successfully. (state-return (begin - (format (current-error-port) - "~a previously run; retrieving result from store~%" - script) - (single-machine-job-state script #t))) + (log-info "~a previously run; retrieving result from store" + script) + (single-machine-job-state script inputs #t))) ;; Run script if it has not already been run. (begin ;; Delete output files directory if an incomplete one exists @@ -401,44 +335,40 @@ state-monadic job state object. (when (file-exists? store-files-directory) (delete-file-recursively store-files-directory)) (mkdir store-files-directory) - (cond - ((eq? batch-system 'single-machine) - (state-let* ((success? (single-machine:submit-job - `(("WORKFLOW_OUTPUT_DIRECTORY" . - ,store-files-directory) - ("WORKFLOW_OUTPUT_DATA_FILE" . - ,store-data-file)) - stdout-file - stderr-file - script))) - (state-return (single-machine-job-state script success?)))) - ((slurm-api-batch-system? batch-system) - (state-let* ((job-id - (slurm:submit-job `(("WORKFLOW_OUTPUT_DIRECTORY" . - ,store-files-directory) - ("WORKFLOW_OUTPUT_DATA_FILE" . - ,store-data-file)) - stdout-file - stderr-file - cpus - name - script - #:api-endpoint (slurm-api-batch-system-endpoint batch-system) - #:jwt (slurm-api-batch-system-jwt batch-system) - #:partition (slurm-api-batch-system-partition batch-system) - #:nice (slurm-api-batch-system-nice batch-system)))) - (format (current-error-port) - "~a submitted as job ID ~a~%" - script - job-id) - (state-return (slurm-job-state script job-id)))) - (else - (assertion-violation batch-system "Invalid batch system"))))))) + (let ((environment + `(("WORKFLOW_INPUTS" . ,(scm->json-string inputs)) + ("WORKFLOW_OUTPUT_DIRECTORY" . ,store-files-directory) + ("WORKFLOW_OUTPUT_DATA_FILE" . ,store-data-file)))) + (cond + ((eq? batch-system 'single-machine) + (state-let* ((success? (single-machine:submit-job environment + stdout-file + stderr-file + script))) + (state-return (single-machine-job-state script inputs success?)))) + ((slurm-api-batch-system? batch-system) + (state-let* ((job-id + (slurm:submit-job environment + stdout-file + stderr-file + cpus + name + script + #:api-endpoint (slurm-api-batch-system-endpoint batch-system) + #:jwt (slurm-api-batch-system-jwt batch-system) + #:partition (slurm-api-batch-system-partition batch-system) + #:nice (slurm-api-batch-system-nice batch-system)))) + (log-info "~a submitted as job ID ~a~%" + script + job-id) + (state-return (slurm-job-state script inputs job-id)))) + (else + (assertion-violation batch-system "Invalid batch system")))))))) -(define (capture-command-line-tool-output script store) +(define (capture-command-line-tool-output script inputs store) "Capture and return output of @code{CommandLineTool} class workflow that ran -@var{script}. @var{store} is the path to the ravanan store." - (let* ((store-data-file (script->store-data-file script store)) +@var{script} with @var{inputs}. @var{store} is the path to the ravanan store." + (let* ((store-data-file (step-store-data-file script inputs store)) (output-json (call-with-input-file store-data-file json->scm))) ;; Recursively rewrite file paths in output JSON. @@ -455,7 +385,7 @@ state-monadic job state object. (string=? (assoc-ref tree "class") "File")) (let* ((store-files-directory - (script->store-files-directory script store)) + (step-store-files-directory script inputs store)) (path (expand-file-name (relative-file-name (assoc-ref tree "path") store-files-directory) @@ -478,52 +408,6 @@ state-monadic job state object. (call-with-input-file store-data-file json->scm))) -(define (copy-input-files-gexp inputs) - "Return a G-expression that copies @code{File} type inputs (along with secondary -files) from @var{inputs} into @code{inputs-directory} and return a new -association list with updated @code{location} and @code{path} fields. - -The returned G-expression will reference an @code{inputs-directory} variable." - (define (copy-input-files input) - (cond - ((vector? input) - #~,(list->vector - `#$(map copy-input-files - (vector->list input)))) - ((eq? (object-type input) - 'File) - #~,(let ((path-in-inputs-directory - (expand-file-name #$(store-item-name (assoc-ref input "path")) - inputs-directory))) - (copy-file #$(assoc-ref input "path") - path-in-inputs-directory) - (maybe-assoc-set '#$input - (cons "location" - (just path-in-inputs-directory)) - (cons "path" - (just path-in-inputs-directory)) - (cons "basename" - (just (basename path-in-inputs-directory))) - (cons "nameroot" - (just (file-name-stem path-in-inputs-directory))) - (cons "nameext" - (just (file-name-extension path-in-inputs-directory))) - (cons "secondaryFiles" - #$(from-maybe - (maybe-let* ((secondary-files - (maybe-assoc-ref (just input) "secondaryFiles"))) - (just #~(just (list->vector - `#$(vector-map->list copy-input-files - secondary-files))))) - #~%nothing))))) - (else input))) - - #~(list->dotted-list - `#$(map (match-lambda - ((id . input) - (list id (copy-input-files input)))) - inputs))) - (define (find-requirement requirements class) "Find requirement of @var{class} among @var{requirements} and return a maybe-monadic value." @@ -548,64 +432,60 @@ maybe-monadic value." (guix profiles)))) (raise-exception (manifest-file-error manifest-file)))) -(define (call-with-inferior inferior proc) - "Call @var{proc} with @var{inferior} and return the return value of @var{proc}. -Close @var{inferior} when done, even if @var{proc} exits non-locally." - (dynamic-wind (const #t) - (cut proc inferior) - (cut close-inferior inferior))) +;; Monadic version of inferior-eval-with-store; Argument order is rearranged +;; slightly to suit store-lift. +(define inferior-meval-with-store + (store-lift (lambda (store inferior proc) + (inferior-eval-with-store inferior store proc)))) -(define (manifest-file->environment manifest-file channels guix-daemon-socket) - "Build @var{manifest-file} and return an association list of environment -variables to set to use the built profile. Connect to the Guix daemon specified -by @var{guix-daemon-socket}. If @var{channels} is not @code{#f}, build manifest -in a Guix inferior with @var{channels}." - (if channels - (call-with-inferior (inferior-for-channels channels) - (cut inferior-eval - `(begin - (use-modules (ice-9 match) - (guix search-paths) - (gnu packages) - (guile) - (guix gexp) - (guix profiles)) +(define (manifest-file->search-path-sexps manifest-file inferior) + "Return a list of search path S-expressions for a profile described by +@var{manifest-file}. Load manifest in @var{inferior} unless it is @code{#f}. +Return value is monadic." + (define proc + `(lambda (store) + ;; Do not auto-compile manifest files. + (set! %load-should-auto-compile #f) + (map search-path-specification->sexp + (manifest-search-paths (load ,manifest-file))))) + + (if inferior + (begin + (inferior-eval '(use-modules (guix profiles) + (guix search-paths)) + inferior) + (mbegin %store-monad + (inferior-meval-with-store inferior proc))) + (mbegin %store-monad + (return (map search-path-specification->sexp + (manifest-search-paths (load-manifest manifest-file))))))) - (define (build-derivation drv guix-daemon-socket) - (if guix-daemon-socket - (parameterize ((%daemon-socket-uri guix-daemon-socket)) - (build-derivation drv)) - (with-store store - (run-with-store store - (mlet %store-monad ((drv drv)) - (mbegin %store-monad - (built-derivations (list drv)) - (return (derivation->output-path drv)))))))) +(define (manifest-file->profile-derivation manifest-file inferior) + "Return a derivation to build @var{manifest-file}. Build manifest in +@var{inferior} unless it is @code{#f}. Return value is monadic." + (define proc + `(lambda (store) + ;; Do not auto-compile manifest files. + (set! %load-should-auto-compile #f) + (derivation-file-name + (run-with-store store + (profile-derivation (load ,manifest-file) + #:allow-collisions? #t))))) - ;; Do not auto-compile manifest files. - (set! %load-should-auto-compile #f) - (let ((manifest (load ,(canonicalize-path manifest-file)))) - (map (match-lambda - ((specification . value) - (cons (search-path-specification-variable specification) - value))) - (evaluate-search-paths - (manifest-search-paths manifest) - (list (build-derivation - (profile-derivation manifest - #:allow-collisions? #t) - ,guix-daemon-socket)))))) - <>)) - (manifest->environment (load-manifest manifest-file) - guix-daemon-socket))) + (if inferior + (begin + (inferior-eval '(use-modules (guix profiles)) + inferior) + (mlet %store-monad ((drv-file (inferior-meval-with-store inferior proc))) + (return (read-derivation-from-file drv-file)))) + (let ((manifest (load-manifest manifest-file))) + (profile-derivation manifest + #:allow-collisions? #t)))) -(define (software-packages->environment packages channels guix-daemon-socket) - "Build a profile with @var{packages} and return an association list -of environment variables to set to use the built profile. @var{packages} is a -vector of @code{SoftwarePackage} assocation lists as defined in the CWL -standard. Connect to the Guix daemon specified by @var{guix-daemon-socket}. If -@var{channels} is not @code{#f}, look up packages in a Guix inferior with -@var{channels}." +(define (software-packages->manifest packages inferior) + "Return a manifest with @var{packages}. @var{packages} is a vector of +@code{SoftwarePackage} assocation lists as defined in the CWL standard. Look up +packages in @var{inferior} unless it is @code{#f}." (define (software-package->package-specification package) (string-append (assoc-ref* package "package") (from-maybe @@ -614,63 +494,48 @@ standard. Connect to the Guix daemon specified by @var{guix-daemon-socket}. If (cut string-append "@" <>))) ""))) - (define packages->environment - (compose (cut manifest->environment <> guix-daemon-socket) - packages->manifest)) - - (if channels - (call-with-inferior (inferior-for-channels channels) - (lambda (inferior) - (packages->environment - (vector-map->list (lambda (package) - (let ((name (assoc-ref package "package")) - (version (assoc-ref package "version"))) - (match (lookup-inferior-packages inferior - name - version) - ((inferior-package _ ...) - inferior-package)))) - packages)))) - (packages->environment + (packages->manifest + (if inferior + (vector-map->list (lambda (package) + (let ((name (assoc-ref package "package")) + (version (assoc-ref package "version"))) + (match (lookup-inferior-packages inferior + name + version) + ((inferior-package _ ...) + inferior-package)))) + packages) (vector-map->list (compose specification->package software-package->package-specification) packages)))) -(define (manifest->environment manifest guix-daemon-socket) - "Build @var{manifest} and return an association list of environment -variables to set to use the built profile. Connect to the Guix daemon specified -by @var{guix-daemon-socket}." - (define (build-derivation drv guix-daemon-socket) - (if guix-daemon-socket - (parameterize ((%daemon-socket-uri guix-daemon-socket)) - (build-derivation drv #f)) - (with-store store - (run-with-store store - (mlet %store-monad ((drv drv)) - (mbegin %store-monad - (built-derivations (list drv)) - (return (derivation->output-path drv)))))))) +(define (software-packages->search-path-sexps packages inferior) + "Return a list of search path S-expressions for a profile with @var{packages}. +@var{packages} is a vector of @code{SoftwarePackage} assocation lists as defined +in the CWL standard. Look up packages in @var{inferior} unless it is @code{#f}. +Return value is monadic." + (mbegin %store-monad + (return (map search-path-specification->sexp + (manifest-search-paths + (software-packages->manifest packages inferior)))))) - (map (match-lambda - ((specification . value) - (cons (search-path-specification-variable specification) - value))) - (evaluate-search-paths - (manifest-search-paths manifest) - (list (build-derivation - (profile-derivation manifest - #:allow-collisions? #t) - guix-daemon-socket))))) +(define (software-packages->profile-derivation packages inferior) + "Return a derivation to build a profile with @var{packages}. @var{packages} is a +vector of @code{SoftwarePackage} assocation lists as defined in the CWL +standard. Look up packages in @var{inferior} unless it is @code{#f}. Return +value is monadic." + (profile-derivation (software-packages->manifest packages inferior) + #:allow-collisions? #t)) -(define (build-command-line-tool-script name manifest-file channels cwl inputs - scratch store batch-system - guix-daemon-socket) +(define (build-command-line-tool-script name manifest-file inferior cwl + scratch store batch-system) "Build and return script to run @code{CommandLineTool} class workflow @var{cwl} -named @var{name} with @var{inputs} using tools from Guix manifest in -@var{manifest-file} and on @var{batch-system}. +named @var{name} using tools from Guix manifest in @var{manifest-file} and on +@var{batch-system}. Use @var{inferior} to build manifests, unless it is +@code{#f}. Return value is monadic. -@var{channels}, @var{scratch}, @var{store} and @var{guix-daemon-socket} are the -same as in @code{run-workflow} from @code{(ravanan workflow)}." +@var{scratch} and @var{store} are the same as in @code{run-workflow} from +@code{(ravanan workflow)}." (define (environment-variables env-var-requirement) (just (vector-map->list (lambda (environment-definition) #~(list #$(assoc-ref* environment-definition @@ -697,12 +562,12 @@ same as in @code{run-workflow} from @code{(ravanan workflow)}." (define (cores batch-system) (cond - ((slurm-api-batch-system? batch-system) - #~(string->number (getenv "SLURM_CPUS_ON_NODE"))) - ((eq? batch-system 'single-machine) - #~(total-processor-count)) - (else - (assertion-violation batch-system "Unknown batch system")))) + ((slurm-api-batch-system? batch-system) + #~(string->number (getenv "SLURM_CPUS_ON_NODE"))) + ((eq? batch-system 'single-machine) + #~(total-processor-count)) + (else + (assertion-violation batch-system "Unknown batch system")))) (define stdout-filename (cond @@ -725,12 +590,35 @@ same as in @code{run-workflow} from @code{(ravanan workflow)}." (list 'File 'Directory)) (error #f "glob output binding not specified")))) + (define (coerce-argument argument) + (assoc-set argument + (cons "valueFrom" + (coerce-expression (assoc-ref* argument "valueFrom"))))) + + (define (vector->gexp vec) + ;; FIXME: G-expressions in vectors are not properly substituted. Fix this in + ;; Guix. + #~(vector #$@(vector->list vec))) + + (define (alist->gexp alist) + ;; FIXME: G-expressions as values in dotted alists are not properly + ;; substituted. Fix this in Guix. + #~(list #$@(map (match-lambda + ((key . value) + #~(cons #$key #$value))) + alist))) + (define run-command-gexp - #~(run-command (list #$@(append-map (lambda (arg) - (if (command-line-binding? arg) - (command-line-binding->args arg) - (list arg))) - (build-command cwl inputs))) + #~(run-command (append-map (lambda (arg) + (if (command-line-binding? arg) + (command-line-binding->args arg) + (list arg))) + (build-command #$(assoc-ref cwl "baseCommand") + #$(vector->gexp + (vector-map (compose alist->gexp coerce-argument) + (assoc-ref cwl "arguments"))) + #$(assoc-ref cwl "inputs") + inputs)) #$(coerce-expression (assoc-ref cwl "stdin")) #$stdout-filename '#$(from-maybe @@ -812,7 +700,7 @@ same as in @code{run-workflow} from @code{(ravanan workflow)}." (maybe-let* ((work-reuse (find-requirement requirements "WorkReuse"))) (and (not (coerce-type (assoc-ref* work-reuse "enableReuse") 'boolean)) - (user-error "Disabling WorkReuse is not supported. ravanan's strong caching using Guix makes it unnecessary.")))) + (user-error "Disabling WorkReuse is not supported. With ravanan's strong caching using Guix, there is no need to disable WorkReuse.")))) (maybe-let* ((hints (maybe-assoc-ref (just cwl) "hints"))) (check-requirements hints batch-system @@ -823,276 +711,329 @@ same as in @code{run-workflow} from @code{(ravanan workflow)}." (maybe-let* ((work-reuse (find-requirement hints "WorkReuse"))) (and (not (coerce-type (assoc-ref* work-reuse "enableReuse") 'boolean)) - (warning "Ignoring disable of WorkReuse. ravanan's strong caching using Guix makes it unnecessary.")))) - ;; Copy input files and update corresponding input objects. - (build-gexp-script name - (let* ((requirements (inherit-requirements (or (assoc-ref cwl "requirements") - #()) - (or (assoc-ref cwl "hints") - #()))) - (initial-work-dir-requirement (find-requirement requirements - "InitialWorkDirRequirement")) - (manifest-file - (from-maybe (maybe-assoc-ref - (find-requirement requirements "SoftwareRequirement") - "manifest") - manifest-file)) - (packages - (from-maybe (maybe-assoc-ref - (find-requirement requirements "SoftwareRequirement") - "packages") - #()))) - (with-imported-modules (source-module-closure '((ravanan work command-line-tool) - (ravanan work monads) - (ravanan work ui) - (ravanan work vectors) - (ravanan glob) - (guix search-paths)) - #:select? (match-lambda - (('ravanan work . _) #t) - (('guix . _) #t) - (('json . _) #t) - (_ #f))) - (with-extensions (list guile-filesystem guile-gcrypt) - #~(begin - (use-modules (ravanan work command-line-tool) - (ravanan work monads) - (ravanan work types) - (ravanan work ui) - (ravanan work utils) - (ravanan work vectors) - (ravanan glob) - (rnrs io ports) - (srfi srfi-1) - (srfi srfi-26) - (ice-9 filesystem) - (ice-9 match) - (ice-9 threads) - (guix search-paths) - (json)) + (warning "Ignoring disable of WorkReuse. With ravanan's strong caching using Guix, there is no need to disable WorkReuse.")))) + + (let* ((requirements (inherit-requirements (or (assoc-ref cwl "requirements") + #()) + (or (assoc-ref cwl "hints") + #()))) + (initial-work-dir-requirement (find-requirement requirements + "InitialWorkDirRequirement")) + (manifest-file + (from-maybe (maybe-assoc-ref + (find-requirement requirements "SoftwareRequirement") + "manifest") + manifest-file)) + (packages + (from-maybe (maybe-assoc-ref + (find-requirement requirements "SoftwareRequirement") + "packages") + #()))) + (mlet %store-monad ((search-path-sexps + (match packages + ;; No package specifications; try the manifest file. + (#() + (manifest-file->search-path-sexps manifest-file + inferior)) + ;; Use package specifications to build an + ;; environment. + (_ + (software-packages->search-path-sexps packages + inferior)))) + (profile-derivation + (match packages + ;; No package specifications; try the manifest file. + (#() + (manifest-file->profile-derivation manifest-file + inferior)) + ;; Use package specifications to build an + ;; environment. + (_ + (software-packages->profile-derivation packages + inferior))))) + (build-gexp-script name + (with-imported-modules (source-module-closure '((ravanan work command-line-tool) + (ravanan work monads) + (ravanan work ui) + (ravanan work vectors) + (ravanan glob) + (guix search-paths)) + #:select? (match-lambda + (('ravanan work . _) #t) + (('guix . _) #t) + (('json . _) #t) + (_ #f))) + (with-extensions (list guile-filesystem guile-gcrypt) + #~(begin + (use-modules (ravanan work command-line-tool) + (ravanan work monads) + (ravanan work types) + (ravanan work ui) + (ravanan work utils) + (ravanan work vectors) + (ravanan glob) + (rnrs io ports) + (srfi srfi-1) + (srfi srfi-26) + (ice-9 filesystem) + (ice-9 match) + (ice-9 threads) + (guix search-paths) + (json)) - (define (copy-file-value value directory) - ;; Copy file represented by value to directory and return the - ;; new File value. - (let* ((path (assoc-ref* value "path")) - (destination-path (expand-file-name (basename path) - directory))) - (copy-file path destination-path) - (assoc-set value - (cons "location" (string-append "file://" destination-path)) - (cons "path" destination-path)))) + (define (copy-input-files input inputs-directory) + ;; Copy input files and update corresponding input objects. + (cond + ((vector? input) + (vector-map (cut copy-input-files <> inputs-directory) + input)) + ((eq? (object-type input) + 'File) + (let ((path-in-inputs-directory + ;; Input files may have the same filename. So, we take + ;; the additional precaution of copying input files + ;; into their own hash-prefixed subdirectories, just + ;; like they are in the ravanan store. + (expand-file-name (file-name-join + (take-right (file-name-split + (assoc-ref input "path")) + 2)) + inputs-directory))) + (make-directories (file-dirname path-in-inputs-directory)) + (copy-file (assoc-ref input "path") + path-in-inputs-directory) + (maybe-assoc-set input + (cons "location" + (just path-in-inputs-directory)) + (cons "path" + (just path-in-inputs-directory)) + (cons "basename" + (just (basename path-in-inputs-directory))) + (cons "nameroot" + (just (file-name-stem path-in-inputs-directory))) + (cons "nameext" + (just (file-name-extension path-in-inputs-directory))) + (cons "secondaryFiles" + (maybe-let* ((secondary-files + (maybe-assoc-ref (just input) "secondaryFiles"))) + (just (vector-map (cut copy-input-files <> inputs-directory) + secondary-files))))))) + (else input))) - (define (capture-secondary-file path secondary-file - workflow-output-directory) - "Capture @var{secondary-file} for primary @var{path} and return its + (define (copy-file-value value directory) + ;; Copy file represented by value to directory and return the + ;; new File value. + (let* ((path (assoc-ref* value "path")) + (destination-path (expand-file-name (basename path) + directory))) + (copy-file path destination-path) + (assoc-set value + (cons "location" (string-append "file://" destination-path)) + (cons "path" destination-path)))) + + (define (capture-secondary-file path secondary-file + workflow-output-directory) + "Capture @var{secondary-file} for primary @var{path} and return its canonicalized value. If @var{required} is @code{#t} and no such secondary file is found, error out. @var{workflow-output-directory} is path to the output directory of the workflow." - (let* ((secondary-file-path (secondary-path path secondary-file)) - (secondary-file-value - (and (file-exists? secondary-file-path) - (copy-file-value (canonicalize-file-value - `(("class" . "File") - ("path" . ,secondary-file-path))) - workflow-output-directory)))) - (if (and (assoc-ref* secondary-file "required") - (not secondary-file-value)) - (user-error "Secondary file ~a missing for output path ~a" - pattern - path) - secondary-file-value))) - - (define (path+sha1->value path sha1 workflow-output-directory maybe-secondary-files) - (maybe-assoc-set (copy-file-value (canonicalize-file-value - `(("class" . "File") - ("path" . ,path) - ("checksum" . ,(string-append "sha1$" sha1)))) - workflow-output-directory) - (cons "secondaryFiles" - (maybe-let* ((secondary-files maybe-secondary-files)) - (just (vector-filter-map (cut capture-secondary-file - path - <> - workflow-output-directory) - secondary-files)))))) + (let* ((secondary-file-path (secondary-path path secondary-file)) + (secondary-file-value + (and (file-exists? secondary-file-path) + (copy-file-value (canonicalize-file-value + `(("class" . "File") + ("path" . ,secondary-file-path))) + workflow-output-directory)))) + (if (and (assoc-ref* secondary-file "required") + (not secondary-file-value)) + (user-error "Secondary file ~a missing for output path ~a" + pattern + path) + secondary-file-value))) - (define (path->value path workflow-output-directory maybe-secondary-files) - (path+sha1->value path - (sha1-hash path) - workflow-output-directory - maybe-secondary-files)) + (define (path+sha1->value path sha1 workflow-output-directory maybe-secondary-files) + (maybe-assoc-set (copy-file-value (canonicalize-file-value + `(("class" . "File") + ("path" . ,path) + ("checksum" . ,(string-append "sha1$" sha1)))) + workflow-output-directory) + (cons "secondaryFiles" + (maybe-let* ((secondary-files maybe-secondary-files)) + (just (vector-filter-map (cut capture-secondary-file + path + <> + workflow-output-directory) + secondary-files)))))) - (define (stdout-output->value workflow-output-directory - stdout-directory - stdout-filename - output) - (cons (assoc-ref output "id") - (let ((sha1 (sha1-hash stdout-filename))) - ;; Use path+sha1->value instead of path->value to avoid - ;; recomputing the SHA1 hash. - (path+sha1->value - (if (string=? stdout-filename - (file-name-join* stdout-directory "stdout")) - ;; If stdout filename is unspecified, rename it to a - ;; hash of its contents. - (let ((hashed-filename - (file-name-join* stdout-directory sha1))) - (rename-file stdout-filename - hashed-filename) - hashed-filename) - ;; Else, return the stdout filename as it is. - stdout-filename) - sha1 - workflow-output-directory - %nothing)))) + (define (path->value path workflow-output-directory maybe-secondary-files) + (path+sha1->value path + (sha1-hash path) + workflow-output-directory + maybe-secondary-files)) - (define (other-output->value workflow-output-directory - output-id output-type-tree - maybe-secondary-files glob-pattern) - (cons output-id - ;; TODO: Support all types. - (let* ((output-type (formal-parameter-type output-type-tree)) - (paths (glob glob-pattern)) - (matched-type (glob-match-type paths output-type))) - (unless matched-type - (user-error "Type ~a mismatch for globbed paths ~a" - output-type - paths)) - ;; Coerce output value into matched type. - (let ((output-values (map (cut path->value - <> - workflow-output-directory - maybe-secondary-files) - paths))) - (cond - ((memq matched-type (list 'File 'Directory)) - (match output-values - ((output-file) - output-file))) - ;; TODO: Recurse. - ((and (array-type? matched-type) - (memq (array-type-subtype matched-type) - (list 'File 'Directory))) - (list->vector output-values)) - ((eq? matched-type 'null) - 'null)))))) + (define (stdout-output->value workflow-output-directory + stdout-directory + stdout-filename + output) + (cons (assoc-ref output "id") + (let ((sha1 (sha1-hash stdout-filename))) + ;; Use path+sha1->value instead of path->value to avoid + ;; recomputing the SHA1 hash. + (path+sha1->value + (if (string=? stdout-filename + (file-name-join* stdout-directory "stdout")) + ;; If stdout filename is unspecified, rename it to a + ;; hash of its contents. + (let ((hashed-filename + (file-name-join* stdout-directory sha1))) + (rename-file stdout-filename + hashed-filename) + hashed-filename) + ;; Else, return the stdout filename as it is. + stdout-filename) + sha1 + workflow-output-directory + %nothing)))) - (define (stage-file file entry-name) - ;; Stage file as entry-name and return the staged File value. - (rename-file (assoc-ref* file "path") - entry-name) - (canonicalize-file-value - (maybe-assoc-set `(("class" . "File") - ("path" . ,entry-name)) - (cons "secondaryFiles" - (maybe-let* ((secondary-files - (maybe-assoc-ref (just file) "secondaryFiles"))) - (just (vector-map (lambda (file) - (stage-file file (assoc-ref* file "basename"))) - secondary-files))))))) + (define (other-output->value workflow-output-directory + output-id output-type-tree + maybe-secondary-files glob-pattern) + (cons output-id + ;; TODO: Support all types. + (let* ((output-type (formal-parameter-type output-type-tree)) + (paths (glob glob-pattern)) + (matched-type (glob-match-type paths output-type))) + (unless matched-type + (user-error "Type ~a mismatch for globbed paths ~a" + output-type + paths)) + ;; Coerce output value into matched type. + (let ((output-values (map (cut path->value + <> + workflow-output-directory + maybe-secondary-files) + paths))) + (cond + ((memq matched-type (list 'File 'Directory)) + (match output-values + ((output-file) + output-file))) + ;; TODO: Recurse. + ((and (array-type? matched-type) + (memq (array-type-subtype matched-type) + (list 'File 'Directory))) + (list->vector output-values)) + ((eq? matched-type 'null) + 'null)))))) - ;; Stage files. - ;; We currently support File and Dirent only. TODO: Support others. - (define (stage-files entries outputs-directory) - ;; Stage entries and return an association list mapping files - ;; (presumably input files) that were staged. - (filter-map (match-lambda - ((entry-name entry) - (cond - ;; Stuff string literal into a file. - ((string? entry) - (call-with-input-file entry-name - (cut put-string <> entry)) - #f) - ;; Symlink to the file. - ((eq? (object-type entry) - 'File) - (cons entry - (stage-file entry entry-name)))))) - entries)) + (define (stage-file file entry-name) + ;; Stage file as entry-name and return the staged File value. + (rename-file (assoc-ref* file "path") + entry-name) + (canonicalize-file-value + (maybe-assoc-set `(("class" . "File") + ("path" . ,entry-name)) + (cons "secondaryFiles" + (maybe-let* ((secondary-files + (maybe-assoc-ref (just file) "secondaryFiles"))) + (just (vector-map (lambda (file) + (stage-file file (assoc-ref* file "basename"))) + secondary-files))))))) - (define (set-staged-path input staging-mapping) - ;; If input is a File type input that was staged, return new - ;; staged value. Else, return as is. - (cond - ;; Recurse on vector inputs. - ((vector? input) - (list->vector - (map (cut set-staged-path <> staging-mapping) - (vector->list input)))) - ;; Try to replace File input value with staged value. - ((eq? (object-type input) - 'File) - (or (any (match-lambda - ((old-value . new-value) - (and (alist=? input old-value) - new-value))) - staging-mapping) - input)) - ;; Else, return as is. - (else input))) + ;; Stage files. + ;; We currently support File and Dirent only. TODO: Support others. + (define (stage-files entries outputs-directory) + ;; Stage entries and return an association list mapping files + ;; (presumably input files) that were staged. + (filter-map (match-lambda + ((entry-name entry) + (cond + ;; Stuff string literal into a file. + ((string? entry) + (call-with-input-file entry-name + (cut put-string <> entry)) + #f) + ;; Symlink to the file. + ((eq? (object-type entry) + 'File) + (cons entry + (stage-file entry entry-name)))))) + entries)) - ;; Set search paths for manifest. - (for-each (match-lambda - ((name . value) - (setenv name value))) - '#$(match packages - ;; No package specifications; try the manifest - ;; file. - (#() - (manifest-file->environment manifest-file - channels - guix-daemon-socket)) - ;; Use package specifications to build an - ;; environment. - (_ - (software-packages->environment packages - channels - guix-daemon-socket)))) + (define (set-staged-path input staging-mapping) + ;; If input is a File type input that was staged, return new + ;; staged value. Else, return as is. + (cond + ;; Recurse on vector inputs. + ((vector? input) + (list->vector + (map (cut set-staged-path <> staging-mapping) + (vector->list input)))) + ;; Try to replace File input value with staged value. + ((eq? (object-type input) + 'File) + (or (any (match-lambda + ((old-value . new-value) + (and (alist=? input old-value) + new-value))) + staging-mapping) + input)) + ;; Else, return as is. + (else input))) - (call-with-temporary-directory - (lambda (inputs-directory) - ;; We need to canonicalize JSON trees before inserting them - ;; into G-expressions. If we don't, we would have degenerate - ;; G-expressions that produce exactly the same result. - (let ((inputs #$(copy-input-files-gexp (canonicalize-json inputs))) - (runtime `(("cores" . ,#$(cores batch-system))))) + ;; Set search paths for manifest. + (for-each (match-lambda + ((specification . value) + (setenv (search-path-specification-variable specification) + value))) + (evaluate-search-paths + (map sexp->search-path-specification + '#$search-path-sexps) + '(#$profile-derivation))) + (call-with-temporary-directory + (lambda (inputs-directory) + (let ((inputs (map (match-lambda + ((id . input) + (cons id + (copy-input-files input inputs-directory)))) + (json-string->scm + (getenv "WORKFLOW_INPUTS")))) + (runtime `(("cores" . ,#$(cores batch-system))))) - ;; Set environment defined by workflow. - (for-each (match-lambda - ((name value) - (setenv name value))) - (list #$@(from-maybe - (maybe-bind - (find-requirement requirements - "EnvVarRequirement") - environment-variables) - (list)))) + ;; Set environment defined by workflow. + (for-each (match-lambda + ((name value) + (setenv name value))) + (list #$@(from-maybe + (maybe-bind + (find-requirement requirements + "EnvVarRequirement") + environment-variables) + (list)))) - (call-with-temporary-directory - (lambda (stdout-directory) - (call-with-temporary-directory - (lambda (outputs-directory) - (call-with-current-directory outputs-directory - (lambda () - (let* ((staging-mapping - (stage-files (list #$@(from-maybe - (maybe-bind initial-work-dir-requirement - (compose just files-to-stage)) - (list))) - outputs-directory)) - (inputs - (map (match-lambda - ((id . input) - (cons id - (set-staged-path input - staging-mapping)))) - inputs))) - ;; Actually run the command. - #$run-command-gexp - ;; Capture outputs. - #$capture-outputs-gexp)))) - #$scratch)) - #$scratch))) - #$scratch))))) - guix-daemon-socket)) + (call-with-temporary-directory + (lambda (stdout-directory) + (call-with-temporary-directory + (lambda (outputs-directory) + (call-with-current-directory outputs-directory + (lambda () + (let* ((staging-mapping + (stage-files (list #$@(from-maybe + (maybe-bind initial-work-dir-requirement + (compose just files-to-stage)) + (list))) + outputs-directory)) + (inputs + (map (match-lambda + ((id . input) + (cons id + (set-staged-path input + staging-mapping)))) + inputs))) + ;; Actually run the command. + #$run-command-gexp + ;; Capture outputs. + #$capture-outputs-gexp)))) + #$scratch)) + #$scratch))) + #$scratch)))))))) diff --git a/ravanan/job-state.scm b/ravanan/job-state.scm index a769698..2894618 100644 --- a/ravanan/job-state.scm +++ b/ravanan/job-state.scm @@ -34,18 +34,21 @@ slurm-job-state job-state-script + job-state-inputs job-state-status)) (define-immutable-record-type <single-machine-job-state> - (single-machine-job-state script success?) + (single-machine-job-state script inputs success?) single-machine-job-state? (script single-machine-job-state-script) + (inputs single-machine-job-state-inputs) (success? single-machine-job-state-success?)) (define-immutable-record-type <slurm-job-state> - (slurm-job-state script job-id) + (slurm-job-state script inputs job-id) slurm-job-state? (script slurm-job-state-script) + (inputs slurm-job-state-inputs) (job-id slurm-job-state-job-id)) (define (job-state-script state) @@ -56,6 +59,14 @@ slurm-job-state-script)) state)) +(define (job-state-inputs state) + ((cond + ((single-machine-job-state? state) + single-machine-job-state-inputs) + ((slurm-job-state? state) + slurm-job-state-inputs)) + state)) + (define* (job-state-status state batch-system) "Return current status of job with @var{state} on @var{batch-system}. The status is one of the symbols @code{completed}, @code{failed} or @code{pending} diff --git a/ravanan/propnet.scm b/ravanan/propnet.scm index 34624b5..610991d 100644 --- a/ravanan/propnet.scm +++ b/ravanan/propnet.scm @@ -26,22 +26,26 @@ #:use-module (ravanan work monads) #:use-module (ravanan work utils) #:export (propnet + propnet? propnet-propagators propnet-value=? propnet-merge-values propnet-scheduler propagator + propagator? propagator-name propagator-proc propagator-inputs propagator-optional-inputs propagator-outputs scheduler + scheduler? scheduler-schedule scheduler-poll scheduler-capture-output schedule-propnet state+status + state+status? state+status-state state+status-status poll-propnet @@ -64,6 +68,12 @@ (optional-inputs propagator-optional-inputs) (outputs propagator-outputs)) +(set-record-type-printer! <propagator> + (lambda (record port) + (display "#<<propagator> " port) + (write (propagator-name record) port) + (display ">" port))) + (define-immutable-record-type <scheduler> (scheduler schedule poll capture-output) scheduler? @@ -86,6 +96,18 @@ (propagators-in-flight propnet-state-propagators-in-flight) (propagators-inbox propnet-state-propagators-inbox)) +(set-record-type-printer! <propnet-state> + (lambda (record port) + (display "#<<propnet-state> cells: " port) + (write (run-with-state (propnet-state-cells record)) port) + (display " cells-inbox: " port) + (write (run-with-state (propnet-state-cells-inbox record)) port) + (display " propagators-in-flight: " port) + (write (run-with-state (propnet-state-propagators-in-flight record)) port) + (display " propagators-inbox: " port) + (write (run-with-state (propnet-state-propagators-inbox record)) port) + (display ">" port))) + (define (partition-map pred proc lst) "Partition @var{lst} into two lists using @var{pred} like @code{partition}. Then, map @var{proc} over both the lists and return the resulting lists." diff --git a/ravanan/reader.scm b/ravanan/reader.scm index a4e57c7..d7670bb 100644 --- a/ravanan/reader.scm +++ b/ravanan/reader.scm @@ -17,15 +17,18 @@ ;;; along with ravanan. If not, see <https://www.gnu.org/licenses/>. (define-module (ravanan reader) + #:use-module (rnrs conditions) + #:use-module (rnrs exceptions) #:use-module (rnrs io ports) #:use-module (srfi srfi-26) #:use-module (ice-9 filesystem) #:use-module (ice-9 match) #:use-module (json) - #:use-module (yaml) + #:use-module ((yaml) #:prefix yaml:) #:use-module (ravanan work command-line-tool) #:use-module (ravanan work monads) #:use-module (ravanan work types) + #:use-module (ravanan work ui) #:use-module (ravanan work utils) #:use-module (ravanan work vectors) #:export (read-workflow @@ -101,18 +104,31 @@ each association list of the returned vector of association lists. If (define (normalize-env-var-requirement env-var-requirement) (assoc-set env-var-requirement - (cons "envDef" - (coerce-alist->vector - (assoc-ref env-var-requirement "envDef") - "envName" "envValue")))) + (cons "envDef" + (coerce-alist->vector + (assoc-ref env-var-requirement "envDef") + "envName" "envValue")))) + +(define (normalize-software-requirement software-requirement) + (maybe-assoc-set software-requirement + ;; Canonicalize manifest file path so that we look it up with respect to the + ;; path of the workflow file. + (cons "manifest" + (maybe-bind (maybe-assoc-ref (just software-requirement) + "manifest") + (compose just canonicalize-path))))) (define (normalize-requirements maybe-requirements) (maybe-let* ((requirements maybe-requirements)) (just (vector-map (lambda (requirement) - (if (string=? (assoc-ref requirement "class") - "EnvVarRequirement") - (normalize-env-var-requirement requirement) - requirement)) + (let ((class (assoc-ref requirement "class"))) + (cond + ((string=? class "EnvVarRequirement") + (normalize-env-var-requirement requirement)) + ((string=? class "SoftwareRequirement") + (normalize-software-requirement requirement)) + (else + requirement)))) (coerce-alist->vector requirements "class"))))) (define (normalize-secondary-files secondary-files default-required) @@ -149,7 +165,7 @@ array of array of @code{File}s, etc. Else, return @code{#f}" (maybe-assoc-set input (cons "default" (maybe-bind (maybe-assoc-ref (just input) "default") - normalize-input)) + (compose just normalize-input))) (cons "secondaryFiles" (maybe-bind (maybe-assoc-ref (just input) "secondaryFiles") (compose just @@ -250,10 +266,23 @@ array of array of @code{File}s, etc. Else, return @code{#f}" (define (read-workflow workflow-file) "Read CWL workflow (of any class) from @var{workflow-file}." - (call-with-current-directory (dirname workflow-file) - ;; TODO: Implement $import directive. - (cut normalize-workflow - (preprocess-include (read-yaml-file (basename workflow-file)))))) + (guard (c ((and (who-condition? c) + (eq? (condition-who c) + 'read-yaml-file) + (irritants-condition? c)) + (user-error "Unable to read workflow file ~a: ~a" + workflow-file + (string-join (condition-irritants c) + ": ")))) + (let ((workflow-path (guard (c ((unsupported-uri-scheme? c) + (user-error "Unsupported URI scheme ~a in ~a" + (unsupported-uri-scheme-scheme c) + workflow-file))) + (location->path workflow-file)))) + (call-with-current-directory (dirname workflow-path) + ;; TODO: Implement $import directive. + (cut normalize-workflow + (preprocess-include (read-yaml-file (basename workflow-path)))))))) (define (normalize-input input) "Normalize actual @var{input}." @@ -268,20 +297,32 @@ array of array of @code{File}s, etc. Else, return @code{#f}" (define (read-inputs inputs-file) "Read @var{inputs-file} resolving file paths if any." - (call-with-current-directory (dirname inputs-file) - (lambda () - (map (match-lambda - ((input-id . input) - (cons input-id - (normalize-input input)))) - ;; Even though YAML is a superset of JSON, use the JSON - ;; reader if possible; it is simpler and less prone to type - ;; ambiguities. - (if (string=? (file-name-extension inputs-file) - ".json") - (call-with-input-file (basename inputs-file) - json->scm) - (read-yaml-file (basename inputs-file))))))) + (guard (c ((and (who-condition? c) + (memq (condition-who c) + '(read-json-file read-yaml-file)) + (irritants-condition? c)) + (user-error "Unable to read inputs file ~a: ~a" + inputs-file + (string-join (condition-irritants c) + ": ")))) + (let ((inputs-path (guard (c ((unsupported-uri-scheme? c) + (user-error "Unsupported URI scheme ~a in ~a" + (unsupported-uri-scheme-scheme c) + inputs-file))) + (location->path inputs-file)))) + (call-with-current-directory (dirname inputs-path) + (lambda () + (map (match-lambda + ((input-id . input) + (cons input-id + (normalize-input input)))) + ;; Even though YAML is a superset of JSON, use the JSON + ;; reader if possible; it is simpler and less prone to type + ;; ambiguities. + (if (string=? (file-name-extension inputs-path) + ".json") + (read-json-file (basename inputs-path)) + (read-yaml-file (basename inputs-path))))))))) (define (coerce-type val type) "Coerce @var{val} to @var{type}." @@ -297,3 +338,25 @@ array of array of @code{File}s, etc. Else, return @code{#f}" val (string->number val))) (else val))) + +(define (read-json-file file) + "Read JSON @var{file} and return scheme tree." + (guard (c (else + (raise-exception + (condition (make-who-condition 'read-json-file) + c)))) + (call-with-input-file file + json->scm))) + +(define (read-yaml-file file) + "Read YAML @var{file} and return scheme tree." + (guard (c ((and (message-condition? c) + (string-prefix? "read-yaml-file:" (condition-message c))) + (raise-exception + (condition (make-who-condition 'read-yaml-file) + (make-irritants-condition + (match (string-split (condition-message c) #\:) + ((_ message file) + (list (string-trim message) + (string-trim file))))))))) + (yaml:read-yaml-file file))) diff --git a/ravanan/single-machine.scm b/ravanan/single-machine.scm index 854dd2c..b03137f 100644 --- a/ravanan/single-machine.scm +++ b/ravanan/single-machine.scm @@ -20,6 +20,7 @@ #:use-module (srfi srfi-26) #:use-module (ice-9 match) #:use-module (ravanan work monads) + #:use-module (ravanan work ui) #:export (submit-job)) (define (submit-job environment stdout-file stderr-file script) @@ -34,9 +35,8 @@ else @code{#f}. The return value is state-monadic." ((name . value) (setenv name value))) environment) - (format (current-error-port) - "Running ~a~%" - script) + (log-info "Running ~a~%" + script) (zero? (with-output-to-file stdout-file (lambda () (with-error-to-file stderr-file diff --git a/ravanan/store.scm b/ravanan/store.scm index 5cb4a64..6de7d21 100644 --- a/ravanan/store.scm +++ b/ravanan/store.scm @@ -18,21 +18,27 @@ (define-module (ravanan store) #:use-module (srfi srfi-26) + #:use-module (srfi srfi-71) #:use-module (ice-9 filesystem) + #:use-module (gcrypt hash) + #:use-module (guix base16) + #:use-module (guix base32) + #:use-module (guix build utils) #:use-module (ravanan work command-line-tool) #:use-module (ravanan work monads) + #:use-module (ravanan work ui) + #:use-module (ravanan work utils) #:use-module (ravanan work vectors) #:export (%store-files-directory %store-data-directory %store-logs-directory make-store - script->store-files-directory - script->store-data-file - script->store-stdout-file - script->store-stderr-file - intern-file - store-item-name)) + step-store-files-directory + step-store-data-file + step-store-stdout-file + step-store-stderr-file + intern-file)) (define %store-files-directory "files") @@ -47,9 +53,8 @@ "Make @var{store} directory and initialize with subdirectories. If @var{store} already exists, do nothing." (unless (file-exists? store) - (format (current-error-port) - "store ~a does not exist; creating it~%" - store) + (log-warning "store ~a does not exist; creating it" + store) (make-directories store) (for-each (lambda (directory) (mkdir (expand-file-name directory store))) @@ -57,29 +62,56 @@ already exists, do nothing." %store-data-directory %store-logs-directory)))) -(define (script->store-files-directory script store) - "Return the store files directory in @var{store} corresponding to @var{script} -path." +(define (sha1-hash-sexp tree) + (bytevector->base32-string + (let ((port get-hash (open-hash-port (hash-algorithm sha1)))) + ;; tree should probably be canonicalized using canonical S-expressions or + ;; similar. But, it doesn't matter much for our purposes. write already + ;; canonicalizes in a way. In the unlikely case of a problem, the worst + ;; that can happen is that we recompute all steps of the workflow. + (write tree port) + (close port) + (get-hash)))) + +(define (step-store-basename script inputs) + "Return the basename in the store for files of CWL step with @var{script} and +@var{inputs}." + (string-append (sha1-hash-sexp (cons script (canonicalize-json inputs))) + "-" + (strip-store-file-name script))) + +(define (step-store-files-directory script inputs store) + "Return the @var{store} files directory for CWL step with @var{script} and +@var{inputs}." (expand-file-name (file-name-join* %store-files-directory - (basename script)) + (step-store-basename script inputs)) store)) -(define (script->store-data-file script store) - "Return the store data file in @var{store} corresponding to @var{script} path." +(define (step-store-data-file script inputs store) + "Return the @var{store} data file for CWL step with @var{script} and +@var{inputs}." (expand-file-name (file-name-join* %store-data-directory - (string-append (basename script) ".json")) + (string-append + (step-store-basename script inputs) + ".json")) store)) -(define (script->store-stdout-file script store) - "Return the store stdout file in @var{store} corresponding to @var{script} path." +(define (step-store-stdout-file script inputs store) + "Return the @var{store} stdout file for CWL step with @var{script} and +@var{inputs}." (expand-file-name (file-name-join* %store-logs-directory - (string-append (basename script) ".stdout")) + (string-append + (step-store-basename script inputs) + ".stdout")) store)) -(define (script->store-stderr-file script store) - "Return the store stderr file in @var{store} corresponding to @var{script} path." +(define (step-store-stderr-file script inputs store) + "Return the @var{store} stderr file for CWL step with @var{script} and +@var{inputs}." (expand-file-name (file-name-join* %store-logs-directory - (string-append (basename script) ".stderr")) + (string-append + (step-store-basename script inputs) + ".stderr")) store)) (define (same-filesystem? path1 path2) @@ -96,8 +128,9 @@ interned path and location." (checksum (assoc-ref file "checksum")) (sha1 (if (and checksum (string-prefix? "sha1$" checksum)) - (string-drop checksum (string-length "sha1$")) - (sha1-hash path))) + (base16-string->bytevector + (string-drop checksum (string-length "sha1$"))) + (sha1-hash-bytes path))) (interned-path (if (string-prefix? store path) ;; If file is already a store path, return it as is. @@ -108,18 +141,18 @@ interned path and location." (let ((interned-path (expand-file-name (file-name-join* %store-files-directory - (string-append sha1 + (string-append (bytevector->base32-string sha1) "-" - (basename path))) + (basename path)) + (basename path)) store))) (if (file-exists? interned-path) - (format (current-error-port) - "~a previously interned into store as ~a~%" - path interned-path) - (begin - (format (current-error-port) - "Interning ~a into store as ~a~%" + (log-info "~a previously interned into store as ~a~%" path interned-path) + (begin + (log-info "Interning ~a into store as ~a~%" + path interned-path) + (mkdir (dirname interned-path)) ;; Hard link if on the same filesystem. Else, copy. ((if (same-filesystem? path (expand-file-name %store-files-directory @@ -140,11 +173,4 @@ interned path and location." (just (vector-map (cut intern-file <> store) secondary-files))))))) -;; Length of a base-16 encoded SHA1 hash -(define %store-hash-length 40) -(define (store-item-name path) - "Return the basename of store item @var{path} with the store hash stripped out." - (string-drop (basename path) - ;; the hash and the dash after the hash - (1+ %store-hash-length))) diff --git a/ravanan/utils.scm b/ravanan/utils.scm index a76a14c..e4fad9c 100644 --- a/ravanan/utils.scm +++ b/ravanan/utils.scm @@ -1,5 +1,5 @@ ;;; ravanan --- High-reproducibility CWL runner powered by Guix -;;; Copyright © 2024 Arun Isaac <arunisaac@systemreboot.net> +;;; Copyright © 2024, 2025 Arun Isaac <arunisaac@systemreboot.net> ;;; ;;; This file is part of ravanan. ;;; @@ -54,4 +54,4 @@ before loading script." ;; define-module invocation" warning during compilation. But, it is ;; probably safe to ignore this warning since we use load only within a ;; dummy module. - (load (canonicalize-path script-file)))))) + (load script-file))))) diff --git a/ravanan/work/command-line-tool.scm b/ravanan/work/command-line-tool.scm index d33c80c..6378aa7 100644 --- a/ravanan/work/command-line-tool.scm +++ b/ravanan/work/command-line-tool.scm @@ -17,8 +17,10 @@ ;;; along with ravanan. If not, see <https://www.gnu.org/licenses/>. (define-module (ravanan work command-line-tool) + #:use-module (rnrs conditions) #:use-module (rnrs exceptions) #:use-module (srfi srfi-1) + #:use-module (srfi srfi-9 gnu) #:use-module (srfi srfi-26) #:use-module (ice-9 filesystem) #:use-module (ice-9 format) @@ -29,9 +31,14 @@ #:use-module (json) #:use-module (ravanan work monads) #:use-module (ravanan work types) + #:use-module (ravanan work ui) #:use-module (ravanan work utils) #:use-module (ravanan work vectors) - #:export (value->string + #:export (unsupported-uri-scheme + unsupported-uri-scheme? + unsupported-uri-scheme-scheme + + value->string call-with-current-directory object-type match-type @@ -39,11 +46,26 @@ formal-parameter-type run-command sha1-hash + sha1-hash-bytes checksum location->path canonicalize-file-value secondary-path - evaluate-javascript)) + evaluate-javascript + + command-line-binding + command-line-binding? + command-line-binding-position + command-line-binding-prefix + command-line-binding-type + command-line-binding-value + command-line-binding-item-separator + command-line-binding->args + build-command)) + +(define-condition-type &unsupported-uri-scheme &serious + unsupported-uri-scheme unsupported-uri-scheme? + (scheme unsupported-uri-scheme-scheme)) (define (value->string x) "Convert value @var{x} to a string." @@ -202,31 +224,42 @@ status in @var{success-codes} as success. Error out otherwise." (with-output-to-port (current-error-port) (cut invoke command)))))) +(define (sha1-hash-bytes file) + "Return the SHA1 hash of @var{file} as a bytevector." + (file-hash (lookup-hash-algorithm 'sha1) + file)) + (define (sha1-hash file) "Return the SHA1 hash of @var{file} as a hexadecimal string." - (bytevector->base16-string - (file-hash (lookup-hash-algorithm 'sha1) - file))) + (bytevector->base16-string (sha1-hash-bytes file))) (define (checksum file) "Return the checksum of @var{file} as defined in the CWL specification." (string-append "sha1$" (sha1-hash file))) (define (location->path location) - "Convert file @var{location} URI to path. Tolerate invalid locations that are -actually paths." + "Convert file @var{location} @code{file://} URI to path. Tolerate invalid +locations that are actually paths. Raise an @code{&unsupported-uri-scheme} +condition on unsupported URI schemes." (cond - ;; If location is an URI, parse the URI and return the path part. - ((string->uri location) => uri-path) + ;; If location is a file:// URI, parse the URI and return the path part. + ((string->uri location) + => (lambda (uri) + (if (eq? (uri-scheme uri) 'file) + (uri-path uri) + (raise-exception (unsupported-uri-scheme (uri-scheme uri)))))) ;; location is actually a path; return as is. (else location))) (define (canonicalize-file-value value) "Canonicalize @code{File} type @var{value} adding missing fields." - (let ((path (or (assoc-ref value "path") - (location->path (assoc-ref value "location")))) - (location (or (assoc-ref value "location") - (string-append "file://" (assoc-ref value "path"))))) + (let* ((path (canonicalize-path + (or (assoc-ref value "path") + (location->path (assoc-ref value "location"))))) + ;; The location field may actually be a path instead of an URI; that's + ;; invalid. So, unconditionally reconstruct the location URI from path. + ;; This assumes they are always file:// URIs, but that works for now. + (location (string-append "file://" path))) ;; Populate all fields of the File type value. (maybe-assoc-set `(("class" . "File") ("location" . ,location) @@ -262,3 +295,155 @@ actually paths." (format #f "--eval=~a console.log(\"%j\", ~a)" preamble expression)) json->scm))) + +(define-immutable-record-type <command-line-binding> + (command-line-binding position prefix type value item-separator) + command-line-binding? + (position command-line-binding-position) + (prefix command-line-binding-prefix) + (type command-line-binding-type) + (value command-line-binding-value) + (item-separator command-line-binding-item-separator)) + +(define (command-line-binding->args binding) + "Return a list of arguments for @var{binding}. The returned list may +contain strings or G-expressions. The G-expressions may reference an +@code{inputs-directory} variable that must be defined in the context in which +the G-expressions are inserted." + (let ((prefix (command-line-binding-prefix binding)) + (type (command-line-binding-type binding)) + (value (command-line-binding-value binding))) + (cond + ((eq? type 'boolean) + (if value + ;; TODO: Error out if boolean input has no prefix? + (maybe->list prefix) + (list))) + ((eq? type 'null) (list)) + ((array-type? type) + (match value + ;; Empty arrays should be noops. + (() (list)) + (_ + (let ((args (append-map command-line-binding->args + value))) + (append (maybe->list prefix) + (from-maybe + (maybe-let* ((item-separator (command-line-binding-item-separator binding))) + (just (list (string-join args item-separator)))) + args)))))) + (else + (append (maybe->list prefix) + (list (case type + ((string) + value) + ((int float) + (number->string value)) + ((File) + (assoc-ref* value "path")) + (else + (user-error "Invalid formal input type ~a" + type))))))))) + +(define (build-command base-command arguments formal-inputs inputs) + "Return a list of @code{<command-line-binding>} objects for a +@code{CommandLineTool} class workflow with @var{base-command}, @var{arguments}, +@var{formal-inputs} and @var{inputs}. The @code{value} field of the returned +@code{<command-line-binding>} objects may be strings or G-expressions. The +G-expressions may reference @var{inputs} and @var{runtime} variables that must +be defined in the context in which the G-expressions are inserted." + (define (argument->command-line-binding i argument) + (command-line-binding (cond + ((assoc-ref argument "position") + => string->number) + (else i)) + (maybe-assoc-ref (just argument) "prefix") + 'string + (value->string (assoc-ref* argument "valueFrom")) + %nothing)) + + (define (collect-bindings ids+inputs+types+bindings) + (append-map id+input+type-tree+binding->command-line-binding + ids+inputs+types+bindings)) + + (define id+input+type-tree+binding->command-line-binding + (match-lambda + ;; We stretch the idea of an input id, by making it an address that + ;; identifies the exact location of a value in a tree that possibly + ;; contains array types. For example, '("foo") identifies the input "foo"; + ;; '("foo" 1) identifies the 1th element of the array input "foo"; '("foo" + ;; 37 1) identifies the 1th element of the 37th element of the array input + ;; "foo"; etc. + ((id input type-tree binding) + ;; Check type. + (let* ((type (formal-parameter-type type-tree)) + (matched-type (match-type input type))) + (unless matched-type + (error input "Type mismatch" input type)) + (let ((position + (from-maybe + (maybe-let* ((position (maybe-assoc-ref binding "position"))) + (just (string->number position))) + ;; FIXME: Why a default value of 0? + 0)) + (prefix (maybe-assoc-ref binding "prefix"))) + (cond + ;; Recurse over array types. + ;; TODO: Implement record and enum types. + ((array-type? matched-type) + (list (command-line-binding + position + prefix + matched-type + (append-map (lambda (i input) + (id+input+type-tree+binding->command-line-binding + (list (append id (list i)) + input + (assoc-ref type-tree "items") + (maybe-assoc-ref (just type-tree) + "inputBinding")))) + (iota (vector-length input)) + (vector->list input)) + (maybe-assoc-ref binding "itemSeparator")))) + (else + (list (command-line-binding position + prefix + matched-type + (apply json-ref inputs id) + %nothing))))))))) + + ;; For details of this algorithm, see §4.1 Input binding of the CWL + ;; 1.2 CommandLineTool specification: + ;; https://www.commonwl.org/v1.2/CommandLineTool.html#Input_binding + (append + ;; Insert elements from baseCommand. + (vector->list (or base-command + (vector))) + (sort + (append + ;; Collect CommandLineBinding objects from arguments; assign a sorting key. + (vector->list + (vector-map-indexed argument->command-line-binding + (or arguments + #()))) + ;; Collect CommandLineBinding objects from the inputs schema; assign a + ;; sorting key. + (collect-bindings + (filter-map (lambda (formal-input) + ;; Exclude formal inputs without an inputBinding. + (and (assoc "inputBinding" formal-input) + (let ((id (assoc-ref formal-input "id"))) + (list (list id) + (or (assoc-ref inputs id) + (assoc-ref formal-input "default") + 'null) + (or (assoc-ref formal-input "type") + (user-error "Type of input ~a not specified" + id)) + (maybe-assoc-ref (just formal-input) + "inputBinding"))))) + (vector->list formal-inputs)))) + ;; Sort elements using the assigned sorting keys. + (lambda (binding1 binding2) + (< (command-line-binding-position binding1) + (command-line-binding-position binding2)))))) diff --git a/ravanan/work/ui.scm b/ravanan/work/ui.scm index 758f2cc..5a9ceeb 100644 --- a/ravanan/work/ui.scm +++ b/ravanan/work/ui.scm @@ -1,5 +1,5 @@ ;;; ravanan --- High-reproducibility CWL runner powered by Guix -;;; Copyright © 2024 Arun Isaac <arunisaac@systemreboot.net> +;;; Copyright © 2024, 2025 Arun Isaac <arunisaac@systemreboot.net> ;;; ;;; This file is part of ravanan. ;;; @@ -17,16 +17,58 @@ ;;; along with ravanan. If not, see <https://www.gnu.org/licenses/>. (define-module (ravanan work ui) - #:export (warning + #:use-module (srfi srfi-1) + #:use-module (srfi srfi-26) + #:export (%log-level + log + log-debug + log-info + log-warning + log-error + log-critical + warning user-error)) +(define %log-level + (make-parameter #f)) + +(define (log-level>=? level1 level2) + "Return @code{#t} if log @var{level1} has greater than or equal severity to log +@var{level2}. Else, return @code{#f}." + (let ((levels '(debug info warning error critical))) + (>= (list-index (cut eq? level1 <>) levels) + (list-index (cut eq? level2 <>) levels)))) + +(define (log level fmt . args) + "Log message when current log level is not @code{#f} and @var{level} has greater +than or equal severity to the current log level. @var{fmt} and @var{args} are +arguments to @code{format}." + (when (and (%log-level) + (log-level>=? level (%log-level))) + (apply format (current-error-port) fmt args) + (newline))) + +(define log-debug + (cut log 'debug <> <...>)) + +(define log-info + (cut log 'info <> <...>)) + +(define log-warning + (cut log 'warning <> <...>)) + +(define log-error + (cut log 'error <> <...>)) + +(define log-critical + (cut log 'critical <> <...>)) + (define (warning fmt . args) "Print warning. @var{fmt} and @var{args} are arguments to format." - (apply format (current-error-port) fmt args) - (newline)) + (apply log-warning fmt args)) (define (user-error fmt . args) "Print error message and exit with failure. @var{fmt} and @var{args} are arguments to format." - (apply warning fmt args) + (apply log-error fmt args) (exit #f)) diff --git a/ravanan/workflow.scm b/ravanan/workflow.scm index 2770f31..3f0ef27 100644 --- a/ravanan/workflow.scm +++ b/ravanan/workflow.scm @@ -27,6 +27,8 @@ #:use-module (ice-9 filesystem) #:use-module (ice-9 match) #:use-module (web uri) + #:use-module (guix inferior) + #:use-module (guix store) #:use-module (ravanan batch-system) #:use-module (ravanan command-line-tool) #:use-module (ravanan job-state) @@ -59,16 +61,28 @@ (define-condition-type &job-failure &error job-failure job-failure? - (script job-failure-script)) + (script job-failure-script) + (inputs job-failure-inputs)) (define-immutable-record-type <scheduler-proc> - (scheduler-proc name cwl scatter scatter-method) + (-scheduler-proc name script-or-propnet formal-inputs formal-outputs + resource-requirement scatter scatter-method) scheduler-proc? (name scheduler-proc-name) - (cwl scheduler-proc-cwl) + (script-or-propnet scheduler-proc-script-or-propnet) + (formal-inputs scheduler-proc-formal-inputs) + (formal-outputs scheduler-proc-formal-outputs) + (resource-requirement scheduler-proc-resource-requirement) (scatter scheduler-proc-scatter) (scatter-method scheduler-proc-scatter-method)) +(define* (scheduler-proc name script-or-propnet formal-inputs formal-outputs + #:optional + (resource-requirement %nothing) + (scatter %nothing) (scatter-method %nothing)) + (-scheduler-proc name script-or-propnet formal-inputs formal-outputs + resource-requirement scatter scatter-method)) + (define-immutable-record-type <command-line-tool-state> (command-line-tool-state job-state formal-outputs) command-line-tool-state? @@ -167,67 +181,120 @@ requirements and hints of the step." (assoc-ref* input "type")))) (assoc-ref input "id"))) -(define* (command-line-tool->propagator name cwl) - "Convert @code{CommandLineTool} workflow @var{cwl} of @var{name} to a -propagator." - (propagator name - (scheduler-proc name cwl %nothing %nothing) - (vector-map->list (lambda (input) - (cons (assoc-ref input "id") - (assoc-ref input "id"))) - (assoc-ref cwl "inputs")) - ;; Inputs that either have a default or accept null values are - ;; optional. - (vector-filter-map->list optional-input? - (assoc-ref cwl "inputs")) - (vector-map->list (lambda (output) - (cons (assoc-ref output "id") - (assoc-ref output "id"))) - (assoc-ref cwl "outputs")))) - -(define* (workflow-class->propnet name cwl scheduler batch-system) +(define* (workflow->scheduler-proc name cwl scheduler + manifest-file inferior scratch store + batch-system guix-store + #:optional + (scatter %nothing) + (scatter-method %nothing)) + "Return a @code{<scheduler-proc>} object for @var{cwl} workflow named @var{name} +scheduled using @var{scheduler}. @var{scatter} and @var{scatter-method} are the +CWL scattering properties of this step. Build @code{CommandLineTool} workflow +scripts using @var{guix-store}. + +@var{manifest-file}, @var{scratch}, @var{store} and @var{batch-system} are the +same as in @code{run-workflow}. @var{inferior} is the same as in +@code{build-command-line-tool-script} from @code{(ravanan command-line-tool)}." + (scheduler-proc name + (let ((class (assoc-ref* cwl "class"))) + (cond + ((string=? class "CommandLineTool") + (run-with-store guix-store + (build-command-line-tool-script name + manifest-file + inferior + cwl + scratch + store + batch-system))) + ((string=? class "ExpressionTool") + (error "Workflow class not implemented yet" class)) + ((string=? class "Workflow") + (workflow-class->propnet cwl + scheduler + manifest-file + inferior + scratch + store + batch-system + guix-store)) + (else + (assertion-violation class "Unexpected workflow class")))) + (assoc-ref* cwl "inputs") + (assoc-ref* cwl "outputs") + (find-requirement (inherit-requirements + (or (assoc-ref cwl "requirements") + #()) + (or (assoc-ref cwl "hints") + #())) + "ResourceRequirement") + scatter + scatter-method)) + +(define* (workflow-class->propnet cwl scheduler + manifest-file inferior scratch store + batch-system guix-store) "Return a propagator network scheduled using @var{scheduler} on -@var{batch-system} for @var{cwl}, a @code{Workflow} class workflow with -@var{name}." +@var{batch-system} for @var{cwl}, a @code{Workflow} class workflow. Build +@code{CommandLineTool} workflow scripts using @var{guix-store}. + +@var{manifest-file}, @var{scratch}, @var{store}, @var{batch-system} and +@var{guix-daemon-socket} are the same as in @code{run-workflow}. @var{inferior} +is the same as in @code{build-command-line-tool-script} from @code{(ravanan +command-line-tool)}." (define (normalize-scatter-method scatter-method) (assoc-ref* '(("dotproduct" . dot-product) ("nested_crossproduct" . nested-cross-product) ("flat_crossproduct" . flat-cross-product)) scatter-method)) + (define (step->scheduler-proc step parent-requirements parent-hints) + (let ((run (assoc-ref* step "run"))) + (workflow->scheduler-proc (assoc-ref* step "id") + (inherit-requirements-and-hints + run + parent-requirements + parent-hints + (or (assoc-ref step "requirements") + #()) + (or (assoc-ref step "hints") + #())) + scheduler + manifest-file + inferior + scratch + store + batch-system + guix-store + (maybe-assoc-ref (just step) "scatter") + (maybe-bind (maybe-assoc-ref (just step) "scatterMethod") + (compose just normalize-scatter-method))))) + (define (step->propagator step) - (let* ((step-id (assoc-ref* step "id")) - (step-propagator - (command-line-tool->propagator step-id (assoc-ref* step "run")))) - (propagator (propagator-name step-propagator) - (let ((proc (propagator-proc step-propagator))) - (scheduler-proc (scheduler-proc-name proc) - (inherit-requirements-and-hints - (scheduler-proc-cwl proc) - (or (assoc-ref cwl "requirements") - #()) - (or (assoc-ref cwl "hints") - #()) - (or (assoc-ref step "requirements") - #()) - (or (assoc-ref step "hints") - #())) - (maybe-assoc-ref (just step) "scatter") - (maybe-bind (maybe-assoc-ref (just step) "scatterMethod") - (compose just normalize-scatter-method)))) - (map (match-lambda - ((input-id . _) - (cons input-id - (json-ref step "in" input-id)))) - (propagator-inputs step-propagator)) - (propagator-optional-inputs step-propagator) - (filter-map (match-lambda - ((output . cell) - (and (vector-member output - (assoc-ref* step "out")) - (cons output - (string-append step-id "/" cell))))) - (propagator-outputs step-propagator))))) + (let ((step-id (assoc-ref* step "id")) + (run (assoc-ref* step "run"))) + (propagator step-id + (step->scheduler-proc step + (or (assoc-ref cwl "requirements") + #()) + (or (assoc-ref cwl "hints") + #())) + (vector-map->list (lambda (input) + (let ((input-id (assoc-ref input "id"))) + (cons input-id + (json-ref step "in" input-id)))) + (assoc-ref run "inputs")) + ;; Inputs that either have a default or accept null values are + ;; optional. + (vector-filter-map->list optional-input? + (assoc-ref run "inputs")) + (vector-map->list (lambda (output) + (let ((output-id (assoc-ref output "id"))) + (and (vector-member output-id + (assoc-ref* step "out")) + (cons output-id + (string-append step-id "/" output-id))))) + (assoc-ref run "outputs"))))) (maybe-let* ((requirements (maybe-assoc-ref (just cwl) "requirements"))) (check-requirements requirements @@ -246,26 +313,28 @@ propagator." merge-values scheduler)) -(define* (workflow-scheduler manifest-file channels scratch store batch-system - #:key guix-daemon-socket) +(define* (workflow-scheduler store batch-system) (define (schedule proc inputs scheduler) "Schedule @var{proc} with inputs from the @var{inputs} association list. Return a -state-monadic job state object. @var{proc} may either be a @code{<propnet>} -object or a @code{<scheduler-proc>} object." +state-monadic job state object. @var{proc} must be a @code{<scheduler-proc>} +object." (let* ((name (scheduler-proc-name proc)) - (cwl (scheduler-proc-cwl proc)) + (script-or-propnet (scheduler-proc-script-or-propnet proc)) (scatter (from-maybe (scheduler-proc-scatter proc) #f)) (scatter-method (from-maybe (scheduler-proc-scatter-method proc) - #f)) - (class (assoc-ref* cwl "class"))) + #f))) (if scatter (case scatter-method ((dot-product) (apply state-map (lambda input-elements ;; Recurse with scattered inputs spliced in. - (schedule (scheduler-proc name cwl %nothing %nothing) + (schedule (scheduler-proc name + script-or-propnet + (scheduler-proc-formal-inputs proc) + (scheduler-proc-formal-outputs proc) + (scheduler-proc-resource-requirement proc)) ;; Replace scattered inputs with single ;; elements. (apply assoc-set @@ -281,38 +350,29 @@ object or a @code{<scheduler-proc>} object." ((nested-cross-product flat-cross-product) (error scatter-method "Scatter method not implemented yet"))) - (let* ((formal-inputs (assoc-ref* cwl "inputs")) - ;; We need to resolve inputs after adding defaults since the - ;; default values may contain uninterned File objects. - (inputs (resolve-inputs (add-defaults inputs formal-inputs) - formal-inputs - store))) - (cond - ((string=? class "CommandLineTool") - (state-let* ((job-state - (run-command-line-tool name - manifest-file - channels - cwl - inputs - scratch - store - batch-system - #:guix-daemon-socket guix-daemon-socket))) - (state-return (command-line-tool-state job-state - (assoc-ref* cwl "outputs"))))) - ((string=? class "ExpressionTool") - (error "Workflow class not implemented yet" class)) - ((string=? class "Workflow") - (state-let* ((propnet-state - (schedule-propnet (workflow-class->propnet name - cwl - scheduler - batch-system) - inputs))) + (if (propnet? script-or-propnet) + (state-let* ((propnet-state (schedule-propnet script-or-propnet inputs))) (state-return (workflow-state propnet-state - (assoc-ref* cwl "outputs")))))))))) + (scheduler-proc-formal-outputs proc)))) + (let* ((formal-inputs (scheduler-proc-formal-inputs proc)) + ;; We need to resolve inputs after adding defaults since + ;; the default values may contain uninterned File objects. + (inputs (resolve-inputs (add-defaults inputs formal-inputs) + formal-inputs + store)) + (resource-requirement + (scheduler-proc-resource-requirement proc))) + (state-let* ((job-state + (run-command-line-tool name + script-or-propnet + inputs + resource-requirement + store + batch-system))) + (state-return + (command-line-tool-state job-state + (scheduler-proc-formal-outputs proc))))))))) (define (poll state) "Return updated state and current status of job @var{state} object as a @@ -341,7 +401,8 @@ state-monadic @code{<state+status>} object. The status is one of the symbols (case status ((failed) (raise-exception (job-failure - (job-state-script job-state)))) + (job-state-script job-state) + (job-state-inputs job-state)))) (else => identity))))))) ;; Poll sub-workflow state. We do not need to check the status here since ;; job failures only occur at the level of a CommandLineTool. @@ -414,16 +475,16 @@ is the class of the workflow." head-output)))))) (else ;; Log progress and return captured output. - (let ((script (job-state-script (command-line-tool-state-job-state state)))) + (let ((script (job-state-script (command-line-tool-state-job-state state))) + (inputs (job-state-inputs (command-line-tool-state-job-state state)))) (state-return (begin - (format (current-error-port) - "~a completed; logs at ~a and ~a~%" - script - (script->store-stdout-file script store) - (script->store-stderr-file script store)) + (log-info "~a completed; logs at ~a and ~a~%" + script + (step-store-stdout-file script inputs store) + (step-store-stderr-file script inputs store)) (filter-outputs "CommandLineTool" - (capture-command-line-tool-output script store) + (capture-command-line-tool-output script inputs store) (command-line-tool-state-formal-outputs state)))))))) (scheduler schedule poll capture-output)) @@ -455,8 +516,8 @@ files found into the @var{store} and return a tree of the fully resolved inputs. "Return @code{#t} if @var{secondary-file} matches at least one secondary file in @var{input}." (vector-any (lambda (candidate) - (string=? (store-item-name (assoc-ref* candidate "path")) - (secondary-path (store-item-name (assoc-ref* input "path")) + (string=? (basename (assoc-ref* candidate "path")) + (secondary-path (basename (assoc-ref* input "path")) secondary-file))) (or (assoc-ref input "secondaryFiles") (user-error "Missing secondaryFiles in input ~a" @@ -524,6 +585,39 @@ error out." formal-inputs)) formal-inputs)) +(define (call-with-inferior inferior proc) + "Call @var{proc} with @var{inferior} and return the return value of @var{proc}. +Close @var{inferior} when done, even if @var{proc} exits non-locally." + (dynamic-wind (const #t) + (cut proc inferior) + (cut close-inferior inferior))) + +(define* (build-workflow name cwl scheduler + manifest-file channels scratch store + batch-system + #:optional guix-daemon-socket) + "Build @var{cwl} workflow named @var{name} into a @code{<scheduler-proc>} object +scheduled using @var{scheduler}. When @var{guix-daemon-socket} is specified, +connect to the Guix daemon at that specific socket. Else, connect to the default +socket. + +@var{manifest-file}, @var{channels}, @var{scratch}, @var{store} and +@var{batch-system} are the same as in @code{run-workflow}." + (define builder + (cut workflow->scheduler-proc name cwl scheduler + manifest-file <> scratch store + batch-system <>)) + + (if guix-daemon-socket + (parameterize ((%daemon-socket-uri guix-daemon-socket)) + (build-workflow name cwl scheduler manifest-file channels + scratch store batch-system)) + (with-store guix-store + (if channels + (call-with-inferior (inferior-for-channels channels) + (cut builder <> guix-store)) + (builder #f guix-store))))) + (define* (run-workflow name manifest-file channels cwl inputs scratch store batch-system #:key guix-daemon-socket) @@ -537,18 +631,25 @@ area need not be shared. @var{store} is the path to the shared ravanan store. @var{guix-daemon-socket} is the Guix daemon socket to connect to." (guard (c ((job-failure? c) - (let ((script (job-failure-script c))) + (let ((script (job-failure-script c)) + (inputs (job-failure-inputs c))) (user-error "~a failed; logs at ~a and ~a~%" script - (script->store-stdout-file script store) - (script->store-stderr-file script store))))) - (let ((scheduler (workflow-scheduler - manifest-file channels scratch store batch-system - #:guix-daemon-socket guix-daemon-socket))) + (step-store-stdout-file script inputs store) + (step-store-stderr-file script inputs store))))) + (let ((scheduler (workflow-scheduler store batch-system))) (run-with-state (let loop ((mstate ((scheduler-schedule scheduler) - (scheduler-proc name cwl %nothing %nothing) + (build-workflow name + cwl + scheduler + manifest-file + channels + scratch + store + batch-system + guix-daemon-socket) inputs scheduler))) ;; Poll. diff --git a/tests/store.scm b/tests/store.scm new file mode 100644 index 0000000..f209583 --- /dev/null +++ b/tests/store.scm @@ -0,0 +1,80 @@ +;;; ravanan --- High-reproducibility CWL runner powered by Guix +;;; Copyright © 2025 Arun Isaac <arunisaac@systemreboot.net> +;;; +;;; This file is part of ravanan. +;;; +;;; ravanan is free software: you can redistribute it and/or modify it +;;; under the terms of the GNU General Public License as published by +;;; the Free Software Foundation, either version 3 of the License, or +;;; (at your option) any later version. +;;; +;;; ravanan is distributed in the hope that it will be useful, but +;;; WITHOUT ANY WARRANTY; without even the implied warranty of +;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;;; General Public License for more details. +;;; +;;; You should have received a copy of the GNU General Public License +;;; along with ravanan. If not, see <https://www.gnu.org/licenses/>. + +(use-modules (srfi srfi-64) + (ravanan store)) + +(test-begin "store") + +(test-equal "step-store-files-directory must be insensitive to order of inputs" + (step-store-files-directory "/gnu/store/hl96c0xd19ngvl8rf4cyw452rpgqsi1b-foo" + '(("foo" . 1) + ("foobar" . 3) + ("bar" . (("aal" . 1) + ("vel" . 2)))) + "store") + (step-store-files-directory "/gnu/store/hl96c0xd19ngvl8rf4cyw452rpgqsi1b-foo" + '(("foo" . 1) + ("bar" . (("vel" . 2) + ("aal" . 1))) + ("foobar" . 3)) + "store")) + +(test-equal "step-store-data-file must be insensitive to order of inputs" + (step-store-data-file "/gnu/store/hl96c0xd19ngvl8rf4cyw452rpgqsi1b-foo" + '(("foo" . 1) + ("foobar" . 3) + ("bar" . (("aal" . 1) + ("vel" . 2)))) + "store") + (step-store-data-file "/gnu/store/hl96c0xd19ngvl8rf4cyw452rpgqsi1b-foo" + '(("foo" . 1) + ("bar" . (("vel" . 2) + ("aal" . 1))) + ("foobar" . 3)) + "store")) + +(test-equal "step-store-stdout-file must be insensitive to order of inputs" + (step-store-stdout-file "/gnu/store/hl96c0xd19ngvl8rf4cyw452rpgqsi1b-foo" + '(("foo" . 1) + ("foobar" . 3) + ("bar" . (("aal" . 1) + ("vel" . 2)))) + "store") + (step-store-stdout-file "/gnu/store/hl96c0xd19ngvl8rf4cyw452rpgqsi1b-foo" + '(("foo" . 1) + ("bar" . (("vel" . 2) + ("aal" . 1))) + ("foobar" . 3)) + "store")) + +(test-equal "step-store-stderr-file must be insensitive to order of inputs" + (step-store-stderr-file "/gnu/store/hl96c0xd19ngvl8rf4cyw452rpgqsi1b-foo" + '(("foo" . 1) + ("foobar" . 3) + ("bar" . (("aal" . 1) + ("vel" . 2)))) + "store") + (step-store-stderr-file "/gnu/store/hl96c0xd19ngvl8rf4cyw452rpgqsi1b-foo" + '(("foo" . 1) + ("bar" . (("vel" . 2) + ("aal" . 1))) + ("foobar" . 3)) + "store")) + +(test-end "store") |