aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorArun Isaac2024-10-09 22:50:24 +0100
committerArun Isaac2024-10-09 23:17:17 +0100
commit5d06ccc6b27dd213871ff8cede2d44aed2cae373 (patch)
treefc60cc018199076adfe338bc1d156b4a2051925a
parent1f2b6aca9b85d4b41d10d75584a65345a4f2a57e (diff)
downloadravanan-5d06ccc6b27dd213871ff8cede2d44aed2cae373.tar.gz
ravanan-5d06ccc6b27dd213871ff8cede2d44aed2cae373.tar.lz
ravanan-5d06ccc6b27dd213871ff8cede2d44aed2cae373.zip
batch-system: Introduce <slurm-api-batch-system> record type.
A <slurm-api-batch-system> record type gives us an easy way to group and pass along the parameters required by the slurm-api batch system. * ravanan/batch-system.scm: New file. * bin/ravanan: Import (ravanan batch-system). (main): Create <slurm-api-batch-system> object for batch-system argument. Do not pass #:slurm-api-endpoint and #:slurm-jwt arguments. * ravanan/command-line-tool.scm: Import (ravanan batch-system). (command-line-tool-supported-requirements): Check for slurm-api batch system using slurm-api-batch-system?. (run-command-line-tool): Check for slurm-api batch system using slurm-api-batch-system?. Do not accept #:slurm-api-endpoint and #:slurm-jwt arguments. (cores): Check for slurm-api batch system using slurm-api-batch-system?. * ravanan/workflow.scm: Import (ravanan batch-system). (workflow-scheduler): Do not accept #:slurm-api-endpoint and #:slurm-jwt arguments. Pass batch-system to job-state-status. (run-workflow): Check for slurm-api batch system using slurm-api-batch-system?. Do not accept #:slurm-api-endpoint and #:slurm-jwt arguments. * ravanan/job-state.scm: Import (ravanan batch-system). (job-state-status): Do not accept #:slurm-api-endpoint and #:slurm-jwt arguments. Accept batch-system argument.
-rwxr-xr-xbin/ravanan14
-rw-r--r--ravanan/batch-system.scm30
-rw-r--r--ravanan/command-line-tool.scm31
-rw-r--r--ravanan/job-state.scm19
-rw-r--r--ravanan/workflow.scm42
5 files changed, 77 insertions, 59 deletions
diff --git a/bin/ravanan b/bin/ravanan
index cc9005d..5807a8e 100755
--- a/bin/ravanan
+++ b/bin/ravanan
@@ -27,6 +27,7 @@ exec guile --no-auto-compile -e main -s "$0" "$@"
(ice-9 match)
(web uri)
(json)
+ (ravanan batch-system)
(ravanan reader)
(ravanan utils)
(ravanan workflow))
@@ -145,11 +146,14 @@ files that have the token in the @verbatim{SLURM_JWT=token} format."
(if (file-name-absolute? (assq-ref args 'store))
(assq-ref args 'store)
(canonicalize-path (assq-ref args 'store)))
- (assq-ref args 'batch-system)
- #:guix-daemon-socket (assq-ref args 'guix-daemon-socket)
- #:slurm-api-endpoint (assq-ref args 'slurm-api-endpoint)
- #:slurm-jwt (and (assq-ref args 'slurm-jwt)
- (read-jwt (assq-ref args 'slurm-jwt))))
+ (case (assq-ref args 'batch-system)
+ ((single-machine) 'single-machine)
+ ((slurm-api)
+ (slurm-api-batch-system
+ (assq-ref args 'slurm-api-endpoint)
+ (and (assq-ref args 'slurm-jwt)
+ (read-jwt (assq-ref args 'slurm-jwt))))))
+ #:guix-daemon-socket (assq-ref args 'guix-daemon-socket))
(current-output-port)
#:pretty #t)
(newline (current-output-port))))))))
diff --git a/ravanan/batch-system.scm b/ravanan/batch-system.scm
new file mode 100644
index 0000000..726bff8
--- /dev/null
+++ b/ravanan/batch-system.scm
@@ -0,0 +1,30 @@
+;;; ravanan --- High-reproducibility CWL runner powered by Guix
+;;; Copyright © 2024 Arun Isaac <arunisaac@systemreboot.net>
+;;;
+;;; This file is part of ravanan.
+;;;
+;;; ravanan is free software: you can redistribute it and/or modify it
+;;; under the terms of the GNU General Public License as published by
+;;; the Free Software Foundation, either version 3 of the License, or
+;;; (at your option) any later version.
+;;;
+;;; ravanan is distributed in the hope that it will be useful, but
+;;; WITHOUT ANY WARRANTY; without even the implied warranty of
+;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+;;; General Public License for more details.
+;;;
+;;; You should have received a copy of the GNU General Public License
+;;; along with ravanan. If not, see <https://www.gnu.org/licenses/>.
+
+(define-module (ravanan batch-system)
+ #:use-module (srfi srfi-9 gnu)
+ #:export (slurm-api-batch-system
+ slurm-api-batch-system?
+ slurm-api-batch-system-endpoint
+ slurm-api-batch-system-jwt))
+
+(define-immutable-record-type <slurm-api-batch-system>
+ (slurm-api-batch-system endpoint jwt)
+ slurm-api-batch-system?
+ (endpoint slurm-api-batch-system-endpoint)
+ (jwt slurm-api-batch-system-jwt))
diff --git a/ravanan/command-line-tool.scm b/ravanan/command-line-tool.scm
index 6225595..4254df9 100644
--- a/ravanan/command-line-tool.scm
+++ b/ravanan/command-line-tool.scm
@@ -35,6 +35,7 @@
#:use-module (guix search-paths)
#:use-module (guix store)
#:use-module (json)
+ #:use-module (ravanan batch-system)
#:use-module (ravanan javascript)
#:use-module (ravanan job-state)
#:use-module (ravanan reader)
@@ -75,11 +76,11 @@
"ResourceRequirement"))
(define (command-line-tool-supported-requirements batch-system)
- (case batch-system
- ((single-machine)
+ (cond
+ ((eq? batch-system 'single-machine)
(delete "ResourceRequirement"
%command-line-tool-supported-requirements))
- ((slurm-api)
+ ((slurm-api-batch-system? batch-system)
%command-line-tool-supported-requirements)
(else
(assertion-violation batch-system "Unknown batch system"))))
@@ -370,14 +371,12 @@ path."
(define* (run-command-line-tool name manifest-file cwl inputs
scratch store batch-system
- #:key guix-daemon-socket
- slurm-api-endpoint slurm-jwt)
+ #:key guix-daemon-socket)
"Run @code{CommandLineTool} class workflow @var{cwl} named @var{name} with
@var{inputs} using tools from Guix manifest in @var{manifest-file}.
-@var{scratch}, @var{store}, @var{batch-system}, @var{guix-daemon-socket},
-@var{slurm-api-endpoint} and @var{slurm-jwt} are the same as in
-@code{run-workflow} from @code{(ravanan workflow)}."
+@var{scratch}, @var{store}, @var{batch-system} and @var{guix-daemon-socket} are
+the same as in @code{run-workflow} from @code{(ravanan workflow)}."
;; TODO: Write to the store atomically.
(let* ((script
(build-command-line-tool-script name manifest-file cwl inputs
@@ -422,8 +421,8 @@ path."
(when (file-exists? store-files-directory)
(delete-file-recursively store-files-directory))
(mkdir store-files-directory)
- (case batch-system
- ((single-machine)
+ (cond
+ ((eq? batch-system 'single-machine)
(setenv "WORKFLOW_OUTPUT_DIRECTORY" store-files-directory)
(setenv "WORKFLOW_OUTPUT_DATA_FILE" store-data-file)
(format (current-error-port)
@@ -434,7 +433,7 @@ path."
(lambda ()
(with-error-to-file stderr-file
(cut system* script)))))))
- ((slurm-api)
+ ((slurm-api-batch-system? batch-system)
(format (current-error-port)
"Submitting job ~a~%"
script)
@@ -447,8 +446,8 @@ path."
cpus
name
script
- #:api-endpoint slurm-api-endpoint
- #:jwt slurm-jwt)))
+ #:api-endpoint (slurm-api-batch-system-endpoint batch-system)
+ #:jwt (slurm-api-batch-system-jwt batch-system))))
(format (current-error-port)
"~a submitted as job ID ~a~%"
script
@@ -610,10 +609,10 @@ named @var{name} with @var{inputs} using tools from Guix manifest in
"listing")))
(define (cores batch-system)
- (case batch-system
- ((slurm-api)
+ (cond
+ ((slurm-api-batch-system? batch-system)
#~(string->number (getenv "SLURM_CPUS_ON_NODE")))
- ((single-machine)
+ ((eq? batch-system 'single-machine)
#~(total-processor-count))
(else
(assertion-violation batch-system "Unknown batch system"))))
diff --git a/ravanan/job-state.scm b/ravanan/job-state.scm
index 88ed4b7..834a5bd 100644
--- a/ravanan/job-state.scm
+++ b/ravanan/job-state.scm
@@ -26,6 +26,7 @@
(define-module (ravanan job-state)
#:use-module (srfi srfi-9 gnu)
+ #:use-module (ravanan batch-system)
#:use-module (ravanan slurm-api)
#:use-module (ravanan work vectors)
#:export (single-machine-job-state
@@ -54,12 +55,10 @@
slurm-job-state-script))
state))
-(define* (job-state-status state #:key slurm-api-endpoint slurm-jwt)
- "Return current status and updated state of job with @var{state}. The status is
-one of the symbols @code{completed}, @code{failed} or @code{pending}.
-
-@var{slurm-api-endpoint} and @var{slurm-jwt} are the same as in
-@code{run-workflow} from @code{(ravanan workflow)}."
+(define* (job-state-status state batch-system)
+ "Return current status and updated state of job with @var{state} on
+@var{batch-system}. The status is one of the symbols @code{completed},
+@code{failed} or @code{pending}."
(values (cond
;; Single machine jobs are run synchronously. So, they return success
;; or failure immediately.
@@ -70,15 +69,13 @@ one of the symbols @code{completed}, @code{failed} or @code{pending}.
;; Poll slurm for job state.
((slurm-job-state? state)
(job-state (slurm-job-state-job-id state)
- #:api-endpoint slurm-api-endpoint
- #:jwt slurm-jwt))
+ #:api-endpoint (slurm-api-batch-system-endpoint batch-system)
+ #:jwt (slurm-api-batch-system-jwt batch-system)))
;; For vector states, poll each state element and return 'completed
;; only if all state elements have completed.
((vector? state)
(or (vector-every (lambda (state-element)
- (case (job-state-status state-element
- #:slurm-api-endpoint slurm-api-endpoint
- #:slurm-jwt slurm-jwt)
+ (case (job-state-status state-element batch-system)
((completed) => identity)
(else #f)))
state)
diff --git a/ravanan/workflow.scm b/ravanan/workflow.scm
index 95dfb73..61a1297 100644
--- a/ravanan/workflow.scm
+++ b/ravanan/workflow.scm
@@ -27,6 +27,7 @@
#:use-module (ice-9 filesystem)
#:use-module (ice-9 match)
#:use-module (web uri)
+ #:use-module (ravanan batch-system)
#:use-module (ravanan command-line-tool)
#:use-module (ravanan job-state)
#:use-module (ravanan propnet)
@@ -246,8 +247,7 @@ propagator."
scheduler))
(define* (workflow-scheduler manifest-file scratch store batch-system
- #:key guix-daemon-socket
- slurm-api-endpoint slurm-jwt)
+ #:key guix-daemon-socket)
(define (schedule proc inputs scheduler)
"Schedule @var{proc} with inputs from the @var{inputs} association list. Return a
job state object. @var{proc} may either be a @code{<propnet>} object or a
@@ -296,9 +296,7 @@ job state object. @var{proc} may either be a @code{<propnet>} object or a
scratch
store
batch-system
- #:guix-daemon-socket guix-daemon-socket
- #:slurm-api-endpoint slurm-api-endpoint
- #:slurm-jwt slurm-jwt)
+ #:guix-daemon-socket guix-daemon-socket)
(assoc-ref* cwl "outputs")))
((string=? class "ExpressionTool")
(error "Workflow class not implemented yet" class))
@@ -335,8 +333,7 @@ exit if job has failed."
((command-line-tool-state? state)
(let ((status updated-job-state
(job-state-status (command-line-tool-state-job-state state)
- #:slurm-api-endpoint slurm-api-endpoint
- #:slurm-jwt slurm-jwt)))
+ batch-system)))
(values (case status
((failed)
(raise-exception (job-failure
@@ -572,29 +569,18 @@ error out."
(define* (run-workflow name manifest-file cwl inputs
scratch store batch-system
- #:key guix-daemon-socket
- slurm-api-endpoint slurm-jwt)
+ #:key guix-daemon-socket)
"Run a workflow @var{cwl} named @var{name} with @var{inputs} using
tools from Guix manifest in @var{manifest-file}.
-@var{scratch} is the path to the scratch area on all worker nodes. The
-scratch area need not be shared. @var{store} is the path to the shared
-ravanan store. @var{batch-system} is a symbol representing one of the
-supported batch systems (either @code{'single-machine} or
-@code{'slurm-api}).
+@var{scratch} is the path to the scratch area on all worker nodes. The scratch
+area need not be shared. @var{store} is the path to the shared ravanan store.
+@var{batch-system} is an object representing one of the supported batch systems.
-@var{guix-daemon-socket} is the Guix daemon socket to connect to.
-
-@var{slurm-api-endpoint}, a @code{<uri>} object, is the slurm API
-endpoint to connect to. @var{slurm-jwt}, a string, is the JWT token to
-authenticate to the slurm API with. @var{slurm-api-endpoint} and
-@var{slurm-jwt} are only used when @var{batch-system} is
-@code{'slurm-api}."
+@var{guix-daemon-socket} is the Guix daemon socket to connect to."
(let ((scheduler (workflow-scheduler
manifest-file scratch store batch-system
- #:guix-daemon-socket guix-daemon-socket
- #:slurm-api-endpoint slurm-api-endpoint
- #:slurm-jwt slurm-jwt)))
+ #:guix-daemon-socket guix-daemon-socket)))
(let loop ((state ((scheduler-schedule scheduler)
(scheduler-proc name cwl %nothing %nothing)
inputs
@@ -605,11 +591,13 @@ authenticate to the slurm API with. @var{slurm-api-endpoint} and
(begin
;; Pause before looping and polling again so we don't bother the
;; job server too often.
- (sleep (case batch-system
+ (sleep (cond
;; Single machine jobs are run synchronously. So, there
;; is no need to wait to poll them.
- ((single-machine) 0)
- ((slurm-api) %job-poll-interval)))
+ ((eq? batch-system 'single-machine)
+ 0)
+ ((slurm-api-batch-system? batch-system)
+ %job-poll-interval)))
(loop state))
;; Capture outputs.
((scheduler-capture-output scheduler) state))))))