diff options
-rw-r--r-- | ravanan/slurm-api.scm | 60 | ||||
-rw-r--r-- | tests/slurm-api.scm | 48 |
2 files changed, 88 insertions, 20 deletions
diff --git a/ravanan/slurm-api.scm b/ravanan/slurm-api.scm index 430b3c7..88f1de0 100644 --- a/ravanan/slurm-api.scm +++ b/ravanan/slurm-api.scm @@ -17,6 +17,7 @@ ;;; along with ravanan. If not, see <https://www.gnu.org/licenses/>. (define-module (ravanan slurm-api) + #:use-module ((rnrs base) #:select (assertion-violation)) #:use-module (srfi srfi-1) #:use-module (srfi srfi-26) #:use-module (srfi srfi-71) @@ -116,6 +117,34 @@ state-monadic value." "job ~a assigned id ~a" name job-id) (state-return job-id))))) +(define (slurm-state-codes->job-state state-codes) + "Convert vector of @var{state-codes} from slurm into a @code{pending}, +@code{failed} or @code{completed} symbol." + ;; Slurm job state codes are documented at + ;; https://slurm.schedmd.com/job_state_codes.html + (let* ((base-state-codes (list 'boot_fail 'cancelled 'completed 'deadline + 'failed 'node_fail 'out_of_memory 'pending + 'preempted 'running 'suspended 'timeout)) + (base-states state-flags (partition (cut memq <> base-state-codes) + (map (compose string->symbol string-downcase) + (vector->list state-codes))))) + ;; TODO: Capture and report fine-grained job states, not merely 'failed, + ;; 'pending and 'completed. + (match base-states + (((or 'pending 'preempted 'running 'suspended)) + 'pending) + (((or 'boot_fail 'cancelled 'deadline 'failed 'node_fail 'out_of_memory)) + 'failed) + (('completed) + ;; If job has an additional COMPLETING flag, wait for it to fully + ;; complete. Mark it as pending until then. + (if (memq 'completing state-flags) + 'pending + 'completed)) + (_ + (assertion-violation base-states + "Multiple base states reported by slurm"))))) + (define* (job-state job-id #:key api-endpoint jwt) "Query the state of slurm @var{job-id} via @var{api-endpoint} authenticating using @var{jwt}. Return value is one of the symbols @@ -131,26 +160,17 @@ monad." (match (json-ref response "errors") (#() (state-return - (match (json-ref (find (lambda (job) - (= (json-ref job "job_id") - job-id)) - (vector->list (json-ref response "jobs"))) - "job_state") - (#(job-state) - (trace 'slurm-api - "slurmctld reports ~a state for job ~a" job-state job-id) - (let ((job-state - (case (string->symbol (string-downcase job-state)) - ;; slurm returns a PENDING state when the job has not yet - ;; been scheduled on a compute node, and RUNNING once it - ;; has been scheduled and is running. - ((pending running) 'pending) - ((completed) 'completed) - ((failed) 'failed) - (else (error "Unknown slurm job state" job-state))))) - (trace 'slurm-api - "return ~a state for job ~a" job-state job-id) - job-state))))) + (let ((state-codes (json-ref (find (lambda (job) + (= (json-ref job "job_id") + job-id)) + (vector->list (json-ref response "jobs"))) + "job_state"))) + (trace 'slurm-api + "slurmctld reports ~a state for job ~a" state-codes job-id) + (let ((job-state (slurm-state-codes->job-state state-codes))) + (trace 'slurm-api + "return ~a state for job ~a" job-state job-id) + job-state)))) (#(errors ...) ;; Check in slurmdbd if job has been completed and purged from ;; slurmctld's active memory. diff --git a/tests/slurm-api.scm b/tests/slurm-api.scm new file mode 100644 index 0000000..347c5c3 --- /dev/null +++ b/tests/slurm-api.scm @@ -0,0 +1,48 @@ +;;; ravanan --- High-reproducibility CWL runner powered by Guix +;;; Copyright © 2025 Arun Isaac <arunisaac@systemreboot.net> +;;; +;;; This file is part of ravanan. +;;; +;;; ravanan is free software: you can redistribute it and/or modify it +;;; under the terms of the GNU General Public License as published by +;;; the Free Software Foundation, either version 3 of the License, or +;;; (at your option) any later version. +;;; +;;; ravanan is distributed in the hope that it will be useful, but +;;; WITHOUT ANY WARRANTY; without even the implied warranty of +;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;;; General Public License for more details. +;;; +;;; You should have received a copy of the GNU General Public License +;;; along with ravanan. If not, see <https://www.gnu.org/licenses/>. + +(use-modules (srfi srfi-64) + (ravanan slurm-api)) + +(define slurm-state-codes->job-state + (@@ (ravanan slurm-api) + slurm-state-codes->job-state)) + +(test-begin "slurm-api") + +(test-equal "Handle PENDING job state" + 'pending + (slurm-state-codes->job-state #("PENDING"))) + +(test-equal "Handle RUNNING job state" + 'pending + (slurm-state-codes->job-state #("RUNNING"))) + +(test-equal "Handle FAILED job state" + 'failed + (slurm-state-codes->job-state #("FAILED"))) + +(test-equal "Handle COMPLETED job state" + 'completed + (slurm-state-codes->job-state #("COMPLETED"))) + +(test-equal "Handle COMPLETED job state with additional COMPLETING flag" + 'pending + (slurm-state-codes->job-state #("COMPLETED" "COMPLETING"))) + +(test-end "slurm-api") |