summary refs log tree commit diff
diff options
context:
space:
mode:
authorArun Isaac2024-09-06 14:26:48 +0100
committerArun Isaac2024-09-06 14:26:48 +0100
commitadd78e89ce9ec0eb309b900e0199454db1b6420f (patch)
treeee43543db6f9400a4a9c65088dbde1add5037283
parentde5b44f066985fd1dadcc81d02919f213bdaf5c6 (diff)
downloadravanan-add78e89ce9ec0eb309b900e0199454db1b6420f.tar.gz
ravanan-add78e89ce9ec0eb309b900e0199454db1b6420f.tar.lz
ravanan-add78e89ce9ec0eb309b900e0199454db1b6420f.zip
command-line-tool: Refactor scheduler into separate named functions.
* ravanan/command-line-tool.scm (command-line-tool-scheduler):
Refactor scheduler into separate named functions.
-rw-r--r--ravanan/command-line-tool.scm110
1 files changed, 63 insertions, 47 deletions
diff --git a/ravanan/command-line-tool.scm b/ravanan/command-line-tool.scm
index efa953e..c943881 100644
--- a/ravanan/command-line-tool.scm
+++ b/ravanan/command-line-tool.scm
@@ -939,53 +939,69 @@ named @var{name} with @var{inputs} using tools from Guix manifest
 (define* (command-line-tool-scheduler manifest scratch store batch-system
                                       #:key guix-daemon-socket
                                       slurm-api-endpoint slurm-jwt)
-  (scheduler (match-lambda*
-               ((proc inputs)
-                (let ((name (scheduler-proc-name proc))
-                      (cwl (scheduler-proc-cwl proc)))
-                  (run-command-line-tool name
-                                         manifest
-                                         cwl
-                                         inputs
-                                         scratch
-                                         store
-                                         batch-system
-                                         #:guix-daemon-socket guix-daemon-socket
-                                         #:slurm-api-endpoint slurm-api-endpoint
-                                         #:slurm-jwt slurm-jwt))))
-             (lambda (state)
-               (guard (c ((job-failure? c)
-                          (let ((script (job-failure-script c)))
-                            (error
-                             "~a failed; logs at ~a and ~a~%"
-                             script
-                             (script->store-stdout-file script store)
-                             (script->store-stderr-file script store)))))
-                 (cond
-                  ((single-machine-job-state? state)
-                   (if (single-machine-job-state-success? state)
-                       'completed
-                       (raise-exception
-                        (job-failure (single-machine-job-state-script state)))))
-                  ((slurm-job-state? state)
-                   (case (job-state (slurm-job-state-job-id state)
-                                    #:api-endpoint slurm-api-endpoint
-                                    #:jwt slurm-jwt)
-                     ((failed)
-                      (raise-exception
-                       (job-failure (slurm-job-state-script state))))
-                     (else => identity))))))
+  (define (schedule proc inputs)
+    "Schedule @var{proc} with inputs from the @var{inputs} association list. Return a
+job state object."
+    (let ((name (scheduler-proc-name proc))
+          (cwl (scheduler-proc-cwl proc)))
+      (run-command-line-tool name
+                             manifest
+                             cwl
+                             inputs
+                             scratch
+                             store
+                             batch-system
+                             #:guix-daemon-socket guix-daemon-socket
+                             #:slurm-api-endpoint slurm-api-endpoint
+                             #:slurm-jwt slurm-jwt)))
+
+  (define (poll state)
+    "Return current status of job @var{state} object---one of the symbols
+@code{pending} or @code{completed}. Raise an exception and exit if job has
+failed."
+    (guard (c ((job-failure? c)
+               (let ((script (job-failure-script c)))
+                 (error
+                  "~a failed; logs at ~a and ~a~%"
+                  script
+                  (script->store-stdout-file script store)
+                  (script->store-stderr-file script store)))))
+      (cond
+       ;; Single machine jobs are run synchronously. So, they return success or
+       ;; failure immediately.
+       ((single-machine-job-state? state)
+        (if (single-machine-job-state-success? state)
+            'completed
+            (raise-exception
+             (job-failure (single-machine-job-state-script state)))))
+       ;; Poll slurm for job state.
+       ((slurm-job-state? state)
+        (case (job-state (slurm-job-state-job-id state)
+                         #:api-endpoint slurm-api-endpoint
+                         #:jwt slurm-jwt)
+          ((failed)
+           (raise-exception
+            (job-failure (slurm-job-state-script state))))
+          (else => identity))))))
+
+  (define (capture-output state)
+    "Return output of completed job @var{state}."
+    (let ((script ((case batch-system
+                     ((single-machine) single-machine-job-state-script)
+                     ((slurm-api) slurm-job-state-script))
+                   state)))
+      (format (current-error-port)
+              "~a completed; logs at ~a and ~a~%"
+              script
+              (script->store-stdout-file script store)
+              (script->store-stderr-file script store))
+      (capture-command-line-tool-output script store)))
+
+  (scheduler schedule
+             poll
              (case batch-system
+               ;; Single machine jobs are run synchronously. So, there is no
+               ;; need to wait to poll them.
                ((single-machine) 0)
                ((slurm-api) %job-poll-interval))
-             (lambda (state)
-               (let ((script ((case batch-system
-                                ((single-machine) single-machine-job-state-script)
-                                ((slurm-api) slurm-job-state-script))
-                              state)))
-                 (format (current-error-port)
-                         "~a completed; logs at ~a and ~a~%"
-                         script
-                         (script->store-stdout-file script store)
-                         (script->store-stderr-file script store))
-                 (capture-command-line-tool-output script store)))))
+             capture-output))