summary refs log tree commit diff
diff options
context:
space:
mode:
authorArun Isaac2024-09-30 01:49:00 +0100
committerArun Isaac2024-10-01 01:12:51 +0100
commit0b7241bffd43ef3ce26734a020ca5dfe72dcc95a (patch)
tree8ff1120c0541fc205a7d91e9480a0232653b36bb
parent889dd94cfb1de3cdc98073a869e6cc80757850a5 (diff)
downloadravanan-0b7241bffd43ef3ce26734a020ca5dfe72dcc95a.tar.gz
ravanan-0b7241bffd43ef3ce26734a020ca5dfe72dcc95a.tar.lz
ravanan-0b7241bffd43ef3ce26734a020ca5dfe72dcc95a.zip
command-line-tool: Move inputs resolution to (ravanan workflow).
We resolve inputs in run-workflow before doing anything else. We thus
avoid bugs due to partially or insufficiently resolved inputs.

* ravanan/command-line-tool.scm: Do not import (web uri).
(%store-files-directory, %store-data-directory,
%store-logs-directory): Export.
(location->path, resolve-inputs, intern-file): Move to (ravanan
workflow).
(build-command-line-tool-script): Do not call resolve-inputs.
* ravanan/workflow.scm: Import (web uri) and (ravanan work types).
(run-workflow): Call resolve-inputs.
-rw-r--r--ravanan/command-line-tool.scm141
-rw-r--r--ravanan/workflow.scm202
2 files changed, 173 insertions, 170 deletions
diff --git a/ravanan/command-line-tool.scm b/ravanan/command-line-tool.scm
index 5cca5d0..8d1f8ef 100644
--- a/ravanan/command-line-tool.scm
+++ b/ravanan/command-line-tool.scm
@@ -27,7 +27,6 @@
   #:use-module (ice-9 format)
   #:use-module (ice-9 match)
   #:use-module (ice-9 popen)
-  #:use-module (web uri)
   #:use-module (gcrypt base16)
   #:use-module (gcrypt hash)
   #:use-module ((gnu packages gnupg) #:select (guile-gcrypt))
@@ -59,7 +58,11 @@
             %command-line-tool-supported-requirements
             script->store-stdout-file
             script->store-stderr-file
-            capture-command-line-tool-output))
+            capture-command-line-tool-output
+
+            %store-files-directory
+            %store-data-directory
+            %store-logs-directory))
 
 (define %store-files-directory
   "files")
@@ -297,119 +300,6 @@ G-expressions are inserted."
       (< (command-line-binding-position binding1)
          (command-line-binding-position binding2))))))
 
-(define (location->path location)
-  "Convert file @var{location} URI to path."
-  (if (string-prefix? "/" location)
-      ;; Sometimes location is actually a path. In that case, return as is.
-      location
-      ;; If location is an URI, parse the URI and return the path part.
-      (uri-path (string->uri location))))
-
-(define (resolve-inputs inputs formal-inputs store)
-  "Traverse @var{inputs} and @var{formal-inputs} recursively, intern any
-files found into the @var{store} and return a tree of the fully
-resolved inputs.
-
-The returned @code{File} type objects are updated with @code{basename},
-@code{nameroot}, @code{nameext}, @code{checksum} and @code{size} fields, and
-store-interned paths in the @code{location} and @code{path} fields. The
-@code{basename} field contains the basename of the original path, and not the
-store-interned path."
-  (define (canonicalize-file-input input)
-    "Canonicalize @code{File} type @var{input} and its secondary files."
-    (let* ((path (or (and (assoc-ref input "location")
-                          (location->path (assoc-ref input "location")))
-                     (assoc-ref input "path")))
-           (interned-path (intern-file path store)))
-      (maybe-assoc-set input
-        (cons "location" (just interned-path))
-        (cons "path" (just interned-path))
-        (cons "basename" (just (basename path)))
-        (cons "nameroot" (just (file-name-stem path)))
-        (cons "nameext" (just (file-name-extension path)))
-        (cons "checksum" (just (checksum path)))
-        (cons "size" (just (stat:size (stat path))))
-        (cons "secondaryFiles"
-              (maybe-let* ((secondary-files (maybe-assoc-ref (just input)
-                                                             "secondaryFiles")))
-                (just (vector-map canonicalize-file-input
-                                  secondary-files)))))))
-
-  (define (match-secondary-file-pattern input pattern)
-    "Return @code{#t} if secondary file @var{pattern} matches at least one secondary
-file in @var{input}."
-    ;; TODO: Implement caret characters in SecondaryFileSchema DSL.
-    (vector-any (lambda (secondary-file)
-                  (string=? (assoc-ref* secondary-file "path")
-                            (string-append (assoc-ref* input "path")
-                                           pattern)))
-                (or (assoc-ref input "secondaryFiles")
-                    (user-error "Missing secondaryFiles in input ~a"
-                                input))))
-
-  (define (check-secondary-files input secondary-files)
-    "Check if all required @var{secondary-files} are present in @var{input}. If not,
-error out."
-    (vector-for-each (lambda (secondary-file)
-                       (let ((pattern (assoc-ref* secondary-file "pattern")))
-                         (when (and (assoc-ref* secondary-file "required")
-                                    (not (match-secondary-file-pattern input pattern)))
-                           (user-error "Secondary file ~a missing in input ~a"
-                                       pattern
-                                       input))))
-                     secondary-files))
-
-  (define (resolve inputs types maybe-secondary-files)
-    (vector-map (lambda (input type-tree maybe-secondary-files)
-                  ;; Check type.
-                  (let* ((type (formal-parameter-type type-tree))
-                         (matched-type (match-type input type)))
-                    (unless matched-type
-                      (error input "Type mismatch" input type))
-                    ;; TODO: Implement record and enum types.
-                    (cond
-                     ;; Recurse over array types.
-                     ((array-type? matched-type)
-                      (resolve input
-                               (make-vector (vector-length input)
-                                            (assoc-ref type-tree "items"))
-                               (make-vector (vector-length input)
-                                            maybe-secondary-files)))
-                     ;; Intern File type inputs and fully resolve them.
-                     ((eq? matched-type 'File)
-                      (let ((resolved-input (canonicalize-file-input input)))
-                        ;; Ensure secondary files are provided with File type
-                        ;; inputs.
-                        (maybe-bind maybe-secondary-files
-                                    (cut check-secondary-files resolved-input <>))
-                        resolved-input))
-                     ;; Return other input types unchanged.
-                     (else input))))
-                inputs
-                types
-                maybe-secondary-files))
-  
-  (vector-map->list (lambda (input formal-input)
-                      (cons (assoc-ref formal-input "id")
-                            input))
-                    (resolve (vector-map (lambda (formal-input)
-                                           (let ((id (assoc-ref* formal-input "id")))
-                                             (or (assoc-ref inputs id)
-                                                 (assoc-ref formal-input "default")
-                                                 'null)))
-                                         formal-inputs)
-                             (vector-map (lambda (formal-input)
-                                           (let ((id (assoc-ref* formal-input "id")))
-                                             (or (assoc-ref formal-input "type")
-                                                 (user-error "Type of input ~a not specified"
-                                                             id))))
-                                         formal-inputs)
-                             (vector-map (lambda (formal-input)
-                                           (maybe-assoc-ref (just formal-input)
-                                                            "secondaryFiles"))
-                                         formal-inputs))
-                    formal-inputs))
-
 (define (command-line-binding->args binding)
   "Return a list of arguments for @var{binding}. The returned list may
 contain strings or G-expressions. The G-expressions may reference an
@@ -603,23 +493,6 @@ path."
     (call-with-input-file store-data-file
       json->scm)))
 
-(define (intern-file file store)
-  "Intern @var{file} into the ravanan @var{store} unless it is already a store
-path. Return the interned path."
-  (if (string-prefix? store file)
-      ;; If file is already a store path, return it as is.
-      file
-      ;; Else, intern it and return the interned path.
-      (let ((interned-path
-             (expand-file-name
-              (file-name-join* %store-files-directory
-                               (string-append (sha1-hash file)
-                                              "-"
-                                              (basename file)))
-              store)))
-        (copy-file file interned-path)
-        interned-path)))
-
 (define (copy-input-files-gexp inputs)
   "Return a G-expression that copies @code{File} type inputs (along with secondary
 files) from @var{inputs} into @code{inputs-directory} and return a new
@@ -1010,9 +883,7 @@ directory of the workflow."
 
               (call-with-temporary-directory
                (lambda (inputs-directory)
-                 (let ((inputs
-                        #$(copy-input-files-gexp
-                           (resolve-inputs inputs (assoc-ref* cwl "inputs") store)))
+                 (let ((inputs #$(copy-input-files-gexp inputs))
                        (runtime `(("cores" . ,(total-processor-count)))))
 
                    ;; Set environment defined by workflow.
diff --git a/ravanan/workflow.scm b/ravanan/workflow.scm
index bbea430..015f8d5 100644
--- a/ravanan/workflow.scm
+++ b/ravanan/workflow.scm
@@ -25,12 +25,14 @@
   #:use-module (srfi srfi-71)
   #:use-module (ice-9 filesystem)
   #:use-module (ice-9 match)
+  #:use-module (web uri)
   #:use-module (ravanan command-line-tool)
   #:use-module (ravanan job-state)
   #:use-module (ravanan propnet)
   #:use-module (ravanan reader)
   #:use-module (ravanan work command-line-tool)
   #:use-module (ravanan work monads)
+  #:use-module (ravanan work types)
   #:use-module (ravanan work ui)
   #:use-module (ravanan work utils)
   #:use-module (ravanan work vectors)
@@ -356,6 +358,135 @@ exit if job has failed."
 
   (scheduler schedule poll capture-output))
 
+(define (location->path location)
+  "Convert file @var{location} URI to path."
+  (if (string-prefix? "/" location)
+      ;; Sometimes location is actually a path. In that case, return as is.
+      location
+      ;; If location is an URI, parse the URI and return the path part.
+      (uri-path (string->uri location))))
+
+(define (intern-file file store)
+  "Intern @var{file} into the ravanan @var{store} unless it is already a store
+path. Return the interned path."
+  (if (string-prefix? store file)
+      ;; If file is already a store path, return it as is.
+      file
+      ;; Else, intern it and return the interned path.
+      (let ((interned-path
+             (expand-file-name
+              (file-name-join* %store-files-directory
+                               (string-append (sha1-hash file)
+                                              "-"
+                                              (basename file)))
+              store)))
+        (copy-file file interned-path)
+        interned-path)))
+
+(define (resolve-inputs inputs formal-inputs store)
+  "Traverse @var{inputs} and @var{formal-inputs} recursively, intern any
+files found into the @var{store} and return a tree of the fully resolved inputs.
+
+The returned @code{File} type objects are updated with @code{basename},
+@code{nameroot}, @code{nameext}, @code{checksum} and @code{size} fields, and
+store-interned paths in the @code{location} and @code{path} fields. The
+@code{basename} field contains the basename of the original path, and not the
+store-interned path."
+  (define (canonicalize-file-input input)
+    "Canonicalize @code{File} type @var{input} and its secondary files."
+    (let* ((path (or (and (assoc-ref input "location")
+                          (location->path (assoc-ref input "location")))
+                     (assoc-ref input "path")))
+           (interned-path (intern-file path store)))
+      (maybe-assoc-set input
+        (cons "location" (just interned-path))
+        (cons "path" (just interned-path))
+        (cons "basename" (just (basename path)))
+        (cons "nameroot" (just (file-name-stem path)))
+        (cons "nameext" (just (file-name-extension path)))
+        (cons "checksum" (just (checksum path)))
+        (cons "size" (just (stat:size (stat path))))
+        (cons "secondaryFiles"
+              (maybe-let* ((secondary-files (maybe-assoc-ref (just input)
+                                                             "secondaryFiles")))
+                (just (vector-map canonicalize-file-input
+                                  secondary-files)))))))
+
+  (define (match-secondary-file-pattern input pattern)
+    "Return @code{#t} if secondary file @var{pattern} matches at least one secondary
+file in @var{input}."
+    ;; TODO: Implement caret characters in SecondaryFileSchema DSL.
+    (vector-any (lambda (secondary-file)
+                  (string=? (assoc-ref* secondary-file "path")
+                            (string-append (assoc-ref* input "path")
+                                           pattern)))
+                (or (assoc-ref input "secondaryFiles")
+                    (user-error "Missing secondaryFiles in input ~a"
+                                input))))
+
+  (define (check-secondary-files input secondary-files)
+    "Check if all required @var{secondary-files} are present in @var{input}. If not,
+error out."
+    (vector-for-each (lambda (secondary-file)
+                       (let ((pattern (assoc-ref* secondary-file "pattern")))
+                         (when (and (assoc-ref* secondary-file "required")
+                                    (not (match-secondary-file-pattern input pattern)))
+                           (user-error "Secondary file ~a missing in input ~a"
+                                       pattern
+                                       input))))
+                     secondary-files))
+
+  (define (resolve inputs types maybe-secondary-files)
+    (vector-map (lambda (input type-tree maybe-secondary-files)
+                  ;; Check type.
+                  (let* ((type (formal-parameter-type type-tree))
+                         (matched-type (match-type input type)))
+                    (unless matched-type
+                      (error input "Type mismatch" input type))
+                    ;; TODO: Implement record and enum types.
+                    (cond
+                     ;; Recurse over array types.
+                     ((array-type? matched-type)
+                      (resolve input
+                               (make-vector (vector-length input)
+                                            (assoc-ref type-tree "items"))
+                               (make-vector (vector-length input)
+                                            maybe-secondary-files)))
+                     ;; Intern File type inputs and fully resolve them.
+                     ((eq? matched-type 'File)
+                      (let ((resolved-input (canonicalize-file-input input)))
+                        ;; Ensure secondary files are provided with File type
+                        ;; inputs.
+                        (maybe-bind maybe-secondary-files
+                                    (cut check-secondary-files resolved-input <>))
+                        resolved-input))
+                     ;; Return other input types unchanged.
+                     (else input))))
+                inputs
+                types
+                maybe-secondary-files))
+
+  (vector-map->list (lambda (input formal-input)
+                      (cons (assoc-ref formal-input "id")
+                            input))
+                    (resolve (vector-map (lambda (formal-input)
+                                           (let ((id (assoc-ref* formal-input "id")))
+                                             (or (assoc-ref inputs id)
+                                                 (assoc-ref formal-input "default")
+                                                 'null)))
+                                         formal-inputs)
+                             (vector-map (lambda (formal-input)
+                                           (let ((id (assoc-ref* formal-input "id")))
+                                             (or (assoc-ref formal-input "type")
+                                                 (user-error "Type of input ~a not specified"
+                                                             id))))
+                                         formal-inputs)
+                             (vector-map (lambda (formal-input)
+                                           (maybe-assoc-ref (just formal-input)
+                                                            "secondaryFiles"))
+                                         formal-inputs))
+                    formal-inputs))
+
 (define* (run-workflow name manifest cwl inputs
                        scratch store batch-system
                        #:key guix-daemon-socket
@@ -399,38 +530,39 @@ authenticate to the slurm API with. @var{slurm-api-endpoint} and
        (else
         (error "output not found" output-id)))))
 
-  ;; Ensure required inputs are specified.
-  (vector-for-each (lambda (input)
-                     (let ((input-id (assoc-ref input "id")))
-                       (unless (or (optional-input? input)
-                                   (assoc input-id inputs))
-                         (user-error "Required input `~a' not specified"
-                                     input-id))))
-                   (assoc-ref cwl "inputs"))
-  (let loop ((state (schedule-propnet
-                     (propnet (workflow->propagators name cwl)
-                              value=?
-                              merge-values
-                              (workflow-scheduler
-                               manifest scratch store batch-system
-                               #:guix-daemon-socket guix-daemon-socket
-                               #:slurm-api-endpoint slurm-api-endpoint
-                               #:slurm-jwt slurm-jwt))
-                     inputs)))
-    ;; Poll.
-    (let ((status state (poll-propnet state)))
-      (if (eq? status 'pending)
-          (begin
-            ;; Pause before looping and polling again so we don't bother the job
-            ;; server too often.
-            (sleep (case batch-system
-                     ;; Single machine jobs are run synchronously. So, there is
-                     ;; no need to wait to poll them.
-                     ((single-machine) 0)
-                     ((slurm-api) %job-poll-interval)))
-            (loop state))
-          ;; Capture outputs.
-          (vector-filter-map->list (cute capture-output
-                                         (capture-propnet-output state)
-                                         <>)
-                                   (assoc-ref* cwl "outputs"))))))
+  (let ((inputs (resolve-inputs inputs (assoc-ref* cwl "inputs") store)))
+    ;; Ensure required inputs are specified.
+    (vector-for-each (lambda (input)
+                       (let ((input-id (assoc-ref input "id")))
+                         (unless (or (optional-input? input)
+                                     (assoc input-id inputs))
+                           (user-error "Required input `~a' not specified"
+                                       input-id))))
+                     (assoc-ref cwl "inputs"))
+    (let loop ((state (schedule-propnet
+                       (propnet (workflow->propagators name cwl)
+                                value=?
+                                merge-values
+                                (workflow-scheduler
+                                 manifest scratch store batch-system
+                                 #:guix-daemon-socket guix-daemon-socket
+                                 #:slurm-api-endpoint slurm-api-endpoint
+                                 #:slurm-jwt slurm-jwt))
+                       inputs)))
+      ;; Poll.
+      (let ((status state (poll-propnet state)))
+        (if (eq? status 'pending)
+            (begin
+              ;; Pause before looping and polling again so we don't bother the job
+              ;; server too often.
+              (sleep (case batch-system
+                       ;; Single machine jobs are run synchronously. So, there is
+                       ;; no need to wait to poll them.
+                       ((single-machine) 0)
+                       ((slurm-api) %job-poll-interval)))
+              (loop state))
+            ;; Capture outputs.
+            (vector-filter-map->list (cute capture-output
+                                           (capture-propnet-output state)
+                                           <>)
+                                     (assoc-ref* cwl "outputs")))))))