about summary refs log tree commit diff
diff options
context:
space:
mode:
authorArun Isaac2025-03-18 17:46:19 +0000
committerArun Isaac2025-03-18 17:46:19 +0000
commit9be0612b4b486f7d96ddd71c156be9284eff30b2 (patch)
treeef2d7849d43c386196d1c772416e513e56d8bf9f
parent9f3079e84a368e757021735c5b500ae34b38e11b (diff)
downloadravanan-9be0612b4b486f7d96ddd71c156be9284eff30b2.tar.gz
ravanan-9be0612b4b486f7d96ddd71c156be9284eff30b2.tar.lz
ravanan-9be0612b4b486f7d96ddd71c156be9284eff30b2.zip
reader: Canonicalize File inputs completely in reader.
It is the reader's job to completely canonicalize File type inputs.
This should not be done later when the workflow is executing.

* ravanan/reader.scm: Import (web uri).
(normalize-formal-input): Normalize default value.
(location->path): New function.
(normalize-input): Canonicalize File type input completely.
* ravanan/workflow.scm (workflow-scheduler): Update comment about
partially specified File objects.
(location->path): Delete function.
(resolve-inputs): Do not canonicalize File type inputs; they already
are.
-rw-r--r--ravanan/reader.scm40
-rw-r--r--ravanan/workflow.scm46
2 files changed, 36 insertions, 50 deletions
diff --git a/ravanan/reader.scm b/ravanan/reader.scm
index 05dff6e..577ba97 100644
--- a/ravanan/reader.scm
+++ b/ravanan/reader.scm
@@ -21,6 +21,7 @@
   #:use-module (srfi srfi-26)
   #:use-module (ice-9 filesystem)
   #:use-module (ice-9 match)
+  #:use-module (web uri)
   #:use-module (json)
   #:use-module (yaml)
   #:use-module (ravanan work command-line-tool)
@@ -147,10 +148,9 @@ array of array of @code{File}s, etc. Else, return @code{#f}"
   "Normalize formal @var{input}."
   (if (some-file-type? (formal-parameter-type (assoc-ref input "type")))
       (maybe-assoc-set input
-        (cons (list "default" "location")
-              (maybe-let* ((location (maybe-assoc-ref (just input)
-                                                      "default" "location")))
-                (just (canonicalize-path location))))
+        (cons "default"
+              (maybe-bind (maybe-assoc-ref (just input) "default")
+                          normalize-input))
         (cons "secondaryFiles"
               (maybe-bind (maybe-assoc-ref (just input) "secondaryFiles")
                           (compose just
@@ -256,6 +256,15 @@ array of array of @code{File}s, etc. Else, return @code{#f}"
     (cut normalize-workflow
          (preprocess-include (read-yaml-file (basename workflow-file))))))
 
+(define (location->path location)
+  "Convert file @var{location} URI to path. Tolerate invalid locations that are
+actually paths."
+  (cond
+   ;; If location is an URI, parse the URI and return the path part.
+   ((string->uri location) => uri-path)
+   ;; location is actually a path; return as is.
+   (else location)))
+
 (define (normalize-input input)
   "Normalize actual @var{input}."
   (cond
@@ -264,9 +273,26 @@ array of array of @code{File}s, etc. Else, return @code{#f}"
                 input))
    ((eq? (object-type input)
          'File)
-    (assoc-set input
-      (cons "location"
-            (canonicalize-path (assoc-ref input "location")))))
+    (let ((path (canonicalize-path (cond
+                                    ((assoc-ref input "location") => location->path)
+                                    (else (assoc-ref input "path"))))))
+      (maybe-assoc-set input
+        (cons "basename" (just (basename path)))
+        (cons "nameroot" (just (file-name-stem path)))
+        (cons "nameext" (just (file-name-extension path)))
+        (cons "size" (just (stat:size (stat path))))
+        (cons "location" (just (uri->string (build-uri 'file #:path path))))
+        (cons "path" (just path))
+        ;; Compute the checksum, but only if it is not provided. If it is
+        ;; provided, trust that it is correct. This avoids costly (think hashing
+        ;; terabytes of data) hash computations causing a long delay before the
+        ;; workflow actually starts running.
+        (cons "checksum" (just (or (assoc-ref input "checksum")
+                                   (checksum path))))
+        (cons "secondaryFiles"
+              (maybe-let* ((secondary-files (maybe-assoc-ref (just input)
+                                                             "secondaryFiles")))
+                (just (vector-map normalize-input secondary-files)))))))
    (else input)))
 
 (define (read-inputs inputs-file)
diff --git a/ravanan/workflow.scm b/ravanan/workflow.scm
index a3339a3..3fc6ec6 100644
--- a/ravanan/workflow.scm
+++ b/ravanan/workflow.scm
@@ -283,7 +283,7 @@ object or a @code{<scheduler-proc>} object."
                     "Scatter method not implemented yet")))
           (let* ((formal-inputs (assoc-ref* cwl "inputs"))
                  ;; We need to resolve inputs after adding defaults since the
-                 ;; default values may contain partially specified File objects.
+                 ;; default values may contain uninterned File objects.
                  (inputs (resolve-inputs (add-defaults inputs formal-inputs)
                                          formal-inputs
                                          store)))
@@ -448,49 +448,9 @@ is the class of the workflow."
                                     (cons id value))))
                            formal-inputs))
 
-(define (location->path location)
-  "Convert file @var{location} URI to path."
-  (if (string-prefix? "/" location)
-      ;; Sometimes location is actually a path. In that case, return as is.
-      location
-      ;; If location is an URI, parse the URI and return the path part.
-      (uri-path (string->uri location))))
-
 (define (resolve-inputs inputs formal-inputs store)
   "Traverse @var{inputs} and @var{formal-inputs} recursively, intern any
-files found into the @var{store} and return a tree of the fully resolved inputs.
-
-The returned @code{File} type objects are updated with @code{basename},
-@code{nameroot}, @code{nameext}, @code{checksum} and @code{size} fields, and
-store-interned paths in the @code{location} and @code{path} fields. The
-@code{basename} field contains the basename of the original path, and not the
-store-interned path."
-  (define (canonicalize-file-input input)
-    "Canonicalize @code{File} type @var{input} and its secondary files."
-    (let* ((path (or (and (assoc-ref input "location")
-                          (location->path (assoc-ref input "location")))
-                     (assoc-ref input "path")))
-           (interned-input
-            ;; Compute the checksum, but only if it is not provided. If it is
-            ;; provided, trust that it is correct. This avoids costly (think
-            ;; hashing terabytes of data) hash computations causing a long delay
-            ;; before the workflow actually starts running.
-            (intern-file (maybe-assoc-set input
-                           (cons "path" (just path))
-                           (cons "checksum" (just (or (assoc-ref input "checksum")
-                                                      (checksum path)))))
-                         store)))
-      (maybe-assoc-set interned-input
-        (cons "basename" (just (basename path)))
-        (cons "nameroot" (just (file-name-stem path)))
-        (cons "nameext" (just (file-name-extension path)))
-        (cons "size" (just (stat:size (stat path))))
-        (cons "secondaryFiles"
-              (maybe-let* ((secondary-files (maybe-assoc-ref (just interned-input)
-                                                             "secondaryFiles")))
-                (just (vector-map canonicalize-file-input
-                                  secondary-files)))))))
-
+files found into the @var{store} and return a tree of the fully resolved inputs."
   (define (match-secondary-file-pattern input pattern)
     "Return @code{#t} if secondary file @var{pattern} matches at least one secondary
 file in @var{input}."
@@ -533,7 +493,7 @@ error out."
                                             maybe-secondary-files)))
                      ;; Intern File type inputs and fully resolve them.
                      ((eq? matched-type 'File)
-                      (let ((resolved-input (canonicalize-file-input input)))
+                      (let ((resolved-input (intern-file input store)))
                         ;; Ensure secondary files are provided with File type
                         ;; inputs.
                         (maybe-bind maybe-secondary-files