summary refs log tree commit diff
diff options
context:
space:
mode:
authorArun Isaac2024-10-09 23:45:18 +0100
committerArun Isaac2024-10-10 02:31:24 +0100
commit99dc8a02214d423ec666dd37bf69b89fedaf0443 (patch)
treeb9d34ff2e40fab684af43d7afc07620fecc457c9
parent552919ee626a0c8ca3d6c43f7e771803d475d0db (diff)
downloadravanan-99dc8a02214d423ec666dd37bf69b89fedaf0443.tar.gz
ravanan-99dc8a02214d423ec666dd37bf69b89fedaf0443.tar.lz
ravanan-99dc8a02214d423ec666dd37bf69b89fedaf0443.zip
workflow: Recompute checksum only if it is not provided.
* ravanan/workflow.scm (intern-file): Accept and return File type
value, and recompute the SHA1 hash only if it is not provided in the
checksum field.
(resolve-inputs): Recompute checksum only if it is not provided.
Update call to intern-file.
-rw-r--r--ravanan/workflow.scm59
1 files changed, 38 insertions, 21 deletions
diff --git a/ravanan/workflow.scm b/ravanan/workflow.scm
index 61a1297..1656e1e 100644
--- a/ravanan/workflow.scm
+++ b/ravanan/workflow.scm
@@ -447,21 +447,32 @@ is the class of the workflow."
       (uri-path (string->uri location))))
 
 (define (intern-file file store)
-  "Intern @var{file} into the ravanan @var{store} unless it is already a store
-path. Return the interned path."
-  (if (string-prefix? store file)
-      ;; If file is already a store path, return it as is.
-      file
-      ;; Else, intern it and return the interned path.
-      (let ((interned-path
-             (expand-file-name
-              (file-name-join* %store-files-directory
-                               (string-append (sha1-hash file)
-                                              "-"
-                                              (basename file)))
-              store)))
-        (copy-file file interned-path)
-        interned-path)))
+  "Intern @code{File} type object @var{file} into the ravanan @var{store} unless it
+is already a store path. Return an updated @code{File} type object with the
+interned path and location."
+  (let* ((path (assoc-ref file "path"))
+         (checksum (assoc-ref file "checksum"))
+         (sha1 (if (and checksum
+                        (string-prefix? "sha1$" checksum))
+                   (string-drop checksum (string-length "sha1$"))
+                   (sha1-hash path)))
+         (interned-path
+          (if (string-prefix? store path)
+              ;; If file is already a store path, return it as is.
+              path
+              ;; Else, intern it and return the interned path.
+              (let ((interned-path
+                     (expand-file-name
+                      (file-name-join* %store-files-directory
+                                       (string-append sha1
+                                                      "-"
+                                                      (basename path)))
+                      store)))
+                (copy-file path interned-path)
+                interned-path))))
+    (maybe-assoc-set file
+      (cons "location" (just (string-append "file://" interned-path)))
+      (cons "path" (just interned-path)))))
 
 (define (resolve-inputs inputs formal-inputs store)
   "Traverse @var{inputs} and @var{formal-inputs} recursively, intern any
@@ -477,17 +488,23 @@ store-interned path."
     (let* ((path (or (and (assoc-ref input "location")
                           (location->path (assoc-ref input "location")))
                      (assoc-ref input "path")))
-           (interned-path (intern-file path store)))
-      (maybe-assoc-set input
-        (cons "location" (just interned-path))
-        (cons "path" (just interned-path))
+           (interned-input
+            ;; Compute the checksum, but only if it is not provided. If it is
+            ;; provided, trust that it is correct. This avoids costly (think
+            ;; hashing terabytes of data) hash computations causing a long delay
+            ;; before the workflow actually starts running.
+            (intern-file (maybe-assoc-set input
+                           (cons "path" (just path))
+                           (cons "checksum" (just (or (assoc-ref input "checksum")
+                                                      (checksum path)))))
+                         store)))
+      (maybe-assoc-set interned-input
         (cons "basename" (just (basename path)))
         (cons "nameroot" (just (file-name-stem path)))
         (cons "nameext" (just (file-name-extension path)))
-        (cons "checksum" (just (checksum path)))
         (cons "size" (just (stat:size (stat path))))
         (cons "secondaryFiles"
-              (maybe-let* ((secondary-files (maybe-assoc-ref (just input)
+              (maybe-let* ((secondary-files (maybe-assoc-ref (just interned-input)
                                                              "secondaryFiles")))
                 (just (vector-map canonicalize-file-input
                                   secondary-files)))))))