aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorArun Isaac2024-09-30 01:49:00 +0100
committerArun Isaac2024-10-01 01:12:51 +0100
commit0b7241bffd43ef3ce26734a020ca5dfe72dcc95a (patch)
tree8ff1120c0541fc205a7d91e9480a0232653b36bb
parent889dd94cfb1de3cdc98073a869e6cc80757850a5 (diff)
downloadravanan-0b7241bffd43ef3ce26734a020ca5dfe72dcc95a.tar.gz
ravanan-0b7241bffd43ef3ce26734a020ca5dfe72dcc95a.tar.lz
ravanan-0b7241bffd43ef3ce26734a020ca5dfe72dcc95a.zip
command-line-tool: Move inputs resolution to (ravanan workflow).
We resolve inputs in run-workflow before doing anything else. We thus avoid bugs due to partially or insufficiently resolved inputs. * ravanan/command-line-tool.scm: Do not import (web uri). (%store-files-directory, %store-data-directory, %store-logs-directory): Export. (location->path, resolve-inputs, intern-file): Move to (ravanan workflow). (build-command-line-tool-script): Do not call resolve-inputs. * ravanan/workflow.scm: Import (web uri) and (ravanan work types). (run-workflow): Call resolve-inputs.
-rw-r--r--ravanan/command-line-tool.scm141
-rw-r--r--ravanan/workflow.scm202
2 files changed, 173 insertions, 170 deletions
diff --git a/ravanan/command-line-tool.scm b/ravanan/command-line-tool.scm
index 5cca5d0..8d1f8ef 100644
--- a/ravanan/command-line-tool.scm
+++ b/ravanan/command-line-tool.scm
@@ -27,7 +27,6 @@
#:use-module (ice-9 format)
#:use-module (ice-9 match)
#:use-module (ice-9 popen)
- #:use-module (web uri)
#:use-module (gcrypt base16)
#:use-module (gcrypt hash)
#:use-module ((gnu packages gnupg) #:select (guile-gcrypt))
@@ -59,7 +58,11 @@
%command-line-tool-supported-requirements
script->store-stdout-file
script->store-stderr-file
- capture-command-line-tool-output))
+ capture-command-line-tool-output
+
+ %store-files-directory
+ %store-data-directory
+ %store-logs-directory))
(define %store-files-directory
"files")
@@ -297,119 +300,6 @@ G-expressions are inserted."
(< (command-line-binding-position binding1)
(command-line-binding-position binding2))))))
-(define (location->path location)
- "Convert file @var{location} URI to path."
- (if (string-prefix? "/" location)
- ;; Sometimes location is actually a path. In that case, return as is.
- location
- ;; If location is an URI, parse the URI and return the path part.
- (uri-path (string->uri location))))
-
-(define (resolve-inputs inputs formal-inputs store)
- "Traverse @var{inputs} and @var{formal-inputs} recursively, intern any
-files found into the @var{store} and return a tree of the fully
-resolved inputs.
-
-The returned @code{File} type objects are updated with @code{basename},
-@code{nameroot}, @code{nameext}, @code{checksum} and @code{size} fields, and
-store-interned paths in the @code{location} and @code{path} fields. The
-@code{basename} field contains the basename of the original path, and not the
-store-interned path."
- (define (canonicalize-file-input input)
- "Canonicalize @code{File} type @var{input} and its secondary files."
- (let* ((path (or (and (assoc-ref input "location")
- (location->path (assoc-ref input "location")))
- (assoc-ref input "path")))
- (interned-path (intern-file path store)))
- (maybe-assoc-set input
- (cons "location" (just interned-path))
- (cons "path" (just interned-path))
- (cons "basename" (just (basename path)))
- (cons "nameroot" (just (file-name-stem path)))
- (cons "nameext" (just (file-name-extension path)))
- (cons "checksum" (just (checksum path)))
- (cons "size" (just (stat:size (stat path))))
- (cons "secondaryFiles"
- (maybe-let* ((secondary-files (maybe-assoc-ref (just input)
- "secondaryFiles")))
- (just (vector-map canonicalize-file-input
- secondary-files)))))))
-
- (define (match-secondary-file-pattern input pattern)
- "Return @code{#t} if secondary file @var{pattern} matches at least one secondary
-file in @var{input}."
- ;; TODO: Implement caret characters in SecondaryFileSchema DSL.
- (vector-any (lambda (secondary-file)
- (string=? (assoc-ref* secondary-file "path")
- (string-append (assoc-ref* input "path")
- pattern)))
- (or (assoc-ref input "secondaryFiles")
- (user-error "Missing secondaryFiles in input ~a"
- input))))
-
- (define (check-secondary-files input secondary-files)
- "Check if all required @var{secondary-files} are present in @var{input}. If not,
-error out."
- (vector-for-each (lambda (secondary-file)
- (let ((pattern (assoc-ref* secondary-file "pattern")))
- (when (and (assoc-ref* secondary-file "required")
- (not (match-secondary-file-pattern input pattern)))
- (user-error "Secondary file ~a missing in input ~a"
- pattern
- input))))
- secondary-files))
-
- (define (resolve inputs types maybe-secondary-files)
- (vector-map (lambda (input type-tree maybe-secondary-files)
- ;; Check type.
- (let* ((type (formal-parameter-type type-tree))
- (matched-type (match-type input type)))
- (unless matched-type
- (error input "Type mismatch" input type))
- ;; TODO: Implement record and enum types.
- (cond
- ;; Recurse over array types.
- ((array-type? matched-type)
- (resolve input
- (make-vector (vector-length input)
- (assoc-ref type-tree "items"))
- (make-vector (vector-length input)
- maybe-secondary-files)))
- ;; Intern File type inputs and fully resolve them.
- ((eq? matched-type 'File)
- (let ((resolved-input (canonicalize-file-input input)))
- ;; Ensure secondary files are provided with File type
- ;; inputs.
- (maybe-bind maybe-secondary-files
- (cut check-secondary-files resolved-input <>))
- resolved-input))
- ;; Return other input types unchanged.
- (else input))))
- inputs
- types
- maybe-secondary-files))
-
- (vector-map->list (lambda (input formal-input)
- (cons (assoc-ref formal-input "id")
- input))
- (resolve (vector-map (lambda (formal-input)
- (let ((id (assoc-ref* formal-input "id")))
- (or (assoc-ref inputs id)
- (assoc-ref formal-input "default")
- 'null)))
- formal-inputs)
- (vector-map (lambda (formal-input)
- (let ((id (assoc-ref* formal-input "id")))
- (or (assoc-ref formal-input "type")
- (user-error "Type of input ~a not specified"
- id))))
- formal-inputs)
- (vector-map (lambda (formal-input)
- (maybe-assoc-ref (just formal-input)
- "secondaryFiles"))
- formal-inputs))
- formal-inputs))
-
(define (command-line-binding->args binding)
"Return a list of arguments for @var{binding}. The returned list may
contain strings or G-expressions. The G-expressions may reference an
@@ -603,23 +493,6 @@ path."
(call-with-input-file store-data-file
json->scm)))
-(define (intern-file file store)
- "Intern @var{file} into the ravanan @var{store} unless it is already a store
-path. Return the interned path."
- (if (string-prefix? store file)
- ;; If file is already a store path, return it as is.
- file
- ;; Else, intern it and return the interned path.
- (let ((interned-path
- (expand-file-name
- (file-name-join* %store-files-directory
- (string-append (sha1-hash file)
- "-"
- (basename file)))
- store)))
- (copy-file file interned-path)
- interned-path)))
-
(define (copy-input-files-gexp inputs)
"Return a G-expression that copies @code{File} type inputs (along with secondary
files) from @var{inputs} into @code{inputs-directory} and return a new
@@ -1010,9 +883,7 @@ directory of the workflow."
(call-with-temporary-directory
(lambda (inputs-directory)
- (let ((inputs
- #$(copy-input-files-gexp
- (resolve-inputs inputs (assoc-ref* cwl "inputs") store)))
+ (let ((inputs #$(copy-input-files-gexp inputs))
(runtime `(("cores" . ,(total-processor-count)))))
;; Set environment defined by workflow.
diff --git a/ravanan/workflow.scm b/ravanan/workflow.scm
index bbea430..015f8d5 100644
--- a/ravanan/workflow.scm
+++ b/ravanan/workflow.scm
@@ -25,12 +25,14 @@
#:use-module (srfi srfi-71)
#:use-module (ice-9 filesystem)
#:use-module (ice-9 match)
+ #:use-module (web uri)
#:use-module (ravanan command-line-tool)
#:use-module (ravanan job-state)
#:use-module (ravanan propnet)
#:use-module (ravanan reader)
#:use-module (ravanan work command-line-tool)
#:use-module (ravanan work monads)
+ #:use-module (ravanan work types)
#:use-module (ravanan work ui)
#:use-module (ravanan work utils)
#:use-module (ravanan work vectors)
@@ -356,6 +358,135 @@ exit if job has failed."
(scheduler schedule poll capture-output))
+(define (location->path location)
+ "Convert file @var{location} URI to path."
+ (if (string-prefix? "/" location)
+ ;; Sometimes location is actually a path. In that case, return as is.
+ location
+ ;; If location is an URI, parse the URI and return the path part.
+ (uri-path (string->uri location))))
+
+(define (intern-file file store)
+ "Intern @var{file} into the ravanan @var{store} unless it is already a store
+path. Return the interned path."
+ (if (string-prefix? store file)
+ ;; If file is already a store path, return it as is.
+ file
+ ;; Else, intern it and return the interned path.
+ (let ((interned-path
+ (expand-file-name
+ (file-name-join* %store-files-directory
+ (string-append (sha1-hash file)
+ "-"
+ (basename file)))
+ store)))
+ (copy-file file interned-path)
+ interned-path)))
+
+(define (resolve-inputs inputs formal-inputs store)
+ "Traverse @var{inputs} and @var{formal-inputs} recursively, intern any
+files found into the @var{store} and return a tree of the fully resolved inputs.
+
+The returned @code{File} type objects are updated with @code{basename},
+@code{nameroot}, @code{nameext}, @code{checksum} and @code{size} fields, and
+store-interned paths in the @code{location} and @code{path} fields. The
+@code{basename} field contains the basename of the original path, and not the
+store-interned path."
+ (define (canonicalize-file-input input)
+ "Canonicalize @code{File} type @var{input} and its secondary files."
+ (let* ((path (or (and (assoc-ref input "location")
+ (location->path (assoc-ref input "location")))
+ (assoc-ref input "path")))
+ (interned-path (intern-file path store)))
+ (maybe-assoc-set input
+ (cons "location" (just interned-path))
+ (cons "path" (just interned-path))
+ (cons "basename" (just (basename path)))
+ (cons "nameroot" (just (file-name-stem path)))
+ (cons "nameext" (just (file-name-extension path)))
+ (cons "checksum" (just (checksum path)))
+ (cons "size" (just (stat:size (stat path))))
+ (cons "secondaryFiles"
+ (maybe-let* ((secondary-files (maybe-assoc-ref (just input)
+ "secondaryFiles")))
+ (just (vector-map canonicalize-file-input
+ secondary-files)))))))
+
+ (define (match-secondary-file-pattern input pattern)
+ "Return @code{#t} if secondary file @var{pattern} matches at least one secondary
+file in @var{input}."
+ ;; TODO: Implement caret characters in SecondaryFileSchema DSL.
+ (vector-any (lambda (secondary-file)
+ (string=? (assoc-ref* secondary-file "path")
+ (string-append (assoc-ref* input "path")
+ pattern)))
+ (or (assoc-ref input "secondaryFiles")
+ (user-error "Missing secondaryFiles in input ~a"
+ input))))
+
+ (define (check-secondary-files input secondary-files)
+ "Check if all required @var{secondary-files} are present in @var{input}. If not,
+error out."
+ (vector-for-each (lambda (secondary-file)
+ (let ((pattern (assoc-ref* secondary-file "pattern")))
+ (when (and (assoc-ref* secondary-file "required")
+ (not (match-secondary-file-pattern input pattern)))
+ (user-error "Secondary file ~a missing in input ~a"
+ pattern
+ input))))
+ secondary-files))
+
+ (define (resolve inputs types maybe-secondary-files)
+ (vector-map (lambda (input type-tree maybe-secondary-files)
+ ;; Check type.
+ (let* ((type (formal-parameter-type type-tree))
+ (matched-type (match-type input type)))
+ (unless matched-type
+ (error input "Type mismatch" input type))
+ ;; TODO: Implement record and enum types.
+ (cond
+ ;; Recurse over array types.
+ ((array-type? matched-type)
+ (resolve input
+ (make-vector (vector-length input)
+ (assoc-ref type-tree "items"))
+ (make-vector (vector-length input)
+ maybe-secondary-files)))
+ ;; Intern File type inputs and fully resolve them.
+ ((eq? matched-type 'File)
+ (let ((resolved-input (canonicalize-file-input input)))
+ ;; Ensure secondary files are provided with File type
+ ;; inputs.
+ (maybe-bind maybe-secondary-files
+ (cut check-secondary-files resolved-input <>))
+ resolved-input))
+ ;; Return other input types unchanged.
+ (else input))))
+ inputs
+ types
+ maybe-secondary-files))
+
+ (vector-map->list (lambda (input formal-input)
+ (cons (assoc-ref formal-input "id")
+ input))
+ (resolve (vector-map (lambda (formal-input)
+ (let ((id (assoc-ref* formal-input "id")))
+ (or (assoc-ref inputs id)
+ (assoc-ref formal-input "default")
+ 'null)))
+ formal-inputs)
+ (vector-map (lambda (formal-input)
+ (let ((id (assoc-ref* formal-input "id")))
+ (or (assoc-ref formal-input "type")
+ (user-error "Type of input ~a not specified"
+ id))))
+ formal-inputs)
+ (vector-map (lambda (formal-input)
+ (maybe-assoc-ref (just formal-input)
+ "secondaryFiles"))
+ formal-inputs))
+ formal-inputs))
+
(define* (run-workflow name manifest cwl inputs
scratch store batch-system
#:key guix-daemon-socket
@@ -399,38 +530,39 @@ authenticate to the slurm API with. @var{slurm-api-endpoint} and
(else
(error "output not found" output-id)))))
- ;; Ensure required inputs are specified.
- (vector-for-each (lambda (input)
- (let ((input-id (assoc-ref input "id")))
- (unless (or (optional-input? input)
- (assoc input-id inputs))
- (user-error "Required input `~a' not specified"
- input-id))))
- (assoc-ref cwl "inputs"))
- (let loop ((state (schedule-propnet
- (propnet (workflow->propagators name cwl)
- value=?
- merge-values
- (workflow-scheduler
- manifest scratch store batch-system
- #:guix-daemon-socket guix-daemon-socket
- #:slurm-api-endpoint slurm-api-endpoint
- #:slurm-jwt slurm-jwt))
- inputs)))
- ;; Poll.
- (let ((status state (poll-propnet state)))
- (if (eq? status 'pending)
- (begin
- ;; Pause before looping and polling again so we don't bother the job
- ;; server too often.
- (sleep (case batch-system
- ;; Single machine jobs are run synchronously. So, there is
- ;; no need to wait to poll them.
- ((single-machine) 0)
- ((slurm-api) %job-poll-interval)))
- (loop state))
- ;; Capture outputs.
- (vector-filter-map->list (cute capture-output
- (capture-propnet-output state)
- <>)
- (assoc-ref* cwl "outputs"))))))
+ (let ((inputs (resolve-inputs inputs (assoc-ref* cwl "inputs") store)))
+ ;; Ensure required inputs are specified.
+ (vector-for-each (lambda (input)
+ (let ((input-id (assoc-ref input "id")))
+ (unless (or (optional-input? input)
+ (assoc input-id inputs))
+ (user-error "Required input `~a' not specified"
+ input-id))))
+ (assoc-ref cwl "inputs"))
+ (let loop ((state (schedule-propnet
+ (propnet (workflow->propagators name cwl)
+ value=?
+ merge-values
+ (workflow-scheduler
+ manifest scratch store batch-system
+ #:guix-daemon-socket guix-daemon-socket
+ #:slurm-api-endpoint slurm-api-endpoint
+ #:slurm-jwt slurm-jwt))
+ inputs)))
+ ;; Poll.
+ (let ((status state (poll-propnet state)))
+ (if (eq? status 'pending)
+ (begin
+ ;; Pause before looping and polling again so we don't bother the job
+ ;; server too often.
+ (sleep (case batch-system
+ ;; Single machine jobs are run synchronously. So, there is
+ ;; no need to wait to poll them.
+ ((single-machine) 0)
+ ((slurm-api) %job-poll-interval)))
+ (loop state))
+ ;; Capture outputs.
+ (vector-filter-map->list (cute capture-output
+ (capture-propnet-output state)
+ <>)
+ (assoc-ref* cwl "outputs")))))))