From 0b7241bffd43ef3ce26734a020ca5dfe72dcc95a Mon Sep 17 00:00:00 2001 From: Arun Isaac Date: Mon, 30 Sep 2024 01:49:00 +0100 Subject: command-line-tool: Move inputs resolution to (ravanan workflow). We resolve inputs in run-workflow before doing anything else. We thus avoid bugs due to partially or insufficiently resolved inputs. * ravanan/command-line-tool.scm: Do not import (web uri). (%store-files-directory, %store-data-directory, %store-logs-directory): Export. (location->path, resolve-inputs, intern-file): Move to (ravanan workflow). (build-command-line-tool-script): Do not call resolve-inputs. * ravanan/workflow.scm: Import (web uri) and (ravanan work types). (run-workflow): Call resolve-inputs. --- ravanan/command-line-tool.scm | 141 ++--------------------------- ravanan/workflow.scm | 202 ++++++++++++++++++++++++++++++++++-------- 2 files changed, 173 insertions(+), 170 deletions(-) diff --git a/ravanan/command-line-tool.scm b/ravanan/command-line-tool.scm index 5cca5d0..8d1f8ef 100644 --- a/ravanan/command-line-tool.scm +++ b/ravanan/command-line-tool.scm @@ -27,7 +27,6 @@ #:use-module (ice-9 format) #:use-module (ice-9 match) #:use-module (ice-9 popen) - #:use-module (web uri) #:use-module (gcrypt base16) #:use-module (gcrypt hash) #:use-module ((gnu packages gnupg) #:select (guile-gcrypt)) @@ -59,7 +58,11 @@ %command-line-tool-supported-requirements script->store-stdout-file script->store-stderr-file - capture-command-line-tool-output)) + capture-command-line-tool-output + + %store-files-directory + %store-data-directory + %store-logs-directory)) (define %store-files-directory "files") @@ -297,119 +300,6 @@ G-expressions are inserted." (< (command-line-binding-position binding1) (command-line-binding-position binding2)))))) -(define (location->path location) - "Convert file @var{location} URI to path." - (if (string-prefix? "/" location) - ;; Sometimes location is actually a path. In that case, return as is. - location - ;; If location is an URI, parse the URI and return the path part. - (uri-path (string->uri location)))) - -(define (resolve-inputs inputs formal-inputs store) - "Traverse @var{inputs} and @var{formal-inputs} recursively, intern any -files found into the @var{store} and return a tree of the fully -resolved inputs. - -The returned @code{File} type objects are updated with @code{basename}, -@code{nameroot}, @code{nameext}, @code{checksum} and @code{size} fields, and -store-interned paths in the @code{location} and @code{path} fields. The -@code{basename} field contains the basename of the original path, and not the -store-interned path." - (define (canonicalize-file-input input) - "Canonicalize @code{File} type @var{input} and its secondary files." - (let* ((path (or (and (assoc-ref input "location") - (location->path (assoc-ref input "location"))) - (assoc-ref input "path"))) - (interned-path (intern-file path store))) - (maybe-assoc-set input - (cons "location" (just interned-path)) - (cons "path" (just interned-path)) - (cons "basename" (just (basename path))) - (cons "nameroot" (just (file-name-stem path))) - (cons "nameext" (just (file-name-extension path))) - (cons "checksum" (just (checksum path))) - (cons "size" (just (stat:size (stat path)))) - (cons "secondaryFiles" - (maybe-let* ((secondary-files (maybe-assoc-ref (just input) - "secondaryFiles"))) - (just (vector-map canonicalize-file-input - secondary-files))))))) - - (define (match-secondary-file-pattern input pattern) - "Return @code{#t} if secondary file @var{pattern} matches at least one secondary -file in @var{input}." - ;; TODO: Implement caret characters in SecondaryFileSchema DSL. - (vector-any (lambda (secondary-file) - (string=? (assoc-ref* secondary-file "path") - (string-append (assoc-ref* input "path") - pattern))) - (or (assoc-ref input "secondaryFiles") - (user-error "Missing secondaryFiles in input ~a" - input)))) - - (define (check-secondary-files input secondary-files) - "Check if all required @var{secondary-files} are present in @var{input}. If not, -error out." - (vector-for-each (lambda (secondary-file) - (let ((pattern (assoc-ref* secondary-file "pattern"))) - (when (and (assoc-ref* secondary-file "required") - (not (match-secondary-file-pattern input pattern))) - (user-error "Secondary file ~a missing in input ~a" - pattern - input)))) - secondary-files)) - - (define (resolve inputs types maybe-secondary-files) - (vector-map (lambda (input type-tree maybe-secondary-files) - ;; Check type. - (let* ((type (formal-parameter-type type-tree)) - (matched-type (match-type input type))) - (unless matched-type - (error input "Type mismatch" input type)) - ;; TODO: Implement record and enum types. - (cond - ;; Recurse over array types. - ((array-type? matched-type) - (resolve input - (make-vector (vector-length input) - (assoc-ref type-tree "items")) - (make-vector (vector-length input) - maybe-secondary-files))) - ;; Intern File type inputs and fully resolve them. - ((eq? matched-type 'File) - (let ((resolved-input (canonicalize-file-input input))) - ;; Ensure secondary files are provided with File type - ;; inputs. - (maybe-bind maybe-secondary-files - (cut check-secondary-files resolved-input <>)) - resolved-input)) - ;; Return other input types unchanged. - (else input)))) - inputs - types - maybe-secondary-files)) - - (vector-map->list (lambda (input formal-input) - (cons (assoc-ref formal-input "id") - input)) - (resolve (vector-map (lambda (formal-input) - (let ((id (assoc-ref* formal-input "id"))) - (or (assoc-ref inputs id) - (assoc-ref formal-input "default") - 'null))) - formal-inputs) - (vector-map (lambda (formal-input) - (let ((id (assoc-ref* formal-input "id"))) - (or (assoc-ref formal-input "type") - (user-error "Type of input ~a not specified" - id)))) - formal-inputs) - (vector-map (lambda (formal-input) - (maybe-assoc-ref (just formal-input) - "secondaryFiles")) - formal-inputs)) - formal-inputs)) - (define (command-line-binding->args binding) "Return a list of arguments for @var{binding}. The returned list may contain strings or G-expressions. The G-expressions may reference an @@ -603,23 +493,6 @@ path." (call-with-input-file store-data-file json->scm))) -(define (intern-file file store) - "Intern @var{file} into the ravanan @var{store} unless it is already a store -path. Return the interned path." - (if (string-prefix? store file) - ;; If file is already a store path, return it as is. - file - ;; Else, intern it and return the interned path. - (let ((interned-path - (expand-file-name - (file-name-join* %store-files-directory - (string-append (sha1-hash file) - "-" - (basename file))) - store))) - (copy-file file interned-path) - interned-path))) - (define (copy-input-files-gexp inputs) "Return a G-expression that copies @code{File} type inputs (along with secondary files) from @var{inputs} into @code{inputs-directory} and return a new @@ -1010,9 +883,7 @@ directory of the workflow." (call-with-temporary-directory (lambda (inputs-directory) - (let ((inputs - #$(copy-input-files-gexp - (resolve-inputs inputs (assoc-ref* cwl "inputs") store))) + (let ((inputs #$(copy-input-files-gexp inputs)) (runtime `(("cores" . ,(total-processor-count))))) ;; Set environment defined by workflow. diff --git a/ravanan/workflow.scm b/ravanan/workflow.scm index bbea430..015f8d5 100644 --- a/ravanan/workflow.scm +++ b/ravanan/workflow.scm @@ -25,12 +25,14 @@ #:use-module (srfi srfi-71) #:use-module (ice-9 filesystem) #:use-module (ice-9 match) + #:use-module (web uri) #:use-module (ravanan command-line-tool) #:use-module (ravanan job-state) #:use-module (ravanan propnet) #:use-module (ravanan reader) #:use-module (ravanan work command-line-tool) #:use-module (ravanan work monads) + #:use-module (ravanan work types) #:use-module (ravanan work ui) #:use-module (ravanan work utils) #:use-module (ravanan work vectors) @@ -356,6 +358,135 @@ exit if job has failed." (scheduler schedule poll capture-output)) +(define (location->path location) + "Convert file @var{location} URI to path." + (if (string-prefix? "/" location) + ;; Sometimes location is actually a path. In that case, return as is. + location + ;; If location is an URI, parse the URI and return the path part. + (uri-path (string->uri location)))) + +(define (intern-file file store) + "Intern @var{file} into the ravanan @var{store} unless it is already a store +path. Return the interned path." + (if (string-prefix? store file) + ;; If file is already a store path, return it as is. + file + ;; Else, intern it and return the interned path. + (let ((interned-path + (expand-file-name + (file-name-join* %store-files-directory + (string-append (sha1-hash file) + "-" + (basename file))) + store))) + (copy-file file interned-path) + interned-path))) + +(define (resolve-inputs inputs formal-inputs store) + "Traverse @var{inputs} and @var{formal-inputs} recursively, intern any +files found into the @var{store} and return a tree of the fully resolved inputs. + +The returned @code{File} type objects are updated with @code{basename}, +@code{nameroot}, @code{nameext}, @code{checksum} and @code{size} fields, and +store-interned paths in the @code{location} and @code{path} fields. The +@code{basename} field contains the basename of the original path, and not the +store-interned path." + (define (canonicalize-file-input input) + "Canonicalize @code{File} type @var{input} and its secondary files." + (let* ((path (or (and (assoc-ref input "location") + (location->path (assoc-ref input "location"))) + (assoc-ref input "path"))) + (interned-path (intern-file path store))) + (maybe-assoc-set input + (cons "location" (just interned-path)) + (cons "path" (just interned-path)) + (cons "basename" (just (basename path))) + (cons "nameroot" (just (file-name-stem path))) + (cons "nameext" (just (file-name-extension path))) + (cons "checksum" (just (checksum path))) + (cons "size" (just (stat:size (stat path)))) + (cons "secondaryFiles" + (maybe-let* ((secondary-files (maybe-assoc-ref (just input) + "secondaryFiles"))) + (just (vector-map canonicalize-file-input + secondary-files))))))) + + (define (match-secondary-file-pattern input pattern) + "Return @code{#t} if secondary file @var{pattern} matches at least one secondary +file in @var{input}." + ;; TODO: Implement caret characters in SecondaryFileSchema DSL. + (vector-any (lambda (secondary-file) + (string=? (assoc-ref* secondary-file "path") + (string-append (assoc-ref* input "path") + pattern))) + (or (assoc-ref input "secondaryFiles") + (user-error "Missing secondaryFiles in input ~a" + input)))) + + (define (check-secondary-files input secondary-files) + "Check if all required @var{secondary-files} are present in @var{input}. If not, +error out." + (vector-for-each (lambda (secondary-file) + (let ((pattern (assoc-ref* secondary-file "pattern"))) + (when (and (assoc-ref* secondary-file "required") + (not (match-secondary-file-pattern input pattern))) + (user-error "Secondary file ~a missing in input ~a" + pattern + input)))) + secondary-files)) + + (define (resolve inputs types maybe-secondary-files) + (vector-map (lambda (input type-tree maybe-secondary-files) + ;; Check type. + (let* ((type (formal-parameter-type type-tree)) + (matched-type (match-type input type))) + (unless matched-type + (error input "Type mismatch" input type)) + ;; TODO: Implement record and enum types. + (cond + ;; Recurse over array types. + ((array-type? matched-type) + (resolve input + (make-vector (vector-length input) + (assoc-ref type-tree "items")) + (make-vector (vector-length input) + maybe-secondary-files))) + ;; Intern File type inputs and fully resolve them. + ((eq? matched-type 'File) + (let ((resolved-input (canonicalize-file-input input))) + ;; Ensure secondary files are provided with File type + ;; inputs. + (maybe-bind maybe-secondary-files + (cut check-secondary-files resolved-input <>)) + resolved-input)) + ;; Return other input types unchanged. + (else input)))) + inputs + types + maybe-secondary-files)) + + (vector-map->list (lambda (input formal-input) + (cons (assoc-ref formal-input "id") + input)) + (resolve (vector-map (lambda (formal-input) + (let ((id (assoc-ref* formal-input "id"))) + (or (assoc-ref inputs id) + (assoc-ref formal-input "default") + 'null))) + formal-inputs) + (vector-map (lambda (formal-input) + (let ((id (assoc-ref* formal-input "id"))) + (or (assoc-ref formal-input "type") + (user-error "Type of input ~a not specified" + id)))) + formal-inputs) + (vector-map (lambda (formal-input) + (maybe-assoc-ref (just formal-input) + "secondaryFiles")) + formal-inputs)) + formal-inputs)) + (define* (run-workflow name manifest cwl inputs scratch store batch-system #:key guix-daemon-socket @@ -399,38 +530,39 @@ authenticate to the slurm API with. @var{slurm-api-endpoint} and (else (error "output not found" output-id))))) - ;; Ensure required inputs are specified. - (vector-for-each (lambda (input) - (let ((input-id (assoc-ref input "id"))) - (unless (or (optional-input? input) - (assoc input-id inputs)) - (user-error "Required input `~a' not specified" - input-id)))) - (assoc-ref cwl "inputs")) - (let loop ((state (schedule-propnet - (propnet (workflow->propagators name cwl) - value=? - merge-values - (workflow-scheduler - manifest scratch store batch-system - #:guix-daemon-socket guix-daemon-socket - #:slurm-api-endpoint slurm-api-endpoint - #:slurm-jwt slurm-jwt)) - inputs))) - ;; Poll. - (let ((status state (poll-propnet state))) - (if (eq? status 'pending) - (begin - ;; Pause before looping and polling again so we don't bother the job - ;; server too often. - (sleep (case batch-system - ;; Single machine jobs are run synchronously. So, there is - ;; no need to wait to poll them. - ((single-machine) 0) - ((slurm-api) %job-poll-interval))) - (loop state)) - ;; Capture outputs. - (vector-filter-map->list (cute capture-output - (capture-propnet-output state) - <>) - (assoc-ref* cwl "outputs")))))) + (let ((inputs (resolve-inputs inputs (assoc-ref* cwl "inputs") store))) + ;; Ensure required inputs are specified. + (vector-for-each (lambda (input) + (let ((input-id (assoc-ref input "id"))) + (unless (or (optional-input? input) + (assoc input-id inputs)) + (user-error "Required input `~a' not specified" + input-id)))) + (assoc-ref cwl "inputs")) + (let loop ((state (schedule-propnet + (propnet (workflow->propagators name cwl) + value=? + merge-values + (workflow-scheduler + manifest scratch store batch-system + #:guix-daemon-socket guix-daemon-socket + #:slurm-api-endpoint slurm-api-endpoint + #:slurm-jwt slurm-jwt)) + inputs))) + ;; Poll. + (let ((status state (poll-propnet state))) + (if (eq? status 'pending) + (begin + ;; Pause before looping and polling again so we don't bother the job + ;; server too often. + (sleep (case batch-system + ;; Single machine jobs are run synchronously. So, there is + ;; no need to wait to poll them. + ((single-machine) 0) + ((slurm-api) %job-poll-interval))) + (loop state)) + ;; Capture outputs. + (vector-filter-map->list (cute capture-output + (capture-propnet-output state) + <>) + (assoc-ref* cwl "outputs"))))))) -- cgit v1.2.3