diff options
author | Arun Isaac | 2023-11-14 22:18:38 +0000 |
---|---|---|
committer | Arun Isaac | 2023-11-14 22:52:03 +0000 |
commit | 060ddf677d01e408c7a49791150bfd24b76416e0 (patch) | |
tree | d32a9fed56fb06d944a4d9b8bbdd6414b54168d5 | |
parent | 7a43162925dffeb6cc1a70cfe6297f9ae62427a3 (diff) | |
download | ccwl-060ddf677d01e408c7a49791150bfd24b76416e0.tar.gz ccwl-060ddf677d01e408c7a49791150bfd24b76416e0.tar.lz ccwl-060ddf677d01e408c7a49791150bfd24b76416e0.zip |
ccwl: Implement scatter.
* ccwl/ccwl.scm (<step>): Wrap constructor make-step.
[scattered-inputs, scatter-method]: New fields.
(collect-scatter-step): New function.
(collect-steps): Implement scatter.
(key->output): If step scatters, convert output to an array type.
(workflow): Add syntax to initialize scattered-inputs and
scatter-method fields of <step> object.
* ccwl/cwl.scm (workflow->cwl-scm): Add ScatterFeatureRequirement if
workflow contains scattering steps. Serialize scattered-inputs and
scatter-method.
* doc/ccwl.skb (Cookbook)[Scatter/gather]: New section.
-rw-r--r-- | ccwl/ccwl.scm | 60 | ||||
-rw-r--r-- | ccwl/cwl.scm | 13 | ||||
-rw-r--r-- | doc/ccwl.skb | 7 |
3 files changed, 71 insertions, 9 deletions
diff --git a/ccwl/ccwl.scm b/ccwl/ccwl.scm index 4055edb..66a8ab7 100644 --- a/ccwl/ccwl.scm +++ b/ccwl/ccwl.scm @@ -80,6 +80,8 @@ step-run step-in step-out + step-scattered-inputs + step-scatter-method unspecified-default?)) (define-immutable-record-type <input> @@ -274,11 +276,16 @@ object." function)) (define-immutable-record-type <step> - (make-step id run in) + (-make-step id run in scattered-inputs scatter-method) step? (id step-id) (run step-run) - (in step-in)) + (in step-in) + (scattered-inputs step-scattered-inputs set-step-scattered-inputs) + (scatter-method step-scatter-method set-step-scatter-method)) + +(define* (make-step id run in #:key (scattered-inputs '()) (scatter-method 'dotproduct)) + (-make-step id run in scattered-inputs scatter-method)) (define step-out (compose function-outputs step-run)) @@ -543,12 +550,30 @@ return #f." (cwl-workflow? result)) result))) +(define (collect-scatter-step x input-keys scatter-method) + "Return a list of output keys and a list of steps from scatter workflow +clause @var{x} and @var{scatter-method}. @var{input-keys} is a list of +supplied input keys." + (syntax-case x () + ((_ (function-spec ...) scattered-args ...) + (let ((keys steps + (collect-steps #'(function-spec ... scattered-args ...) + input-keys))) + (values keys + (map (lambda (step) + (set-step-scattered-inputs + (set-step-scatter-method step scatter-method) + (map (match-lambda + ((key . _) (keyword->symbol key))) + (syntax->datum (pairify #'(scattered-args ...)))))) + steps)))))) + (define (collect-steps x input-keys) "Traverse ccwl workflow body X and return two values---a list of output keys and a list of steps. INPUT-KEYS is a list of supplied input keys. Keys are represented by <key> objects, and steps are represented by <step> objects." - (syntax-case x (pipe tee rename) + (syntax-case x (pipe tee rename scatter) ;; pipe ((pipe expressions ...) (foldn (lambda (expression input-keys steps) @@ -572,6 +597,9 @@ represented by <step> objects." key)) input-keys) (list))) + ;; TODO: Support cross product scatter methods. + ((scatter _ ...) + (collect-scatter-step x input-keys 'dotproduct)) ((function (step-id) args ...) ;; Run a whole bunch of tests so that we can produce useful error ;; messages. @@ -706,9 +734,21 @@ commands." (cwl-key-address key)) (key-name key)) ;; Convert stdout type outputs to File type outputs. - (if (eq? (output-type output-for-key) 'stdout) - 'File - (output-type output-for-key))))) + (let ((type + (cond + ((eq? (output-type output-for-key) + 'stdout) + 'File) + ((and (array-type? (output-type output-for-key)) + (eq? (array-type-member-type (output-type output-for-key)) + 'stdout)) + (make-array-type 'File)) + (else + (output-type output-for-key))))) + ;; If step scatters, convert to an array type. + (if (null? (step-scattered-inputs step-with-output)) + type + (make-array-type type)))))) ;; Construct syntax to recreate output object. #`(make-output #,(with-syntax ((id (datum->syntax #f (output-id output)))) @@ -737,7 +777,13 @@ commands." #''id) #,(step-run step) #,(with-syntax ((in (datum->syntax #f (step-in step)))) - #''in))) + #''in) + #:scattered-inputs #,(with-syntax ((scattered-inputs + (datum->syntax #f (step-scattered-inputs step)))) + #''scattered-inputs) + #:scatter-method #,(with-syntax ((scatter-method + (datum->syntax #f (step-scatter-method step)))) + #''scatter-method))) steps)) (list #,@(map input #'(inputs ...))) ;; Find the output object for each output diff --git a/ccwl/cwl.scm b/ccwl/cwl.scm index c61d8a8..cdb4503 100644 --- a/ccwl/cwl.scm +++ b/ccwl/cwl.scm @@ -64,7 +64,11 @@ association list." "Render WORKFLOW, a <workflow> object, into a CWL tree." `((cwlVersion . ,%cwl-version) (class . Workflow) - (requirements (SubworkflowFeatureRequirement)) + (requirements . ((SubworkflowFeatureRequirement) + ,@(if (every (compose null? step-scattered-inputs) + (workflow-steps workflow)) + '() + '((ScatterFeatureRequirement))))) ,@(workflow-other workflow) (inputs . ,(map input->cwl-scm (workflow-inputs workflow))) @@ -85,7 +89,12 @@ association list." (command->cwl-scm command)) ((? cwl-workflow? cwl-workflow) (cwl-workflow-file cwl-workflow)) - (tree tree))))) + (tree tree))) + ,@(match (step-scattered-inputs step) + (() '()) + (scattered-inputs + `((scatter . ,(list->vector scattered-inputs)) + (scatterMethod . ,(step-scatter-method step))))))) (workflow-steps workflow))))) (define* (output->cwl-scm output #:key workflow?) diff --git a/doc/ccwl.skb b/doc/ccwl.skb index 69e5a38..2e84d58 100644 --- a/doc/ccwl.skb +++ b/doc/ccwl.skb @@ -332,6 +332,13 @@ to ,(emph "stage") the input file into the output directory. We may express this in ccwl using the ,(code "#:stage?") parameter to the inputs to be staged. Here is a rather concocted example.] (scheme-source "doc/staging-input-files.scm"))) + (section :title [Scatter/gather] + (p [ccwl supports CWL's dotproduct scatter/gather feature using +the following syntax. Here, the ,(code [other-messages]) input to the +workflow is an array of strings that is scattered over the ,(code +[print]) step. Each run of the ,(code [print]) step gets an element of +,(code [other-messages]) as its ,(code [other-message]) argument.] + (scheme-source "doc/scatter-gather.scm"))) (section :title [Reuse external CWL workflows] (p [Even though you may be a ccwl convert (hurrah!), others may not be. And, you might have to work with CWL workflows written by |