about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Arun Isaac, 2023-11-14 22:18:38 +0000
committer: Arun Isaac, 2023-11-14 22:52:03 +0000
commit: 060ddf677d01e408c7a49791150bfd24b76416e0 (patch)
tree: d32a9fed56fb06d944a4d9b8bbdd6414b54168d5
parent: 7a43162925dffeb6cc1a70cfe6297f9ae62427a3 (diff)
download: ccwl-060ddf677d01e408c7a49791150bfd24b76416e0.tar.gz
ccwl-060ddf677d01e408c7a49791150bfd24b76416e0.tar.lz
ccwl-060ddf677d01e408c7a49791150bfd24b76416e0.zip
ccwl: Implement scatter.
* ccwl/ccwl.scm (<step>): Wrap constructor make-step.
[scattered-inputs, scatter-method]: New fields.
(collect-scatter-step): New function.
(collect-steps): Implement scatter.
(key->output): If step scatters, convert output to an array type.
(workflow): Add syntax to initialize scattered-inputs and scatter-method fields of <step> object.
* ccwl/cwl.scm (workflow->cwl-scm): Add ScatterFeatureRequirement if workflow contains scattering steps. Serialize scattered-inputs and scatter-method.
* doc/ccwl.skb (Cookbook)[Scatter/gather]: New section.
-rw-r--r-- ccwl/ccwl.scm 60
-rw-r--r-- ccwl/cwl.scm 13
-rw-r--r-- doc/ccwl.skb 7
3 files changed, 71 insertions, 9 deletions
diff --git a/ccwl/ccwl.scm b/ccwl/ccwl.scm
index 4055edb..66a8ab7 100644
--- a/ccwl/ccwl.scm
+++ b/ccwl/ccwl.scm
@@ -80,6 +80,8 @@
step-run
step-in
step-out
+ step-scattered-inputs
+ step-scatter-method
unspecified-default?))
(define-immutable-record-type <input>
@@ -274,11 +276,16 @@ object."
function))
(define-immutable-record-type <step>
- (make-step id run in)
+ (-make-step id run in scattered-inputs scatter-method)
step?
(id step-id)
(run step-run)
- (in step-in))
+ (in step-in)
+ (scattered-inputs step-scattered-inputs set-step-scattered-inputs)
+ (scatter-method step-scatter-method set-step-scatter-method))
+
+(define* (make-step id run in #:key (scattered-inputs '()) (scatter-method 'dotproduct))
+ (-make-step id run in scattered-inputs scatter-method)) ; forward to the positional record constructor; the keyword defaults fill the two scatter fields when callers omit them
(define step-out (compose function-outputs step-run))
@@ -543,12 +550,30 @@ return #f."
(cwl-workflow? result))
result)))
+(define (collect-scatter-step x input-keys scatter-method)
+ "Return a list of output keys and a list of steps from scatter workflow
+clause @var{x} and @var{scatter-method}. @var{input-keys} is a list of
+supplied input keys."
+ (syntax-case x ()
+ ((_ (function-spec ...) scattered-args ...) ; clause head is ignored; a parenthesised step spec is followed by the scattered arguments
+ (let ((keys steps ; binds both values returned by collect-steps
+ (collect-steps #'(function-spec ... scattered-args ...) ; spec and scattered args are spliced back into one step form
+ input-keys)))
+ (values keys ; output keys are passed through as collect-steps produced them
+ (map (lambda (step)
+ (set-step-scattered-inputs ; applied to the result of the inner setter, so both fields end up set on the returned step
+ (set-step-scatter-method step scatter-method)
+ (map (match-lambda
+ ((key . _) (keyword->symbol key))) ; keep only the head of each pair, converted from keyword to symbol
+ (syntax->datum (pairify #'(scattered-args ...)))))) ; NOTE(review): presumably pairify groups keyword/value arguments into pairs; confirm against its definition
+ steps))))))
+
(define (collect-steps x input-keys)
"Traverse ccwl workflow body X and return two values---a list of
output keys and a list of steps. INPUT-KEYS is a list of supplied
input keys. Keys are represented by <key> objects, and steps are
represented by <step> objects."
- (syntax-case x (pipe tee rename)
+ (syntax-case x (pipe tee rename scatter)
;; pipe
((pipe expressions ...)
(foldn (lambda (expression input-keys steps)
@@ -572,6 +597,9 @@ represented by <step> objects."
key))
input-keys)
(list)))
+ ;; TODO: Support cross product scatter methods.
+ ((scatter _ ...)
+ (collect-scatter-step x input-keys 'dotproduct))
((function (step-id) args ...)
;; Run a whole bunch of tests so that we can produce useful error
;; messages.
@@ -706,9 +734,21 @@ commands."
(cwl-key-address key))
(key-name key))
;; Convert stdout type outputs to File type outputs.
- (if (eq? (output-type output-for-key) 'stdout)
- 'File
- (output-type output-for-key)))))
+ (let ((type
+ (cond
+ ((eq? (output-type output-for-key)
+ 'stdout)
+ 'File)
+ ((and (array-type? (output-type output-for-key))
+ (eq? (array-type-member-type (output-type output-for-key))
+ 'stdout))
+ (make-array-type 'File))
+ (else
+ (output-type output-for-key)))))
+ ;; If step scatters, convert to an array type.
+ (if (null? (step-scattered-inputs step-with-output))
+ type
+ (make-array-type type))))))
;; Construct syntax to recreate output object.
#`(make-output
#,(with-syntax ((id (datum->syntax #f (output-id output))))
@@ -737,7 +777,13 @@ commands."
#''id)
#,(step-run step)
#,(with-syntax ((in (datum->syntax #f (step-in step))))
- #''in)))
+ #''in)
+ #:scattered-inputs #,(with-syntax ((scattered-inputs
+ (datum->syntax #f (step-scattered-inputs step))))
+ #''scattered-inputs)
+ #:scatter-method #,(with-syntax ((scatter-method
+ (datum->syntax #f (step-scatter-method step))))
+ #''scatter-method)))
steps))
(list #,@(map input #'(inputs ...)))
;; Find the output object for each output
diff --git a/ccwl/cwl.scm b/ccwl/cwl.scm
index c61d8a8..cdb4503 100644
--- a/ccwl/cwl.scm
+++ b/ccwl/cwl.scm
@@ -64,7 +64,11 @@ association list."
"Render WORKFLOW, a <workflow> object, into a CWL tree."
`((cwlVersion . ,%cwl-version)
(class . Workflow)
- (requirements (SubworkflowFeatureRequirement))
+ (requirements . ((SubworkflowFeatureRequirement)
+ ,@(if (every (compose null? step-scattered-inputs)
+ (workflow-steps workflow))
+ '()
+ '((ScatterFeatureRequirement)))))
,@(workflow-other workflow)
(inputs . ,(map input->cwl-scm
(workflow-inputs workflow)))
@@ -85,7 +89,12 @@ association list."
(command->cwl-scm command))
((? cwl-workflow? cwl-workflow)
(cwl-workflow-file cwl-workflow))
- (tree tree)))))
+ (tree tree)))
+ ,@(match (step-scattered-inputs step)
+ (() '())
+ (scattered-inputs
+ `((scatter . ,(list->vector scattered-inputs))
+ (scatterMethod . ,(step-scatter-method step)))))))
(workflow-steps workflow)))))
(define* (output->cwl-scm output #:key workflow?)
diff --git a/doc/ccwl.skb b/doc/ccwl.skb
index 69e5a38..2e84d58 100644
--- a/doc/ccwl.skb
+++ b/doc/ccwl.skb
@@ -332,6 +332,13 @@ to ,(emph "stage") the input file into the output directory. We may
express this in ccwl using the ,(code "#:stage?") parameter to the
inputs to be staged. Here is a rather concocted example.]
(scheme-source "doc/staging-input-files.scm")))
+ (section :title [Scatter/gather]
+ (p [ccwl supports CWL's dotproduct scatter/gather feature using
+the following syntax. Here, the ,(code [other-messages]) input to the
+workflow is an array of strings that is scattered over the ,(code
+[print]) step. Each run of the ,(code [print]) step gets an element of
+,(code [other-messages]) as its ,(code [other-message]) argument.]
+ (scheme-source "doc/scatter-gather.scm")))
(section :title [Reuse external CWL workflows]
(p [Even though you may be a ccwl convert (hurrah!), others may
not be. And, you might have to work with CWL workflows written by