From bf485dd43386305177a2ba3ea1a5ee7e164984c6 Mon Sep 17 00:00:00 2001 From: Arun Isaac Date: Tue, 15 Jun 2021 16:45:49 +0530 Subject: doc: Move documentation from texinfo to skribilo. * doc/ccwl.texi: Delete file. * ccwl/skribilo.scm, doc/ccwl.skb, doc/capture-output-file-with-parameter-reference.out, doc/capture-output-file-with-parameter-reference.scm, doc/capture-output-file.out, doc/capture-output-file.scm, doc/capture-stdout.out, doc/capture-stdout.scm, doc/checksum.out, doc/checksum.scm, doc/decompress-compile-run.out, doc/decompress-compile-run.scm, doc/hello-world.out, doc/hello-world.scm, doc/hello.tar.out: New files. * configure.ac: Check for skribilo. * Makefile.am: Unregister doc/ccwl.texi. Add documentation build rules. * guix.scm (ccwl)[native-inputs]: Replace texinfo with skribilo. * .gitignore: Add doc/skribilo. Remove doc/version.texi, doc/*.html, doc/*.info. --- .gitignore | 4 +- Makefile.am | 45 +- ccwl/skribilo.scm | 68 +++ configure.ac | 1 + ...apture-output-file-with-parameter-reference.out | 24 + ...apture-output-file-with-parameter-reference.scm | 9 + doc/capture-output-file.out | 23 + doc/capture-output-file.scm | 8 + doc/capture-stdout.out | 21 + doc/capture-stdout.scm | 6 + doc/ccwl.skb | 216 +++++++++ doc/ccwl.texi | 529 --------------------- doc/checksum.out | 49 ++ doc/checksum.scm | 16 + doc/decompress-compile-run.out | 36 ++ doc/decompress-compile-run.scm | 18 + doc/hello-world.out | 13 + doc/hello-world.scm | 5 + doc/hello.tar.out | 2 + guix.scm | 2 +- 20 files changed, 553 insertions(+), 542 deletions(-) create mode 100644 ccwl/skribilo.scm create mode 100644 doc/capture-output-file-with-parameter-reference.out create mode 100644 doc/capture-output-file-with-parameter-reference.scm create mode 100644 doc/capture-output-file.out create mode 100644 doc/capture-output-file.scm create mode 100644 doc/capture-stdout.out create mode 100644 doc/capture-stdout.scm create mode 100644 doc/ccwl.skb delete mode 100644 doc/ccwl.texi create 
mode 100644 doc/checksum.out create mode 100644 doc/checksum.scm create mode 100644 doc/decompress-compile-run.out create mode 100644 doc/decompress-compile-run.scm create mode 100644 doc/hello-world.out create mode 100644 doc/hello-world.scm create mode 100644 doc/hello.tar.out diff --git a/.gitignore b/.gitignore index 310c038..83e9439 100644 --- a/.gitignore +++ b/.gitignore @@ -8,11 +8,9 @@ build-aux/missing config.log config.status configure -doc/*.html -doc/*.info doc/.dirstamp +doc/skribilo doc/stamp-vti -doc/version.texi pre-inst-env scripts/ccwl test-suite.log diff --git a/Makefile.am b/Makefile.am index 8eab1cb..33b0426 100644 --- a/Makefile.am +++ b/Makefile.am @@ -39,7 +39,6 @@ moddir = $(prefix)/share/guile/site/$(GUILE_EFFECTIVE_VERSION) godir = $(libdir)/guile/$(GUILE_EFFECTIVE_VERSION)/site-ccache bin_SCRIPTS = scripts/ccwl -info_TEXINFOS = doc/ccwl.texi SOURCES = \ ccwl/ccwl.scm \ @@ -64,9 +63,38 @@ EXTRA_DIST += \ COPYING \ README.org -# Build website +# Build documentation + +SKRIBILO_BUILD_DIR = $(srcdir)/doc/skribilo + +# The info and html targets depend on the info-local and html-local +# targets respectively. So, we use them to extend the info and html +# targets. 
+info-local: $(SKRIBILO_BUILD_DIR)/ccwl.info +html-local: $(SKRIBILO_BUILD_DIR)/ccwl.html + +$(SKRIBILO_BUILD_DIR)/ccwl.info: doc/ccwl.skb + $(MKDIR_P) $(SKRIBILO_BUILD_DIR) + $(AM_V_GEN)$(top_builddir)/pre-inst-env $(SKRIBILO) -t info $^ -o $@ + +$(SKRIBILO_BUILD_DIR)/ccwl.html: doc/ccwl.skb + $(MKDIR_P) $(SKRIBILO_BUILD_DIR) + $(AM_V_GEN)$(top_builddir)/pre-inst-env $(SKRIBILO) -t html $^ -o $@ + +install-data-local: install-info-local -AM_MAKEINFOHTMLFLAGS = --css-ref=/style.css +install-info-local: $(SKRIBILO_BUILD_DIR)/ccwl.info + $(MKDIR_P) $(DESTDIR)$(infodir) + $(INSTALL_DATA) $(SKRIBILO_BUILD_DIR)/ccwl.info $(DESTDIR)$(infodir) + +install-html-local: $(SKRIBILO_BUILD_DIR)/ccwl.html + $(MKDIR_P) $(DESTDIR)$(htmldir) + $(INSTALL_DATA) $(SKRIBILO_BUILD_DIR)/ccwl.html $(DESTDIR)$(htmldir) + +uninstall-local: + rm -f $(DESTDIR)$(infodir)/ccwl.info $(DESTDIR)$(htmldir)/ccwl.html + +# Build website website: website/index.html website/style.css website/manual/dev/en @@ -78,12 +106,11 @@ website/style.css: style.css mkdir -p $(dir $@) cp -v $< $@ -website/manual/dev/en: doc/ccwl.html - rm -rf $@ - mkdir -p $(dir $@) - cp -vr doc/ccwl.html $@ +website/manual/dev/en: $(SKRIBILO_BUILD_DIR)/ccwl.html + $(MKDIR_P) $@ + cp -v $< $@/index.html # The clean target depends on clean-local. We use clean-local to clean -# up the website built by the website target. +# up the website and the skribilo build directory. clean-local: - rm -rf website + rm -rf website $(SKRIBILO_BUILD_DIR) diff --git a/ccwl/skribilo.scm b/ccwl/skribilo.scm new file mode 100644 index 0000000..8e4b198 --- /dev/null +++ b/ccwl/skribilo.scm @@ -0,0 +1,68 @@ +;;; ccwl --- Concise Common Workflow Language +;;; Copyright © 2021 Arun Isaac +;;; +;;; This file is part of ccwl. 
+;;; +;;; ccwl is free software: you can redistribute it and/or modify it +;;; under the terms of the GNU General Public License as published by +;;; the Free Software Foundation, either version 3 of the License, or +;;; (at your option) any later version. +;;; +;;; ccwl is distributed in the hope that it will be useful, but +;;; WITHOUT ANY WARRANTY; without even the implied warranty of +;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;;; General Public License for more details. +;;; +;;; You should have received a copy of the GNU General Public License +;;; along with ccwl. If not, see <https://www.gnu.org/licenses/>. + +;;; Commentary: + +;; This module customizes Skribilo to our needs. + +;;; Code: + +(define-module (ccwl skribilo) + #:use-module (skribilo ast) + #:use-module (skribilo engine) + #:use-module (skribilo lib) + #:use-module (skribilo package base) + #:use-module (skribilo source lisp) + #:use-module (skribilo utils keywords) + #:use-module (skribilo writer) + #:export (command file scheme-source)) + +;; Aliases +(define file samp) +(define command code) + +;; Shorthand for scheme source +(define (scheme-source file) + (prog (source #:language scheme #:file file) + #:line #f)) + +;; Abbreviations +(define-markup (abbr #:rest opts + #:key (ident #f) (class "abbr") (short #f) (long #f)) + (new container + (markup 'abbr) + (ident (or ident (symbol->string (gensym "abbr")))) + (class class) + (loc &invocation-location) + (required-options '(#:short #:long)) + (options `((#:short ,short) + (#:long ,long) + ,@(the-options opts #:ident #:class #:short #:long))) + (body (the-body opts)))) + +;; HTML engine customizations +(let ((html-engine (find-engine 'html))) + (engine-custom-set! html-engine 'css "/style.css") + (engine-custom-set! 
html-engine 'charset "UTF-8") + (markup-writer 'abbr html-engine + #:options '(#:short #:long) + #:action (lambda (markup engine) + (format #t "<abbr title=\"~a\">~a</abbr> (~a)" + (markup-option markup #:long) + (markup-option markup #:short) + (markup-option markup #:long))))) diff --git a/configure.ac b/configure.ac index a7338b9..b59319a 100644 --- a/configure.ac +++ b/configure.ac @@ -23,6 +23,7 @@ AM_SILENT_RULES([yes]) GUILE_PKG([3.0 2.2]) GUILE_PROGS +AC_PATH_PROG([SKRIBILO], [skribilo]) AC_CONFIG_FILES([Makefile]) AC_CONFIG_FILES([pre-inst-env], [chmod +x pre-inst-env]) diff --git a/doc/capture-output-file-with-parameter-reference.out b/doc/capture-output-file-with-parameter-reference.out new file mode 100644 index 0000000..57369d2 --- /dev/null +++ b/doc/capture-output-file-with-parameter-reference.out @@ -0,0 +1,24 @@ +$ ccwl compile capture-output-file.scm > capture-output-file.cwl +$ cwltool capture-output-file.cwl --archive hello.tar --extractfile hello.txt +[workflow ] start +[workflow ] starting step extract-specific-file +[step extract-specific-file] start +[job extract-specific-file] /tmp/751nydd1$ tar \ + --extract \ + --file \ + /tmp/1zzw2n6m/stgc851e003-b5bd-437e-844b-311f6f66a7f1/hello.tar \ + hello.txt +[job extract-specific-file] completed success +[step extract-specific-file] completed success +[workflow ] completed success +{ + "extracted-file": { + "location": "file:///home/manimekalai/hello.txt", + "basename": "hello.txt", + "class": "File", + "checksum": "sha1$a0b65939670bc2c010f4d5d6a0b3e4e4590fb92b", + "size": 13, + "path": "/home/manimekalai/hello.txt" + } +} +Final process status is success diff --git a/doc/capture-output-file-with-parameter-reference.scm b/doc/capture-output-file-with-parameter-reference.scm new file mode 100644 index 0000000..3104fae --- /dev/null +++ b/doc/capture-output-file-with-parameter-reference.scm @@ -0,0 +1,9 @@ +(define extract-specific-file + (command #:run "tar" "--extract" "--file" (input 'archive #:type 'File) + (input 
'extractfile #:type 'string) + #:outputs (output 'extracted-file + #:type 'File + #:binding '((glob . "$(inputs.extractfile)"))))) + +(workflow ((archive #:type File) (extractfile #:type string)) + (extract-specific-file #:archive archive #:extractfile extractfile)) diff --git a/doc/capture-output-file.out b/doc/capture-output-file.out new file mode 100644 index 0000000..32ee625 --- /dev/null +++ b/doc/capture-output-file.out @@ -0,0 +1,23 @@ +$ ccwl compile capture-output-file.scm > capture-output-file.cwl +$ cwltool capture-output-file.cwl --archive hello.tar +[workflow ] start +[workflow ] starting step extract +[step extract] start +[job extract] /tmp/nrolttex$ tar \ + --extract \ + --file \ + /tmp/z7pp7qwh/stg3ac272aa-3459-4f20-a033-86f53ba72caf/hello.tar +[job extract] completed success +[step extract] completed success +[workflow ] completed success +{ + "extracted-file": { + "location": "file:///home/manimekalai/hello.txt", + "basename": "hello.txt", + "class": "File", + "checksum": "sha1$a0b65939670bc2c010f4d5d6a0b3e4e4590fb92b", + "size": 13, + "path": "/home/manimekalai/hello.txt" + } +} +Final process status is success diff --git a/doc/capture-output-file.scm b/doc/capture-output-file.scm new file mode 100644 index 0000000..b13549e --- /dev/null +++ b/doc/capture-output-file.scm @@ -0,0 +1,8 @@ +(define extract + (command #:run "tar" "--extract" "--file" (input 'archive #:type 'File) + #:outputs (output 'extracted-file + #:type 'File + #:binding '((glob . "hello.txt"))))) + +(workflow ((archive #:type File)) + (extract #:archive archive)) diff --git a/doc/capture-stdout.out b/doc/capture-stdout.out new file mode 100644 index 0000000..19568af --- /dev/null +++ b/doc/capture-stdout.out @@ -0,0 +1,21 @@ +$ ccwl compile capture-stdout.scm > capture-stdout.cwl +$ cwltool capture-stdout.cwl --message "Hello World!" +[workflow ] start +[workflow ] starting step print +[step print] start +[job print] /tmp/7zksx3xm$ echo \ + 'Hello World!' 
> /tmp/7zksx3xm/51fe79d15e7790a9ded795304220d7a44aa84b48 +[job print] completed success +[step print] completed success +[workflow ] completed success +{ + "printed-message": { + "location": "file:///home/manimekalai/51fe79d15e7790a9ded795304220d7a44aa84b48", + "basename": "51fe79d15e7790a9ded795304220d7a44aa84b48", + "class": "File", + "checksum": "sha1$a0b65939670bc2c010f4d5d6a0b3e4e4590fb92b", + "size": 13, + "path": "/home/manimekalai/51fe79d15e7790a9ded795304220d7a44aa84b48" + } +} +Final process status is success diff --git a/doc/capture-stdout.scm b/doc/capture-stdout.scm new file mode 100644 index 0000000..1f26a95 --- /dev/null +++ b/doc/capture-stdout.scm @@ -0,0 +1,6 @@ +(define print + (command #:run "echo" (input 'message #:type 'string) + #:outputs (output 'printed-message #:type 'stdout))) + +(workflow ((message #:type string)) + (print #:message message)) diff --git a/doc/ccwl.skb b/doc/ccwl.skb new file mode 100644 index 0000000..c684caa --- /dev/null +++ b/doc/ccwl.skb @@ -0,0 +1,216 @@ +(use-modules (skribilo lib) + (ccwl skribilo)) + +(document :title [Concise Common Workflow Language] + (chapter :title [Introduction] + (p [,(abbr :short "CWL" :long "Common Workflow +Language") is an open standard for describing analysis workflows and +tools in a way that makes them portable and scalable across a variety +of software and hardware environments.]) + (p [,(abbr :short "ccwl" :long "Concise Common +Workflow Language") is a concise syntax to express CWL workflows. It +is implemented as an ,(abbr :short "EDSL" :long "Embedded Domain +Specific Language") in the Scheme programming language, a minimalist +dialect of the Lisp family of programming languages.]) + (p [ccwl is a compiler to generate CWL workflows +from concise descriptions in ccwl. 
In the future, ccwl will also have +a runtime whereby users can interactively execute workflows while +developing them.])) + (chapter :title [Tutorial] + (p [This tutorial will introduce you to writing +workflows in ccwl. Some knowledge of CWL is assumed. To learn about +CWL, please see the ,(ref :url "https://www.commonwl.org/user_guide/" +:text "Common Workflow Language User Guide").]) + + (section :title [Important concepts] + (p [The CWL and ccwl workflow languages +are statically typed programming languages where functions accept +multiple named inputs and return multiple named outputs. Let's break +down what that means.]) + (subsection :title [Static typing] + (p [In CWL, the type of arguments accepted by a function and +the type of outputs returned by that function are specified explicitly +by the programmer, and are known at compile time even before the code +has been run. Hence, we say that it is statically typed.])) + (subsection :title [Positional arguments and named arguments] + (p [In many languages, the order of arguments passed to a +function is significant. The position of each argument determines +which formal argument it gets mapped to. For example, passing +positional arguments in Scheme looks like]) + (prog :line #f [(foo 1 2)]) + (p [In a language that supports named arguments, the order of +arguments is not significant. Each argument explicitly names the +formal argument it gets mapped to. For example, in Scheme, passing +named arguments may look like] + (prog :line #f [(foo #:bar 1 #:baz 2)]))) + (subsection :title [Multiple function arguments and return values] + (p [In most languages, functions accept multiple input +arguments but only return a single output value. However, in CWL, a +function can return multiple output values as well. These multiple +outputs are unordered and are each addressed by a unique name.]))) + + (section :title [First example] + (p [As is tradition, let us start with a simple "Hello World" +workflow in ccwl. 
This workflow accepts a string input and prints that +string.]) + + (scheme-source "doc/hello-world.scm") + + (p [The first form in this code defines the ,(code "print") +command. This form is the equivalent of defining a +,(code "CommandLineTool") class workflow in CWL. All arguments after +,(code "#:run") specify the command that will be run. One of those +arguments ,(code "(input 'message #:type 'string)") refers to a +,(code "string") type input named ,(code "message"). Notice how the +command definition is very close to a shell command, only that it is +slightly annotated with inputs and their types.]) + + (p [The second form describes the actual workflow and is the +equivalent of defining a ,(code "Workflow") class workflow in CWL. The +form ,(code "((message #:type string))") specifies the inputs of the +workflow. In this case, there is only one input---,(code "message") of +type ,(code "string"). The body of the workflow specifies the commands +that will be executed. The body of this workflow executes only a +single command---the ,(code "print") command---passing the +,(code "message") input of the workflow as the ,(code "message") input +to the ,(code "print") command.]) + + (p [If this workflow is written to a file +,(file "hello-world.scm"), we may compile it to CWL by running]) + + (prog :line #f [$ ccwl compile hello-world.scm]) + + (p [This prints a big chunk of generated CWL to standard +output. We have achieved quite a lot of concision already! We write +the generated CWL to a file and execute it using ,(command "cwltool") +as follows. The expected output is also shown.]) + + (prog :line #f (source :file "doc/hello-world.out"))) + + (section :title [Capturing the standard output stream of a command] + (p [Let us return to the “Hello World” example in the previous +section. But now, let us capture the standard output of the +,(code "print") command in an output object. 
The ccwl code is the same +as earlier with only the addition of an ,(code "stdout") type output +object to the command definition.]) + + (scheme-source "doc/capture-stdout.scm") + + (p [Let's write this code to a file +,(file "capture-stdout.scm"), generate CWL, write the generated CWL to +,(file "capture-stdout.cwl"), and run it using ,(code "cwltool"). We +might expect something like the output below. Notice how the standard +output of the ,(code "print") command has been captured in the file +,(file "51fe79d15e7790a9ded795304220d7a44aa84b48").]) + + (prog :line #f (source :file "doc/capture-stdout.out"))) + + (section :title [Capturing output files] + (p [In the previous section, we captured the standard output +stream of a command. But, how do we capture any output files created +by a command? Let us see.]) + + (p [Consider a tar archive ,(file "hello.tar") containing a file +,(file "hello.txt").]) + + (prog :line #f (source :file "doc/hello.tar.out")) + + (p [Let us write a workflow to extract the file +,(file "hello.txt") from the archive. Everything in the following +workflow except the ,(code "#:binding") parameter will already be +familiar to you. The ,(code "#:binding") parameter sets the +,(code "outputBinding") field in the generated CWL. In the example +below, we set the ,(code "glob") field to look for a file named +,(file "hello.txt").]) + + (scheme-source "doc/capture-output-file.scm") + + (p [Writing this workflow to ,(file "capture-output-file.scm"), +compiling and running it gives us the following output. Notice that +the file ,(file "hello.txt") has been captured and is now present in +our current working directory.]) + + (prog :line #f (source :file "doc/capture-output-file.out")) + + (p [The above workflow is not awfully flexible. The name of the +file to extract is hardcoded into the workflow. Let us modify the +workflow to accept the name of the file to extract. 
We introduce +,(code "extractfile"), a ,(code "string") type input that is passed to +,(command "tar") and is referenced in the ,(code "glob") field.]) + + (scheme-source "doc/capture-output-file-with-parameter-reference.scm") + + (p [Compiling and running this workflow gives us the following +output.]) + + (prog :line #f (source :file "doc/capture-output-file-with-parameter-reference.out"))) + + (section :title [Workflow with multiple steps] + (p [Till now, we have only written trivial workflows with a +single command. If we were only interested in executing single +commands, we would hardly need a workflow language! So, in this +section, let us write our first multi-step workflow and learn how to +connect steps together in an arbitrary topology.]) + + (subsection :title [pipe] + (p [First, the simplest of topologies---a linear chain +representing sequential execution of steps. The following workflow +decompresses a compressed C source file, compiles and then executes +it.]) + + (scheme-source "doc/decompress-compile-run.scm") + + (p [Notice the ,(code "pipe") form in the body of the +workflow. The ,(code "pipe") form specifies a list of steps to be +executed sequentially. The workflow inputs coming into ,(code "pipe") +are passed into the first step. Thereafter, the outputs of each step +are passed as inputs into the next. Note that this has nothing to do +with the Unix pipe. The inputs/outputs passed between steps are +general CWL inputs/outputs. They need not be the standard stdin and +stdout streams.]) + + ;; TODO: Add workflow graph + + (p [Writing this workflow to +,(file "decompress-compile-run.scm"), compiling and running it with +the compressed C source file ,(file "hello.c.gz") gives us the +following output.]) + + (prog :line #f (source :file "doc/decompress-compile-run.out")) + + (p [The steps run in succession, and the stdout of the +compiled executable is in +,(file "c32c587f7afbdf87cf991c14a43edecf09cd93bf"). 
Success!])) + + (subsection :title [tee] + (p [Next, the tee topology. The following workflow computes +three different checksums of a given input file.]) + + (scheme-source "doc/checksum.scm") + + (p [Notice the ,(code "tee") form in the body of the +workflow. The ,(code "tee") form specifies a list of steps that are +independent of each other. The workflow inputs coming into +,(code "tee") are passed into every step contained in the body of the +,(code "tee"). The outputs of each step are collected together and +unioned as the output of the ,(code "tee").]) + + ;; TODO: Add workflow graph + + (p [Writing this workflow to ,(file "checksum.scm"), compiling +and running it with some file ,(file "hello.txt") gives us the +following output.]) + + (prog :line #f (source :file "doc/checksum.out")) + + (p [The MD5, SHA1 and SHA256 checksums are in the files +,(file "112be1054505027982e64d56b0879049c12737c6"), +,(file "d2f19c786fcd3feb329004c8747803fba581a02d") and +,(file "0d2eaa5619c14b43326101200d0f27b0d8a1a4b1") respectively.])))) + + (chapter :title [Contributing] + (p [ccwl is developed on GitHub at ,(ref +:url "https://github.com/arunisaac/ccwl"). Feedback, suggestions, +feature requests, bug reports and pull requests are all +welcome. Unclear and unspecific error messages are considered a +bug. Do report them!]))) diff --git a/doc/ccwl.texi b/doc/ccwl.texi deleted file mode 100644 index afc7e7d..0000000 --- a/doc/ccwl.texi +++ /dev/null @@ -1,529 +0,0 @@ -\input texinfo -@settitle Concise Common Workflow Language - -@include version.texi - -@copying -Copyright @copyright{} 2021 Arun Isaac@* - -Permission is granted to copy, distribute and/or modify this document -under the terms of the GNU Free Documentation License, Version 1.3 or -any later version published by the Free Software Foundation; with no -Invariant Sections, no Front-Cover Texts, and no Back-Cover Texts. A -copy of the license is included in the section entitled ``GNU Free -Documentation License''. 
-@end copying - -@titlepage -@title Concise Common Workflow Language -@subtitle Writing Common Workflow Language workflows using ccwl -@author The ccwl authors - -@page -@vskip 0pt plus 1filll -Edition @value{EDITION} @* -@value{UPDATED} @* -@insertcopying -@end titlepage - -@contents - -@ifnottex -@node Top -@top Concise Common Workflow Language - -This manual documents @abbr{ccwl, Concise Common Workflow Language} -version @value{VERSION}. ccwl is a concise syntax to express @abbr{CWL, -Common Workflow Language} workflows. -@end ifnottex - -@menu -* Introduction:: What is ccwl? -* Tutorial:: A quick tutorial to get started with ccwl -* Contributing:: Contributing -@end menu - -@node Introduction -@chapter Introduction - -@abbr{CWL, Common Workflow Language} is an open standard for describing -analysis workflows and tools in a way that makes them portable and -scalable across a variety of software and hardware environments. - -@abbr{ccwl, Concise Common Workflow Language} is a concise syntax to -express CWL workflows. It is implemented as an @abbr{EDSL, Embedded -Domain Specific Language} in the Scheme programming language, a -minimalist dialect of the Lisp family of programming languages. - -ccwl is a compiler to generate CWL workflows from concise descriptions -in ccwl. In the future, ccwl will also have a runtime whereby users can -interactively execute workflows while developing them. - -@node Tutorial -@chapter Tutorial - -This tutorial will introduce you to writing workflows in ccwl. Some -knowledge of CWL is assumed. To learn about CWL, please see the -@url{https://www.commonwl.org/user_guide/, Common Workflow Language User -Guide}. 
- -@menu -* Important concepts:: Static typing, multiple named inputs and outputs -* First example:: Our first ccwl workflow -* Capturing stdout:: Capturing the standard output stream of a command -* Capturing output files:: Capturing output files produced by a command -* Workflow with multiple steps:: Connecting steps together in a graph -@end menu - -@node Important concepts -@section The CWL and ccwl workflow languages - -The CWL and ccwl workflow languages are statically typed programming -languages where functions accept multiple named inputs and return -multiple named outputs. Let's break down what that means. - -@subsection Static typing - -In CWL, the type of arguments accepted by a function and the type of -outputs returned by that function are specified explicitly by the -programmer, and are known at compile time even before the code has been -run. Hence, we say that it is statically typed. - -@subsection Positional arguments and named arguments - -In many languages, the order of arguments passed to a function is -significant. The position of each argument determines which formal -argument it gets mapped to. For example, passing positional arguments in -Scheme looks like - -@lisp -(foo 1 2) -@end lisp - -In a language that supports named arguments, the order of arguments is -not significant. Each argument explicitly names the formal argument it -gets mapped to. For example, in Scheme, passing named arguments may look -like - -@lisp -(foo #:bar 1 #:baz 2) -@end lisp - -@subsection Multiple function arguments and return values - -In most languages, functions accept multiple input arguments but only -return a single output value. However, in CWL, a function can return -multiple output values as well. These multiple outputs are unordered and -are each addressed by a unique name. - -@node First example -@section First example - -As is tradition, let us start with a simple ``Hello World'' workflow in -ccwl. 
This workflow accepts a string input and prints that string. - -@lisp -(define print - (command #:run "echo" (input 'message #:type 'string))) - -(workflow ((message #:type string)) - (print #:message message)) -@end lisp - -The first form in this code defines the @code{print} command. This form -is the equivalent of defining a @code{CommandLineTool} class workflow in -CWL. All arguments after @code{#:run} specify the command that will be -run. One of those arguments @code{(input 'message #:type 'string)} -refers to a @code{string} type input named @code{message}. Notice how -the command definition is very close to a shell command, only that it is -slightly annotated with inputs and their types. - -The second form describes the actual workflow and is the equivalent of -defining a @code{Workflow} class workflow in CWL. The form -@code{((message #:type string))} specifies the inputs of the -workflow. In this case, there is only one input---@code{message} of type -@code{string}. The body of the workflow specifies the commands that will -be executed. The body of this workflow executes only a single -command---the @code{print} command---passing the @code{message} input of -the workflow as the @code{message} input to the @code{print} command. - -If this workflow is written to a file @file{hello-world.scm}, we may -compile it to CWL by running - -@example -$ ccwl compile hello-world.scm -@end example - -This prints a big chunk of generated CWL to standard output. We have -achieved quite a lot of concision already! We write the generated CWL to -a file and execute it using @code{cwltool} as follows. The expected -output is also shown. - -@example -$ ccwl compile hello-world.scm > hello-world.cwl -$ cwltool hello-world.cwl --message "Hello World!" -[workflow ] start -[workflow ] starting step echo -[step echo] start -[job echo] /tmp/zprgn3x0$ echo \ - 'Hello World!' -Hello World! 
-[job echo] completed success -[step echo] completed success -[workflow ] completed success -@{@} -Final process status is success -@end example - -@node Capturing stdout -@section Capturing the standard output stream of a command - -Let us return to the ``Hello World'' example in the previous -section. But now, let us capture the standard output of the @code{print} -command in an output object. The ccwl code is the same as earlier with -only the addition of an @code{stdout} type output object to the command -definition. - -@lisp -(define print - (command #:run "echo" (input 'message #:type 'string) - #:outputs (output 'printed-message #:type 'stdout))) - -(workflow ((message #:type string)) - (print #:message message)) -@end lisp - -Let's write this code to a file @file{capture-stdout.scm}, generate CWL, -write the generated CWL to @file{capture-stdout.cwl}, and run it using -@code{cwltool}. We might expect something like the output below. Notice -how the standard output of the @code{print} command has been captured in -the file @file{51fe79d15e7790a9ded795304220d7a44aa84b48}. - -@example -$ ccwl compile capture-stdout.scm > capture-stdout.cwl -$ cwltool capture-stdout.cwl --message "Hello World!" -[workflow ] start -[workflow ] starting step print -[step print] start -[job print] /tmp/7zksx3xm$ echo \ - 'Hello World!' 
> /tmp/7zksx3xm/51fe79d15e7790a9ded795304220d7a44aa84b48 -[job print] completed success -[step print] completed success -[workflow ] completed success -@{ - "printed-message": @{ - "location": "file:///home/manimekalai/51fe79d15e7790a9ded795304220d7a44aa84b48", - "basename": "51fe79d15e7790a9ded795304220d7a44aa84b48", - "class": "File", - "checksum": "sha1$a0b65939670bc2c010f4d5d6a0b3e4e4590fb92b", - "size": 13, - "path": "/home/manimekalai/51fe79d15e7790a9ded795304220d7a44aa84b48" - @} -@} -Final process status is success -@end example - -@node Capturing output files -@section Capturing output files - -In the previous section, we captured the standard output stream of a -command. But, how do we capture any output files created by a command? -Let us see. - -Consider a tar archive @file{hello.tar} containing a file -@file{hello.txt}. - -@example -$ tar --list --file hello.tar -hello.txt -@end example - -Let us write a workflow to extract the file @file{hello.txt} from the -archive. Everything in the following workflow except the -@code{#:binding} parameter will already be familiar to you. The -@code{#:binding} parameter sets the @code{outputBinding} field in the -generated CWL. In the example below, we set the @code{glob} field to -look for a file named @file{hello.txt}. - -@lisp -(define extract - (command #:run "tar" "--extract" "--file" (input 'archive #:type 'File) - #:outputs (output 'extracted-file - #:type 'File - #:binding '((glob . "hello.txt"))))) - -(workflow ((archive #:type File)) - (extract #:archive archive)) -@end lisp - -Writing this workflow to @file{capture-output-file.scm}, compiling and -running it gives us the following output. Notice that the file -@file{hello.txt} has been captured and is now present in our current -working directory. 
- -@example -$ ccwl compile capture-output-file.scm > capture-output-file.cwl -$ cwltool capture-output-file.cwl --archive hello.tar -[workflow ] start -[workflow ] starting step extract -[step extract] start -[job extract] /tmp/nrolttex$ tar \ - --extract \ - --file \ - /tmp/z7pp7qwh/stg3ac272aa-3459-4f20-a033-86f53ba72caf/hello.tar -[job extract] completed success -[step extract] completed success -[workflow ] completed success -@{ - "extracted-file": @{ - "location": "file:///home/manimekalai/hello.txt", - "basename": "hello.txt", - "class": "File", - "checksum": "sha1$a0b65939670bc2c010f4d5d6a0b3e4e4590fb92b", - "size": 13, - "path": "/home/manimekalai/hello.txt" - @} -@} -Final process status is success -@end example - -The above workflow is not awfully flexible. The name of the file to -extract is hardcoded into the workflow. Let us modify the workflow to -accept the name of the file to extract. We introduce @code{extractfile}, -a @code{string} type input that is passed to @command{tar} and is -referenced in the @code{glob} field. - -@lisp -(define extract-specific-file - (command #:run "tar" "--extract" "--file" (input 'archive #:type 'File) - (input 'extractfile #:type 'string) - #:outputs (output 'extracted-file - #:type 'File - #:binding '((glob . "$(inputs.extractfile)"))))) - -(workflow ((archive #:type File) (extractfile #:type string)) - (extract-specific-file #:archive archive #:extractfile extractfile)) -@end lisp - -Compiling and running this workflow gives us the following output. 
- -@example -$ ccwl compile capture-output-file.scm > capture-output-file.cwl -$ cwltool capture-output-file.cwl --archive hello.tar --extractfile hello.txt -[workflow ] start -[workflow ] starting step extract-specific-file -[step extract-specific-file] start -[job extract-specific-file] /tmp/751nydd1$ tar \ - --extract \ - --file \ - /tmp/1zzw2n6m/stgc851e003-b5bd-437e-844b-311f6f66a7f1/hello.tar \ - hello.txt -[job extract-specific-file] completed success -[step extract-specific-file] completed success -[workflow ] completed success -@{ - "extracted-file": @{ - "location": "file:///home/manimekalai/hello.txt", - "basename": "hello.txt", - "class": "File", - "checksum": "sha1$a0b65939670bc2c010f4d5d6a0b3e4e4590fb92b", - "size": 13, - "path": "/home/manimekalai/hello.txt" - @} -@} -Final process status is success -@end example - -@node Workflow with multiple steps -@section Workflow with multiple steps - -Till now, we have only written trivial workflows with a single -command. If we were only interested in executing single commands, we -would hardly need a workflow language! So, in this section, let us write -our first multi-step workflow and learn how to connect steps together in -an arbitrary topology. - -@subsection pipe - -First, the simplest of topologies---a linear chain representing -sequential execution of steps. The following workflow decompresses a -compressed C source file, compiles and then executes it. - -@lisp -(define decompress - (command #:run "gzip" "--stdout" "--decompress" (input 'compressed #:type 'File) - #:outputs (output 'decompressed #:type 'stdout))) - -(define compile - (command #:run "gcc" "-x" "c" (input 'source #:type 'File) - #:outputs (output 'executable - #:type 'File - #:binding '((glob . 
"a.out"))))) - -(define run - (command #:run (input 'executable) - #:outputs (output 'stdout #:type 'stdout))) - -(workflow ((compressed-source #:type File)) - (pipe (decompress #:compressed compressed-source) - (compile #:source decompressed) - (run #:executable executable))) -@end lisp - -Notice the @code{pipe} form in the body of the workflow. The @code{pipe} -form specifies a list of steps to be executed sequentially. The workflow -inputs coming into @code{pipe} are passed into the first -step. Thereafter, the outputs of each step are passed as inputs into the -next. Note that this has nothing to do with the Unix pipe. The -inputs/outputs passed between steps are general CWL inputs/outputs. They -need not be the standard stdin and stdout streams. - -@c TODO: Add workflow graph. - -Writing this worklow to @file{decompress-compile-run.scm}, compiling and -running it with the compressed C source file @file{hello.c.gz} gives us -the following output. - -@example -$ ccwl compile decompress-compile-run.scm > decompress-compile-run.cwl -$ cwltool decompress-compile-run.cwl --compressed-source hello.c.gz -[workflow ] start -[workflow ] starting step decompress -[step decompress] start -[job decompress] /tmp/3bsk5yfm$ gzip \ - --stdout \ - --decompress \ - /tmp/yn4wh0j8/stg1e0bc56d-f845-4a28-a685-1faf96129eac/hello.c.gz > /tmp/3bsk5yfm/eae8fb860f3b6eaf6ae2b9d9285b5c07cc457e90 -[job decompress] completed success -[step decompress] completed success -[workflow ] starting step compile -[step compile] start -[job compile] /tmp/lnjz1vik$ gcc \ - -x \ - c \ - /tmp/rpf9g_lj/stg1be6bb98-7101-4f46-9885-fe0a985dee73/eae8fb860f3b6eaf6ae2b9d9285b5c07cc457e90 -[job compile] completed success -[step compile] completed success -[workflow ] starting step run -[step run] start -[job run] /tmp/fftn945x$ /tmp/favjw7d5/stg2576ae91-5240-4731-b98d-dee0f8ef7703/a.out > /tmp/fftn945x/c32c587f7afbdf87cf991c14a43edecf09cd93bf -[job run] completed success -[step run] completed success -[workflow 
] completed success -@{ - "stdout": @{ - "location": "file:///home/manimekalai/c32c587f7afbdf87cf991c14a43edecf09cd93bf", - "basename": "c32c587f7afbdf87cf991c14a43edecf09cd93bf", - "class": "File", - "checksum": "sha1$a0b65939670bc2c010f4d5d6a0b3e4e4590fb92b", - "size": 13, - "path": "/home/manimekalai/c32c587f7afbdf87cf991c14a43edecf09cd93bf" - @} -@} -Final process status is success -@end example - -The steps run in succession, and the stdout of the compiled executable -is in @file{c32c587f7afbdf87cf991c14a43edecf09cd93bf}. Success! - -@subsection tee - -Next, the tee topology. The following workflow computes three different -checksums of a given input file. - -@lisp -(define md5sum - (command #:run "md5sum" (input 'file #:type 'File) - #:outputs (output 'md5 #:type 'stdout))) - -(define sha1sum - (command #:run "sha1sum" (input 'file #:type 'File) - #:outputs (output 'sha1 #:type 'stdout))) - -(define sha256sum - (command #:run "sha256sum" (input 'file #:type 'File) - #:outputs (output 'sha256 #:type 'stdout))) - -(workflow ((file #:type File)) - (tee (md5sum #:file file) - (sha1sum #:file file) - (sha256sum #:file file))) -@end lisp - -Notice the @code{tee} form in the body of the workflow. The @code{tee} -form specifies a list of steps that are independent of each other. The -workflow inputs coming into @code{tee} are passed into every step -contained in the body of the @code{tee}. The outputs of each step are -collected together and unioned as the output of the @code{tee}. - -@c TODO: Add workflow graph. - -Writing this workflow to @file{checksum.scm}, compiling and running it -with some file @file{hello.txt} gives us the following output. 
- -@example -$ ccwl compile checksum.scm > checksum.cwl -$ cwltool checksum.cwl --file hello.txt -[workflow ] start -[workflow ] starting step sha256sum -[step sha256sum] start -[job sha256sum] /tmp/rjbcjppq$ sha256sum \ - /tmp/pc2bbl6o/stg2f7cdda0-9d89-47b7-96b6-fa377cc61c49/hello.txt > /tmp/rjbcjppq/0d2eaa5619c14b43326101200d0f27b0d8a1a4b1 -[job sha256sum] completed success -[step sha256sum] completed success -[workflow ] starting step sha1sum -[step sha1sum] start -[job sha1sum] /tmp/1cjtot5q$ sha1sum \ - /tmp/wliybbsp/stg993b2838-c803-4527-89d6-6a0cd7a0587a/hello.txt > /tmp/1cjtot5q/d2f19c786fcd3feb329004c8747803fba581a02d -[job sha1sum] completed success -[step sha1sum] completed success -[workflow ] starting step md5sum -[step md5sum] start -[job md5sum] /tmp/z7fe89c7$ md5sum \ - /tmp/41nnygw9/stgebdc428b-ec84-4283-88ae-682c7f4628ac/hello.txt > /tmp/z7fe89c7/112be1054505027982e64d56b0879049c12737c6 -[job md5sum] completed success -[step md5sum] completed success -[workflow ] completed success -@{ - "md5": @{ - "location": "file:///home/manimekalai/112be1054505027982e64d56b0879049c12737c6", - "basename": "112be1054505027982e64d56b0879049c12737c6", - "class": "File", - "checksum": "sha1$dd2e54f3bd22a8bb4ffbf543151050ee9645baf2", - "size": 98, - "path": "/home/manimekalai/112be1054505027982e64d56b0879049c12737c6" - @}, - "sha1": @{ - "location": "file:///home/manimekalai/d2f19c786fcd3feb329004c8747803fba581a02d", - "basename": "d2f19c786fcd3feb329004c8747803fba581a02d", - "class": "File", - "checksum": "sha1$f4112d33f41bc98a114b35759c26eec9a9f4256c", - "size": 106, - "path": "/home/manimekalai/d2f19c786fcd3feb329004c8747803fba581a02d" - @}, - "sha256": @{ - "location": "file:///home/manimekalai/0d2eaa5619c14b43326101200d0f27b0d8a1a4b1", - "basename": "0d2eaa5619c14b43326101200d0f27b0d8a1a4b1", - "class": "File", - "checksum": "sha1$868ce04a610122b1c1f2846e5e9f9fc7a289d120", - "size": 130, - "path": "/home/manimekalai/0d2eaa5619c14b43326101200d0f27b0d8a1a4b1" - 
@} -@} -Final process status is success -@end example - -The MD5, SHA1 and SHA256 checksums are in the files -@file{112be1054505027982e64d56b0879049c12737c6}, -@file{d2f19c786fcd3feb329004c8747803fba581a02d} and -@file{0d2eaa5619c14b43326101200d0f27b0d8a1a4b1} respectively. - -@node Contributing -@chapter Contributing - -ccwl is developed on GitHub at -@url{https://github.com/arunisaac/ccwl}. Feedback, suggestions, feature -requests, bug reports and pull requests are all welcome. Unclear and -unspecific error messages are considered a bug. Do report them! - -@bye diff --git a/doc/checksum.out b/doc/checksum.out new file mode 100644 index 0000000..01d7155 --- /dev/null +++ b/doc/checksum.out @@ -0,0 +1,49 @@ +$ ccwl compile checksum.scm > checksum.cwl +$ cwltool checksum.cwl --file hello.txt +[workflow ] start +[workflow ] starting step sha256sum +[step sha256sum] start +[job sha256sum] /tmp/rjbcjppq$ sha256sum \ + /tmp/pc2bbl6o/stg2f7cdda0-9d89-47b7-96b6-fa377cc61c49/hello.txt > /tmp/rjbcjppq/0d2eaa5619c14b43326101200d0f27b0d8a1a4b1 +[job sha256sum] completed success +[step sha256sum] completed success +[workflow ] starting step sha1sum +[step sha1sum] start +[job sha1sum] /tmp/1cjtot5q$ sha1sum \ + /tmp/wliybbsp/stg993b2838-c803-4527-89d6-6a0cd7a0587a/hello.txt > /tmp/1cjtot5q/d2f19c786fcd3feb329004c8747803fba581a02d +[job sha1sum] completed success +[step sha1sum] completed success +[workflow ] starting step md5sum +[step md5sum] start +[job md5sum] /tmp/z7fe89c7$ md5sum \ + /tmp/41nnygw9/stgebdc428b-ec84-4283-88ae-682c7f4628ac/hello.txt > /tmp/z7fe89c7/112be1054505027982e64d56b0879049c12737c6 +[job md5sum] completed success +[step md5sum] completed success +[workflow ] completed success +{ + "md5": { + "location": "file:///home/manimekalai/112be1054505027982e64d56b0879049c12737c6", + "basename": "112be1054505027982e64d56b0879049c12737c6", + "class": "File", + "checksum": "sha1$dd2e54f3bd22a8bb4ffbf543151050ee9645baf2", + "size": 98, + "path": 
"/home/manimekalai/112be1054505027982e64d56b0879049c12737c6" + }, + "sha1": { + "location": "file:///home/manimekalai/d2f19c786fcd3feb329004c8747803fba581a02d", + "basename": "d2f19c786fcd3feb329004c8747803fba581a02d", + "class": "File", + "checksum": "sha1$f4112d33f41bc98a114b35759c26eec9a9f4256c", + "size": 106, + "path": "/home/manimekalai/d2f19c786fcd3feb329004c8747803fba581a02d" + }, + "sha256": { + "location": "file:///home/manimekalai/0d2eaa5619c14b43326101200d0f27b0d8a1a4b1", + "basename": "0d2eaa5619c14b43326101200d0f27b0d8a1a4b1", + "class": "File", + "checksum": "sha1$868ce04a610122b1c1f2846e5e9f9fc7a289d120", + "size": 130, + "path": "/home/manimekalai/0d2eaa5619c14b43326101200d0f27b0d8a1a4b1" + } +} +Final process status is success diff --git a/doc/checksum.scm b/doc/checksum.scm new file mode 100644 index 0000000..206c2a0 --- /dev/null +++ b/doc/checksum.scm @@ -0,0 +1,16 @@ +(define md5sum + (command #:run "md5sum" (input 'file #:type 'File) + #:outputs (output 'md5 #:type 'stdout))) + +(define sha1sum + (command #:run "sha1sum" (input 'file #:type 'File) + #:outputs (output 'sha1 #:type 'stdout))) + +(define sha256sum + (command #:run "sha256sum" (input 'file #:type 'File) + #:outputs (output 'sha256 #:type 'stdout))) + +(workflow ((file #:type File)) + (tee (md5sum #:file file) + (sha1sum #:file file) + (sha256sum #:file file))) diff --git a/doc/decompress-compile-run.out b/doc/decompress-compile-run.out new file mode 100644 index 0000000..7ab832e --- /dev/null +++ b/doc/decompress-compile-run.out @@ -0,0 +1,36 @@ +$ ccwl compile decompress-compile-run.scm > decompress-compile-run.cwl +$ cwltool decompress-compile-run.cwl --compressed-source hello.c.gz +[workflow ] start +[workflow ] starting step decompress +[step decompress] start +[job decompress] /tmp/3bsk5yfm$ gzip \ + --stdout \ + --decompress \ + /tmp/yn4wh0j8/stg1e0bc56d-f845-4a28-a685-1faf96129eac/hello.c.gz > /tmp/3bsk5yfm/eae8fb860f3b6eaf6ae2b9d9285b5c07cc457e90 +[job decompress] 
completed success +[step decompress] completed success +[workflow ] starting step compile +[step compile] start +[job compile] /tmp/lnjz1vik$ gcc \ + -x \ + c \ + /tmp/rpf9g_lj/stg1be6bb98-7101-4f46-9885-fe0a985dee73/eae8fb860f3b6eaf6ae2b9d9285b5c07cc457e90 +[job compile] completed success +[step compile] completed success +[workflow ] starting step run +[step run] start +[job run] /tmp/fftn945x$ /tmp/favjw7d5/stg2576ae91-5240-4731-b98d-dee0f8ef7703/a.out > /tmp/fftn945x/c32c587f7afbdf87cf991c14a43edecf09cd93bf +[job run] completed success +[step run] completed success +[workflow ] completed success +{ + "stdout": { + "location": "file:///home/manimekalai/c32c587f7afbdf87cf991c14a43edecf09cd93bf", + "basename": "c32c587f7afbdf87cf991c14a43edecf09cd93bf", + "class": "File", + "checksum": "sha1$a0b65939670bc2c010f4d5d6a0b3e4e4590fb92b", + "size": 13, + "path": "/home/manimekalai/c32c587f7afbdf87cf991c14a43edecf09cd93bf" + } +} +Final process status is success diff --git a/doc/decompress-compile-run.scm b/doc/decompress-compile-run.scm new file mode 100644 index 0000000..437fb41 --- /dev/null +++ b/doc/decompress-compile-run.scm @@ -0,0 +1,18 @@ +(define decompress + (command #:run "gzip" "--stdout" "--decompress" (input 'compressed #:type 'File) + #:outputs (output 'decompressed #:type 'stdout))) + +(define compile + (command #:run "gcc" "-x" "c" (input 'source #:type 'File) + #:outputs (output 'executable + #:type 'File + #:binding '((glob . 
"a.out"))))) + +(define run + (command #:run (input 'executable) + #:outputs (output 'stdout #:type 'stdout))) + +(workflow ((compressed-source #:type File)) + (pipe (decompress #:compressed compressed-source) + (compile #:source decompressed) + (run #:executable executable))) diff --git a/doc/hello-world.out b/doc/hello-world.out new file mode 100644 index 0000000..3a783a1 --- /dev/null +++ b/doc/hello-world.out @@ -0,0 +1,13 @@ +$ ccwl compile hello-world.scm > hello-world.cwl +$ cwltool hello-world.cwl --message "Hello World!" +[workflow ] start +[workflow ] starting step echo +[step echo] start +[job echo] /tmp/zprgn3x0$ echo \ + 'Hello World!' +Hello World! +[job echo] completed success +[step echo] completed success +[workflow ] completed success +{} +Final process status is success \ No newline at end of file diff --git a/doc/hello-world.scm b/doc/hello-world.scm new file mode 100644 index 0000000..262b99f --- /dev/null +++ b/doc/hello-world.scm @@ -0,0 +1,5 @@ +(define print + (command #:run "echo" (input 'message #:type 'string))) + +(workflow ((message #:type string)) + (print #:message message)) diff --git a/doc/hello.tar.out b/doc/hello.tar.out new file mode 100644 index 0000000..2d492b7 --- /dev/null +++ b/doc/hello.tar.out @@ -0,0 +1,2 @@ +$ tar --list --file hello.tar +hello.txt diff --git a/guix.scm b/guix.scm index 47fce64..3714808 100644 --- a/guix.scm +++ b/guix.scm @@ -76,7 +76,7 @@ `(("autoconf" ,autoconf) ("automake" ,automake) ("pkg-config" ,pkg-config) - ("texinfo" ,texinfo))) + ("skribilo" ,skribilo))) (home-page "https://git.systemreboot.net/ccwl") (synopsis "Concise common workflow language") (description "Concise common workflow language") -- cgit v1.2.3