From bf485dd43386305177a2ba3ea1a5ee7e164984c6 Mon Sep 17 00:00:00 2001 From: Arun Isaac Date: Tue, 15 Jun 2021 16:45:49 +0530 Subject: doc: Move documentation from texinfo to skribilo. * doc/ccwl.texi: Delete file. * ccwl/skribilo.scm, doc/ccwl.skb, doc/capture-output-file-with-parameter-reference.out, doc/capture-output-file-with-parameter-reference.scm, doc/capture-output-file.out, doc/capture-output-file.scm, doc/capture-stdout.out, doc/capture-stdout.scm, doc/checksum.out, doc/checksum.scm, doc/decompress-compile-run.out, doc/decompress-compile-run.scm, doc/hello-world.out, doc/hello-world.scm, doc/hello.tar.out: New files. * configure.ac: Check for skribilo. * Makefile.am: Unregister doc/ccwl.texi. Add documentation build rules. * guix.scm (ccwl)[native-inputs]: Replace texinfo with skribilo. * .gitignore: Add doc/skribilo. Remove doc/version.texi, doc/*.html, doc/*.info. --- doc/ccwl.texi | 529 ---------------------------------------------------------- 1 file changed, 529 deletions(-) delete mode 100644 doc/ccwl.texi (limited to 'doc/ccwl.texi') diff --git a/doc/ccwl.texi b/doc/ccwl.texi deleted file mode 100644 index afc7e7d..0000000 --- a/doc/ccwl.texi +++ /dev/null @@ -1,529 +0,0 @@ -\input texinfo -@settitle Concise Common Workflow Language - -@include version.texi - -@copying -Copyright @copyright{} 2021 Arun Isaac@* - -Permission is granted to copy, distribute and/or modify this document -under the terms of the GNU Free Documentation License, Version 1.3 or -any later version published by the Free Software Foundation; with no -Invariant Sections, no Front-Cover Texts, and no Back-Cover Texts. A -copy of the license is included in the section entitled ``GNU Free -Documentation License''. 
-@end copying - -@titlepage -@title Concise Common Workflow Language -@subtitle Writing Common Workflow Language workflows using ccwl -@author The ccwl authors - -@page -@vskip 0pt plus 1filll -Edition @value{EDITION} @* -@value{UPDATED} @* -@insertcopying -@end titlepage - -@contents - -@ifnottex -@node Top -@top Concise Common Workflow Language - -This manual documents @abbr{ccwl, Concise Common Workflow Language} -version @value{VERSION}. ccwl is a concise syntax to express @abbr{CWL, -Common Workflow Language} workflows. -@end ifnottex - -@menu -* Introduction:: What is ccwl? -* Tutorial:: A quick tutorial to get started with ccwl -* Contributing:: Contributing -@end menu - -@node Introduction -@chapter Introduction - -@abbr{CWL, Common Workflow Language} is an open standard for describing -analysis workflows and tools in a way that makes them portable and -scalable across a variety of software and hardware environments. - -@abbr{ccwl, Concise Common Workflow Language} is a concise syntax to -express CWL workflows. It is implemented as an @abbr{EDSL, Embedded -Domain Specific Language} in the Scheme programming language, a -minimalist dialect of the Lisp family of programming languages. - -ccwl is a compiler to generate CWL workflows from concise descriptions -in ccwl. In the future, ccwl will also have a runtime whereby users can -interactively execute workflows while developing them. - -@node Tutorial -@chapter Tutorial - -This tutorial will introduce you to writing workflows in ccwl. Some -knowledge of CWL is assumed. To learn about CWL, please see the -@url{https://www.commonwl.org/user_guide/, Common Workflow Language User -Guide}. 
- -@menu -* Important concepts:: Static typing, multiple named inputs and outputs -* First example:: Our first ccwl workflow -* Capturing stdout:: Capturing the standard output stream of a command -* Capturing output files:: Capturing output files produced by a command -* Workflow with multiple steps:: Connecting steps together in a graph -@end menu - -@node Important concepts -@section The CWL and ccwl workflow languages - -The CWL and ccwl workflow languages are statically typed programming -languages where functions accept multiple named inputs and return -multiple named outputs. Let's break down what that means. - -@subsection Static typing - -In CWL, the type of arguments accepted by a function and the type of -outputs returned by that function are specified explicitly by the -programmer, and are known at compile time even before the code has been -run. Hence, we say that it is statically typed. - -@subsection Positional arguments and named arguments - -In many languages, the order of arguments passed to a function is -significant. The position of each argument determines which formal -argument it gets mapped to. For example, passing positional arguments in -Scheme looks like - -@lisp -(foo 1 2) -@end lisp - -In a language that supports named arguments, the order of arguments is -not significant. Each argument explicitly names the formal argument it -gets mapped to. For example, in Scheme, passing named arguments may look -like - -@lisp -(foo #:bar 1 #:baz 2) -@end lisp - -@subsection Multiple function arguments and return values - -In most languages, functions accept multiple input arguments but only -return a single output value. However, in CWL, a function can return -multiple output values as well. These multiple outputs are unordered and -are each addressed by a unique name. - -@node First example -@section First example - -As is tradition, let us start with a simple ``Hello World'' workflow in -ccwl. 
This workflow accepts a string input and prints that string. - -@lisp -(define print - (command #:run "echo" (input 'message #:type 'string))) - -(workflow ((message #:type string)) - (print #:message message)) -@end lisp - -The first form in this code defines the @code{print} command. This form -is the equivalent of defining a @code{CommandLineTool} class workflow in -CWL. All arguments after @code{#:run} specify the command that will be -run. One of those arguments @code{(input 'message #:type 'string)} -refers to a @code{string} type input named @code{message}. Notice how -the command definition is very close to a shell command, only that it is -slightly annotated with inputs and their types. - -The second form describes the actual workflow and is the equivalent of -defining a @code{Workflow} class workflow in CWL. The form -@code{((message #:type string))} specifies the inputs of the -workflow. In this case, there is only one input---@code{message} of type -@code{string}. The body of the workflow specifies the commands that will -be executed. The body of this workflow executes only a single -command---the @code{print} command---passing the @code{message} input of -the workflow as the @code{message} input to the @code{print} command. - -If this workflow is written to a file @file{hello-world.scm}, we may -compile it to CWL by running - -@example -$ ccwl compile hello-world.scm -@end example - -This prints a big chunk of generated CWL to standard output. We have -achieved quite a lot of concision already! We write the generated CWL to -a file and execute it using @code{cwltool} as follows. The expected -output is also shown. - -@example -$ ccwl compile hello-world.scm > hello-world.cwl -$ cwltool hello-world.cwl --message "Hello World!" -[workflow ] start -[workflow ] starting step echo -[step echo] start -[job echo] /tmp/zprgn3x0$ echo \ - 'Hello World!' -Hello World! 
-[job echo] completed success -[step echo] completed success -[workflow ] completed success -@{@} -Final process status is success -@end example - -@node Capturing stdout -@section Capturing the standard output stream of a command - -Let us return to the ``Hello World'' example in the previous -section. But now, let us capture the standard output of the @code{print} -command in an output object. The ccwl code is the same as earlier with -only the addition of an @code{stdout} type output object to the command -definition. - -@lisp -(define print - (command #:run "echo" (input 'message #:type 'string) - #:outputs (output 'printed-message #:type 'stdout))) - -(workflow ((message #:type string)) - (print #:message message)) -@end lisp - -Let's write this code to a file @file{capture-stdout.scm}, generate CWL, -write the generated CWL to @file{capture-stdout.cwl}, and run it using -@code{cwltool}. We might expect something like the output below. Notice -how the standard output of the @code{print} command has been captured in -the file @file{51fe79d15e7790a9ded795304220d7a44aa84b48}. - -@example -$ ccwl compile capture-stdout.scm > capture-stdout.cwl -$ cwltool capture-stdout.cwl --message "Hello World!" -[workflow ] start -[workflow ] starting step print -[step print] start -[job print] /tmp/7zksx3xm$ echo \ - 'Hello World!' 
> /tmp/7zksx3xm/51fe79d15e7790a9ded795304220d7a44aa84b48 -[job print] completed success -[step print] completed success -[workflow ] completed success -@{ - "printed-message": @{ - "location": "file:///home/manimekalai/51fe79d15e7790a9ded795304220d7a44aa84b48", - "basename": "51fe79d15e7790a9ded795304220d7a44aa84b48", - "class": "File", - "checksum": "sha1$a0b65939670bc2c010f4d5d6a0b3e4e4590fb92b", - "size": 13, - "path": "/home/manimekalai/51fe79d15e7790a9ded795304220d7a44aa84b48" - @} -@} -Final process status is success -@end example - -@node Capturing output files -@section Capturing output files - -In the previous section, we captured the standard output stream of a -command. But, how do we capture any output files created by a command? -Let us see. - -Consider a tar archive @file{hello.tar} containing a file -@file{hello.txt}. - -@example -$ tar --list --file hello.tar -hello.txt -@end example - -Let us write a workflow to extract the file @file{hello.txt} from the -archive. Everything in the following workflow except the -@code{#:binding} parameter will already be familiar to you. The -@code{#:binding} parameter sets the @code{outputBinding} field in the -generated CWL. In the example below, we set the @code{glob} field to -look for a file named @file{hello.txt}. - -@lisp -(define extract - (command #:run "tar" "--extract" "--file" (input 'archive #:type 'File) - #:outputs (output 'extracted-file - #:type 'File - #:binding '((glob . "hello.txt"))))) - -(workflow ((archive #:type File)) - (extract #:archive archive)) -@end lisp - -Writing this workflow to @file{capture-output-file.scm}, compiling and -running it gives us the following output. Notice that the file -@file{hello.txt} has been captured and is now present in our current -working directory. 
- -@example -$ ccwl compile capture-output-file.scm > capture-output-file.cwl -$ cwltool capture-output-file.cwl --archive hello.tar -[workflow ] start -[workflow ] starting step extract -[step extract] start -[job extract] /tmp/nrolttex$ tar \ - --extract \ - --file \ - /tmp/z7pp7qwh/stg3ac272aa-3459-4f20-a033-86f53ba72caf/hello.tar -[job extract] completed success -[step extract] completed success -[workflow ] completed success -@{ - "extracted-file": @{ - "location": "file:///home/manimekalai/hello.txt", - "basename": "hello.txt", - "class": "File", - "checksum": "sha1$a0b65939670bc2c010f4d5d6a0b3e4e4590fb92b", - "size": 13, - "path": "/home/manimekalai/hello.txt" - @} -@} -Final process status is success -@end example - -The above workflow is not awfully flexible. The name of the file to -extract is hardcoded into the workflow. Let us modify the workflow to -accept the name of the file to extract. We introduce @code{extractfile}, -a @code{string} type input that is passed to @command{tar} and is -referenced in the @code{glob} field. - -@lisp -(define extract-specific-file - (command #:run "tar" "--extract" "--file" (input 'archive #:type 'File) - (input 'extractfile #:type 'string) - #:outputs (output 'extracted-file - #:type 'File - #:binding '((glob . "$(inputs.extractfile)"))))) - -(workflow ((archive #:type File) (extractfile #:type string)) - (extract-specific-file #:archive archive #:extractfile extractfile)) -@end lisp - -Compiling and running this workflow gives us the following output. 
- -@example -$ ccwl compile capture-output-file.scm > capture-output-file.cwl -$ cwltool capture-output-file.cwl --archive hello.tar --extractfile hello.txt -[workflow ] start -[workflow ] starting step extract-specific-file -[step extract-specific-file] start -[job extract-specific-file] /tmp/751nydd1$ tar \ - --extract \ - --file \ - /tmp/1zzw2n6m/stgc851e003-b5bd-437e-844b-311f6f66a7f1/hello.tar \ - hello.txt -[job extract-specific-file] completed success -[step extract-specific-file] completed success -[workflow ] completed success -@{ - "extracted-file": @{ - "location": "file:///home/manimekalai/hello.txt", - "basename": "hello.txt", - "class": "File", - "checksum": "sha1$a0b65939670bc2c010f4d5d6a0b3e4e4590fb92b", - "size": 13, - "path": "/home/manimekalai/hello.txt" - @} -@} -Final process status is success -@end example - -@node Workflow with multiple steps -@section Workflow with multiple steps - -Till now, we have only written trivial workflows with a single -command. If we were only interested in executing single commands, we -would hardly need a workflow language! So, in this section, let us write -our first multi-step workflow and learn how to connect steps together in -an arbitrary topology. - -@subsection pipe - -First, the simplest of topologies---a linear chain representing -sequential execution of steps. The following workflow decompresses a -compressed C source file, compiles and then executes it. - -@lisp -(define decompress - (command #:run "gzip" "--stdout" "--decompress" (input 'compressed #:type 'File) - #:outputs (output 'decompressed #:type 'stdout))) - -(define compile - (command #:run "gcc" "-x" "c" (input 'source #:type 'File) - #:outputs (output 'executable - #:type 'File - #:binding '((glob . 
"a.out"))))) - -(define run - (command #:run (input 'executable) - #:outputs (output 'stdout #:type 'stdout))) - -(workflow ((compressed-source #:type File)) - (pipe (decompress #:compressed compressed-source) - (compile #:source decompressed) - (run #:executable executable))) -@end lisp - -Notice the @code{pipe} form in the body of the workflow. The @code{pipe} -form specifies a list of steps to be executed sequentially. The workflow -inputs coming into @code{pipe} are passed into the first -step. Thereafter, the outputs of each step are passed as inputs into the -next. Note that this has nothing to do with the Unix pipe. The -inputs/outputs passed between steps are general CWL inputs/outputs. They -need not be the standard stdin and stdout streams. - -@c TODO: Add workflow graph. - -Writing this workflow to @file{decompress-compile-run.scm}, compiling and -running it with the compressed C source file @file{hello.c.gz} gives us -the following output. - -@example -$ ccwl compile decompress-compile-run.scm > decompress-compile-run.cwl -$ cwltool decompress-compile-run.cwl --compressed-source hello.c.gz -[workflow ] start -[workflow ] starting step decompress -[step decompress] start -[job decompress] /tmp/3bsk5yfm$ gzip \ - --stdout \ - --decompress \ - /tmp/yn4wh0j8/stg1e0bc56d-f845-4a28-a685-1faf96129eac/hello.c.gz > /tmp/3bsk5yfm/eae8fb860f3b6eaf6ae2b9d9285b5c07cc457e90 -[job decompress] completed success -[step decompress] completed success -[workflow ] starting step compile -[step compile] start -[job compile] /tmp/lnjz1vik$ gcc \ - -x \ - c \ - /tmp/rpf9g_lj/stg1be6bb98-7101-4f46-9885-fe0a985dee73/eae8fb860f3b6eaf6ae2b9d9285b5c07cc457e90 -[job compile] completed success -[step compile] completed success -[workflow ] starting step run -[step run] start -[job run] /tmp/fftn945x$ /tmp/favjw7d5/stg2576ae91-5240-4731-b98d-dee0f8ef7703/a.out > /tmp/fftn945x/c32c587f7afbdf87cf991c14a43edecf09cd93bf -[job run] completed success -[step run] completed success -[workflow 
] completed success -@{ - "stdout": @{ - "location": "file:///home/manimekalai/c32c587f7afbdf87cf991c14a43edecf09cd93bf", - "basename": "c32c587f7afbdf87cf991c14a43edecf09cd93bf", - "class": "File", - "checksum": "sha1$a0b65939670bc2c010f4d5d6a0b3e4e4590fb92b", - "size": 13, - "path": "/home/manimekalai/c32c587f7afbdf87cf991c14a43edecf09cd93bf" - @} -@} -Final process status is success -@end example - -The steps run in succession, and the stdout of the compiled executable -is in @file{c32c587f7afbdf87cf991c14a43edecf09cd93bf}. Success! - -@subsection tee - -Next, the tee topology. The following workflow computes three different -checksums of a given input file. - -@lisp -(define md5sum - (command #:run "md5sum" (input 'file #:type 'File) - #:outputs (output 'md5 #:type 'stdout))) - -(define sha1sum - (command #:run "sha1sum" (input 'file #:type 'File) - #:outputs (output 'sha1 #:type 'stdout))) - -(define sha256sum - (command #:run "sha256sum" (input 'file #:type 'File) - #:outputs (output 'sha256 #:type 'stdout))) - -(workflow ((file #:type File)) - (tee (md5sum #:file file) - (sha1sum #:file file) - (sha256sum #:file file))) -@end lisp - -Notice the @code{tee} form in the body of the workflow. The @code{tee} -form specifies a list of steps that are independent of each other. The -workflow inputs coming into @code{tee} are passed into every step -contained in the body of the @code{tee}. The outputs of each step are -collected together and unioned as the output of the @code{tee}. - -@c TODO: Add workflow graph. - -Writing this workflow to @file{checksum.scm}, compiling and running it -with some file @file{hello.txt} gives us the following output. 
- -@example -$ ccwl compile checksum.scm > checksum.cwl -$ cwltool checksum.cwl --file hello.txt -[workflow ] start -[workflow ] starting step sha256sum -[step sha256sum] start -[job sha256sum] /tmp/rjbcjppq$ sha256sum \ - /tmp/pc2bbl6o/stg2f7cdda0-9d89-47b7-96b6-fa377cc61c49/hello.txt > /tmp/rjbcjppq/0d2eaa5619c14b43326101200d0f27b0d8a1a4b1 -[job sha256sum] completed success -[step sha256sum] completed success -[workflow ] starting step sha1sum -[step sha1sum] start -[job sha1sum] /tmp/1cjtot5q$ sha1sum \ - /tmp/wliybbsp/stg993b2838-c803-4527-89d6-6a0cd7a0587a/hello.txt > /tmp/1cjtot5q/d2f19c786fcd3feb329004c8747803fba581a02d -[job sha1sum] completed success -[step sha1sum] completed success -[workflow ] starting step md5sum -[step md5sum] start -[job md5sum] /tmp/z7fe89c7$ md5sum \ - /tmp/41nnygw9/stgebdc428b-ec84-4283-88ae-682c7f4628ac/hello.txt > /tmp/z7fe89c7/112be1054505027982e64d56b0879049c12737c6 -[job md5sum] completed success -[step md5sum] completed success -[workflow ] completed success -@{ - "md5": @{ - "location": "file:///home/manimekalai/112be1054505027982e64d56b0879049c12737c6", - "basename": "112be1054505027982e64d56b0879049c12737c6", - "class": "File", - "checksum": "sha1$dd2e54f3bd22a8bb4ffbf543151050ee9645baf2", - "size": 98, - "path": "/home/manimekalai/112be1054505027982e64d56b0879049c12737c6" - @}, - "sha1": @{ - "location": "file:///home/manimekalai/d2f19c786fcd3feb329004c8747803fba581a02d", - "basename": "d2f19c786fcd3feb329004c8747803fba581a02d", - "class": "File", - "checksum": "sha1$f4112d33f41bc98a114b35759c26eec9a9f4256c", - "size": 106, - "path": "/home/manimekalai/d2f19c786fcd3feb329004c8747803fba581a02d" - @}, - "sha256": @{ - "location": "file:///home/manimekalai/0d2eaa5619c14b43326101200d0f27b0d8a1a4b1", - "basename": "0d2eaa5619c14b43326101200d0f27b0d8a1a4b1", - "class": "File", - "checksum": "sha1$868ce04a610122b1c1f2846e5e9f9fc7a289d120", - "size": 130, - "path": "/home/manimekalai/0d2eaa5619c14b43326101200d0f27b0d8a1a4b1" - 
@} -@} -Final process status is success -@end example - -The MD5, SHA1 and SHA256 checksums are in the files -@file{112be1054505027982e64d56b0879049c12737c6}, -@file{d2f19c786fcd3feb329004c8747803fba581a02d} and -@file{0d2eaa5619c14b43326101200d0f27b0d8a1a4b1} respectively. - -@node Contributing -@chapter Contributing - -ccwl is developed on GitHub at -@url{https://github.com/arunisaac/ccwl}. Feedback, suggestions, feature -requests, bug reports and pull requests are all welcome. Unclear and -unspecific error messages are considered bugs. Do report them! - -@bye -- cgit v1.2.3