From 621eb1945aec8f26f5aee4bdf896f2434e145182 Mon Sep 17 00:00:00 2001 From: Arun Isaac Date: Sun, 13 Mar 2022 21:38:13 +0530 Subject: reader: Add Gemtext reader. * src/guile/skribilo/reader/gemtext.scm: New file. * configure.ac: Set BUILD_GEMTEXT_READER automake conditional to true if (srfi srfi-171) is found. Else, set it to false. * src/guile/Makefile.am (readers): Add skribilo/reader/gemtext.scm if BUILD_GEMTEXT_READER is true. (EXTRA_DIST): Add skribilo/reader/gemtext.scm if BUILD_GEMTEXT_READER is false. * doc/user/syntax.skb (The Gemtext Syntax): New section. * tests/readers/gemtext.test: New file. * tests/Makefile.am (TESTS): Add readers/gemtext.test if BUILD_GEMTEXT_READER is true. (EXTRA_DIST): Add readers/gemtext.text if BUILD_GEMTEXT_READER is false. --- configure.ac | 9 ++ doc/user/syntax.skb | 21 +++- src/guile/Makefile.am | 10 ++ src/guile/skribilo/reader/gemtext.scm | 231 ++++++++++++++++++++++++++++++++++ tests/Makefile.am | 10 ++ tests/readers/gemtext.test | 133 ++++++++++++++++++++ 6 files changed, 413 insertions(+), 1 deletion(-) create mode 100644 src/guile/skribilo/reader/gemtext.scm create mode 100644 tests/readers/gemtext.test diff --git a/configure.ac b/configure.ac index 04c7eac..5ad964a 100644 --- a/configure.ac +++ b/configure.ac @@ -66,6 +66,15 @@ fi AM_CONDITIONAL([BUILD_RSS2_READER], [test "x$have_sxml_simple$have_htmlprag" == "xyesyes"]) +# Check for SRFI-171, needed for the `gemtext' reader. +GUILE_MODULE_AVAILABLE([have_srfi_171], [(srfi srfi-171)]) +if test "x$have_srfi_171" != "xyes"; then + AC_MSG_WARN([SRFI-171 needed by the `gemtext' reader is missing.]) +fi + +AM_CONDITIONAL([BUILD_GEMTEXT_READER], + [test "x$have_srfi_171" == "xyes"]) + # Look for `convert', from ImageMagick. AC_PATH_PROG([CONVERT], [convert]) if test "x$CONVERT" == "x"; then diff --git a/doc/user/syntax.skb b/doc/user/syntax.skb index 9a4070c..2de7cbd 100644 --- a/doc/user/syntax.skb +++ b/doc/user/syntax.skb @@ -211,7 +211,26 @@ documents that can be output in variety of formats (see ,(numref :text [Chapter] :ident "engines")). The downside is that, being a very simple markup-less document format, there are many things that cannot be done using it, most notably tables, bibliographies, and cross-references.])) - + + (section :title [The Gemtext Syntax] :ident "gemtext-syntax" + (p [,(ref +:url "https://gemini.circumlunar.space/docs/gemtext.gmi" +:text "Gemtext"), the lightweight markup language used by the ,(ref +:url "https://gemini.circumlunar.space" :text "Gemini protocol"), is +supported as an input syntax. To use it, just pass ,(tt +[--reader=gemtext]) to the compiler. When used programmatically, the +Gemtext reader can be customized using the following options.]) + + (doc-markup 'make-gemtext-reader + '((:join-lines? [If ,(code "#t"), lines which are not +separated by a blank line are joined into a single paragraph. This is +a relaxation of the Gemtext standard, and is not done by default.]) + (:section-numbers? [If ,(code "#t"), sections are +numbered. Else, they are not.])) + :common-args '() + :source "skribilo/reader/gemtext.scm" + :idx *function-index*)) + (section :title [The RSS 2.0 Syntax] :ident "rss2-syntax" diff --git a/src/guile/Makefile.am b/src/guile/Makefile.am index 98f2873..09bb7da 100644 --- a/src/guile/Makefile.am +++ b/src/guile/Makefile.am @@ -1,5 +1,6 @@ # Copyright 2005, 2006, 2007, 2008, 2009, 2010, 2012, # 2015, 2018, 2020 Ludovic Courtès +# Copyright 2022 Arun Isaac # # This file is part of Skribilo. # @@ -59,6 +60,15 @@ EXTRA_DIST += skribilo/reader/rss-2.scm endif !BUILD_RSS2_READER +if BUILD_GEMTEXT_READER + +readers += skribilo/reader/gemtext.scm + +else !BUILD_GEMTEXT_READER + +EXTRA_DIST += skribilo/reader/gemtext.scm + +endif !BUILD_GEMTEXT_READER engines = \ skribilo/engine/base.scm skribilo/engine/context.scm \ diff --git a/src/guile/skribilo/reader/gemtext.scm b/src/guile/skribilo/reader/gemtext.scm new file mode 100644 index 0000000..7f5905c --- /dev/null +++ b/src/guile/skribilo/reader/gemtext.scm @@ -0,0 +1,231 @@ +;;; gemtext.scm -- A reader for the Gemini protocol's Gemtext markup +;;; +;;; Copyright © 2022 Arun Isaac +;;; +;;; +;;; This file is part of Skribilo. +;;; +;;; Skribilo is free software: you can redistribute it and/or modify +;;; it under the terms of the GNU General Public License as published by +;;; the Free Software Foundation, either version 3 of the License, or +;;; (at your option) any later version. +;;; +;;; Skribilo is distributed in the hope that it will be useful, +;;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;;; GNU General Public License for more details. +;;; +;;; You should have received a copy of the GNU General Public License +;;; along with Skribilo. If not, see . + +(define-module (skribilo reader gemtext) + #:use-module (rnrs io ports) + #:use-module (srfi srfi-26) + #:use-module (srfi srfi-71) + #:use-module (srfi srfi-171) + #:use-module (ice-9 match) + #:use-module ((ice-9 textual-ports) #:select (unget-char unget-string)) + #:use-module (skribilo reader) + #:use-module (skribilo utils syntax) + #:export (reader-specification + make-gemtext-reader)) + +(skribilo-module-syntax) + +;;; Author: Arun Isaac +;;; +;;; Commentary: +;;; +;;; A reader for gemtext, the lightweight markup language used by the +;;; Gemini protocol +;;; +;;; Code: + +(define %join-lines? + (make-parameter #f)) + +(define %section-numbers? + (make-parameter #f)) + +(define (string-blank? str) + "Return #t if STR contains only whitespace characters. Else, return +#f." + (string-every char-set:whitespace str)) + +(define (string-remove-prefix prefix str) + "Return STR with PREFIX removed. If PREFIX is not a prefix of STR, +return #f." + (and (string-prefix? prefix str) + (substring str (string-length prefix)))) + +(define (string-partition str char-pred) + "Return the part of STR before and after the first occurrence of +CHAR-PRED as two values." + (let ((partition-index (string-index str char-pred))) + (if partition-index + (values (substring str 0 partition-index) + (substring str partition-index)) + (values str #f)))) + +(define (unget-line port line) + "Place the string LINE in PORT so that subsequent read operations +will read LINE followed by a newline character." + (unget-char port #\newline) + (unget-string port line)) + +(define (read-preformatted-text in out) + "Read preformatted text from port IN and write it to port OUT." + (let ((line (get-line in))) + (unless (or (eof-object? line) + (string-prefix? "```" line)) + (put-string out line) + (newline out) + (read-preformatted-text in out)))) + +(define (heading-level line) + "Return the level of the heading in LINE. If LINE is not a heading, +return #f." + (cond + ((string-prefix? "### " line) 3) + ((string-prefix? "## " line) 2) + ((string-prefix? "# " line) 1) + (else #f))) + +(define (read-section-children level port) + "Read section elements of LEVEL from PORT. Return as a list." + (let ((line (get-line port))) + (cond + ;; End of file + ((eof-object? line) (list)) + ;; If another heading of same or higher level begins, unget line + ;; and end section. + ((let ((heading-level (heading-level line))) + (and heading-level + (<= heading-level level))) + (unget-line port line) + (list)) + ;; If blank line, continue. + ((string-blank? line) + (read-section-children level port)) + ;; Else, add element and continue. + (else + (unget-line port line) + (cons (read-gemtext-element port) + (read-section-children level port)))))) + +(define (paragraph-line? line) + "Return #t if LINE is a paragraph line. Else, return #f." + (not (or (string-blank? line) + (heading-level line) + (string-prefix? "* " line) + (string-prefix? ">" line) + (string-prefix? "=>" line) + (string-prefix? "```" line)))) + +(define (link-line->item line) + "Convert link LINE to a skribilo ref expression." + (let* ((trimmed-line (string-trim (string-remove-prefix "=>" line))) + (url text (string-partition trimmed-line (char-set #\space #\tab)))) + (if text + `(item (ref #:url ,url #:text ,(string-trim text))) + `(item (ref #:url ,url))))) + +(define (retf-unget-line port result line) + "Unget LINE to PORT and return RESULT. This function is used as an +argument to ttake-while." + (unget-line port line) + result) + +(define (read-gemtext-element port) + "Read next gemtext element from PORT." + (let ((line (get-line port))) + (cond + ;; End of file + ((eof-object? line) line) + ;; Section + ((heading-level line) + => (lambda (level) + `(,(case level + ((1) 'section) + ((2) 'subsection) + ((3) 'subsubsection)) + #:title ,(substring line (1+ level)) + #:number ,(%section-numbers?) + ,@(read-section-children level port)))) + ;; List + ((string-remove-prefix "* " line) + => (lambda (first-item) + `(itemize + ,@(port-transduce (compose (ttake-while (cut string-prefix? "* " <>) + (cut retf-unget-line port <> <>)) + (tmap (lambda (line) + `(item ,(string-remove-prefix "* " line))))) + rcons + (list `(item ,first-item)) + get-line + port)))) + ;; Blockquote + ((string-remove-prefix ">" line) + => (lambda (first-line) + (list 'blockquote + (if (%join-lines?) + (string-join + (port-transduce (compose (ttake-while (cut string-prefix? ">" <>) + (cut retf-unget-line port <> <>)) + (tmap (cut string-remove-prefix ">" <>))) + rcons + (list first-line) + get-line + port) + " ") + line)))) + ;; Link + ((string-prefix? "=>" line) + (cons 'itemize + (port-transduce (compose (ttake-while (cut string-prefix? "=>" <>) + (cut retf-unget-line port <> <>)) + (tmap link-line->item)) + rcons + (list (link-line->item line)) + get-line + port))) + ;; Preformatted text + ((string-remove-prefix "```" line) + => (lambda (alt-text) + ;; We don't use the alt text. + `(pre ,(call-with-output-string + (cut read-preformatted-text port <>))))) + ;; Ignore blank lines. + ((string-blank? line) (read-gemtext-element port)) + ;; Paragraph + (else + (list 'paragraph + (if (%join-lines?) + (string-join + (port-transduce (ttake-while paragraph-line? + (cut retf-unget-line port <> <>)) + rcons + (list line) + get-line + port) + " ") + line)))))) + +(define* (make-gemtext-reader :key join-lines? section-numbers?) + "Return a gemtext reader. + +If JOIN-LINES? is #t, lines which are not separated by a blank line +are joined into a single paragraph. + +If SECTION-NUMBERS? is #t, sections are numbered. Else, they are not." + (lambda (port) + (parameterize ((%join-lines? join-lines?) + (%section-numbers? section-numbers?)) + (match (port-transduce (tmap identity) + rcons + read-gemtext-element + port) + (() (eof-object)) + (elements `(document ,@elements)))))) + +(define-reader gemtext "0.1" make-gemtext-reader) diff --git a/tests/Makefile.am b/tests/Makefile.am index 8ba7637..26b05ad 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -26,5 +26,15 @@ EXTRA_DIST = $(TESTS) readers/rss-2.test endif !BUILD_RSS2_READER +if BUILD_GEMTEXT_READER + +TESTS += readers/gemtext.test +EXTRA_DIST = $(TESTS) + +else !BUILD_GEMTEXT_READER + +EXTRA_DIST = $(TESTS) readers/gemtext.test + +endif !BUILD_GEMTEXT_READER CLEANFILES = ast.log resolve.log rss-2.log location.log info.log diff --git a/tests/readers/gemtext.test b/tests/readers/gemtext.test new file mode 100644 index 0000000..2340dc0 --- /dev/null +++ b/tests/readers/gemtext.test @@ -0,0 +1,133 @@ +;;; Exercise Gemtext reader. -*- Scheme -*- +;;; +;;; Copyright © 2022 Arun Isaac +;;; +;;; +;;; This file is part of Skribilo. +;;; +;;; Skribilo is free software: you can redistribute it and/or modify +;;; it under the terms of the GNU General Public License as published by +;;; the Free Software Foundation, either version 3 of the License, or +;;; (at your option) any later version. +;;; +;;; Skribilo is distributed in the hope that it will be useful, +;;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;;; GNU General Public License for more details. +;;; +;;; You should have received a copy of the GNU General Public License +;;; along with Skribilo. If not, see . + +(define-module (tests gemtext) + #:use-module (srfi srfi-64) + #:use-module (ice-9 match) + #:use-module (skribilo reader)) + +(define make-gemtext-reader + (reader:make (lookup-reader 'gemtext))) + +(define-syntax-rule (match? exp pattern) + (match exp + (pattern #t) + (_ #f))) + + + +(test-begin "gemtext") + +(test-assert "basic gemtext document" + (match? (call-with-input-string "# Heading +* Mercury +* Gemini +* Apollo +## Subheading + +### Subsubheading + +> I contend that text-based websites should not exceed in size the major works of Russian literature. + +# Links + +=>https://example.com A cool website +=>gopher://example.com An even cooler gopherhole +=> gemini://example.com A supremely cool Gemini capsule +=> sftp://example.com + +``` +This is a preformatted block. +``` + +```alt +This is a preformatted block with \"alt text\". +```" + (make-gemtext-reader)) + `(document + (section #:title "Heading" #:number #f + (itemize (item "Mercury") + (item "Gemini") + (item "Apollo")) + (subsection #:title "Subheading" #:number #f + (subsubsection #:title "Subsubheading" #:number #f + (blockquote "> I contend that text-based websites should not exceed in size the major works of Russian literature.")))) + (section #:title "Links" #:number #f + (itemize (item (ref #:url "https://example.com" #:text "A cool website")) + (item (ref #:url "gopher://example.com" #:text "An even cooler gopherhole")) + (item (ref #:url "gemini://example.com" #:text "A supremely cool Gemini capsule")) + (item (ref #:url "sftp://example.com"))) + (pre "This is a preformatted block.\n") + (pre "This is a preformatted block with \"alt text\".\n"))))) + +(test-assert "do not join short lines into paragraph" + (match? (call-with-input-string "Foo +Bar" + (make-gemtext-reader)) + `(document + (paragraph "Foo") + (paragraph "Bar")))) + +(test-assert "join short lines into paragraphs" + (match? (call-with-input-string "Foo +Bar" + (make-gemtext-reader #:join-lines? #t)) + `(document + (paragraph "Foo Bar")))) + +(test-assert "do not number sections" + (match? (call-with-input-string "# Foo +## Bar" + (make-gemtext-reader)) + `(document + (section #:title "Foo" #:number #f + (subsection #:title "Bar" #:number #f))))) + +(test-assert "number sections" + (match? (call-with-input-string "# Foo +## Bar" + (make-gemtext-reader #:section-numbers? #t)) + `(document + (section #:title "Foo" #:number #t + (subsection #:title "Bar" #:number #t))))) + +(test-assert "break up links separated by blank lines into separate lists" + (match? (call-with-input-string "=>https://example.com A cool website +=>gopher://example.com An even cooler gopherhole + +=> gemini://example.com A supremely cool Gemini capsule +=> sftp://example.com" + (make-gemtext-reader)) + `(document + (itemize (item (ref #:url "https://example.com" #:text "A cool website")) + (item (ref #:url "gopher://example.com" #:text "An even cooler gopherhole"))) + (itemize (item (ref #:url "gemini://example.com" #:text "A supremely cool Gemini capsule")) + (item (ref #:url "sftp://example.com")))))) + +(test-assert "ignore blank lines that have a non-zero number of whitespace characters" + (match? (call-with-input-string "Foo + +Bar" + (make-gemtext-reader)) + `(document + (paragraph "Foo") + (paragraph "Bar")))) + +(test-end "gemtext") -- cgit v1.2.3