;;; guile-email --- Guile email parser
;;; Copyright © 2018, 2019, 2020 Arun Isaac <arunisaac@systemreboot.net>
;;;
;;; This file is part of guile-email.
;;;
;;; guile-email is free software; you can redistribute it and/or modify
;;; it under the terms of the GNU Affero General Public License as
;;; published by the Free Software Foundation; either version 3 of the
;;; License, or (at your option) any later version.
;;;
;;; guile-email is distributed in the hope that it will be useful, but
;;; WITHOUT ANY WARRANTY; without even the implied warranty of
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;;; Affero General Public License for more details.
;;;
;;; You should have received a copy of the GNU Affero General Public
;;; License along with guile-email. If not, see
;;; <http://www.gnu.org/licenses/>.
(define-module (email utils)
#:use-module (ice-9 match)
#:use-module (ice-9 peg codegen)
#:use-module (ice-9 binary-ports)
#:use-module (ice-9 textual-ports)
#:use-module (rnrs bytevectors)
#:use-module ((rnrs io ports)
#:select (call-with-port))
#:use-module (rnrs io simple)
#:use-module (srfi srfi-1)
#:use-module (srfi srfi-26)
#:export (get-line-with-delimiter
read-objects
read-while
read-bytes-till
acons*
alist-combine))
(define-syntax-rule (not-end-let (var expr) body ...)
"Bind result of EXPR to VAR. If VAR is an end-of-file object, return
it. Else, execute BODY."
(let ((var expr))
(if (eof-object? var)
var
(begin body ...))))
(define (read-objects read-proc port)
"Read all objects using READ-PROC from PORT and return them as a
list."
(let ((x (read-proc port)))
(if (eof-object? x)
(list)
(cons x (read-objects read-proc port)))))
(define* (read-while port read-proc pred)
"Read from PORT using READ-PROC while PRED returns #t and the
end-of-file has not been reached. READ-PROC is invoked with the input
port as argument. PRED is invoked with each string returned by
READ-PROC as argument. The string for which PRED returns #f is ungot
back into PORT."
(define (read-while-loop output)
(let ((x (read-proc port)))
(cond
((eof-object? x) x)
((pred x)
(put-string output x)
(read-while-loop output))
(else (unget-string port x)))))
(call-with-port (open-output-string)
(lambda (out)
(let ((read-result (read-while-loop out))
(str (get-output-string out)))
(if (and (eof-object? read-result)
(string-null? str))
read-result
str)))))
(define (bytevector-match bv1 start1 bv2 start2)
"Return #t if bytevector BV1 starting from START1 is equal to
bytevector BV2 starting from START2, else return #f. If the
bytevectors are of unequal length, they are only compared up to the
shorter of their lengths."
(let ((len (min (- (bytevector-length bv1) start1)
(- (bytevector-length bv2) start2))))
(let loop ((i 0))
(cond
((= i len) #t)
((not (= (bytevector-u8-ref bv1 (+ start1 i))
(bytevector-u8-ref bv2 (+ start2 i))))
#f)
(else (loop (1+ i)))))))
(define (bytevector-overlap bv1 bv2)
"Return the index of bytevector BV1 from which it is equal to
bytevector BV2 in the sense of bytevector-match. If there is no such
index, then return the length of BV1."
;; TODO: Maybe implement the Boyer-Moore string search algorithm
(let loop ((offset 0))
(cond
((bytevector-match bv1 offset bv2 0) offset)
(else (loop (1+ offset))))))
(define (lookahead-bytevector-n port count)
"Look ahead COUNT bytes into port, and return the seen bytevector."
(let ((bv (get-bytevector-n port count)))
(unget-bytevector port bv)
bv))
(define (read-bytes-till port sequence)
"Read bytes from PORT until byte SEQUENCE is seen or end-of-file is
reached. If SEQUENCE is seen, unget it to PORT and return."
(define (read-loop out)
(not-end-let (bv (get-bytevector-some port))
(let ((offset (bytevector-overlap bv sequence)))
;; Write bytes before match point to output
(put-bytevector out bv 0 offset)
(cond
;; Matched nothing, continue searching
((= offset (bytevector-length bv))
(read-loop out))
;; Matched full sequence, unget bytes from match point and
;; quit searching
((> (- (bytevector-length bv) offset)
(bytevector-length sequence))
(unget-bytevector port bv offset))
;; Matched partial sequence, try to match the rest
(else
(unget-bytevector port bv offset)
(not-end-let
(lookahead (lookahead-bytevector-n
port (bytevector-length sequence)))
;; Failed to match, take back the first matched byte,
;; write it to the output and continue searching
(unless (bytevector=? lookahead sequence)
(put-u8 out (get-u8 port))
(read-loop out))))))))
(call-with-values open-bytevector-output-port
(lambda (out get-bytevector)
(let ((read-result (read-loop out))
(bv (get-bytevector)))
(if (and (eof-object? read-result)
(bytevector=? bv #vu8()))
read-result
bv)))))
(define (get-line-with-delimiter port)
"Read a line from PORT and return it as a string including the
delimiting linefeed character."
(let ((line (get-line port)))
(if (eof-object? line)
line
(string-append line "\n"))))
(define acons*
(match-lambda*
((key value)
(acons key value (list)))
((key value . rest)
(acons key value (apply acons* rest)))
((alist) alist)))
(define (alist-combine alist1 alist2)
"Combine two association lists ALIST1 and ALIST2 into a single
association list. Key-value pairs in ALIST2 are more significant and
override those in ALIST1."
(append alist2
(remove (match-lambda
((key . _)
(assoc key alist2)))
alist1)))
(define (cg-string-ci pat accum)
(syntax-case pat ()
((pat-str-syntax) (string? (syntax->datum #'pat-str-syntax))
(let ((pat-str (syntax->datum #'pat-str-syntax)))
(let ((plen (string-length pat-str)))
#`(lambda (str len pos)
(let ((end (+ pos #,plen)))
(and (<= end len)
(string-ci= str #,pat-str pos end)
#,(case accum
((all) #`(list end (list 'cg-string #,pat-str)))
((name) #`(list end 'cg-string))
((body) #`(list end #,pat-str))
((none) #`(list end '()))
(else (error "bad accum" accum)))))))))))
(add-peg-compiler! 'string-ci cg-string-ci)