aboutsummaryrefslogtreecommitdiff
;;; guile-email --- Guile email parser
;;; Copyright © 2018, 2019, 2020 Arun Isaac <arunisaac@systemreboot.net>
;;;
;;; This file is part of guile-email.
;;;
;;; guile-email is free software; you can redistribute it and/or modify
;;; it under the terms of the GNU Affero General Public License as
;;; published by the Free Software Foundation; either version 3 of the
;;; License, or (at your option) any later version.
;;;
;;; guile-email is distributed in the hope that it will be useful, but
;;; WITHOUT ANY WARRANTY; without even the implied warranty of
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;;; Affero General Public License for more details.
;;;
;;; You should have received a copy of the GNU Affero General Public
;;; License along with guile-email.  If not, see
;;; <http://www.gnu.org/licenses/>.

(define-module (email utils)
  #:use-module (ice-9 match)
  #:use-module (ice-9 peg codegen)
  #:use-module (ice-9 binary-ports)
  #:use-module (ice-9 textual-ports)
  #:use-module (rnrs bytevectors)
  #:use-module ((rnrs io ports)
                #:select (call-with-port))
  #:use-module (rnrs io simple)
  #:use-module (srfi srfi-1)
  #:use-module (srfi srfi-26)
  #:export (get-line-with-delimiter
            read-objects
            read-while
            read-bytes-till
            acons*
	    alist-combine))

(define-syntax-rule (not-end-let (var expr) body ...)
  "Bind result of EXPR to VAR. If VAR is an end-of-file object, return
it. Else, execute BODY."
  (let ((var expr))
    (if (eof-object? var)
        var
        (begin body ...))))

(define (read-objects read-proc port)
  "Read all objects using READ-PROC from PORT and return them as a
list."
  (let ((x (read-proc port)))
    (if (eof-object? x)
        (list)
        (cons x (read-objects read-proc port)))))

(define* (read-while port read-proc pred)
  "Read from PORT using READ-PROC while PRED returns #t and the
end-of-file has not been reached. READ-PROC is invoked with the input
port as argument. PRED is invoked with each string returned by
READ-PROC as argument. The string for which PRED returns #f is ungot
back into PORT."
  (define (read-while-loop output)
    (let ((x (read-proc port)))
      (cond
       ((eof-object? x) x)
       ((pred x)
        (put-string output x)
        (read-while-loop output))
       (else (unget-string port x)))))

  (call-with-port (open-output-string)
    (lambda (out)
      (let ((read-result (read-while-loop out))
            (str (get-output-string out)))
        (if (and (eof-object? read-result)
                 (string-null? str))
            read-result
            str)))))

(define (bytevector-match bv1 start1 bv2 start2)
  "Return #t if bytevector BV1 starting from START1 is equal to
bytevector BV2 starting from START2, else return #f. If the
bytevectors are of unequal length, they are only compared up to the
shorter of their lengths."
  (let ((len (min (- (bytevector-length bv1) start1)
                  (- (bytevector-length bv2) start2))))
    (let loop ((i 0))
      (cond
       ((= i len) #t)
       ((not (= (bytevector-u8-ref bv1 (+ start1 i))
                (bytevector-u8-ref bv2 (+ start2 i))))
        #f)
       (else (loop (1+ i)))))))

(define (bytevector-overlap bv1 bv2)
  "Return the index of bytevector BV1 from which it is equal to
bytevector BV2 in the sense of bytevector-match. If there is no such
index, then return the length of BV1."
  ;; TODO: Maybe implement the Boyer-Moore string search algorithm
  (let loop ((offset 0))
    (cond
     ((bytevector-match bv1 offset bv2 0) offset)
     (else (loop (1+ offset))))))

(define (lookahead-bytevector-n port count)
  "Look ahead COUNT bytes into port, and return the seen bytevector."
  (let ((bv (get-bytevector-n port count)))
    (unget-bytevector port bv)
    bv))

(define (read-bytes-till port sequence)
  "Read bytes from PORT until byte SEQUENCE is seen or end-of-file is
reached. If SEQUENCE is seen, unget it to PORT and return."
  (define (read-loop out)
    (not-end-let (bv (get-bytevector-some port))
      (let ((offset (bytevector-overlap bv sequence)))
        ;; Write bytes before match point to output
        (put-bytevector out bv 0 offset)
        (cond
         ;; Matched nothing, continue searching
         ((= offset (bytevector-length bv))
          (read-loop out))
         ;; Matched full sequence, unget bytes from match point and
         ;; quit searching
         ((> (- (bytevector-length bv) offset)
             (bytevector-length sequence))
          (unget-bytevector port bv offset))
         ;; Matched partial sequence, try to match the rest
         (else
          (unget-bytevector port bv offset)
          (not-end-let
              (lookahead (lookahead-bytevector-n
                          port (bytevector-length sequence)))
            ;; Failed to match, take back the first matched byte,
            ;; write it to the output and continue searching
            (unless (bytevector=? lookahead sequence)
              (put-u8 out (get-u8 port))
              (read-loop out))))))))

  (call-with-values open-bytevector-output-port
    (lambda (out get-bytevector)
      (let ((read-result (read-loop out))
            (bv (get-bytevector)))
        (if (and (eof-object? read-result)
                 (bytevector=? bv #vu8()))
            read-result
            bv)))))

(define (get-line-with-delimiter port)
  "Read a line from PORT and return it as a string including the
delimiting linefeed character."
  (let ((line (get-line port)))
    (if (eof-object? line)
        line
        (string-append line "\n"))))

(define acons*
  (match-lambda*
    ((key value)
     (acons key value (list)))
    ((key value . rest)
     (acons key value (apply acons* rest)))
    ((alist) alist)))

(define (alist-combine alist1 alist2)
  "Combine two association lists ALIST1 and ALIST2 into a single
association list. Key-value pairs in ALIST2 are more significant and
override those in ALIST1."
  (append alist2
          (remove (match-lambda
                    ((key . _)
                     (assoc key alist2)))
                  alist1)))

(define (cg-string-ci pat accum)
  (syntax-case pat ()
    ((pat-str-syntax) (string? (syntax->datum #'pat-str-syntax))
     (let ((pat-str (syntax->datum #'pat-str-syntax)))
       (let ((plen (string-length pat-str)))
         #`(lambda (str len pos)
             (let ((end (+ pos #,plen)))
               (and (<= end len)
                    (string-ci= str #,pat-str pos end)
                    #,(case accum
                        ((all) #`(list end (list 'cg-string #,pat-str)))
                        ((name) #`(list end 'cg-string))
                        ((body) #`(list end #,pat-str))
                        ((none) #`(list end '()))
                        (else (error "bad accum" accum)))))))))))

(add-peg-compiler! 'string-ci cg-string-ci)