;;; guile-email --- Guile email parser ;;; Copyright © 2018, 2019, 2020 Arun Isaac <arunisaac@systemreboot.net> ;;; ;;; This file is part of guile-email. ;;; ;;; guile-email is free software; you can redistribute it and/or modify ;;; it under the terms of the GNU Affero General Public License as ;;; published by the Free Software Foundation; either version 3 of the ;;; License, or (at your option) any later version. ;;; ;;; guile-email is distributed in the hope that it will be useful, but ;;; WITHOUT ANY WARRANTY; without even the implied warranty of ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ;;; Affero General Public License for more details. ;;; ;;; You should have received a copy of the GNU Affero General Public ;;; License along with guile-email. If not, see ;;; <http://www.gnu.org/licenses/>. (define-module (email utils) #:use-module (ice-9 match) #:use-module (ice-9 peg codegen) #:use-module (ice-9 binary-ports) #:use-module (ice-9 textual-ports) #:use-module (rnrs bytevectors) #:use-module ((rnrs io ports) #:select (call-with-port)) #:use-module (rnrs io simple) #:use-module (srfi srfi-1) #:use-module (srfi srfi-26) #:export (get-line-with-delimiter read-objects read-while read-bytes-till acons* alist-combine)) (define-syntax-rule (not-end-let (var expr) body ...) "Bind result of EXPR to VAR. If VAR is an end-of-file object, return it. Else, execute BODY." (let ((var expr)) (if (eof-object? var) var (begin body ...)))) (define (read-objects read-proc port) "Read all objects using READ-PROC from PORT and return them as a list." (let ((x (read-proc port))) (if (eof-object? x) (list) (cons x (read-objects read-proc port))))) (define* (read-while port read-proc pred) "Read from PORT using READ-PROC while PRED returns #t and the end-of-file has not been reached. READ-PROC is invoked with the input port as argument. PRED is invoked with each string returned by READ-PROC as argument. The string for which PRED returns #f is ungot back into PORT." (define (read-while-loop output) (let ((x (read-proc port))) (cond ((eof-object? x) x) ((pred x) (put-string output x) (read-while-loop output)) (else (unget-string port x))))) (call-with-port (open-output-string) (lambda (out) (let ((read-result (read-while-loop out)) (str (get-output-string out))) (if (and (eof-object? read-result) (string-null? str)) read-result str))))) (define (bytevector-match bv1 start1 bv2 start2) "Return #t if bytevector BV1 starting from START1 is equal to bytevector BV2 starting from START2, else return #f. If the bytevectors are of unequal length, they are only compared up to the shorter of their lengths." (let ((len (min (- (bytevector-length bv1) start1) (- (bytevector-length bv2) start2)))) (let loop ((i 0)) (cond ((= i len) #t) ((not (= (bytevector-u8-ref bv1 (+ start1 i)) (bytevector-u8-ref bv2 (+ start2 i)))) #f) (else (loop (1+ i))))))) (define (bytevector-overlap bv1 bv2) "Return the index of bytevector BV1 from which it is equal to bytevector BV2 in the sense of bytevector-match. If there is no such index, then return the length of BV1." ;; TODO: Maybe implement the Boyer-Moore string search algorithm (let loop ((offset 0)) (cond ((bytevector-match bv1 offset bv2 0) offset) (else (loop (1+ offset)))))) (define (lookahead-bytevector-n port count) "Look ahead COUNT bytes into port, and return the seen bytevector." (let ((bv (get-bytevector-n port count))) (unget-bytevector port bv) bv)) (define (read-bytes-till port sequence) "Read bytes from PORT until byte SEQUENCE is seen or end-of-file is reached. If SEQUENCE is seen, unget it to PORT and return." (define (read-loop out) (not-end-let (bv (get-bytevector-some port)) (let ((offset (bytevector-overlap bv sequence))) ;; Write bytes before match point to output (put-bytevector out bv 0 offset) (cond ;; Matched nothing, continue searching ((= offset (bytevector-length bv)) (read-loop out)) ;; Matched full sequence, unget bytes from match point and ;; quit searching ((> (- (bytevector-length bv) offset) (bytevector-length sequence)) (unget-bytevector port bv offset)) ;; Matched partial sequence, try to match the rest (else (unget-bytevector port bv offset) (not-end-let (lookahead (lookahead-bytevector-n port (bytevector-length sequence))) ;; Failed to match, take back the first matched byte, ;; write it to the output and continue searching (unless (bytevector=? lookahead sequence) (put-u8 out (get-u8 port)) (read-loop out)))))))) (call-with-values open-bytevector-output-port (lambda (out get-bytevector) (let ((read-result (read-loop out)) (bv (get-bytevector))) (if (and (eof-object? read-result) (bytevector=? bv #vu8())) read-result bv))))) (define (get-line-with-delimiter port) "Read a line from PORT and return it as a string including the delimiting linefeed character." (let ((line (get-line port))) (if (eof-object? line) line (string-append line "\n")))) (define acons* (match-lambda* ((key value) (acons key value (list))) ((key value . rest) (acons key value (apply acons* rest))) ((alist) alist))) (define (alist-combine alist1 alist2) "Combine two association lists ALIST1 and ALIST2 into a single association list. Key-value pairs in ALIST2 are more significant and override those in ALIST1." (append alist2 (remove (match-lambda ((key . _) (assoc key alist2))) alist1))) (define (cg-string-ci pat accum) (syntax-case pat () ((pat-str-syntax) (string? (syntax->datum #'pat-str-syntax)) (let ((pat-str (syntax->datum #'pat-str-syntax))) (let ((plen (string-length pat-str))) #`(lambda (str len pos) (let ((end (+ pos #,plen))) (and (<= end len) (string-ci= str #,pat-str pos end) #,(case accum ((all) #`(list end (list 'cg-string #,pat-str))) ((name) #`(list end 'cg-string)) ((body) #`(list end #,pat-str)) ((none) #`(list end '())) (else (error "bad accum" accum))))))))))) (add-peg-compiler! 'string-ci cg-string-ci)