summaryrefslogtreecommitdiff
path: root/email/utils.scm
blob: 718ad526507275dbc88500fe8cfcc49f2e2a41f3 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
;;; guile-email --- Guile email parser
;;; Copyright © 2018, 2019, 2020 Arun Isaac <arunisaac@systemreboot.net>
;;;
;;; This file is part of guile-email.
;;;
;;; guile-email is free software; you can redistribute it and/or modify
;;; it under the terms of the GNU Affero General Public License as
;;; published by the Free Software Foundation; either version 3 of the
;;; License, or (at your option) any later version.
;;;
;;; guile-email is distributed in the hope that it will be useful, but
;;; WITHOUT ANY WARRANTY; without even the implied warranty of
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;;; Affero General Public License for more details.
;;;
;;; You should have received a copy of the GNU Affero General Public
;;; License along with guile-email.  If not, see
;;; <http://www.gnu.org/licenses/>.

(define-module (email utils)
  #:use-module (ice-9 match)
  #:use-module (ice-9 peg codegen)
  #:use-module (ice-9 binary-ports)
  #:use-module (ice-9 textual-ports)
  #:use-module (rnrs bytevectors)
  #:use-module ((rnrs io ports)
                #:select (call-with-port))
  #:use-module (rnrs io simple)
  #:use-module (srfi srfi-1)
  #:use-module (srfi srfi-26)
  #:export (get-line-with-delimiter
            read-objects
            read-while
            read-bytes-till
            acons*
	    alist-combine))

(define-syntax-rule (not-end-let (var expr) body ...)
  "Bind result of EXPR to VAR. If VAR is an end-of-file object, return
it. Else, execute BODY."
  (let ((var expr))
    (if (eof-object? var)
        var
        (begin body ...))))

(define (read-objects read-proc port)
  "Read all objects using READ-PROC from PORT and return them as a
list."
  (let ((x (read-proc port)))
    (if (eof-object? x)
        (list)
        (cons x (read-objects read-proc port)))))

(define* (read-while port read-proc pred)
  "Read from PORT using READ-PROC while PRED returns #t and the
end-of-file has not been reached. READ-PROC is invoked with the input
port as argument. PRED is invoked with each string returned by
READ-PROC as argument. The string for which PRED returns #f is ungot
back into PORT."
  (define (read-while-loop output)
    (let ((x (read-proc port)))
      (cond
       ((eof-object? x) x)
       ((pred x)
        (put-string output x)
        (read-while-loop output))
       (else (unget-string port x)))))

  (call-with-port (open-output-string)
    (lambda (out)
      (let ((read-result (read-while-loop out))
            (str (get-output-string out)))
        (if (and (eof-object? read-result)
                 (string-null? str))
            read-result
            str)))))

(define (bytevector-match bv1 start1 bv2 start2)
  "Return #t if bytevector BV1 starting from START1 is equal to
bytevector BV2 starting from START2, else return #f. If the
bytevectors are of unequal length, they are only compared up to the
shorter of their lengths."
  (let ((len (min (- (bytevector-length bv1) start1)
                  (- (bytevector-length bv2) start2))))
    (let loop ((i 0))
      (cond
       ((= i len) #t)
       ((not (= (bytevector-u8-ref bv1 (+ start1 i))
                (bytevector-u8-ref bv2 (+ start2 i))))
        #f)
       (else (loop (1+ i)))))))

(define (bytevector-overlap bv1 bv2)
  "Return the index of bytevector BV1 from which it is equal to
bytevector BV2 in the sense of bytevector-match. If there is no such
index, then return the length of BV1."
  ;; TODO: Maybe implement the Boyer-Moore string search algorithm
  (let loop ((offset 0))
    (cond
     ((bytevector-match bv1 offset bv2 0) offset)
     (else (loop (1+ offset))))))

(define (lookahead-bytevector-n port count)
  "Look ahead COUNT bytes into port, and return the seen bytevector."
  (let ((bv (get-bytevector-n port count)))
    (unget-bytevector port bv)
    bv))

(define (read-bytes-till port sequence)
  "Read bytes from PORT until byte SEQUENCE is seen or end-of-file is
reached. If SEQUENCE is seen, unget it to PORT and return."
  (define (read-loop out)
    (not-end-let (bv (get-bytevector-some port))
      (let ((offset (bytevector-overlap bv sequence)))
        ;; Write bytes before match point to output
        (put-bytevector out bv 0 offset)
        (cond
         ;; Matched nothing, continue searching
         ((= offset (bytevector-length bv))
          (read-loop out))
         ;; Matched full sequence, unget bytes from match point and
         ;; quit searching
         ((> (- (bytevector-length bv) offset)
             (bytevector-length sequence))
          (unget-bytevector port bv offset))
         ;; Matched partial sequence, try to match the rest
         (else
          (unget-bytevector port bv offset)
          (not-end-let
              (lookahead (lookahead-bytevector-n
                          port (bytevector-length sequence)))
            ;; Failed to match, take back the first matched byte,
            ;; write it to the output and continue searching
            (unless (bytevector=? lookahead sequence)
              (put-u8 out (get-u8 port))
              (read-loop out))))))))

  (call-with-values open-bytevector-output-port
    (lambda (out get-bytevector)
      (let ((read-result (read-loop out))
            (bv (get-bytevector)))
        (if (and (eof-object? read-result)
                 (bytevector=? bv #vu8()))
            read-result
            bv)))))

(define (get-line-with-delimiter port)
  "Read a line from PORT and return it as a string including the
delimiting linefeed character."
  (let ((line (get-line port)))
    (if (eof-object? line)
        line
        (string-append line "\n"))))

(define acons*
  (match-lambda*
    ((key value)
     (acons key value (list)))
    ((key value . rest)
     (acons key value (apply acons* rest)))
    ((alist) alist)))

(define (alist-combine alist1 alist2)
  "Combine two association lists ALIST1 and ALIST2 into a single
association list. Key-value pairs in ALIST2 are more significant and
override those in ALIST1."
  (append alist2
          (remove (match-lambda
                    ((key . _)
                     (assoc key alist2)))
                  alist1)))

(define (cg-string-ci pat accum)
  (syntax-case pat ()
    ((pat-str-syntax) (string? (syntax->datum #'pat-str-syntax))
     (let ((pat-str (syntax->datum #'pat-str-syntax)))
       (let ((plen (string-length pat-str)))
         #`(lambda (str len pos)
             (let ((end (+ pos #,plen)))
               (and (<= end len)
                    (string-ci= str #,pat-str pos end)
                    #,(case accum
                        ((all) #`(list end (list 'cg-string #,pat-str)))
                        ((name) #`(list end 'cg-string))
                        ((body) #`(list end #,pat-str))
                        ((none) #`(list end '()))
                        (else (error "bad accum" accum)))))))))))

(add-peg-compiler! 'string-ci cg-string-ci)