summary refs log tree commit diff
path: root/email
diff options
context:
space:
mode:
Diffstat (limited to 'email')
-rw-r--r--email/base64.scm255
-rw-r--r--email/email.scm804
-rw-r--r--email/quoted-printable.scm57
-rw-r--r--email/utils.scm95
4 files changed, 1211 insertions, 0 deletions
diff --git a/email/base64.scm b/email/base64.scm
new file mode 100644
index 0000000..6b11b3f
--- /dev/null
+++ b/email/base64.scm
@@ -0,0 +1,255 @@
+;; -*- mode: scheme; coding: utf-8 -*-
+;;
+;; This module was renamed from (weinholt text base64 (1 0 20100612)) to
+;; (guix base64) by Nikita Karetnikov <nikita@karetnikov.org> on
+;; February 12, 2014.
+;;
+;; Some optimizations made by Ludovic Courtès <ludo@gnu.org>, 2015.
+;; Turned into a Guile module (instead of R6RS).
+;;
+;; This module was imported into the source tree of guile-email by
+;; Arun Isaac <arunisaac@systemreboot.net> on September 6, 2018.
+;;
+;; This program is free software: you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation, either version 3 of the License, or
+;; (at your option) any later version.
+;;
+;; This program is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with this program.  If not, see <http://www.gnu.org/licenses/>.
+;;
+;; This file incorporates work covered by the following copyright and  
+;; permission notice:
+;;
+;;   Copyright © 2009, 2010 Göran Weinholt <goran@weinholt.se>
+;;
+;;   Permission is hereby granted, free of charge, to any person obtaining a
+;;   copy of this software and associated documentation files (the "Software"),
+;;   to deal in the Software without restriction, including without limitation
+;;   the rights to use, copy, modify, merge, publish, distribute, sublicense,
+;;   and/or sell copies of the Software, and to permit persons to whom the
+;;   Software is furnished to do so, subject to the following conditions:
+;;
+;;   The above copyright notice and this permission notice shall be included in
+;;   all copies or substantial portions of the Software.
+;;
+;;   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+;;   IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+;;   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+;;   THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+;;   LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+;;   FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+;;   DEALINGS IN THE SOFTWARE.
+
+;; RFC 4648 Base-N Encodings
+
+(define-module (email base64)
+  #:export (base64-encode
+            base64-decode
+            base64-alphabet
+            base64url-alphabet
+            get-delimited-base64
+            put-delimited-base64)
+  #:use-module (rnrs)
+  #:use-module ((srfi srfi-13)
+                #:select (string-index
+                          string-prefix? string-suffix?
+                          string-concatenate string-trim-both)))
+
+;; (define-alias NEW OLD) defines NEW as an identifier macro that is
+;; rewritten to OLD wherever it appears, so NEW is resolved during macro
+;; expansion rather than through an extra procedure binding.
+(define-syntax define-alias
+  (syntax-rules ()
+    ((_ new old)
+     (define-syntax new (identifier-syntax old)))))
+
+;; Force the use of Guile's own primitives to avoid the overhead of its 'fx'
+;; procedures.
+
+(define-alias fxbit-field bitwise-bit-field)
+(define-alias fxarithmetic-shift ash)
+(define-alias fxarithmetic-shift-left ash)
+(define-alias fxand logand)
+(define-alias fxior logior)
+(define-alias fxxor logxor)
+
+;; The standard 64-character alphabet.  The index of a character in the
+;; string is the 6-bit value it encodes.
+(define base64-alphabet
+  "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/")
+
+;; Same as base64-alphabet except that the last two characters are
+;; #\- and #\_ instead of #\+ and #\/ (the URL and filename safe
+;; variant).
+(define base64url-alphabet
+  "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_")
+
+;; Encodes bytes of a bytevector as base64 text.  The shorter arities
+;; fill in defaults (whole bytevector, no line wrapping, padding
+;; enabled, base64-alphabet, no port) and delegate to the seven-argument
+;; clause.
+(define base64-encode
+  (case-lambda
+    ;; Simple interface. Returns a string containing the canonical
+    ;; base64 representation of the given bytevector.
+    ((bv)
+     (base64-encode bv 0 (bytevector-length bv) #f #f base64-alphabet #f))
+    ((bv start)
+     (base64-encode bv start (bytevector-length bv) #f #f base64-alphabet #f))
+    ((bv start end)
+     (base64-encode bv start end #f #f base64-alphabet #f))
+    ((bv start end line-length)
+     (base64-encode bv start end line-length #f base64-alphabet #f))
+    ((bv start end line-length no-padding)
+     (base64-encode bv start end line-length no-padding base64-alphabet #f))
+    ((bv start end line-length no-padding alphabet)
+     (base64-encode bv start end line-length no-padding alphabet #f))
+    ;; Base64 encodes the bytes [start,end[ in the given bytevector.
+    ;; Lines are limited to line-length characters (unless #f),
+    ;; which must be a multiple of four. To omit the padding
+    ;; characters (#\=) set no-padding to a true value. If port is
+    ;; #f, returns a string.
+    ((bv start end line-length no-padding alphabet port)
+     (assert (or (not line-length) (zero? (mod line-length 4))))
+     ;; When a port is supplied, write to it and make `extract' return
+     ;; no values; otherwise collect the output in a string port.
+     (let-values (((p extract) (if port
+                                   (values port (lambda () (values)))
+                                   (open-string-output-port))))
+       ;; `put' is plain put-char unless line wrapping is requested, in
+       ;; which case it counts characters and emits a linefeed before
+       ;; the character that would exceed line-length.
+       (letrec ((put (if line-length
+                         (let ((chars 0))
+                           (lambda (p c)
+                             (when (fx=? chars line-length)
+                               (set! chars 0)
+                               (put-char p #\linefeed))
+                             (set! chars (fx+ chars 1))
+                             (put-char p c)))
+                         put-char)))
+         (let lp ((i start))
+           (cond ((= i end))
+                 ;; Full group: three bytes become four characters.
+                 ((<= (+ i 3) end)
+                  (let ((x (bytevector-uint-ref bv i (endianness big) 3)))
+                    (put p (string-ref alphabet (fxbit-field x 18 24)))
+                    (put p (string-ref alphabet (fxbit-field x 12 18)))
+                    (put p (string-ref alphabet (fxbit-field x 6 12)))
+                    (put p (string-ref alphabet (fxbit-field x 0 6)))
+                    (lp (+ i 3))))
+                 ;; Two trailing bytes: three characters plus one #\=.
+                 ((<= (+ i 2) end)
+                  (let ((x (fxarithmetic-shift-left (bytevector-u16-ref bv i (endianness big)) 8)))
+                    (put p (string-ref alphabet (fxbit-field x 18 24)))
+                    (put p (string-ref alphabet (fxbit-field x 12 18)))
+                    (put p (string-ref alphabet (fxbit-field x 6 12)))
+                    (unless no-padding
+                      (put p #\=))))
+                 ;; One trailing byte: two characters plus two #\=.
+                 (else
+                  (let ((x (fxarithmetic-shift-left (bytevector-u8-ref bv i) 16)))
+                    (put p (string-ref alphabet (fxbit-field x 18 24)))
+                    (put p (string-ref alphabet (fxbit-field x 12 18)))
+                    (unless no-padding
+                      (put p #\=)
+                      (put p #\=)))))))
+       (extract)))))
+
+;; Decodes a base64 string. The string must contain nothing but
+;; base64 data: no whitespace or line breaks, and a length that is a
+;; multiple of four. #\= padding is accepted, but only in the last
+;; group of four characters. ALPHABET defaults to base64-alphabet.
+;; If PORT is #f the decoded bytes are returned as a bytevector;
+;; otherwise they are written to PORT. Raises an error on input of
+;; the wrong length or containing characters outside ALPHABET.
+
+(define base64-decode
+  (case-lambda
+    ((str)
+     (base64-decode str base64-alphabet #f))
+    ((str alphabet)
+     (base64-decode str alphabet #f))
+    ((str alphabet port)
+     (unless (zero? (mod (string-length str) 4))
+       (error 'base64-decode
+              "input string must be a multiple of four characters"))
+     ;; When a port is supplied, write to it and make `extract' return
+     ;; no values; otherwise collect the output in a bytevector port.
+     (let-values (((p extract) (if port
+                                   (values port (lambda () (values)))
+                                   (open-bytevector-output-port))))
+       ;; Walk the input four characters at a time.
+       (do ((i 0 (+ i 4)))
+           ((= i (string-length str))
+            (extract))
+         (let ((c1 (string-ref str i))
+               (c2 (string-ref str (+ i 1)))
+               (c3 (string-ref str (+ i 2)))
+               (c4 (string-ref str (+ i 3))))
+           ;; TODO: be more clever than string-index
+           ;; Each index is the 6-bit value of the character, or #f if
+           ;; the character is not in ALPHABET.
+           (let ((i1 (string-index alphabet c1))
+                 (i2 (string-index alphabet c2))
+                 (i3 (string-index alphabet c3))
+                 (i4 (string-index alphabet c4)))
+             (cond ((and i1 i2 i3 i4)
+                    (let ((x (fxior (fxarithmetic-shift-left i1 18)
+                                    (fxarithmetic-shift-left i2 12)
+                                    (fxarithmetic-shift-left i3 6)
+                                    i4)))
+                      (put-u8 p (fxbit-field x 16 24))
+                      (put-u8 p (fxbit-field x 8 16))
+                      (put-u8 p (fxbit-field x 0 8))))
+                   ;; Final group with one #\=: two bytes of output.
+                   ((and i1 i2 i3 (char=? c4 #\=)
+                         (= i (- (string-length str) 4)))
+                    (let ((x (fxior (fxarithmetic-shift-left i1 18)
+                                    (fxarithmetic-shift-left i2 12)
+                                    (fxarithmetic-shift-left i3 6))))
+                      (put-u8 p (fxbit-field x 16 24))
+                      (put-u8 p (fxbit-field x 8 16))))
+                   ;; Final group with two #\=: one byte of output.
+                   ((and i1 i2 (char=? c3 #\=) (char=? c4 #\=)
+                         (= i (- (string-length str) 4)))
+                    (let ((x (fxior (fxarithmetic-shift-left i1 18)
+                                    (fxarithmetic-shift-left i2 12))))
+                      (put-u8 p (fxbit-field x 16 24))))
+                   (else
+                    (error 'base64-decode "invalid input"
+                           (list c1 c2 c3 c4)))))))))))
+
+;; Reads one line from PORT and returns F applied to it.  At end of
+;; file the eof object is returned and F is not called.
+(define (get-line-comp f port)
+  (cond ((port-eof? port) (eof-object))
+        (else (f (get-line port)))))
+
+;; Reads the common -----BEGIN/END type----- delimited format from
+;; the given port. Returns two values: a string with the type and a
+;; bytevector containing the base64 decoded data. If there is an eof
+;; before the BEGIN delimiter, the values are "" and the eof object.
+;; Lines before the BEGIN delimiter that are empty or unrecognized are
+;; skipped. Raises an error if the input ends inside a delimited
+;; block or if the END delimiter does not match the BEGIN delimiter.
+
+(define (get-delimited-base64 port)
+  (define (get-first-data-line port)
+    ;; Some MIME data has header fields in the same format as mail
+    ;; or http. These are ignored.
+    (let ((line (get-line-comp string-trim-both port)))
+      (cond ((eof-object? line) line)
+            ((string-index line #\:)
+             (let lp ()                           ;read until empty line
+               (let ((line (get-line-comp string-trim-both port)))
+                 (cond ((eof-object? line)
+                        ;; Input ended inside the header block.  Hand
+                        ;; the eof object back so the caller reports
+                        ;; "unexpected end of file" instead of failing
+                        ;; in string=? with a type error.
+                        line)
+                       ((string=? line "")
+                        (get-line-comp string-trim-both port))
+                       (else (lp))))))
+            (else line))))
+  (let ((line (get-line-comp string-trim-both port)))
+    (cond ((eof-object? line)
+           (values "" (eof-object)))
+          ((string=? line "")
+           (get-delimited-base64 port))
+          ((and (string-prefix? "-----BEGIN " line)
+                (string-suffix? "-----" line))
+           ;; 11 is the length of "-----BEGIN " and 5 that of "-----".
+           (let* ((type (substring line 11 (- (string-length line) 5)))
+                  (endline (string-append "-----END " type "-----")))
+             (let-values (((outp extract) (open-bytevector-output-port)))
+               (let lp ((line (get-first-data-line port)))
+                 (cond ((eof-object? line)
+                        (error 'get-delimited-base64
+                               "unexpected end of file"))
+                       ((string-prefix? "-" line)
+                        (unless (string=? line endline)
+                          (error 'get-delimited-base64
+                                 "bad end delimiter" type line))
+                        (values type (extract)))
+                       (else
+                        (unless (and (= (string-length line) 5)
+                                     (string-prefix? "=" line)) ;Skip Radix-64 checksum
+                          (base64-decode line base64-alphabet outp))
+                        (lp (get-line-comp string-trim-both port))))))))
+          (else     ;skip garbage (like in openssl x509 -in foo -text output).
+           (get-delimited-base64 port)))))
+
+;; Writes BV to PORT in the -----BEGIN/END type----- delimited format:
+;; a BEGIN line naming TYPE, the base64 encoding of the whole
+;; bytevector wrapped at LINE-LENGTH characters (76 when omitted), and
+;; a matching END line.
+(define* (put-delimited-base64 port type bv #:optional (line-length 76))
+  (let ((begin-line (string-append "-----BEGIN " type "-----\n"))
+        (end-line (string-append "\n-----END " type "-----\n")))
+    (display begin-line port)
+    (base64-encode bv 0 (bytevector-length bv)
+                   line-length #f base64-alphabet port)
+    (display end-line port)))
diff --git a/email/email.scm b/email/email.scm
new file mode 100644
index 0000000..ccda3ac
--- /dev/null
+++ b/email/email.scm
@@ -0,0 +1,804 @@
+;;; guile-email --- Guile email parser
+;;; Copyright © 2018 Arun Isaac <arunisaac@systemreboot.net>
+;;;
+;;; This file is part of guile-email.
+;;;
+;;; guile-email is free software; you can redistribute it and/or modify
+;;; it under the terms of the GNU Affero General Public License as
+;;; published by the Free Software Foundation; either version 3 of the
+;;; License, or (at your option) any later version.
+;;;
+;;; guile-email is distributed in the hope that it will be useful, but
+;;; WITHOUT ANY WARRANTY; without even the implied warranty of
+;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;;; Affero General Public License for more details.
+;;;
+;;; You should have received a copy of the GNU Affero General Public
+;;; License along with guile-email.  If not, see
+;;; <http://www.gnu.org/licenses/>.
+
+(define-module (email email)
+  #:use-module (ice-9 iconv)
+  #:use-module (ice-9 match)
+  #:use-module (ice-9 peg)
+  #:use-module (ice-9 regex)
+  #:use-module (rnrs io simple)
+  #:use-module (srfi srfi-1)
+  #:use-module (srfi srfi-2)
+  #:use-module (srfi srfi-9)
+  #:use-module (srfi srfi-11)
+  #:use-module (srfi srfi-19)
+  #:use-module (srfi srfi-26)
+  #:use-module (sxml transform)
+  #:use-module (email base64)
+  #:use-module (email quoted-printable)
+  #:use-module ((email utils)
+		#:select (get-line-with-delimiter
+			  read-objects read-while
+			  acons* alist-delete*))
+  #:export (<email>
+	    make-email
+	    email?
+	    email-headers
+	    email-body
+	    <mime-entity>
+	    make-mime-entity
+	    mime-entity?
+	    mime-entity-headers
+	    mime-entity-body
+	    email->headers+body
+	    parse-email
+	    parse-email-headers
+	    parse-email-body
+	    parse-email-address
+	    interpret-address
+	    mbox->emails))
+
+(define (flatten-and-filter terms tree)
+  "Flatten TREE with keyword-flatten, stopping at lists headed by one of
+the symbols in TERMS, and return only the list elements of the result."
+  (let ((flattened (keyword-flatten terms tree)))
+    (filter list? flattened)))
+
+;; Record pairing the headers of a message with its body.
+;; Constructor: (make-email headers body).  Predicate: email?.
+;; Accessors: email-headers, email-body.
+(define-record-type <email>
+  (make-email headers body)
+  email?
+  (headers email-headers)
+  (body email-body))
+
+;; Record pairing the headers of a MIME entity with its body.
+;; Constructor: (make-mime-entity headers body).  Predicate:
+;; mime-entity?.  Accessors: mime-entity-headers, mime-entity-body.
+(define-record-type <mime-entity>
+  (make-mime-entity headers body)
+  mime-entity?
+  (headers mime-entity-headers)
+  (body mime-entity-body))
+
+;; Downcase a string and convert the result to a symbol, e.g. "Base64"
+;; becomes the symbol base64.
+(define string->lcase-symbol
+  (compose string->symbol string-downcase))
+
+;;; PEG parser implementing the ABNF grammar specified in RFC5322
+;;; (Internet Message Format), RFC6854 (Update to Internet Message
+;;; Format to Allow Group Syntax in the "From:" and "Sender:" Header
+;;; Fields)
+
+;;; Obsolete syntax has not been implemented.
+
+;;; Core ABNF rules from RFC5234
+
+(define-peg-pattern alpha body
+  (or (range #\A #\Z) (range #\a #\z)))
+
+;; Though line endings should be crlf (\r\n), we also tolerate bare
+;; line feeds (\n)
+(define-peg-pattern crlf none
+  (or "\r\n" "\n"))
+
+(define-peg-pattern digit body
+  (range #\0 #\9))
+
+(define-peg-pattern dquote body
+  "\"")
+
+;; Printable ASCII characters and UTF-8 characters > \x7f (RFC6532)
+(define-peg-pattern vchar body
+  (and (not-followed-by (or (range #\Nul #\Space)
+			    "\x7f"))
+       peg-any))
+
+(define-peg-pattern wsp body
+  (or " " "\t"))
+
+(define-peg-pattern lwsp body
+  (* (or wsp (and crlf wsp))))
+
+;; Define NAME as a PEG pattern that matches a single vchar, provided
+;; the input at that point does not start with any of the EXCEPTIONS
+;; patterns.
+(define-syntax-rule (define-printable-ascii-character-pattern name . exceptions)
+  (define-peg-pattern name body
+    (and (not-followed-by (or . exceptions))
+	 vchar)))
+
+;;; Quoted characters
+
+(define-peg-pattern quoted-pair body
+  (and (ignore "\\") (or vchar wsp)))
+
+;;; Folding white space and comments
+
+(define-peg-pattern fws body
+  (and (? (and (* wsp) crlf)) (+ wsp)))
+
+(define-printable-ascii-character-pattern ctext "(" ")" "\\")
+
+(define-peg-pattern comment none
+  (and "(" (* (and (? fws) ccontent)) ")"))
+
+(define-peg-pattern ccontent body
+  (or ctext quoted-pair comment))
+
+(define-peg-pattern cfws body
+  (or (and (+ (and (? fws) comment)) (? fws)) fws))
+
+;;; Atom
+
+(define-printable-ascii-character-pattern atext
+  "\"" "(" ")" "," "." ":" ";" "<" ">" "@" "[" "\\" "]")
+
+(define-peg-pattern atom body
+  (and (? cfws) (+ atext) (? cfws)))
+
+(define-peg-pattern dot-atom-text body
+  (and (+ atext) (* (and "." (+ atext)))))
+
+(define-peg-pattern dot-atom body
+  (and (? cfws) dot-atom-text (? cfws)))
+
+;;; Quoted strings
+
+(define-printable-ascii-character-pattern qtext "\\" "\"")
+
+(define-peg-pattern qcontent body
+  (or qtext quoted-pair))
+
+;; TODO: Remove workaround guile peg bug for ignore
+(define-peg-pattern quoted-string body
+  (and (? cfws) (ignore (and dquote))
+       (* (and (? fws) qcontent))
+       (? fws)
+       (ignore (and dquote)) (? cfws)))
+
+;;; Miscellaneous tokens
+
+(define-peg-pattern word body
+  (or atom quoted-string))
+
+(define-peg-pattern phrase body
+  (+ word))
+
+;; ABNF modified to ignore leading whitespace
+;; ABNF modified to allow for blank lines in folded field
+(define-peg-pattern unstructured body
+  (and (ignore (? fws))
+       (* (and (? fws) (? vchar)))
+       (ignore (* wsp))))
+
+;;; Date and time specification
+
+(define-peg-pattern day-name body
+  (or "Mon" "Tue" "Wed" "Thu" "Fri" "Sat" "Sun"))
+
+(define-peg-pattern day-of-week all
+  (and (ignore (? fws)) day-name))
+
+;; TODO: Remove workaround guile peg bug for ignore
+(define-peg-pattern day all
+  (and (ignore (? fws)) digit (? digit) (ignore (and fws))))
+
+(define-peg-pattern month all
+  (or "Jan" "Feb" "Mar" "Apr" "May" "Jun"
+      "Jul" "Aug" "Sep" "Oct" "Nov" "Dec"))
+
+;; TODO: Remove workaround guile peg bug for ignore
+(define-peg-pattern year all
+  (and (ignore (and fws)) digit digit digit digit (ignore (and fws))))
+
+(define-peg-pattern date all
+  (and day month year))
+
+(define-peg-pattern hours all
+  (and digit digit))
+
+(define-peg-pattern minutes all
+  (and digit digit))
+
+(define-peg-pattern seconds all
+  (and digit digit))
+
+(define-peg-pattern time-of-day all
+  (and hours (ignore ":") minutes (? (and (ignore ":") seconds))))
+
+;; zone-sign, zone-hours and zone-minutes do not exist in the ABNF
+;; specified in RFC5322. But, we have it here since it eases parsing
+;; the zone and converting it to a number of seconds as required by
+;; the make-date procedure of SRFI-19.
+(define-peg-pattern zone-sign all
+  (or "+" "-"))
+
+(define-peg-pattern zone-hours all
+  (and digit digit))
+
+(define-peg-pattern zone-minutes all
+  (and digit digit))
+
+;; TODO: Remove workaround guile peg bug for ignore
+(define-peg-pattern zone all
+  (and (ignore (and fws)) zone-sign zone-hours zone-minutes))
+
+(define-peg-pattern time all
+  (and time-of-day zone))
+
+(define-peg-pattern date-time all
+  (and (? (and day-of-week (ignore ","))) date time (? cfws)))
+
+;;; Address specification
+
+(define-peg-pattern display-name all
+  phrase)
+
+(define-peg-pattern local-part body
+  (or dot-atom quoted-string))
+
+(define-printable-ascii-character-pattern dtext "[" "]" "\\")
+
+(define-peg-pattern domain-literal body
+  (and (? cfws) "[" (* (and (? fws) dtext)) (? fws) "]" (? cfws)))
+
+(define-peg-pattern domain body
+  (or dot-atom domain-literal))
+
+(define-peg-pattern addr-spec body
+  (and local-part "@" domain))
+
+(define-peg-pattern angle-addr all
+  (and (ignore (? cfws))
+       (ignore"<") addr-spec (ignore">")
+       (ignore (? cfws))))
+
+(define-peg-pattern name-addr body
+  (and (? display-name) angle-addr))
+
+(define-peg-pattern mailbox all
+  (or name-addr addr-spec))
+
+(define-peg-pattern mailbox-list all
+  (and mailbox (* (and (ignore ",") mailbox))))
+
+(define-peg-pattern group all
+  (and display-name (ignore ":")
+       (? group-list) (ignore ";") (? cfws)))
+
+(define-peg-pattern group-list all
+  (or mailbox-list cfws))
+
+(define-peg-pattern address body
+  (or mailbox group))
+
+(define-peg-pattern address-list all
+  (and address (* (and (ignore ",") address))))
+
+;;; Fields
+
+;; Define NAME as a PEG pattern for one header field: the field name
+;; HEADER, a colon, PATTERN for the field body, and a line ending.  The
+;; field name and colon are dropped from the parse tree; only what
+;; PATTERN captures is kept under NAME.
+;; NOTE(review): string-ci is not defined in the visible part of this
+;; file; presumably it matches HEADER case-insensitively -- confirm.
+(define-syntax-rule (define-field-pattern name header pattern)
+  (define-peg-pattern name all
+    (and (ignore (string-ci header)) (ignore ":") pattern crlf)))
+
+;;; Origination date field
+
+(define-field-pattern orig-date "Date" date-time)
+
+;;; Originator fields
+
+(define-field-pattern from "From" (or mailbox-list address-list))
+(define-field-pattern sender "Sender" (or mailbox address))
+(define-field-pattern reply-to "Reply-To" address-list)
+
+;; Destination address fields
+
+(define-field-pattern to "To" address-list)
+(define-field-pattern cc "Cc" address-list)
+(define-field-pattern bcc "Bcc" (? (or address-list cfws)))
+
+;;; Identification fields
+
+(define-peg-pattern no-fold-literal body
+  (and "[" (* dtext) "]"))
+
+(define-peg-pattern id-left body
+  dot-atom-text)
+
+(define-peg-pattern id-right body
+  (or dot-atom-text no-fold-literal))
+
+(define-peg-pattern msg-id all
+  (and (ignore (? cfws)) (ignore "<")
+       id-left "@" id-right
+       (ignore ">") (ignore (? cfws))))
+
+(define-field-pattern message-id "Message-ID" msg-id)
+(define-field-pattern in-reply-to "In-Reply-To" (+ msg-id))
+(define-field-pattern references "References" (+ msg-id))
+
+;;; Informational fields
+
+(define-field-pattern subject "Subject" unstructured)
+(define-field-pattern comments "Comments" unstructured)
+(define-field-pattern keywords "Keywords" (and phrase (* (and "," phrase))))
+
+;;; Resent fields
+
+(define-field-pattern resent-date "Resent-Date" date-time)
+(define-field-pattern resent-from "Resent-From" (or mailbox-list address-list))
+(define-field-pattern resent-sender "Resent-Sender" (or mailbox address))
+(define-field-pattern resent-to "Resent-To" address-list)
+(define-field-pattern resent-cc "Resent-Cc" address-list)
+(define-field-pattern resent-bcc "Resent-Bcc" (? (or address-list cfws)))
+(define-field-pattern resent-msg-id "Resent-Message-ID" msg-id)
+
+;;; Trace fields
+
+;; word is given last priority in the ordered choice
+(define-peg-pattern received-token body
+  (or angle-addr addr-spec domain word))
+
+;; TODO: Do not discard the internal structure of received
+(define-field-pattern received "Received"
+  (and (* received-token) (ignore ";") date-time))
+
+(define-peg-pattern path body
+  (or angle-addr (and (? cfws) (ignore "<") (? cfws) (ignore ">") (? cfws))))
+
+(define-field-pattern return "Return-Path" path)
+
+(define-peg-pattern trace all
+  (and (? return) (+ received)))
+
+;;; Optional fields
+
+(define-printable-ascii-character-pattern ftext ":")
+
+(define-peg-pattern field-name all
+  (+ ftext))
+
+(define-peg-pattern optional-field all
+  (and field-name (ignore ":") unstructured crlf))
+
+;;; MIME version
+
+;; I have prepended optional cfws to account for leading whitespace.
+(define-field-pattern mime-version "MIME-Version"
+  (and (ignore (? cfws)) (+ digit) "." (+ digit)))
+
+;;; Content type
+
+;; In the ABNF notation used by RFC2045, <"> stands for the double
+;; quote character.
+;; TODO: RFC2045 lists <"> among the tspecials, but it is not included
+;; in the pattern below.
+(define-peg-pattern tspecials body
+  (or "(" ")" "<" ">" "@"
+      "," ";" ":" "\\"
+      "/" "[" "]" "?" "="))
+
+;; A token is a run of one or more characters, none of which is a
+;; space, a line ending or a tspecial.
+;; CTL in RFC2045 refers to the ASCII control characters (codes 0-31
+;; and 127).
+;; TODO: RFC2045 also excludes CTLs from tokens; the pattern below does
+;; not exclude them.
+(define-peg-pattern token body
+  (+ (and (not-followed-by (or " " crlf tspecials))
+	  peg-any)))
+
+;; TODO: Implement iana-token, ietf-token and x-token
+(define-peg-pattern iana-token body
+  token)
+
+(define-peg-pattern ietf-token body
+  token)
+
+(define-peg-pattern x-token body
+  token)
+
+(define-peg-pattern extension-token body
+  (or ietf-token x-token))
+
+(define-peg-pattern discrete-type body
+  (or "text" "image" "audio" "video"
+      "application" extension-token))
+
+(define-peg-pattern composite-type body
+  (or "message" "multipart" extension-token))
+
+(define-peg-pattern type all
+  (or discrete-type composite-type))
+
+(define-peg-pattern subtype all
+  (or extension-token iana-token))
+
+(define-peg-pattern attribute all
+  token)
+
+;; quoted-string is given higher precedence
+(define-peg-pattern value all
+  (or quoted-string token))
+
+(define-peg-pattern parameter all
+  (and attribute (ignore "=") value))
+
+;; I have prepended optional cfws to account for leading whitespace.
+(define-field-pattern content "Content-Type"
+  (and (ignore (? cfws)) type (ignore "/") subtype
+       (* (and (ignore ";") (ignore (? cfws)) parameter))))
+
+;;; Content Disposition as defined in RFC2183
+
+(define-peg-pattern disposition-type body
+  (or "inline" "attachment" extension-token))
+
+(define-peg-pattern quoted-date-time all
+  quoted-string)
+
+;; Define NAME as a PEG pattern for one "HEADER=value" parameter, where
+;; the value is matched by PATTERN.  HEADER is matched as a literal
+;; string; it and the "=" are dropped from the parse tree.
+(define-syntax-rule (define-parameter-pattern name header pattern)
+  (define-peg-pattern name all
+    (and (ignore header) (ignore "=") pattern)))
+
+(define-parameter-pattern filename-parm "filename" value)
+(define-parameter-pattern creation-date-parm "creation-date" quoted-date-time)
+(define-parameter-pattern modification-date-parm "modification-date" quoted-date-time)
+(define-parameter-pattern read-date-parm "read-date" quoted-date-time)
+(define-parameter-pattern size-parm "size" (+ digit))
+
+(define-peg-pattern disposition-parm body
+  (or filename-parm creation-date-parm modification-date-parm
+      read-date-parm size-parm parameter))
+
+;; I have prepended optional cfws to account for leading whitespace.
+(define-field-pattern disposition "Content-Disposition"
+  (and (ignore (? cfws)) disposition-type
+       (* (and (ignore ";") (ignore (? cfws)) disposition-parm))))
+
+;;; Content transfer encoding
+
+(define-peg-pattern mechanism body
+  (or "7bit" "8bit" "binary"
+      "quoted-printable" "base64"
+      ietf-token x-token))
+
+(define-field-pattern encoding "Content-Transfer-Encoding"
+  (and (ignore (? cfws)) mechanism))
+
+;;; Fields
+
+;; The ABNF specified for fields in RFC5322 does not make sense. With
+;; it, all headers are eaten up by optional-field. So, as a temporary
+;; workaround, I am going with the following much simpler (but
+;; possibly incorrect) ABNF.
+
+;; TODO: Try to understand and implement the actual ABNF specified by
+;; RFC5322.
+(define-peg-pattern fields all
+  (* (or trace
+	 resent-date
+	 resent-from
+	 resent-sender
+	 resent-to
+	 resent-cc
+	 resent-bcc
+	 resent-msg-id
+	 orig-date
+	 from
+	 sender
+	 reply-to
+	 to
+	 cc
+	 bcc
+	 message-id
+	 in-reply-to
+	 references
+	 subject
+	 comments
+	 keywords
+	 mime-version
+	 content
+	 disposition
+	 encoding
+	 optional-field)))
+
+(define-peg-pattern mime-extension-field-name all
+  (and "Content-" (+ ftext)))
+
+(define-peg-pattern mime-extension-field all
+  (and mime-extension-field-name (ignore ":") unstructured crlf))
+
+(define-peg-pattern mime-entity-fields all
+  (* (or content
+	 disposition
+	 encoding
+	 mime-extension-field
+	 optional-field)))
+
+(define (decode-mime-encoded-word word)
+  "Return WORD with every MIME encoded word of the form
+=?charset?encoding?encoded-text?= replaced by its decoded text. The
+encoding is compared case-insensitively: b selects base64-decode and q
+selects q-encoding-decode. Any other encoding raises an error."
+  (define (decode-match match-record)
+    (let* ((charset (match:substring match-record 1))
+           (encoding (string->lcase-symbol (match:substring match-record 2)))
+           (encoded-text (match:substring match-record 3))
+           (decode (case encoding
+                     ((b) base64-decode)
+                     ((q) q-encoding-decode)
+                     (else (error "Encoding of MIME word unknown" word)))))
+      ;; The decoder yields bytes; interpret them in the declared charset.
+      (bytevector->string (decode encoded-text) charset)))
+
+  (regexp-substitute/global
+   #f "=\\?([^?]*)\\?([^?]*)\\?([^?]*)\\?=" word
+   'pre decode-match 'post))
+
+(define (body->mime-entities body boundary)
+  "Split BODY into a list of mime entities separated by BOUNDARY (as
+explained in RFC2046), and return that list. Text before the first
+boundary delimiter line is discarded. Reading stops at the closing
+delimiter (\"--\" BOUNDARY \"--\"), or at the end of BODY if the closing
+delimiter is missing."
+  ;; A delimiter line starts with "--" followed by the boundary; the
+  ;; closing delimiter has a further "--" appended.
+  (define delimiter (string-append "--" boundary))
+  (define close-delimiter (string-append delimiter "--"))
+
+  (define (read-till-boundary port)
+    (read-while port get-line-with-delimiter
+                (negate (cut string-prefix? delimiter <>))))
+
+  (define (read-mime-entity port)
+    ;; Consume the delimiter line that ended the previous chunk.
+    (let ((line (get-line-with-delimiter port)))
+      ;; NOTE(review): get-line-with-delimiter is presumed to return the
+      ;; eof object at end of input -- confirm in (email utils).  The
+      ;; check keeps a body without a closing delimiter from reaching
+      ;; string-prefix? with a non-string.
+      (if (or (eof-object? line)
+              (string-prefix? close-delimiter line))
+          (eof-object)
+          (read-till-boundary port))))
+
+  (call-with-input-string body
+    (lambda (port)
+      ;; Skip everything before the first delimiter line.
+      (read-till-boundary port)
+      (read-objects read-mime-entity port))))
+
+(define (email->headers+body email)
+  "Split EMAIL at its first blank line. Return two values: what was
+read before the blank line (the headers) and what was read after it
+(the body). The blank line itself is dropped."
+  (define (header-line? line)
+    ;; A line consisting only of a line ending terminates the headers.
+    (and (not (string= line "\n"))
+         (not (string= line "\r\n"))))
+
+  (call-with-input-string email
+    (lambda (port)
+      (let ((headers (read-while port get-line-with-delimiter header-line?)))
+        ;; Discard the blank separator line.
+        (get-line-with-delimiter port)
+        (let ((body (read-while port get-line-with-delimiter identity)))
+          (values headers body))))))
+
+;; Handler for `encoding' nodes of the parse tree.  The node tag is
+;; ignored; VALUE, the mechanism string, is turned into a lowercase
+;; symbol.
+(define (post-process-content-transfer-encoding _ value)
+  `(content-transfer-encoding ,(string->lcase-symbol value)))
+
+;; Handler for `content' nodes of the parse tree.  Returns
+;; (content-type ALIST) where ALIST maps type and subtype to lowercase
+;; symbols, followed by one (attribute . value) pair per parameter with
+;; the attribute as a lowercase symbol.  A charset of "utf-8" is added
+;; for text types that do not specify one.
+(define post-process-content-type
+  (let ((parameter->pair
+         (match-lambda
+           (`(parameter (attribute ,attribute)
+                        (value ,value))
+            (cons (string->lcase-symbol attribute) value)))))
+    (match-lambda*
+      (`(content (type ,type)
+                 (subtype ,subtype)
+                 . ,parameters)
+       (let* ((type (string->lcase-symbol type))
+              (subtype (string->lcase-symbol subtype))
+              (explicit (map parameter->pair
+                             (flatten-and-filter '(parameter) parameters)))
+              (parameters
+               (if (and (eq? type 'text)
+                        (not (assoc-ref explicit 'charset)))
+                   ;; UTF-8 is specified as the default charset in RFC6657
+                   (acons 'charset "utf-8" explicit)
+                   explicit)))
+         (list 'content-type
+               (acons* 'type type
+                       'subtype subtype
+                       parameters)))))))
+
+;; Handler for `disposition' nodes of the parse tree.  Returns
+;; (content-disposition ALIST) where ALIST starts with the disposition
+;; type as a lowercase symbol, followed by one pair per parameter.
+(define post-process-content-disposition
+  (let ((date-parms '(creation-date-parm modification-date-parm read-date-parm))
+        (parm-keywords '(filename-parm creation-date-parm modification-date-parm
+                         read-date-parm size-parm parameter)))
+    (define (date-parm? tag)
+      (member tag date-parms))
+
+    (define parm->pair
+      (match-lambda
+        ;; Keep only the last path component of the file name.
+        (`(filename-parm (value ,filename))
+         (cons 'filename (basename filename)))
+        (((? date-parm? date-parm) value)
+         ;; TODO: Convert to SRFI-19 datetime
+         (cons date-parm value))
+        (`(size-parm ,value)
+         (cons 'size (string->number value)))
+        (`(parameter (attribute ,attribute)
+                     (value ,value))
+         (cons (string->lcase-symbol attribute) value))))
+
+    (match-lambda*
+      (`(disposition ,type . ,parameters)
+       (list 'content-disposition
+             (acons 'type (string->lcase-symbol type)
+                    (map parm->pair
+                         (flatten-and-filter parm-keywords parameters))))))))
+
+;; Handler for `optional-field' nodes of the parse tree.  Returns a
+;; two-element list: the field name as a lowercase symbol and the field
+;; value.  A field with no value gets the empty string as its value.
+(define post-process-optional-field
+  (match-lambda*
+    (`(optional-field
+       (field-name ,field-name)
+       ,field-value)
+     (list (string->lcase-symbol field-name)
+	   field-value))
+    ;; Empty field body.  The name is converted exactly as in the clause
+    ;; above, so symbol-keyed lookups find empty headers as well.
+    (`(optional-field (field-name ,field-name))
+     (list (string->lcase-symbol field-name) ""))))
+
+;; Ignores its first argument (the node tag) and collects the `address'
+;; and `mailbox' subtrees found in the remaining arguments.
+(define (macro-process-address-list _ . addresses)
+  (let ((wanted '(address mailbox)))
+    (flatten-and-filter wanted addresses)))
+
+(define (parse-email-address address)
+  "Parse ADDRESS as an email address and return an association list
+with keys being the symbols name and address, and values being the
+display-name and addr-spec respectively. display-name and addr-spec
+are as defined in RFC5322. The name key is omitted when ADDRESS has no
+display-name.
+
+For example,
+(parse-email-address \"Foo <foo@example.org>\")
+=> ((name . \"Foo\") (address . \"foo@example.org\"))
+(parse-email-address \"foo@example.org\")
+=> ((address . \"foo@example.org\"))"
+  (cond
+   ;; "display-name <addr-spec>" form: everything before the first "<"
+   ;; is the name, everything up to the following ">" is the address.
+   ((string-match "([^<]*)<([^>]*)>" address)
+    => (lambda (match-record)
+         (let ((name (string-trim-both (match:substring match-record 1)))
+               (address (match:substring match-record 2)))
+           (if (string-null? name)
+               `((address . ,address))
+               `((name . ,name)
+                 (address . ,address))))))
+   ;; Bare addr-spec.  Surrounding whitespace is dropped, just as it is
+   ;; for the display-name above.
+   (else `((address . ,(string-trim-both address))))))
+
+;; Inverse of parse-email-address: turn an association list with an
+;; address entry, optionally preceded by a name entry, back into a
+;; string.
+(define interpret-address
+  (match-lambda
+    (`((address . ,address))
+     address)
+    (`((name . ,name) (address . ,address))
+     (format #f "~a <~a>" name address))))
+
+(define (parse-email-body headers body)
+  "Parse BODY as email body where HEADERS is an association list of
+header keys and values as returned by parse-email-headers.
+
+If the content type is multipart, split BODY at the boundary given in
+the content type and return a list with the result of
+parse-mime-entity for each part. If the content type is text, return
+BODY decoded according to the content transfer encoding and charset,
+with leading and trailing whitespace removed. Else, return BODY
+decoded according to the content transfer encoding alone."
+  (let ((content-type (assoc-ref headers 'content-type)))
+    (case (assoc-ref content-type 'type)
+      ((multipart)
+       (map parse-mime-entity
+	    (body->mime-entities body (assoc-ref content-type 'boundary))))
+      ((text)
+       (string-trim-both
+	(decode-body body (assoc-ref headers 'content-transfer-encoding)
+		     (assoc-ref content-type 'charset))))
+      (else (decode-body body (assoc-ref headers 'content-transfer-encoding))))))
+
+(define (add-default-headers headers)
+  ;; Prepend the RFC2045 default Content-Type and
+  ;; Content-Transfer-Encoding entries to HEADERS, skipping any default
+  ;; whose key already has a value in HEADERS.
+  (let* ((default-headers
+           (acons* 'content-type '((type . text)
+                                   (subtype . plain)
+                                   ;; RFC6657 names UTF-8 as the
+                                   ;; default charset
+                                   (charset . "utf-8"))
+                   'content-transfer-encoding '#{7bit}#))
+         ;; Keys for which HEADERS already supplies a value
+         (overridden-keys
+          (filter (lambda (key) (assoc-ref headers key))
+                  (list 'content-type 'content-transfer-encoding)))
+         (missing-defaults
+          (alist-delete* overridden-keys default-headers)))
+    (append missing-defaults headers)))
+
+;; Parse TEXT as a single MIME entity.  TEXT is split into headers and
+;; body with email->headers+body, the header fields are turned into an
+;; association list (with defaults added by add-default-headers), and
+;; the body is parsed with parse-email-body.  The result is built with
+;; make-mime-entity from the header alist and the parsed body.
+(define (parse-mime-entity text)
+  (let-values (((headers body) (email->headers+body text)))
+    ;; This inner `headers' shadows the raw headers bound just above.
+    (let ((headers
+	   (pre-post-order
+	    (peg:tree
+	     ;; NOTE(review): the pattern is matched against the whole
+	     ;; TEXT, not the `headers' value split off above, whereas
+	     ;; parse-email-headers matches against its headers string
+	     ;; -- confirm this is intended.
+	     (match-pattern mime-entity-fields text))
+	    `((content . ,post-process-content-type)
+	      (encoding . ,post-process-content-transfer-encoding)
+	      (disposition . ,post-process-content-disposition)
+	      (optional-field . ,post-process-optional-field)
+	      ;; Turn each processed field into a (key . value) pair, then
+	      ;; fill in missing default headers.
+	      (mime-entity-fields . ,(lambda (_ . mime-entity-fields)
+				       (add-default-headers
+					(map (match-lambda
+					       ;; Exactly one value: pair the key with it
+					       ((mime-entity-field value)
+						(cons mime-entity-field value))
+					       ;; Otherwise: pair the key with the list of values
+					       ((mime-entity-field . values)
+						(cons mime-entity-field values)))
+					     mime-entity-fields))))
+	      ;; Text nodes are kept as they are.
+	      (*text* . ,(lambda (_ text) text))
+	      ;; Nodes without a dedicated handler are passed through unchanged.
+	      (*default* . ,(lambda tree tree))))))
+      (make-mime-entity headers (parse-email-body headers body)))))
+
+(define (parse-email email)
+  "Parse the string EMAIL and return the result as an <email> record
+built from the parsed headers and the parsed body."
+  (call-with-values
+      (lambda () (email->headers+body email))
+    (lambda (raw-headers raw-body)
+      (let* ((parsed-headers (parse-email-headers raw-headers))
+             (parsed-body (parse-email-body parsed-headers raw-body)))
+        (make-email parsed-headers parsed-body)))))
+
+(define (parse-email-headers headers)
+  "Parse string HEADERS as email headers and return an association
+list of header keys and values."
+  ;; Handler that drops the node tag and keeps only the node's value.
+  (define (extract-value _ value) value)
+
+  ;; Match HEADERS against the `fields' pattern, then rewrite the
+  ;; resulting tree with the handlers below.
+  (pre-post-order
+   (peg:tree
+    (match-pattern fields headers))
+   ;; date-time: collect the date components and build a date object
+   ;; with make-date (presumably the SRFI-19 constructor -- confirm).
+   ;; NOTE(review): the match-let pattern requires all nine components,
+   ;; in this order, to be present -- confirm the grammar guarantees it.
+   `((date-time . ,(lambda node
+		     (match-let
+			 ((`((day ,day) (month ,month) (year ,year)
+			     (hours ,hours) (minutes ,minutes) (seconds ,seconds)
+			     (zone-sign ,zone-sign) (zone-hours ,zone-hours) (zone-minutes ,zone-minutes))
+			   (flatten-and-filter
+			    '(day month year hours minutes seconds
+				  zone-sign zone-hours zone-minutes)
+			    node)))
+		       ;; First argument (0) is passed as a constant.
+		       (make-date 0
+				  (string->number seconds)
+				  (string->number minutes)
+				  (string->number hours)
+				  (string->number day)
+				  ;; Month number is the 1-based position of
+				  ;; the month name in this list.  The
+				  ;; comparison uses equal?, so it is
+				  ;; case-sensitive.
+				  (1+ (list-index
+				       (cut equal? <> month)
+				       (list "Jan" "Feb" "Mar" "Apr" "May" "Jun"
+					     "Jul" "Aug" "Sep" "Oct" "Nov" "Dec")))
+				  (string->number year)
+				  ;; Zone offset in seconds, negated when the
+				  ;; zone sign is "-".
+				  (* (case (string->symbol zone-sign)
+				       ((+) 1)
+				       ((-) -1))
+				     (+ (* 60 60 (string->number zone-hours))
+					(* 60 (string->number zone-minutes))))))))
+     ;; orig-date becomes the field (date DATE).
+     (orig-date . ,(lambda (_ date) (list 'date date)))
+     (angle-addr . ,extract-value)
+     ;; mailbox: build an alist with an `address' entry and, when a
+     ;; display-name is present, a `name' entry decoded with
+     ;; decode-mime-encoded-word.
+     (mailbox . ,(match-lambda*
+		   (`(mailbox (display-name ,name) ,address)
+		    `((name . ,(decode-mime-encoded-word
+				(string-trim-both name)))
+		      (address . ,address)))
+		   (`(mailbox ,address)
+		    `((address . ,(string-trim-both address))))
+		   (_ (error "Failed to parse mailbox"))))
+     (address-list *macro* . ,macro-process-address-list)
+     (mailbox-list *macro* . ,macro-process-address-list)
+     (optional-field . ,post-process-optional-field)
+     (msg-id . ,(match-lambda* (`(msg-id ,msg-id) msg-id)))
+     (content . ,post-process-content-type)
+     (encoding . ,post-process-content-transfer-encoding)
+     (disposition . ,post-process-content-disposition)
+     ;; fields: drop trace fields and anything that matches none of the
+     ;; list shapes below, turn each remaining field into a
+     ;; (key . value) pair, then add default headers.
+     (fields . ,(lambda (_ . fields)
+		  (add-default-headers
+		   (filter-map (match-lambda
+				 (('trace . _) #f)
+				 ;; Exactly one value: pair the key with it
+				 ((field value)
+				  (cons field value))
+				 ;; Otherwise: pair the key with the list of values
+				 ((field . values)
+				  (cons field values))
+				 (_ #f))
+			       fields))))
+     (*text* . ,extract-value)
+     ;; Nodes without a dedicated handler are passed through unchanged.
+     (*default* . ,(lambda tree tree)))))
+
+;; Decode BODY according to the content-transfer-encoding symbol
+;; ENCODING.
+;;
+;; - 7bit, 8bit and binary bodies are returned unchanged, whether or not
+;;   CHARSET is given.
+;; - base64 and quoted-printable bodies are decoded to octets; when
+;;   CHARSET is given, the octets are further converted to a string with
+;;   bytevector->string.
+;; - Any other ENCODING raises an error.
+(define* (decode-body body encoding #:optional charset)
+  (let ((base64-alphabet
+         ;; Only these characters are handed to base64-decode; anything
+         ;; else in the body (such as line breaks) is filtered out.
+         (string->char-set
+          "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=")))
+    (case encoding
+      ((#{7bit}# #{8bit}# binary) body)
+      (else
+       (let ((octets
+              (case encoding
+                ((base64)
+                 (base64-decode (string-filter base64-alphabet body)))
+                ((quoted-printable)
+                 (quoted-printable-decode body))
+                (else
+                 (error "Body decoding failed. Unknown encoding" encoding)))))
+         (if charset
+             (bytevector->string octets charset)
+             octets))))))
+
+;; Read one email from the mbox PORT: skip a single line (the From_
+;; separator), then accumulate the following lines up to, but not
+;; including, the next line that starts with "From ".
+(define (read-next-email-in-mbox port)
+  (let ((message-line?
+         (lambda (line)
+           (not (string-prefix? "From " line)))))
+    ;; The From_ line itself is read and thrown away
+    (get-line-with-delimiter port)
+    (read-while port get-line-with-delimiter message-line?)))
+
+;; Read PORT with read-next-email-in-mbox, repeatedly, via read-objects
+;; and return whatever read-objects collects.
+(define (mbox->emails port)
+  (read-objects read-next-email-in-mbox port))
diff --git a/email/quoted-printable.scm b/email/quoted-printable.scm
new file mode 100644
index 0000000..f6e3605
--- /dev/null
+++ b/email/quoted-printable.scm
@@ -0,0 +1,57 @@
+;;; guile-email --- Guile email parser
+;;; Copyright © 2018 Arun Isaac <arunisaac@systemreboot.net>
+;;;
+;;; This file is part of guile-email.
+;;;
+;;; guile-email is free software; you can redistribute it and/or modify
+;;; it under the terms of the GNU Affero General Public License as
+;;; published by the Free Software Foundation; either version 3 of the
+;;; License, or (at your option) any later version.
+;;;
+;;; guile-email is distributed in the hope that it will be useful, but
+;;; WITHOUT ANY WARRANTY; without even the implied warranty of
+;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;;; Affero General Public License for more details.
+;;;
+;;; You should have received a copy of the GNU Affero General Public
+;;; License along with guile-email.  If not, see
+;;; <http://www.gnu.org/licenses/>.
+
+(define-module (email quoted-printable)
+  #:use-module (rnrs)
+  #:use-module (ice-9 match)
+  #:export (quoted-printable-decode
+	    q-encoding-decode))
+
+;; Decode quoted-printable data (RFC 2045, section 6.7).
+;;
+;; Accepted argument lists:
+;;   (STR)     decode the string STR; return the octets as a bytevector
+;;   (IN)      decode everything read from input port IN; return a
+;;             bytevector
+;;   (IN OUT)  decode from IN, writing octets to the binary output port
+;;             OUT; return OUT
+;;
+;; Soft line breaks ("=" followed by LF or by CRLF) are removed.  A
+;; dangling "=" at the very end of the input is dropped.  An "=" that
+;; is followed by neither a line break nor two hexadecimal digits is
+;; malformed; it is copied to the output literally instead of raising
+;; an error, as RFC 2045 suggests for robust decoders.
+(define quoted-printable-decode
+  (match-lambda*
+    (((? string? str))
+     (call-with-input-string str quoted-printable-decode))
+    (((? port? in))
+     (let-values (((out get-bytevector)
+                   (open-bytevector-output-port)))
+       (quoted-printable-decode in out)
+       (get-bytevector)))
+    (((? port? in) (? port? out))
+     (let ((hex-digit? (lambda (c)
+                         ;; C may be the EOF object when it comes from
+                         ;; peek-char
+                         (and (char? c)
+                              (char-set-contains? char-set:hex-digit c)))))
+       (let loop ((c (read-char in)))
+         (cond
+          ((eof-object? c) out)
+          ((char=? c #\=)
+           (let ((c1 (read-char in)))
+             (cond
+              ;; Dangling "=" at end of input: nothing left to decode
+              ((eof-object? c1) out)
+              ;; Soft line break terminated by a bare LF
+              ((char=? c1 #\newline)
+               (loop (read-char in)))
+              ;; Soft line break terminated by CRLF
+              ((and (char=? c1 #\return)
+                    (eqv? (peek-char in) #\newline))
+               (read-char in)
+               (loop (read-char in)))
+              ;; Well-formed "=XY" escape
+              ((and (hex-digit? c1) (hex-digit? (peek-char in)))
+               (let ((c2 (read-char in)))
+                 (put-u8 out (string->number (string c1 c2) 16))
+                 (loop (read-char in))))
+              ;; Malformed escape: emit the "=" literally and process
+              ;; the character after it as ordinary input
+              (else
+               (put-u8 out (char->integer #\=))
+               (loop c1)))))
+          (else
+           (put-u8 out (char->integer c))
+           (loop (read-char in)))))))))
+
+;; Decode STR like quoted-printable-decode, except that every
+;; underscore in STR is first replaced by a space.
+(define (q-encoding-decode str)
+  (let ((with-spaces (string-join (string-split str #\_) " ")))
+    (quoted-printable-decode with-spaces)))
diff --git a/email/utils.scm b/email/utils.scm
new file mode 100644
index 0000000..7d51ebb
--- /dev/null
+++ b/email/utils.scm
@@ -0,0 +1,95 @@
+;;; guile-email --- Guile email parser
+;;; Copyright © 2018 Arun Isaac <arunisaac@systemreboot.net>
+;;;
+;;; This file is part of guile-email.
+;;;
+;;; guile-email is free software; you can redistribute it and/or modify
+;;; it under the terms of the GNU Affero General Public License as
+;;; published by the Free Software Foundation; either version 3 of the
+;;; License, or (at your option) any later version.
+;;;
+;;; guile-email is distributed in the hope that it will be useful, but
+;;; WITHOUT ANY WARRANTY; without even the implied warranty of
+;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;;; Affero General Public License for more details.
+;;;
+;;; You should have received a copy of the GNU Affero General Public
+;;; License along with guile-email.  If not, see
+;;; <http://www.gnu.org/licenses/>.
+
+(define-module (email utils)
+  #:use-module (ice-9 match)
+  #:use-module (ice-9 peg codegen)
+  #:use-module (ice-9 textual-ports)
+  #:use-module (rnrs io simple)
+  #:export (get-line-with-delimiter
+	    read-objects
+	    read-while
+	    acons*
+	    alist-delete*))
+
+(define (read-objects read-proc port)
+  "Call READ-PROC on PORT repeatedly until it returns the end-of-file
+object, and return the objects read so far as a list, in the order
+they were read."
+  (let loop ((collected (list)))
+    (let ((object (read-proc port)))
+      (if (eof-object? object)
+          (reverse collected)
+          (loop (cons object collected))))))
+
+(define (read-while port read-proc pred)
+  "Repeatedly call READ-PROC on PORT and concatenate the strings it
+returns for as long as PRED, applied to each string, returns true.
+The first string rejected by PRED is pushed back onto PORT. Reading
+also stops when READ-PROC returns the end-of-file object. Return the
+concatenated string, or the end-of-file object if nothing was
+accumulated."
+  (let ((accumulated
+         (call-with-output-string
+           (lambda (sink)
+             (let loop ()
+               (let ((item (read-proc port)))
+                 (cond
+                  ((eof-object? item) item)
+                  ((pred item)
+                   (put-string sink item)
+                   (loop))
+                  ;; Rejected item: give it back to the port so the next
+                  ;; reader sees it
+                  (else (unget-string port item)))))))))
+    (if (string-null? accumulated)
+        (eof-object)
+        accumulated)))
+
+(define (get-line-with-delimiter port)
+  "Read one line from PORT with get-line and return it with a linefeed
+character appended. The end-of-file object is returned unchanged."
+  (match (get-line port)
+    ((? eof-object? end) end)
+    (line (string-append line "\n"))))
+
+;; Variadic acons: (acons* k1 v1 k2 v2 ... [alist]) conses the given
+;; key/value pairs, in order, onto the optional trailing ALIST (or onto
+;; the empty list when none is given).
+(define (acons* . args)
+  (match args
+    ;; A lone argument is the tail alist itself
+    ((alist) alist)
+    ((key value)
+     (list (cons key value)))
+    ((key value . rest)
+     (cons (cons key value)
+           (apply acons* rest)))))
+
+(define (alist-delete* keys alist)
+  "Return a list of the entries of ALIST whose keys do not occur in
+the list KEYS. Keys are compared with member."
+  (let ((kept?
+         (lambda (entry)
+           (match entry
+             ((key . _)
+              (not (member key keys)))))))
+    (filter kept? alist)))
+
+;; Code generator for a case-insensitive string pattern.  PAT is syntax
+;; holding a single string; ACCUM is one of the symbols all, name, body
+;; or none.  The result is syntax for a procedure (lambda (str len pos))
+;; that succeeds when the pattern string, compared case-insensitively,
+;; occupies STR from POS, and then returns a list whose first element is
+;; the position just past the match.
+;; NOTE(review): this presumably mirrors the plain string generator in
+;; (ice-9 peg codegen) -- confirm against that module.
+(define (cg-string-ci pat accum)
+  (syntax-case pat ()
+    ;; Only applies when the single pattern element is a string literal.
+    ((pat-str-syntax) (string? (syntax->datum #'pat-str-syntax))
+     (let ((pat-str (syntax->datum #'pat-str-syntax)))
+       (let ((plen (string-length pat-str)))
+	 #`(lambda (str len pos)
+	     (let ((end (+ pos #,plen)))
+	       ;; Fail (return #f) when the pattern would run past LEN or
+	       ;; the substring of STR between POS and END differs from
+	       ;; the pattern string.
+	       (and (<= end len)
+		    (string-ci= str #,pat-str pos end)
+		    ;; The second element of the result depends on ACCUM.
+		    ;; The tag used is the symbol cg-string, and the
+		    ;; captured text is the pattern string itself, not
+		    ;; the matched input text.
+		    #,(case accum
+			((all) #`(list end (list 'cg-string #,pat-str)))
+			((name) #`(list end 'cg-string))
+			((body) #`(list end #,pat-str))
+			((none) #`(list end '()))
+			(else (error "bad accum" accum)))))))))))
+
+;; Register the generator above under the name `string-ci'.
+(add-peg-compiler! 'string-ci cg-string-ci)