summaryrefslogtreecommitdiff
path: root/email/email.scm
diff options
context:
space:
mode:
authorArun Isaac2018-09-08 17:40:37 +0530
committerArun Isaac2018-09-08 17:41:57 +0530
commit277a836aa2e9fca708b8860533ef68227a4c9308 (patch)
tree741666dcde4a31621033634362cfc2e363c6c5b9 /email/email.scm
downloadguile-email-277a836aa2e9fca708b8860533ef68227a4c9308.tar.gz
guile-email-277a836aa2e9fca708b8860533ef68227a4c9308.tar.lz
guile-email-277a836aa2e9fca708b8860533ef68227a4c9308.zip
Initial commit.
Diffstat (limited to 'email/email.scm')
-rw-r--r--email/email.scm804
1 files changed, 804 insertions, 0 deletions
diff --git a/email/email.scm b/email/email.scm
new file mode 100644
index 0000000..ccda3ac
--- /dev/null
+++ b/email/email.scm
@@ -0,0 +1,804 @@
+;;; guile-email --- Guile email parser
+;;; Copyright © 2018 Arun Isaac <arunisaac@systemreboot.net>
+;;;
+;;; This file is part of guile-email.
+;;;
+;;; guile-email is free software; you can redistribute it and/or modify
+;;; it under the terms of the GNU Affero General Public License as
+;;; published by the Free Software Foundation; either version 3 of the
+;;; License, or (at your option) any later version.
+;;;
+;;; guile-email is distributed in the hope that it will be useful, but
+;;; WITHOUT ANY WARRANTY; without even the implied warranty of
+;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+;;; Affero General Public License for more details.
+;;;
+;;; You should have received a copy of the GNU Affero General Public
+;;; License along with guile-email. If not, see
+;;; <http://www.gnu.org/licenses/>.
+
+(define-module (email email)
+ #:use-module (ice-9 iconv)
+ #:use-module (ice-9 match)
+ #:use-module (ice-9 peg)
+ #:use-module (ice-9 regex)
+ #:use-module (rnrs io simple)
+ #:use-module (srfi srfi-1)
+ #:use-module (srfi srfi-2)
+ #:use-module (srfi srfi-9)
+ #:use-module (srfi srfi-11)
+ #:use-module (srfi srfi-19)
+ #:use-module (srfi srfi-26)
+ #:use-module (sxml transform)
+ #:use-module (email base64)
+ #:use-module (email quoted-printable)
+ #:use-module ((email utils)
+ #:select (get-line-with-delimiter
+ read-objects read-while
+ acons* alist-delete*))
+ #:export (<email>
+ make-email
+ email?
+ email-headers
+ email-body
+ <mime-entity>
+ make-mime-entity
+ mime-entity?
+ mime-entity-headers
+ mime-entity-body
+ email->headers+body
+ parse-email
+ parse-email-headers
+ parse-email-body
+ parse-email-address
+ interpret-address
+ mbox->emails))
+
+(define (flatten-and-filter terms tree)
+ (filter list? (keyword-flatten terms tree)))
+
+(define-record-type <email>
+ (make-email headers body)
+ email?
+ (headers email-headers)
+ (body email-body))
+
+(define-record-type <mime-entity>
+ (make-mime-entity headers body)
+ mime-entity?
+ (headers mime-entity-headers)
+ (body mime-entity-body))
+
+(define string->lcase-symbol
+ (compose string->symbol string-downcase))
+
+;;; PEG parser implementing the ABNF grammar specified in RFC5322
+;;; (Internet Message Format), RFC6854 (Update to Internet Message
+;;; Format to Allow Group Syntax in the "From:" and "Sender:" Header
+;;; Fields)
+
+;;; Obsolete syntax has not been implemented.
+
+;;; Core ABNF rules from RFC5234
+
+(define-peg-pattern alpha body
+ (or (range #\A #\Z) (range #\a #\z)))
+
+;; Though line endings should be crlf (\r\n), we also tolerate bare
+;; line feeds (\n)
+(define-peg-pattern crlf none
+ (or "\r\n" "\n"))
+
+(define-peg-pattern digit body
+ (range #\0 #\9))
+
+(define-peg-pattern dquote body
+ "\"")
+
+;; Printable ASCII characters and UTF-8 characters > \x7f (RFC6532)
+(define-peg-pattern vchar body
+ (and (not-followed-by (or (range #\Nul #\Space)
+ "\x7f"))
+ peg-any))
+
+(define-peg-pattern wsp body
+ (or " " "\t"))
+
+(define-peg-pattern lwsp body
+ (* (or wsp (and crlf wsp))))
+
+(define-syntax-rule (define-printable-ascii-character-pattern name . exceptions)
+ (define-peg-pattern name body
+ (and (not-followed-by (or . exceptions))
+ vchar)))
+
+;;; Quoted characters
+
+(define-peg-pattern quoted-pair body
+ (and (ignore "\\") (or vchar wsp)))
+
+;;; Folding white space and comments
+
+(define-peg-pattern fws body
+ (and (? (and (* wsp) crlf)) (+ wsp)))
+
+(define-printable-ascii-character-pattern ctext "(" ")" "\\")
+
+(define-peg-pattern comment none
+ (and "(" (* (and (? fws) ccontent)) ")"))
+
+(define-peg-pattern ccontent body
+ (or ctext quoted-pair comment))
+
+(define-peg-pattern cfws body
+ (or (and (+ (and (? fws) comment)) (? fws)) fws))
+
+;;; Atom
+
+(define-printable-ascii-character-pattern atext
+ "\"" "(" ")" "," "." ":" ";" "<" ">" "@" "[" "\\" "]")
+
+(define-peg-pattern atom body
+ (and (? cfws) (+ atext) (? cfws)))
+
+(define-peg-pattern dot-atom-text body
+ (and (+ atext) (* (and "." (+ atext)))))
+
+(define-peg-pattern dot-atom body
+ (and (? cfws) dot-atom-text (? cfws)))
+
+;;; Quoted strings
+
+(define-printable-ascii-character-pattern qtext "\\" "\"")
+
+(define-peg-pattern qcontent body
+ (or qtext quoted-pair))
+
+;; TODO: Remove workaround guile peg bug for ignore
+(define-peg-pattern quoted-string body
+ (and (? cfws) (ignore (and dquote))
+ (* (and (? fws) qcontent))
+ (? fws)
+ (ignore (and dquote)) (? cfws)))
+
+;;; Miscellaneous tokens
+
+(define-peg-pattern word body
+ (or atom quoted-string))
+
+(define-peg-pattern phrase body
+ (+ word))
+
+;; ABNF modified to ignore leading whitespace
+;; ABNF modified to allow for blank lines in folded field
+(define-peg-pattern unstructured body
+ (and (ignore (? fws))
+ (* (and (? fws) (? vchar)))
+ (ignore (* wsp))))
+
+;;; Date and time specification
+
+(define-peg-pattern day-name body
+ (or "Mon" "Tue" "Wed" "Thu" "Fri" "Sat" "Sun"))
+
+(define-peg-pattern day-of-week all
+ (and (ignore (? fws)) day-name))
+
+;; TODO: Remove workaround guile peg bug for ignore
+(define-peg-pattern day all
+ (and (ignore (? fws)) digit (? digit) (ignore (and fws))))
+
+(define-peg-pattern month all
+ (or "Jan" "Feb" "Mar" "Apr" "May" "Jun"
+ "Jul" "Aug" "Sep" "Oct" "Nov" "Dec"))
+
+;; TODO: Remove workaround guile peg bug for ignore
+(define-peg-pattern year all
+ (and (ignore (and fws)) digit digit digit digit (ignore (and fws))))
+
+(define-peg-pattern date all
+ (and day month year))
+
+(define-peg-pattern hours all
+ (and digit digit))
+
+(define-peg-pattern minutes all
+ (and digit digit))
+
+(define-peg-pattern seconds all
+ (and digit digit))
+
+(define-peg-pattern time-of-day all
+ (and hours (ignore ":") minutes (? (and (ignore ":") seconds))))
+
+;; zone-sign, zone-hours and zone-minutes do not exist in the ABNF
+;; specified in RFC5322. But, we have it here since it eases parsing
+;; the zone and converting it to a number of seconds as required by
+;; the make-date procedure of SRFI-19.
+(define-peg-pattern zone-sign all
+ (or "+" "-"))
+
+(define-peg-pattern zone-hours all
+ (and digit digit))
+
+(define-peg-pattern zone-minutes all
+ (and digit digit))
+
+;; TODO: Remove workaround guile peg bug for ignore
+(define-peg-pattern zone all
+ (and (ignore (and fws)) zone-sign zone-hours zone-minutes))
+
+(define-peg-pattern time all
+ (and time-of-day zone))
+
+(define-peg-pattern date-time all
+ (and (? (and day-of-week (ignore ","))) date time (? cfws)))
+
+;;; Address specification
+
+(define-peg-pattern display-name all
+ phrase)
+
+(define-peg-pattern local-part body
+ (or dot-atom quoted-string))
+
+(define-printable-ascii-character-pattern dtext "[" "]" "\\")
+
+(define-peg-pattern domain-literal body
+ (and (? cfws) "[" (* (and (? fws) dtext)) (? fws) "]" (? cfws)))
+
+(define-peg-pattern domain body
+ (or dot-atom domain-literal))
+
+(define-peg-pattern addr-spec body
+ (and local-part "@" domain))
+
+(define-peg-pattern angle-addr all
+ (and (ignore (? cfws))
+ (ignore"<") addr-spec (ignore">")
+ (ignore (? cfws))))
+
+(define-peg-pattern name-addr body
+ (and (? display-name) angle-addr))
+
+(define-peg-pattern mailbox all
+ (or name-addr addr-spec))
+
+(define-peg-pattern mailbox-list all
+ (and mailbox (* (and (ignore ",") mailbox))))
+
+(define-peg-pattern group all
+ (and display-name (ignore ":")
+ (? group-list) (ignore ";") (? cfws)))
+
+(define-peg-pattern group-list all
+ (or mailbox-list cfws))
+
+(define-peg-pattern address body
+ (or mailbox group))
+
+(define-peg-pattern address-list all
+ (and address (* (and (ignore ",") address))))
+
+;;; Fields
+
+(define-syntax-rule (define-field-pattern name header pattern)
+ (define-peg-pattern name all
+ (and (ignore (string-ci header)) (ignore ":") pattern crlf)))
+
+;;; Origination date field
+
+(define-field-pattern orig-date "Date" date-time)
+
+;;; Originator fields
+
+(define-field-pattern from "From" (or mailbox-list address-list))
+(define-field-pattern sender "Sender" (or mailbox address))
+(define-field-pattern reply-to "Reply-To" address-list)
+
+;; Destination address fields
+
+(define-field-pattern to "To" address-list)
+(define-field-pattern cc "Cc" address-list)
+(define-field-pattern bcc "Bcc" (? (or address-list cfws)))
+
+;;; Identification fields
+
+(define-peg-pattern no-fold-literal body
+ (and "[" (* dtext) "]"))
+
+(define-peg-pattern id-left body
+ dot-atom-text)
+
+(define-peg-pattern id-right body
+ (or dot-atom-text no-fold-literal))
+
+(define-peg-pattern msg-id all
+ (and (ignore (? cfws)) (ignore "<")
+ id-left "@" id-right
+ (ignore ">") (ignore (? cfws))))
+
+(define-field-pattern message-id "Message-ID" msg-id)
+(define-field-pattern in-reply-to "In-Reply-To" (+ msg-id))
+(define-field-pattern references "References" (+ msg-id))
+
+;;; Informational fields
+
+(define-field-pattern subject "Subject" unstructured)
+(define-field-pattern comments "Comments" unstructured)
+(define-field-pattern keywords "Keywords" (and phrase (* (and "," phrase))))
+
+;;; Resent fields
+
+(define-field-pattern resent-date "Resent-Date" date-time)
+(define-field-pattern resent-from "Resent-From" (or mailbox-list address-list))
+(define-field-pattern resent-sender "Resent-Sender" (or mailbox address))
+(define-field-pattern resent-to "Resent-To" address-list)
+(define-field-pattern resent-cc "Resent-Cc" address-list)
+(define-field-pattern resent-bcc "Resent-Bcc" (? (or address-list cfws)))
+(define-field-pattern resent-msg-id "Resent-Message-ID" msg-id)
+
+;;; Trace fields
+
+;; word is given last priority in the ordered choice
+(define-peg-pattern received-token body
+ (or angle-addr addr-spec domain word))
+
+;; TODO: Do not discard the internal structure of received
+(define-field-pattern received "Received"
+ (and (* received-token) (ignore ";") date-time))
+
+(define-peg-pattern path body
+ (or angle-addr (and (? cfws) (ignore "<") (? cfws) (ignore ">") (? cfws))))
+
+(define-field-pattern return "Return-Path" path)
+
+(define-peg-pattern trace all
+ (and (? return) (+ received)))
+
+;;; Optional fields
+
+(define-printable-ascii-character-pattern ftext ":")
+
+(define-peg-pattern field-name all
+ (+ ftext))
+
+(define-peg-pattern optional-field all
+ (and field-name (ignore ":") unstructured crlf))
+
+;;; MIME version
+
+;; I have prepended optional cfws to account for leading whitespace.
+(define-field-pattern mime-version "MIME-Version"
+ (and (ignore (? cfws)) (+ digit) "." (+ digit)))
+
+;;; Content type
+
+;; TODO: What is <"> in RFC2045?
+(define-peg-pattern tspecials body
+ (or "(" ")" "<" ">" "@"
+ "," ";" ":" "\\"
+ "/" "[" "]" "?" "="))
+
+;; TODO: What is a CTL in RFC2045?
+(define-peg-pattern token body
+ (+ (and (not-followed-by (or " " crlf tspecials))
+ peg-any)))
+
+;; TODO: Implement iana-token, ietf-token and x-token
+(define-peg-pattern iana-token body
+ token)
+
+(define-peg-pattern ietf-token body
+ token)
+
+(define-peg-pattern x-token body
+ token)
+
+(define-peg-pattern extension-token body
+ (or ietf-token x-token))
+
+(define-peg-pattern discrete-type body
+ (or "text" "image" "audio" "video"
+ "application" extension-token))
+
+(define-peg-pattern composite-type body
+ (or "message" "multipart" extension-token))
+
+(define-peg-pattern type all
+ (or discrete-type composite-type))
+
+(define-peg-pattern subtype all
+ (or extension-token iana-token))
+
+(define-peg-pattern attribute all
+ token)
+
+;; quoted-string is given higher precedence
+(define-peg-pattern value all
+ (or quoted-string token))
+
+(define-peg-pattern parameter all
+ (and attribute (ignore "=") value))
+
+;; I have prepended optional cfws to account for leading whitespace.
+(define-field-pattern content "Content-Type"
+ (and (ignore (? cfws)) type (ignore "/") subtype
+ (* (and (ignore ";") (ignore (? cfws)) parameter))))
+
+;;; Content Disposition as defined in RFC2183
+
+(define-peg-pattern disposition-type body
+ (or "inline" "attachment" extension-token))
+
+(define-peg-pattern quoted-date-time all
+ quoted-string)
+
+(define-syntax-rule (define-parameter-pattern name header pattern)
+ (define-peg-pattern name all
+ (and (ignore header) (ignore "=") pattern)))
+
+(define-parameter-pattern filename-parm "filename" value)
+(define-parameter-pattern creation-date-parm "creation-date" quoted-date-time)
+(define-parameter-pattern modification-date-parm "modification-date" quoted-date-time)
+(define-parameter-pattern read-date-parm "read-date" quoted-date-time)
+(define-parameter-pattern size-parm "size" (+ digit))
+
+(define-peg-pattern disposition-parm body
+ (or filename-parm creation-date-parm modification-date-parm
+ read-date-parm size-parm parameter))
+
+;; I have prepended optional cfws to account for leading whitespace.
+(define-field-pattern disposition "Content-Disposition"
+ (and (ignore (? cfws)) disposition-type
+ (* (and (ignore ";") (ignore (? cfws)) disposition-parm))))
+
+;;; Content transfer encoding
+
+(define-peg-pattern mechanism body
+ (or "7bit" "8bit" "binary"
+ "quoted-printable" "base64"
+ ietf-token x-token))
+
+(define-field-pattern encoding "Content-Transfer-Encoding"
+ (and (ignore (? cfws)) mechanism))
+
+;;; Fields
+
+;; The ABNF specified for fields in RFC5322 does not make sense. With
+;; it, all headers are eaten up by optional-field. So, as a temporary
+;; workaround, I am going with the following much simpler (but
+;; possibly incorrect) ABNF.
+
+;; TODO: Try to understand and implement the actual ABNF specified by
+;; RFC5322.
+(define-peg-pattern fields all
+ (* (or trace
+ resent-date
+ resent-from
+ resent-sender
+ resent-to
+ resent-cc
+ resent-bcc
+ resent-msg-id
+ orig-date
+ from
+ sender
+ reply-to
+ to
+ cc
+ bcc
+ message-id
+ in-reply-to
+ references
+ subject
+ comments
+ keywords
+ mime-version
+ content
+ disposition
+ encoding
+ optional-field)))
+
+(define-peg-pattern mime-extension-field-name all
+ (and "Content-" (+ ftext)))
+
+(define-peg-pattern mime-extension-field all
+ (and mime-extension-field-name (ignore ":") unstructured crlf))
+
+(define-peg-pattern mime-entity-fields all
+ (* (or content
+ disposition
+ encoding
+ mime-extension-field
+ optional-field)))
+
+(define (decode-mime-encoded-word word)
+ (regexp-substitute/global
+ #f "=\\?([^?]*)\\?([^?]*)\\?([^?]*)\\?=" word
+ 'pre (lambda (match-record)
+ (let ((charset (match:substring match-record 1))
+ (encoding (string->lcase-symbol (match:substring match-record 2)))
+ (encoded-text (match:substring match-record 3)))
+ (bytevector->string
+ ((case encoding
+ ((b) base64-decode)
+ ((q) q-encoding-decode)
+ (else (error "Encoding of MIME word unknown" word)))
+ encoded-text)
+ charset)))
+ 'post))
+
+(define (body->mime-entities body boundary)
+ "Split BODY into a list of mime entities separated by BOUNDARY (as
+explained in RFC2045), and return that list."
+ (define (read-till-boundary port)
+ (read-while port get-line-with-delimiter
+ (negate (cut string-prefix? (string-append "--" boundary) <>))))
+
+ (define (read-mime-entity port)
+ (if (string-prefix? (string-append "--" boundary "--")
+ (get-line-with-delimiter port))
+ (eof-object)
+ (read-till-boundary port)))
+
+ (call-with-input-string body
+ (lambda (port)
+ (read-till-boundary port)
+ (read-objects read-mime-entity port))))
+
+(define (email->headers+body email)
+ "Split EMAIL into headers and body. Return as multiple values."
+ (call-with-input-string email
+ (lambda (port)
+ (let ((headers (read-while port get-line-with-delimiter
+ (lambda (line)
+ (not (or (string= line "\n")
+ (string= line "\r\n")))))))
+ (get-line-with-delimiter port)
+ (values headers
+ (read-while port get-line-with-delimiter identity))))))
+
+(define (post-process-content-transfer-encoding _ value)
+ (list 'content-transfer-encoding (string->lcase-symbol value)))
+
+(define post-process-content-type
+ (match-lambda*
+ (`(content (type ,type)
+ (subtype ,subtype)
+ . ,parameters)
+ (let ((type (string->lcase-symbol type))
+ (subtype (string->lcase-symbol subtype)))
+ `(content-type
+ ,(acons* 'type type
+ 'subtype subtype
+ (let ((parameters
+ (map (match-lambda
+ (`(parameter (attribute ,attribute)
+ (value ,value))
+ (cons (string->lcase-symbol attribute) value)))
+ (flatten-and-filter '(parameter) parameters))))
+ (if (and (eq? type 'text)
+ (not (assoc-ref parameters 'charset)))
+ ;; UTF-8 is specified as the default charset in RFC6657
+ (acons 'charset "utf-8" parameters)
+ parameters))))))))
+
+(define post-process-content-disposition
+ (match-lambda*
+ (`(disposition ,type . ,parameters)
+ `(content-disposition
+ ,(acons 'type (string->lcase-symbol type)
+ (map (match-lambda
+ (('filename-parm ('value filename))
+ (cons 'filename (basename filename)))
+ (((? (lambda (date-parm)
+ (member date-parm '(creation-date-parm modification-date-parm read-date-parm)))
+ date-parm) value)
+ ;; TODO: Convert to SRFI-19 datetime
+ (cons date-parm value))
+ (('size-parm value)
+ (cons 'size (string->number value)))
+ (`(parameter (attribute ,attribute)
+ (value ,value))
+ (cons (string->lcase-symbol attribute) value)))
+ (flatten-and-filter
+ '(filename-parm creation-date-parm modification-date-parm
+ read-date-parm size-parm parameter)
+ parameters)))))))
+
+(define post-process-optional-field
+ (match-lambda*
+ (`(optional-field
+ (field-name ,field-name)
+ ,field-value)
+ (list (string->lcase-symbol field-name)
+ field-value))
+ (`(optional-field (field-name ,field-name))
+ (list field-name ""))))
+
+(define* (macro-process-address-list _ . addresses)
+ (flatten-and-filter '(address mailbox) addresses))
+
+(define (parse-email-address address)
+ "Parse ADDRESS as an email address and return an association list
+with keys being the symbols name and address, and values being the
+display-name and addr-spec respectively. display-name and addr-spec
+are as defined in RFC5322.
+
+For example,
+(parse-email-address \"Foo <foo@example.org>\")
+=> ((name \"Foo\") (address \"foo@example.org\"))
+(parse-email-address \"foo@example.org\")
+=> ((address \"foo@example.org\"))"
+ (cond
+ ((string-match "([^<]*)<([^>]*)>" address)
+ => (lambda (match-record)
+ (let ((name (string-trim-both (match:substring match-record 1)))
+ (address (match:substring match-record 2)))
+ (if (string-null? name)
+ `((address . ,address))
+ `((name . ,name)
+ (address . ,address))))))
+ (else `((address . ,address)))))
+
+(define interpret-address
+ (match-lambda
+ ((('name . name)
+ ('address . address))
+ (format #f "~a <~a>" name address))
+ ((('address . address)) address)))
+
+(define (parse-email-body headers body)
+ "Parse BODY as email body where HEADERS is an association list of
+header keys and values as returned by parse-email-headers. Return a
+list of <mime-entity> records if the body is a multipart
+message. Else, return a single <mime-entity> record."
+ (let ((content-type (assoc-ref headers 'content-type)))
+ (case (assoc-ref content-type 'type)
+ ((multipart)
+ (map parse-mime-entity
+ (body->mime-entities body (assoc-ref content-type 'boundary))))
+ ((text)
+ (string-trim-both
+ (decode-body body (assoc-ref headers 'content-transfer-encoding)
+ (assoc-ref content-type 'charset))))
+ (else (decode-body body (assoc-ref headers 'content-transfer-encoding))))))
+
+(define (add-default-headers headers)
+ ;; Default Content-Type and Content-Transfer-Encoding headers as
+ ;; specified in RFC2045
+ (let ((default-headers
+ (acons* 'content-type '((type . text)
+ (subtype . plain)
+ ;; UTF-8 is specified as the default
+ ;; charset in RFC6657
+ (charset . "utf-8"))
+ 'content-transfer-encoding '#{7bit}#)))
+ (append (alist-delete* (append (if (assoc-ref headers 'content-type)
+ (list 'content-type) (list))
+ (if (assoc-ref headers 'content-transfer-encoding)
+ (list 'content-transfer-encoding) (list)))
+ default-headers)
+ headers)))
+
+(define (parse-mime-entity text)
+ (let-values (((headers body) (email->headers+body text)))
+ (let ((headers
+ (pre-post-order
+ (peg:tree
+ (match-pattern mime-entity-fields text))
+ `((content . ,post-process-content-type)
+ (encoding . ,post-process-content-transfer-encoding)
+ (disposition . ,post-process-content-disposition)
+ (optional-field . ,post-process-optional-field)
+ (mime-entity-fields . ,(lambda (_ . mime-entity-fields)
+ (add-default-headers
+ (map (match-lambda
+ ((mime-entity-field value)
+ (cons mime-entity-field value))
+ ((mime-entity-field . values)
+ (cons mime-entity-field values)))
+ mime-entity-fields))))
+ (*text* . ,(lambda (_ text) text))
+ (*default* . ,(lambda tree tree))))))
+ (make-mime-entity headers (parse-email-body headers body)))))
+
+(define (parse-email email)
+ "Parse string EMAIL and return result as a <email> record."
+ (let-values (((headers body) (email->headers+body email)))
+ (let ((headers-alist (parse-email-headers headers)))
+ (make-email headers-alist (parse-email-body headers-alist body)))))
+
+(define (parse-email-headers headers)
+ "Parse string HEADERS as email headers and return an association
+list of header keys and values."
+ (define (extract-value _ value) value)
+
+ (pre-post-order
+ (peg:tree
+ (match-pattern fields headers))
+ `((date-time . ,(lambda node
+ (match-let
+ ((`((day ,day) (month ,month) (year ,year)
+ (hours ,hours) (minutes ,minutes) (seconds ,seconds)
+ (zone-sign ,zone-sign) (zone-hours ,zone-hours) (zone-minutes ,zone-minutes))
+ (flatten-and-filter
+ '(day month year hours minutes seconds
+ zone-sign zone-hours zone-minutes)
+ node)))
+ (make-date 0
+ (string->number seconds)
+ (string->number minutes)
+ (string->number hours)
+ (string->number day)
+ (1+ (list-index
+ (cut equal? <> month)
+ (list "Jan" "Feb" "Mar" "Apr" "May" "Jun"
+ "Jul" "Aug" "Sep" "Oct" "Nov" "Dec")))
+ (string->number year)
+ (* (case (string->symbol zone-sign)
+ ((+) 1)
+ ((-) -1))
+ (+ (* 60 60 (string->number zone-hours))
+ (* 60 (string->number zone-minutes))))))))
+ (orig-date . ,(lambda (_ date) (list 'date date)))
+ (angle-addr . ,extract-value)
+ (mailbox . ,(match-lambda*
+ (`(mailbox (display-name ,name) ,address)
+ `((name . ,(decode-mime-encoded-word
+ (string-trim-both name)))
+ (address . ,address)))
+ (`(mailbox ,address)
+ `((address . ,(string-trim-both address))))
+ (_ (error "Failed to parse mailbox"))))
+ (address-list *macro* . ,macro-process-address-list)
+ (mailbox-list *macro* . ,macro-process-address-list)
+ (optional-field . ,post-process-optional-field)
+ (msg-id . ,(match-lambda* (`(msg-id ,msg-id) msg-id)))
+ (content . ,post-process-content-type)
+ (encoding . ,post-process-content-transfer-encoding)
+ (disposition . ,post-process-content-disposition)
+ (fields . ,(lambda (_ . fields)
+ (add-default-headers
+ (filter-map (match-lambda
+ (('trace . _) #f)
+ ((field value)
+ (cons field value))
+ ((field . values)
+ (cons field values))
+ (_ #f))
+ fields))))
+ (*text* . ,extract-value)
+ (*default* . ,(lambda tree tree)))))
+
+(define* (decode-body body encoding #:optional charset)
+ (let ((octets
+ (case encoding
+ ((base64)
+ (base64-decode
+ (string-filter
+ (char-set-union
+ (ucs-range->char-set (char->integer #\a) (1+ (char->integer #\z)))
+ (ucs-range->char-set (char->integer #\A) (1+ (char->integer #\Z)))
+ (ucs-range->char-set (char->integer #\0) (1+ (char->integer #\9)))
+ (char-set #\+ #\/ #\=))
+ body)))
+ ((quoted-printable) (quoted-printable-decode body))
+ ((#{7bit}# #{8bit}# binary) body)
+ (else (error "Body decoding failed. Unknown encoding" encoding)))))
+ (if charset
+ (case encoding
+ ((base64 quoted-printable) (bytevector->string octets charset))
+ ((#{7bit}# #{8bit}# binary) octets))
+ octets)))
+
+(define (read-next-email-in-mbox port)
+ ;; Read and discard From_ line
+ (get-line-with-delimiter port)
+ ;; Read the actual email
+ (read-while port get-line-with-delimiter
+ (negate (cut string-prefix? "From " <>))))
+
+(define (mbox->emails port)
+ (read-objects read-next-email-in-mbox port))