;;; guile-email --- Guile email parser ;;; Copyright © 2018, 2019 Arun Isaac ;;; ;;; This file is part of guile-email. ;;; ;;; guile-email is free software; you can redistribute it and/or modify ;;; it under the terms of the GNU Affero General Public License as ;;; published by the Free Software Foundation; either version 3 of the ;;; License, or (at your option) any later version. ;;; ;;; guile-email is distributed in the hope that it will be useful, but ;;; WITHOUT ANY WARRANTY; without even the implied warranty of ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ;;; Affero General Public License for more details. ;;; ;;; You should have received a copy of the GNU Affero General Public ;;; License along with guile-email. If not, see ;;; . (define-module (email email) #:use-module (ice-9 binary-ports) #:use-module (ice-9 iconv) #:use-module (ice-9 match) #:use-module (ice-9 peg) #:use-module (ice-9 regex) #:use-module (rnrs bytevectors) #:use-module ((rnrs io ports) #:select (call-with-port)) #:use-module (rnrs io simple) #:use-module (srfi srfi-1) #:use-module (srfi srfi-2) #:use-module (srfi srfi-9) #:use-module (srfi srfi-11) #:use-module (srfi srfi-19) #:use-module (srfi srfi-26) #:use-module (sxml transform) #:use-module (email base64) #:use-module (email quoted-printable) #:use-module (email utils) #:export ( make-email email? email-headers email-body make-mime-entity mime-entity? mime-entity-headers mime-entity-body email->headers+body parse-email parse-email-headers parse-email-body parse-email-address interpret-address mbox->emails)) (define (flatten-and-filter terms tree) (filter list? (keyword-flatten terms tree))) (define-record-type (make-email headers body) email? (headers email-headers) (body email-body)) (define-record-type (make-mime-entity headers body) mime-entity? (headers mime-entity-headers) (body mime-entity-body)) (define string->lcase-symbol (compose string->symbol string-downcase)) ;;; PEG parser implementing the ABNF grammar specified in RFC5322 ;;; (Internet Message Format), RFC6854 (Update to Internet Message ;;; Format to Allow Group Syntax in the "From:" and "Sender:" Header ;;; Fields) ;;; Obsolete syntax has not been implemented. ;;; Core ABNF rules from RFC5234 (define-peg-pattern alpha body (or (range #\A #\Z) (range #\a #\z))) ;; Though line endings should be crlf (\r\n), we also tolerate bare ;; line feeds (\n) (define-peg-pattern crlf none (or "\r\n" "\n")) (define-peg-pattern digit body (range #\0 #\9)) (define-peg-pattern dquote body "\"") ;; Printable ASCII characters and UTF-8 characters > \x7f (RFC6532) (define-peg-pattern vchar body (and (not-followed-by (or (range #\Nul #\Space) "\x7f")) peg-any)) (define-peg-pattern wsp body (or " " "\t")) (define-peg-pattern lwsp body (* (or wsp (and crlf wsp)))) (define-syntax-rule (define-printable-ascii-character-pattern name . exceptions) (define-peg-pattern name body (and (not-followed-by (or . exceptions)) vchar))) ;;; Quoted characters (define-peg-pattern quoted-pair body (and (ignore "\\") (or vchar wsp))) ;;; Folding white space and comments (define-peg-pattern fws body (and (? (and (* wsp) crlf)) (+ wsp))) (define-printable-ascii-character-pattern ctext "(" ")" "\\") (define-syntax-rule (define-comment-pattern name capture-type) (define-peg-pattern name capture-type (and (ignore "(") (* (and (? fws) ccontent)) (ignore ")")))) (define-comment-pattern comment none) (define-peg-pattern ccontent body (or ctext quoted-pair comment)) (define-syntax-rule (define-cfws-pattern name comment) (define-peg-pattern name body (or (and (+ (and (? fws) comment)) (? fws)) fws))) (define-cfws-pattern cfws comment) ;;; Atom (define-printable-ascii-character-pattern atext "\"" "(" ")" "," "." ":" ";" "<" ">" "@" "[" "\\" "]") (define-peg-pattern atom body (and (? cfws) (+ atext) (? cfws))) (define-peg-pattern dot-atom-text body (and (+ atext) (* (and "." (+ atext))))) (define-syntax-rule (define-dot-atom-pattern name cfws) (define-peg-pattern name body (and (? cfws) dot-atom-text (? cfws)))) (define-dot-atom-pattern dot-atom cfws) ;;; Quoted strings (define-printable-ascii-character-pattern qtext "\\" "\"") (define-peg-pattern qcontent body (or qtext quoted-pair)) ;; TODO: Remove workaround guile peg bug for ignore (define-peg-pattern quoted-string body (and (? cfws) (ignore (and dquote)) (* (and (? fws) qcontent)) (? fws) (ignore (and dquote)) (? cfws))) ;;; Miscellaneous tokens (define-peg-pattern word body (or atom quoted-string)) (define-peg-pattern phrase body (+ word)) ;; ABNF modified to ignore leading whitespace ;; ABNF modified to allow for blank lines in folded field (define-peg-pattern unstructured body (and (ignore (? fws)) (* (and (? fws) (? vchar))) (ignore (* wsp)))) ;;; Date and time specification (define-peg-pattern day-name body (or "Mon" "Tue" "Wed" "Thu" "Fri" "Sat" "Sun")) (define-peg-pattern day-of-week all (and (ignore (? fws)) day-name)) ;; TODO: Remove workaround guile peg bug for ignore (define-peg-pattern day all (and (ignore (? fws)) digit (? digit) (ignore (and fws)))) (define-peg-pattern month all (or "Jan" "Feb" "Mar" "Apr" "May" "Jun" "Jul" "Aug" "Sep" "Oct" "Nov" "Dec")) ;; TODO: Remove workaround guile peg bug for ignore (define-peg-pattern year all (and (ignore (and fws)) digit digit digit digit (ignore (and fws)))) (define-peg-pattern date all (and day month year)) (define-peg-pattern hours all (and digit digit)) (define-peg-pattern minutes all (and digit digit)) (define-peg-pattern seconds all (and digit digit)) (define-peg-pattern time-of-day all (and hours (ignore ":") minutes (? (and (ignore ":") seconds)))) ;; zone-sign, zone-hours and zone-minutes do not exist in the ABNF ;; specified in RFC5322. But, we have it here since it eases parsing ;; the zone and converting it to a number of seconds as required by ;; the make-date procedure of SRFI-19. (define-peg-pattern zone-sign all (or "+" "-")) (define-peg-pattern zone-hours all (and digit digit)) (define-peg-pattern zone-minutes all (and digit digit)) ;; TODO: Remove workaround guile peg bug for ignore (define-peg-pattern zone all (and (ignore (and fws)) zone-sign zone-hours zone-minutes)) (define-peg-pattern time all (and time-of-day zone)) (define-peg-pattern date-time all (and (? (and day-of-week (ignore ","))) date time (? cfws))) ;;; Address specification (define-peg-pattern display-name all phrase) (define-peg-pattern local-part body (or dot-atom quoted-string)) (define-printable-ascii-character-pattern dtext "[" "]" "\\") (define-peg-pattern domain-literal body (and (? cfws) "[" (* (and (? fws) dtext)) (? fws) "]" (? cfws))) (define-syntax-rule (define-domain-pattern name dot-atom) (define-peg-pattern name body (or dot-atom domain-literal))) (define-domain-pattern domain dot-atom) (define-syntax-rule (define-addr-spec-pattern name domain) (define-peg-pattern name body (and local-part "@" domain))) (define-addr-spec-pattern addr-spec domain) (define-syntax-rule (define-angle-addr-pattern name opening-bracket closing-bracket) (define-peg-pattern name body (and (ignore (? cfws)) opening-bracket addr-spec closing-bracket (ignore (? cfws))))) (define-angle-addr-pattern angle-addr "<" ">") ;; When handling email addresses, we need to drop the angle ;; brackets. But in the Received field we need to include the angle ;; brackes. So, we define unbracketed-angle-addr, a variant of ;; angle-addr in which the angle brackets are ignored. (define-angle-addr-pattern unbracketed-angle-addr (ignore "<") (ignore ">")) (define-peg-pattern name-addr body (and (? display-name) unbracketed-angle-addr)) ;; In order to support emacs message mode parens style email ;; addresses, we need to capture comments in address fields and ;; process them as names. But, we need to ignore comments in other ;; parts of the email. In order to facilitate this, we define a set of ;; captured-* patterns which are used in address fields. (define-comment-pattern captured-comment all) (define-cfws-pattern captured-cfws captured-comment) (define-dot-atom-pattern captured-dot-atom captured-cfws) (define-domain-pattern captured-domain captured-dot-atom) (define-addr-spec-pattern captured-addr-spec captured-domain) (define-peg-pattern mailbox all (or name-addr captured-addr-spec)) (define-peg-pattern mailbox-list all (and mailbox (* (and (ignore ",") mailbox)))) (define-peg-pattern group all (and display-name (ignore ":") (? group-list) (ignore ";") (? cfws))) (define-peg-pattern group-list all (or mailbox-list cfws)) (define-peg-pattern address body (or mailbox group)) (define-peg-pattern address-list all (and address (* (and (ignore ",") address)))) ;;; Fields (define-syntax-rule (define-field-pattern name header pattern) (define-peg-pattern name all (and (ignore (string-ci header)) (ignore ":") pattern crlf))) ;;; Origination date field (define-field-pattern orig-date "Date" date-time) ;;; Originator fields (define-field-pattern from "From" (or mailbox-list address-list)) (define-field-pattern sender "Sender" (or mailbox address)) (define-field-pattern reply-to "Reply-To" address-list) ;; Destination address fields (define-field-pattern to "To" address-list) (define-field-pattern cc "Cc" address-list) (define-field-pattern bcc "Bcc" (? (or address-list cfws))) ;;; Identification fields (define-peg-pattern no-fold-literal body (and "[" (* dtext) "]")) (define-peg-pattern id-left body dot-atom-text) (define-peg-pattern id-right body (or dot-atom-text no-fold-literal)) (define-peg-pattern msg-id all (and (ignore (? cfws)) (ignore "<") id-left "@" id-right (ignore ">") (ignore (? cfws)))) (define-field-pattern message-id "Message-ID" msg-id) (define-field-pattern in-reply-to "In-Reply-To" (+ msg-id)) (define-field-pattern references "References" (+ msg-id)) ;;; Informational fields (define-field-pattern subject "Subject" unstructured) (define-field-pattern comments "Comments" unstructured) (define-field-pattern keywords "Keywords" (and phrase (* (and "," phrase)))) ;;; Resent fields (define-field-pattern resent-date "Resent-Date" date-time) (define-field-pattern resent-from "Resent-From" (or mailbox-list address-list)) (define-field-pattern resent-sender "Resent-Sender" (or mailbox address)) (define-field-pattern resent-to "Resent-To" address-list) (define-field-pattern resent-cc "Resent-Cc" address-list) (define-field-pattern resent-bcc "Resent-Bcc" (? (or address-list cfws))) (define-field-pattern resent-msg-id "Resent-Message-ID" msg-id) ;;; Trace fields ;; word is given last priority in the ordered choice (define-peg-pattern received-token body (or angle-addr addr-spec domain word)) (define-field-pattern received "Received" (and (* received-token) (ignore ";") date-time)) (define-peg-pattern path body (or angle-addr (and (? cfws) (ignore "<") (? cfws) (ignore ">") (? cfws)))) (define-field-pattern return "Return-Path" path) (define-peg-pattern trace all (and (? return) (+ received))) ;;; Optional fields (define-printable-ascii-character-pattern ftext ":") (define-peg-pattern field-name all (+ ftext)) (define-peg-pattern optional-field all (and field-name (ignore ":") unstructured crlf)) ;;; MIME version ;; I have prepended optional cfws to account for leading whitespace. (define-field-pattern mime-version "MIME-Version" (and (ignore (? cfws)) (+ digit) "." (+ digit))) ;;; Content type ;; TODO: What is <"> in RFC2045? (define-peg-pattern tspecials body (or "(" ")" "<" ">" "@" "," ";" ":" "\\" "/" "[" "]" "?" "=")) ;; TODO: What is a CTL in RFC2045? (define-peg-pattern token body (+ (and (not-followed-by (or " " crlf tspecials)) peg-any))) ;; TODO: Implement iana-token, ietf-token and x-token (define-peg-pattern iana-token body token) (define-peg-pattern ietf-token body token) (define-peg-pattern x-token body token) (define-peg-pattern extension-token body (or ietf-token x-token)) (define-peg-pattern discrete-type body (or "text" "image" "audio" "video" "application" extension-token)) (define-peg-pattern composite-type body (or "message" "multipart" extension-token)) (define-peg-pattern type all (or discrete-type composite-type)) (define-peg-pattern subtype all (or extension-token iana-token)) (define-peg-pattern attribute all token) ;; quoted-string is given higher precedence (define-peg-pattern value all (or quoted-string token)) (define-peg-pattern parameter all (and attribute (ignore "=") value)) ;; I have prepended optional cfws to account for leading whitespace. (define-field-pattern content "Content-Type" (and (ignore (? cfws)) type (ignore "/") subtype (* (and (ignore ";") (ignore (? cfws)) parameter)))) ;;; Content Disposition as defined in RFC2183 (define-peg-pattern disposition-type body (or "inline" "attachment" extension-token)) (define-peg-pattern quoted-date-time all quoted-string) (define-syntax-rule (define-parameter-pattern name header pattern) (define-peg-pattern name all (and (ignore header) (ignore "=") pattern))) (define-parameter-pattern filename-parm "filename" value) (define-parameter-pattern creation-date-parm "creation-date" quoted-date-time) (define-parameter-pattern modification-date-parm "modification-date" quoted-date-time) (define-parameter-pattern read-date-parm "read-date" quoted-date-time) (define-parameter-pattern size-parm "size" (+ digit)) (define-peg-pattern disposition-parm body (or filename-parm creation-date-parm modification-date-parm read-date-parm size-parm parameter)) ;; I have prepended optional cfws to account for leading whitespace. (define-field-pattern disposition "Content-Disposition" (and (ignore (? cfws)) disposition-type (* (and (ignore ";") (ignore (? cfws)) disposition-parm)))) ;;; Content transfer encoding (define-peg-pattern mechanism body (or "7bit" "8bit" "binary" "quoted-printable" "base64" ietf-token x-token)) (define-field-pattern encoding "Content-Transfer-Encoding" (and (ignore (? cfws)) mechanism)) ;;; Fields ;; The ABNF specified for fields in RFC5322 does not make sense. With ;; it, all headers are eaten up by optional-field. So, as a temporary ;; workaround, I am going with the following much simpler (but ;; possibly incorrect) ABNF. ;; TODO: Try to understand and implement the actual ABNF specified by ;; RFC5322. (define-peg-pattern fields all (* (or trace resent-date resent-from resent-sender resent-to resent-cc resent-bcc resent-msg-id orig-date from sender reply-to to cc bcc message-id in-reply-to references subject comments keywords mime-version content disposition encoding optional-field))) (define-peg-pattern mime-extension-field-name all (and "Content-" (+ ftext))) (define-peg-pattern mime-extension-field all (and mime-extension-field-name (ignore ":") unstructured crlf)) (define-peg-pattern mime-entity-fields all (* (or content disposition encoding mime-extension-field optional-field))) (define (decode-mime-encoded-word word) (regexp-substitute/global #f "=\\?([^?]*)\\?([^?]*)\\?([^?]*)\\?=" word 'pre (lambda (match-record) (let ((charset (match:substring match-record 1)) (encoding (string->lcase-symbol (match:substring match-record 2))) (encoded-text (match:substring match-record 3))) (bytevector->string ((case encoding ((b) base64-decode) ((q) q-encoding-decode) (else (error "Encoding of MIME word unknown" word))) encoded-text) charset 'substitute))) 'post)) (define (body->mime-entities body boundary) "Split bytevector BODY into a list of mime entities separated by BOUNDARY (as explained in RFC2045), and return that list." (define (read-till-boundary port) (read-bytes-till port ;; TODO: Look for the boundary sequence only at the beginning of ;; a line. ;; Boundary is always an ASCII string. (string->bytevector (string-append "--" boundary) "us-ascii"))) (define (read-mime-entity port) (let ((line (get-line-with-delimiter port))) ;; The eof-object? check returns #t only when the message is ;; prematurely truncated. It is invoked only to handle truncated ;; messages gracefully without raising an error. (if (or (eof-object? line) (string-prefix? (string-append "--" boundary "--") line)) (eof-object) (read-till-boundary port)))) (call-with-port (open-bytevector-input-port body) (lambda (port) (read-till-boundary port) (read-objects read-mime-entity port)))) (define (email->headers+body email) "Split EMAIL bytevector into headers and body. Return as multiple values. The returned headers is a string and body is a bytevector." (call-with-port (open-bytevector-input-port email) (lambda (port) ;; Email headers must strictly be ASCII characters. But for the ;; sake of supporting Emacs message mode parens style addresses ;; that may use non-ASCII characters, typically for the full ;; name, we relax this requirement. We assume an encoding of ;; UTF-8, and hope that everything turns out fine. Since UTF-8 ;; is a superset of ASCII, this should not affect standards ;; conforming headers. If encoding is neither UTF-8 nor ASCII, ;; we use the substitute conversion strategy and proceed without ;; raising an error. (set-port-encoding! port "utf-8") (set-port-conversion-strategy! port 'substitute) (let ((headers (read-while port get-line-with-delimiter (lambda (line) (not (or (string= line "\n") (string= line "\r\n"))))))) (get-line-with-delimiter port) (values (if (eof-object? headers) "" headers) (get-bytevector-all port)))))) (define (post-process-mailbox . args) (define process-name (compose decode-mime-encoded-word string-trim-both)) (match args ;; name-addr email address (`(mailbox (display-name ,name) ,address) `((name . ,(process-name name)) (address . ,(string-trim-both address)))) ;; addr-spec email address (`(mailbox ,(? string? address)) `((address . ,(string-trim-both address)))) ;; emacs message mode parens style email address (`(mailbox ,(? list? address-parts)) `((name . ,(match-let ((`((captured-comment ,name)) (flatten-and-filter '(captured-comment) address-parts))) (process-name name))) (address . ,(string-trim-both (string-join (drop-right address-parts 1) ""))))) (_ (error "Failed to parse mailbox")))) (define (post-process-content-transfer-encoding _ value) (list 'content-transfer-encoding (string->lcase-symbol value))) (define (post-process-content-type . args) (define (valid-charset? charset) (catch #t (lambda () (bytevector->string (make-bytevector 0 0) charset) #t) (const #f))) (match args (`(content (type ,type) (subtype ,subtype) . ,parameters) (let ((type (string->lcase-symbol type)) (subtype (string->lcase-symbol subtype)) (parameters (map (match-lambda (`(parameter (attribute ,attribute) (value ,value)) (cons (string->lcase-symbol attribute) value))) (flatten-and-filter '(parameter) parameters)))) `(content-type ,(acons* 'type type 'subtype subtype (cond ((and (eq? type 'text) (or ;; RFC6657 specifies UTF-8 as the default charset for ;; text/* media types. (not (assoc-ref parameters 'charset)) ;; RFC2045 recommends that the default be assumed ;; when a syntactically invalid Content-Type header ;; is encountered. In this implementation, we are ;; only checking for the validity of the ;; charset. Should we check for the validity of the ;; entire Content-Type header? If so, how? (not (valid-charset? (assoc-ref parameters 'charset))))) (acons 'charset "utf-8" parameters)) (else parameters)))))))) (define post-process-content-disposition (match-lambda* (`(disposition ,type . ,parameters) `(content-disposition ,(acons 'type (string->lcase-symbol type) (map (match-lambda (('filename-parm ('value filename)) (cons 'filename (basename filename))) (((? (lambda (date-parm) (member date-parm '(creation-date-parm modification-date-parm read-date-parm))) date-parm) value) ;; TODO: Convert to SRFI-19 datetime (cons date-parm value)) (('size-parm value) (cons 'size (string->number value))) (`(parameter (attribute ,attribute) (value ,value)) (cons (string->lcase-symbol attribute) value))) (flatten-and-filter '(filename-parm creation-date-parm modification-date-parm read-date-parm size-parm parameter) parameters))))))) (define post-process-optional-field (match-lambda* (`(optional-field (field-name ,field-name) ,field-value) (list (string->lcase-symbol field-name) field-value)) (`(optional-field (field-name ,field-name)) (list field-name "")))) (define* (macro-process-address-list _ . addresses) (flatten-and-filter '(address mailbox) addresses)) (define (parse-email-address address) "Parse ADDRESS as an email address and return an association list with keys being the symbols name and address, and values being the display-name and addr-spec respectively. display-name and addr-spec are as defined in RFC5322. For example, (parse-email-address \"Foo \") => ((name . \"Foo\") (address . \"foo@example.org\")) (parse-email-address \"foo@example.org\") => ((address . \"foo@example.org\"))" (pre-post-order (peg:tree (match-pattern mailbox address)) `((mailbox . ,post-process-mailbox) (*text* . ,(lambda (_ text) text)) (*default* . ,(lambda tree tree))))) (define interpret-address (match-lambda ((('name . name) ('address . address)) (format #f "~a <~a>" name address)) ((('address . address)) address))) (define (parse-email-body headers body) "Parse BODY as email body where HEADERS is an association list of header keys and values as returned by parse-email-headers. Return a list of records if the body is a multipart message. Else, return a single record." (match body ((? string? body) (parse-email-body headers (string->bytevector body "utf-8"))) ((? bytevector? body) (let ((content-type (assoc-ref headers 'content-type))) (case (assoc-ref content-type 'type) ((multipart) (map (cut parse-mime-entity headers <>) (body->mime-entities body (assoc-ref content-type 'boundary)))) ((text) (string-trim-both (decode-body body (assoc-ref headers 'content-transfer-encoding) (assoc-ref content-type 'charset)))) (else (decode-body body (assoc-ref headers 'content-transfer-encoding)))))))) (define (add-default-headers headers) ;; Default Content-Type and Content-Transfer-Encoding headers as ;; specified in RFC2045 (let ((default-headers (acons* 'content-type '((type . text) (subtype . plain) ;; UTF-8 is specified as the default ;; charset in RFC6657 (charset . "utf-8")) 'content-transfer-encoding '#{7bit}#))) (append (alist-delete* (append (if (assoc-ref headers 'content-type) (list 'content-type) (list)) (if (assoc-ref headers 'content-transfer-encoding) (list 'content-transfer-encoding) (list))) default-headers) headers))) (define (add-default-mime-entity-headers parent-headers headers) ;; Default Content-Type and Content-Transfer-Encoding headers as ;; specified in RFC2045 and RFC2046 (let* ((parent-content-type (assoc-ref parent-headers 'content-type)) (default-headers (acons* 'content-type `(,@(if (and (eq? (assoc-ref parent-content-type 'type) 'multipart) (eq? (assoc-ref parent-content-type 'subtype) 'digest)) '((type . message) (subtype . rfc822)) '((type . text) (subtype . plain))) ;; UTF-8 is specified as the default ;; charset in RFC6657 (charset . "utf-8")) 'content-transfer-encoding '#{7bit}#))) (append (alist-delete* (append (if (assoc-ref headers 'content-type) (list 'content-type) (list)) (if (assoc-ref headers 'content-transfer-encoding) (list 'content-transfer-encoding) (list))) default-headers) headers))) (define (parse-mime-entity parent-headers bv) (let-values (((headers body) (email->headers+body bv))) (let ((headers (pre-post-order (peg:tree (match-pattern mime-entity-fields headers)) `((content . ,post-process-content-type) (encoding . ,post-process-content-transfer-encoding) (disposition . ,post-process-content-disposition) (optional-field . ,post-process-optional-field) (mime-entity-fields . ,(lambda (_ . mime-entity-fields) (add-default-mime-entity-headers parent-headers (map (match-lambda ((mime-entity-field value) (cons mime-entity-field value)) ((mime-entity-field . values) (cons mime-entity-field values))) mime-entity-fields)))) (*text* . ,(match-lambda* ;; Handle MIME entities that have no ;; headers. ((_ 'mime-entity-fields) (add-default-mime-entity-headers parent-headers '())) ((_ text) text))) (*default* . ,(lambda tree tree)))))) (make-mime-entity headers (parse-email-body headers body))))) (define (parse-email email) "Parse string EMAIL and return result as an record." (match email ((? string? email) (parse-email (string->bytevector email "utf-8"))) ((? bytevector? email) (let-values (((headers body) (email->headers+body email))) (let ((headers-alist (parse-email-headers headers))) (make-email headers-alist (parse-email-body headers-alist body))))))) (define (parse-email-headers headers) "Parse string HEADERS as email headers and return an association list of header keys and values." (pre-post-order (peg:tree (match-pattern fields headers)) `((date-time . ,(lambda node (match-let ((`((day ,day) (month ,month) (year ,year) (hours ,hours) (minutes ,minutes) (seconds ,seconds) (zone-sign ,zone-sign) (zone-hours ,zone-hours) (zone-minutes ,zone-minutes)) (flatten-and-filter '(day month year hours minutes seconds zone-sign zone-hours zone-minutes) node))) (make-date 0 (string->number seconds) (string->number minutes) (string->number hours) (string->number day) (1+ (list-index (cut equal? <> month) (list "Jan" "Feb" "Mar" "Apr" "May" "Jun" "Jul" "Aug" "Sep" "Oct" "Nov" "Dec"))) (string->number year) (* (case (string->symbol zone-sign) ((+) 1) ((-) -1)) (+ (* 60 60 (string->number zone-hours)) (* 60 (string->number zone-minutes)))))))) (orig-date . ,(lambda (_ date) (list 'date date))) (mailbox . ,post-process-mailbox) (address-list *macro* . ,macro-process-address-list) (mailbox-list *macro* . ,macro-process-address-list) (optional-field . ,post-process-optional-field) (msg-id . ,(match-lambda* (`(msg-id ,msg-id) msg-id))) (content . ,post-process-content-type) (encoding . ,post-process-content-transfer-encoding) (disposition . ,post-process-content-disposition) (subject . ,(match-lambda* (`(subject ,subject) `(subject ,(decode-mime-encoded-word subject))))) (fields . ,(lambda (_ . fields) (add-default-headers (filter-map (match-lambda ((field value) (cons field value)) ((field . values) (cons field values)) (_ #f)) fields)))) (*text* . ,(lambda (_ value) value)) (*default* . ,(lambda tree tree))))) (define* (decode-body body encoding #:optional charset) (let ((decoded-octets (case encoding ((base64) (base64-decode (string-filter (char-set-union (ucs-range->char-set (char->integer #\a) (1+ (char->integer #\z))) (ucs-range->char-set (char->integer #\A) (1+ (char->integer #\Z))) (ucs-range->char-set (char->integer #\0) (1+ (char->integer #\9))) (char-set #\+ #\/ #\=)) (bytevector->string body "us-ascii")))) ((quoted-printable) (quoted-printable-decode (bytevector->string body "us-ascii"))) ((#{7bit}# #{8bit}# binary) body) (else (error "Body decoding failed. Unknown encoding" encoding))))) (if charset (bytevector->string decoded-octets charset) decoded-octets))) (define (read-next-email-in-mbox port) "Read next email from PORT and return as a bytevector. PORT is an input port reading an mbox file." ;; Read and discard From_ line (get-line-with-delimiter port) ;; Read the actual email (let ((email (read-bytes-till port (string->bytevector "\nFrom " "us-ascii")))) (read-char port) email)) (define (mbox->emails port) "Read all emails from PORT and return as a list of bytevectors. PORT is an input port reading an mbox file." (read-objects read-next-email-in-mbox port))