;;; guile-email --- Guile email parser
;;; Copyright © 2017 Ricardo Wurmus <rekado@elephly.net>
;;; Copyright © 2018, 2019, 2020, 2021, 2023, 2025 Arun Isaac <arunisaac@systemreboot.net>
;;; Copyright © 2023 Andrew Whatson <whatson@tailcall.au>
;;;
;;; This file was adapted from guile-debbugs and is part of guile-email.
;;;
;;; guile-email is free software; you can redistribute it and/or modify
;;; it under the terms of the GNU Affero General Public License as
;;; published by the Free Software Foundation; either version 3 of the
;;; License, or (at your option) any later version.
;;;
;;; guile-email is distributed in the hope that it will be useful, but
;;; WITHOUT ANY WARRANTY; without even the implied warranty of
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;;; Affero General Public License for more details.
;;;
;;; You should have received a copy of the GNU Affero General Public
;;; License along with guile-email. If not, see
;;; <http://www.gnu.org/licenses/>.
;;;
;;; This is an adaptation of "ext/rfc/test.scm" from Gauche Scheme.
;;; The file is under the BSD-3 license, reproduced below:
;;;
;;; Copyright (c) 2000-2017 Shiro Kawai <shiro@acm.org>
;;;
;;; Redistribution and use in source and binary forms, with or without
;;; modification, are permitted provided that the following conditions
;;; are met:
;;;
;;; 1. Redistributions of source code must retain the above copyright
;;; notice, this list of conditions and the following disclaimer.
;;;
;;; 2. Redistributions in binary form must reproduce the above copyright
;;; notice, this list of conditions and the following disclaimer in the
;;; documentation and/or other materials provided with the distribution.
;;;
;;; 3. Neither the name of the authors nor the names of its contributors
;;; may be used to endorse or promote products derived from this
;;; software without specific prior written permission.
;;;
;;; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
;;; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
;;; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
;;; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
;;; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
;;; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
;;; TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
;;; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
;;; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
(use-modules (email email)
(ice-9 binary-ports)
(ice-9 iconv)
(srfi srfi-1)
(srfi srfi-19)
(srfi srfi-64))
(define (alist=? headers1 headers2)
(lset= equal? headers1 headers2))
(define (body=? body1 body2)
(cond
((and (list? body1) (list? body2))
(every mime-entity=? body1 body2))
((and (string? body1) (string? body2))
(string=? body1 body2))
(else #f)))
(define (email=? email1 email2)
(and (alist=? (email-headers email1)
(email-headers email2))
(body=? (email-body email1)
(email-body email2))))
(define (mime-entity=? entity1 entity2)
(and (alist=? (mime-entity-headers entity1)
(mime-entity-headers entity2))
(body=? (mime-entity-body entity1)
(mime-entity-body entity2))))
(define-syntax-rule (test-alist= test-name actual expected)
(test-assert test-name
(alist=? actual expected)))
(define-syntax-rule (test-email= test-name actual expected)
(test-assert test-name
(email=? actual expected)))
(test-begin "email")
;;;
;;; Emails
;;;
(test-alist= "parse email headers"
(parse-email-headers
"Received: by foo.bar.com id ZZZ55555; Thu, 31 May 2001 16:38:04 -1000 (HST)
Received: from ooo.ooo.com (ooo.ooo.com [1.2.3.4])
\tby foo.bar.com (9.9.9+3.2W/3.7W-) with ESMTP id ZZZ55555
\tfor <yoo@bar.com>; Thu, 31 May 2001 16:38:02 -1000 (HST)
Received: from zzz ([1.2.3.5]) by ooo.ooo.com with Maccrosoft SMTPSVC(5.5.1877.197.19);
\t Thu, 31 May 2001 22:33:16 -0400
Message-ID: <beefbeefbeefbeef@ooo.ooo.com>
Subject: Bogus Tester
From: Bogus Sender <bogus@ooo.com>
To: You <you@bar.com>, Another <another@ooo.com>
Date: Fri, 01 Jun 2001 02:37:31 +0530
Mime-Version: 1.0
Content-Type: text/html
Content-Transfer-Encoding: quoted-printable
X-MSMail-Priority: Normal
X-mailer: FooMail 4.0 4.03 (SMT460B92F)
Content-Length: 4349
")
`((trace (received "by foo.bar.com id ZZZ55555"
,(make-date 0 4 38 16 31 5 2001 -36000))
(received "from ooo.ooo.com by foo.bar.com with ESMTP id ZZZ55555 for <yoo@bar.com>"
,(make-date 0 2 38 16 31 5 2001 -36000))
(received "from zzz by ooo.ooo.com with Maccrosoft SMTPSVC"
,(make-date 0 16 33 22 31 5 2001 -14400)))
(message-id . "beefbeefbeefbeef@ooo.ooo.com")
(subject . "Bogus Tester")
(from ((name . "Bogus Sender")
(address . "bogus@ooo.com")))
(to ((name . "You")
(address . "you@bar.com"))
((name . "Another")
(address . "another@ooo.com")))
(date . ,(make-date 0 31 37 2 1 6 2001 19800))
(mime-version . "1.0")
(content-type (type . text)
(subtype . html)
(charset . "utf-8"))
(content-transfer-encoding . quoted-printable)
(x-msmail-priority . "Normal")
(x-mailer . "FooMail 4.0 4.03 (SMT460B92F)")
(content-length . "4349")))
(test-email= "RFC5322 A.1.1. A message from one person to another with simple addressing"
(parse-email
"From: John Doe <jdoe@machine.example>
Sender: Michael Jones <mjones@machine.example>
To: Mary Smith <mary@example.net>
Subject: Saying Hello
Date: Fri, 21 Nov 1997 09:55:06 -0600
Message-ID: <1234@local.machine.example>
This is a message just to say hello.
So, \"Hello\".")
(make-email
`((content-type (type . text)
(subtype . plain)
(charset . "utf-8"))
(content-transfer-encoding . 7bit)
(from ((name . "John Doe")
(address . "jdoe@machine.example")))
(sender (name . "Michael Jones")
(address . "mjones@machine.example"))
(to ((name . "Mary Smith")
(address . "mary@example.net")))
(subject . "Saying Hello")
(date . ,(make-date 0 6 55 9 21 11 1997 -21600))
(message-id . "1234@local.machine.example"))
"This is a message just to say hello.
So, \"Hello\"."))
(test-email= "RFC5322 A.1.2. Different types of mailboxes"
(parse-email
"From: \"Joe Q. Public\" <john.q.public@example.com>
To: Mary Smith <mary@x.test>, jdoe@example.org, Who? <one@y.test>
Cc: <boss@nil.test>, \"Giant; \\\"Big\\\" Box\" <sysservices@example.net>
Date: Tue, 1 Jul 2003 10:52:37 +0200
Message-ID: <5678.21-Nov-1997@example.com>
Hi everyone.")
(make-email
`((content-type (type . text)
(subtype . plain)
(charset . "utf-8"))
(content-transfer-encoding . 7bit)
(from ((name . "Joe Q. Public")
(address . "john.q.public@example.com")))
(to ((name . "Mary Smith")
(address . "mary@x.test"))
((address . "jdoe@example.org"))
((name . "Who?")
(address . "one@y.test")))
(cc ((address . "boss@nil.test"))
((name . "Giant; \"Big\" Box")
(address . "sysservices@example.net")))
(date . ,(make-date 0 37 52 10 1 7 2003 7200))
(message-id . "5678.21-Nov-1997@example.com"))
"Hi everyone."))
(test-email= "RFC5322 A.6.1. Obsolete addressing"
(parse-email
(string->bytevector
"From: Joe Q. Public <john.q.public@example.com>
To: Mary Smith <@node.test:mary@example.net>, , jdoe@test . example
Date: Tue, 1 Jul 2003 10:52:37 +0200
Message-ID: <5678.21-Nov-1997@example.com>
Hi everyone.
"
"utf-8"))
(make-email
`((from ((name . "Joe Q. Public")
(address . "john.q.public@example.com")))
(to ((name . "Mary Smith")
(address . "mary@example.net"))
((address . "jdoe@test . example")))
(date . ,(make-date 0 37 52 10 1 7 2003 7200))
(message-id . "5678.21-Nov-1997@example.com")
(content-type (type . text)
(subtype . plain)
(charset . "utf-8"))
(content-transfer-encoding . 7bit))
"Hi everyone."))
(test-email= "RFC5322 A.6.2. Obsolete dates"
(parse-email
(string->bytevector
"From: John Doe <jdoe@machine.example>
To: Mary Smith <mary@example.net>
Subject: Saying Hello
Date: 21 Nov 97 09:55:06 GMT
Message-ID: <1234@local.machine.example>
This is a message just to say hello.
So, \"Hello\".
"
"utf-8"))
(make-email
`((from ((name . "John Doe")
(address . "jdoe@machine.example")))
(to ((name . "Mary Smith")
(address . "mary@example.net")))
(subject . "Saying Hello")
(date . ,(make-date 0 6 55 9 21 11 1997 0))
(message-id . "1234@local.machine.example")
(content-type (type . text)
(subtype . plain)
(charset . "utf-8"))
(content-transfer-encoding . 7bit))
"This is a message just to say hello.
So, \"Hello\"."))
(test-email= "RFC5322 A.6.3. Obsolete white space and comments"
(parse-email
(string->bytevector
"From : John Doe <jdoe@machine(comment). example>
To : Mary Smith
<mary@example.net>
Subject : Saying Hello
Date : Fri, 21 Nov 1997 09(comment): 55 : 06 -0600
Message-ID : <1234 @ local(blah) .machine .example>
This is a message just to say hello.
So, \"Hello\".
"
"utf-8"))
(make-email
`((from ((name . "John Doe")
(address . "jdoe@machine.example")))
(to ((name . "Mary Smith")
(address . "mary@example.net")))
(subject . "Saying Hello")
(date . ,(make-date 0 6 55 9 21 11 1997 -21600))
(message-id . "1234@local.machine.example")
(content-type (type . text)
(subtype . plain)
(charset . "utf-8"))
(content-transfer-encoding . 7bit))
"This is a message just to say hello.
So, \"Hello\"."))
(test-email= "RFC2046 5.1.1. Common syntax"
(parse-email
(string->bytevector
"From: Nathaniel Borenstein <nsb@bellcore.com>
To: Ned Freed <ned@innosoft.com>
Date: Sun, 21 Mar 1993 23:56:48 -0800 (PST)
Subject: Sample message
MIME-Version: 1.0
Content-type: multipart/mixed; boundary=\"simple boundary\"
This is the preamble. It is to be ignored, though it
is a handy place for composition agents to include an
explanatory note to non-MIME conformant readers.
--simple boundary
This is implicitly typed plain US-ASCII text.
It does NOT end with a linebreak.
--simple boundary
Content-type: text/plain; charset=us-ascii
This is explicitly typed plain US-ASCII text.
It DOES end with a linebreak.
--simple boundary--
This is the epilogue. It is also to be ignored.
" "utf-8"))
(make-email
`((content-transfer-encoding . 7bit)
(from ((name . "Nathaniel Borenstein")
(address . "nsb@bellcore.com")))
(to ((name . "Ned Freed")
(address . "ned@innosoft.com")))
(date . ,(make-date 0 48 56 23 21 3 1993 -28800))
(subject . "Sample message")
(mime-version . "1.0")
(content-type (type . multipart)
(subtype . mixed)
(boundary . "simple boundary")))
(list (make-mime-entity '((content-type (type . text)
(subtype . plain)
(charset . "utf-8"))
(content-transfer-encoding . 7bit))
"This is implicitly typed plain US-ASCII text.
It does NOT end with a linebreak.")
(make-mime-entity '((content-transfer-encoding . 7bit)
(content-type (type . text)
(subtype . plain)
(charset . "us-ascii")))
"This is explicitly typed plain US-ASCII text.
It DOES end with a linebreak."))))
(test-equal "decode MIME entity without headers"
((module-ref (resolve-module '(email email))
'parse-mime-entity)
'((content-type (type . multipart)
(subtype . mixed)))
(string->bytevector
"
This is implicitly typed plain US-ASCII text.
It does NOT end with a linebreak.
" "utf-8"))
(make-mime-entity '((content-type (type . text)
(subtype . plain)
(charset . "utf-8"))
(content-transfer-encoding . 7bit))
"This is implicitly typed plain US-ASCII text.
It does NOT end with a linebreak."))
(test-email= "email with 8 bit encoding and non UTF-8 charset"
(call-with-input-file "tests/email-with-8bit-encoding-and-non-utf8-charset"
(compose parse-email get-bytevector-all))
(make-email
`((from ((name . "John Doe")
(address . "jdoe@machine.example")))
(to ((name . "Mary Smith")
(address . "mary@example.net")))
(subject . "Saying Hello")
(date . ,(make-date 0 6 55 9 21 11 1997 -21600))
(message-id . "1234@local.machine.example")
(content-type (type . text)
(subtype . plain)
(charset . "ISO-8859-7"))
(content-transfer-encoding . 8bit))
"Hello Foo’."))
(test-email= "multipart email with a 8 bit encoding and non UTF-8 charset part"
(call-with-input-file "tests/multipart-email-with-a-8bit-encoding-and-non-utf8-charset-part"
(compose parse-email get-bytevector-all))
(make-email
`((content-transfer-encoding . 7bit)
(from ((name . "John Doe")
(address . "jdoe@machine.example")))
(to ((name . "Mary Smith")
(address . "mary@example.net")))
(subject . "Saying Hello")
(date . ,(make-date 0 6 55 9 21 11 1997 -21600))
(message-id . "1234@local.machine.example")
(content-type (type . multipart)
(subtype . mixed)
(boundary . "boundary")))
(list (make-mime-entity
`((content-type (type . text)
(subtype . plain)
(charset . "ISO-8859-7"))
(content-transfer-encoding . 8bit))
"Hello Foo’."))))
(test-equal "handle truncated multipart message gracefully"
((module-ref (resolve-module '(email email))
'body->mime-entities)
(string->bytevector
"--boundary
Content-Type: text/plain
foo
" "utf-8")
"boundary")
(list (string->bytevector "Content-Type: text/plain
foo
" "utf-8")))
(test-email= "decode utf-8 characters in headers"
(parse-email
(string->bytevector
"From: foo@bar.org (Foo Bãr)
body" "utf-8"))
(make-email
`((content-type (type . text)
(subtype . plain)
(charset . "utf-8"))
(content-transfer-encoding . 7bit)
(from ((name . "Foo Bãr")
(address . "foo@bar.org"))))
"body"))
(test-email= "tolerate non-ascii non-utf-8 characters in headers"
(parse-email
(string->bytevector
"From: foo@bar.org (Foo Bãr)
body" "iso-8859-1"))
(make-email
`((content-type (type . text)
(subtype . plain)
(charset . "utf-8"))
(content-transfer-encoding . 7bit)
(from ((name . "Foo B�r")
(address . "foo@bar.org"))))
"body"))
(test-alist= "tolerate invalid charset"
(parse-email-headers
"Content-Type: text/plain; charset=foo
")
`((content-transfer-encoding . 7bit)
(content-type (type . text)
(subtype . plain)
(charset . "utf-8"))))
(test-email= "tolerate decoding errors in body"
(parse-email
"Content-Transfer-Encoding: quoted-printable
copyright =A9")
(make-email
`((content-type (type . text)
(subtype . plain)
(charset . "utf-8"))
(content-transfer-encoding . quoted-printable))
"copyright �"))
(test-alist= "Keywords header must be a list"
(parse-email-headers
"Keywords: foo, bar
")
`((keywords " foo" " bar")
(content-type (type . text)
(subtype . plain)
(charset . "utf-8"))
(content-transfer-encoding . 7bit)))
(test-alist= "blank Subject header must be treated as having the null string as value"
(parse-email-headers
"Subject:
")
'((subject . "")
(content-type (type . text)
(subtype . plain)
(charset . "utf-8"))
(content-transfer-encoding . 7bit)))
(test-expect-fail
"References header with only one reference must be a singleton list, not a string")
(test-alist= "References header with only one reference must be a singleton list, not a string"
(parse-email-headers
"References: <foo@bar.org>
")
'((references . ("foo@bar.org"))
(content-type (type . text)
(subtype . plain)
(charset . "utf-8"))
(content-transfer-encoding . 7bit)))
(test-expect-fail
"Trace with only one Received header should be a list of received traces, not a single received trace")
(test-alist= "Trace with only one Received header should be a list of received traces, not a single received trace"
(parse-email-headers
"Received: by foo.bar.com id ZZZ55555; Thu, 31 May 2001 16:38:04 -1000 (HST)
")
`((trace (received "by foo.bar.com id ZZZ55555"
,(make-date 0 4 38 16 31 5 2001 -36000)))
(content-type (type . text)
(subtype . plain)
(charset . "utf-8"))
(content-transfer-encoding . 7bit)))
(test-alist= "parse obsolete Received header"
(parse-email-headers
"Received: by foo.bar.com id ZZZ55555
Received: from zzz ([1.2.3.5]) by ooo.ooo.com with Maccrosoft SMTPSVC(5.5.1877.197.19)
")
'((trace (received "by foo.bar.com id ZZZ55555")
(received "from zzz by ooo.ooo.com with Maccrosoft SMTPSVC"))
(content-type (type . text)
(subtype . plain)
(charset . "utf-8"))
(content-transfer-encoding . 7bit)))
(test-alist= "parse names with more than two words"
(parse-email-headers
"From: Foo Bar Foobar <foo@bar.org>
")
`((from ((name . "Foo Bar Foobar")
(address . "foo@bar.org")))
(content-type (type . text)
(subtype . plain)
(charset . "utf-8"))
(content-transfer-encoding . 7bit)))
;; See §6.4 in RFC2045.
(test-alist= "assume application/octet-stream Content-Type if Content-Transfer-Encoding is unrecognized"
(parse-email-headers
"Content-Transfer-Encoding: some-unrecognized-encoding
Content-Type: text/plain; charset=utf-8
")
`((content-type (type . application)
(subtype . octet-stream))
(content-transfer-encoding . binary)))
(test-expect-fail
"Trace with only one Received header must be a list of lists, not a list")
(test-alist= "Trace with only one Received header must be a list of lists, not a list"
(parse-email-headers
"Received: from foo; Sun, 24 Jan 2021 13:45:20 -0500
")
`((trace (received "from foo"
,(make-date 0 20 45 13 24 1 2021 -18000)))
(content-type (type . text)
(subtype . plain)
(charset . "utf-8"))
(content-transfer-encoding . 7bit)))
;; TODO: Fix this test once previous test is addressed.
(test-alist= "parse Received header with two tokens but no timestamp"
(parse-email-headers
"Received: from foo
")
`((trace received "from foo")
(content-type (type . text)
(subtype . plain)
(charset . "utf-8"))
(content-transfer-encoding . 7bit)))
(test-alist= "parse Date without seconds"
(parse-email-headers
"Date: Tue, 22 Nov 94 17:52 GMT
")
`((date . ,(make-date 0 0 52 17 22 11 1994 0))
(content-type (type . text)
(subtype . plain)
(charset . "utf-8"))
(content-transfer-encoding . 7bit)))
;;;
;;; Email addresses
;;;
(test-equal "parse name-addr email address"
(parse-email-address "Foo <foo@example.org>")
'((name . "Foo") (address . "foo@example.org")))
(test-equal "parse addr-spec email address"
(parse-email-address "foo@example.org")
'((address . "foo@example.org")))
(test-equal "parse emacs message mode parens style email address"
(parse-email-address "foo@example.org (Foo)")
'((name . "Foo") (address . "foo@example.org")))
(test-equal "parse email addresses with period in name"
(parse-email-address "Foo P. Bar <foo@example.com>")
'((name . "Foo P. Bar") (address . "foo@example.com")))
(test-equal "tolerate email addresses with parentheses in name"
(parse-email-address "Foo(Bar <foo@example.com>")
'((name . "Foo(Bar") (address . "foo@example.com")))
(test-equal "tolerate email addresses with square brackets in name"
(parse-email-address "Foo [Bar] <foo@example.com>")
'((name . "Foo [Bar]") (address . "foo@example.com")))
(test-equal "quote display-name when serializing email addresses with illegal characters"
"\"Foo [Bar]\" <foo@example.com>"
(interpret-address '((name . "Foo [Bar]") (address . "foo@example.com"))))
(test-equal "do not quote display-name when serializing email addresses without illegal characters"
"FooBar <foo@example.com>"
(interpret-address '((name . "FooBar") (address . "foo@example.com"))))
;;;
;;; MIME encoded words
;;;
(test-equal "decode MIME encoded word: wikipedia example"
((module-ref (resolve-module '(email email))
'decode-mime-encoded-word)
"=?iso-8859-1?Q?=A1Hola,_se=F1or!?=")
"¡Hola, señor!")
(test-equal "decode MIME encoded phrases that mix ASCII text and MIME encoded words"
((module-ref (resolve-module '(email email))
'decode-mime-encoded-word)
"Foo =?UTF-8?Q?B=C3=A3r?=")
"Foo Bãr")
(test-equal "decode MIME encoded phrases that contain multiple MIME encoded words each with their own encoding"
((module-ref (resolve-module '(email email))
'decode-mime-encoded-word)
"=?iso-8859-1?Q?=A1Hola,_se=F1or!?= =?UTF-8?Q?B=C3=A3r?=")
"¡Hola, señor! Bãr")
(test-alist= "decode MIME encoded words in Subject header"
(parse-email-headers "Subject: Foo =?UTF-8?Q?B=C3=A3r?=
")
`((content-type (type . text)
(subtype . plain)
(charset . "utf-8"))
(content-transfer-encoding . 7bit)
(subject . "Foo Bãr")))
(test-equal "tolerate decoding errors in MIME encoded words"
((module-ref (resolve-module '(email email))
'decode-mime-encoded-word)
"=?UTF-8?Q?B=E8r?=")
"B�r")
(test-end "email")