aboutsummaryrefslogtreecommitdiff
;;; guile-email --- Guile email parser
;;; Copyright © 2017 Ricardo Wurmus <rekado@elephly.net>
;;; Copyright © 2018, 2019, 2020, 2021, 2023 Arun Isaac <arunisaac@systemreboot.net>
;;; Copyright © 2023 Andrew Whatson <whatson@tailcall.au>
;;;
;;; This file was adapted from guile-debbugs and is part of guile-email.
;;;
;;; guile-email is free software; you can redistribute it and/or modify
;;; it under the terms of the GNU Affero General Public License as
;;; published by the Free Software Foundation; either version 3 of the
;;; License, or (at your option) any later version.
;;;
;;; guile-email is distributed in the hope that it will be useful, but
;;; WITHOUT ANY WARRANTY; without even the implied warranty of
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;;; Affero General Public License for more details.
;;;
;;; You should have received a copy of the GNU Affero General Public
;;; License along with guile-email.  If not, see
;;; <http://www.gnu.org/licenses/>.
;;;
;;; This is an adaptation of "ext/rfc/test.scm" from Gauche Scheme.
;;; The file is under the BSD-3 license, reproduced below:
;;;
;;;   Copyright (c) 2000-2017  Shiro Kawai  <shiro@acm.org>
;;;
;;;   Redistribution and use in source and binary forms, with or without
;;;   modification, are permitted provided that the following conditions
;;;   are met:
;;;
;;;    1. Redistributions of source code must retain the above copyright
;;;       notice, this list of conditions and the following disclaimer.
;;;
;;;    2. Redistributions in binary form must reproduce the above copyright
;;;       notice, this list of conditions and the following disclaimer in the
;;;       documentation and/or other materials provided with the distribution.
;;;
;;;    3. Neither the name of the authors nor the names of its contributors
;;;       may be used to endorse or promote products derived from this
;;;       software without specific prior written permission.
;;;
;;;   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
;;;   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
;;;   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
;;;   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
;;;   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
;;;   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
;;;   TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
;;;   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
;;;   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
;;;   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
;;;   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

(use-modules (email email)
             (ice-9 binary-ports)
             (ice-9 iconv)
             (srfi srfi-1)
             (srfi srfi-19)
             (srfi srfi-64))

(define (alist=? headers1 headers2)
  (lset= equal? headers1 headers2))

(define (body=? body1 body2)
  (cond
   ((and (list? body1) (list? body2))
    (every mime-entity=? body1 body2))
   ((and (string? body1) (string? body2))
    (string=? body1 body2))
   (else #f)))

(define (email=? email1 email2)
  (and (alist=? (email-headers email1)
                (email-headers email2))
       (body=? (email-body email1)
               (email-body email2))))

(define (mime-entity=? entity1 entity2)
  (and (alist=? (mime-entity-headers entity1)
                (mime-entity-headers entity2))
       (body=? (mime-entity-body entity1)
               (mime-entity-body entity2))))

(define-syntax-rule (test-alist= test-name actual expected)
  (test-assert test-name
    (alist=? actual expected)))

(define-syntax-rule (test-email= test-name actual expected)
  (test-assert test-name
    (email=? actual expected)))

(test-begin "email")


;;;
;;; Emails
;;;

(test-alist= "parse email headers"
  (parse-email-headers
   "Received: by foo.bar.com id ZZZ55555; Thu, 31 May 2001 16:38:04 -1000 (HST)
Received: from ooo.ooo.com (ooo.ooo.com [1.2.3.4])
\tby foo.bar.com (9.9.9+3.2W/3.7W-) with ESMTP id ZZZ55555
\tfor <yoo@bar.com>; Thu, 31 May 2001 16:38:02 -1000 (HST)
Received: from zzz ([1.2.3.5]) by ooo.ooo.com  with Maccrosoft SMTPSVC(5.5.1877.197.19);
\t Thu, 31 May 2001 22:33:16 -0400
Message-ID: <beefbeefbeefbeef@ooo.ooo.com>
Subject: Bogus Tester
From: Bogus Sender <bogus@ooo.com>
To: You <you@bar.com>, Another <another@ooo.com>
Date: Fri, 01 Jun 2001 02:37:31 +0530
Mime-Version: 1.0
Content-Type: text/html
Content-Transfer-Encoding: quoted-printable
X-MSMail-Priority: Normal
X-mailer: FooMail 4.0 4.03 (SMT460B92F)
Content-Length: 4349
")
  `((trace (received "by foo.bar.com id ZZZ55555"
                     ,(make-date 0 4 38 16 31 5 2001 -36000))
           (received "from ooo.ooo.com by foo.bar.com with ESMTP id ZZZ55555 for <yoo@bar.com>"
                     ,(make-date 0 2 38 16 31 5 2001 -36000))
           (received "from zzz by ooo.ooo.com with Maccrosoft SMTPSVC"
                     ,(make-date 0 16 33 22 31 5 2001 -14400)))
    (message-id . "beefbeefbeefbeef@ooo.ooo.com")
    (subject . "Bogus Tester")
    (from ((name . "Bogus Sender")
           (address . "bogus@ooo.com")))
    (to ((name . "You")
         (address . "you@bar.com"))
        ((name . "Another")
         (address . "another@ooo.com")))
    (date . ,(make-date 0 31 37 2 1 6 2001 19800))
    (mime-version . "1.0")
    (content-type (type . text)
                  (subtype . html)
                  (charset . "utf-8"))
    (content-transfer-encoding . quoted-printable)
    (x-msmail-priority . "Normal")
    (x-mailer . "FooMail 4.0 4.03 (SMT460B92F)")
    (content-length . "4349")))

(test-email= "RFC5322 A.1.1. A message from one person to another with simple addressing"
  (parse-email
   "From: John Doe <jdoe@machine.example>
Sender: Michael Jones <mjones@machine.example>
To: Mary Smith <mary@example.net>
Subject: Saying Hello
Date: Fri, 21 Nov 1997 09:55:06 -0600
Message-ID: <1234@local.machine.example>

This is a message just to say hello.
So, \"Hello\".")
  (make-email
   `((content-type (type . text)
                   (subtype . plain)
                   (charset . "utf-8"))
     (content-transfer-encoding . 7bit)
     (from ((name . "John Doe")
            (address . "jdoe@machine.example")))
     (sender (name . "Michael Jones")
             (address . "mjones@machine.example"))
     (to ((name . "Mary Smith")
          (address . "mary@example.net")))
     (subject . "Saying Hello")
     (date . ,(make-date 0 6 55 9 21 11 1997 -21600))
     (message-id . "1234@local.machine.example"))
   "This is a message just to say hello.
So, \"Hello\"."))

(test-email= "RFC5322 A.1.2. Different types of mailboxes"
  (parse-email
   "From: \"Joe Q. Public\" <john.q.public@example.com>
To: Mary Smith <mary@x.test>, jdoe@example.org, Who? <one@y.test>
Cc: <boss@nil.test>, \"Giant; \\\"Big\\\" Box\" <sysservices@example.net>
Date: Tue, 1 Jul 2003 10:52:37 +0200
Message-ID: <5678.21-Nov-1997@example.com>

Hi everyone.")
  (make-email
   `((content-type (type . text)
                   (subtype . plain)
                   (charset . "utf-8"))
     (content-transfer-encoding . 7bit)
     (from ((name . "Joe Q. Public")
            (address . "john.q.public@example.com")))
     (to ((name . "Mary Smith")
          (address . "mary@x.test"))
         ((address . "jdoe@example.org"))
         ((name . "Who?")
          (address . "one@y.test")))
     (cc ((address . "boss@nil.test"))
         ((name . "Giant; \"Big\" Box")
          (address . "sysservices@example.net")))
     (date . ,(make-date 0 37 52 10 1 7 2003 7200))
     (message-id . "5678.21-Nov-1997@example.com"))
   "Hi everyone."))

(test-email= "RFC5322 A.6.1. Obsolete addressing"
  (parse-email
   (string->bytevector
    "From: Joe Q. Public <john.q.public@example.com>
To: Mary Smith <@node.test:mary@example.net>, , jdoe@test  . example
Date: Tue, 1 Jul 2003 10:52:37 +0200
Message-ID: <5678.21-Nov-1997@example.com>

Hi everyone.
"
    "utf-8"))
  (make-email
   `((from ((name . "Joe Q. Public")
            (address . "john.q.public@example.com")))
     (to ((name . "Mary Smith")
          (address . "mary@example.net"))
         ((address . "jdoe@test  . example")))
     (date . ,(make-date 0 37 52 10 1 7 2003 7200))
     (message-id . "5678.21-Nov-1997@example.com")
     (content-type (type . text)
                   (subtype . plain)
                   (charset . "utf-8"))
     (content-transfer-encoding . 7bit))
   "Hi everyone."))

(test-email= "RFC5322 A.6.2. Obsolete dates"
  (parse-email
   (string->bytevector
    "From: John Doe <jdoe@machine.example>
To: Mary Smith <mary@example.net>
Subject: Saying Hello
Date: 21 Nov 97 09:55:06 GMT
Message-ID: <1234@local.machine.example>

This is a message just to say hello.
So, \"Hello\".
"
    "utf-8"))
  (make-email
   `((from ((name . "John Doe")
            (address . "jdoe@machine.example")))
     (to ((name . "Mary Smith")
          (address . "mary@example.net")))
     (subject . "Saying Hello")
     (date . ,(make-date 0 6 55 9 21 11 1997 0))
     (message-id . "1234@local.machine.example")
     (content-type (type . text)
                   (subtype . plain)
                   (charset . "utf-8"))
     (content-transfer-encoding . 7bit))
   "This is a message just to say hello.
So, \"Hello\"."))

(test-email= "RFC5322 A.6.3. Obsolete white space and comments"
  (parse-email
   (string->bytevector
    "From  : John Doe <jdoe@machine(comment).  example>
To    : Mary Smith
  
          <mary@example.net>
Subject     : Saying Hello
Date  : Fri, 21 Nov 1997 09(comment):   55  :  06 -0600
Message-ID  : <1234   @   local(blah)  .machine .example>

This is a message just to say hello.
So, \"Hello\".
"
    "utf-8"))
  (make-email
   `((from ((name . "John Doe")
            (address . "jdoe@machine.example")))
     (to ((name . "Mary Smith")
          (address . "mary@example.net")))
     (subject . "Saying Hello")
     (date . ,(make-date 0 6 55 9 21 11 1997 -21600))
     (message-id . "1234@local.machine.example")
     (content-type (type . text)
                   (subtype . plain)
                   (charset . "utf-8"))
     (content-transfer-encoding . 7bit))
   "This is a message just to say hello.
So, \"Hello\"."))

(test-email= "RFC2046 5.1.1. Common syntax"
  (parse-email
   (string->bytevector
    "From: Nathaniel Borenstein <nsb@bellcore.com>
To: Ned Freed <ned@innosoft.com>
Date: Sun, 21 Mar 1993 23:56:48 -0800 (PST)
Subject: Sample message
MIME-Version: 1.0
Content-type: multipart/mixed; boundary=\"simple boundary\"

This is the preamble.  It is to be ignored, though it
is a handy place for composition agents to include an
explanatory note to non-MIME conformant readers.

--simple boundary

This is implicitly typed plain US-ASCII text.
It does NOT end with a linebreak.
--simple boundary
Content-type: text/plain; charset=us-ascii

This is explicitly typed plain US-ASCII text.
It DOES end with a linebreak.

--simple boundary--

This is the epilogue.  It is also to be ignored.
" "utf-8"))
  (make-email
   `((content-transfer-encoding . 7bit)
     (from ((name . "Nathaniel Borenstein")
            (address . "nsb@bellcore.com")))
     (to ((name . "Ned Freed")
          (address . "ned@innosoft.com")))
     (date . ,(make-date 0 48 56 23 21 3 1993 -28800))
     (subject . "Sample message")
     (mime-version . "1.0")
     (content-type (type . multipart)
                   (subtype . mixed)
                   (boundary . "simple boundary")))
   (list (make-mime-entity '((content-type (type . text)
                                           (subtype . plain)
                                           (charset . "utf-8"))
                             (content-transfer-encoding . 7bit))
                           "This is implicitly typed plain US-ASCII text.
It does NOT end with a linebreak.")
         (make-mime-entity '((content-transfer-encoding . 7bit)
                             (content-type (type . text)
                                           (subtype . plain)
                                           (charset . "us-ascii")))
                           "This is explicitly typed plain US-ASCII text.
It DOES end with a linebreak."))))

(test-equal "decode MIME entity without headers"
  ((module-ref (resolve-module '(email email))
               'parse-mime-entity)
   '((content-type (type . multipart)
                   (subtype . mixed)))
   (string->bytevector
    "
This is implicitly typed plain US-ASCII text.
It does NOT end with a linebreak.
" "utf-8"))
  (make-mime-entity '((content-type (type . text)
                                    (subtype . plain)
                                    (charset . "utf-8"))
                      (content-transfer-encoding . 7bit))
                    "This is implicitly typed plain US-ASCII text.
It does NOT end with a linebreak."))

(test-email= "email with 8 bit encoding and non UTF-8 charset"
  (call-with-input-file "tests/email-with-8bit-encoding-and-non-utf8-charset"
    (compose parse-email get-bytevector-all))
  (make-email
   `((from ((name . "John Doe")
            (address . "jdoe@machine.example")))
     (to ((name . "Mary Smith")
          (address . "mary@example.net")))
     (subject . "Saying Hello")
     (date . ,(make-date 0 6 55 9 21 11 1997 -21600))
     (message-id . "1234@local.machine.example")
     (content-type (type . text)
                   (subtype . plain)
                   (charset . "ISO-8859-7"))
     (content-transfer-encoding . 8bit))
   "Hello Foo’."))

(test-email= "multipart email with a 8 bit encoding and non UTF-8 charset part"
  (call-with-input-file "tests/multipart-email-with-a-8bit-encoding-and-non-utf8-charset-part"
    (compose parse-email get-bytevector-all))
  (make-email
   `((content-transfer-encoding . 7bit)
     (from ((name . "John Doe")
            (address . "jdoe@machine.example")))
     (to ((name . "Mary Smith")
          (address . "mary@example.net")))
     (subject . "Saying Hello")
     (date . ,(make-date 0 6 55 9 21 11 1997 -21600))
     (message-id . "1234@local.machine.example")
     (content-type (type . multipart)
                   (subtype . mixed)
                   (boundary . "boundary")))
   (list (make-mime-entity
          `((content-type (type . text)
                          (subtype . plain)
                          (charset . "ISO-8859-7"))
            (content-transfer-encoding . 8bit))
          "Hello Foo’."))))

(test-equal "handle truncated multipart message gracefully"
  ((module-ref (resolve-module '(email email))
               'body->mime-entities)
   (string->bytevector
    "--boundary
Content-Type: text/plain

foo
" "utf-8")
   "boundary")
  (list (string->bytevector "Content-Type: text/plain

foo
" "utf-8")))

(test-email= "decode utf-8 characters in headers"
  (parse-email
   (string->bytevector
    "From: foo@bar.org (Foo Bãr)

body" "utf-8"))
  (make-email
   `((content-type (type . text)
                   (subtype . plain)
                   (charset . "utf-8"))
     (content-transfer-encoding . 7bit)
     (from ((name . "Foo Bãr")
            (address . "foo@bar.org"))))
   "body"))

(test-email= "tolerate non-ascii non-utf-8 characters in headers"
  (parse-email
   (string->bytevector
    "From: foo@bar.org (Foo Bãr)

body" "iso-8859-1"))
  (make-email
   `((content-type (type . text)
                   (subtype . plain)
                   (charset . "utf-8"))
     (content-transfer-encoding . 7bit)
     (from ((name . "Foo B�r")
            (address . "foo@bar.org"))))
   "body"))

(test-alist= "tolerate invalid charset"
  (parse-email-headers
   "Content-Type: text/plain; charset=foo
")
  `((content-transfer-encoding . 7bit)
    (content-type (type . text)
                  (subtype . plain)
                  (charset . "utf-8"))))

(test-email= "tolerate decoding errors in body"
  (parse-email
   "Content-Transfer-Encoding: quoted-printable

copyright =A9")
  (make-email
   `((content-type (type . text)
                   (subtype . plain)
                   (charset . "utf-8"))
     (content-transfer-encoding . quoted-printable))
   "copyright �"))

(test-alist= "Keywords header must be a list"
  (parse-email-headers
   "Keywords: foo, bar
")
  `((keywords " foo" " bar")
    (content-type (type . text)
                  (subtype . plain)
                  (charset . "utf-8"))
    (content-transfer-encoding . 7bit)))

(test-alist= "blank Subject header must be treated as having the null string as value"
  (parse-email-headers
   "Subject:
")
  '((subject . "")
    (content-type (type . text)
                  (subtype . plain)
                  (charset . "utf-8"))
    (content-transfer-encoding . 7bit)))

(test-expect-fail
 "References header with only one reference must be a singleton list, not a string")

(test-alist= "References header with only one reference must be a singleton list, not a string"
  (parse-email-headers
   "References: <foo@bar.org>
")
  '((references . ("foo@bar.org"))
    (content-type (type . text)
                  (subtype . plain)
                  (charset . "utf-8"))
    (content-transfer-encoding . 7bit)))

(test-expect-fail
 "Trace with only one Received header should be a list of received traces, not a single received trace")

(test-alist= "Trace with only one Received header should be a list of received traces, not a single received trace"
  (parse-email-headers
   "Received: by foo.bar.com id ZZZ55555; Thu, 31 May 2001 16:38:04 -1000 (HST)
")
  `((trace (received "by foo.bar.com id ZZZ55555"
                     ,(make-date 0 4 38 16 31 5 2001 -36000)))
    (content-type (type . text)
                  (subtype . plain)
                  (charset . "utf-8"))
    (content-transfer-encoding . 7bit)))

(test-alist= "parse obsolete Received header"
  (parse-email-headers
   "Received: by foo.bar.com id ZZZ55555
Received: from zzz ([1.2.3.5]) by ooo.ooo.com  with Maccrosoft SMTPSVC(5.5.1877.197.19)
")
  '((trace (received "by foo.bar.com id ZZZ55555")
           (received "from zzz by ooo.ooo.com with Maccrosoft SMTPSVC"))
    (content-type (type . text)
                  (subtype . plain)
                  (charset . "utf-8"))
    (content-transfer-encoding . 7bit)))

(test-alist= "parse names with more than two words"
  (parse-email-headers
   "From: Foo Bar Foobar <foo@bar.org>
")
  `((from ((name . "Foo Bar Foobar")
           (address . "foo@bar.org")))
    (content-type (type . text)
                  (subtype . plain)
                  (charset . "utf-8"))
    (content-transfer-encoding . 7bit)))

;; See §6.4 in RFC2045.
(test-alist= "assume application/octet-stream Content-Type if Content-Transfer-Encoding is unrecognized"
  (parse-email-headers
   "Content-Transfer-Encoding: some-unrecognized-encoding
Content-Type: text/plain; charset=utf-8
")
  `((content-type (type . application)
                  (subtype . octet-stream))
    (content-transfer-encoding . binary)))

(test-expect-fail
 "Trace with only one Received header must be a list of lists, not a list")

(test-alist= "Trace with only one Received header must be a list of lists, not a list"
  (parse-email-headers
   "Received: from foo; Sun, 24 Jan 2021 13:45:20 -0500
")
  `((trace (received "from foo"
                     ,(make-date 0 20 45 13 24 1 2021 -18000)))
    (content-type (type . text)
                  (subtype . plain)
                  (charset . "utf-8"))
    (content-transfer-encoding . 7bit)))

;; TODO: Fix this test once previous test is addressed.
(test-alist= "parse Received header with two tokens but no timestamp"
  (parse-email-headers
   "Received: from foo
")
  `((trace received "from foo")
    (content-type (type . text)
                  (subtype . plain)
                  (charset . "utf-8"))
    (content-transfer-encoding . 7bit)))

(test-alist= "parse Date without seconds"
  (parse-email-headers
   "Date: Tue, 22 Nov 94 17:52 GMT
")
  `((date . ,(make-date 0 0 52 17 22 11 1994 0))
    (content-type (type . text)
                  (subtype . plain)
                  (charset . "utf-8"))
    (content-transfer-encoding . 7bit)))


;;;
;;; Email addresses
;;;

(test-equal "parse name-addr email address"
  (parse-email-address "Foo <foo@example.org>")
  '((name . "Foo") (address . "foo@example.org")))

(test-equal "parse addr-spec email address"
  (parse-email-address "foo@example.org")
  '((address . "foo@example.org")))

(test-equal "parse emacs message mode parens style email address"
  (parse-email-address "foo@example.org (Foo)")
  '((name . "Foo") (address . "foo@example.org")))

(test-equal "parse email addresses with period in name"
  (parse-email-address "Foo P. Bar <foo@example.com>")
  '((name . "Foo P. Bar") (address . "foo@example.com")))

(test-equal "tolerate email addresses with parentheses in name"
  (parse-email-address "Foo(Bar <foo@example.com>")
  '((name . "Foo(Bar") (address . "foo@example.com")))


;;;
;;; MIME encoded words
;;;

(test-equal "decode MIME encoded word: wikipedia example"
  ((module-ref (resolve-module '(email email))
               'decode-mime-encoded-word)
   "=?iso-8859-1?Q?=A1Hola,_se=F1or!?=")
  "¡Hola, señor!")

(test-equal "decode MIME encoded phrases that mix ASCII text and MIME encoded words"
  ((module-ref (resolve-module '(email email))
               'decode-mime-encoded-word)
   "Foo =?UTF-8?Q?B=C3=A3r?=")
  "Foo Bãr")

(test-equal "decode MIME encoded phrases that contain multiple MIME encoded words each with their own encoding"
  ((module-ref (resolve-module '(email email))
               'decode-mime-encoded-word)
   "=?iso-8859-1?Q?=A1Hola,_se=F1or!?= =?UTF-8?Q?B=C3=A3r?=")
  "¡Hola, señor! Bãr")

(test-alist= "decode MIME encoded words in Subject header"
  (parse-email-headers "Subject: Foo =?UTF-8?Q?B=C3=A3r?=
")
  `((content-type (type . text)
                  (subtype . plain)
                  (charset . "utf-8"))
    (content-transfer-encoding . 7bit)
    (subject . "Foo Bãr")))

(test-equal "tolerate decoding errors in MIME encoded words"
  ((module-ref (resolve-module '(email email))
               'decode-mime-encoded-word)
   "=?UTF-8?Q?B=E8r?=")
  "B�r")

(test-end "email")