;;; guile-email --- Guile email parser ;;; Copyright © 2017 Ricardo Wurmus ;;; Copyright © 2018, 2019, 2020, 2021, 2023 Arun Isaac ;;; Copyright © 2023 Andrew Whatson ;;; ;;; This file was adapted from guile-debbugs and is part of guile-email. ;;; ;;; guile-email is free software; you can redistribute it and/or modify ;;; it under the terms of the GNU Affero General Public License as ;;; published by the Free Software Foundation; either version 3 of the ;;; License, or (at your option) any later version. ;;; ;;; guile-email is distributed in the hope that it will be useful, but ;;; WITHOUT ANY WARRANTY; without even the implied warranty of ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ;;; Affero General Public License for more details. ;;; ;;; You should have received a copy of the GNU Affero General Public ;;; License along with guile-email. If not, see ;;; . ;;; ;;; This is an adaptation of "ext/rfc/test.scm" from Gauche Scheme. ;;; The file is under the BSD-3 license, reproduced below: ;;; ;;; Copyright (c) 2000-2017 Shiro Kawai ;;; ;;; Redistribution and use in source and binary forms, with or without ;;; modification, are permitted provided that the following conditions ;;; are met: ;;; ;;; 1. Redistributions of source code must retain the above copyright ;;; notice, this list of conditions and the following disclaimer. ;;; ;;; 2. Redistributions in binary form must reproduce the above copyright ;;; notice, this list of conditions and the following disclaimer in the ;;; documentation and/or other materials provided with the distribution. ;;; ;;; 3. Neither the name of the authors nor the names of its contributors ;;; may be used to endorse or promote products derived from this ;;; software without specific prior written permission. ;;; ;;; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ;;; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT ;;; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR ;;; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT ;;; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, ;;; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED ;;; TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR ;;; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF ;;; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING ;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS ;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. (use-modules (email email) (ice-9 binary-ports) (ice-9 iconv) (srfi srfi-1) (srfi srfi-19) (srfi srfi-64)) (define (alist=? headers1 headers2) (lset= equal? headers1 headers2)) (define (body=? body1 body2) (cond ((and (list? body1) (list? body2)) (every mime-entity=? body1 body2)) ((and (string? body1) (string? body2)) (string=? body1 body2)) (else #f))) (define (email=? email1 email2) (and (alist=? (email-headers email1) (email-headers email2)) (body=? (email-body email1) (email-body email2)))) (define (mime-entity=? entity1 entity2) (and (alist=? (mime-entity-headers entity1) (mime-entity-headers entity2)) (body=? (mime-entity-body entity1) (mime-entity-body entity2)))) (define-syntax-rule (test-alist= test-name actual expected) (test-assert test-name (alist=? actual expected))) (define-syntax-rule (test-email= test-name actual expected) (test-assert test-name (email=? actual expected))) (test-begin "email") ;;; ;;; Emails ;;; (test-alist= "parse email headers" (parse-email-headers "Received: by foo.bar.com id ZZZ55555; Thu, 31 May 2001 16:38:04 -1000 (HST) Received: from ooo.ooo.com (ooo.ooo.com [1.2.3.4]) \tby foo.bar.com (9.9.9+3.2W/3.7W-) with ESMTP id ZZZ55555 \tfor ; Thu, 31 May 2001 16:38:02 -1000 (HST) Received: from zzz ([1.2.3.5]) by ooo.ooo.com with Maccrosoft SMTPSVC(5.5.1877.197.19); \t Thu, 31 May 2001 22:33:16 -0400 Message-ID: Subject: Bogus Tester From: Bogus Sender To: You , Another Date: Fri, 01 Jun 2001 02:37:31 +0530 Mime-Version: 1.0 Content-Type: text/html Content-Transfer-Encoding: quoted-printable X-MSMail-Priority: Normal X-mailer: FooMail 4.0 4.03 (SMT460B92F) Content-Length: 4349 ") `((trace (received "by foo.bar.com id ZZZ55555" ,(make-date 0 4 38 16 31 5 2001 -36000)) (received "from ooo.ooo.com by foo.bar.com with ESMTP id ZZZ55555 for " ,(make-date 0 2 38 16 31 5 2001 -36000)) (received "from zzz by ooo.ooo.com with Maccrosoft SMTPSVC" ,(make-date 0 16 33 22 31 5 2001 -14400))) (message-id . "beefbeefbeefbeef@ooo.ooo.com") (subject . "Bogus Tester") (from ((name . "Bogus Sender") (address . "bogus@ooo.com"))) (to ((name . "You") (address . "you@bar.com")) ((name . "Another") (address . "another@ooo.com"))) (date . ,(make-date 0 31 37 2 1 6 2001 19800)) (mime-version . "1.0") (content-type (type . text) (subtype . html) (charset . "utf-8")) (content-transfer-encoding . quoted-printable) (x-msmail-priority . "Normal") (x-mailer . "FooMail 4.0 4.03 (SMT460B92F)") (content-length . "4349"))) (test-email= "RFC5322 A.1.1. A message from one person to another with simple addressing" (parse-email "From: John Doe Sender: Michael Jones To: Mary Smith Subject: Saying Hello Date: Fri, 21 Nov 1997 09:55:06 -0600 Message-ID: <1234@local.machine.example> This is a message just to say hello. So, \"Hello\".") (make-email `((content-type (type . text) (subtype . plain) (charset . "utf-8")) (content-transfer-encoding . 7bit) (from ((name . "John Doe") (address . "jdoe@machine.example"))) (sender (name . "Michael Jones") (address . "mjones@machine.example")) (to ((name . "Mary Smith") (address . "mary@example.net"))) (subject . "Saying Hello") (date . ,(make-date 0 6 55 9 21 11 1997 -21600)) (message-id . "1234@local.machine.example")) "This is a message just to say hello. So, \"Hello\".")) (test-email= "RFC5322 A.1.2. Different types of mailboxes" (parse-email "From: \"Joe Q. Public\" To: Mary Smith , jdoe@example.org, Who? Cc: , \"Giant; \\\"Big\\\" Box\" Date: Tue, 1 Jul 2003 10:52:37 +0200 Message-ID: <5678.21-Nov-1997@example.com> Hi everyone.") (make-email `((content-type (type . text) (subtype . plain) (charset . "utf-8")) (content-transfer-encoding . 7bit) (from ((name . "Joe Q. Public") (address . "john.q.public@example.com"))) (to ((name . "Mary Smith") (address . "mary@x.test")) ((address . "jdoe@example.org")) ((name . "Who?") (address . "one@y.test"))) (cc ((address . "boss@nil.test")) ((name . "Giant; \"Big\" Box") (address . "sysservices@example.net"))) (date . ,(make-date 0 37 52 10 1 7 2003 7200)) (message-id . "5678.21-Nov-1997@example.com")) "Hi everyone.")) (test-email= "RFC5322 A.6.1. Obsolete addressing" (parse-email (string->bytevector "From: Joe Q. Public To: Mary Smith <@node.test:mary@example.net>, , jdoe@test . example Date: Tue, 1 Jul 2003 10:52:37 +0200 Message-ID: <5678.21-Nov-1997@example.com> Hi everyone. " "utf-8")) (make-email `((from ((name . "Joe Q. Public") (address . "john.q.public@example.com"))) (to ((name . "Mary Smith") (address . "mary@example.net")) ((address . "jdoe@test . example"))) (date . ,(make-date 0 37 52 10 1 7 2003 7200)) (message-id . "5678.21-Nov-1997@example.com") (content-type (type . text) (subtype . plain) (charset . "utf-8")) (content-transfer-encoding . 7bit)) "Hi everyone.")) (test-email= "RFC5322 A.6.2. Obsolete dates" (parse-email (string->bytevector "From: John Doe To: Mary Smith Subject: Saying Hello Date: 21 Nov 97 09:55:06 GMT Message-ID: <1234@local.machine.example> This is a message just to say hello. So, \"Hello\". " "utf-8")) (make-email `((from ((name . "John Doe") (address . "jdoe@machine.example"))) (to ((name . "Mary Smith") (address . "mary@example.net"))) (subject . "Saying Hello") (date . ,(make-date 0 6 55 9 21 11 1997 0)) (message-id . "1234@local.machine.example") (content-type (type . text) (subtype . plain) (charset . "utf-8")) (content-transfer-encoding . 7bit)) "This is a message just to say hello. So, \"Hello\".")) (test-email= "RFC5322 A.6.3. Obsolete white space and comments" (parse-email (string->bytevector "From : John Doe To : Mary Smith Subject : Saying Hello Date : Fri, 21 Nov 1997 09(comment): 55 : 06 -0600 Message-ID : <1234 @ local(blah) .machine .example> This is a message just to say hello. So, \"Hello\". " "utf-8")) (make-email `((from ((name . "John Doe") (address . "jdoe@machine.example"))) (to ((name . "Mary Smith") (address . "mary@example.net"))) (subject . "Saying Hello") (date . ,(make-date 0 6 55 9 21 11 1997 -21600)) (message-id . "1234@local.machine.example") (content-type (type . text) (subtype . plain) (charset . "utf-8")) (content-transfer-encoding . 7bit)) "This is a message just to say hello. So, \"Hello\".")) (test-email= "RFC2046 5.1.1. Common syntax" (parse-email (string->bytevector "From: Nathaniel Borenstein To: Ned Freed Date: Sun, 21 Mar 1993 23:56:48 -0800 (PST) Subject: Sample message MIME-Version: 1.0 Content-type: multipart/mixed; boundary=\"simple boundary\" This is the preamble. It is to be ignored, though it is a handy place for composition agents to include an explanatory note to non-MIME conformant readers. --simple boundary This is implicitly typed plain US-ASCII text. It does NOT end with a linebreak. --simple boundary Content-type: text/plain; charset=us-ascii This is explicitly typed plain US-ASCII text. It DOES end with a linebreak. --simple boundary-- This is the epilogue. It is also to be ignored. " "utf-8")) (make-email `((content-transfer-encoding . 7bit) (from ((name . "Nathaniel Borenstein") (address . "nsb@bellcore.com"))) (to ((name . "Ned Freed") (address . "ned@innosoft.com"))) (date . ,(make-date 0 48 56 23 21 3 1993 -28800)) (subject . "Sample message") (mime-version . "1.0") (content-type (type . multipart) (subtype . mixed) (boundary . "simple boundary"))) (list (make-mime-entity '((content-type (type . text) (subtype . plain) (charset . "utf-8")) (content-transfer-encoding . 7bit)) "This is implicitly typed plain US-ASCII text. It does NOT end with a linebreak.") (make-mime-entity '((content-transfer-encoding . 7bit) (content-type (type . text) (subtype . plain) (charset . "us-ascii"))) "This is explicitly typed plain US-ASCII text. It DOES end with a linebreak.")))) (test-equal "decode MIME entity without headers" ((module-ref (resolve-module '(email email)) 'parse-mime-entity) '((content-type (type . multipart) (subtype . mixed))) (string->bytevector " This is implicitly typed plain US-ASCII text. It does NOT end with a linebreak. " "utf-8")) (make-mime-entity '((content-type (type . text) (subtype . plain) (charset . "utf-8")) (content-transfer-encoding . 7bit)) "This is implicitly typed plain US-ASCII text. It does NOT end with a linebreak.")) (test-email= "email with 8 bit encoding and non UTF-8 charset" (call-with-input-file "tests/email-with-8bit-encoding-and-non-utf8-charset" (compose parse-email get-bytevector-all)) (make-email `((from ((name . "John Doe") (address . "jdoe@machine.example"))) (to ((name . "Mary Smith") (address . "mary@example.net"))) (subject . "Saying Hello") (date . ,(make-date 0 6 55 9 21 11 1997 -21600)) (message-id . "1234@local.machine.example") (content-type (type . text) (subtype . plain) (charset . "ISO-8859-7")) (content-transfer-encoding . 8bit)) "Hello Foo’.")) (test-email= "multipart email with a 8 bit encoding and non UTF-8 charset part" (call-with-input-file "tests/multipart-email-with-a-8bit-encoding-and-non-utf8-charset-part" (compose parse-email get-bytevector-all)) (make-email `((content-transfer-encoding . 7bit) (from ((name . "John Doe") (address . "jdoe@machine.example"))) (to ((name . "Mary Smith") (address . "mary@example.net"))) (subject . "Saying Hello") (date . ,(make-date 0 6 55 9 21 11 1997 -21600)) (message-id . "1234@local.machine.example") (content-type (type . multipart) (subtype . mixed) (boundary . "boundary"))) (list (make-mime-entity `((content-type (type . text) (subtype . plain) (charset . "ISO-8859-7")) (content-transfer-encoding . 8bit)) "Hello Foo’.")))) (test-equal "handle truncated multipart message gracefully" ((module-ref (resolve-module '(email email)) 'body->mime-entities) (string->bytevector "--boundary Content-Type: text/plain foo " "utf-8") "boundary") (list (string->bytevector "Content-Type: text/plain foo " "utf-8"))) (test-email= "decode utf-8 characters in headers" (parse-email (string->bytevector "From: foo@bar.org (Foo Bãr) body" "utf-8")) (make-email `((content-type (type . text) (subtype . plain) (charset . "utf-8")) (content-transfer-encoding . 7bit) (from ((name . "Foo Bãr") (address . "foo@bar.org")))) "body")) (test-email= "tolerate non-ascii non-utf-8 characters in headers" (parse-email (string->bytevector "From: foo@bar.org (Foo Bãr) body" "iso-8859-1")) (make-email `((content-type (type . text) (subtype . plain) (charset . "utf-8")) (content-transfer-encoding . 7bit) (from ((name . "Foo B�r") (address . "foo@bar.org")))) "body")) (test-alist= "tolerate invalid charset" (parse-email-headers "Content-Type: text/plain; charset=foo ") `((content-transfer-encoding . 7bit) (content-type (type . text) (subtype . plain) (charset . "utf-8")))) (test-email= "tolerate decoding errors in body" (parse-email "Content-Transfer-Encoding: quoted-printable copyright =A9") (make-email `((content-type (type . text) (subtype . plain) (charset . "utf-8")) (content-transfer-encoding . quoted-printable)) "copyright �")) (test-alist= "Keywords header must be a list" (parse-email-headers "Keywords: foo, bar ") `((keywords " foo" " bar") (content-type (type . text) (subtype . plain) (charset . "utf-8")) (content-transfer-encoding . 7bit))) (test-alist= "blank Subject header must be treated as having the null string as value" (parse-email-headers "Subject: ") '((subject . "") (content-type (type . text) (subtype . plain) (charset . "utf-8")) (content-transfer-encoding . 7bit))) (test-expect-fail "References header with only one reference must be a singleton list, not a string") (test-alist= "References header with only one reference must be a singleton list, not a string" (parse-email-headers "References: ") '((references . ("foo@bar.org")) (content-type (type . text) (subtype . plain) (charset . "utf-8")) (content-transfer-encoding . 7bit))) (test-expect-fail "Trace with only one Received header should be a list of received traces, not a single received trace") (test-alist= "Trace with only one Received header should be a list of received traces, not a single received trace" (parse-email-headers "Received: by foo.bar.com id ZZZ55555; Thu, 31 May 2001 16:38:04 -1000 (HST) ") `((trace (received "by foo.bar.com id ZZZ55555" ,(make-date 0 4 38 16 31 5 2001 -36000))) (content-type (type . text) (subtype . plain) (charset . "utf-8")) (content-transfer-encoding . 7bit))) (test-alist= "parse obsolete Received header" (parse-email-headers "Received: by foo.bar.com id ZZZ55555 Received: from zzz ([1.2.3.5]) by ooo.ooo.com with Maccrosoft SMTPSVC(5.5.1877.197.19) ") '((trace (received "by foo.bar.com id ZZZ55555") (received "from zzz by ooo.ooo.com with Maccrosoft SMTPSVC")) (content-type (type . text) (subtype . plain) (charset . "utf-8")) (content-transfer-encoding . 7bit))) (test-alist= "parse names with more than two words" (parse-email-headers "From: Foo Bar Foobar ") `((from ((name . "Foo Bar Foobar") (address . "foo@bar.org"))) (content-type (type . text) (subtype . plain) (charset . "utf-8")) (content-transfer-encoding . 7bit))) ;; See §6.4 in RFC2045. (test-alist= "assume application/octet-stream Content-Type if Content-Transfer-Encoding is unrecognized" (parse-email-headers "Content-Transfer-Encoding: some-unrecognized-encoding Content-Type: text/plain; charset=utf-8 ") `((content-type (type . application) (subtype . octet-stream)) (content-transfer-encoding . binary))) (test-expect-fail "Trace with only one Received header must be a list of lists, not a list") (test-alist= "Trace with only one Received header must be a list of lists, not a list" (parse-email-headers "Received: from foo; Sun, 24 Jan 2021 13:45:20 -0500 ") `((trace (received "from foo" ,(make-date 0 20 45 13 24 1 2021 -18000))) (content-type (type . text) (subtype . plain) (charset . "utf-8")) (content-transfer-encoding . 7bit))) ;; TODO: Fix this test once previous test is addressed. (test-alist= "parse Received header with two tokens but no timestamp" (parse-email-headers "Received: from foo ") `((trace received "from foo") (content-type (type . text) (subtype . plain) (charset . "utf-8")) (content-transfer-encoding . 7bit))) (test-alist= "parse Date without seconds" (parse-email-headers "Date: Tue, 22 Nov 94 17:52 GMT ") `((date . ,(make-date 0 0 52 17 22 11 1994 0)) (content-type (type . text) (subtype . plain) (charset . "utf-8")) (content-transfer-encoding . 7bit))) ;;; ;;; Email addresses ;;; (test-equal "parse name-addr email address" (parse-email-address "Foo ") '((name . "Foo") (address . "foo@example.org"))) (test-equal "parse addr-spec email address" (parse-email-address "foo@example.org") '((address . "foo@example.org"))) (test-equal "parse emacs message mode parens style email address" (parse-email-address "foo@example.org (Foo)") '((name . "Foo") (address . "foo@example.org"))) (test-equal "parse email addresses with period in name" (parse-email-address "Foo P. Bar ") '((name . "Foo P. Bar") (address . "foo@example.com"))) (test-equal "tolerate email addresses with parentheses in name" (parse-email-address "Foo(Bar ") '((name . "Foo(Bar") (address . "foo@example.com"))) ;;; ;;; MIME encoded words ;;; (test-equal "decode MIME encoded word: wikipedia example" ((module-ref (resolve-module '(email email)) 'decode-mime-encoded-word) "=?iso-8859-1?Q?=A1Hola,_se=F1or!?=") "¡Hola, señor!") (test-equal "decode MIME encoded phrases that mix ASCII text and MIME encoded words" ((module-ref (resolve-module '(email email)) 'decode-mime-encoded-word) "Foo =?UTF-8?Q?B=C3=A3r?=") "Foo Bãr") (test-equal "decode MIME encoded phrases that contain multiple MIME encoded words each with their own encoding" ((module-ref (resolve-module '(email email)) 'decode-mime-encoded-word) "=?iso-8859-1?Q?=A1Hola,_se=F1or!?= =?UTF-8?Q?B=C3=A3r?=") "¡Hola, señor! Bãr") (test-alist= "decode MIME encoded words in Subject header" (parse-email-headers "Subject: Foo =?UTF-8?Q?B=C3=A3r?= ") `((content-type (type . text) (subtype . plain) (charset . "utf-8")) (content-transfer-encoding . 7bit) (subject . "Foo Bãr"))) (test-equal "tolerate decoding errors in MIME encoded words" ((module-ref (resolve-module '(email email)) 'decode-mime-encoded-word) "=?UTF-8?Q?B=E8r?=") "B�r") (test-end "email")