summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorArun Isaac2023-09-03 18:01:05 +0100
committerArun Isaac2023-09-03 18:01:05 +0100
commit900f720a7a0893f37f8c6b328f183e43086c1e52 (patch)
treebf01acd42a0533990ff19a38f0e8a132f3bdb1ca
parent0701c48d1eb43fb09b0f9a465a8d805df3ec161b (diff)
downloadguile-email-900f720a7a0893f37f8c6b328f183e43086c1e52.tar.gz
guile-email-900f720a7a0893f37f8c6b328f183e43086c1e52.tar.lz
guile-email-900f720a7a0893f37f8c6b328f183e43086c1e52.zip
email: Tolerate parentheses in display names.
* email/email.scm (define-atom-pattern): Support customization of the atext pattern as well. (define-phrase-pattern): New macro. (obs-phrase): Define using define-phrase-pattern. (liberal-atext, liberal-cfws-captured-atom, liberal-cfws-captured-word, liberal-phrase): New patterns. (display-name): Use liberal-phrase instead of phrase. * tests/email.scm ("tolerate email addresses with parentheses in name"): New test.
-rw-r--r--email/email.scm26
-rw-r--r--tests/email.scm4
2 files changed, 26 insertions, 4 deletions
diff --git a/email/email.scm b/email/email.scm
index 0d51eef..a29a532 100644
--- a/email/email.scm
+++ b/email/email.scm
@@ -1,5 +1,5 @@
;;; guile-email --- Guile email parser
-;;; Copyright © 2018, 2019, 2020, 2021 Arun Isaac <arunisaac@systemreboot.net>
+;;; Copyright © 2018, 2019, 2020, 2021, 2023 Arun Isaac <arunisaac@systemreboot.net>
;;; Copyright © 2021 Mathieu Othacehe <othacehe@gnu.org>
;;; Copyright © 2023 Andrew Whatson <whatson@tailcall.au>
;;;
@@ -179,6 +179,9 @@
(and (ignore (? cfws)) (+ atext) (ignore (? cfws)))))
((define-atom-pattern name cfws)
(define-peg-pattern name body
+ (and (? cfws) (+ atext) (? cfws))))
+ ((define-atom-pattern name cfws atext)
+ (define-peg-pattern name body
(and (? cfws) (+ atext) (? cfws))))))
(define-atom-pattern atom)
@@ -225,8 +228,11 @@
(define-atom-pattern cfws-captured-atom cfws)
(define-word-pattern cfws-captured-word cfws-captured-atom)
-(define-peg-pattern obs-phrase body
- (and cfws-captured-word (* (or cfws-captured-word "." cfws))))
+(define-syntax-rule (define-phrase-pattern name word)
+ (define-peg-pattern name body
+ (and word (* (or word "." cfws)))))
+
+(define-phrase-pattern obs-phrase cfws-captured-word)
;; We set phrase to be the same as obs-phrase since, according to
;; their definitions in RFC5322, all phrases are obs-phrases.
@@ -350,8 +356,20 @@
;;; Address specification
+;; People tend to put strange characters in their display names, and
+;; MUAs tend to pass on these non-standard names without any
+;; quoting. Tolerate such names.
+
+;; Tolerate non-standard ( and ) in atext.
+(define-peg-pattern liberal-atext body
+ (or atext "(" ")"))
+
+(define-atom-pattern liberal-cfws-captured-atom cfws liberal-atext)
+(define-word-pattern liberal-cfws-captured-word liberal-cfws-captured-atom)
+(define-phrase-pattern liberal-phrase liberal-cfws-captured-word)
+
(define-peg-pattern display-name all
- phrase)
+ liberal-phrase)
(define-peg-pattern obs-local-part body
(and word (* (and "." word))))
diff --git a/tests/email.scm b/tests/email.scm
index 50d1bea..dd305b6 100644
--- a/tests/email.scm
+++ b/tests/email.scm
@@ -590,6 +590,10 @@ Content-Type: text/plain; charset=utf-8
(parse-email-address "Foo P. Bar <foo@example.com>")
'((name . "Foo P. Bar") (address . "foo@example.com")))
+(test-equal "tolerate email addresses with parentheses in name"
+ (parse-email-address "Foo(Bar <foo@example.com>")
+ '((name . "Foo(Bar") (address . "foo@example.com")))
+
;;;
;;; MIME encoded words