summary refs log tree commit diff
diff options
context:
space:
mode:
authorArun Isaac2023-09-03 18:01:05 +0100
committerArun Isaac2023-09-03 18:01:05 +0100
commit900f720a7a0893f37f8c6b328f183e43086c1e52 (patch)
treebf01acd42a0533990ff19a38f0e8a132f3bdb1ca
parent0701c48d1eb43fb09b0f9a465a8d805df3ec161b (diff)
downloadguile-email-900f720a7a0893f37f8c6b328f183e43086c1e52.tar.gz
guile-email-900f720a7a0893f37f8c6b328f183e43086c1e52.tar.lz
guile-email-900f720a7a0893f37f8c6b328f183e43086c1e52.zip
email: Tolerate parentheses in display names.
* email/email.scm (define-atom-pattern): Support customization of the
atext pattern as well.
(define-phrase-pattern): New macro.
(obs-phrase): Define using define-phrase-pattern.
(liberal-atext, liberal-cfws-captured-atom,
liberal-cfws-captured-word, liberal-phrase): New patterns.
(display-name): Use liberal-phrase instead of phrase.
* tests/email.scm ("tolerate email addresses with parentheses in
name"): New test.
-rw-r--r--email/email.scm26
-rw-r--r--tests/email.scm4
2 files changed, 26 insertions, 4 deletions
diff --git a/email/email.scm b/email/email.scm
index 0d51eef..a29a532 100644
--- a/email/email.scm
+++ b/email/email.scm
@@ -1,5 +1,5 @@
 ;;; guile-email --- Guile email parser
-;;; Copyright © 2018, 2019, 2020, 2021 Arun Isaac <arunisaac@systemreboot.net>
+;;; Copyright © 2018, 2019, 2020, 2021, 2023 Arun Isaac <arunisaac@systemreboot.net>
 ;;; Copyright © 2021 Mathieu Othacehe <othacehe@gnu.org>
 ;;; Copyright © 2023 Andrew Whatson <whatson@tailcall.au>
 ;;;
@@ -179,6 +179,9 @@
        (and (ignore (? cfws)) (+ atext) (ignore (? cfws)))))
     ((define-atom-pattern name cfws)
      (define-peg-pattern name body
+       (and (? cfws) (+ atext) (? cfws))))
+    ((define-atom-pattern name cfws atext)
+     (define-peg-pattern name body
        (and (? cfws) (+ atext) (? cfws))))))
 
 (define-atom-pattern atom)
@@ -225,8 +228,11 @@
 (define-atom-pattern cfws-captured-atom cfws)
 (define-word-pattern cfws-captured-word cfws-captured-atom)
 
-(define-peg-pattern obs-phrase body
-  (and cfws-captured-word (* (or cfws-captured-word "." cfws))))
+(define-syntax-rule (define-phrase-pattern name word)
+  (define-peg-pattern name body
+    (and word (* (or word "." cfws)))))
+
+(define-phrase-pattern obs-phrase cfws-captured-word)
 
 ;; We set phrase to be the same as obs-phrase since, according to
 ;; their definitions in RFC5322, all phrases are obs-phrases.
@@ -350,8 +356,20 @@
 
 ;;; Address specification
 
+;; People tend to put strange characters in their display names, and
+;; MUAs tend to pass on these non-standard names without any
+;; quoting. Tolerate such names.
+
+;; Tolerate non-standard ( and ) in atext.
+(define-peg-pattern liberal-atext body
+  (or atext "(" ")"))
+
+(define-atom-pattern liberal-cfws-captured-atom cfws liberal-atext)
+(define-word-pattern liberal-cfws-captured-word liberal-cfws-captured-atom)
+(define-phrase-pattern liberal-phrase liberal-cfws-captured-word)
+
 (define-peg-pattern display-name all
-  phrase)
+  liberal-phrase)
 
 (define-peg-pattern obs-local-part body
   (and word (* (and "." word))))
diff --git a/tests/email.scm b/tests/email.scm
index 50d1bea..dd305b6 100644
--- a/tests/email.scm
+++ b/tests/email.scm
@@ -590,6 +590,10 @@ Content-Type: text/plain; charset=utf-8
   (parse-email-address "Foo P. Bar <foo@example.com>")
   '((name . "Foo P. Bar") (address . "foo@example.com")))
 
+(test-equal "tolerate email addresses with parentheses in name"
+  (parse-email-address "Foo(Bar <foo@example.com>")
+  '((name . "Foo(Bar") (address . "foo@example.com")))
+
 
 ;;;
 ;;; MIME encoded words