From 900f720a7a0893f37f8c6b328f183e43086c1e52 Mon Sep 17 00:00:00 2001 From: Arun Isaac Date: Sun, 3 Sep 2023 18:01:05 +0100 Subject: email: Tolerate parentheses in display names. * email/email.scm (define-atom-pattern): Support customization of the atext pattern as well. (define-phrase-pattern): New macro. (obs-phrase): Define using define-phrase-pattern. (liberal-atext, liberal-cfws-captured-atom, liberal-cfws-captured-word, liberal-phrase): New patterns. (display-name): Use liberal-phrase instead of phrase. * tests/email.scm ("tolerate email addresses with parentheses in name"): New test. --- email/email.scm | 26 ++++++++++++++++++++++---- tests/email.scm | 4 ++++ 2 files changed, 26 insertions(+), 4 deletions(-) diff --git a/email/email.scm b/email/email.scm index 0d51eef..a29a532 100644 --- a/email/email.scm +++ b/email/email.scm @@ -1,5 +1,5 @@ ;;; guile-email --- Guile email parser -;;; Copyright © 2018, 2019, 2020, 2021 Arun Isaac +;;; Copyright © 2018, 2019, 2020, 2021, 2023 Arun Isaac ;;; Copyright © 2021 Mathieu Othacehe ;;; Copyright © 2023 Andrew Whatson ;;; @@ -178,6 +178,9 @@ (define-peg-pattern name body (and (ignore (? cfws)) (+ atext) (ignore (? cfws))))) ((define-atom-pattern name cfws) + (define-peg-pattern name body + (and (? cfws) (+ atext) (? cfws)))) + ((define-atom-pattern name cfws atext) (define-peg-pattern name body (and (? cfws) (+ atext) (? cfws)))))) @@ -225,8 +228,11 @@ (define-atom-pattern cfws-captured-atom cfws) (define-word-pattern cfws-captured-word cfws-captured-atom) -(define-peg-pattern obs-phrase body - (and cfws-captured-word (* (or cfws-captured-word "." cfws)))) +(define-syntax-rule (define-phrase-pattern name word) + (define-peg-pattern name body + (and word (* (or word "." cfws))))) + +(define-phrase-pattern obs-phrase cfws-captured-word) ;; We set phrase to be the same as obs-phrase since, according to ;; their definitions in RFC5322, all phrases are obs-phrases. @@ -350,8 +356,20 @@ ;;; Address specification +;; People tend to put strange characters in their display names, and +;; MUAs tend to pass on these non-standard names without any +;; quoting. Tolerate such names. + +;; Tolerate non-standard ( and ) in atext. +(define-peg-pattern liberal-atext body + (or atext "(" ")")) + +(define-atom-pattern liberal-cfws-captured-atom cfws liberal-atext) +(define-word-pattern liberal-cfws-captured-word liberal-cfws-captured-atom) +(define-phrase-pattern liberal-phrase liberal-cfws-captured-word) + (define-peg-pattern display-name all - phrase) + liberal-phrase) (define-peg-pattern obs-local-part body (and word (* (and "." word)))) diff --git a/tests/email.scm b/tests/email.scm index 50d1bea..dd305b6 100644 --- a/tests/email.scm +++ b/tests/email.scm @@ -590,6 +590,10 @@ Content-Type: text/plain; charset=utf-8 (parse-email-address "Foo P. Bar ") '((name . "Foo P. Bar") (address . "foo@example.com"))) +(test-equal "tolerate email addresses with parentheses in name" + (parse-email-address "Foo(Bar ") + '((name . "Foo(Bar") (address . "foo@example.com"))) + ;;; ;;; MIME encoded words -- cgit v1.2.3