From 37e245539bc9e27db35731e7b517cdde397834e2 Mon Sep 17 00:00:00 2001
From: Arun Isaac
Date: Sat, 5 Dec 2020 00:19:16 +0530
Subject: email: Support obsolete addressing.

* email/email.scm (obs-qp, obs-fws, obs-no-ws-ctl, obs-ctext,
obs-qtext, obs-phrase, obs-local-part, obs-dtext, obs-domain,
obs-domain-list, obs-route, obs-angle-addr, captured-atom,
captured-obs-domain, captured-domain, obs-mbox-list, obs-group-list,
obs-addr-list, obs-id-left, obs-id-right): New patterns.
(quoted-pair, fws, ctext, qtext, phrase, dtext,
define-angle-addr-pattern, mailbox-list, group-list, address-list,
define-field-pattern, from, sender, bcc, id-left, id-right,
resent-from, resent-sender, resent-bcc, obs-resent-rply): Include
obsolete pattern.
(define-printable-ascii-character-pattern-with-obsolete,
define-atom-pattern, define-obs-domain-pattern): New macros.
(define-domain-pattern): Accept obs-domain as a new argument.
(fields): Include obs-resent-rply.
* tests/email.scm ("RFC5322 A.6.1. Obsolete addressing"): New test.
("parse email addresses with period in name"): Mark as passing.
---
 email/email.scm | 170 ++++++++++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 140 insertions(+), 30 deletions(-)

(limited to 'email')

diff --git a/email/email.scm b/email/email.scm
index 361a566..52f7f58 100644
--- a/email/email.scm
+++ b/email/email.scm
@@ -112,17 +112,41 @@
     (and (not-followed-by (or . exceptions))
          vchar)))
 
+(define-syntax-rule (define-printable-ascii-character-pattern-with-obsolete
+                      name obsolete-pattern . exceptions)
+  (define-peg-pattern name body
+    (or (and (not-followed-by (or . exceptions))
+             vchar)
+        obsolete-pattern)))
+
 ;;; Quoted characters
 
+(define-peg-pattern obs-qp body
+  (and "\\" (or "\x00" obs-no-ws-ctl "\n" "\r")))
+
 (define-peg-pattern quoted-pair body
-  (and (ignore "\\") (or vchar wsp)))
+  (or (and (ignore "\\") (or vchar wsp))
+      obs-qp))
 
 ;;; Folding white space and comments
 
+(define-peg-pattern obs-fws body
+  (and (+ wsp) (* (and crlf (+ wsp)))))
+
 (define-peg-pattern fws body
-  (and (? (and (* wsp) crlf)) (+ wsp)))
+  (or (and (? (and (* wsp) crlf)) (+ wsp))
+      obs-fws))
 
-(define-printable-ascii-character-pattern ctext "(" ")" "\\")
+(define-peg-pattern obs-no-ws-ctl body
+  (and (not-followed-by (or "\t" "\n" "\r"))
+       (or (range #\soh #\us)
+           "\x7f")))
+
+(define-peg-pattern obs-ctext body
+  obs-no-ws-ctl)
+
+(define-printable-ascii-character-pattern-with-obsolete ctext obs-ctext
+  "(" ")" "\\")
 
 (define-syntax-rule (define-comment-pattern name capture-type)
   (define-peg-pattern name capture-type
@@ -144,8 +168,11 @@
 (define-printable-ascii-character-pattern atext
   "\"" "(" ")" "," "." ":" ";" "<" ">" "@" "[" "\\" "]")
 
-(define-peg-pattern atom body
-  (and (? cfws) (+ atext) (? cfws)))
+(define-syntax-rule (define-atom-pattern name cfws)
+  (define-peg-pattern name body
+    (and (? cfws) (+ atext) (? cfws))))
+
+(define-atom-pattern atom cfws)
 
 (define-peg-pattern dot-atom-text body
   (and (+ atext) (* (and "." (+ atext)))))
@@ -158,7 +185,11 @@
 
 ;;; Quoted strings
 
-(define-printable-ascii-character-pattern qtext "\\" "\"")
+(define-peg-pattern obs-qtext body
+  obs-no-ws-ctl)
+
+(define-printable-ascii-character-pattern-with-obsolete qtext obs-qtext
+  "\\" "\"")
 
 (define-peg-pattern qcontent body
   (or qtext quoted-pair))
@@ -175,8 +206,14 @@
 (define-peg-pattern word body
   (or atom quoted-string))
 
+(define-peg-pattern obs-phrase body
+  (and word (* (or word "." cfws))))
+
+;; We set phrase to be the same as obs-phrase since, according to
+;; their definitions in RFC5322, all phrases are obs-phrases.
 (define-peg-pattern phrase body
-  (+ word))
+  obs-phrase)
+
 
 ;; ABNF modified to ignore leading whitespace
 ;; ABNF modified to allow for blank lines in folded field
@@ -248,19 +285,31 @@
 (define-peg-pattern display-name all
   phrase)
 
+(define-peg-pattern obs-local-part body
+  (and word (* (and "." word))))
+
 (define-peg-pattern local-part body
-  (or dot-atom quoted-string))
+  (or obs-local-part dot-atom quoted-string))
+
+(define-peg-pattern obs-dtext body
+  (or obs-no-ws-ctl quoted-pair))
 
-(define-printable-ascii-character-pattern dtext "[" "]" "\\")
+(define-printable-ascii-character-pattern-with-obsolete dtext obs-dtext "[" "]" "\\")
 
 (define-peg-pattern domain-literal body
   (and (? cfws) "[" (* (and (? fws) dtext)) (? fws) "]" (? cfws)))
 
-(define-syntax-rule (define-domain-pattern name dot-atom)
+(define-syntax-rule (define-obs-domain-pattern name atom)
   (define-peg-pattern name body
-    (or dot-atom domain-literal)))
+    (and atom (* (and "." atom)))))
 
-(define-domain-pattern domain dot-atom)
+(define-obs-domain-pattern obs-domain atom)
+
+(define-syntax-rule (define-domain-pattern name obs-domain dot-atom)
+  (define-peg-pattern name body
+    (or obs-domain dot-atom domain-literal)))
+
+(define-domain-pattern domain obs-domain dot-atom)
 
 (define-syntax-rule (define-addr-spec-pattern name domain)
   (define-peg-pattern name body
@@ -268,12 +317,25 @@
 
 (define-addr-spec-pattern addr-spec domain)
 
+(define-peg-pattern obs-domain-list body
+  (and (* (or cfws ",")) "@" domain
+       (* (and "," (? cfws) (? (and "@" domain))))))
+
+(define-peg-pattern obs-route none
+  (and obs-domain-list ":"))
+
+(define-peg-pattern obs-angle-addr body
+  (and (ignore (and (? cfws) "<"))
+       obs-route addr-spec
+       (ignore (and ">" (? cfws)))))
+
 (define-syntax-rule (define-angle-addr-pattern
                       name opening-bracket closing-bracket)
   (define-peg-pattern name body
-    (and (ignore (? cfws))
-         opening-bracket addr-spec closing-bracket
-         (ignore (? cfws)))))
+    (or (and (ignore (? cfws))
+             opening-bracket addr-spec closing-bracket
+             (ignore (? cfws)))
+        obs-angle-addr)))
 
 (define-angle-addr-pattern angle-addr "<" ">")
 
@@ -294,36 +356,67 @@
 ;; parts of the email. In order to facilitate this, we define a set of
 ;; captured-* patterns which are used in address fields.
 
+(define-atom-pattern captured-atom captured-cfws)
 (define-comment-pattern captured-comment all)
 (define-cfws-pattern captured-cfws captured-comment)
 (define-dot-atom-pattern captured-dot-atom captured-cfws)
-(define-domain-pattern captured-domain captured-dot-atom)
+(define-obs-domain-pattern captured-obs-domain captured-atom)
+(define-domain-pattern captured-domain captured-obs-domain captured-dot-atom)
 (define-addr-spec-pattern captured-addr-spec captured-domain)
 
 (define-peg-pattern mailbox all
   (or name-addr captured-addr-spec))
 
+(define-peg-pattern obs-mbox-list body
+  (and (* (and (? cfws) ",")) mailbox
+       (* (and "," (? (or mailbox cfws))))))
+
+;; We set mailbox-list to be the same as obs-mbox-list since,
+;; according to their definitions in RFC5322, all mailbox-lists are
+;; obs-mbox-lists.
 (define-peg-pattern mailbox-list all
-  (and mailbox (* (and (ignore ",") mailbox))))
+  obs-mbox-list)
 
 (define-peg-pattern group all
   (and display-name (ignore ":")
        (? group-list) (ignore ";") (? cfws)))
 
+(define-peg-pattern obs-group-list body
+  (and (+ (and (? cfws) ","))
+       (? cfws)))
+
 (define-peg-pattern group-list all
-  (or mailbox-list cfws))
+  (or mailbox-list cfws obs-group-list))
 
 (define-peg-pattern address body
   (or mailbox group))
 
+(define-peg-pattern obs-addr-list body
+  (and (* (and (? cfws) ",")) address
+       (* (and "," (? (or address cfws))))))
+
+;; We set address-list to be the same as obs-addr-list since,
+;; according to their definitions in RFC5322, all address-lists are
+;; obs-address-lists.
 (define-peg-pattern address-list all
-  (and address (* (and (ignore ",") address))))
+  obs-addr-list)
 
 ;;; Fields
 
-(define-syntax-rule (define-field-pattern name header pattern)
-  (define-peg-pattern name all
-    (and (ignore (string-ci header)) (ignore ":") pattern crlf)))
+;; We have compressed the current field pattern and the obsolete field
+;; pattern into a single PEG pattern.
+(define-syntax define-field-pattern
+  (syntax-rules ()
+    ((define-field-pattern name header pattern)
+     (define-peg-pattern name all
+       (and (ignore (string-ci header))
+            (ignore (and (* wsp) ":"))
+            pattern crlf)))
+    ((define-field-pattern name header pattern obsolete-pattern)
+     (define-peg-pattern name all
+       (and (ignore (string-ci header))
+            (ignore (and (* wsp) ":"))
+            (or pattern obsolete-pattern) crlf)))))
 
 ;;; Origination date field
 
@@ -331,26 +424,34 @@
 
 ;;; Originator fields
 
-(define-field-pattern from "From" (or mailbox-list address-list))
-(define-field-pattern sender "Sender" (or mailbox address))
+(define-field-pattern from "From" mailbox-list (or mailbox-list address-list))
+(define-field-pattern sender "Sender" mailbox (or mailbox address))
 (define-field-pattern reply-to "Reply-To" address-list)
 
 ;; Destination address fields
 
 (define-field-pattern to "To" address-list)
 (define-field-pattern cc "Cc" address-list)
-(define-field-pattern bcc "Bcc" (? (or address-list cfws)))
+(define-field-pattern bcc "Bcc"
+  (? (or address-list cfws))
+  (or address-list (and (* (and (? cfws) ",")) (? cfws))))
 
 ;;; Identification fields
 
 (define-peg-pattern no-fold-literal body
   (and "[" (* dtext) "]"))
 
+(define-peg-pattern obs-id-left body
+  local-part)
+
 (define-peg-pattern id-left body
-  dot-atom-text)
+  (or dot-atom-text obs-id-left))
+
+(define-peg-pattern obs-id-right body
+  domain)
 
 (define-peg-pattern id-right body
-  (or dot-atom-text no-fold-literal))
+  (or dot-atom-text no-fold-literal obs-id-right))
 
 (define-peg-pattern msg-id all
   (and (ignore (? cfws)) (ignore "<")
@@ -370,12 +471,20 @@
 ;;; Resent fields
 
 (define-field-pattern resent-date "Resent-Date" date-time)
-(define-field-pattern resent-from "Resent-From" (or mailbox-list address-list))
-(define-field-pattern resent-sender "Resent-Sender" (or mailbox address))
+(define-field-pattern resent-from "Resent-From"
+  (or mailbox-list address-list)
+  mailbox-list)
+(define-field-pattern resent-sender "Resent-Sender"
+  (or mailbox address)
+  mailbox)
 (define-field-pattern resent-to "Resent-To" address-list)
 (define-field-pattern resent-cc "Resent-Cc" address-list)
-(define-field-pattern resent-bcc "Resent-Bcc" (? (or address-list cfws)))
+(define-field-pattern resent-bcc "Resent-Bcc"
+  (? (or address-list cfws))
+  (or address-list
+      (and (* (and (? cfws) ",")) (? cfws))))
 (define-field-pattern resent-msg-id "Resent-Message-ID" msg-id)
+(define-field-pattern obs-resent-rply "Resent-Reply-To" address-list)
 
 ;;; Trace fields
 
@@ -519,6 +628,7 @@
          resent-cc
          resent-bcc
          resent-msg-id
+         obs-resent-rply
          orig-date
          from
          sender
-- 
cgit v1.2.3