summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r--email/email.scm170
-rw-r--r--tests/email.scm27
2 files changed, 165 insertions, 32 deletions
diff --git a/email/email.scm b/email/email.scm
index 361a566..52f7f58 100644
--- a/email/email.scm
+++ b/email/email.scm
@@ -112,17 +112,41 @@
     (and (not-followed-by (or . exceptions))
          vchar)))
 
+(define-syntax-rule (define-printable-ascii-character-pattern-with-obsolete
+                      name obsolete-pattern . exceptions)
+  (define-peg-pattern name body
+    (or (and (not-followed-by (or . exceptions))
+             vchar)
+        obsolete-pattern)))
+
 ;;; Quoted characters
 
+(define-peg-pattern obs-qp body
+  (and "\\" (or "\x00" obs-no-ws-ctl "\n" "\r")))
+
 (define-peg-pattern quoted-pair body
-  (and (ignore "\\") (or vchar wsp)))
+  (or (and (ignore "\\") (or vchar wsp))
+      obs-qp))
 
 ;;; Folding white space and comments
 
+(define-peg-pattern obs-fws body
+  (and (+ wsp) (* (and crlf (+ wsp)))))
+
 (define-peg-pattern fws body
-  (and (? (and (* wsp) crlf)) (+ wsp)))
+  (or (and (? (and (* wsp) crlf)) (+ wsp))
+      obs-fws))
 
-(define-printable-ascii-character-pattern ctext "(" ")" "\\")
+(define-peg-pattern obs-no-ws-ctl body
+  (and (not-followed-by (or "\t" "\n" "\r"))
+       (or (range #\soh #\us)
+           "\x7f")))
+
+(define-peg-pattern obs-ctext body
+  obs-no-ws-ctl)
+
+(define-printable-ascii-character-pattern-with-obsolete ctext obs-ctext
+  "(" ")" "\\")
 
 (define-syntax-rule (define-comment-pattern name capture-type)
   (define-peg-pattern name capture-type
@@ -144,8 +168,11 @@
 (define-printable-ascii-character-pattern atext
   "\"" "(" ")" "," "." ":" ";" "<" ">" "@" "[" "\\" "]")
 
-(define-peg-pattern atom body
-  (and (? cfws) (+ atext) (? cfws)))
+(define-syntax-rule (define-atom-pattern name cfws)
+  (define-peg-pattern name body
+    (and (? cfws) (+ atext) (? cfws))))
+
+(define-atom-pattern atom cfws)
 
 (define-peg-pattern dot-atom-text body
   (and (+ atext) (* (and "." (+ atext)))))
@@ -158,7 +185,11 @@
 
 ;;; Quoted strings
 
-(define-printable-ascii-character-pattern qtext "\\" "\"")
+(define-peg-pattern obs-qtext body
+  obs-no-ws-ctl)
+
+(define-printable-ascii-character-pattern-with-obsolete qtext obs-qtext
+  "\\" "\"")
 
 (define-peg-pattern qcontent body
   (or qtext quoted-pair))
@@ -175,8 +206,14 @@
 (define-peg-pattern word body
   (or atom quoted-string))
 
+(define-peg-pattern obs-phrase body
+  (and word (* (or word "." cfws))))
+
+;; We set phrase to be the same as obs-phrase since, according to
+;; their definitions in RFC5322, all phrases are obs-phrases.
 (define-peg-pattern phrase body
-  (+ word))
+  obs-phrase)
+
 
 ;; ABNF modified to ignore leading whitespace
 ;; ABNF modified to allow for blank lines in folded field
@@ -248,19 +285,31 @@
 (define-peg-pattern display-name all
   phrase)
 
+(define-peg-pattern obs-local-part body
+  (and word (* (and "." word))))
+
 (define-peg-pattern local-part body
-  (or dot-atom quoted-string))
+  (or obs-local-part dot-atom quoted-string))
+
+(define-peg-pattern obs-dtext body
+  (or obs-no-ws-ctl quoted-pair))
 
-(define-printable-ascii-character-pattern dtext "[" "]" "\\")
+(define-printable-ascii-character-pattern-with-obsolete dtext obs-dtext "[" "]" "\\")
 
 (define-peg-pattern domain-literal body
   (and (? cfws) "[" (* (and (? fws) dtext)) (? fws) "]" (? cfws)))
 
-(define-syntax-rule (define-domain-pattern name dot-atom)
+(define-syntax-rule (define-obs-domain-pattern name atom)
   (define-peg-pattern name body
-    (or dot-atom domain-literal)))
+    (and atom (* (and "." atom)))))
 
-(define-domain-pattern domain dot-atom)
+(define-obs-domain-pattern obs-domain atom)
+
+(define-syntax-rule (define-domain-pattern name obs-domain dot-atom)
+  (define-peg-pattern name body
+    (or obs-domain dot-atom domain-literal)))
+
+(define-domain-pattern domain obs-domain dot-atom)
 
 (define-syntax-rule (define-addr-spec-pattern name domain)
   (define-peg-pattern name body
@@ -268,12 +317,25 @@
 
 (define-addr-spec-pattern addr-spec domain)
 
+(define-peg-pattern obs-domain-list body
+  (and (* (or cfws ",")) "@" domain
+       (* (and "," (? cfws) (? (and "@" domain))))))
+
+(define-peg-pattern obs-route none
+  (and obs-domain-list ":"))
+
+(define-peg-pattern obs-angle-addr body
+  (and (ignore (and (? cfws) "<"))
+       obs-route addr-spec
+       (ignore (and ">" (? cfws)))))
+
 (define-syntax-rule (define-angle-addr-pattern
                       name opening-bracket closing-bracket)
   (define-peg-pattern name body
-    (and (ignore (? cfws))
-         opening-bracket addr-spec closing-bracket
-         (ignore (? cfws)))))
+    (or (and (ignore (? cfws))
+             opening-bracket addr-spec closing-bracket
+             (ignore (? cfws)))
+        obs-angle-addr)))
 
 (define-angle-addr-pattern angle-addr "<" ">")
 
@@ -294,36 +356,67 @@
 ;; parts of the email. In order to facilitate this, we define a set of
 ;; captured-* patterns which are used in address fields.
 
+(define-atom-pattern captured-atom captured-cfws)
 (define-comment-pattern captured-comment all)
 (define-cfws-pattern captured-cfws captured-comment)
 (define-dot-atom-pattern captured-dot-atom captured-cfws)
-(define-domain-pattern captured-domain captured-dot-atom)
+(define-obs-domain-pattern captured-obs-domain captured-atom)
+(define-domain-pattern captured-domain captured-obs-domain captured-dot-atom)
 (define-addr-spec-pattern captured-addr-spec captured-domain)
 
 (define-peg-pattern mailbox all
   (or name-addr captured-addr-spec))
 
+(define-peg-pattern obs-mbox-list body
+  (and (* (and (? cfws) ",")) mailbox
+       (* (and "," (? (or mailbox cfws))))))
+
+;; We set mailbox-list to be the same as obs-mbox-list since,
+;; according to their definitions in RFC5322, all mailbox-lists are
+;; obs-mbox-lists.
 (define-peg-pattern mailbox-list all
-  (and mailbox (* (and (ignore ",") mailbox))))
+  obs-mbox-list)
 
 (define-peg-pattern group all
   (and display-name (ignore ":")
        (? group-list) (ignore ";") (? cfws)))
 
+(define-peg-pattern obs-group-list body
+  (and (+ (and (? cfws) ","))
+       (? cfws)))
+
 (define-peg-pattern group-list all
-  (or mailbox-list cfws))
+  (or mailbox-list cfws obs-group-list))
 
 (define-peg-pattern address body
   (or mailbox group))
 
+(define-peg-pattern obs-addr-list body
+  (and (* (and (? cfws) ",")) address
+       (* (and "," (? (or address cfws))))))
+
+;; We set address-list to be the same as obs-addr-list since,
+;; according to their definitions in RFC5322, all address-lists are
+;; obs-address-lists.
 (define-peg-pattern address-list all
-  (and address (* (and (ignore ",") address))))
+  obs-addr-list)
 
 ;;; Fields
 
-(define-syntax-rule (define-field-pattern name header pattern)
-  (define-peg-pattern name all
-    (and (ignore (string-ci header)) (ignore ":") pattern crlf)))
+;; We have compressed the current field pattern and the obsolete field
+;; pattern into a single PEG pattern.
+(define-syntax define-field-pattern
+  (syntax-rules ()
+    ((define-field-pattern name header pattern)
+     (define-peg-pattern name all
+       (and (ignore (string-ci header))
+            (ignore (and (* wsp) ":"))
+            pattern crlf)))
+    ((define-field-pattern name header pattern obsolete-pattern)
+     (define-peg-pattern name all
+       (and (ignore (string-ci header))
+            (ignore (and (* wsp) ":"))
+            (or pattern obsolete-pattern) crlf)))))
 
 ;;; Origination date field
 
@@ -331,26 +424,34 @@
 
 ;;; Originator fields
 
-(define-field-pattern from "From" (or mailbox-list address-list))
-(define-field-pattern sender "Sender" (or mailbox address))
+(define-field-pattern from "From" mailbox-list (or mailbox-list address-list))
+(define-field-pattern sender "Sender" mailbox (or mailbox address))
 (define-field-pattern reply-to "Reply-To" address-list)
 
 ;; Destination address fields
 
 (define-field-pattern to "To" address-list)
 (define-field-pattern cc "Cc" address-list)
-(define-field-pattern bcc "Bcc" (? (or address-list cfws)))
+(define-field-pattern bcc "Bcc"
+  (? (or address-list cfws))
+  (or address-list (and (* (and (? cfws) ",")) (? cfws))))
 
 ;;; Identification fields
 
 (define-peg-pattern no-fold-literal body
   (and "[" (* dtext) "]"))
 
+(define-peg-pattern obs-id-left body
+  local-part)
+
 (define-peg-pattern id-left body
-  dot-atom-text)
+  (or dot-atom-text obs-id-left))
+
+(define-peg-pattern obs-id-right body
+  domain)
 
 (define-peg-pattern id-right body
-  (or dot-atom-text no-fold-literal))
+  (or dot-atom-text no-fold-literal obs-id-right))
 
 (define-peg-pattern msg-id all
   (and (ignore (? cfws)) (ignore "<")
@@ -370,12 +471,20 @@
 ;;; Resent fields
 
 (define-field-pattern resent-date "Resent-Date" date-time)
-(define-field-pattern resent-from "Resent-From" (or mailbox-list address-list))
-(define-field-pattern resent-sender "Resent-Sender" (or mailbox address))
+(define-field-pattern resent-from "Resent-From"
+  (or mailbox-list address-list)
+  mailbox-list)
+(define-field-pattern resent-sender "Resent-Sender"
+  (or mailbox address)
+  mailbox)
 (define-field-pattern resent-to "Resent-To" address-list)
 (define-field-pattern resent-cc "Resent-Cc" address-list)
-(define-field-pattern resent-bcc "Resent-Bcc" (? (or address-list cfws)))
+(define-field-pattern resent-bcc "Resent-Bcc"
+  (? (or address-list cfws))
+  (or address-list
+      (and (* (and (? cfws) ",")) (? cfws))))
 (define-field-pattern resent-msg-id "Resent-Message-ID" msg-id)
+(define-field-pattern obs-resent-rply "Resent-Reply-To" address-list)
 
 ;;; Trace fields
 
@@ -519,6 +628,7 @@
          resent-cc
          resent-bcc
          resent-msg-id
+         obs-resent-rply
          orig-date
          from
          sender
diff --git a/tests/email.scm b/tests/email.scm
index a2b6361..8a7be9b 100644
--- a/tests/email.scm
+++ b/tests/email.scm
@@ -190,6 +190,31 @@ Hi everyone.")
      (message-id . "5678.21-Nov-1997@example.com"))
    "Hi everyone."))
 
+(test-email= "RFC5322 A.6.1. Obsolete addressing"
+  (parse-email
+   (string->bytevector
+    "From: Joe Q. Public <john.q.public@example.com>
+To: Mary Smith <@node.test:mary@example.net>, , jdoe@test  . example
+Date: Tue, 1 Jul 2003 10:52:37 +0200
+Message-ID: <5678.21-Nov-1997@example.com>
+
+Hi everyone.
+"
+    "utf-8"))
+  (make-email
+   `((from ((name . "Joe Q. Public")
+            (address . "john.q.public@example.com")))
+     (to ((name . "Mary Smith")
+          (address . "mary@example.net"))
+         ((address . "jdoe@test  . example")))
+     (date . ,(make-date 0 37 52 10 1 7 2003 7200))
+     (message-id . "5678.21-Nov-1997@example.com")
+     (content-type (type . text)
+                   (subtype . plain)
+                   (charset . "utf-8"))
+     (content-transfer-encoding . 7bit))
+   "Hi everyone."))
+
 (test-email= "RFC2046 5.1.1. Common syntax"
   (parse-email
    (string->bytevector
@@ -410,8 +435,6 @@ copyright =A9")
   (parse-email-address "foo@example.org (Foo)")
   '((name . "Foo") (address . "foo@example.org")))
 
-(test-expect-fail "parse email addresses with period in name")
-
 (test-equal "parse email addresses with period in name"
   (parse-email-address "Foo P. Bar <foo@example.com>")
   '((name . "Foo P. Bar") (address . "foo@example.com")))