From 37e245539bc9e27db35731e7b517cdde397834e2 Mon Sep 17 00:00:00 2001 From: Arun Isaac Date: Sat, 5 Dec 2020 00:19:16 +0530 Subject: email: Support obsolete addressing. * email/email.scm (obs-qp, obs-fws, obs-no-ws-ctl, obs-ctext, obs-qtext, obs-phrase, obs-local-part, obs-dtext, obs-domain, obs-domain-list, obs-route, obs-angle-addr, captured-atom, captured-obs-domain, captured-domain, obs-mbox-list, obs-group-list, obs-addr-list, obs-id-left, obs-id-right): New patterns. (quoted-pair, fws, ctext, qtext, phrase, dtext, define-angle-addr-pattern, mailbox-list, group-list, address-list, define-field-pattern, from, sender, bcc, id-left, id-right, resent-from, resent-sender, resent-bcc, obs-resent-rply): Include obsolete pattern. (define-printable-ascii-character-pattern-with-obsolete, define-atom-pattern, define-obs-domain-pattern): New macros. (define-domain-pattern): Accept obs-domain as a new argument. (fields): Include obs-resent-rply. * tests/email.scm ("RFC5322 A.6.1. Obsolete addressing"): New test. ("parse email addresses with period in name"): Mark as passing. --- email/email.scm | 170 ++++++++++++++++++++++++++++++++++++++++++++++---------- tests/email.scm | 27 ++++++++- 2 files changed, 165 insertions(+), 32 deletions(-) diff --git a/email/email.scm b/email/email.scm index 361a566..52f7f58 100644 --- a/email/email.scm +++ b/email/email.scm @@ -112,17 +112,41 @@ (and (not-followed-by (or . exceptions)) vchar))) +(define-syntax-rule (define-printable-ascii-character-pattern-with-obsolete + name obsolete-pattern . exceptions) + (define-peg-pattern name body + (or (and (not-followed-by (or . exceptions)) + vchar) + obsolete-pattern))) + ;;; Quoted characters +(define-peg-pattern obs-qp body + (and "\\" (or "\x00" obs-no-ws-ctl "\n" "\r"))) + (define-peg-pattern quoted-pair body - (and (ignore "\\") (or vchar wsp))) + (or (and (ignore "\\") (or vchar wsp)) + obs-qp)) ;;; Folding white space and comments +(define-peg-pattern obs-fws body + (and (+ wsp) (* (and crlf (+ wsp))))) + (define-peg-pattern fws body - (and (? (and (* wsp) crlf)) (+ wsp))) + (or (and (? (and (* wsp) crlf)) (+ wsp)) + obs-fws)) -(define-printable-ascii-character-pattern ctext "(" ")" "\\") +(define-peg-pattern obs-no-ws-ctl body + (and (not-followed-by (or "\t" "\n" "\r")) + (or (range #\soh #\us) + "\x7f"))) + +(define-peg-pattern obs-ctext body + obs-no-ws-ctl) + +(define-printable-ascii-character-pattern-with-obsolete ctext obs-ctext + "(" ")" "\\") (define-syntax-rule (define-comment-pattern name capture-type) (define-peg-pattern name capture-type @@ -144,8 +168,11 @@ (define-printable-ascii-character-pattern atext "\"" "(" ")" "," "." ":" ";" "<" ">" "@" "[" "\\" "]") -(define-peg-pattern atom body - (and (? cfws) (+ atext) (? cfws))) +(define-syntax-rule (define-atom-pattern name cfws) + (define-peg-pattern name body + (and (? cfws) (+ atext) (? cfws)))) + +(define-atom-pattern atom cfws) (define-peg-pattern dot-atom-text body (and (+ atext) (* (and "." (+ atext))))) @@ -158,7 +185,11 @@ ;;; Quoted strings -(define-printable-ascii-character-pattern qtext "\\" "\"") +(define-peg-pattern obs-qtext body + obs-no-ws-ctl) + +(define-printable-ascii-character-pattern-with-obsolete qtext obs-qtext + "\\" "\"") (define-peg-pattern qcontent body (or qtext quoted-pair)) @@ -175,8 +206,14 @@ (define-peg-pattern word body (or atom quoted-string)) +(define-peg-pattern obs-phrase body + (and word (* (or word "." cfws)))) + +;; We set phrase to be the same as obs-phrase since, according to +;; their definitions in RFC5322, all phrases are obs-phrases. (define-peg-pattern phrase body - (+ word)) + obs-phrase) + ;; ABNF modified to ignore leading whitespace ;; ABNF modified to allow for blank lines in folded field @@ -248,19 +285,31 @@ (define-peg-pattern display-name all phrase) +(define-peg-pattern obs-local-part body + (and word (* (and "." word)))) + (define-peg-pattern local-part body - (or dot-atom quoted-string)) + (or obs-local-part dot-atom quoted-string)) + +(define-peg-pattern obs-dtext body + (or obs-no-ws-ctl quoted-pair)) -(define-printable-ascii-character-pattern dtext "[" "]" "\\") +(define-printable-ascii-character-pattern-with-obsolete dtext obs-dtext "[" "]" "\\") (define-peg-pattern domain-literal body (and (? cfws) "[" (* (and (? fws) dtext)) (? fws) "]" (? cfws))) -(define-syntax-rule (define-domain-pattern name dot-atom) +(define-syntax-rule (define-obs-domain-pattern name atom) (define-peg-pattern name body - (or dot-atom domain-literal))) + (and atom (* (and "." atom))))) -(define-domain-pattern domain dot-atom) +(define-obs-domain-pattern obs-domain atom) + +(define-syntax-rule (define-domain-pattern name obs-domain dot-atom) + (define-peg-pattern name body + (or obs-domain dot-atom domain-literal))) + +(define-domain-pattern domain obs-domain dot-atom) (define-syntax-rule (define-addr-spec-pattern name domain) (define-peg-pattern name body @@ -268,12 +317,25 @@ (define-addr-spec-pattern addr-spec domain) +(define-peg-pattern obs-domain-list body + (and (* (or cfws ",")) "@" domain + (* (and "," (? cfws) (? (and "@" domain)))))) + +(define-peg-pattern obs-route none + (and obs-domain-list ":")) + +(define-peg-pattern obs-angle-addr body + (and (ignore (and (? cfws) "<")) + obs-route addr-spec + (ignore (and ">" (? cfws))))) + (define-syntax-rule (define-angle-addr-pattern name opening-bracket closing-bracket) (define-peg-pattern name body - (and (ignore (? cfws)) - opening-bracket addr-spec closing-bracket - (ignore (? cfws))))) + (or (and (ignore (? cfws)) + opening-bracket addr-spec closing-bracket + (ignore (? cfws))) + obs-angle-addr))) (define-angle-addr-pattern angle-addr "<" ">") @@ -294,36 +356,67 @@ ;; parts of the email. In order to facilitate this, we define a set of ;; captured-* patterns which are used in address fields. +(define-atom-pattern captured-atom captured-cfws) (define-comment-pattern captured-comment all) (define-cfws-pattern captured-cfws captured-comment) (define-dot-atom-pattern captured-dot-atom captured-cfws) -(define-domain-pattern captured-domain captured-dot-atom) +(define-obs-domain-pattern captured-obs-domain captured-atom) +(define-domain-pattern captured-domain captured-obs-domain captured-dot-atom) (define-addr-spec-pattern captured-addr-spec captured-domain) (define-peg-pattern mailbox all (or name-addr captured-addr-spec)) +(define-peg-pattern obs-mbox-list body + (and (* (and (? cfws) ",")) mailbox + (* (and "," (? (or mailbox cfws)))))) + +;; We set mailbox-list to be the same as obs-mbox-list since, +;; according to their definitions in RFC5322, all mailbox-lists are +;; obs-mbox-lists. (define-peg-pattern mailbox-list all - (and mailbox (* (and (ignore ",") mailbox)))) + obs-mbox-list) (define-peg-pattern group all (and display-name (ignore ":") (? group-list) (ignore ";") (? cfws))) +(define-peg-pattern obs-group-list body + (and (+ (and (? cfws) ",")) + (? cfws))) + (define-peg-pattern group-list all - (or mailbox-list cfws)) + (or mailbox-list cfws obs-group-list)) (define-peg-pattern address body (or mailbox group)) +(define-peg-pattern obs-addr-list body + (and (* (and (? cfws) ",")) address + (* (and "," (? (or address cfws)))))) + +;; We set address-list to be the same as obs-addr-list since, +;; according to their definitions in RFC5322, all address-lists are +;; obs-address-lists. (define-peg-pattern address-list all - (and address (* (and (ignore ",") address)))) + obs-addr-list) ;;; Fields -(define-syntax-rule (define-field-pattern name header pattern) - (define-peg-pattern name all - (and (ignore (string-ci header)) (ignore ":") pattern crlf))) +;; We have compressed the current field pattern and the obsolete field +;; pattern into a single PEG pattern. +(define-syntax define-field-pattern + (syntax-rules () + ((define-field-pattern name header pattern) + (define-peg-pattern name all + (and (ignore (string-ci header)) + (ignore (and (* wsp) ":")) + pattern crlf))) + ((define-field-pattern name header pattern obsolete-pattern) + (define-peg-pattern name all + (and (ignore (string-ci header)) + (ignore (and (* wsp) ":")) + (or pattern obsolete-pattern) crlf))))) ;;; Origination date field @@ -331,26 +424,34 @@ ;;; Originator fields -(define-field-pattern from "From" (or mailbox-list address-list)) -(define-field-pattern sender "Sender" (or mailbox address)) +(define-field-pattern from "From" mailbox-list (or mailbox-list address-list)) +(define-field-pattern sender "Sender" mailbox (or mailbox address)) (define-field-pattern reply-to "Reply-To" address-list) ;; Destination address fields (define-field-pattern to "To" address-list) (define-field-pattern cc "Cc" address-list) -(define-field-pattern bcc "Bcc" (? (or address-list cfws))) +(define-field-pattern bcc "Bcc" + (? (or address-list cfws)) + (or address-list (and (* (and (? cfws) ",")) (? cfws)))) ;;; Identification fields (define-peg-pattern no-fold-literal body (and "[" (* dtext) "]")) +(define-peg-pattern obs-id-left body + local-part) + (define-peg-pattern id-left body - dot-atom-text) + (or dot-atom-text obs-id-left)) + +(define-peg-pattern obs-id-right body + domain) (define-peg-pattern id-right body - (or dot-atom-text no-fold-literal)) + (or dot-atom-text no-fold-literal obs-id-right)) (define-peg-pattern msg-id all (and (ignore (? cfws)) (ignore "<") @@ -370,12 +471,20 @@ ;;; Resent fields (define-field-pattern resent-date "Resent-Date" date-time) -(define-field-pattern resent-from "Resent-From" (or mailbox-list address-list)) -(define-field-pattern resent-sender "Resent-Sender" (or mailbox address)) +(define-field-pattern resent-from "Resent-From" + (or mailbox-list address-list) + mailbox-list) +(define-field-pattern resent-sender "Resent-Sender" + (or mailbox address) + mailbox) (define-field-pattern resent-to "Resent-To" address-list) (define-field-pattern resent-cc "Resent-Cc" address-list) -(define-field-pattern resent-bcc "Resent-Bcc" (? (or address-list cfws))) +(define-field-pattern resent-bcc "Resent-Bcc" + (? (or address-list cfws)) + (or address-list + (and (* (and (? cfws) ",")) (? cfws)))) (define-field-pattern resent-msg-id "Resent-Message-ID" msg-id) +(define-field-pattern obs-resent-rply "Resent-Reply-To" address-list) ;;; Trace fields @@ -519,6 +628,7 @@ resent-cc resent-bcc resent-msg-id + obs-resent-rply orig-date from sender diff --git a/tests/email.scm b/tests/email.scm index a2b6361..8a7be9b 100644 --- a/tests/email.scm +++ b/tests/email.scm @@ -190,6 +190,31 @@ Hi everyone.") (message-id . "5678.21-Nov-1997@example.com")) "Hi everyone.")) +(test-email= "RFC5322 A.6.1. Obsolete addressing" + (parse-email + (string->bytevector + "From: Joe Q. Public +To: Mary Smith <@node.test:mary@example.net>, , jdoe@test . example +Date: Tue, 1 Jul 2003 10:52:37 +0200 +Message-ID: <5678.21-Nov-1997@example.com> + +Hi everyone. +" + "utf-8")) + (make-email + `((from ((name . "Joe Q. Public") + (address . "john.q.public@example.com"))) + (to ((name . "Mary Smith") + (address . "mary@example.net")) + ((address . "jdoe@test . example"))) + (date . ,(make-date 0 37 52 10 1 7 2003 7200)) + (message-id . "5678.21-Nov-1997@example.com") + (content-type (type . text) + (subtype . plain) + (charset . "utf-8")) + (content-transfer-encoding . 7bit)) + "Hi everyone.")) + (test-email= "RFC2046 5.1.1. Common syntax" (parse-email (string->bytevector @@ -410,8 +435,6 @@ copyright =A9") (parse-email-address "foo@example.org (Foo)") '((name . "Foo") (address . "foo@example.org"))) -(test-expect-fail "parse email addresses with period in name") - (test-equal "parse email addresses with period in name" (parse-email-address "Foo P. Bar ") '((name . "Foo P. Bar") (address . "foo@example.com"))) -- cgit v1.2.3