From 1f042830391494f4ad48036359d2788bf0527dba Mon Sep 17 00:00:00 2001 From: Arun Isaac Date: Sat, 5 Dec 2020 19:33:52 +0530 Subject: email: Do not capture cfws in atoms and dot-atoms. * email/email.scm (define-atom-pattern): Do not capture cfws unless specified. (atom): Do not specify cfws. (define-dot-atom-pattern): Do not capture cfws. (define-word-pattern): New macro. (cfws-captured-atom, cfws-captured-word): New patterns. (obs-phrase): Use cfws-captured-word. (received-token): Capture all. (parse-mime-entity): Post process received and received-token. * tests/email.scm ("parse email headers"): Fix test. --- email/email.scm | 40 ++++++++++++++++++++++++++++++---------- tests/email.scm | 6 +++--- 2 files changed, 33 insertions(+), 13 deletions(-) diff --git a/email/email.scm b/email/email.scm index 56c1b57..398ddb3 100644 --- a/email/email.scm +++ b/email/email.scm @@ -168,18 +168,23 @@ (define-printable-ascii-character-pattern atext "\"" "(" ")" "," "." ":" ";" "<" ">" "@" "[" "\\" "]") -(define-syntax-rule (define-atom-pattern name cfws) - (define-peg-pattern name body - (and (? cfws) (+ atext) (? cfws)))) +(define-syntax define-atom-pattern + (syntax-rules () + ((define-atom-pattern name) + (define-peg-pattern name body + (and (ignore (? cfws)) (+ atext) (ignore (? cfws))))) + ((define-atom-pattern name cfws) + (define-peg-pattern name body + (and (? cfws) (+ atext) (? cfws)))))) -(define-atom-pattern atom cfws) +(define-atom-pattern atom) (define-peg-pattern dot-atom-text body (and (+ atext) (* (and "." (+ atext))))) (define-syntax-rule (define-dot-atom-pattern name cfws) (define-peg-pattern name body - (and (? cfws) dot-atom-text (? cfws)))) + (and (ignore (? cfws)) dot-atom-text (ignore (? cfws))))) (define-dot-atom-pattern dot-atom cfws) @@ -203,11 +208,21 @@ ;;; Miscellaneous tokens -(define-peg-pattern word body - (or atom quoted-string)) +;; According to RFC5322ยง3.2.3, the leading and trailing cfws in atom +;; are semantically not part of it, and should be ignored. But, to +;; support obs-phrase, we need to capture it. Hence, we define a set +;; of cfws-captured-* patterns needed by obs-phrase. + +(define-syntax-rule (define-word-pattern name atom) + (define-peg-pattern name body + (or atom quoted-string))) + +(define-word-pattern word atom) +(define-atom-pattern cfws-captured-atom cfws) +(define-word-pattern cfws-captured-word cfws-captured-atom) (define-peg-pattern obs-phrase body - (and word (* (or word "." cfws)))) + (and cfws-captured-word (* (or "." cfws word)))) ;; We set phrase to be the same as obs-phrase since, according to ;; their definitions in RFC5322, all phrases are obs-phrases. @@ -526,7 +541,7 @@ ;;; Trace fields ;; word is given last priority in the ordered choice -(define-peg-pattern received-token body +(define-peg-pattern received-token all (or angle-addr addr-spec domain word)) (define-field-pattern received "Received" @@ -988,7 +1003,12 @@ list of header keys and values." (pre-post-order (peg:tree (match-pattern fields headers)) - `((date-time . ,(lambda node + `((received . ,(match-lambda* + (`(received ,tokens ,timestamp) + (list 'received (string-join tokens) timestamp)))) + (received-token . ,(match-lambda* + (`(received-token ,token) token))) + (date-time . ,(lambda node (match-let ((`((day ,day) (month ,month) (year ,year) (hours ,hours) (minutes ,minutes) (seconds ,seconds) (zone . ,zone)) diff --git a/tests/email.scm b/tests/email.scm index d3801a9..766c402 100644 --- a/tests/email.scm +++ b/tests/email.scm @@ -110,11 +110,11 @@ X-MSMail-Priority: Normal X-mailer: FooMail 4.0 4.03 (SMT460B92F) Content-Length: 4349 ") - `((trace (received " by foo.bar.com id ZZZ55555" + `((trace (received "by foo.bar.com id ZZZ55555" ,(make-date 0 4 38 16 31 5 2001 -36000)) - (received " from ooo.ooo.com \tby foo.bar.com with ESMTP id ZZZ55555\tfor " + (received "from ooo.ooo.com by foo.bar.com with ESMTP id ZZZ55555 for " ,(make-date 0 2 38 16 31 5 2001 -36000)) - (received " from zzz by ooo.ooo.com with Maccrosoft SMTPSVC" + (received "from zzz by ooo.ooo.com with Maccrosoft SMTPSVC" ,(make-date 0 16 33 22 31 5 2001 -14400))) (message-id . "beefbeefbeefbeef@ooo.ooo.com") (subject . "Bogus Tester") -- cgit v1.2.3