From 1f042830391494f4ad48036359d2788bf0527dba Mon Sep 17 00:00:00 2001 From: Arun Isaac Date: Sat, 5 Dec 2020 19:33:52 +0530 Subject: email: Do not capture cfws in atoms and dot-atoms. * email/email.scm (define-atom-pattern): Do not capture cfws unless specified. (atom): Do not specify cfws. (define-dot-atom-pattern): Do not capture cfws. (define-word-pattern): New macro. (cfws-captured-atom, cfws-captured-word): New patterns. (obs-phrase): Use cfws-captured-word. (received-token): Capture all. (parse-mime-entity): Post process received and received-token. * tests/email.scm ("parse email headers"): Fix test. --- email/email.scm | 40 ++++++++++++++++++++++++++++++---------- 1 file changed, 30 insertions(+), 10 deletions(-) (limited to 'email') diff --git a/email/email.scm b/email/email.scm index 56c1b57..398ddb3 100644 --- a/email/email.scm +++ b/email/email.scm @@ -168,18 +168,23 @@ (define-printable-ascii-character-pattern atext "\"" "(" ")" "," "." ":" ";" "<" ">" "@" "[" "\\" "]") -(define-syntax-rule (define-atom-pattern name cfws) - (define-peg-pattern name body - (and (? cfws) (+ atext) (? cfws)))) +(define-syntax define-atom-pattern + (syntax-rules () + ((define-atom-pattern name) + (define-peg-pattern name body + (and (ignore (? cfws)) (+ atext) (ignore (? cfws))))) + ((define-atom-pattern name cfws) + (define-peg-pattern name body + (and (? cfws) (+ atext) (? cfws)))))) -(define-atom-pattern atom cfws) +(define-atom-pattern atom) (define-peg-pattern dot-atom-text body (and (+ atext) (* (and "." (+ atext))))) (define-syntax-rule (define-dot-atom-pattern name cfws) (define-peg-pattern name body - (and (? cfws) dot-atom-text (? cfws)))) + (and (ignore (? cfws)) dot-atom-text (ignore (? cfws))))) (define-dot-atom-pattern dot-atom cfws) @@ -203,11 +208,21 @@ ;;; Miscellaneous tokens -(define-peg-pattern word body - (or atom quoted-string)) +;; According to RFC5322ยง3.2.3, the leading and trailing cfws in atom +;; are semantically not part of it, and should be ignored. But, to +;; support obs-phrase, we need to capture it. Hence, we define a set +;; of cfws-captured-* patterns needed by obs-phrase. + +(define-syntax-rule (define-word-pattern name atom) + (define-peg-pattern name body + (or atom quoted-string))) + +(define-word-pattern word atom) +(define-atom-pattern cfws-captured-atom cfws) +(define-word-pattern cfws-captured-word cfws-captured-atom) (define-peg-pattern obs-phrase body - (and word (* (or word "." cfws)))) + (and cfws-captured-word (* (or "." cfws word)))) ;; We set phrase to be the same as obs-phrase since, according to ;; their definitions in RFC5322, all phrases are obs-phrases. @@ -526,7 +541,7 @@ ;;; Trace fields ;; word is given last priority in the ordered choice -(define-peg-pattern received-token body +(define-peg-pattern received-token all (or angle-addr addr-spec domain word)) (define-field-pattern received "Received" @@ -988,7 +1003,12 @@ list of header keys and values." (pre-post-order (peg:tree (match-pattern fields headers)) - `((date-time . ,(lambda node + `((received . ,(match-lambda* + (`(received ,tokens ,timestamp) + (list 'received (string-join tokens) timestamp)))) + (received-token . ,(match-lambda* + (`(received-token ,token) token))) + (date-time . ,(lambda node (match-let ((`((day ,day) (month ,month) (year ,year) (hours ,hours) (minutes ,minutes) (seconds ,seconds) (zone . ,zone)) -- cgit v1.2.3