diff options
author | Arun Isaac | 2020-12-05 19:33:52 +0530 |
---|---|---|
committer | Arun Isaac | 2020-12-05 19:33:52 +0530 |
commit | 1f042830391494f4ad48036359d2788bf0527dba (patch) | |
tree | 2da68f9b46fdbba5744dc4b99985c96aa693a45e /email | |
parent | e65b6bacc2adc0c72711ed3b39e533cb6186f3b1 (diff) | |
download | guile-email-1f042830391494f4ad48036359d2788bf0527dba.tar.gz guile-email-1f042830391494f4ad48036359d2788bf0527dba.tar.lz guile-email-1f042830391494f4ad48036359d2788bf0527dba.zip |
email: Do not capture cfws in atoms and dot-atoms.
* email/email.scm (define-atom-pattern): Do not capture cfws unless
specified.
(atom): Do not specify cfws.
(define-dot-atom-pattern): Do not capture cfws.
(define-word-pattern): New macro.
(cfws-captured-atom, cfws-captured-word): New patterns.
(obs-phrase): Use cfws-captured-word.
(received-token): Capture all.
(parse-mime-entity): Post process received and received-token.
* tests/email.scm ("parse email headers"): Fix test.
Diffstat (limited to 'email')
-rw-r--r-- | email/email.scm | 40 |
1 files changed, 30 insertions, 10 deletions
diff --git a/email/email.scm b/email/email.scm index 56c1b57..398ddb3 100644 --- a/email/email.scm +++ b/email/email.scm @@ -168,18 +168,23 @@ (define-printable-ascii-character-pattern atext "\"" "(" ")" "," "." ":" ";" "<" ">" "@" "[" "\\" "]") -(define-syntax-rule (define-atom-pattern name cfws) - (define-peg-pattern name body - (and (? cfws) (+ atext) (? cfws)))) +(define-syntax define-atom-pattern + (syntax-rules () + ((define-atom-pattern name) + (define-peg-pattern name body + (and (ignore (? cfws)) (+ atext) (ignore (? cfws))))) + ((define-atom-pattern name cfws) + (define-peg-pattern name body + (and (? cfws) (+ atext) (? cfws)))))) -(define-atom-pattern atom cfws) +(define-atom-pattern atom) (define-peg-pattern dot-atom-text body (and (+ atext) (* (and "." (+ atext))))) (define-syntax-rule (define-dot-atom-pattern name cfws) (define-peg-pattern name body - (and (? cfws) dot-atom-text (? cfws)))) + (and (ignore (? cfws)) dot-atom-text (ignore (? cfws))))) (define-dot-atom-pattern dot-atom cfws) @@ -203,11 +208,21 @@ ;;; Miscellaneous tokens -(define-peg-pattern word body - (or atom quoted-string)) +;; According to RFC5322ยง3.2.3, the leading and trailing cfws in atom +;; are semantically not part of it, and should be ignored. But, to +;; support obs-phrase, we need to capture it. Hence, we define a set +;; of cfws-captured-* patterns needed by obs-phrase. + +(define-syntax-rule (define-word-pattern name atom) + (define-peg-pattern name body + (or atom quoted-string))) + +(define-word-pattern word atom) +(define-atom-pattern cfws-captured-atom cfws) +(define-word-pattern cfws-captured-word cfws-captured-atom) (define-peg-pattern obs-phrase body - (and word (* (or word "." cfws)))) + (and cfws-captured-word (* (or "." cfws word)))) ;; We set phrase to be the same as obs-phrase since, according to ;; their definitions in RFC5322, all phrases are obs-phrases. @@ -526,7 +541,7 @@ ;;; Trace fields ;; word is given last priority in the ordered choice -(define-peg-pattern received-token body +(define-peg-pattern received-token all (or angle-addr addr-spec domain word)) (define-field-pattern received "Received" @@ -988,7 +1003,12 @@ list of header keys and values." (pre-post-order (peg:tree (match-pattern fields headers)) - `((date-time . ,(lambda node + `((received . ,(match-lambda* + (`(received ,tokens ,timestamp) + (list 'received (string-join tokens) timestamp)))) + (received-token . ,(match-lambda* + (`(received-token ,token) token))) + (date-time . ,(lambda node (match-let ((`((day ,day) (month ,month) (year ,year) (hours ,hours) (minutes ,minutes) (seconds ,seconds) (zone . ,zone)) |