summaryrefslogtreecommitdiff
path: root/email
diff options
context:
space:
mode:
authorArun Isaac2020-12-05 19:33:52 +0530
committerArun Isaac2020-12-05 19:33:52 +0530
commit1f042830391494f4ad48036359d2788bf0527dba (patch)
tree2da68f9b46fdbba5744dc4b99985c96aa693a45e /email
parente65b6bacc2adc0c72711ed3b39e533cb6186f3b1 (diff)
downloadguile-email-1f042830391494f4ad48036359d2788bf0527dba.tar.gz
guile-email-1f042830391494f4ad48036359d2788bf0527dba.tar.lz
guile-email-1f042830391494f4ad48036359d2788bf0527dba.zip
email: Do not capture cfws in atoms and dot-atoms.
* email/email.scm (define-atom-pattern): Do not capture cfws unless specified. (atom): Do not specify cfws. (define-dot-atom-pattern): Do not capture cfws. (define-word-pattern): New macro. (cfws-captured-atom, cfws-captured-word): New patterns. (obs-phrase): Use cfws-captured-word. (received-token): Capture all. (parse-mime-entity): Post process received and received-token. * tests/email.scm ("parse email headers"): Fix test.
Diffstat (limited to 'email')
-rw-r--r--email/email.scm40
1 files changed, 30 insertions, 10 deletions
diff --git a/email/email.scm b/email/email.scm
index 56c1b57..398ddb3 100644
--- a/email/email.scm
+++ b/email/email.scm
@@ -168,18 +168,23 @@
(define-printable-ascii-character-pattern atext
"\"" "(" ")" "," "." ":" ";" "<" ">" "@" "[" "\\" "]")
-(define-syntax-rule (define-atom-pattern name cfws)
- (define-peg-pattern name body
- (and (? cfws) (+ atext) (? cfws))))
+(define-syntax define-atom-pattern
+ (syntax-rules ()
+ ((define-atom-pattern name)
+ (define-peg-pattern name body
+ (and (ignore (? cfws)) (+ atext) (ignore (? cfws)))))
+ ((define-atom-pattern name cfws)
+ (define-peg-pattern name body
+ (and (? cfws) (+ atext) (? cfws))))))
-(define-atom-pattern atom cfws)
+(define-atom-pattern atom)
(define-peg-pattern dot-atom-text body
(and (+ atext) (* (and "." (+ atext)))))
(define-syntax-rule (define-dot-atom-pattern name cfws)
(define-peg-pattern name body
- (and (? cfws) dot-atom-text (? cfws))))
+ (and (ignore (? cfws)) dot-atom-text (ignore (? cfws)))))
(define-dot-atom-pattern dot-atom cfws)
@@ -203,11 +208,21 @@
;;; Miscellaneous tokens
-(define-peg-pattern word body
- (or atom quoted-string))
+;; According to RFC5322ยง3.2.3, the leading and trailing cfws in atom
+;; are semantically not part of it, and should be ignored. But, to
+;; support obs-phrase, we need to capture it. Hence, we define a set
+;; of cfws-captured-* patterns needed by obs-phrase.
+
+(define-syntax-rule (define-word-pattern name atom)
+ (define-peg-pattern name body
+ (or atom quoted-string)))
+
+(define-word-pattern word atom)
+(define-atom-pattern cfws-captured-atom cfws)
+(define-word-pattern cfws-captured-word cfws-captured-atom)
(define-peg-pattern obs-phrase body
- (and word (* (or word "." cfws))))
+ (and cfws-captured-word (* (or "." cfws word))))
;; We set phrase to be the same as obs-phrase since, according to
;; their definitions in RFC5322, all phrases are obs-phrases.
@@ -526,7 +541,7 @@
;;; Trace fields
;; word is given last priority in the ordered choice
-(define-peg-pattern received-token body
+(define-peg-pattern received-token all
(or angle-addr addr-spec domain word))
(define-field-pattern received "Received"
@@ -988,7 +1003,12 @@ list of header keys and values."
(pre-post-order
(peg:tree
(match-pattern fields headers))
- `((date-time . ,(lambda node
+ `((received . ,(match-lambda*
+ (`(received ,tokens ,timestamp)
+ (list 'received (string-join tokens) timestamp))))
+ (received-token . ,(match-lambda*
+ (`(received-token ,token) token)))
+ (date-time . ,(lambda node
(match-let
((`((day ,day) (month ,month) (year ,year)
(hours ,hours) (minutes ,minutes) (seconds ,seconds) (zone . ,zone))