aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorArun Isaac <arunisaac@systemreboot.net>2019-10-01 22:07:32 +0530
committerArun Isaac <arunisaac@systemreboot.net>2019-10-01 22:07:32 +0530
commit6159257c9da3664c516a26fad1ee942068fbbdd2 (patch)
tree3504d2a434d0e8989bae9084063df1d0c89d63ce
parent9d82904011516530b6ef1bcd53cef220db485e7a (diff)
downloadguile-email-6159257c9da3664c516a26fad1ee942068fbbdd2.tar.gz
guile-email-6159257c9da3664c516a26fad1ee942068fbbdd2.tar.lz
guile-email-6159257c9da3664c516a26fad1ee942068fbbdd2.zip
email: Tolerate invalid charset.
* email/email.scm (post-process-content-type): If charset is invalid, assume default UTF-8 as charset. * tests/email.scm ("tolerate invalid charset"): New test. Reported-by: Ricardo Wurmus <rekado@elephly.net>
-rw-r--r--email/email.scm51
-rw-r--r--tests/email.scm10
2 files changed, 44 insertions, 17 deletions
diff --git a/email/email.scm b/email/email.scm
index a791a0e..666bd3e 100644
--- a/email/email.scm
+++ b/email/email.scm
@@ -643,28 +643,45 @@ values. The returned headers is a string and body is a bytevector."
(define (post-process-content-transfer-encoding _ value)
(list 'content-transfer-encoding (string->lcase-symbol value)))
-(define post-process-content-type
- (match-lambda*
+(define (post-process-content-type . args)
+ (define (valid-charset? charset)
+ (catch #t
+ (lambda ()
+ (bytevector->string (make-bytevector 0 0) charset)
+ #t)
+ (const #f)))
+
+ (match args
(`(content (type ,type)
(subtype ,subtype)
. ,parameters)
(let ((type (string->lcase-symbol type))
- (subtype (string->lcase-symbol subtype)))
+ (subtype (string->lcase-symbol subtype))
+ (parameters
+ (map (match-lambda
+ (`(parameter (attribute ,attribute)
+ (value ,value))
+ (cons (string->lcase-symbol attribute) value)))
+ (flatten-and-filter '(parameter) parameters))))
`(content-type
- ,(acons* 'type type
- 'subtype subtype
- (let ((parameters
- (map (match-lambda
- (`(parameter (attribute ,attribute)
- (value ,value))
- (cons (string->lcase-symbol attribute) value)))
- (flatten-and-filter '(parameter) parameters))))
- ;; RFC6657 specifies UTF-8 as the default charset
- ;; for text/* media types.
- (if (and (eq? type 'text)
- (not (assoc-ref parameters 'charset)))
- (acons 'charset "utf-8" parameters)
- parameters))))))))
+ ,(acons*
+ 'type type
+ 'subtype subtype
+ (cond
+ ((and (eq? type 'text)
+ (or
+ ;; RFC6657 specifies UTF-8 as the default charset for
+ ;; text/* media types.
+ (not (assoc-ref parameters 'charset))
+ ;; RFC2045 recommends that the default be assumed
+ ;; when a syntactically invalid Content-Type header
+ ;; is encountered. In this implementation, we are
+ ;; only checking for the validity of the
+ ;; charset. Should we check for the validity of the
+ ;; entire Content-Type header? If so, how?
+ (not (valid-charset? (assoc-ref parameters 'charset)))))
+ (acons 'charset "utf-8" parameters))
+ (else parameters))))))))
(define post-process-content-disposition
(match-lambda*
diff --git a/tests/email.scm b/tests/email.scm
index ca0ebc3..1a0ec34 100644
--- a/tests/email.scm
+++ b/tests/email.scm
@@ -312,6 +312,16 @@ body" "iso-8859-1"))
(address . "foo@bar.org"))))
"body"))
+(test-equal "tolerate invalid charset"
+ (parse-email-headers
+ "Content-Type: text/plain; charset=foo
+")
+ `((content-transfer-encoding . 7bit)
+ (content-type (type . text)
+ (subtype . plain)
+ (charset . "utf-8")
+ (charset . "foo"))))
+
(test-equal "parse name-addr email address"
(parse-email-address "Foo <foo@example.org>")
'((name . "Foo") (address . "foo@example.org")))