aboutsummaryrefslogtreecommitdiff
path: root/email
diff options
context:
space:
mode:
authorArun Isaac2019-10-01 22:07:32 +0530
committerArun Isaac2019-10-01 22:07:32 +0530
commit6159257c9da3664c516a26fad1ee942068fbbdd2 (patch)
tree3504d2a434d0e8989bae9084063df1d0c89d63ce /email
parent9d82904011516530b6ef1bcd53cef220db485e7a (diff)
downloadguile-email-6159257c9da3664c516a26fad1ee942068fbbdd2.tar.gz
guile-email-6159257c9da3664c516a26fad1ee942068fbbdd2.tar.lz
guile-email-6159257c9da3664c516a26fad1ee942068fbbdd2.zip
email: Tolerate invalid charset.
* email/email.scm (post-process-content-type): If charset is invalid, assume default UTF-8 as charset. * tests/email.scm ("tolerate invalid charset"): New test. Reported-by: Ricardo Wurmus <rekado@elephly.net>
Diffstat (limited to 'email')
-rw-r--r--email/email.scm51
1 files changed, 34 insertions, 17 deletions
diff --git a/email/email.scm b/email/email.scm
index a791a0e..666bd3e 100644
--- a/email/email.scm
+++ b/email/email.scm
@@ -643,28 +643,45 @@ values. The returned headers is a string and body is a bytevector."
(define (post-process-content-transfer-encoding _ value)
(list 'content-transfer-encoding (string->lcase-symbol value)))
-(define post-process-content-type
- (match-lambda*
+(define (post-process-content-type . args)
+ (define (valid-charset? charset)
+ (catch #t
+ (lambda ()
+ (bytevector->string (make-bytevector 0 0) charset)
+ #t)
+ (const #f)))
+
+ (match args
(`(content (type ,type)
(subtype ,subtype)
. ,parameters)
(let ((type (string->lcase-symbol type))
- (subtype (string->lcase-symbol subtype)))
+ (subtype (string->lcase-symbol subtype))
+ (parameters
+ (map (match-lambda
+ (`(parameter (attribute ,attribute)
+ (value ,value))
+ (cons (string->lcase-symbol attribute) value)))
+ (flatten-and-filter '(parameter) parameters))))
`(content-type
- ,(acons* 'type type
- 'subtype subtype
- (let ((parameters
- (map (match-lambda
- (`(parameter (attribute ,attribute)
- (value ,value))
- (cons (string->lcase-symbol attribute) value)))
- (flatten-and-filter '(parameter) parameters))))
- ;; RFC6657 specifies UTF-8 as the default charset
- ;; for text/* media types.
- (if (and (eq? type 'text)
- (not (assoc-ref parameters 'charset)))
- (acons 'charset "utf-8" parameters)
- parameters))))))))
+ ,(acons*
+ 'type type
+ 'subtype subtype
+ (cond
+ ((and (eq? type 'text)
+ (or
+ ;; RFC6657 specifies UTF-8 as the default charset for
+ ;; text/* media types.
+ (not (assoc-ref parameters 'charset))
+ ;; RFC2045 recommends that the default be assumed
+ ;; when a syntactically invalid Content-Type header
+ ;; is encountered. In this implementation, we are
+ ;; only checking for the validity of the
+ ;; charset. Should we check for the validity of the
+ ;; entire Content-Type header? If so, how?
+ (not (valid-charset? (assoc-ref parameters 'charset)))))
+ (acons 'charset "utf-8" parameters))
+ (else parameters))))))))
(define post-process-content-disposition
(match-lambda*