diff options
author | Arun Isaac | 2019-10-01 22:07:32 +0530 |
---|---|---|
committer | Arun Isaac | 2019-10-01 22:07:32 +0530 |
commit | 6159257c9da3664c516a26fad1ee942068fbbdd2 (patch) | |
tree | 3504d2a434d0e8989bae9084063df1d0c89d63ce /email | |
parent | 9d82904011516530b6ef1bcd53cef220db485e7a (diff) | |
download | guile-email-6159257c9da3664c516a26fad1ee942068fbbdd2.tar.gz guile-email-6159257c9da3664c516a26fad1ee942068fbbdd2.tar.lz guile-email-6159257c9da3664c516a26fad1ee942068fbbdd2.zip |
email: Tolerate invalid charset.
* email/email.scm (post-process-content-type): If charset is invalid,
assume default UTF-8 as charset.
* tests/email.scm ("tolerate invalid charset"): New test.
Reported-by: Ricardo Wurmus <rekado@elephly.net>
Diffstat (limited to 'email')
-rw-r--r-- | email/email.scm | 51 |
1 files changed, 34 insertions, 17 deletions
diff --git a/email/email.scm b/email/email.scm index a791a0e..666bd3e 100644 --- a/email/email.scm +++ b/email/email.scm @@ -643,28 +643,45 @@ values. The returned headers is a string and body is a bytevector." (define (post-process-content-transfer-encoding _ value) (list 'content-transfer-encoding (string->lcase-symbol value))) -(define post-process-content-type - (match-lambda* +(define (post-process-content-type . args) + (define (valid-charset? charset) + (catch #t + (lambda () + (bytevector->string (make-bytevector 0 0) charset) + #t) + (const #f))) + + (match args (`(content (type ,type) (subtype ,subtype) . ,parameters) (let ((type (string->lcase-symbol type)) - (subtype (string->lcase-symbol subtype))) + (subtype (string->lcase-symbol subtype)) + (parameters + (map (match-lambda + (`(parameter (attribute ,attribute) + (value ,value)) + (cons (string->lcase-symbol attribute) value))) + (flatten-and-filter '(parameter) parameters)))) `(content-type - ,(acons* 'type type - 'subtype subtype - (let ((parameters - (map (match-lambda - (`(parameter (attribute ,attribute) - (value ,value)) - (cons (string->lcase-symbol attribute) value))) - (flatten-and-filter '(parameter) parameters)))) - ;; RFC6657 specifies UTF-8 as the default charset - ;; for text/* media types. - (if (and (eq? type 'text) - (not (assoc-ref parameters 'charset))) - (acons 'charset "utf-8" parameters) - parameters)))))))) + ,(acons* + 'type type + 'subtype subtype + (cond + ((and (eq? type 'text) + (or + ;; RFC6657 specifies UTF-8 as the default charset for + ;; text/* media types. + (not (assoc-ref parameters 'charset)) + ;; RFC2045 recommends that the default be assumed + ;; when a syntactically invalid Content-Type header + ;; is encountered. In this implementation, we are + ;; only checking for the validity of the + ;; charset. Should we check for the validity of the + ;; entire Content-Type header? If so, how? + (not (valid-charset? (assoc-ref parameters 'charset))))) + (acons 'charset "utf-8" parameters)) + (else parameters)))))))) (define post-process-content-disposition (match-lambda* |