From 6159257c9da3664c516a26fad1ee942068fbbdd2 Mon Sep 17 00:00:00 2001 From: Arun Isaac Date: Tue, 1 Oct 2019 22:07:32 +0530 Subject: email: Tolerate invalid charset. * email/email.scm (post-process-content-type): If charset is invalid, assume default UTF-8 as charset. * tests/email.scm ("tolerate invalid charset"): New test. Reported-by: Ricardo Wurmus --- email/email.scm | 51 ++++++++++++++++++++++++++++++++++----------------- 1 file changed, 34 insertions(+), 17 deletions(-) (limited to 'email/email.scm') diff --git a/email/email.scm b/email/email.scm index a791a0e..666bd3e 100644 --- a/email/email.scm +++ b/email/email.scm @@ -643,28 +643,45 @@ values. The returned headers is a string and body is a bytevector." (define (post-process-content-transfer-encoding _ value) (list 'content-transfer-encoding (string->lcase-symbol value))) -(define post-process-content-type - (match-lambda* +(define (post-process-content-type . args) + (define (valid-charset? charset) + (catch #t + (lambda () + (bytevector->string (make-bytevector 0 0) charset) + #t) + (const #f))) + + (match args (`(content (type ,type) (subtype ,subtype) . ,parameters) (let ((type (string->lcase-symbol type)) - (subtype (string->lcase-symbol subtype))) + (subtype (string->lcase-symbol subtype)) + (parameters + (map (match-lambda + (`(parameter (attribute ,attribute) + (value ,value)) + (cons (string->lcase-symbol attribute) value))) + (flatten-and-filter '(parameter) parameters)))) `(content-type - ,(acons* 'type type - 'subtype subtype - (let ((parameters - (map (match-lambda - (`(parameter (attribute ,attribute) - (value ,value)) - (cons (string->lcase-symbol attribute) value))) - (flatten-and-filter '(parameter) parameters)))) - ;; RFC6657 specifies UTF-8 as the default charset - ;; for text/* media types. - (if (and (eq? type 'text) - (not (assoc-ref parameters 'charset))) - (acons 'charset "utf-8" parameters) - parameters)))))))) + ,(acons* + 'type type + 'subtype subtype + (cond + ((and (eq? type 'text) + (or + ;; RFC6657 specifies UTF-8 as the default charset for + ;; text/* media types. + (not (assoc-ref parameters 'charset)) + ;; RFC2045 recommends that the default be assumed + ;; when a syntactically invalid Content-Type header + ;; is encountered. In this implementation, we are + ;; only checking for the validity of the + ;; charset. Should we check for the validity of the + ;; entire Content-Type header? If so, how? + (not (valid-charset? (assoc-ref parameters 'charset))))) + (acons 'charset "utf-8" parameters)) + (else parameters)))))))) (define post-process-content-disposition (match-lambda* -- cgit v1.2.3