From 9ac10570fca04800f04a439e484f41421864b99f Mon Sep 17 00:00:00 2001 From: Arun Isaac Date: Sat, 27 Jul 2019 20:25:29 +0530 Subject: email: Decode MIME entities without headers. * email/email.scm (email->headers+body): If there are no headers, return "" as headers not an eof-object. (parse-email-body): Parse headers of parent entity or email to parse-mime-entity. (add-default-mime-entity-headers): New function. (parse-mime-entity): Use add-default-mime-entity-headers instead of add-default-headers. Handle MIME entities without headers. * tests/email.scm ("decode MIME entity without headers"): New test. --- email/email.scm | 39 ++++++++++++++++++++++++++++++++++----- tests/email.scm | 17 +++++++++++++++++ 2 files changed, 51 insertions(+), 5 deletions(-) diff --git a/email/email.scm b/email/email.scm index 381c81a..565f8e7 100644 --- a/email/email.scm +++ b/email/email.scm @@ -611,7 +611,8 @@ values. The returned headers is a string and body is a bytevector." (not (or (string= line "\n") (string= line "\r\n"))))))) (get-line-with-delimiter port) - (values headers (get-bytevector-all port)))))) + (values (if (eof-object? headers) "" headers) + (get-bytevector-all port)))))) (define (post-process-mailbox . args) (define process-name @@ -731,7 +732,7 @@ message. Else, return a single record." (let ((content-type (assoc-ref headers 'content-type))) (case (assoc-ref content-type 'type) ((multipart) - (map parse-mime-entity + (map (cut parse-mime-entity headers <>) (body->mime-entities body (assoc-ref content-type 'boundary)))) ((text) (string-trim-both @@ -756,7 +757,29 @@ message. Else, return a single record." default-headers) headers))) -(define (parse-mime-entity bv) +(define (add-default-mime-entity-headers parent-headers headers) + ;; Default Content-Type and Content-Transfer-Encoding headers as + ;; specified in RFC2045 and RFC2046 + (let* ((parent-content-type (assoc-ref parent-headers 'content-type)) + (default-headers + (acons* 'content-type `(,@(if (and (eq? (assoc-ref parent-content-type 'type) 'multipart) + (eq? (assoc-ref parent-content-type 'subtype) 'digest)) + '((type . message) + (subtype . rfc822)) + '((type . text) + (subtype . plain))) + ;; UTF-8 is specified as the default + ;; charset in RFC6657 + (charset . "utf-8")) + 'content-transfer-encoding '#{7bit}#))) + (append (alist-delete* (append (if (assoc-ref headers 'content-type) + (list 'content-type) (list)) + (if (assoc-ref headers 'content-transfer-encoding) + (list 'content-transfer-encoding) (list))) + default-headers) + headers))) + +(define (parse-mime-entity parent-headers bv) (let-values (((headers body) (email->headers+body bv))) (let ((headers (pre-post-order @@ -767,14 +790,20 @@ message. Else, return a single record." (disposition . ,post-process-content-disposition) (optional-field . ,post-process-optional-field) (mime-entity-fields . ,(lambda (_ . mime-entity-fields) - (add-default-headers + (add-default-mime-entity-headers + parent-headers (map (match-lambda ((mime-entity-field value) (cons mime-entity-field value)) ((mime-entity-field . values) (cons mime-entity-field values))) mime-entity-fields)))) - (*text* . ,(lambda (_ text) text)) + (*text* . ,(match-lambda* + ;; Handle MIME entities that have no + ;; headers. + ((_ 'mime-entity-fields) + (add-default-mime-entity-headers parent-headers '())) + ((_ text) text))) (*default* . ,(lambda tree tree)))))) (make-mime-entity headers (parse-email-body headers body))))) diff --git a/tests/email.scm b/tests/email.scm index 856a5b9..177391e 100644 --- a/tests/email.scm +++ b/tests/email.scm @@ -102,6 +102,23 @@ Content-Length: 4349 (x-mailer . "FooMail 4.0 4.03 (SMT460B92F)") (content-length . "4349"))) +(test-equal "decode MIME entity without headers" + ((module-ref (resolve-module '(email email)) + 'parse-mime-entity) + '((content-type (type . multipart) + (subtype . mixed))) + (string->bytevector + " +This is implicitly typed plain US-ASCII text. +It does NOT end with a linebreak. +" "utf-8")) + (make-mime-entity '((content-type (type . text) + (subtype . plain) + (charset . "utf-8")) + (content-transfer-encoding . 7bit)) + "This is implicitly typed plain US-ASCII text. +It does NOT end with a linebreak.")) + (test-equal "email with 8 bit encoding and non UTF-8 charset" (call-with-input-file "tests/email-with-8bit-encoding-and-non-utf8-charset" (compose parse-email get-bytevector-all)) -- cgit v1.2.3