summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r--email/base64.scm394
1 files changed, 151 insertions, 243 deletions
diff --git a/email/base64.scm b/email/base64.scm
index e638b55..bcf9f97 100644
--- a/email/base64.scm
+++ b/email/base64.scm
@@ -1,259 +1,167 @@
-;; -*- mode: scheme; coding: utf-8 -*-
-;;
-;; This module was renamed from (weinholt text base64 (1 0 20100612)) to
-;; (guix base64) by Nikita Karetnikov <nikita@karetnikov.org> on
-;; February 12, 2014.
-;;
-;; Some optimizations made by Ludovic Courtès <ludo@gnu.org>, 2015.
-;; Turned into a Guile module (instead of R6RS).
-;;
-;; This module was imported into the source tree of guile-email by
-;; Arun Isaac <arunisaac@systemreboot.net> on September 6, 2018.
-;;
-;; This program is free software: you can redistribute it and/or modify
-;; it under the terms of the GNU General Public License as published by
-;; the Free Software Foundation, either version 3 of the License, or
-;; (at your option) any later version.
-;;
-;; This program is distributed in the hope that it will be useful,
-;; but WITHOUT ANY WARRANTY; without even the implied warranty of
-;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-;; GNU General Public License for more details.
-;;
-;; You should have received a copy of the GNU General Public License
-;; along with this program.  If not, see <http://www.gnu.org/licenses/>.
-;;
-;; This file incorporates work covered by the following copyright and  
-;; permission notice:
-;;
-;;   Copyright © 2009, 2010 Göran Weinholt <goran@weinholt.se>
-;;
-;;   Permission is hereby granted, free of charge, to any person obtaining a
-;;   copy of this software and associated documentation files (the "Software"),
-;;   to deal in the Software without restriction, including without limitation
-;;   the rights to use, copy, modify, merge, publish, distribute, sublicense,
-;;   and/or sell copies of the Software, and to permit persons to whom the
-;;   Software is furnished to do so, subject to the following conditions:
-;;
-;;   The above copyright notice and this permission notice shall be included in
-;;   all copies or substantial portions of the Software.
-;;
-;;   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-;;   IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-;;   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-;;   THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-;;   LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-;;   FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-;;   DEALINGS IN THE SOFTWARE.
-
-;; RFC 4648 Base-N Encodings
+;;; guile-email --- Guile email parser
+;;; Copyright © 2020 Arun Isaac <arunisaac@systemreboot.net>
+;;;
+;;; This file is part of guile-email.
+;;;
+;;; guile-email is free software; you can redistribute it and/or modify
+;;; it under the terms of the GNU Affero General Public License as
+;;; published by the Free Software Foundation; either version 3 of the
+;;; License, or (at your option) any later version.
+;;;
+;;; guile-email is distributed in the hope that it will be useful, but
+;;; WITHOUT ANY WARRANTY; without even the implied warranty of
+;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;;; Affero General Public License for more details.
+;;;
+;;; You should have received a copy of the GNU Affero General Public
+;;; License along with guile-email.  If not, see
+;;; <http://www.gnu.org/licenses/>.
 
 (define-module (email base64)
   #:export (base64-encode
-            base64-decode
-            base64-alphabet
-            base64url-alphabet
-            get-delimited-base64
-            put-delimited-base64)
-  #:use-module (rnrs arithmetic bitwise)
-  #:use-module (rnrs arithmetic fixnums)
-  #:use-module (rnrs base)
+            base64-decode)
   #:use-module (rnrs bytevectors)
   #:use-module (rnrs io ports)
-  #:use-module ((srfi srfi-13)
-                #:select (string-index
-                          string-prefix? string-suffix?
-                          string-concatenate string-trim-both)))
-
-(define-syntax define-alias
-  (syntax-rules ()
-    ((_ new old)
-     (define-syntax new (identifier-syntax old)))))
+  #:use-module ((ice-9 iconv) #:prefix iconv:)
+  #:use-module (ice-9 match)
+  #:use-module (srfi srfi-60))
 
-;; Force the use of Guile's own primitives to avoid the overhead of its 'fx'
-;; procedures.
+(define base64-group-size-characters 4)
+(define base64-group-size-octets 3)
 
-(define-alias fxbit-field bitwise-bit-field)
-(define-alias fxarithmetic-shift ash)
-(define-alias fxarithmetic-shift-left ash)
-(define-alias fxand logand)
-(define-alias fxior logior)
-(define-alias fxxor logxor)
+(define (decode-base64-char c)
+  "Return numeric value corresponding to character C in the Base64
+alphabet. If C is not a valid character in the Base64 alphabet, return
+#f."
+  (cond
+   ((char-upper-case? c)
+    (- (char->integer c) (char->integer #\A)))
+   ((char-lower-case? c)
+    (+ (- (char->integer c) (char->integer #\a))
+       26))
+   ((char-numeric? c)
+    (+ (- (char->integer c) (char->integer #\0))
+       52))
+   ((char=? c #\+) 62)
+   ((char=? c #\/) 63)
+   ((char=? c #\=) 'pad)
+   (else #f)))
 
-(define base64-alphabet
-  "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/")
+;; To speed up decoding, build a lookup table for all ASCII
+;; characters.
+(define decoding-lookup-table
+  (list->vector
+   (map (lambda (i)
+          (decode-base64-char (integer->char i)))
+        (iota 128))))
 
-(define base64url-alphabet
-  "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_")
+(define (decode-base64-char-lookup octet)
+  "Decode OCTET by looking up the pre-built lookup table."
+  (vector-ref decoding-lookup-table octet))
 
-(define base64-encode
-  (case-lambda
-    ;; Simple interface. Returns a string containing the canonical
-    ;; base64 representation of the given bytevector.
-    ((bv)
-     (base64-encode bv 0 (bytevector-length bv) #f #f base64-alphabet #f))
-    ((bv start)
-     (base64-encode bv start (bytevector-length bv) #f #f base64-alphabet #f))
-    ((bv start end)
-     (base64-encode bv start end #f #f base64-alphabet #f))
-    ((bv start end line-length)
-     (base64-encode bv start end line-length #f base64-alphabet #f))
-    ((bv start end line-length no-padding)
-     (base64-encode bv start end line-length no-padding base64-alphabet #f))
-    ((bv start end line-length no-padding alphabet)
-     (base64-encode bv start end line-length no-padding alphabet #f))
-    ;; Base64 encodes the bytes [start,end[ in the given bytevector.
-    ;; Lines are limited to line-length characters (unless #f),
-    ;; which must be a multiple of four. To omit the padding
-    ;; characters (#\=) set no-padding to a true value. If port is
-    ;; #f, returns a string.
-    ((bv start end line-length no-padding alphabet port)
-     (assert (or (not line-length) (zero? (mod line-length 4))))
-     (let-values (((p extract) (if port
-                                   (values port (lambda () (values)))
-                                   (open-string-output-port))))
-       (letrec ((put (if line-length
-                         (let ((chars 0))
-                           (lambda (p c)
-                             (when (fx=? chars line-length)
-                               (set! chars 0)
-                               (put-char p #\linefeed))
-                             (set! chars (fx+ chars 1))
-                             (put-char p c)))
-                         put-char)))
-         (let lp ((i start))
-           (cond ((= i end))
-                 ((<= (+ i 3) end)
-                  (let ((x (bytevector-uint-ref bv i (endianness big) 3)))
-                    (put p (string-ref alphabet (fxbit-field x 18 24)))
-                    (put p (string-ref alphabet (fxbit-field x 12 18)))
-                    (put p (string-ref alphabet (fxbit-field x 6 12)))
-                    (put p (string-ref alphabet (fxbit-field x 0 6)))
-                    (lp (+ i 3))))
-                 ((<= (+ i 2) end)
-                  (let ((x (fxarithmetic-shift-left (bytevector-u16-ref bv i (endianness big)) 8)))
-                    (put p (string-ref alphabet (fxbit-field x 18 24)))
-                    (put p (string-ref alphabet (fxbit-field x 12 18)))
-                    (put p (string-ref alphabet (fxbit-field x 6 12)))
-                    (unless no-padding
-                      (put p #\=))))
-                 (else
-                  (let ((x (fxarithmetic-shift-left (bytevector-u8-ref bv i) 16)))
-                    (put p (string-ref alphabet (fxbit-field x 18 24)))
-                    (put p (string-ref alphabet (fxbit-field x 12 18)))
-                    (unless no-padding
-                      (put p #\=)
-                      (put p #\=)))))))
-       (extract)))))
+(define (bytevector-index bv pred start)
+  "Search the bytevector BV from left to right and return the index of
+the first occurrence of an octet that satisfies predicate PRED."
+  (let loop ((i start))
+    (cond
+     ((= i (bytevector-length bv)) #f)
+     ((pred (bytevector-u8-ref bv i)) i)
+     (else (loop (1+ i))))))
 
-;; Decodes a base64 string. The string must contain only pure
-;; unpadded base64 data.
+(define (base64-decode bv)
+  (define (decode-group out start)
+    (let loop ((i1 start) (i2 (1+ start)) (group-position 0))
+      (let ((v1 (decode-base64-char-lookup (bytevector-u8-ref bv i1)))
+            (v2 (decode-base64-char-lookup (bytevector-u8-ref bv i2))))
+        (cond
+         (v2
+          (case group-position
+            ((0) (put-u8 out (logior (ash v1 2) (bit-field v2 4 6))))
+            ((1) (unless (eq? v2 'pad)
+                   (put-u8 out (logior (ash (bit-field v1 0 4) 4) (bit-field v2 2 6)))))
+            ((2) (unless (or (eq? v1 'pad) (eq? v2 'pad))
+                   (put-u8 out (logior (ash (bit-field v1 0 2) 6) v2)))))
+          ;; The next i1 should always be computed from the previous
+          ;; i2 since there may be invalid characters between them
+          ;; that we need to step over.
+          (case group-position
+            ;; Step to the next octet of this group.
+            ((0 1) (loop i2 (1+ i2) (1+ group-position)))
+            ;; This group is finished, return the possible start of
+            ;; the next group.
+            ((2) (1+ i2))))
+         ;; v2 is invalid, try to stride one step further for v2
+         ;; alone.
+         (else (loop i1 (1+ i2) group-position))))))
 
-(define base64-decode
-  (case-lambda
-    ((str)
-     (base64-decode str base64-alphabet #f))
-    ((str alphabet)
-     (base64-decode str alphabet #f))
-    ((str alphabet port)
-     (unless (zero? (mod (string-length str) 4))
-       (error 'base64-decode
-              "input string must be a multiple of four characters"))
-     (let-values (((p extract) (if port
-                                   (values port (lambda () (values)))
-                                   (open-bytevector-output-port))))
-       (do ((i 0 (+ i 4)))
-           ((= i (string-length str))
-            (extract))
-         (let ((c1 (string-ref str i))
-               (c2 (string-ref str (+ i 1)))
-               (c3 (string-ref str (+ i 2)))
-               (c4 (string-ref str (+ i 3))))
-           ;; TODO: be more clever than string-index
-           (let ((i1 (string-index alphabet c1))
-                 (i2 (string-index alphabet c2))
-                 (i3 (string-index alphabet c3))
-                 (i4 (string-index alphabet c4)))
-             (cond ((and i1 i2 i3 i4)
-                    (let ((x (fxior (fxarithmetic-shift-left i1 18)
-                                    (fxarithmetic-shift-left i2 12)
-                                    (fxarithmetic-shift-left i3 6)
-                                    i4)))
-                      (put-u8 p (fxbit-field x 16 24))
-                      (put-u8 p (fxbit-field x 8 16))
-                      (put-u8 p (fxbit-field x 0 8))))
-                   ((and i1 i2 i3 (char=? c4 #\=)
-                         (= i (- (string-length str) 4)))
-                    (let ((x (fxior (fxarithmetic-shift-left i1 18)
-                                    (fxarithmetic-shift-left i2 12)
-                                    (fxarithmetic-shift-left i3 6))))
-                      (put-u8 p (fxbit-field x 16 24))
-                      (put-u8 p (fxbit-field x 8 16))))
-                   ((and i1 i2 (char=? c3 #\=) (char=? c4 #\=)
-                         (= i (- (string-length str) 4)))
-                    (let ((x (fxior (fxarithmetic-shift-left i1 18)
-                                    (fxarithmetic-shift-left i2 12))))
-                      (put-u8 p (fxbit-field x 16 24))))
-                   (else
-                    (error 'base64-decode "invalid input"
-                           (list c1 c2 c3 c4)))))))))))
+  (match bv
+    ((? bytevector? bv)
+     (call-with-bytevector-output-port
+      (lambda (out)
+        (let loop ((start 0))
+          (let ((group-start (bytevector-index bv decode-base64-char-lookup start)))
+            (when (and group-start
+                       (< group-start (bytevector-length bv)))
+              (loop (decode-group out group-start))))))))
+    ((? string? str)
+     (base64-decode (iconv:string->bytevector str "us-ascii")))))
 
-(define (get-line-comp f port)
-  (if (port-eof? port)
-      (eof-object)
-      (f (get-line port))))
+(define (encode-base64-value n)
+  "Return character corresponding to numeric value N in the Base64
+alphabet. If N is the symbol pad, return the = character."
+  (cond
+   ((eq? n 'pad) #\=)
+   ((or (< n 0) (> n 63))
+    (error "Invalid Base64 value"))
+   ((< n 26)
+    (integer->char (+ (char->integer #\A) n)))
+   ((< n 52)
+    (integer->char (+ (char->integer #\a) (- n 26))))
+   ((< n 62)
+    (integer->char (+ (char->integer #\0) (- n 52))))
+   ((= n 62) #\+)
+   ((= n 63) #\/)))
 
-;; Reads the common -----BEGIN/END type----- delimited format from
-;; the given port. Returns two values: a string with the type and a
-;; bytevector containing the base64 decoded data. The second value
-;; is the eof object if there is an eof before the BEGIN delimiter.
+(define (base64-encode bv)
+  (define (bytevector-ref-maybe bv index)
+    (if (< index (bytevector-length bv))
+        (bytevector-u8-ref bv index)
+        #f))
 
-(define (get-delimited-base64 port)
-  (define (get-first-data-line port)
-    ;; Some MIME data has header fields in the same format as mail
-    ;; or http. These are ignored.
-    (let ((line (get-line-comp string-trim-both port)))
-      (cond ((eof-object? line) line)
-            ((string-index line #\:)
-             (let lp ()                           ;read until empty line
-               (let ((line (get-line-comp string-trim-both port)))
-                 (if (string=? line "")
-                     (get-line-comp string-trim-both port)
-                     (lp)))))
-            (else line))))
-  (let ((line (get-line-comp string-trim-both port)))
-    (cond ((eof-object? line)
-           (values "" (eof-object)))
-          ((string=? line "")
-           (get-delimited-base64 port))
-          ((and (string-prefix? "-----BEGIN " line)
-                (string-suffix? "-----" line))
-           (let* ((type (substring line 11 (- (string-length line) 5)))
-                  (endline (string-append "-----END " type "-----")))
-             (let-values (((outp extract) (open-bytevector-output-port)))
-               (let lp ((line (get-first-data-line port)))
-                 (cond ((eof-object? line)
-                        (error 'get-delimited-base64
-                               "unexpected end of file"))
-                       ((string-prefix? "-" line)
-                        (unless (string=? line endline)
-                          (error 'get-delimited-base64
-                                 "bad end delimiter" type line))
-                        (values type (extract)))
-                       (else
-                        (unless (and (= (string-length line) 5)
-                                     (string-prefix? "=" line)) ;Skip Radix-64 checksum
-                          (base64-decode line base64-alphabet outp))
-                        (lp (get-line-comp string-trim-both port))))))))
-          (else     ;skip garbage (like in openssl x509 -in foo -text output).
-           (get-delimited-base64 port)))))
+  (define number-of-chars-per-line 76)
 
-(define put-delimited-base64
-  (case-lambda
-    ((port type bv line-length)
-     (display (string-append "-----BEGIN " type "-----\n") port)
-     (base64-encode bv 0 (bytevector-length bv)
-                    line-length #f base64-alphabet port)
-     (display (string-append "\n-----END " type "-----\n") port))
-    ((port type bv)
-     (put-delimited-base64 port type bv 76))))
+  (call-with-output-string
+    (lambda (out)
+      (let loop ((group-start 0)
+                 (number-of-chars-left-on-this-line number-of-chars-per-line))
+        (cond
+         ((< number-of-chars-left-on-this-line base64-group-size-characters)
+          (put-char out #\newline)
+          (loop group-start number-of-chars-per-line))
+         ((< group-start (bytevector-length bv))
+          (let ((o1 (bytevector-u8-ref bv group-start))
+                (o2 (bytevector-ref-maybe bv (+ group-start 1)))
+                (o3 (bytevector-ref-maybe bv (+ group-start 2))))
+            (for-each (lambda (n)
+                        (put-char out (encode-base64-value n)))
+                      (cond
+                       ((and o1 o2 o3)
+                        (list (bit-field o1 2 8)
+                              (logior (ash (bit-field o1 0 2) 4)
+                                      (bit-field o2 4 8))
+                              (logior (ash (bit-field o2 0 4) 2)
+                                      (bit-field o3 6 8))
+                              (bit-field o3 0 6)))
+                       ((and o1 o2 (not o3))
+                        (list (bit-field o1 2 8)
+                              (logior (ash (bit-field o1 0 2) 4)
+                                      (bit-field o2 4 8))
+                              (ash (bit-field o2 0 4) 2)
+                              'pad))
+                       ((and o1 (not o2) (not o3))
+                        (list (bit-field o1 2 8)
+                              (ash (bit-field o1 0 2) 4)
+                              'pad 'pad))))
+            (loop (+ group-start base64-group-size-octets)
+                  (- number-of-chars-left-on-this-line
+                     base64-group-size-characters)))))))))