aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorArun Isaac2020-05-25 10:40:23 +0530
committerArun Isaac2020-05-25 19:09:15 +0530
commit4d8ddfb30b5792280d11d58b2e1faf01f91121a9 (patch)
tree8824d46fc025939715a1c232836e98b64f13570a
parent4d1e46f50c7c3ae6e9a62fe89d8ed3638c64f9b9 (diff)
downloadguile-email-4d8ddfb30b5792280d11d58b2e1faf01f91121a9.tar.gz
guile-email-4d8ddfb30b5792280d11d58b2e1faf01f91121a9.tar.lz
guile-email-4d8ddfb30b5792280d11d58b2e1faf01f91121a9.zip
base64: Reimplement from scratch.
* email/base64.scm: Replace file.
-rw-r--r--email/base64.scm394
1 files changed, 151 insertions, 243 deletions
diff --git a/email/base64.scm b/email/base64.scm
index e638b55..bcf9f97 100644
--- a/email/base64.scm
+++ b/email/base64.scm
@@ -1,259 +1,167 @@
-;; -*- mode: scheme; coding: utf-8 -*-
-;;
-;; This module was renamed from (weinholt text base64 (1 0 20100612)) to
-;; (guix base64) by Nikita Karetnikov <nikita@karetnikov.org> on
-;; February 12, 2014.
-;;
-;; Some optimizations made by Ludovic Courtès <ludo@gnu.org>, 2015.
-;; Turned into a Guile module (instead of R6RS).
-;;
-;; This module was imported into the source tree of guile-email by
-;; Arun Isaac <arunisaac@systemreboot.net> on September 6, 2018.
-;;
-;; This program is free software: you can redistribute it and/or modify
-;; it under the terms of the GNU General Public License as published by
-;; the Free Software Foundation, either version 3 of the License, or
-;; (at your option) any later version.
-;;
-;; This program is distributed in the hope that it will be useful,
-;; but WITHOUT ANY WARRANTY; without even the implied warranty of
-;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-;; GNU General Public License for more details.
-;;
-;; You should have received a copy of the GNU General Public License
-;; along with this program. If not, see <http://www.gnu.org/licenses/>.
-;;
-;; This file incorporates work covered by the following copyright and
-;; permission notice:
-;;
-;; Copyright © 2009, 2010 Göran Weinholt <goran@weinholt.se>
-;;
-;; Permission is hereby granted, free of charge, to any person obtaining a
-;; copy of this software and associated documentation files (the "Software"),
-;; to deal in the Software without restriction, including without limitation
-;; the rights to use, copy, modify, merge, publish, distribute, sublicense,
-;; and/or sell copies of the Software, and to permit persons to whom the
-;; Software is furnished to do so, subject to the following conditions:
-;;
-;; The above copyright notice and this permission notice shall be included in
-;; all copies or substantial portions of the Software.
-;;
-;; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-;; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-;; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-;; THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-;; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-;; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-;; DEALINGS IN THE SOFTWARE.
-
-;; RFC 4648 Base-N Encodings
+;;; guile-email --- Guile email parser
+;;; Copyright © 2020 Arun Isaac <arunisaac@systemreboot.net>
+;;;
+;;; This file is part of guile-email.
+;;;
+;;; guile-email is free software; you can redistribute it and/or modify
+;;; it under the terms of the GNU Affero General Public License as
+;;; published by the Free Software Foundation; either version 3 of the
+;;; License, or (at your option) any later version.
+;;;
+;;; guile-email is distributed in the hope that it will be useful, but
+;;; WITHOUT ANY WARRANTY; without even the implied warranty of
+;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+;;; Affero General Public License for more details.
+;;;
+;;; You should have received a copy of the GNU Affero General Public
+;;; License along with guile-email. If not, see
+;;; <http://www.gnu.org/licenses/>.
(define-module (email base64)
#:export (base64-encode
- base64-decode
- base64-alphabet
- base64url-alphabet
- get-delimited-base64
- put-delimited-base64)
- #:use-module (rnrs arithmetic bitwise)
- #:use-module (rnrs arithmetic fixnums)
- #:use-module (rnrs base)
+ base64-decode)
#:use-module (rnrs bytevectors)
#:use-module (rnrs io ports)
- #:use-module ((srfi srfi-13)
- #:select (string-index
- string-prefix? string-suffix?
- string-concatenate string-trim-both)))
-
-(define-syntax define-alias
- (syntax-rules ()
- ((_ new old)
- (define-syntax new (identifier-syntax old)))))
+ #:use-module ((ice-9 iconv) #:prefix iconv:)
+ #:use-module (ice-9 match)
+ #:use-module (srfi srfi-60))
-;; Force the use of Guile's own primitives to avoid the overhead of its 'fx'
-;; procedures.
+(define base64-group-size-characters 4)
+(define base64-group-size-octets 3)
-(define-alias fxbit-field bitwise-bit-field)
-(define-alias fxarithmetic-shift ash)
-(define-alias fxarithmetic-shift-left ash)
-(define-alias fxand logand)
-(define-alias fxior logior)
-(define-alias fxxor logxor)
+(define (decode-base64-char c)
+ "Return numeric value corresponding to character C in the Base64
+alphabet. If C is not a valid character in the Base64 alphabet, return
+#f."
+ (cond
+ ((char-upper-case? c)
+ (- (char->integer c) (char->integer #\A)))
+ ((char-lower-case? c)
+ (+ (- (char->integer c) (char->integer #\a))
+ 26))
+ ((char-numeric? c)
+ (+ (- (char->integer c) (char->integer #\0))
+ 52))
+ ((char=? c #\+) 62)
+ ((char=? c #\/) 63)
+ ((char=? c #\=) 'pad)
+ (else #f)))
-(define base64-alphabet
- "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/")
+;; To speed up decoding, build a lookup table for all ASCII
+;; characters.
+(define decoding-lookup-table
+ (list->vector
+ (map (lambda (i)
+ (decode-base64-char (integer->char i)))
+ (iota 128))))
-(define base64url-alphabet
- "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_")
+(define (decode-base64-char-lookup octet)
+ "Decode OCTET by looking up the pre-built lookup table."
+ (vector-ref decoding-lookup-table octet))
-(define base64-encode
- (case-lambda
- ;; Simple interface. Returns a string containing the canonical
- ;; base64 representation of the given bytevector.
- ((bv)
- (base64-encode bv 0 (bytevector-length bv) #f #f base64-alphabet #f))
- ((bv start)
- (base64-encode bv start (bytevector-length bv) #f #f base64-alphabet #f))
- ((bv start end)
- (base64-encode bv start end #f #f base64-alphabet #f))
- ((bv start end line-length)
- (base64-encode bv start end line-length #f base64-alphabet #f))
- ((bv start end line-length no-padding)
- (base64-encode bv start end line-length no-padding base64-alphabet #f))
- ((bv start end line-length no-padding alphabet)
- (base64-encode bv start end line-length no-padding alphabet #f))
- ;; Base64 encodes the bytes [start,end[ in the given bytevector.
- ;; Lines are limited to line-length characters (unless #f),
- ;; which must be a multiple of four. To omit the padding
- ;; characters (#\=) set no-padding to a true value. If port is
- ;; #f, returns a string.
- ((bv start end line-length no-padding alphabet port)
- (assert (or (not line-length) (zero? (mod line-length 4))))
- (let-values (((p extract) (if port
- (values port (lambda () (values)))
- (open-string-output-port))))
- (letrec ((put (if line-length
- (let ((chars 0))
- (lambda (p c)
- (when (fx=? chars line-length)
- (set! chars 0)
- (put-char p #\linefeed))
- (set! chars (fx+ chars 1))
- (put-char p c)))
- put-char)))
- (let lp ((i start))
- (cond ((= i end))
- ((<= (+ i 3) end)
- (let ((x (bytevector-uint-ref bv i (endianness big) 3)))
- (put p (string-ref alphabet (fxbit-field x 18 24)))
- (put p (string-ref alphabet (fxbit-field x 12 18)))
- (put p (string-ref alphabet (fxbit-field x 6 12)))
- (put p (string-ref alphabet (fxbit-field x 0 6)))
- (lp (+ i 3))))
- ((<= (+ i 2) end)
- (let ((x (fxarithmetic-shift-left (bytevector-u16-ref bv i (endianness big)) 8)))
- (put p (string-ref alphabet (fxbit-field x 18 24)))
- (put p (string-ref alphabet (fxbit-field x 12 18)))
- (put p (string-ref alphabet (fxbit-field x 6 12)))
- (unless no-padding
- (put p #\=))))
- (else
- (let ((x (fxarithmetic-shift-left (bytevector-u8-ref bv i) 16)))
- (put p (string-ref alphabet (fxbit-field x 18 24)))
- (put p (string-ref alphabet (fxbit-field x 12 18)))
- (unless no-padding
- (put p #\=)
- (put p #\=)))))))
- (extract)))))
+(define (bytevector-index bv pred start)
+ "Search the bytevector BV from left to right and return the index of
+the first occurrence of an octet that satisfies predicate PRED."
+ (let loop ((i start))
+ (cond
+ ((= i (bytevector-length bv)) #f)
+ ((pred (bytevector-u8-ref bv i)) i)
+ (else (loop (1+ i))))))
-;; Decodes a base64 string. The string must contain only pure
-;; unpadded base64 data.
+(define (base64-decode bv)
+ (define (decode-group out start)
+ (let loop ((i1 start) (i2 (1+ start)) (group-position 0))
+ (let ((v1 (decode-base64-char-lookup (bytevector-u8-ref bv i1)))
+ (v2 (decode-base64-char-lookup (bytevector-u8-ref bv i2))))
+ (cond
+ (v2
+ (case group-position
+ ((0) (put-u8 out (logior (ash v1 2) (bit-field v2 4 6))))
+ ((1) (unless (eq? v2 'pad)
+ (put-u8 out (logior (ash (bit-field v1 0 4) 4) (bit-field v2 2 6)))))
+ ((2) (unless (or (eq? v1 'pad) (eq? v2 'pad))
+ (put-u8 out (logior (ash (bit-field v1 0 2) 6) v2)))))
+ ;; The next i1 should always be computed from the previous
+ ;; i2 since there may be invalid characters between them
+ ;; that we need to step over.
+ (case group-position
+ ;; Step to the next octet of this group.
+ ((0 1) (loop i2 (1+ i2) (1+ group-position)))
+ ;; This group is finished, return the possible start of
+ ;; the next group.
+ ((2) (1+ i2))))
+ ;; v2 is invalid, try to stride one step further for v2
+ ;; alone.
+ (else (loop i1 (1+ i2) group-position))))))
-(define base64-decode
- (case-lambda
- ((str)
- (base64-decode str base64-alphabet #f))
- ((str alphabet)
- (base64-decode str alphabet #f))
- ((str alphabet port)
- (unless (zero? (mod (string-length str) 4))
- (error 'base64-decode
- "input string must be a multiple of four characters"))
- (let-values (((p extract) (if port
- (values port (lambda () (values)))
- (open-bytevector-output-port))))
- (do ((i 0 (+ i 4)))
- ((= i (string-length str))
- (extract))
- (let ((c1 (string-ref str i))
- (c2 (string-ref str (+ i 1)))
- (c3 (string-ref str (+ i 2)))
- (c4 (string-ref str (+ i 3))))
- ;; TODO: be more clever than string-index
- (let ((i1 (string-index alphabet c1))
- (i2 (string-index alphabet c2))
- (i3 (string-index alphabet c3))
- (i4 (string-index alphabet c4)))
- (cond ((and i1 i2 i3 i4)
- (let ((x (fxior (fxarithmetic-shift-left i1 18)
- (fxarithmetic-shift-left i2 12)
- (fxarithmetic-shift-left i3 6)
- i4)))
- (put-u8 p (fxbit-field x 16 24))
- (put-u8 p (fxbit-field x 8 16))
- (put-u8 p (fxbit-field x 0 8))))
- ((and i1 i2 i3 (char=? c4 #\=)
- (= i (- (string-length str) 4)))
- (let ((x (fxior (fxarithmetic-shift-left i1 18)
- (fxarithmetic-shift-left i2 12)
- (fxarithmetic-shift-left i3 6))))
- (put-u8 p (fxbit-field x 16 24))
- (put-u8 p (fxbit-field x 8 16))))
- ((and i1 i2 (char=? c3 #\=) (char=? c4 #\=)
- (= i (- (string-length str) 4)))
- (let ((x (fxior (fxarithmetic-shift-left i1 18)
- (fxarithmetic-shift-left i2 12))))
- (put-u8 p (fxbit-field x 16 24))))
- (else
- (error 'base64-decode "invalid input"
- (list c1 c2 c3 c4)))))))))))
+ (match bv
+ ((? bytevector? bv)
+ (call-with-bytevector-output-port
+ (lambda (out)
+ (let loop ((start 0))
+ (let ((group-start (bytevector-index bv decode-base64-char-lookup start)))
+ (when (and group-start
+ (< group-start (bytevector-length bv)))
+ (loop (decode-group out group-start))))))))
+ ((? string? str)
+ (base64-decode (iconv:string->bytevector str "us-ascii")))))
-(define (get-line-comp f port)
- (if (port-eof? port)
- (eof-object)
- (f (get-line port))))
+(define (encode-base64-value n)
+ "Return character corresponding to numeric value N in the Base64
+alphabet. If N is the symbol pad, return the = character."
+ (cond
+ ((eq? n 'pad) #\=)
+ ((or (< n 0) (> n 63))
+ (error "Invalid Base64 value"))
+ ((< n 26)
+ (integer->char (+ (char->integer #\A) n)))
+ ((< n 52)
+ (integer->char (+ (char->integer #\a) (- n 26))))
+ ((< n 62)
+ (integer->char (+ (char->integer #\0) (- n 52))))
+ ((= n 62) #\+)
+ ((= n 63) #\/)))
-;; Reads the common -----BEGIN/END type----- delimited format from
-;; the given port. Returns two values: a string with the type and a
-;; bytevector containing the base64 decoded data. The second value
-;; is the eof object if there is an eof before the BEGIN delimiter.
+(define (base64-encode bv)
+ (define (bytevector-ref-maybe bv index)
+ (if (< index (bytevector-length bv))
+ (bytevector-u8-ref bv index)
+ #f))
-(define (get-delimited-base64 port)
- (define (get-first-data-line port)
- ;; Some MIME data has header fields in the same format as mail
- ;; or http. These are ignored.
- (let ((line (get-line-comp string-trim-both port)))
- (cond ((eof-object? line) line)
- ((string-index line #\:)
- (let lp () ;read until empty line
- (let ((line (get-line-comp string-trim-both port)))
- (if (string=? line "")
- (get-line-comp string-trim-both port)
- (lp)))))
- (else line))))
- (let ((line (get-line-comp string-trim-both port)))
- (cond ((eof-object? line)
- (values "" (eof-object)))
- ((string=? line "")
- (get-delimited-base64 port))
- ((and (string-prefix? "-----BEGIN " line)
- (string-suffix? "-----" line))
- (let* ((type (substring line 11 (- (string-length line) 5)))
- (endline (string-append "-----END " type "-----")))
- (let-values (((outp extract) (open-bytevector-output-port)))
- (let lp ((line (get-first-data-line port)))
- (cond ((eof-object? line)
- (error 'get-delimited-base64
- "unexpected end of file"))
- ((string-prefix? "-" line)
- (unless (string=? line endline)
- (error 'get-delimited-base64
- "bad end delimiter" type line))
- (values type (extract)))
- (else
- (unless (and (= (string-length line) 5)
- (string-prefix? "=" line)) ;Skip Radix-64 checksum
- (base64-decode line base64-alphabet outp))
- (lp (get-line-comp string-trim-both port))))))))
- (else ;skip garbage (like in openssl x509 -in foo -text output).
- (get-delimited-base64 port)))))
+ (define number-of-chars-per-line 76)
-(define put-delimited-base64
- (case-lambda
- ((port type bv line-length)
- (display (string-append "-----BEGIN " type "-----\n") port)
- (base64-encode bv 0 (bytevector-length bv)
- line-length #f base64-alphabet port)
- (display (string-append "\n-----END " type "-----\n") port))
- ((port type bv)
- (put-delimited-base64 port type bv 76))))
+ (call-with-output-string
+ (lambda (out)
+ (let loop ((group-start 0)
+ (number-of-chars-left-on-this-line number-of-chars-per-line))
+ (cond
+ ((< number-of-chars-left-on-this-line base64-group-size-characters)
+ (put-char out #\newline)
+ (loop group-start number-of-chars-per-line))
+ ((< group-start (bytevector-length bv))
+ (let ((o1 (bytevector-u8-ref bv group-start))
+ (o2 (bytevector-ref-maybe bv (+ group-start 1)))
+ (o3 (bytevector-ref-maybe bv (+ group-start 2))))
+ (for-each (lambda (n)
+ (put-char out (encode-base64-value n)))
+ (cond
+ ((and o1 o2 o3)
+ (list (bit-field o1 2 8)
+ (logior (ash (bit-field o1 0 2) 4)
+ (bit-field o2 4 8))
+ (logior (ash (bit-field o2 0 4) 2)
+ (bit-field o3 6 8))
+ (bit-field o3 0 6)))
+ ((and o1 o2 (not o3))
+ (list (bit-field o1 2 8)
+ (logior (ash (bit-field o1 0 2) 4)
+ (bit-field o2 4 8))
+ (ash (bit-field o2 0 4) 2)
+ 'pad))
+ ((and o1 (not o2) (not o3))
+ (list (bit-field o1 2 8)
+ (ash (bit-field o1 0 2) 4)
+ 'pad 'pad))))
+ (loop (+ group-start base64-group-size-octets)
+ (- number-of-chars-left-on-this-line
+ base64-group-size-characters)))))))))