From 4d8ddfb30b5792280d11d58b2e1faf01f91121a9 Mon Sep 17 00:00:00 2001 From: Arun Isaac Date: Mon, 25 May 2020 10:40:23 +0530 Subject: base64: Reimplement from scratch. * email/base64.scm: Replace file. --- email/base64.scm | 394 +++++++++++++++++++++---------------------------------- 1 file changed, 151 insertions(+), 243 deletions(-) diff --git a/email/base64.scm b/email/base64.scm index e638b55..bcf9f97 100644 --- a/email/base64.scm +++ b/email/base64.scm @@ -1,259 +1,167 @@ -;; -*- mode: scheme; coding: utf-8 -*- -;; -;; This module was renamed from (weinholt text base64 (1 0 20100612)) to -;; (guix base64) by Nikita Karetnikov on -;; February 12, 2014. -;; -;; Some optimizations made by Ludovic Courtès , 2015. -;; Turned into a Guile module (instead of R6RS). -;; -;; This module was imported into the source tree of guile-email by -;; Arun Isaac on September 6, 2018. -;; -;; This program is free software: you can redistribute it and/or modify -;; it under the terms of the GNU General Public License as published by -;; the Free Software Foundation, either version 3 of the License, or -;; (at your option) any later version. -;; -;; This program is distributed in the hope that it will be useful, -;; but WITHOUT ANY WARRANTY; without even the implied warranty of -;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -;; GNU General Public License for more details. -;; -;; You should have received a copy of the GNU General Public License -;; along with this program. If not, see . -;; -;; This file incorporates work covered by the following copyright and -;; permission notice: -;; -;; Copyright © 2009, 2010 Göran Weinholt -;; -;; Permission is hereby granted, free of charge, to any person obtaining a -;; copy of this software and associated documentation files (the "Software"), -;; to deal in the Software without restriction, including without limitation -;; the rights to use, copy, modify, merge, publish, distribute, sublicense, -;; and/or sell copies of the Software, and to permit persons to whom the -;; Software is furnished to do so, subject to the following conditions: -;; -;; The above copyright notice and this permission notice shall be included in -;; all copies or substantial portions of the Software. -;; -;; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -;; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -;; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -;; THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -;; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -;; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -;; DEALINGS IN THE SOFTWARE. - -;; RFC 4648 Base-N Encodings +;;; guile-email --- Guile email parser +;;; Copyright © 2020 Arun Isaac +;;; +;;; This file is part of guile-email. +;;; +;;; guile-email is free software; you can redistribute it and/or modify +;;; it under the terms of the GNU Affero General Public License as +;;; published by the Free Software Foundation; either version 3 of the +;;; License, or (at your option) any later version. +;;; +;;; guile-email is distributed in the hope that it will be useful, but +;;; WITHOUT ANY WARRANTY; without even the implied warranty of +;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;;; Affero General Public License for more details. +;;; +;;; You should have received a copy of the GNU Affero General Public +;;; License along with guile-email. If not, see +;;; . (define-module (email base64) #:export (base64-encode - base64-decode - base64-alphabet - base64url-alphabet - get-delimited-base64 - put-delimited-base64) - #:use-module (rnrs arithmetic bitwise) - #:use-module (rnrs arithmetic fixnums) - #:use-module (rnrs base) + base64-decode) #:use-module (rnrs bytevectors) #:use-module (rnrs io ports) - #:use-module ((srfi srfi-13) - #:select (string-index - string-prefix? string-suffix? - string-concatenate string-trim-both))) - -(define-syntax define-alias - (syntax-rules () - ((_ new old) - (define-syntax new (identifier-syntax old))))) + #:use-module ((ice-9 iconv) #:prefix iconv:) + #:use-module (ice-9 match) + #:use-module (srfi srfi-60)) -;; Force the use of Guile's own primitives to avoid the overhead of its 'fx' -;; procedures. +(define base64-group-size-characters 4) +(define base64-group-size-octets 3) -(define-alias fxbit-field bitwise-bit-field) -(define-alias fxarithmetic-shift ash) -(define-alias fxarithmetic-shift-left ash) -(define-alias fxand logand) -(define-alias fxior logior) -(define-alias fxxor logxor) +(define (decode-base64-char c) + "Return numeric value corresponding to character C in the Base64 +alphabet. If C is not a valid character in the Base64 alphabet, return +#f." + (cond + ((char-upper-case? c) + (- (char->integer c) (char->integer #\A))) + ((char-lower-case? c) + (+ (- (char->integer c) (char->integer #\a)) + 26)) + ((char-numeric? c) + (+ (- (char->integer c) (char->integer #\0)) + 52)) + ((char=? c #\+) 62) + ((char=? c #\/) 63) + ((char=? c #\=) 'pad) + (else #f))) -(define base64-alphabet - "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/") +;; To speed up decoding, build a lookup table for all ASCII +;; characters. +(define decoding-lookup-table + (list->vector + (map (lambda (i) + (decode-base64-char (integer->char i))) + (iota 128)))) -(define base64url-alphabet - "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_") +(define (decode-base64-char-lookup octet) + "Decode OCTET by looking up the pre-built lookup table." + (vector-ref decoding-lookup-table octet)) -(define base64-encode - (case-lambda - ;; Simple interface. Returns a string containing the canonical - ;; base64 representation of the given bytevector. - ((bv) - (base64-encode bv 0 (bytevector-length bv) #f #f base64-alphabet #f)) - ((bv start) - (base64-encode bv start (bytevector-length bv) #f #f base64-alphabet #f)) - ((bv start end) - (base64-encode bv start end #f #f base64-alphabet #f)) - ((bv start end line-length) - (base64-encode bv start end line-length #f base64-alphabet #f)) - ((bv start end line-length no-padding) - (base64-encode bv start end line-length no-padding base64-alphabet #f)) - ((bv start end line-length no-padding alphabet) - (base64-encode bv start end line-length no-padding alphabet #f)) - ;; Base64 encodes the bytes [start,end[ in the given bytevector. - ;; Lines are limited to line-length characters (unless #f), - ;; which must be a multiple of four. To omit the padding - ;; characters (#\=) set no-padding to a true value. If port is - ;; #f, returns a string. - ((bv start end line-length no-padding alphabet port) - (assert (or (not line-length) (zero? (mod line-length 4)))) - (let-values (((p extract) (if port - (values port (lambda () (values))) - (open-string-output-port)))) - (letrec ((put (if line-length - (let ((chars 0)) - (lambda (p c) - (when (fx=? chars line-length) - (set! chars 0) - (put-char p #\linefeed)) - (set! chars (fx+ chars 1)) - (put-char p c))) - put-char))) - (let lp ((i start)) - (cond ((= i end)) - ((<= (+ i 3) end) - (let ((x (bytevector-uint-ref bv i (endianness big) 3))) - (put p (string-ref alphabet (fxbit-field x 18 24))) - (put p (string-ref alphabet (fxbit-field x 12 18))) - (put p (string-ref alphabet (fxbit-field x 6 12))) - (put p (string-ref alphabet (fxbit-field x 0 6))) - (lp (+ i 3)))) - ((<= (+ i 2) end) - (let ((x (fxarithmetic-shift-left (bytevector-u16-ref bv i (endianness big)) 8))) - (put p (string-ref alphabet (fxbit-field x 18 24))) - (put p (string-ref alphabet (fxbit-field x 12 18))) - (put p (string-ref alphabet (fxbit-field x 6 12))) - (unless no-padding - (put p #\=)))) - (else - (let ((x (fxarithmetic-shift-left (bytevector-u8-ref bv i) 16))) - (put p (string-ref alphabet (fxbit-field x 18 24))) - (put p (string-ref alphabet (fxbit-field x 12 18))) - (unless no-padding - (put p #\=) - (put p #\=))))))) - (extract))))) +(define (bytevector-index bv pred start) + "Search the bytevector BV from left to right and return the index of +the first occurrence of an octet that satisfies predicate PRED." + (let loop ((i start)) + (cond + ((= i (bytevector-length bv)) #f) + ((pred (bytevector-u8-ref bv i)) i) + (else (loop (1+ i)))))) -;; Decodes a base64 string. The string must contain only pure -;; unpadded base64 data. +(define (base64-decode bv) + (define (decode-group out start) + (let loop ((i1 start) (i2 (1+ start)) (group-position 0)) + (let ((v1 (decode-base64-char-lookup (bytevector-u8-ref bv i1))) + (v2 (decode-base64-char-lookup (bytevector-u8-ref bv i2)))) + (cond + (v2 + (case group-position + ((0) (put-u8 out (logior (ash v1 2) (bit-field v2 4 6)))) + ((1) (unless (eq? v2 'pad) + (put-u8 out (logior (ash (bit-field v1 0 4) 4) (bit-field v2 2 6))))) + ((2) (unless (or (eq? v1 'pad) (eq? v2 'pad)) + (put-u8 out (logior (ash (bit-field v1 0 2) 6) v2))))) + ;; The next i1 should always be computed from the previous + ;; i2 since there may be invalid characters between them + ;; that we need to step over. + (case group-position + ;; Step to the next octet of this group. + ((0 1) (loop i2 (1+ i2) (1+ group-position))) + ;; This group is finished, return the possible start of + ;; the next group. + ((2) (1+ i2)))) + ;; v2 is invalid, try to stride one step further for v2 + ;; alone. + (else (loop i1 (1+ i2) group-position)))))) -(define base64-decode - (case-lambda - ((str) - (base64-decode str base64-alphabet #f)) - ((str alphabet) - (base64-decode str alphabet #f)) - ((str alphabet port) - (unless (zero? (mod (string-length str) 4)) - (error 'base64-decode - "input string must be a multiple of four characters")) - (let-values (((p extract) (if port - (values port (lambda () (values))) - (open-bytevector-output-port)))) - (do ((i 0 (+ i 4))) - ((= i (string-length str)) - (extract)) - (let ((c1 (string-ref str i)) - (c2 (string-ref str (+ i 1))) - (c3 (string-ref str (+ i 2))) - (c4 (string-ref str (+ i 3)))) - ;; TODO: be more clever than string-index - (let ((i1 (string-index alphabet c1)) - (i2 (string-index alphabet c2)) - (i3 (string-index alphabet c3)) - (i4 (string-index alphabet c4))) - (cond ((and i1 i2 i3 i4) - (let ((x (fxior (fxarithmetic-shift-left i1 18) - (fxarithmetic-shift-left i2 12) - (fxarithmetic-shift-left i3 6) - i4))) - (put-u8 p (fxbit-field x 16 24)) - (put-u8 p (fxbit-field x 8 16)) - (put-u8 p (fxbit-field x 0 8)))) - ((and i1 i2 i3 (char=? c4 #\=) - (= i (- (string-length str) 4))) - (let ((x (fxior (fxarithmetic-shift-left i1 18) - (fxarithmetic-shift-left i2 12) - (fxarithmetic-shift-left i3 6)))) - (put-u8 p (fxbit-field x 16 24)) - (put-u8 p (fxbit-field x 8 16)))) - ((and i1 i2 (char=? c3 #\=) (char=? c4 #\=) - (= i (- (string-length str) 4))) - (let ((x (fxior (fxarithmetic-shift-left i1 18) - (fxarithmetic-shift-left i2 12)))) - (put-u8 p (fxbit-field x 16 24)))) - (else - (error 'base64-decode "invalid input" - (list c1 c2 c3 c4))))))))))) + (match bv + ((? bytevector? bv) + (call-with-bytevector-output-port + (lambda (out) + (let loop ((start 0)) + (let ((group-start (bytevector-index bv decode-base64-char-lookup start))) + (when (and group-start + (< group-start (bytevector-length bv))) + (loop (decode-group out group-start)))))))) + ((? string? str) + (base64-decode (iconv:string->bytevector str "us-ascii"))))) -(define (get-line-comp f port) - (if (port-eof? port) - (eof-object) - (f (get-line port)))) +(define (encode-base64-value n) + "Return character corresponding to numeric value N in the Base64 +alphabet. If N is the symbol pad, return the = character." + (cond + ((eq? n 'pad) #\=) + ((or (< n 0) (> n 63)) + (error "Invalid Base64 value")) + ((< n 26) + (integer->char (+ (char->integer #\A) n))) + ((< n 52) + (integer->char (+ (char->integer #\a) (- n 26)))) + ((< n 62) + (integer->char (+ (char->integer #\0) (- n 52)))) + ((= n 62) #\+) + ((= n 63) #\/))) -;; Reads the common -----BEGIN/END type----- delimited format from -;; the given port. Returns two values: a string with the type and a -;; bytevector containing the base64 decoded data. The second value -;; is the eof object if there is an eof before the BEGIN delimiter. +(define (base64-encode bv) + (define (bytevector-ref-maybe bv index) + (if (< index (bytevector-length bv)) + (bytevector-u8-ref bv index) + #f)) -(define (get-delimited-base64 port) - (define (get-first-data-line port) - ;; Some MIME data has header fields in the same format as mail - ;; or http. These are ignored. - (let ((line (get-line-comp string-trim-both port))) - (cond ((eof-object? line) line) - ((string-index line #\:) - (let lp () ;read until empty line - (let ((line (get-line-comp string-trim-both port))) - (if (string=? line "") - (get-line-comp string-trim-both port) - (lp))))) - (else line)))) - (let ((line (get-line-comp string-trim-both port))) - (cond ((eof-object? line) - (values "" (eof-object))) - ((string=? line "") - (get-delimited-base64 port)) - ((and (string-prefix? "-----BEGIN " line) - (string-suffix? "-----" line)) - (let* ((type (substring line 11 (- (string-length line) 5))) - (endline (string-append "-----END " type "-----"))) - (let-values (((outp extract) (open-bytevector-output-port))) - (let lp ((line (get-first-data-line port))) - (cond ((eof-object? line) - (error 'get-delimited-base64 - "unexpected end of file")) - ((string-prefix? "-" line) - (unless (string=? line endline) - (error 'get-delimited-base64 - "bad end delimiter" type line)) - (values type (extract))) - (else - (unless (and (= (string-length line) 5) - (string-prefix? "=" line)) ;Skip Radix-64 checksum - (base64-decode line base64-alphabet outp)) - (lp (get-line-comp string-trim-both port)))))))) - (else ;skip garbage (like in openssl x509 -in foo -text output). - (get-delimited-base64 port))))) + (define number-of-chars-per-line 76) -(define put-delimited-base64 - (case-lambda - ((port type bv line-length) - (display (string-append "-----BEGIN " type "-----\n") port) - (base64-encode bv 0 (bytevector-length bv) - line-length #f base64-alphabet port) - (display (string-append "\n-----END " type "-----\n") port)) - ((port type bv) - (put-delimited-base64 port type bv 76)))) + (call-with-output-string + (lambda (out) + (let loop ((group-start 0) + (number-of-chars-left-on-this-line number-of-chars-per-line)) + (cond + ((< number-of-chars-left-on-this-line base64-group-size-characters) + (put-char out #\newline) + (loop group-start number-of-chars-per-line)) + ((< group-start (bytevector-length bv)) + (let ((o1 (bytevector-u8-ref bv group-start)) + (o2 (bytevector-ref-maybe bv (+ group-start 1))) + (o3 (bytevector-ref-maybe bv (+ group-start 2)))) + (for-each (lambda (n) + (put-char out (encode-base64-value n))) + (cond + ((and o1 o2 o3) + (list (bit-field o1 2 8) + (logior (ash (bit-field o1 0 2) 4) + (bit-field o2 4 8)) + (logior (ash (bit-field o2 0 4) 2) + (bit-field o3 6 8)) + (bit-field o3 0 6))) + ((and o1 o2 (not o3)) + (list (bit-field o1 2 8) + (logior (ash (bit-field o1 0 2) 4) + (bit-field o2 4 8)) + (ash (bit-field o2 0 4) 2) + 'pad)) + ((and o1 (not o2) (not o3)) + (list (bit-field o1 2 8) + (ash (bit-field o1 0 2) 4) + 'pad 'pad)))) + (loop (+ group-start base64-group-size-octets) + (- number-of-chars-left-on-this-line + base64-group-size-characters))))))))) -- cgit v1.2.3