aboutsummaryrefslogtreecommitdiff
;;; guile-email --- Guile email parser
;;; Copyright © 2020 Arun Isaac <arunisaac@systemreboot.net>
;;;
;;; This file is part of guile-email.
;;;
;;; guile-email is free software; you can redistribute it and/or modify
;;; it under the terms of the GNU Affero General Public License as
;;; published by the Free Software Foundation; either version 3 of the
;;; License, or (at your option) any later version.
;;;
;;; guile-email is distributed in the hope that it will be useful, but
;;; WITHOUT ANY WARRANTY; without even the implied warranty of
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;;; Affero General Public License for more details.
;;;
;;; You should have received a copy of the GNU Affero General Public
;;; License along with guile-email.  If not, see
;;; <http://www.gnu.org/licenses/>.

(define-module (email base64)
  #:export (base64-encode
            base64-decode)
  #:use-module (rnrs bytevectors)
  #:use-module (rnrs io ports)
  #:use-module ((ice-9 iconv) #:prefix iconv:)
  #:use-module (ice-9 match)
  #:use-module (srfi srfi-60))

(define base64-group-size-characters 4)
(define base64-group-size-octets 3)

(define (decode-base64-char c)
  "Return numeric value corresponding to character C in the Base64
alphabet. If C is not a valid character in the Base64 alphabet, return
#f."
  (cond
   ((char-upper-case? c)
    (- (char->integer c) (char->integer #\A)))
   ((char-lower-case? c)
    (+ (- (char->integer c) (char->integer #\a))
       26))
   ((char-numeric? c)
    (+ (- (char->integer c) (char->integer #\0))
       52))
   ((char=? c #\+) 62)
   ((char=? c #\/) 63)
   ((char=? c #\=) 'pad)
   (else #f)))

;; To speed up decoding, build a lookup table for all ASCII
;; characters.
(define decoding-lookup-table
  (list->vector
   (map (lambda (i)
          (decode-base64-char (integer->char i)))
        (iota 128))))

(define (decode-base64-char-lookup octet)
  "Decode OCTET by looking up the pre-built lookup table."
  (vector-ref decoding-lookup-table octet))

(define (bytevector-index bv pred start)
  "Search the bytevector BV from left to right and return the index of
the first occurrence of an octet that satisfies predicate PRED."
  (let loop ((i start))
    (cond
     ((= i (bytevector-length bv)) #f)
     ((pred (bytevector-u8-ref bv i)) i)
     (else (loop (1+ i))))))

(define (base64-decode bv)
  (define (decode-group out start)
    (let loop ((i1 start) (i2 (1+ start)) (group-position 0))
      (let ((v1 (decode-base64-char-lookup (bytevector-u8-ref bv i1)))
            (v2 (decode-base64-char-lookup (bytevector-u8-ref bv i2))))
        (cond
         (v2
          (case group-position
            ((0) (put-u8 out (logior (ash v1 2) (bit-field v2 4 6))))
            ((1) (unless (eq? v2 'pad)
                   (put-u8 out (logior (ash (bit-field v1 0 4) 4) (bit-field v2 2 6)))))
            ((2) (unless (or (eq? v1 'pad) (eq? v2 'pad))
                   (put-u8 out (logior (ash (bit-field v1 0 2) 6) v2)))))
          ;; The next i1 should always be computed from the previous
          ;; i2 since there may be invalid characters between them
          ;; that we need to step over.
          (case group-position
            ;; Step to the next octet of this group.
            ((0 1) (loop i2 (1+ i2) (1+ group-position)))
            ;; This group is finished, return the possible start of
            ;; the next group.
            ((2) (1+ i2))))
         ;; v2 is invalid, try to stride one step further for v2
         ;; alone.
         (else (loop i1 (1+ i2) group-position))))))

  (match bv
    ((? bytevector? bv)
     (call-with-bytevector-output-port
      (lambda (out)
        (let loop ((start 0))
          (let ((group-start (bytevector-index bv decode-base64-char-lookup start)))
            (when (and group-start
                       (< group-start (bytevector-length bv)))
              (loop (decode-group out group-start))))))))
    ((? string? str)
     (base64-decode (iconv:string->bytevector str "us-ascii")))))

(define (encode-base64-value n)
  "Return character corresponding to numeric value N in the Base64
alphabet. If N is the symbol pad, return the = character."
  (cond
   ((eq? n 'pad) #\=)
   ((or (< n 0) (> n 63))
    (error "Invalid Base64 value"))
   ((< n 26)
    (integer->char (+ (char->integer #\A) n)))
   ((< n 52)
    (integer->char (+ (char->integer #\a) (- n 26))))
   ((< n 62)
    (integer->char (+ (char->integer #\0) (- n 52))))
   ((= n 62) #\+)
   ((= n 63) #\/)))

(define (base64-encode bv)
  (define (bytevector-ref-maybe bv index)
    (if (< index (bytevector-length bv))
        (bytevector-u8-ref bv index)
        #f))

  (define number-of-chars-per-line 76)

  (call-with-output-string
    (lambda (out)
      (let loop ((group-start 0)
                 (number-of-chars-left-on-this-line number-of-chars-per-line))
        (cond
         ((< number-of-chars-left-on-this-line base64-group-size-characters)
          (put-char out #\newline)
          (loop group-start number-of-chars-per-line))
         ((< group-start (bytevector-length bv))
          (let ((o1 (bytevector-u8-ref bv group-start))
                (o2 (bytevector-ref-maybe bv (+ group-start 1)))
                (o3 (bytevector-ref-maybe bv (+ group-start 2))))
            (for-each (lambda (n)
                        (put-char out (encode-base64-value n)))
                      (cond
                       ((and o1 o2 o3)
                        (list (bit-field o1 2 8)
                              (logior (ash (bit-field o1 0 2) 4)
                                      (bit-field o2 4 8))
                              (logior (ash (bit-field o2 0 4) 2)
                                      (bit-field o3 6 8))
                              (bit-field o3 0 6)))
                       ((and o1 o2 (not o3))
                        (list (bit-field o1 2 8)
                              (logior (ash (bit-field o1 0 2) 4)
                                      (bit-field o2 4 8))
                              (ash (bit-field o2 0 4) 2)
                              'pad))
                       ((and o1 (not o2) (not o3))
                        (list (bit-field o1 2 8)
                              (ash (bit-field o1 0 2) 4)
                              'pad 'pad))))
            (loop (+ group-start base64-group-size-octets)
                  (- number-of-chars-left-on-this-line
                     base64-group-size-characters)))))))))