aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorArun Isaac2020-05-25 05:39:16 +0530
committerArun Isaac2020-05-25 05:44:15 +0530
commit4d1e46f50c7c3ae6e9a62fe89d8ed3638c64f9b9 (patch)
tree0c80d73df3b6c2c01f386729dfd0e99420dbba87
parent4e76a0aeb91348843d779b47372563e977132a29 (diff)
downloadguile-email-4d1e46f50c7c3ae6e9a62fe89d8ed3638c64f9b9.tar.gz
guile-email-4d1e46f50c7c3ae6e9a62fe89d8ed3638c64f9b9.tar.lz
guile-email-4d1e46f50c7c3ae6e9a62fe89d8ed3638c64f9b9.zip
utils: Do not match sequence byte by byte in read-bytes-till.
* email/utils.scm (bytevector-match, bytevector-overlap, lookahead-bytevector-n): New functions. (read-bytes-till): Do not match sequence byte by byte. Process blocks of bytes at a time.
-rw-r--r--email/utils.scm75
1 files changed, 57 insertions, 18 deletions
diff --git a/email/utils.scm b/email/utils.scm
index 97c1b5a..718ad52 100644
--- a/email/utils.scm
+++ b/email/utils.scm
@@ -75,30 +75,69 @@ back into PORT."
read-result
str)))))
+(define (bytevector-match bv1 start1 bv2 start2)
+ "Return #t if bytevector BV1 starting from START1 is equal to
+bytevector BV2 starting from START2, else return #f. If the
+bytevectors are of unequal length, they are only compared up to the
+shorter of their lengths."
+ (let ((len (min (- (bytevector-length bv1) start1)
+ (- (bytevector-length bv2) start2))))
+ (let loop ((i 0))
+ (cond
+ ((= i len) #t)
+ ((not (= (bytevector-u8-ref bv1 (+ start1 i))
+ (bytevector-u8-ref bv2 (+ start2 i))))
+ #f)
+ (else (loop (1+ i)))))))
+
+(define (bytevector-overlap bv1 bv2)
+ "Return the index of bytevector BV1 from which it is equal to
+bytevector BV2 in the sense of bytevector-match. If there is no such
+index, then return the length of BV1."
+ ;; TODO: Maybe implement the Boyer-Moore string search algorithm
+ (let loop ((offset 0))
+ (cond
+ ((bytevector-match bv1 offset bv2 0) offset)
+ (else (loop (1+ offset))))))
+
+(define (lookahead-bytevector-n port count)
+ "Look ahead COUNT bytes into port, and return the seen bytevector."
+ (let ((bv (get-bytevector-n port count)))
+ (unget-bytevector port bv)
+ bv))
+
(define (read-bytes-till port sequence)
"Read bytes from PORT until byte SEQUENCE is seen or end-of-file is
reached. If SEQUENCE is seen, unget it to PORT and return."
- (define (read-bytes-and-write-till in out sequence)
- (let ((octet (get-u8 in)))
- (cond
- ((eof-object? octet) octet)
- ;; If octet read matches first octet of sequence, try matching
- ;; the full sequence.
- ((= octet (bytevector-u8-ref sequence 0))
- (unget-bytevector in sequence 0 1)
- (let ((bv (get-bytevector-n in (bytevector-length sequence))))
- (cond
- ((bytevector=? bv sequence) (unget-bytevector in bv))
- (else (unget-bytevector in bv 1)
- (put-u8 out octet)
- (read-bytes-and-write-till in out sequence)))))
- ;; Else, output the octet and continue reading.
- (else (put-u8 out octet)
- (read-bytes-and-write-till in out sequence)))))
+ (define (read-loop out)
+ (not-end-let (bv (get-bytevector-some port))
+ (let ((offset (bytevector-overlap bv sequence)))
+ ;; Write bytes before match point to output
+ (put-bytevector out bv 0 offset)
+ (cond
+ ;; Matched nothing, continue searching
+ ((= offset (bytevector-length bv))
+ (read-loop out))
+ ;; Matched full sequence, unget bytes from match point and
+ ;; quit searching
+ ((> (- (bytevector-length bv) offset)
+ (bytevector-length sequence))
+ (unget-bytevector port bv offset))
+ ;; Matched partial sequence, try to match the rest
+ (else
+ (unget-bytevector port bv offset)
+ (not-end-let
+ (lookahead (lookahead-bytevector-n
+ port (bytevector-length sequence)))
+ ;; Failed to match, take back the first matched byte,
+ ;; write it to the output and continue searching
+ (unless (bytevector=? lookahead sequence)
+ (put-u8 out (get-u8 port))
+ (read-loop out))))))))
(call-with-values open-bytevector-output-port
(lambda (out get-bytevector)
- (let ((read-result (read-bytes-and-write-till port out sequence))
+ (let ((read-result (read-loop out))
(bv (get-bytevector)))
(if (and (eof-object? read-result)
(bytevector=? bv #vu8()))